bricolage-spreadsheet 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 465aebc471ab4d6adfac54aedc78c2766204d202c49009c29c848e1ef391519e
4
+ data.tar.gz: b8d4b1e847bd67d8f865ed9b5fd42ba05f0d34a08a30787166d1bee09a2b0320
5
+ SHA512:
6
+ metadata.gz: 35d5f63b44638ffc54ff4279a60e95e50337137fb635c2fe5c517db0ccf0fd6ae731427fe5997575610b8e3213eaedb9f513d73c9e51fc64c81eebc46af6e7ee
7
+ data.tar.gz: add032b531a4d56067e1cf937746777a1be317084b0c763afb2079551fc7407cbd3d4fdb698ec38ae96772eb9a09f624d7ec4dde3004966c31813ed4f77eb494
data/README.md ADDED
@@ -0,0 +1,50 @@
1
+ # bricolage-spreadsheet
2
+
3
+ Google Spreadsheet-related job classes for the Bricolage batch job framework.
4
+
5
+ ## Home Page
6
+
7
+ https://github.com/bricolages/bricolage-spreadsheet
8
+
9
+ ## Usage
10
+
11
+ Add the following line to your Gemfile:
12
+ ```
13
+ gem 'bricolage-spreadsheet'
14
+ ```
15
+
16
+ Job Options
17
+
18
+ ```
19
+ % bundle exec bricolage spreadsheet-import -h
20
+ Usage: bricolage spreadsheet-import [job_class_options]
21
+ --src-ds=NAME [optional] Main data source. [default: spreadsheet]
22
+ --sheet-id=ID Spreadsheet ID
23
+ --range=RANGE_EXPR Spreadsheet Range
24
+ --format=VALUE [optional] Data file format. (csv, json)
25
+ --value-render-option=VALUE [optional] For values with format on sheets (FORMATTED_VALUE, UNFORMATTED_VALUE, FORMULA)
26
+ --s3-ds=NAME [optional] Main data source. [default: s3]
27
+ --s3-file=PATH Target file name.
28
+ --dest-ds=NAME [optional] Main data source. [default: psql]
29
+ --dest-table=[SCHEMA.]TABLE [optional] Target table name.
30
+ --options=OPTIONS [optional] Loader options.
31
+ --table-def=PATH Create table file.
32
+ --no-backup [optional] Drop dest table with suffix "_old".
33
+ --analyze [optional] ANALYZE table after SQL is executed.
34
+ --grant=KEY:VALUE [optional] GRANT table after SQL is executed. (required keys: privilege, to)
35
+ --gzip [optional] Compress Temporary files.
36
+ -v, --variable=NAME=VALUE Set variable.
37
+ --help Shows this message and quit.
38
+ --version Shows program version and quit.
39
+ ```
40
+
41
+ ## License
42
+
43
+ MIT license.
44
+ See the LICENSES file for details.
45
+
46
+ ## Credit
47
+
48
+ Author: Shimpei Kodama
49
+
50
+ This software was written during working hours at Cookpad, Inc.
@@ -0,0 +1,72 @@
1
+ require 'bricolage/psqldatasource'
2
+
3
+ module Bricolage
4
+ JobClass.define('spreadsheet-import') { # defines the 'spreadsheet-import' job class
5
+ parameters {|params| # option declarations for this job
6
+ # S3Export: source sheet, intermediate file format, and S3 staging location
7
+ params.add DataSourceParam.new('spreadsheet', 'src-ds')
8
+ params.add StringParam.new('sheet-id', 'ID', 'Google Spreadsheet ID')
9
+ params.add StringParam.new('range', 'RANGE_EXPR', 'Google Spreadsheet Range Expression')
10
+ params.add EnumParam.new('format', %w(csv json), 'Intermediate data file format.', default: 'json') # used for both the export and the load below
11
+ params.add EnumParam.new('value-render-option', %w(FORMATTED_VALUE UNFORMATTED_VALUE FORMULA), 'For values with format on sheets', default: 'FORMATTED_VALUE') # handed to the export task unchanged
12
+ params.add DataSourceParam.new('s3', 's3-ds')
13
+ params.add DestFileParam.new('s3-file')
14
+
15
+ # Load: destination table and loader settings
16
+ params.add DataSourceParam.new('psql', 'dest-ds')
17
+ params.add DestTableParam.new
18
+ params.add KeyValuePairsParam.new('options', 'OPTIONS', 'Loader options.',
19
+ optional: true, default: PSQLLoadOptions.new,
20
+ value_handler: lambda {|value, ctx, vars| PSQLLoadOptions.parse(value) }) # ctx and vars are accepted but unused
21
+ params.add SQLFileParam.new('table-def', 'PATH', 'Create table file.')
22
+ params.add OptionalBoolParam.new('no-backup', 'Drop dest table with suffix "_old".', default: false) # when set, the "_old" table is dropped at the end of the load task
23
+
24
+ # Misc: post-load maintenance and compression
25
+ params.add OptionalBoolParam.new('analyze', 'ANALYZE table after SQL is executed.', default: true)
26
+ params.add KeyValuePairsParam.new('grant', 'KEY:VALUE', 'GRANT table after SQL is executed. (required keys: privilege, to)')
27
+ params.add OptionalBoolParam.new('gzip', 'Compress Temporary files.') # also forwarded to the loader as the 'gzip' option when set
28
+ }
29
+
30
+ script {|params, script| # builds the job's tasks from the parsed parameters
31
+ # S3Export: runs on the source data source and exports the sheet range to s3-file
32
+ script.task(params['src-ds']) {|task|
33
+ task.s3export params['sheet-id'],
34
+ params['range'],
35
+ params['format'],
36
+ params['value-render-option'],
37
+ params['s3-ds'],
38
+ params['s3-file'],
39
+ params['gzip']
40
+ }
41
+
42
+ # Load: runs on the destination data source
43
+ script.task(params['dest-ds']) {|task|
44
+ prev_table = '${dest_table}_old' # NOTE(review): '${dest_table}' stays literal here -- presumably expanded later by Bricolage variable substitution; confirm
45
+ work_table = '${dest_table}_wk' # staging table that receives the new data
46
+
47
+ task.transaction { # everything up to and including the renames is queued inside this transaction block
48
+ # CREATE: drop leftovers, then create the work table from table-def
49
+ task.drop_force prev_table
50
+ task.drop_force work_table
51
+ task.exec params['table-def'].replace(/\$\{?dest_table\}?\b/, work_table) # substitutes the work table for the $dest_table / ${dest_table} placeholder in the DDL
52
+
53
+ # COPY: load the staged S3 file into the work table
54
+ options = params['gzip'] ? params['options'].merge('gzip' => params['gzip']) : params['options'] # add 'gzip' to the loader options only when --gzip was given
55
+ task.load params['s3-ds'], params['s3-file'], work_table,
56
+ params['format'], nil, options
57
+
58
+ # GRANT, ANALYZE: applied to the work table before it is renamed into place
59
+ task.grant_if params['grant'], work_table
60
+ task.analyze_if params['analyze'], work_table
61
+
62
+ # RENAME: current table becomes "<name>_old", work table becomes the destination table
63
+ task.create_dummy_table '${dest_table}' # NOTE(review): presumably ensures the destination exists so the first rename cannot fail -- confirm
64
+ task.rename_table params['dest-table'].to_s, "#{params['dest-table'].name}_old"
65
+ task.rename_table work_table, params['dest-table'].name
66
+ }
67
+ # No Backup: queued after the transaction block, drops the "_old" table kept by the rename
68
+ task.drop_force prev_table if params['no-backup']
69
+ }
70
+ }
71
+ } #spreadsheet-import job class
72
+ end
@@ -0,0 +1,5 @@
1
+ require 'bricolage/jobclass'
2
+ require 'pathname'
3
+
4
+ jobclass_path = Pathname(__dir__).realpath.parent.cleanpath + 'jobclass' # "jobclass" directory next to this file's directory (symlinks resolved)
5
+ Bricolage::JobClass.add_load_path jobclass_path # register that directory as a job class load path
@@ -0,0 +1,181 @@
1
+ require 'bricolage/datasource'
2
+ require 'bricolage/jobresult'
3
+ require 'google/apis/sheets_v4'
4
+ require 'googleauth'
5
+ require 'csv'
6
+ require 'json'
7
+ require 'uri'
8
+ require 'tempfile'
9
+ require 'zlib'
10
+ require 'pathname'
11
+
12
+ module Bricolage
13
+
14
+ class SpreadsheetDataSource < DataSource
15
+ declare_type 'spreadsheet'
16
+
17
+ SCOPE_BASE = "Google::Apis::SheetsV4::"
18
+ DEFAULT_SCOPE = 'AUTH_SPREADSHEETS_READONLY'
19
+ DEFAULT_APPLICATION_NAME = 'Bricolage'
20
+
21
+ def initialize(credentials: nil, scope: nil, application_name: nil) # every setting may be omitted
22
+ @credentials = credentials # consumed later by #credential
23
+ @scope = "#{SCOPE_BASE}#{(scope || DEFAULT_SCOPE)}" # builds a constant *name* such as "Google::Apis::SheetsV4::AUTH_SPREADSHEETS_READONLY"; NOTE(review): this string is what #authorizer passes as scope -- confirm it does not need resolving to the constant's value
24
+ @application_name = application_name || DEFAULT_APPLICATION_NAME # falls back to DEFAULT_APPLICATION_NAME
25
+ end
26
+
27
+ attr_reader :scope, :application_name
28
+
29
+ def new_task # returns a SpreadsheetTask bound to this data source
30
+ SpreadsheetTask.new(self)
31
+ end
32
+
33
# Fetches the values of +range+ in spreadsheet +sheet_id+ and yields every
# non-empty row.
#
# @param sheet_id [String] spreadsheet ID handed to the Sheets service
# @param range [String] range expression handed to the Sheets service
# @param get_options [Hash] extra keyword options forwarded unchanged to
#   +service.get_spreadsheet_values+
# @yield [row] the Array of cell values of one row
# @return [Enumerator] when called without a block
def rows(sheet_id, range, **get_options)
  return enum_for(:rows, sheet_id, range, **get_options) unless block_given?

  response = service.get_spreadsheet_values(sheet_id, range, **get_options)
  # `values` may be nil when the requested range holds no data at all.
  (response.values || []).each do |row|
    # Cells are not guaranteed to be Strings (unformatted values can be
    # numbers or booleans), so test their string form rather than #empty?.
    next if row.all? { |cell| cell.to_s.empty? } # skip empty row

    yield row
  end
end
41
+
42
+ def formatted_rows(sheet_id, range, format = 'csv', **get_options) # yields each row serialized by the formatter selected by `format`
43
+ return enum_for(:formatted_rows, sheet_id, range, format, **get_options) unless block_given? # without a block, return an Enumerator over the same rows
44
+ row_formatter = RowFormatterFactory.new_formatter(format)
45
+ fields = []
46
+ rows(sheet_id, range, **get_options).each_with_index do |row, idx|
47
+ if idx == 0 # the first row delivered by #rows is treated as the header
48
+ fields = row # kept so that every row is formatted against the header
49
+ next if row_formatter.skip_header? # the formatter decides whether the header row itself is emitted
50
+ end
51
+ yield row_formatter.format(row, fields)
52
+ end
53
+ end
54
+
55
# Opens the Google service-account key as a readable IO object.
#
# The key comes from the +credentials+ setting when it is present and
# otherwise from the file named by the GOOGLE_APPLICATION_CREDENTIALS
# environment variable. The +credentials+ setting may be:
#   * a Hash holding the key (serialized to JSON in memory),
#   * the path of an existing key file (String or Pathname), or
#   * a String containing the key as a JSON document.
#
# @return [StringIO, File] IO over the key. A returned File is not closed
#   here; closing it is left to the caller.
# @raise [ParameterError] when no usable credentials are configured
def credential
  unless @credentials
    key_path = ENV['GOOGLE_APPLICATION_CREDENTIALS']
    raise ParameterError, "credentials or GOOGLE_APPLICATION_CREDENTIALS is required." unless key_path

    return File.open(key_path)
  end

  case @credentials
  when Hash
    StringIO.new(@credentials.to_json)
  when String, Pathname
    text = @credentials.to_s
    if Pathname.new(text).exist?
      # An existing path always wins, exactly as before.
      File.open(text)
    elsif text.lstrip.start_with?('{')
      # Inline JSON document. Validate it up front and never echo it back:
      # it may contain the private key.
      begin
        JSON.parse(text)
      rescue JSON::ParserError
        raise ParameterError, "credentials looks like JSON but could not be parsed."
      end
      StringIO.new(text)
    else
      raise ParameterError, "credentials must be a JSON or PATH. credentials=#{@credentials}"
    end
  else
    # Unsupported type: report it as a parameter problem instead of letting
    # Pathname.new raise a TypeError.
    raise ParameterError, "credentials must be a JSON or PATH. credentials=#{@credentials.class}"
  end
end
68
+
69
# Task for the 'spreadsheet' data source; job scripts queue actions on it.
class SpreadsheetTask < DataSourceTask

  # Queues an action that exports a sheet range to a file on +dest_ds+.
  # All arguments are passed to S3ExportAction unchanged.
  def s3export(sheet_id, range, format, value_render_option, dest_ds, dest_file, gzip)
    add S3ExportAction.new(sheet_id, range, format, value_render_option, dest_ds, dest_file, gzip)
  end

  # Reads a range through the data source, serializes it row by row and
  # uploads the result to +dest_file+ on +dest_ds+.
  class S3ExportAction < Action
    # @param sheet_id [String] spreadsheet ID
    # @param range [String] range expression
    # @param format [String] row format name; lower-cased by #format
    # @param value_render_option [String, nil] upper-cased by
    #   #value_render_option when present
    # @param dest_ds [Object] destination data source; must respond to
    #   bucket_name, prefix and object
    # @param dest_file [String] destination file name
    # @param gzip [Boolean, nil] compress the uploaded file when truthy
    def initialize(sheet_id, range, format, value_render_option, dest_ds, dest_file, gzip)
      @sheet_id = sheet_id
      @range = range
      @format = format
      @value_render_option = value_render_option
      @dest_ds = dest_ds
      @dest_file = dest_file
      @gzip = gzip
    end

    attr_reader :sheet_id, :range, :dest_ds, :dest_file, :gzip

    # Range escaped for display inside the request URL (memoized).
    # URI.encode is obsolete in Ruby 2.7 and removed in 3.0; the parser's
    # #escape applies the same escaping rules.
    def url_encoded_range
      @url_encoded_range ||= URI::DEFAULT_PARSER.escape(range)
    end

    # Render option in upper case, or nil when none was given.
    def value_render_option
      @value_render_option&.upcase
    end

    # Format name in lower case.
    def format
      @format.downcase
    end

    # Human-readable description of the action; #run writes it to the log.
    def source
      <<~SOURCE
        "GET https://sheets.googleapis.com/v4/spreadsheets/#{sheet_id}/values/#{url_encoded_range}"
        "PUT s3://#{dest_ds.bucket_name}/#{dest_ds.prefix}/#{dest_file}"
      SOURCE
    end

    # Fetches the formatted rows, writes them newline-separated to a
    # temporary file (gzip-compressed when +gzip+ is set) and uploads it.
    #
    # @return [nil]
    def run
      ds.logger.info source
      rows = ds.formatted_rows(sheet_id, range, format, value_render_option: value_render_option)
      Tempfile.open do |f|
        f = Zlib::GzipWriter.wrap(f) if gzip
        f.write rows.to_a.join("\n")
        # Close before uploading so that everything written is on disk.
        f.close # flush
        dest_ds.object(dest_file).upload_file(f.path)
      end
      nil
    end

  end #S3ExportAction
end #SpreadsheetTask
121
+
122
+ private
123
+
124
+ def service # Sheets API client, created on first use and then reused
125
+ return @service if @service
126
+ @service = Google::Apis::SheetsV4::SheetsService.new
127
+ @service.client_options.application_name = application_name
128
+ @service.authorization = authorizer # first call builds the credentials and fetches a token
129
+ @service
130
+ end
131
+
132
+ def authorizer # service-account credentials built from #credential and #scope, created once and reused
133
+ return @authorizer if @authorizer
134
+ @authorizer = Google::Auth::ServiceAccountCredentials.make_creds(
135
+ json_key_io: credential,
136
+ scope: scope,
137
+ )
138
+ # token lifetime is 3600 sec
139
+ # TODO: the token is fetched once below and never refreshed here; a long-running app may need refresh logic
140
+ @authorizer.fetch_access_token!
141
+ @authorizer
142
+ end
143
+
144
# Creates the formatter that serializes one spreadsheet row into one line
# of the intermediate data file.
class RowFormatterFactory
  # Returns a new formatter for +format+.
  #
  # The class is looked up by name as "<FORMAT>Formatter", so "csv", "CSV",
  # "json" and "JSON" are all accepted.
  #
  # @param format [String] format name in any letter case; not modified
  # @return [CSVFormatter, JSONFormatter]
  # @raise [NameError] when no formatter exists for +format+
  def self.new_formatter(format)
    # Non-destructive upcase: upcase! returns nil when the name is already
    # upper case, and it would mutate (or fail on a frozen) caller string.
    const_get("#{format.upcase}Formatter").new
  end

  # Serializes a row as one fully quoted CSV line without a line terminator.
  class CSVFormatter
    # @param values [Array] cell values of the row
    # @param fields [Array] header row (unused by this formatter)
    # @return [String] the CSV line
    def format(values, fields)
      # Cells may be numbers or booleans, hence to_s. Embedded newlines are
      # replaced with a literal backslash-n so a record never spans lines.
      escaped_values = values.map { |v| v.to_s.gsub(/\n/, '\\n') }
      # row_sep: nil suppresses the trailing line terminator.
      CSV.generate_line(escaped_values, row_sep: nil, quote_char: '"', force_quotes: true)
    end

    # The header row is emitted like any other row.
    def skip_header?
      false
    end
  end

  # Serializes a row as a JSON object keyed by snake_cased header names.
  class JSONFormatter
    # @param values [Array] cell values of the row
    # @param fields [Array] header row; each name is converted to snake_case
    # @return [String] JSON object text
    def format(values, fields)
      # https://stackoverflow.com/questions/1509915/converting-camel-case-to-underscore-case-in-ruby
      normalized_fields = fields.map { |f|
        f.to_s.gsub(/::/, '/').
          gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2').
          gsub(/([a-z\d])([A-Z])/, '\1_\2').
          tr("-", "_").
          tr(" ", "_").
          downcase
      }
      # A row shorter than the header gets nil (JSON null) for missing cells.
      normalized_fields.zip(values).to_h.to_json
    end

    # The header row only supplies the keys and is not emitted itself.
    def skip_header?
      true
    end
  end
end # RowFormatterFactory
180
+ end
181
+ end
metadata ADDED
@@ -0,0 +1,74 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bricolage-spreadsheet
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Shimpei Kodama
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2021-04-19 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bricolage
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 5.26.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 5.26.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: google-apis-sheets_v4
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.4.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.4.0
41
+ description:
42
+ email: shimpeko@gmail.com
43
+ executables: []
44
+ extensions: []
45
+ extra_rdoc_files: []
46
+ files:
47
+ - README.md
48
+ - jobclass/spreadsheet-import.rb
49
+ - lib/bricolage-spreadsheet.rb
50
+ - lib/bricolage/spreadsheetdatasource.rb
51
+ homepage: https://github.com/bricolages/bricolage-spreadsheet
52
+ licenses:
53
+ - MIT
54
+ metadata: {}
55
+ post_install_message:
56
+ rdoc_options: []
57
+ require_paths:
58
+ - lib
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - "~>"
62
+ - !ruby/object:Gem::Version
63
+ version: 2.7.0
64
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ requirements: []
70
+ rubygems_version: 3.1.4
71
+ signing_key:
72
+ specification_version: 4
73
+ summary: Google Spreadsheet-related job classes for Bricolage batch framework
74
+ test_files: []