bricolage-spreadsheet 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +50 -0
- data/jobclass/spreadsheet-import.rb +72 -0
- data/lib/bricolage-spreadsheet.rb +5 -0
- data/lib/bricolage/spreadsheetdatasource.rb +181 -0
- metadata +74 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 465aebc471ab4d6adfac54aedc78c2766204d202c49009c29c848e1ef391519e
|
4
|
+
data.tar.gz: b8d4b1e847bd67d8f865ed9b5fd42ba05f0d34a08a30787166d1bee09a2b0320
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 35d5f63b44638ffc54ff4279a60e95e50337137fb635c2fe5c517db0ccf0fd6ae731427fe5997575610b8e3213eaedb9f513d73c9e51fc64c81eebc46af6e7ee
|
7
|
+
data.tar.gz: add032b531a4d56067e1cf937746777a1be317084b0c763afb2079551fc7407cbd3d4fdb698ec38ae96772eb9a09f624d7ec4dde3004966c31813ed4f77eb494
|
data/README.md
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
# bricolage-spreadsheet
|
2
|
+
|
3
|
+
Google Spreadsheet-related job classes for the Bricolage batch job framework.
|
4
|
+
|
5
|
+
## Home Page
|
6
|
+
|
7
|
+
https://github.com/bricolages/bricolage-spreadsheet
|
8
|
+
|
9
|
+
## Usage
|
10
|
+
|
11
|
+
Add the following line to your Gemfile:
|
12
|
+
```
|
13
|
+
gem 'bricolage-spreadsheet'
|
14
|
+
```
|
15
|
+
|
16
|
+
Job Options
|
17
|
+
|
18
|
+
```
|
19
|
+
% bundle exec bricolage spreadsheet-import -h
|
20
|
+
Usage: bricolage spreadsheet-import [job_class_options]
|
21
|
+
--src-ds=NAME [optional] Main data source. [default: spreadsheet]
|
22
|
+
--sheet-id=ID Spreadsheet ID
|
23
|
+
--range=RANGE_EXPR Spreadsheet Range
|
24
|
+
--format=VALUE [optional] Data file format. (csv, json)
|
25
|
+
--value-render-option=VALUE [optional] For values with format on sheets (FORMATTED_VALUE, UNFORMATTED_VALUE, FORMULA)
|
26
|
+
--s3-ds=NAME [optional] Main data source. [default: s3]
|
27
|
+
--s3-file=PATH Target file name.
|
28
|
+
--dest-ds=NAME [optional] Main data source. [default: psql]
|
29
|
+
--dest-table=[SCHEMA.]TABLE [optional] Target table name.
|
30
|
+
--options=OPTIONS [optional] Loader options.
|
31
|
+
--table-def=PATH Create table file.
|
32
|
+
--no-backup [optional] Drop dest table with suffix "_old".
|
33
|
+
--analyze [optional] ANALYZE table after SQL is executed.
|
34
|
+
--grant=KEY:VALUE [optional] GRANT table after SQL is executed. (required keys: privilege, to)
|
35
|
+
--gzip [optional] Compress Temporary files.
|
36
|
+
-v, --variable=NAME=VALUE Set variable.
|
37
|
+
--help Shows this message and quit.
|
38
|
+
--version Shows program version and quit.
|
39
|
+
```
|
40
|
+
|
41
|
+
## License
|
42
|
+
|
43
|
+
MIT license.
|
44
|
+
See the LICENSES file for details.
|
45
|
+
|
46
|
+
## Credit
|
47
|
+
|
48
|
+
Author: Shimpei Kodama
|
49
|
+
|
50
|
+
This software was written during working hours at Cookpad, Inc.
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'bricolage/psqldatasource'
|
2
|
+
|
3
|
+
module Bricolage
  # Declares the "spreadsheet-import" job class.
  #
  # The generated script consists of two tasks, in this order:
  # 1. an `s3export` task on the 'src-ds' data source, and
  # 2. a task on the 'dest-ds' data source that builds a work table, loads the
  #    exported S3 file into it and renames it over the destination table.
  JobClass.define('spreadsheet-import') do
    parameters do |params|
      # Export side: spreadsheet source and the intermediate S3 file
      params.add(DataSourceParam.new('spreadsheet', 'src-ds'))
      params.add(StringParam.new('sheet-id', 'ID', 'Google Spreadsheet ID'))
      params.add(StringParam.new('range', 'RANGE_EXPR', 'Google Spreadsheet Range Expression'))
      params.add(EnumParam.new('format', %w[csv json], 'Intermediate data file format.', default: 'json'))
      params.add(
        EnumParam.new(
          'value-render-option',
          %w[FORMATTED_VALUE UNFORMATTED_VALUE FORMULA],
          'For values with format on sheets',
          default: 'FORMATTED_VALUE'
        )
      )
      params.add(DataSourceParam.new('s3', 's3-ds'))
      params.add(DestFileParam.new('s3-file'))

      # Load side: destination table and loader settings
      params.add(DataSourceParam.new('psql', 'dest-ds'))
      params.add(DestTableParam.new)
      params.add(
        KeyValuePairsParam.new(
          'options', 'OPTIONS', 'Loader options.',
          optional: true,
          default: PSQLLoadOptions.new,
          value_handler: ->(value, _ctx, _vars) { PSQLLoadOptions.parse(value) }
        )
      )
      params.add(SQLFileParam.new('table-def', 'PATH', 'Create table file.'))
      params.add(OptionalBoolParam.new('no-backup', 'Drop dest table with suffix "_old".', default: false))

      # Miscellaneous switches
      params.add(OptionalBoolParam.new('analyze', 'ANALYZE table after SQL is executed.', default: true))
      params.add(KeyValuePairsParam.new('grant', 'KEY:VALUE', 'GRANT table after SQL is executed. (required keys: privilege, to)'))
      params.add(OptionalBoolParam.new('gzip', 'Compress Temporary files.'))
    end

    script do |params, script|
      # Task 1: export the spreadsheet range through the source data source
      script.task(params['src-ds']) do |task|
        task.s3export(
          params['sheet-id'],
          params['range'],
          params['format'],
          params['value-render-option'],
          params['s3-ds'],
          params['s3-file'],
          params['gzip']
        )
      end

      # Task 2: load the exported file and swap it into the destination table
      script.task(params['dest-ds']) do |task|
        backup_table = '${dest_table}_old'
        staging_table = '${dest_table}_wk'

        task.transaction do
          # Recreate the staging table from the table definition, with the
          # dest_table placeholder pointed at the staging table name
          task.drop_force(backup_table)
          task.drop_force(staging_table)
          task.exec(params['table-def'].replace(/\$\{?dest_table\}?\b/, staging_table))

          # The 'gzip' key is added to the loader options only when gzip is requested
          load_options =
            if params['gzip']
              params['options'].merge('gzip' => params['gzip'])
            else
              params['options']
            end
          task.load(params['s3-ds'], params['s3-file'], staging_table, params['format'], nil, load_options)

          # GRANT / ANALYZE run against the staging table, before the rename
          task.grant_if(params['grant'], staging_table)
          task.analyze_if(params['analyze'], staging_table)

          # Swap: current table -> "_old", staging table -> destination name
          task.create_dummy_table('${dest_table}')
          task.rename_table(params['dest-table'].to_s, "#{params['dest-table'].name}_old")
          task.rename_table(staging_table, params['dest-table'].name)
        end

        # Outside the transaction: discard the "_old" table when no backup is wanted
        task.drop_force(backup_table) if params['no-backup']
      end
    end
  end #spreadsheet-import job class
end
|
@@ -0,0 +1,181 @@
|
|
1
|
+
require 'bricolage/datasource'
|
2
|
+
require 'bricolage/jobresult'
|
3
|
+
require 'google/apis/sheets_v4'
|
4
|
+
require 'googleauth'
|
5
|
+
require 'csv'
|
6
|
+
require 'json'
|
7
|
+
require 'uri'
|
8
|
+
require 'tempfile'
|
9
|
+
require 'zlib'
|
10
|
+
require 'pathname'
|
11
|
+
|
12
|
+
module Bricolage
|
13
|
+
|
14
|
+
class SpreadsheetDataSource < DataSource
|
15
|
+
declare_type 'spreadsheet'
|
16
|
+
|
17
|
+
SCOPE_BASE = "Google::Apis::SheetsV4::"
|
18
|
+
DEFAULT_SCOPE = 'AUTH_SPREADSHEETS_READONLY'
|
19
|
+
DEFAULT_APPLICATION_NAME = 'Bricolage'
|
20
|
+
|
21
|
+
# Stores the data source settings.
#
# credentials::      kept as given (nil allowed); interpreted later by #credential
# scope::            name appended to SCOPE_BASE; DEFAULT_SCOPE when nil/false
# application_name:: DEFAULT_APPLICATION_NAME when nil/false
def initialize(credentials: nil, scope: nil, application_name: nil)
  scope_name = scope || DEFAULT_SCOPE

  @credentials = credentials
  @scope = "#{SCOPE_BASE}#{scope_name}"
  @application_name = application_name
  @application_name ||= DEFAULT_APPLICATION_NAME
end
|
26
|
+
|
27
|
+
attr_reader :scope, :application_name
|
28
|
+
|
29
|
+
# Builds a fresh SpreadsheetTask that is handed this data source.
def new_task
  task = SpreadsheetTask.new(self)
  task
end
|
32
|
+
|
33
|
+
# Yields every non-empty row of a spreadsheet range.
#
# sheet_id::    spreadsheet ID, passed to the API client as-is
# range::       range expression, passed to the API client as-is
# get_options:: extra keyword options forwarded to get_spreadsheet_values
#
# Returns an Enumerator over the same rows when no block is given.
# A row counts as empty when it has no cells or every cell renders as "".
def rows(sheet_id, range, **get_options)
  return enum_for(:rows, sheet_id, range, **get_options) unless block_given?

  response = service.get_spreadsheet_values(sheet_id, range, **get_options)
  # Guard against a response without `values` (presumably what the API
  # returns for a range that holds no data) instead of failing on nil.
  (response.values || []).each do |row|
    # Cells are not guaranteed to be Strings (e.g. numbers may come back when
    # an unformatted render option is requested), so test emptiness via to_s.
    next if row.all? { |cell| cell.to_s.empty? } # skip empty row

    yield row
  end
end
|
41
|
+
|
42
|
+
# Yields each row of a spreadsheet range rendered by the formatter for `format`.
#
# The first row returned by #rows is remembered as the field list and passed
# to the formatter together with every row. That first row itself is yielded
# only when the formatter's skip_header? is false.
#
# Returns an Enumerator over the formatted rows when no block is given.
def formatted_rows(sheet_id, range, format = 'csv', **get_options)
  unless block_given?
    return enum_for(:formatted_rows, sheet_id, range, format, **get_options)
  end

  formatter = RowFormatterFactory.new_formatter(format)
  header = []
  rows(sheet_id, range, **get_options).each_with_index do |cells, position|
    is_first = position.zero?
    header = cells if is_first
    # skip_header? is consulted for the first row only
    next if is_first && formatter.skip_header?

    yield formatter.format(cells, header)
  end
end
|
54
|
+
|
55
|
+
# Returns an IO-like object holding the service account key JSON.
#
# Lookup order:
# 1. @credentials as a Hash  -> its JSON serialization
# 2. @credentials as a path to an existing file -> that file's content
# 3. the file named by the GOOGLE_APPLICATION_CREDENTIALS environment variable
#
# File content is read eagerly and wrapped in a StringIO so that no open file
# handle is handed out for the caller to forget to close.
#
# Raises ParameterError when @credentials is set but is neither a Hash nor an
# existing path, or when no credential source is configured at all.
def credential
  if @credentials
    return StringIO.new(@credentials.to_json) if @credentials.is_a?(Hash)
    return StringIO.new(File.read(@credentials)) if Pathname.new(@credentials).exist?

    raise ParameterError, "credentials must be a JSON or PATH. credentials=#{@credentials}"
  end

  env_path = ENV['GOOGLE_APPLICATION_CREDENTIALS']
  raise ParameterError, "credentials or GOOGLE_APPLICATION_CREDENTIALS is required." unless env_path

  StringIO.new(File.read(env_path))
end
|
68
|
+
|
69
|
+
# Task type for the spreadsheet data source; it only knows how to queue
# spreadsheet-to-S3 export actions.
class SpreadsheetTask < DataSourceTask

  # Queues an S3ExportAction built from the given arguments (passed through
  # unchanged, in the same order).
  def s3export(sheet_id, range, format, value_render_option, dest_ds, dest_file, gzip)
    add S3ExportAction.new(sheet_id, range, format, value_render_option, dest_ds, dest_file, gzip)
  end

  # Action that fetches a spreadsheet range via the data source, writes the
  # formatted rows to a temporary file and uploads that file through dest_ds.
  class S3ExportAction < Action
    def initialize(sheet_id, range, format, value_render_option, dest_ds, dest_file, gzip)
      @sheet_id = sheet_id
      @range = range
      @format = format
      @value_render_option = value_render_option
      @dest_ds = dest_ds
      @dest_file = dest_file
      @gzip = gzip
    end

    attr_reader :sheet_id, :range, :dest_ds, :dest_file, :gzip

    # Percent-encoded form of the range, memoized. Only used to build the
    # log text in #source.
    def url_encoded_range
      # URI.encode is obsolete and no longer exists in Ruby 3.0+; the default
      # parser's #escape is the implementation it used to delegate to.
      @url_encoded_range ||= URI::DEFAULT_PARSER.escape(range)
    end

    # Render option in upper case, or nil when none was given.
    def value_render_option
      @value_render_option&.upcase
    end

    # Format name in lower case.
    def format
      @format.downcase
    end

    # Two-line description of the action (source request and destination
    # object); #run writes it to the log.
    def source
      <<~SOURCE
        "GET https://sheets.googleapis.com/v4/spreadsheets/#{sheet_id}/values/#{url_encoded_range}"
        "PUT s3://#{dest_ds.bucket_name}/#{dest_ds.prefix}/#{dest_file}"
      SOURCE
    end

    # Fetches the formatted rows, writes them newline-joined (gzip-compressed
    # when `gzip` is set) to a temporary file and uploads it. Returns nil.
    def run
      ds.logger.info source
      rows = ds.formatted_rows(sheet_id, range, format, value_render_option: value_render_option)
      # Tempfile.create removes the file once the block ends, so exports do
      # not leave temporary files behind.
      Tempfile.create do |tmp|
        out = gzip ? Zlib::GzipWriter.wrap(tmp) : tmp
        out.write rows.to_a.join("\n")
        out.close # flush; closing the gzip writer also closes tmp
        dest_ds.object(dest_file).upload_file(tmp.path)
      end
      nil
    end

  end #S3ExportAction
end #SpreadsheetTask
|
121
|
+
|
122
|
+
private
|
123
|
+
|
124
|
+
# Returns the memoized Sheets API client, building it on first use.
#
# The client is cached only after it has been fully configured: if
# `authorizer` raises, nothing is stored and the next call starts over
# instead of handing back a client that has no authorization set.
def service
  @service ||= begin
    client = Google::Apis::SheetsV4::SheetsService.new
    client.client_options.application_name = application_name
    client.authorization = authorizer
    client
  end
end
|
131
|
+
|
132
|
+
# Returns the memoized service account credentials object, creating it and
# fetching an access token on first use.
#
# The object is cached only after fetch_access_token! succeeded, so a failed
# fetch is retried on the next call.
def authorizer
  return @authorizer if @authorizer

  creds = Google::Auth::ServiceAccountCredentials.make_creds(
    json_key_io: credential,
    scope: resolved_scope,
  )
  # token lifetime is 3600 sec
  # May need to implement refresh logic for long running app???
  creds.fetch_access_token!
  @authorizer = creds
end

# Turns `scope` into the value handed to the credentials factory.
#
# `scope` is a constant *name* (SCOPE_BASE + scope name, built in
# #initialize). When that name resolves to a String constant, the constant's
# value is used; otherwise the raw string is passed through unchanged.
# NOTE(review): assumes the OAuth client needs the scope URL stored in the
# constant rather than the constant's name — confirm against googleauth.
def resolved_scope
  value = Object.const_get(scope)
  value.is_a?(String) ? value : scope
rescue NameError
  scope
end
|
143
|
+
|
144
|
+
# Looks up row formatter classes by format name and holds the built-in
# CSV and JSON formatters.
class RowFormatterFactory
  # Returns a new instance of the nested "<FORMAT>Formatter" class.
  #
  # format:: format name in any letter case ('csv', 'CSV', :json, ...)
  #
  # The argument is never modified. (The bang variant `upcase!` must not be
  # used here: it returns nil for an already upper-case name and mutates the
  # caller's string.)
  # Raises NameError when no matching formatter constant exists.
  def self.new_formatter(format)
    const_get("#{format.to_s.upcase}Formatter").new
  end

  # Renders a row as one CSV line with every field quoted.
  class CSVFormatter
    # values:: cells of the row; non-String cells are converted with to_s
    # fields:: header row (unused by this formatter)
    def format(values, fields)
      # remove '\n' with row_sep: nil
      # Line breaks inside a cell are replaced by the two characters "\n" so
      # that each row stays on a single line.
      escaped_values = values.map { |v| v.to_s.gsub(/\n/, '\\n') }
      CSV.generate_line(escaped_values, row_sep: nil, quote_char: '"', force_quotes: true)
    end

    # The header row is emitted like any other row.
    def skip_header?
      false
    end
  end

  # Renders a row as a JSON object keyed by snake_cased header names.
  class JSONFormatter
    # values:: cells of the row
    # fields:: header row; each name is converted with to_s and normalized
    #          ("::" -> "/", CamelCase -> snake_case, "-" and " " -> "_")
    def format(values, fields)
      # https://stackoverflow.com/questions/1509915/converting-camel-case-to-underscore-case-in-ruby
      normalized_fields = fields.map { |f|
        f.to_s.gsub(/::/, '/').
          gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2').
          gsub(/([a-z\d])([A-Z])/, '\1_\2').
          tr("-", "_").
          tr(" ", "_").
          downcase
      }
      normalized_fields.zip(values).to_h.to_json
    end

    # The header row only supplies the keys and is not emitted itself.
    def skip_header?
      true
    end
  end
end # RowFormatterFactory
|
180
|
+
end
|
181
|
+
end
|
metadata
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bricolage-spreadsheet
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Shimpei Kodama
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2021-04-19 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bricolage
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 5.26.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 5.26.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: google-apis-sheets_v4
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.4.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.4.0
|
41
|
+
description:
|
42
|
+
email: shimpeko@gmail.com
|
43
|
+
executables: []
|
44
|
+
extensions: []
|
45
|
+
extra_rdoc_files: []
|
46
|
+
files:
|
47
|
+
- README.md
|
48
|
+
- jobclass/spreadsheet-import.rb
|
49
|
+
- lib/bricolage-spreadsheet.rb
|
50
|
+
- lib/bricolage/spreadsheetdatasource.rb
|
51
|
+
homepage: https://github.com/bricolages/bricolage-spreadsheet
|
52
|
+
licenses:
|
53
|
+
- MIT
|
54
|
+
metadata: {}
|
55
|
+
post_install_message:
|
56
|
+
rdoc_options: []
|
57
|
+
require_paths:
|
58
|
+
- lib
|
59
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
60
|
+
requirements:
|
61
|
+
- - "~>"
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: 2.7.0
|
64
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
requirements: []
|
70
|
+
rubygems_version: 3.1.4
|
71
|
+
signing_key:
|
72
|
+
specification_version: 4
|
73
|
+
summary: Google Spreadsheet-related job classes for Bricolage batch framework
|
74
|
+
test_files: []
|