bricolage-spreadsheet 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +50 -0
- data/jobclass/spreadsheet-import.rb +72 -0
- data/lib/bricolage-spreadsheet.rb +5 -0
- data/lib/bricolage/spreadsheetdatasource.rb +181 -0
- metadata +74 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 465aebc471ab4d6adfac54aedc78c2766204d202c49009c29c848e1ef391519e
|
4
|
+
data.tar.gz: b8d4b1e847bd67d8f865ed9b5fd42ba05f0d34a08a30787166d1bee09a2b0320
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 35d5f63b44638ffc54ff4279a60e95e50337137fb635c2fe5c517db0ccf0fd6ae731427fe5997575610b8e3213eaedb9f513d73c9e51fc64c81eebc46af6e7ee
|
7
|
+
data.tar.gz: add032b531a4d56067e1cf937746777a1be317084b0c763afb2079551fc7407cbd3d4fdb698ec38ae96772eb9a09f624d7ec4dde3004966c31813ed4f77eb494
|
data/README.md
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
# bricolage-spreadsheet
|
2
|
+
|
3
|
+
Google Spreadsheet-related job classes for Bricolage batch job framework.
|
4
|
+
|
5
|
+
## Home Page
|
6
|
+
|
7
|
+
https://github.com/bricolages/bricolage-spreadsheet
|
8
|
+
|
9
|
+
## Usage
|
10
|
+
|
11
|
+
Add the following line to your Gemfile:
|
12
|
+
```
|
13
|
+
gem 'bricolage-spreadsheet'
|
14
|
+
```
|
15
|
+
|
16
|
+
Job Options
|
17
|
+
|
18
|
+
```
|
19
|
+
% bundle exec bricolage spreadsheet-import -h
|
20
|
+
Usage: bricolage spreadsheet-import [job_class_options]
|
21
|
+
--src-ds=NAME [optional] Main data source. [default: spreadsheet]
|
22
|
+
--sheet-id=ID Spreadsheet ID
|
23
|
+
--range=RANGE_EXPR Spreadsheet Range
|
24
|
+
--format=VALUE [optional] Data file format. (csv, json)
|
25
|
+
--value-render-option=VALUE [optional] For values with format on sheets (FORMATTED_VALUE, UNFORMATTED_VALUE, FORMULA)
|
26
|
+
--s3-ds=NAME [optional] Main data source. [default: s3]
|
27
|
+
--s3-file=PATH Target file name.
|
28
|
+
--dest-ds=NAME [optional] Main data source. [default: psql]
|
29
|
+
--dest-table=[SCHEMA.]TABLE [optional] Target table name.
|
30
|
+
--options=OPTIONS [optional] Loader options.
|
31
|
+
--table-def=PATH Create table file.
|
32
|
+
--no-backup [optional] Drop dest table with suffix "_old".
|
33
|
+
--analyze [optional] ANALYZE table after SQL is executed.
|
34
|
+
--grant=KEY:VALUE [optional] GRANT table after SQL is executed. (required keys: privilege, to)
|
35
|
+
--gzip [optional] Compress Temporary files.
|
36
|
+
-v, --variable=NAME=VALUE Set variable.
|
37
|
+
--help Shows this message and quit.
|
38
|
+
--version Shows program version and quit.
|
39
|
+
```
|
40
|
+
|
41
|
+
## License
|
42
|
+
|
43
|
+
MIT license.
|
44
|
+
See LICENSES file for details.
|
45
|
+
|
46
|
+
## Credit
|
47
|
+
|
48
|
+
Author: Shimpei Kodama
|
49
|
+
|
50
|
+
This software was written during working hours at Cookpad, Inc.
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'bricolage/psqldatasource'
|
2
|
+
|
3
|
+
module Bricolage
  # Job class 'spreadsheet-import'.
  #
  # Declares the job parameters and builds a two-task script:
  #   1. on the 'src-ds' data source, an s3export of the spreadsheet range;
  #   2. on the 'dest-ds' data source, a load of the exported file into a
  #      work table followed by a rename onto the destination table.
  JobClass.define('spreadsheet-import') do
    parameters do |params|
      # S3Export
      params.add(DataSourceParam.new('spreadsheet', 'src-ds'))
      params.add(StringParam.new('sheet-id', 'ID', 'Google Spreadsheet ID'))
      params.add(StringParam.new('range', 'RANGE_EXPR', 'Google Spreadsheet Range Expression'))
      params.add(EnumParam.new('format', %w(csv json), 'Intermediate data file format.', default: 'json'))
      params.add(
        EnumParam.new(
          'value-render-option',
          %w(FORMATTED_VALUE UNFORMATTED_VALUE FORMULA),
          'For values with format on sheets',
          default: 'FORMATTED_VALUE'
        )
      )
      params.add(DataSourceParam.new('s3', 's3-ds'))
      params.add(DestFileParam.new('s3-file'))

      # Load
      params.add(DataSourceParam.new('psql', 'dest-ds'))
      params.add(DestTableParam.new)
      params.add(
        KeyValuePairsParam.new(
          'options', 'OPTIONS', 'Loader options.',
          optional: true,
          default: PSQLLoadOptions.new,
          value_handler: lambda { |value, _ctx, _vars| PSQLLoadOptions.parse(value) }
        )
      )
      params.add(SQLFileParam.new('table-def', 'PATH', 'Create table file.'))
      params.add(OptionalBoolParam.new('no-backup', 'Drop dest table with suffix "_old".', default: false))

      # Misc
      params.add(OptionalBoolParam.new('analyze', 'ANALYZE table after SQL is executed.', default: true))
      params.add(
        KeyValuePairsParam.new('grant', 'KEY:VALUE', 'GRANT table after SQL is executed. (required keys: privilege, to)')
      )
      params.add(OptionalBoolParam.new('gzip', 'Compress Temporary files.'))
    end

    script do |params, script|
      # S3Export: spreadsheet range -> file on the S3 data source
      script.task(params['src-ds']) do |task|
        task.s3export(
          params['sheet-id'],
          params['range'],
          params['format'],
          params['value-render-option'],
          params['s3-ds'],
          params['s3-file'],
          params['gzip']
        )
      end

      # Load: S3 file -> work table -> destination table
      script.task(params['dest-ds']) do |task|
        # Single-quoted on purpose: ${dest_table} is left literally in the string.
        backup_table = '${dest_table}_old'
        staging_table = '${dest_table}_wk'

        task.transaction do
          # CREATE
          task.drop_force(backup_table)
          task.drop_force(staging_table)
          task.exec(params['table-def'].replace(/\$\{?dest_table\}?\b/, staging_table))

          # COPY (the gzip flag is merged into the loader options only when set)
          load_options =
            if params['gzip']
              params['options'].merge('gzip' => params['gzip'])
            else
              params['options']
            end
          task.load(
            params['s3-ds'], params['s3-file'], staging_table,
            params['format'], nil, load_options
          )

          # GRANT, ANALYZE
          task.grant_if(params['grant'], staging_table)
          task.analyze_if(params['analyze'], staging_table)

          # RENAME: current table -> *_old, work table -> destination name
          task.create_dummy_table('${dest_table}')
          task.rename_table(params['dest-table'].to_s, "#{params['dest-table'].name}_old")
          task.rename_table(staging_table, params['dest-table'].name)
        end

        # No Backup
        task.drop_force(backup_table) if params['no-backup']
      end
    end
  end # spreadsheet-import job class
end
|
@@ -0,0 +1,181 @@
|
|
1
|
+
require 'bricolage/datasource'
|
2
|
+
require 'bricolage/jobresult'
|
3
|
+
require 'google/apis/sheets_v4'
|
4
|
+
require 'googleauth'
|
5
|
+
require 'csv'
|
6
|
+
require 'json'
|
7
|
+
require 'uri'
|
8
|
+
require 'tempfile'
|
9
|
+
require 'zlib'
|
10
|
+
require 'pathname'
|
11
|
+
|
12
|
+
module Bricolage
|
13
|
+
|
14
|
+
class SpreadsheetDataSource < DataSource
|
15
|
+
declare_type 'spreadsheet'
|
16
|
+
|
17
|
+
SCOPE_BASE = "Google::Apis::SheetsV4::"
|
18
|
+
DEFAULT_SCOPE = 'AUTH_SPREADSHEETS_READONLY'
|
19
|
+
DEFAULT_APPLICATION_NAME = 'Bricolage'
|
20
|
+
|
21
|
+
# Stores the data source settings.
#
# credentials      - kept as given; interpreted later by #credential.
# scope            - constant name appended to SCOPE_BASE
#                    (DEFAULT_SCOPE when nil/false).
# application_name - DEFAULT_APPLICATION_NAME when nil/false.
def initialize(credentials: nil, scope: nil, application_name: nil)
  scope_name = scope || DEFAULT_SCOPE

  @credentials = credentials
  @scope = "#{SCOPE_BASE}#{scope_name}"
  @application_name = application_name || DEFAULT_APPLICATION_NAME
end
|
26
|
+
|
27
|
+
attr_reader :scope, :application_name
|
28
|
+
|
29
|
+
# Returns a new SpreadsheetTask constructed with this data source.
def new_task
  task = SpreadsheetTask.new(self)
  task
end
|
32
|
+
|
33
|
+
# Yields each non-empty row of the given spreadsheet range.
#
# sheet_id    - spreadsheet ID passed to the Sheets service.
# range       - range expression passed to the Sheets service.
# get_options - extra keyword options forwarded to get_spreadsheet_values.
#
# Returns an Enumerator when no block is given.
def rows(sheet_id, range, **get_options)
  return enum_for(:rows, sheet_id, range, **get_options) unless block_given?

  response = service.get_spreadsheet_values(sheet_id, range, **get_options)
  # `values` is nil when the requested range contains no data at all.
  (response.values || []).each do |row|
    # Skip empty rows. Cells are compared through to_s because they are not
    # always Strings (e.g. numbers/booleans with UNFORMATTED_VALUE).
    next if row.all? { |cell| cell.to_s.empty? }

    yield row
  end
end
|
41
|
+
|
42
|
+
# Yields every row of the range rendered by the formatter for +format+.
#
# The first row is remembered as the field list and handed to the formatter
# together with each row; it is itself emitted only when the formatter's
# skip_header? is false. Returns an Enumerator when no block is given.
def formatted_rows(sheet_id, range, format = 'csv', **get_options)
  unless block_given?
    return enum_for(:formatted_rows, sheet_id, range, format, **get_options)
  end

  formatter = RowFormatterFactory.new_formatter(format)
  header = []
  rows(sheet_id, range, **get_options).each_with_index do |row, position|
    first_row = (position == 0)
    header = row if first_row
    next if first_row && formatter.skip_header?

    yield formatter.format(row, header)
  end
end
|
54
|
+
|
55
|
+
# Returns an IO-like object (responds to #read) holding the service account
# JSON key.
#
# Lookup order:
#   1. @credentials as a Hash  -> serialized to JSON;
#   2. @credentials as a path to an existing file -> that file's contents;
#   3. the file named by ENV['GOOGLE_APPLICATION_CREDENTIALS'].
#
# File contents are read eagerly into a StringIO so that no File handle is
# left open for the caller to close.
#
# Raises ParameterError when @credentials is neither a Hash nor an existing
# path, or when no credential source is configured at all.
def credential
  if @credentials
    return StringIO.new(@credentials.to_json) if @credentials.is_a?(Hash)
    return StringIO.new(File.read(@credentials)) if Pathname.new(@credentials).exist?

    raise ParameterError, "credentials must be a JSON or PATH. credentials=#{@credentials}"
  end

  env_path = ENV['GOOGLE_APPLICATION_CREDENTIALS']
  return StringIO.new(File.read(env_path)) if env_path

  raise ParameterError, "credentials or GOOGLE_APPLICATION_CREDENTIALS is required."
end
|
68
|
+
|
69
|
+
# Task type returned by SpreadsheetDataSource#new_task.
class SpreadsheetTask < DataSourceTask

  # Registers an S3ExportAction built from the given arguments
  # (via `add`, presumably provided by DataSourceTask -- confirm).
  def s3export(sheet_id, range, format, value_render_option, dest_ds, dest_file, gzip)
    add S3ExportAction.new(sheet_id, range, format, value_render_option, dest_ds, dest_file, gzip)
  end

  # Reads a spreadsheet range through the data source and uploads the
  # formatted rows as a single file to dest_ds.
  class S3ExportAction < Action
    def initialize(sheet_id, range, format, value_render_option, dest_ds, dest_file, gzip)
      @sheet_id = sheet_id
      @range = range
      @format = format
      @value_render_option = value_render_option
      @dest_ds = dest_ds
      @dest_file = dest_file
      @gzip = gzip
    end

    attr_reader :sheet_id, :range, :dest_ds, :dest_file, :gzip

    # Range expression escaped for display in the URL shown by #source.
    #
    # URI.encode is obsolete and was removed in Ruby 3.0;
    # URI::RFC2396_Parser#escape applies the same escaping rules.
    def url_encoded_range
      @url_encoded_range ||= URI::RFC2396_Parser.new.escape(range)
    end

    # Render option upper-cased; nil stays nil.
    def value_render_option
      @value_render_option&.upcase
    end

    # Format name lower-cased.
    def format
      @format.downcase
    end

    # Human-readable description of the transfer; logged by #run.
    def source
      <<~SOURCE
        "GET https://sheets.googleapis.com/v4/spreadsheets/#{sheet_id}/values/#{url_encoded_range}"
        "PUT s3://#{dest_ds.bucket_name}/#{dest_ds.prefix}/#{dest_file}"
      SOURCE
    end

    # Fetches the formatted rows, writes them newline-joined to a temporary
    # file (gzip-compressed when the gzip flag is set) and uploads that file
    # to dest_file on dest_ds. Returns nil.
    #
    # `ds` is not defined in this class; presumably supplied by Action --
    # confirm against the base class.
    def run
      ds.logger.info source
      rows = ds.formatted_rows(sheet_id, range, format, value_render_option: value_render_option)
      Tempfile.open do |f|
        f = Zlib::GzipWriter.wrap(f) if gzip
        f.write rows.to_a.join("\n")
        f.close # flush buffered (and compressed) data to disk before uploading
        dest_ds.object(dest_file).upload_file(f.path)
      end
      nil
    end

  end #S3ExportAction
end #SpreadsheetTask
|
121
|
+
|
122
|
+
private
|
123
|
+
|
124
|
+
# Returns the memoized Sheets API client, configured with the application
# name and the authorizer.
#
# The instance is cached only after it has been fully configured, so a
# failure while building the authorizer does not leave an unauthorized
# client behind for later calls.
def service
  @service ||= Google::Apis::SheetsV4::SheetsService.new.tap do |svc|
    svc.client_options.application_name = application_name
    svc.authorization = authorizer
  end
end
|
131
|
+
|
132
|
+
# Returns the memoized service account credentials with an access token
# already fetched.
#
# The credentials object is cached only once fetch_access_token! has
# succeeded, so a failed fetch is retried on the next call.
#
# token lifetime is 3600 sec
# May need to implement refresh logic for long running app???
def authorizer
  @authorizer ||= Google::Auth::ServiceAccountCredentials.make_creds(
    json_key_io: credential,
    scope: oauth_scope,
  ).tap(&:fetch_access_token!)
end

# Resolves +scope+ to the value to send as the OAuth scope.
#
# `scope` holds a constant *name* such as
# "Google::Apis::SheetsV4::AUTH_SPREADSHEETS_READONLY"; the scope itself is
# the value of that constant. Anything that does not name a defined
# constant is passed through unchanged.
def oauth_scope
  name = scope.to_s
  constant_path = name.match?(/\A[A-Z]\w*(?:::[A-Z]\w*)*\z/)
  return scope unless constant_path && Object.const_defined?(name)

  Object.const_get(name)
end
|
143
|
+
|
144
|
+
# Maps a format name ('csv' / 'json') to a row formatter object.
#
# Formatters respond to:
#   format(values, fields) - renders one row as a String;
#   skip_header?           - whether the header row itself is omitted.
class RowFormatterFactory
  # Returns a new formatter for +format+. The name is upper-cased and
  # suffixed with "Formatter" to find the class ('csv' -> CSVFormatter).
  #
  # Uses the non-destructive String#upcase: upcase! returns nil when the
  # name is already upper case and would modify (or fail on a frozen)
  # caller-owned string.
  def self.new_formatter(format)
    const_get("#{format.upcase}Formatter").new
  end

  # Renders a row as one fully quoted CSV line without a line terminator.
  class CSVFormatter
    # values - cell values of the row.
    # fields - unused; accepted for interface parity with JSONFormatter.
    def format(values, fields)
      # Replace embedded newlines by a literal backslash-n so each row stays
      # on one line; row_sep: nil suppresses the trailing line terminator.
      # Only Strings are escaped; other cell types are left to CSV.
      escaped_values = values.map { |v| v.is_a?(String) ? v.gsub(/\n/, '\\n') : v }
      CSV.generate_line(escaped_values, row_sep: nil, quote_char: '"', force_quotes: true)
    end

    def skip_header?
      false
    end
  end

  # Renders a row as a JSON object keyed by the normalized header names.
  class JSONFormatter
    # values - cell values of the row.
    # fields - header cells; converted to snake_case keys.
    def format(values, fields)
      # https://stackoverflow.com/questions/1509915/converting-camel-case-to-underscore-case-in-ruby
      normalized_fields = fields.map {|f|
        # to_s: header cells are not always Strings.
        f.to_s.gsub(/::/, '/').
          gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
          gsub(/([a-z\d])([A-Z])/,'\1_\2').
          tr("-", "_").
          tr(" ", "_").
          downcase
      }
      normalized_fields.zip(values).to_h.to_json
    end

    def skip_header?
      true
    end
  end
end # RowFormatterFactory
|
180
|
+
end
|
181
|
+
end
|
metadata
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bricolage-spreadsheet
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Shimpei Kodama
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2021-04-19 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bricolage
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 5.26.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 5.26.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: google-apis-sheets_v4
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.4.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.4.0
|
41
|
+
description:
|
42
|
+
email: shimpeko@gmail.com
|
43
|
+
executables: []
|
44
|
+
extensions: []
|
45
|
+
extra_rdoc_files: []
|
46
|
+
files:
|
47
|
+
- README.md
|
48
|
+
- jobclass/spreadsheet-import.rb
|
49
|
+
- lib/bricolage-spreadsheet.rb
|
50
|
+
- lib/bricolage/spreadsheetdatasource.rb
|
51
|
+
homepage: https://github.com/bricolages/bricolage-spreadsheet
|
52
|
+
licenses:
|
53
|
+
- MIT
|
54
|
+
metadata: {}
|
55
|
+
post_install_message:
|
56
|
+
rdoc_options: []
|
57
|
+
require_paths:
|
58
|
+
- lib
|
59
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
60
|
+
requirements:
|
61
|
+
- - "~>"
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: 2.7.0
|
64
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
requirements: []
|
70
|
+
rubygems_version: 3.1.4
|
71
|
+
signing_key:
|
72
|
+
specification_version: 4
|
73
|
+
summary: Google Spreadsheet-related job classes for Bricolage batch framework
|
74
|
+
test_files: []
|