export_to_gcloud 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/lib/export_to_gcloud/exporter/context.rb +49 -0
- data/lib/export_to_gcloud/exporter/definition.rb +47 -0
- data/lib/export_to_gcloud/exporters/csv_exporter.rb +18 -0
- data/lib/export_to_gcloud/exporters/exporter.rb +112 -0
- data/lib/export_to_gcloud/exporters/pg_exporter.rb +37 -0
- data/lib/export_to_gcloud/library.rb +67 -0
- data/lib/export_to_gcloud/version.rb +5 -0
- data/lib/export_to_gcloud.rb +16 -0
- metadata +80 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: b10b7cddc6441b96f235f33936eb9560e9662f86
+  data.tar.gz: 91f284abd485f6c6bbf8e7cd2ff99d49e3983157
+SHA512:
+  metadata.gz: ac65925a3d33b4b0d081e170a5a0f0a7d29d86e08a7490b9703b05e9163628514ef815db66c9c09422e50ab89d73376753b7e402935062b915a26e02eefb1129
+  data.tar.gz: 0a3b60ce3cfdc1c6523a9700701f502d0a6cf8e7329be4bc63bb72d4c41e373e7294ccf31fa2ccec7f9d3b3de5ecaee62cb6ad37b6ae5ed93422e54fdbb982bd
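
These are the standard RubyGems digests of the two archives inside the .gem package (metadata.gz and data.tar.gz). A minimal verification sketch, assuming the release has been downloaded to export_to_gcloud-0.9.0.gem in the current directory (the file name is an assumption, not part of the diff):

    require 'digest'
    require 'rubygems/package'

    # A .gem file is a tar archive; read out the two members that
    # checksums.yaml covers and print their digests for comparison.
    File.open('export_to_gcloud-0.9.0.gem', 'rb') do |io|
      Gem::Package::TarReader.new(io).each do |entry|
        next unless %w[metadata.gz data.tar.gz].include?(entry.full_name)
        body = entry.read
        puts "#{entry.full_name} SHA1:   #{Digest::SHA1.hexdigest body}"
        puts "#{entry.full_name} SHA512: #{Digest::SHA512.hexdigest body}"
      end
    end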
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2016 Ondřej Želazko
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
data/lib/export_to_gcloud/exporter/context.rb
ADDED
@@ -0,0 +1,49 @@
+class ExportToGcloud::Exporter::Context
+
+  attr_reader :client
+
+  OPTIONS = %i[dump_path storage_prefix bucket dataset].freeze
+
+  def initialize client, **opts
+    @client = client
+    set opts
+  end
+
+  def set **opts
+    OPTIONS.each do |key|
+      value = opts[key]
+      send "set_#{key}", value if value
+    end
+    self
+  end
+
+  def set_dump_path path
+    @dump_path = Pathname.new path
+  end
+
+  def set_storage_prefix prefix
+    @storage_prefix = prefix
+  end
+
+  def set_bucket bucket
+    bucket = client.storage.bucket bucket if String === bucket
+    @bucket = bucket
+  end
+
+  def set_dataset dataset
+    dataset = client.bigquery.dataset dataset if String === dataset
+    @dataset = dataset
+  end
+
+  OPTIONS.each do |key|
+    define_method key do
+      value = instance_variable_get "@#{key}"
+      value || raise("Undefined value for #{key} in exporter options!")
+    end
+  end
+
+  def copy
+    self.class.new client, OPTIONS.inject({}){|h, k| h[k] = instance_variable_get "@#{k}"; h}
+  end
+
+end
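
Context bundles the Gcloud client with the four options every export needs; a reader raises until its option is set, and String values for bucket/dataset are resolved through the client's storage and bigquery services. A usage sketch, all values hypothetical:

    context = ExportToGcloud::Exporter::Context.new gcloud_client,
      dump_path:      '/tmp/exports',      # wrapped in a Pathname
      storage_prefix: 'exports/daily/',
      bucket:         'my-export-bucket',  # String => client.storage.bucket lookup
      dataset:        'analytics'          # String => client.bigquery.dataset lookup

    context.dump_path  # => #<Pathname:/tmp/exports>
    context.copy       # independent Context with the same client and options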
data/lib/export_to_gcloud/exporter/definition.rb
ADDED
@@ -0,0 +1,47 @@
+class ExportToGcloud::Exporter::Definition < OpenStruct
+
+  def initialize exporter_type, attrs
+    super attrs.merge!(type: exporter_type)
+  end
+
+  def validate!
+    (String === name && !name.empty?) || raise('`name` must be defined!')
+    Proc === bq_schema || raise('`bq_schema` must be defined as a Proc!')
+    data || raise('`data` must be defined!')
+    type.validate_definition! self if type.respond_to? 'validate_definition!'
+  end
+
+  def get_data *args
+    Proc === data ? data.call(*args) : data
+  end
+
+  def get_bq_table_name
+    bq_table_name || name
+  end
+
+  def self.set_last_definition klass, attrs={}, &block
+    last_definition = new klass, attrs
+    block.call last_definition if block
+
+    last_definition.validate!
+    @last_definition = last_definition
+  end
+
+  def self.load_definition name, finder
+    file_path = finder.call name
+    load file_path
+    definition = @last_definition
+    @last_definition = nil
+
+    unless definition
+      raise("File #{file_path.to_s} must define exporter for '#{name}'!")
+    end
+
+    unless definition.name == name
+      raise "File #{file_path.to_s} defines '#{definition.name}' instead of '#{name}'"
+    end
+
+    definition
+  end
+
+end
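
A definition file is plain Ruby executed via `load`: it must call `define` on an exporter class (added later in this diff), which funnels into `set_last_definition` and `validate!`; `load_definition` then checks that a definition was registered and that its name matches the requested one. A sketch of such a file, with hypothetical schema and data:

    # exporters/products.rb -- the path the resolver would return for 'products'
    ExportToGcloud::CSVExporter.define name: 'products' do |d|
      d.bq_schema = ->(schema) {
        schema.integer 'id'
        schema.string  'title'
      }
      d.data = [[1, 'first'], [2, 'second']]  # or a Proc receiving part arguments
    end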
data/lib/export_to_gcloud/exporters/csv_exporter.rb
ADDED
@@ -0,0 +1,18 @@
+
+module ExportToGcloud
+
+  class CSVExporter < Exporter
+
+    def create_data_file! file, *part_data
+      data = @definition.get_data(*part_data)
+
+      csv_data = CSV.generate col_sep: ';', force_quotes: false do |csv|
+        data.each{|row| csv << row}
+      end
+
+      File.write file.to_path, csv_data
+    end
+
+  end
+
+end
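
Here `get_data` is expected to return an enumerable of row arrays. The `;` separator matches the delimiter that `start_load_job` (below) passes to BigQuery, and with `force_quotes: false` the standard library quotes a field only when it has to:

    require 'csv'

    rows = [[1, 'first'], [2, 'a;b']]
    puts CSV.generate(col_sep: ';', force_quotes: false) { |csv| rows.each { |r| csv << r } }
    # 1;first
    # 2;"a;b"    <- quoted automatically because it contains the separator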
data/lib/export_to_gcloud/exporters/exporter.rb
ADDED
@@ -0,0 +1,112 @@
+
+module ExportToGcloud
+
+  class Exporter
+
+    def initialize definition, context
+      @definition = definition
+      @context = context
+
+      @parts = []
+      case definition.parts
+      when Array then definition.parts.each{|label, *part_args| add_data_part *part_args, label: label}
+      when Proc then definition.parts.call self
+      end
+    end
+
+    def local_file_path label
+      @context.dump_path.join "#{@definition.name}#{prepend_underscore label}.csv"
+    end
+
+    def storage_file_path label
+      prefix = @definition.storage_prefix || @context.storage_prefix
+      "#{prefix}#{@definition.name}#{prepend_underscore label}.csv"
+    end
+
+    def add_data_part *args, label:nil
+      args.unshift(label ? label.to_s : (@parts.length+1).to_s)
+      @parts << args
+    end
+
+    def process_all_parts! recreate_table=true
+      add_data_part label: 'all' if @parts.empty?
+      recreate_bq_table! if recreate_table
+
+      @parts.map{|*args| process_part! *args}
+    end
+
+    def process_part! label, *part_args
+      file = local_file_path label
+      create_data_file! file, *part_args
+
+      storage_name = storage_file_path label
+      gcloud_file = upload_file! file, storage_name
+      start_load_job gcloud_file
+    end
+
+    def create_data_file! file, *part_data
+      File.write file.to_path, @definition.get_data(*part_data)
+    end
+
+    def upload_file!(file, storage_name)
+      file = compress_file! file
+      gcloud_file = @context.bucket.create_file file, storage_name, chunk_size: 2**21 # 2MB
+      file.delete
+      gcloud_file
+    end
+
+    def get_storage_files
+      @parts.map do |label, *_|
+        @context.bucket.file storage_file_path(label)
+      end.compact
+    end
+
+    def bq_table
+      unless defined? @bq_table
+        @bq_table = @context.dataset.table @definition.get_bq_table_name
+      end
+      @bq_table
+    end
+
+    def recreate_bq_table!
+      bq_table.delete if bq_table
+      @bq_table = @context.dataset.create_table @definition.get_bq_table_name, &@definition.bq_schema
+    end
+
+    def start_load_job gcloud_file, **_load_settings
+      load_settings = {
+        format: 'csv',
+        quote: '"',
+        delimiter: ';',
+        create: 'never',
+        write: 'append',
+        max_bad_records: 0
+      }
+      load_settings.merge! _load_settings unless _load_settings.empty?
+      bq_table.load gcloud_file, **load_settings
+    end
+
+    def self.define **kwargs, &block
+      ::ExportToGcloud::Exporter::Definition.set_last_definition self, kwargs, &block
+    end
+
+    private
+
+    def compress_file!(original_file)
+      err = %x(pigz -f9 #{original_file.to_path} 2>&1)
+      compressed_file = Pathname.new "#{original_file.to_path}.gz"
+      raise "Compression of #{original_file.to_path} failed: #{err}" unless compressed_file.exist?
+      original_file.delete if original_file.exist?
+      compressed_file
+    end
+
+    def prepend_underscore text
+      "_#{text}" if String === text && !text.empty?
+    end
+
+  end
+
+end
+
+require_relative '../exporter/definition'
+require_relative '../exporter/context'
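
Each registered part becomes one local CSV, one compressed upload (note that `compress_file!` shells out to the external pigz binary, which therefore has to be installed), and one BigQuery load job. A definition can declare `parts` as an array of `[label, *args]` tuples or as a Proc that registers parts itself; a sketch with hypothetical month arguments:

    ExportToGcloud::CSVExporter.define name: 'orders' do |d|
      d.bq_schema = ->(schema) { schema.string 'month' }
      d.data      = ->(month)  { [[month]] }

      # Array form: label first, remaining values are later passed to get_data.
      d.parts = [['jan', '2016-01'], ['feb', '2016-02']]

      # Proc form (alternative): receives the exporter and registers parts itself.
      # d.parts = ->(exporter) {
      #   %w[2016-01 2016-02].each { |m| exporter.add_data_part m, label: m }
      # }
    end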
data/lib/export_to_gcloud/exporters/pg_exporter.rb
ADDED
@@ -0,0 +1,37 @@
+
+module ExportToGcloud
+
+  class PGExporter < Exporter
+
+    def create_data_file! file, *part_data
+      sql = @definition.get_data(*part_data)
+
+      schema = ::Gcloud::Bigquery::Table::Schema.new nil
+      @definition.bq_schema.call schema
+      string_fields = schema.fields.select{|f| f['type']=='STRING'}.map{|f| f['name']}
+
+      force_quote = if string_fields.empty?
+        ''
+      else
+        ", FORCE_QUOTE (#{string_fields.join ', '})"
+      end
+      sql = "COPY (#{sql}) TO '#{file.to_path}' WITH (FORMAT CSV, DELIMITER ';', QUOTE '\"'#{force_quote});"
+
+
+      executor = @definition.get_sql_executor || self.class.default_executor
+      executor.call sql
+    end
+
+    def self.validate_definition! definition
+      definition.get_sql_executor || default_executor || raise('`sql_executor` needs to be defined!')
+    end
+
+    class << self
+
+      attr_accessor :default_executor
+
+    end
+
+  end
+
+end
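
The PG exporter renders a `COPY ... TO` statement and hands it to the definition's `sql_executor`, falling back to the class-level `default_executor`. A minimal wiring sketch; the pg gem and the connection settings are assumptions, not dependencies of this gem:

    require 'pg'

    connection = PG.connect dbname: 'warehouse'
    ExportToGcloud::PGExporter.default_executor = ->(sql) { connection.exec sql }

Since `COPY ... TO '<file>'` is executed by the database server, the context's dump_path must be a location the Postgres server process itself can write to.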
data/lib/export_to_gcloud/library.rb
ADDED
@@ -0,0 +1,67 @@
+require 'gcloud'
+require 'gcloud/bigquery'
+
+# large files uploading
+require 'httpclient'
+Faraday.default_adapter = :httpclient
+
+# monkeypatch :/ some issue in google-api
+# see http://googlecloudplatform.github.io/gcloud-ruby/docs/master/Gcloud/Storage.html
+# -> A note about large uploads
+require 'google/api_client'
+Faraday::Response.register_middleware gzip: Faraday::Response::Middleware
+
+module ExportToGcloud
+
+  def self.definitions_resolver= proc
+    @definitions_resolver = proc
+  end
+
+  # waits for BigQuery jobs
+  # - send a block to do something with failed
+  def self.wait_for_load_jobs(jobs, &block)
+    jobs_left = jobs.dup
+    failed = []
+    sleeper = ->(_retries) {sleep 2 * _retries + 5}
+    retries = 0
+
+    until jobs_left.empty?
+      sleeper.call retries
+      retries += 1
+      jobs_left.each &:reload!
+      jobs_left.delete_if do |j|
+        if j.done?
+          failed << {id: j.job_id, error: j.error, sources: j.sources} if j.failed?
+          true
+        end
+      end
+    end
+
+    block.call failed unless failed.empty?
+  end
+
+  def self.get_exporter name, context
+    name = name.to_s
+
+    @definitions ||= {}
+    unless @definitions.has_key? name
+      @definitions[name] = ::ExportToGcloud::Exporter::Definition.load_definition name, @definitions_resolver
+    end
+
+    definition = @definitions[name]
+    definition.type.new definition, context
+  end
+
+  def self.create_context **opts
+    ::ExportToGcloud::Exporter::Context.new client, opts
+  end
+
+end
+
+require 'pathname'
+require 'ostruct'
+require 'csv'
+
+require_relative 'exporters/exporter'
+require_relative 'exporters/csv_exporter'
+require_relative 'exporters/pg_exporter'
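
Put together, a run resolves a definition by name (definitions are memoized per name), processes all parts, and polls the resulting load jobs. An end-to-end sketch reusing the hypothetical names from the earlier sketches:

    context  = ExportToGcloud.create_context dump_path: '/tmp/exports',
      storage_prefix: 'exports/', bucket: 'my-export-bucket', dataset: 'analytics'
    exporter = ExportToGcloud.get_exporter 'products', context

    jobs = exporter.process_all_parts!  # dump -> pigz -> upload -> one load job per part
    ExportToGcloud.wait_for_load_jobs(jobs) do |failed|
      failed.each { |f| warn "load #{f[:id]} failed: #{f[:error]}" }
    end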
data/lib/export_to_gcloud.rb
ADDED
@@ -0,0 +1,16 @@
+require_relative 'export_to_gcloud/version'
+
+module ExportToGcloud
+
+  def self.setup project_name:, config_file:, definitions_resolver:nil
+    require_relative 'export_to_gcloud/library'
+
+    self.definitions_resolver = definitions_resolver if definitions_resolver
+    @client = ::Gcloud.new project_name, config_file
+  end
+
+  def self.client
+    @client || raise('Gcloud client not present. call ExportToGcloud#setup first.')
+  end
+
+end
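
`setup` is the single entry point: it requires the heavy library file lazily, stores the optional resolver, and builds the Gcloud client that `client` later returns. A boot sketch; the project, keyfile, and resolver paths are hypothetical:

    require 'export_to_gcloud'

    ExportToGcloud.setup(
      project_name:         'my-gcp-project',
      config_file:          '/secrets/gcloud-keyfile.json',
      definitions_resolver: ->(name) { File.expand_path "exporters/#{name}.rb" }
    )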
metadata
ADDED
@@ -0,0 +1,80 @@
+--- !ruby/object:Gem::Specification
+name: export_to_gcloud
+version: !ruby/object:Gem::Version
+  version: 0.9.0
+platform: ruby
+authors:
+- Ondřej Želazko
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2016-11-07 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: gcloud
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.5.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.5.0
+- !ruby/object:Gem::Dependency
+  name: httpclient
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '2.8'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '2.8'
+description: A simple helper to export data to BigQuery via Google Drive
+email: zelazk.o@email.cz
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- LICENSE
+- lib/export_to_gcloud.rb
+- lib/export_to_gcloud/exporter/context.rb
+- lib/export_to_gcloud/exporter/definition.rb
+- lib/export_to_gcloud/exporters/csv_exporter.rb
+- lib/export_to_gcloud/exporters/exporter.rb
+- lib/export_to_gcloud/exporters/pg_exporter.rb
+- lib/export_to_gcloud/library.rb
+- lib/export_to_gcloud/version.rb
+homepage: https://github.com/doooby/export_to_gcloud
+licenses:
+- MIT
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.5.1
+signing_key:
+specification_version: 4
+summary: Exporter to BigQuery
+test_files: []
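
For reference, depending on this exact release means accepting the two runtime constraints above; a Gemfile sketch:

    gem 'export_to_gcloud', '0.9.0'  # pulls in gcloud ~> 0.5.0 and httpclient ~> 2.8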