sequel-bigquery 0.4.1 → 0.4.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +7 -0
- data/lib/sequel-bigquery.rb +48 -30
- data/lib/sequel_bigquery/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e444c2d4a5c6b54b5a1bb0efde17e1dd83761ec53f1b796f5e959646bd37f315
|
4
|
+
data.tar.gz: 41875a2325c00eb26786fbd3ab8430e9880b93d795a18b000bff3dd75ed0ea47
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 725fce3447e58e75c5cf817d736322c567661752d1b2d2f4f45aa0ce712825e8792214d1f56313272d8a84facdadff51f63ee4a6b3bda1b8c55d92a80a826798
|
7
|
+
data.tar.gz: 3aa8900e8fba7400049e8f83879998b246296e1705d01b25f2a1cf20855cf8faeea09b9d7e4cb7280b2783296d0a27f61ed295275089d6de311c256c5f7ea763
|
data/README.md
CHANGED
@@ -4,6 +4,10 @@
|
|
4
4
|
|
5
5
|
A Sequel adapter for [Google's BigQuery](https://cloud.google.com/bigquery).
|
6
6
|
|
7
|
+
This gem was created in order to manage schema migrations of a BigQuery dataset at GreenSync. At the time of writing, we couldn't find any good tools in any language to manage changes to the schema as a set of migrations.
|
8
|
+
|
9
|
+
Beyond migrations, I'm unsure how useful this gem is. I haven't yet tested what the performance would be for data interactions vs. directly using the `google-cloud-bigquery` gem's native facilities. If you're inserting a bunch of data, it's probably a better idea to use an [inserter from that gem](https://googleapis.dev/ruby/google-cloud-bigquery/latest/Google/Cloud/Bigquery/Dataset.html#insert_async-instance_method) rather than going through SQL.
|
10
|
+
|
7
11
|
## Contents
|
8
12
|
|
9
13
|
<!-- MarkdownTOC autolink=true -->
|
@@ -68,6 +72,7 @@ Connect to BigQuery:
|
|
68
72
|
|
69
73
|
```
|
70
74
|
require 'sequel-bigquery'
|
75
|
+
require 'logger'
|
71
76
|
|
72
77
|
db = Sequel.connect(
|
73
78
|
adapter: :bigquery,
|
@@ -80,6 +85,8 @@ db = Sequel.connect(
|
|
80
85
|
|
81
86
|
And use Sequel like normal.
|
82
87
|
|
88
|
+
Note that it is important to supply a logger that will at least output warning messages so you know when your queries are being modified or buffered, which may be unexpected behaviour.
|
89
|
+
|
83
90
|
## Contributing
|
84
91
|
|
85
92
|
Pull requests welcome! =)
|
data/lib/sequel-bigquery.rb
CHANGED
@@ -4,6 +4,7 @@ require 'delegate'
|
|
4
4
|
require 'time'
|
5
5
|
|
6
6
|
require 'google/cloud/bigquery'
|
7
|
+
require 'amazing_print'
|
7
8
|
require 'paint'
|
8
9
|
require 'sequel'
|
9
10
|
|
@@ -16,39 +17,33 @@ module Sequel
|
|
16
17
|
class Database < Sequel::Database # rubocop:disable Metrics/ClassLength
|
17
18
|
set_adapter_scheme :bigquery
|
18
19
|
|
19
|
-
def initialize(*args, **
|
20
|
-
|
21
|
-
@orig_opts = kawrgs.fetch(:orig_opts)
|
20
|
+
def initialize(*args, **kwargs)
|
21
|
+
@bigquery_config = kwargs.fetch(:orig_opts)
|
22
22
|
@sql_buffer = []
|
23
23
|
@sql_buffering = false
|
24
24
|
super
|
25
25
|
end
|
26
26
|
|
27
27
|
def connect(*_args)
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
@bigquery = Google::Cloud::Bigquery.new(config)
|
28
|
+
log_each(:debug, '#connect')
|
29
|
+
get_or_create_bigquery_dataset
|
30
|
+
.tap { log_each(:debug, '#connect end') }
|
31
|
+
end
|
32
|
+
|
33
|
+
def bigquery
|
35
34
|
# ObjectSpace.each_object(HTTPClient).each { |c| c.debug_dev = STDOUT }
|
36
|
-
@bigquery.
|
37
|
-
@loggers[0].debug('BigQuery dataset %s does not exist; creating it' % bq_dataset_name)
|
38
|
-
@bigquery.create_dataset(bq_dataset_name, location: location)
|
39
|
-
end
|
40
|
-
.tap { puts '#connect end' }
|
35
|
+
@bigquery ||= Google::Cloud::Bigquery.new(google_cloud_bigquery_gem_config)
|
41
36
|
end
|
42
37
|
|
43
38
|
def disconnect_connection(_c)
|
44
|
-
|
39
|
+
log_each(:debug, '#disconnect_connection')
|
45
40
|
# c.disconnect
|
46
41
|
end
|
47
42
|
|
48
43
|
def drop_datasets(*dataset_names_to_drop)
|
49
44
|
dataset_names_to_drop.each do |dataset_name_to_drop|
|
50
|
-
|
51
|
-
dataset_to_drop =
|
45
|
+
log_each(:debug, "Dropping dataset #{dataset_name_to_drop.inspect}")
|
46
|
+
dataset_to_drop = bigquery.dataset(dataset_name_to_drop)
|
52
47
|
next unless dataset_to_drop
|
53
48
|
dataset_to_drop.tables.each(&:delete)
|
54
49
|
dataset_to_drop.delete
|
@@ -57,7 +52,7 @@ module Sequel
|
|
57
52
|
alias drop_dataset drop_datasets
|
58
53
|
|
59
54
|
def execute(sql, opts = OPTS) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
60
|
-
|
55
|
+
log_each(:debug, '#execute')
|
61
56
|
log_query(sql)
|
62
57
|
|
63
58
|
# require 'pry'; binding.pry if sql =~ /CREATE TABLE IF NOT EXISTS/i
|
@@ -88,15 +83,12 @@ module Sequel
|
|
88
83
|
sql_to_execute = @sql_buffer.any? ? @sql_buffer.join("\n") : sql
|
89
84
|
conn.query(sql_to_execute)
|
90
85
|
end
|
91
|
-
|
92
|
-
ap results
|
86
|
+
log_each(:debug, results.awesome_inspect)
|
93
87
|
if block_given?
|
94
88
|
yield results
|
95
89
|
else
|
96
90
|
results
|
97
91
|
end
|
98
|
-
# TODO
|
99
|
-
# rescue ::ODBC::Error, ArgumentError => e
|
100
92
|
rescue Google::Cloud::InvalidArgumentError, ArgumentError => e
|
101
93
|
raise_error(e)
|
102
94
|
end # rubocop:disable Style/MultilineBlockChain
|
@@ -124,6 +116,33 @@ module Sequel
|
|
124
116
|
|
125
117
|
private
|
126
118
|
|
119
|
+
attr_reader :bigquery_config
|
120
|
+
|
121
|
+
def google_cloud_bigquery_gem_config
|
122
|
+
bigquery_config.dup.tap do |config|
|
123
|
+
%i[
|
124
|
+
adapter
|
125
|
+
database
|
126
|
+
dataset
|
127
|
+
location
|
128
|
+
logger
|
129
|
+
].each do |option|
|
130
|
+
config.delete(option)
|
131
|
+
end
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
def get_or_create_bigquery_dataset # rubocop:disable Naming/AccessorMethodName
|
136
|
+
bigquery.dataset(bigquery_dataset_name) || begin
|
137
|
+
log_each(:debug, 'BigQuery dataset %s does not exist; creating it' % bigquery_dataset_name)
|
138
|
+
bigquery.create_dataset(bigquery_dataset_name, location: bigquery_config[:location])
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
def bigquery_dataset_name
|
143
|
+
bigquery_config[:dataset] || bigquery_config[:database] || (raise ArgumentError, 'BigQuery dataset must be specified')
|
144
|
+
end
|
145
|
+
|
127
146
|
def connection_execute_method
|
128
147
|
:query
|
129
148
|
end
|
@@ -138,9 +157,9 @@ module Sequel
|
|
138
157
|
end
|
139
158
|
|
140
159
|
def schema_parse_table(_table_name, _opts)
|
141
|
-
|
160
|
+
log_each(:debug, Paint['schema_parse_table', :red, :bold])
|
142
161
|
# require 'pry'; binding.pry
|
143
|
-
|
162
|
+
bigquery.datasets.map do |dataset|
|
144
163
|
[
|
145
164
|
dataset.dataset_id,
|
146
165
|
{},
|
@@ -155,13 +174,12 @@ module Sequel
|
|
155
174
|
|
156
175
|
# Padded to horizontally align with post-execution log message which includes the execution time
|
157
176
|
def log_query(sql)
|
158
|
-
pad = '
|
159
|
-
|
160
|
-
# @loggers[0]&.debug(' ' + sql)
|
177
|
+
pad = ' ' * 12
|
178
|
+
log_each(:debug, Paint[pad + sql, :cyan, :bold])
|
161
179
|
end
|
162
180
|
|
163
181
|
def warn(msg)
|
164
|
-
|
182
|
+
log_each(:warn, Paint[msg, '#FFA500', :bold])
|
165
183
|
end
|
166
184
|
|
167
185
|
def warn_default_removal(sql)
|
@@ -195,7 +213,7 @@ module Sequel
|
|
195
213
|
|
196
214
|
class Dataset < Sequel::Dataset
|
197
215
|
def fetch_rows(sql, &block)
|
198
|
-
|
216
|
+
db.send(:log_each, :debug, '#fetch_rows')
|
199
217
|
|
200
218
|
execute(sql) do |bq_result|
|
201
219
|
self.columns = bq_result.fields.map { |field| field.name.to_sym }
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sequel-bigquery
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.
|
4
|
+
version: 0.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brendan Weibrecht
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-11-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: amazing_print
|