sequel-bigquery 0.4.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +7 -0
- data/lib/sequel-bigquery.rb +48 -30
- data/lib/sequel_bigquery/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e444c2d4a5c6b54b5a1bb0efde17e1dd83761ec53f1b796f5e959646bd37f315
|
4
|
+
data.tar.gz: 41875a2325c00eb26786fbd3ab8430e9880b93d795a18b000bff3dd75ed0ea47
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 725fce3447e58e75c5cf817d736322c567661752d1b2d2f4f45aa0ce712825e8792214d1f56313272d8a84facdadff51f63ee4a6b3bda1b8c55d92a80a826798
|
7
|
+
data.tar.gz: 3aa8900e8fba7400049e8f83879998b246296e1705d01b25f2a1cf20855cf8faeea09b9d7e4cb7280b2783296d0a27f61ed295275089d6de311c256c5f7ea763
|
data/README.md
CHANGED
@@ -4,6 +4,10 @@
|
|
4
4
|
|
5
5
|
A Sequel adapter for [Google's BigQuery](https://cloud.google.com/bigquery).
|
6
6
|
|
7
|
+
This gem was created in order to manage schema migrations of a BigQuery dataset at GreenSync. At the time of writing, we couldn't find any good tools in any language to manage changes to the schema as a set of migrations.
|
8
|
+
|
9
|
+
Beyond migrations, I'm unsure how useful this gem is. I haven't yet tested what the performance would be for data interactions vs. directly using the `google-cloud-bigquery` gem's native facilities. If you're inserting a bunch of data, it's probably a better idea to use an [inserter from that gem](https://googleapis.dev/ruby/google-cloud-bigquery/latest/Google/Cloud/Bigquery/Dataset.html#insert_async-instance_method) rather than going through SQL.
|
10
|
+
|
7
11
|
## Contents
|
8
12
|
|
9
13
|
<!-- MarkdownTOC autolink=true -->
|
@@ -68,6 +72,7 @@ Connect to BigQuery:
|
|
68
72
|
|
69
73
|
```
|
70
74
|
require 'sequel-bigquery'
|
75
|
+
require 'logger'
|
71
76
|
|
72
77
|
db = Sequel.connect(
|
73
78
|
adapter: :bigquery,
|
@@ -80,6 +85,8 @@ db = Sequel.connect(
|
|
80
85
|
|
81
86
|
And use Sequel like normal.
|
82
87
|
|
88
|
+
Note that it is important to supply a logger that will at least output warning messages so you know when your queries are being modified or buffered, which may be unexpected behaviour.
|
89
|
+
|
83
90
|
## Contributing
|
84
91
|
|
85
92
|
Pull requests welcome! =)
|
data/lib/sequel-bigquery.rb
CHANGED
@@ -4,6 +4,7 @@ require 'delegate'
|
|
4
4
|
require 'time'
|
5
5
|
|
6
6
|
require 'google/cloud/bigquery'
|
7
|
+
require 'amazing_print'
|
7
8
|
require 'paint'
|
8
9
|
require 'sequel'
|
9
10
|
|
@@ -16,39 +17,33 @@ module Sequel
|
|
16
17
|
class Database < Sequel::Database # rubocop:disable Metrics/ClassLength
|
17
18
|
set_adapter_scheme :bigquery
|
18
19
|
|
19
|
-
def initialize(*args, **kawrgs)
|
20
|
-
|
21
|
-
@orig_opts = kawrgs.fetch(:orig_opts)
|
20
|
+
def initialize(*args, **kwargs)
|
21
|
+
@bigquery_config = kwargs.fetch(:orig_opts)
|
22
22
|
@sql_buffer = []
|
23
23
|
@sql_buffering = false
|
24
24
|
super
|
25
25
|
end
|
26
26
|
|
27
27
|
def connect(*_args)
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
@bigquery = Google::Cloud::Bigquery.new(config)
|
28
|
+
log_each(:debug, '#connect')
|
29
|
+
get_or_create_bigquery_dataset
|
30
|
+
.tap { log_each(:debug, '#connect end') }
|
31
|
+
end
|
32
|
+
|
33
|
+
def bigquery
|
35
34
|
# ObjectSpace.each_object(HTTPClient).each { |c| c.debug_dev = STDOUT }
|
36
|
-
@bigquery.
|
37
|
-
@loggers[0].debug('BigQuery dataset %s does not exist; creating it' % bq_dataset_name)
|
38
|
-
@bigquery.create_dataset(bq_dataset_name, location: location)
|
39
|
-
end
|
40
|
-
.tap { puts '#connect end' }
|
35
|
+
@bigquery ||= Google::Cloud::Bigquery.new(google_cloud_bigquery_gem_config)
|
41
36
|
end
|
42
37
|
|
43
38
|
def disconnect_connection(_c)
|
44
|
-
|
39
|
+
log_each(:debug, '#disconnect_connection')
|
45
40
|
# c.disconnect
|
46
41
|
end
|
47
42
|
|
48
43
|
def drop_datasets(*dataset_names_to_drop)
|
49
44
|
dataset_names_to_drop.each do |dataset_name_to_drop|
|
50
|
-
|
51
|
-
dataset_to_drop =
|
45
|
+
log_each(:debug, "Dropping dataset #{dataset_name_to_drop.inspect}")
|
46
|
+
dataset_to_drop = bigquery.dataset(dataset_name_to_drop)
|
52
47
|
next unless dataset_to_drop
|
53
48
|
dataset_to_drop.tables.each(&:delete)
|
54
49
|
dataset_to_drop.delete
|
@@ -57,7 +52,7 @@ module Sequel
|
|
57
52
|
alias drop_dataset drop_datasets
|
58
53
|
|
59
54
|
def execute(sql, opts = OPTS) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
60
|
-
|
55
|
+
log_each(:debug, '#execute')
|
61
56
|
log_query(sql)
|
62
57
|
|
63
58
|
# require 'pry'; binding.pry if sql =~ /CREATE TABLE IF NOT EXISTS/i
|
@@ -88,15 +83,12 @@ module Sequel
|
|
88
83
|
sql_to_execute = @sql_buffer.any? ? @sql_buffer.join("\n") : sql
|
89
84
|
conn.query(sql_to_execute)
|
90
85
|
end
|
91
|
-
|
92
|
-
ap results
|
86
|
+
log_each(:debug, results.awesome_inspect)
|
93
87
|
if block_given?
|
94
88
|
yield results
|
95
89
|
else
|
96
90
|
results
|
97
91
|
end
|
98
|
-
# TODO
|
99
|
-
# rescue ::ODBC::Error, ArgumentError => e
|
100
92
|
rescue Google::Cloud::InvalidArgumentError, ArgumentError => e
|
101
93
|
raise_error(e)
|
102
94
|
end # rubocop:disable Style/MultilineBlockChain
|
@@ -124,6 +116,33 @@ module Sequel
|
|
124
116
|
|
125
117
|
private
|
126
118
|
|
119
|
+
attr_reader :bigquery_config
|
120
|
+
|
121
|
+
def google_cloud_bigquery_gem_config
|
122
|
+
bigquery_config.dup.tap do |config|
|
123
|
+
%i[
|
124
|
+
adapter
|
125
|
+
database
|
126
|
+
dataset
|
127
|
+
location
|
128
|
+
logger
|
129
|
+
].each do |option|
|
130
|
+
config.delete(option)
|
131
|
+
end
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
def get_or_create_bigquery_dataset # rubocop:disable Naming/AccessorMethodName
|
136
|
+
bigquery.dataset(bigquery_dataset_name) || begin
|
137
|
+
log_each(:debug, 'BigQuery dataset %s does not exist; creating it' % bigquery_dataset_name)
|
138
|
+
bigquery.create_dataset(bigquery_dataset_name, location: bigquery_config[:location])
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
def bigquery_dataset_name
|
143
|
+
bigquery_config[:dataset] || bigquery_config[:database] || (raise ArgumentError, 'BigQuery dataset must be specified')
|
144
|
+
end
|
145
|
+
|
127
146
|
def connection_execute_method
|
128
147
|
:query
|
129
148
|
end
|
@@ -138,9 +157,9 @@ module Sequel
|
|
138
157
|
end
|
139
158
|
|
140
159
|
def schema_parse_table(_table_name, _opts)
|
141
|
-
|
160
|
+
log_each(:debug, Paint['schema_parse_table', :red, :bold])
|
142
161
|
# require 'pry'; binding.pry
|
143
|
-
|
162
|
+
bigquery.datasets.map do |dataset|
|
144
163
|
[
|
145
164
|
dataset.dataset_id,
|
146
165
|
{},
|
@@ -155,13 +174,12 @@ module Sequel
|
|
155
174
|
|
156
175
|
# Padded to horizontally align with post-execution log message which includes the execution time
|
157
176
|
def log_query(sql)
|
158
|
-
pad = '
|
159
|
-
|
160
|
-
# @loggers[0]&.debug(' ' + sql)
|
177
|
+
pad = ' ' * 12
|
178
|
+
log_each(:debug, Paint[pad + sql, :cyan, :bold])
|
161
179
|
end
|
162
180
|
|
163
181
|
def warn(msg)
|
164
|
-
|
182
|
+
log_each(:warn, Paint[msg, '#FFA500', :bold])
|
165
183
|
end
|
166
184
|
|
167
185
|
def warn_default_removal(sql)
|
@@ -195,7 +213,7 @@ module Sequel
|
|
195
213
|
|
196
214
|
class Dataset < Sequel::Dataset
|
197
215
|
def fetch_rows(sql, &block)
|
198
|
-
|
216
|
+
db.send(:log_each, :debug, '#fetch_rows')
|
199
217
|
|
200
218
|
execute(sql) do |bq_result|
|
201
219
|
self.columns = bq_result.fields.map { |field| field.name.to_sym }
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sequel-bigquery
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.1
|
4
|
+
version: 0.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brendan Weibrecht
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-11-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: amazing_print
|