sequel-bigquery 0.2.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +10 -0
- data/lib/sequel-bigquery.rb +66 -30
- data/lib/sequel_bigquery/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e444c2d4a5c6b54b5a1bb0efde17e1dd83761ec53f1b796f5e959646bd37f315
|
4
|
+
data.tar.gz: 41875a2325c00eb26786fbd3ab8430e9880b93d795a18b000bff3dd75ed0ea47
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 725fce3447e58e75c5cf817d736322c567661752d1b2d2f4f45aa0ce712825e8792214d1f56313272d8a84facdadff51f63ee4a6b3bda1b8c55d92a80a826798
|
7
|
+
data.tar.gz: 3aa8900e8fba7400049e8f83879998b246296e1705d01b25f2a1cf20855cf8faeea09b9d7e4cb7280b2783296d0a27f61ed295275089d6de311c256c5f7ea763
|
data/README.md
CHANGED
@@ -4,6 +4,10 @@
|
|
4
4
|
|
5
5
|
A Sequel adapter for [Google's BigQuery](https://cloud.google.com/bigquery).
|
6
6
|
|
7
|
+
This gem was created in order to manage schema migrations of a BigQuery dataset at GreenSync. At the time of writing, we couldn't find any good tools in any language to manage changes to the schema as a set of migrations.
|
8
|
+
|
9
|
+
Beyond migrations, I'm unsure how useful this gem is. I haven't yet tested what the performance would be for data interactions vs. directly using the `google-cloud-bigquery` gem's native facilities. If you're inserting a bunch of data, it's probably a better idea to use an [inserter from that gem](https://googleapis.dev/ruby/google-cloud-bigquery/latest/Google/Cloud/Bigquery/Dataset.html#insert_async-instance_method) rather than going through SQL.
|
10
|
+
|
7
11
|
## Contents
|
8
12
|
|
9
13
|
<!-- MarkdownTOC autolink=true -->
|
@@ -31,6 +35,7 @@ Features:
|
|
31
35
|
- Updating rows, with automatic addition of `where 1 = 1` to statements (since BigQuery requires a `where` clause)
|
32
36
|
- Querying
|
33
37
|
- Transactions (buffered since BigQuery only supports them when you execute the whole transaction at once)
|
38
|
+
- Table partitioning
|
34
39
|
- Ruby types:
|
35
40
|
+ String
|
36
41
|
+ Integer
|
@@ -39,6 +44,7 @@ Features:
|
|
39
44
|
+ Date
|
40
45
|
+ Float
|
41
46
|
+ BigDecimal
|
47
|
+
- Selecting the BigQuery server location
|
42
48
|
|
43
49
|
## Installation
|
44
50
|
|
@@ -66,17 +72,21 @@ Connect to BigQuery:
|
|
66
72
|
|
67
73
|
```
|
68
74
|
require 'sequel-bigquery'
|
75
|
+
require 'logger'
|
69
76
|
|
70
77
|
db = Sequel.connect(
|
71
78
|
adapter: :bigquery,
|
72
79
|
project: 'your-gcp-project',
|
73
80
|
database: 'your_bigquery_dataset_name',
|
81
|
+
location: 'australia-southeast2',
|
74
82
|
logger: Logger.new(STDOUT),
|
75
83
|
)
|
76
84
|
```
|
77
85
|
|
78
86
|
And use Sequel like normal.
|
79
87
|
|
88
|
+
Note that it is important to supply a logger that will at least output warning messages so you know when your queries are being modified or buffered, which may be unexpected behaviour.
|
89
|
+
|
80
90
|
## Contributing
|
81
91
|
|
82
92
|
Pull requests welcome! =)
|
data/lib/sequel-bigquery.rb
CHANGED
@@ -4,6 +4,7 @@ require 'delegate'
|
|
4
4
|
require 'time'
|
5
5
|
|
6
6
|
require 'google/cloud/bigquery'
|
7
|
+
require 'amazing_print'
|
7
8
|
require 'paint'
|
8
9
|
require 'sequel'
|
9
10
|
|
@@ -16,38 +17,34 @@ module Sequel
|
|
16
17
|
class Database < Sequel::Database # rubocop:disable Metrics/ClassLength
|
17
18
|
set_adapter_scheme :bigquery
|
18
19
|
|
19
|
-
def initialize(*args, **
|
20
|
-
|
21
|
-
@orig_opts = kawrgs.fetch(:orig_opts)
|
20
|
+
def initialize(*args, **kwargs)
|
21
|
+
@bigquery_config = kwargs.fetch(:orig_opts)
|
22
22
|
@sql_buffer = []
|
23
23
|
@sql_buffering = false
|
24
24
|
super
|
25
25
|
end
|
26
26
|
|
27
27
|
def connect(*_args)
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
28
|
+
log_each(:debug, '#connect')
|
29
|
+
get_or_create_bigquery_dataset
|
30
|
+
.tap { log_each(:debug, '#connect end') }
|
31
|
+
end
|
32
|
+
|
33
|
+
def bigquery
|
34
34
|
# ObjectSpace.each_object(HTTPClient).each { |c| c.debug_dev = STDOUT }
|
35
|
-
@bigquery.
|
36
|
-
@loggers[0].debug('BigQuery dataset %s does not exist; creating it' % bq_dataset_name)
|
37
|
-
@bigquery.create_dataset(bq_dataset_name)
|
38
|
-
end
|
39
|
-
.tap { puts '#connect end' }
|
35
|
+
@bigquery ||= Google::Cloud::Bigquery.new(google_cloud_bigquery_gem_config)
|
40
36
|
end
|
41
37
|
|
42
38
|
def disconnect_connection(_c)
|
43
|
-
|
39
|
+
log_each(:debug, '#disconnect_connection')
|
44
40
|
# c.disconnect
|
45
41
|
end
|
46
42
|
|
47
43
|
def drop_datasets(*dataset_names_to_drop)
|
48
44
|
dataset_names_to_drop.each do |dataset_name_to_drop|
|
49
|
-
|
50
|
-
dataset_to_drop =
|
45
|
+
log_each(:debug, "Dropping dataset #{dataset_name_to_drop.inspect}")
|
46
|
+
dataset_to_drop = bigquery.dataset(dataset_name_to_drop)
|
47
|
+
next unless dataset_to_drop
|
51
48
|
dataset_to_drop.tables.each(&:delete)
|
52
49
|
dataset_to_drop.delete
|
53
50
|
end
|
@@ -55,7 +52,7 @@ module Sequel
|
|
55
52
|
alias drop_dataset drop_datasets
|
56
53
|
|
57
54
|
def execute(sql, opts = OPTS) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
58
|
-
|
55
|
+
log_each(:debug, '#execute')
|
59
56
|
log_query(sql)
|
60
57
|
|
61
58
|
# require 'pry'; binding.pry if sql =~ /CREATE TABLE IF NOT EXISTS/i
|
@@ -86,15 +83,12 @@ module Sequel
|
|
86
83
|
sql_to_execute = @sql_buffer.any? ? @sql_buffer.join("\n") : sql
|
87
84
|
conn.query(sql_to_execute)
|
88
85
|
end
|
89
|
-
|
90
|
-
ap results
|
86
|
+
log_each(:debug, results.awesome_inspect)
|
91
87
|
if block_given?
|
92
88
|
yield results
|
93
89
|
else
|
94
90
|
results
|
95
91
|
end
|
96
|
-
# TODO
|
97
|
-
# rescue ::ODBC::Error, ArgumentError => e
|
98
92
|
rescue Google::Cloud::InvalidArgumentError, ArgumentError => e
|
99
93
|
raise_error(e)
|
100
94
|
end # rubocop:disable Style/MultilineBlockChain
|
@@ -122,6 +116,33 @@ module Sequel
|
|
122
116
|
|
123
117
|
private
|
124
118
|
|
119
|
+
attr_reader :bigquery_config
|
120
|
+
|
121
|
+
def google_cloud_bigquery_gem_config
|
122
|
+
bigquery_config.dup.tap do |config|
|
123
|
+
%i[
|
124
|
+
adapter
|
125
|
+
database
|
126
|
+
dataset
|
127
|
+
location
|
128
|
+
logger
|
129
|
+
].each do |option|
|
130
|
+
config.delete(option)
|
131
|
+
end
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
def get_or_create_bigquery_dataset # rubocop:disable Naming/AccessorMethodName
|
136
|
+
bigquery.dataset(bigquery_dataset_name) || begin
|
137
|
+
log_each(:debug, 'BigQuery dataset %s does not exist; creating it' % bigquery_dataset_name)
|
138
|
+
bigquery.create_dataset(bigquery_dataset_name, location: bigquery_config[:location])
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
def bigquery_dataset_name
|
143
|
+
bigquery_config[:dataset] || bigquery_config[:database] || (raise ArgumentError, 'BigQuery dataset must be specified')
|
144
|
+
end
|
145
|
+
|
125
146
|
def connection_execute_method
|
126
147
|
:query
|
127
148
|
end
|
@@ -136,9 +157,9 @@ module Sequel
|
|
136
157
|
end
|
137
158
|
|
138
159
|
def schema_parse_table(_table_name, _opts)
|
139
|
-
|
160
|
+
log_each(:debug, Paint['schema_parse_table', :red, :bold])
|
140
161
|
# require 'pry'; binding.pry
|
141
|
-
|
162
|
+
bigquery.datasets.map do |dataset|
|
142
163
|
[
|
143
164
|
dataset.dataset_id,
|
144
165
|
{},
|
@@ -153,13 +174,12 @@ module Sequel
|
|
153
174
|
|
154
175
|
# Padded to horizontally align with post-execution log message which includes the execution time
|
155
176
|
def log_query(sql)
|
156
|
-
pad = '
|
157
|
-
|
158
|
-
# @loggers[0]&.debug(' ' + sql)
|
177
|
+
pad = ' ' * 12
|
178
|
+
log_each(:debug, Paint[pad + sql, :cyan, :bold])
|
159
179
|
end
|
160
180
|
|
161
181
|
def warn(msg)
|
162
|
-
|
182
|
+
log_each(:warn, Paint[msg, '#FFA500', :bold])
|
163
183
|
end
|
164
184
|
|
165
185
|
def warn_default_removal(sql)
|
@@ -173,11 +193,27 @@ module Sequel
|
|
173
193
|
'Note that no result data is returned while the transaction is open.',
|
174
194
|
)
|
175
195
|
end
|
196
|
+
|
197
|
+
# SQL for creating a table with BigQuery specific options
|
198
|
+
def create_table_sql(name, generator, options)
|
199
|
+
"#{super}#{create_table_suffix_sql(name, options)}"
|
200
|
+
end
|
201
|
+
|
202
|
+
# Handle BigQuery specific table extensions (i.e. partitioning)
|
203
|
+
def create_table_suffix_sql(_name, options)
|
204
|
+
sql = +''
|
205
|
+
|
206
|
+
if (partition_by = options[:partition_by])
|
207
|
+
sql << " PARTITION BY #{literal(Array(partition_by))}"
|
208
|
+
end
|
209
|
+
|
210
|
+
sql
|
211
|
+
end
|
176
212
|
end
|
177
213
|
|
178
214
|
class Dataset < Sequel::Dataset
|
179
215
|
def fetch_rows(sql, &block)
|
180
|
-
|
216
|
+
db.send(:log_each, :debug, '#fetch_rows')
|
181
217
|
|
182
218
|
execute(sql) do |bq_result|
|
183
219
|
self.columns = bq_result.fields.map { |field| field.name.to_sym }
|
@@ -203,7 +239,7 @@ module Sequel
|
|
203
239
|
|
204
240
|
# Like MySQL, BigQuery uses the nonstandard ` (backtick) for quoting identifiers.
|
205
241
|
def quoted_identifier_append(sql, c)
|
206
|
-
sql << '`%s`' % c
|
242
|
+
sql << ('`%s`' % c)
|
207
243
|
end
|
208
244
|
|
209
245
|
def input_identifier(v)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sequel-bigquery
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2
|
4
|
+
version: 0.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brendan Weibrecht
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-11-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: amazing_print
|