sequel-bigquery 0.1.0 → 0.4.0
- checksums.yaml +4 -4
- data/README.md +114 -1
- data/lib/sequel-bigquery.rb +75 -120
- data/lib/sequel_bigquery/version.rb +1 -1
- metadata +3 -3
checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 3e8459abb689482e387bc4b0cd7dac2c582a40a9c449d5cd5bad5c8da57fffdb
+  data.tar.gz: 4409fa1b5704c03afcd068915311c4944bdaec1326a21ba6df3ed47d9ea118a2
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8573f4d5d1f46d63fd062d97efa1bf19ffc7d2e9a3dd24433cdd079554380f7639e2d767657285a9cc1eea829a300c08aa40cfd3a7e24eb9f0f229c0f13e0e8c
+  data.tar.gz: 51371f99aba0a7799a0c8c34f9d3541c86c76113d4acda78badb420375abded01cdd456f184753793505be70f92eba1d9f0a99f35abdd1f766806023f3860979
data/README.md CHANGED

@@ -1,2 +1,115 @@
 # sequel-bigquery
-
+
+[![Gem Version](https://badge.fury.io/rb/sequel-bigquery.svg)](https://rubygems.org/gems/sequel-bigquery)
+
+A Sequel adapter for [Google's BigQuery](https://cloud.google.com/bigquery).
+
+## Contents
+
+<!-- MarkdownTOC autolink=true -->
+
+- [Intro](#intro)
+- [Installation](#installation)
+- [Usage](#usage)
+- [Contributing](#contributing)
+- [Development](#development)
+  - [Pre-push hook](#pre-push-hook)
+  - [Release](#release)
+
+<!-- /MarkdownTOC -->
+
+## Intro
+
+**Be warned: Given I was unable to find Sequel documentation covering how to write a database adapter, this was put together by reading Sequel's source and hacking at things until they worked. There are probably a lot of rough edges.**
+
+Features:
+
+- Connecting
+- Migrating
+- Table creation, with automatic removal of defaults from statements (since BigQuery doesn't support it)
+- Inserting rows
+- Updating rows, with automatic addition of `where 1 = 1` to statements (since BigQuery requires a `where` clause)
+- Querying
+- Transactions (buffered since BigQuery only supports them when you execute the whole transaction at once)
+- Table partitioning
+- Ruby types:
+  + String
+  + Integer
+  + _Boolean_ (`TrueClass`/`FalseClass`)
+  + DateTime (note that BigQuery does not persist timezone)
+  + Date
+  + Float
+  + BigDecimal
+- Selecting the BigQuery server location
+
+## Installation
+
+Add it to the `Gemfile` of your project:
+
+```ruby
+gem 'sequel-bigquery'
+```
+
+and install all your gems:
+
+```bash
+bundle install
+```
+
+Or you can install it to your system directly using:
+
+```bash
+gem install sequel-bigquery
+```
+
+## Usage
+
+Connect to BigQuery:
+
+```
+require 'sequel-bigquery'
+
+db = Sequel.connect(
+  adapter: :bigquery,
+  project: 'your-gcp-project',
+  database: 'your_bigquery_dataset_name',
+  location: 'australia-southeast2',
+  logger: Logger.new(STDOUT),
+)
+```
+
+And use Sequel like normal.
+
+## Contributing
+
+Pull requests welcome! =)
+
+## Development
+
+### Pre-push hook
+
+This hook runs style checks and tests.
+
+To set up the pre-push hook:
+
+```bash
+echo -e "#\!/bin/bash\n\$(dirname \$0)/../../auto/pre-push-hook" > .git/hooks/pre-push
+chmod +x .git/hooks/pre-push
+```
+
+### Release
+
+To release a new version:
+
+```bash
+auto/release/update-version && auto/release/tag && auto/release/publish
+```
+
+This takes care of the whole process:
+
+- Incrementing the version number (the patch number by default)
+- Tagging & pushing commits
+- Publishing the gem to RubyGems
+- Creating a draft GitHub release
+
+To increment the minor or major versions instead of the patch number, run `auto/release/update-version` with `--minor` or `--major`.
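The features and Usage sections in the README above describe behaviour without showing it end to end. Here is a minimal sketch of everyday use, assuming the `db` handle from the Usage example; the `people` table and its columns are hypothetical.

```ruby
# Everyday use of the connected `db` handle from the README's Usage section.
# The `people` table and its columns are hypothetical.
db.create_table(:people) do
  String :name
  Integer :age
end

db[:people].insert(name: 'Ada', age: 36)  # inserting rows
db[:people].where(age: 36).all            # querying
db[:people].update(age: 37)               # adapter appends "where 1 = 1", as noted in the features list
```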
data/lib/sequel-bigquery.rb CHANGED

@@ -11,9 +11,9 @@ module Sequel
   module Bigquery
     # Contains procs keyed on subadapter type that extend the
     # given database object so it supports the correct database type.
-    DATABASE_SETUP = {}
-
-    class Database < Sequel::Database
+    DATABASE_SETUP = {}.freeze
+
+    class Database < Sequel::Database # rubocop:disable Metrics/ClassLength
       set_adapter_scheme :bigquery

       def initialize(*args, **kawrgs)
@@ -26,28 +26,36 @@ module Sequel

       def connect(*_args)
         puts '#connect'
-        # self.input_identifier_meth = nil
-        # self.identifier_output_method = nil
-
         config = @orig_opts.dup
         config.delete(:adapter)
         config.delete(:logger)
+        location = config.delete(:location)
         bq_dataset_name = config.delete(:dataset) || config.delete(:database)
         @bigquery = Google::Cloud::Bigquery.new(config)
         # ObjectSpace.each_object(HTTPClient).each { |c| c.debug_dev = STDOUT }
         @bigquery.dataset(bq_dataset_name) || begin
           @loggers[0].debug('BigQuery dataset %s does not exist; creating it' % bq_dataset_name)
-          @bigquery.create_dataset(bq_dataset_name)
+          @bigquery.create_dataset(bq_dataset_name, location: location)
         end
           .tap { puts '#connect end' }
       end

-      def disconnect_connection(
+      def disconnect_connection(_c)
         puts '#disconnect_connection'
         # c.disconnect
       end

-      def
+      def drop_datasets(*dataset_names_to_drop)
+        dataset_names_to_drop.each do |dataset_name_to_drop|
+          puts "Dropping dataset #{dataset_name_to_drop.inspect}"
+          dataset_to_drop = @bigquery.dataset(dataset_name_to_drop)
+          dataset_to_drop.tables.each(&:delete)
+          dataset_to_drop.delete
+        end
+      end
+      alias drop_dataset drop_datasets
+
+      def execute(sql, opts = OPTS) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
         puts '#execute'
         log_query(sql)

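A sketch of how this hunk surfaces to callers: the `location` option passed to `Sequel.connect` is forwarded to `create_dataset` when the dataset does not yet exist, and the new `drop_datasets` (aliased as `drop_dataset`) deletes each named dataset's tables and then the dataset itself. The dataset names below are hypothetical.

```ruby
# `location:` is forwarded to create_dataset when the dataset is missing (see #connect above).
db = Sequel.connect(
  adapter: :bigquery,
  project: 'your-gcp-project',
  database: 'scratch_dataset',
  location: 'australia-southeast2',
)

# Deletes every table in each named dataset, then the dataset itself.
db.drop_datasets('scratch_dataset', 'old_scratch_dataset')
```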
@@ -60,44 +68,37 @@ module Sequel

         if sql =~ /^update/i && sql !~ / where /i
           warn("Warning: Appended 'where 1 = 1' to query since BigQuery requires UPDATE statements to include a WHERE clause")
-          sql
+          sql += ' where 1 = 1'
         end

-        if
+        if /^begin/i.match?(sql)
           warn_transaction
           @sql_buffering = true
         end

         if @sql_buffering
           @sql_buffer << sql
-
-
-        else
-          return []
-        end
+          return [] unless /^commit/i.match?(sql)
+          warn("Warning: Will now execute entire buffered transaction:\n" + @sql_buffer.join("\n"))
         end

         synchronize(opts[:server]) do |conn|
-
-
-
-          conn.query(sql_to_execute)
-          # raw_result = conn.query(sql_to_execute)
-          # BQResult.new(raw_result)
-        end
-        require 'amazing_print'
-        ap results
-        if block_given?
-          yield results
-        else
-          results
-        end
-        # TODO
-        # rescue ::ODBC::Error, ArgumentError => e
-        rescue Google::Cloud::InvalidArgumentError, ArgumentError => e
-          raise_error(e)
+          results = log_connection_yield(sql, conn) do
+            sql_to_execute = @sql_buffer.any? ? @sql_buffer.join("\n") : sql
+            conn.query(sql_to_execute)
           end
-
+          require 'amazing_print'
+          ap results
+          if block_given?
+            yield results
+          else
+            results
+          end
+        # TODO
+        # rescue ::ODBC::Error, ArgumentError => e
+        rescue Google::Cloud::InvalidArgumentError, ArgumentError => e
+          raise_error(e)
+        end # rubocop:disable Style/MultilineBlockChain
           .tap do
             @sql_buffer = []
             @sql_buffering = false
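For a Sequel transaction, the buffering logic above plays out roughly as follows: the `BEGIN` statement switches buffering on, each subsequent statement is appended to `@sql_buffer` while the adapter returns `[]`, and on `COMMIT` the buffer is joined with newlines and sent to BigQuery as a single script. A sketch, with a hypothetical `events` table:

```ruby
# Sketch of the transaction buffering in #execute above; `events` is hypothetical.
db.transaction do
  db[:events].insert(kind: 'signup')  # buffered, nothing sent to BigQuery yet
  db[:events].insert(kind: 'login')   # buffered
end
# On COMMIT the adapter joins @sql_buffer and runs it as one script, roughly:
#   BEGIN
#   INSERT INTO events ...
#   INSERT INTO events ...
#   COMMIT
```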
@@ -116,34 +117,11 @@ module Sequel
           end
       end

-
-
-
-
-      # def execute_dui(sql, opts=OPTS)
-      # end
-
-      # def execute_dui(sql, opts=OPTS)
-      #   # require 'pry'; binding.pry
-      #   synchronize(opts[:server]) do |conn|
-      #     begin
-      #       log_connection_yield(sql, conn){conn.do(sql)}
-      #     # TODO:
-      #     # rescue ::ODBC::Error, ArgumentError => e
-      #     rescue ArgumentError => e
-      #       raise_error(e)
-      #     end
-      #   end
-      # end
+      def type_literal_generic_float(_column)
+        :float64
+      end

       private
-
-      def adapter_initialize
-        puts '#adapter_initialize'
-        self.extension(:identifier_mangling)
-        self.identifier_input_method = nil
-        self.quote_identifiers = false
-      end

       def connection_execute_method
         :query
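The new `type_literal_generic_float` maps Sequel's generic `Float` column type to BigQuery's `FLOAT64`. A small sketch with a hypothetical table:

```ruby
# Hypothetical migration: a generic Float column is rendered using float64
# via type_literal_generic_float above.
db.create_table(:measurements) do
  String :sensor
  Float :reading  # emitted with the float64 type in the generated DDL
end
```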
@@ -158,18 +136,18 @@ module Sequel
         Dataset
       end

-      def schema_parse_table(
+      def schema_parse_table(_table_name, _opts)
         logger.debug(Paint['schema_parse_table', :red, :bold])
         # require 'pry'; binding.pry
         @bigquery.datasets.map do |dataset|
           [
             dataset.dataset_id,
-            {}
+            {},
           ]
         end
       end

-      def disconnect_error?(e, opts)
+      def disconnect_error?(e, opts) # rubocop:disable Lint/UselessMethodDefinition
         # super || (e.is_a?(::ODBC::Error) && /\A08S01/.match(e.message))
         super
       end
@@ -190,87 +168,64 @@ module Sequel
       end

       def warn_transaction
-        warn(
+        warn(
+          'Warning: Transaction detected. This only supported on BigQuery in a script or session. '\
+          'Commencing buffering to run the whole transaction at once as a script upon commit. ' \
+          'Note that no result data is returned while the transaction is open.',
+        )
       end
-    end

-
+      # SQL for creating a table with BigQuery specific options
+      def create_table_sql(name, generator, options)
+        "#{super}#{create_table_suffix_sql(name, options)}"
+      end
+
+      # Handle BigQuery specific table extensions (i.e. partitioning)
+      def create_table_suffix_sql(_name, options)
+        sql = +''
+
+        if (partition_by = options[:partition_by])
+          sql << " PARTITION BY #{literal(Array(partition_by))}"
+        end
+
+        sql
+      end
+    end

-    # end
-
     class Dataset < Sequel::Dataset
-      def fetch_rows(sql)
+      def fetch_rows(sql, &block)
         puts '#fetch_rows'
-        # execute(sql) do |s|
-        #   i = -1
-        #   cols = s.columns(true).map{|c| [output_identifier(c.name), c.type, i+=1]}
-        #   columns = cols.map{|c| c[0]}
-        #   self.columns = columns
-        #   s.each do |row|
-        #     hash = {}
-        #     cols.each{|n,t,j| hash[n] = convert_odbc_value(row[j], t)}
-        #     yield hash
-        #   end
-        # end
-        # self

         execute(sql) do |bq_result|
           self.columns = bq_result.fields.map { |field| field.name.to_sym }
-          bq_result.each
-            yield row
-          end
+          bq_result.each(&block)
         end

-        # execute(sql).each do |row|
-        #   yield row
-        # end
         self
       end

-      # def columns
-      #   fields.map { |field| field.name.to_sym }
-      # end
-
       private

-      # def convert_odbc_value(v, t)
-      #   # When fetching a result set, the Ruby ODBC driver converts all ODBC
-      #   # SQL types to an equivalent Ruby type; with the exception of
-      #   # SQL_TYPE_DATE, SQL_TYPE_TIME and SQL_TYPE_TIMESTAMP.
-      #   #
-      #   # The conversions below are consistent with the mappings in
-      #   # ODBCColumn#mapSqlTypeToGenericType and Column#klass.
-      #   case v
-      #   when ::ODBC::TimeStamp
-      #     db.to_application_timestamp([v.year, v.month, v.day, v.hour, v.minute, v.second, v.fraction])
-      #   when ::ODBC::Time
-      #     Sequel::SQLTime.create(v.hour, v.minute, v.second)
-      #   when ::ODBC::Date
-      #     Date.new(v.year, v.month, v.day)
-      #   else
-      #     if t == ::ODBC::SQL_BIT
-      #       v == 1
-      #     else
-      #       v
-      #     end
-      #   end
-      # end
-
       def literal_time(v)
         "'#{v.iso8601}'"
       end

-      # def literal_date(v)
-      #   v.strftime("{d '%Y-%m-%d'}")
-      # end
-
       def literal_false
         'false'
       end
-
+
       def literal_true
         'true'
       end
+
+      # Like MySQL, BigQuery uses the nonstandard ` (backtick) for quoting identifiers.
+      def quoted_identifier_append(sql, c)
+        sql << '`%s`' % c
+      end
+
+      def input_identifier(v)
+        v.to_s
+      end
     end
   end
 end
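A sketch of the new `:partition_by` table option handled by `create_table_suffix_sql` above; the table and column names are hypothetical, and, per `quoted_identifier_append`, any quoted identifiers use MySQL-style backticks.

```ruby
# :partition_by is read from the create_table options and appended to the DDL.
db.create_table(:events, partition_by: :event_date) do
  String :kind
  Date :event_date
end
# The generated CREATE TABLE statement gains a suffix along the lines of:
#   ... PARTITION BY (`event_date`)
```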
metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: sequel-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.4.0
 platform: ruby
 authors:
 - Brendan Weibrecht
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2021-
+date: 2021-10-28 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: amazing_print
@@ -98,7 +98,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.
+rubygems_version: 3.2.16
 signing_key:
 specification_version: 4
 summary: A Sequel adapter for Google's BigQuery