sequel-bigquery 0.1.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +114 -1
- data/lib/sequel-bigquery.rb +75 -120
- data/lib/sequel_bigquery/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 3e8459abb689482e387bc4b0cd7dac2c582a40a9c449d5cd5bad5c8da57fffdb
+  data.tar.gz: 4409fa1b5704c03afcd068915311c4944bdaec1326a21ba6df3ed47d9ea118a2
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8573f4d5d1f46d63fd062d97efa1bf19ffc7d2e9a3dd24433cdd079554380f7639e2d767657285a9cc1eea829a300c08aa40cfd3a7e24eb9f0f229c0f13e0e8c
+  data.tar.gz: 51371f99aba0a7799a0c8c34f9d3541c86c76113d4acda78badb420375abded01cdd456f184753793505be70f92eba1d9f0a99f35abdd1f766806023f3860979
data/README.md
CHANGED
@@ -1,2 +1,115 @@
 # sequel-bigquery
-
+
+[](https://rubygems.org/gems/sequel-bigquery)
+
+A Sequel adapter for [Google's BigQuery](https://cloud.google.com/bigquery).
+
+## Contents
+
+<!-- MarkdownTOC autolink=true -->
+
+- [Intro](#intro)
+- [Installation](#installation)
+- [Usage](#usage)
+- [Contributing](#contributing)
+- [Development](#development)
+  - [Pre-push hook](#pre-push-hook)
+  - [Release](#release)
+
+<!-- /MarkdownTOC -->
+
+## Intro
+
+**Be warned: Given I was unable to find Sequel documentation covering how to write a database adapter, this was put together by reading Sequel's source and hacking at things until they worked. There are probably a lot of rough edges.**
+
+Features:
+
+- Connecting
+- Migrating
+- Table creation, with automatic removal of defaults from statements (since BigQuery doesn't support it)
+- Inserting rows
+- Updating rows, with automatic addition of `where 1 = 1` to statements (since BigQuery requires a `where` clause)
+- Querying
+- Transactions (buffered since BigQuery only supports them when you execute the whole transaction at once)
+- Table partitioning
+- Ruby types:
+  + String
+  + Integer
+  + _Boolean_ (`TrueClass`/`FalseClass`)
+  + DateTime (note that BigQuery does not persist timezone)
+  + Date
+  + Float
+  + BigDecimal
+- Selecting the BigQuery server location
+
+## Installation
+
+Add it to the `Gemfile` of your project:
+
+```ruby
+gem 'sequel-bigquery'
+```
+
+and install all your gems:
+
+```bash
+bundle install
+```
+
+Or you can install it to your system directly using:
+
+```bash
+gem install sequel-bigquery
+```
+
+## Usage
+
+Connect to BigQuery:
+
+```
+require 'sequel-bigquery'
+
+db = Sequel.connect(
+  adapter: :bigquery,
+  project: 'your-gcp-project',
+  database: 'your_bigquery_dataset_name',
+  location: 'australia-southeast2',
+  logger: Logger.new(STDOUT),
+)
+```
+
+And use Sequel like normal.
+
+## Contributing
+
+Pull requests welcome! =)
+
+## Development
+
+### Pre-push hook
+
+This hook runs style checks and tests.
+
+To set up the pre-push hook:
+
+```bash
+echo -e "#\!/bin/bash\n\$(dirname \$0)/../../auto/pre-push-hook" > .git/hooks/pre-push
+chmod +x .git/hooks/pre-push
+```
+
+### Release
+
+To release a new version:
+
+```bash
+auto/release/update-version && auto/release/tag && auto/release/publish
+```
+
+This takes care of the whole process:
+
+- Incrementing the version number (the patch number by default)
+- Tagging & pushing commits
+- Publishing the gem to RubyGems
+- Creating a draft GitHub release
+
+To increment the minor or major versions instead of the patch number, run `auto/release/update-version` with `--minor` or `--major`.
data/lib/sequel-bigquery.rb
CHANGED
@@ -11,9 +11,9 @@ module Sequel
   module Bigquery
     # Contains procs keyed on subadapter type that extend the
     # given database object so it supports the correct database type.
-    DATABASE_SETUP = {}
-
-    class Database < Sequel::Database
+    DATABASE_SETUP = {}.freeze
+
+    class Database < Sequel::Database # rubocop:disable Metrics/ClassLength
       set_adapter_scheme :bigquery
 
       def initialize(*args, **kawrgs)
@@ -26,28 +26,36 @@ module Sequel
 
       def connect(*_args)
         puts '#connect'
-        # self.input_identifier_meth = nil
-        # self.identifier_output_method = nil
-
         config = @orig_opts.dup
         config.delete(:adapter)
         config.delete(:logger)
+        location = config.delete(:location)
         bq_dataset_name = config.delete(:dataset) || config.delete(:database)
         @bigquery = Google::Cloud::Bigquery.new(config)
         # ObjectSpace.each_object(HTTPClient).each { |c| c.debug_dev = STDOUT }
         @bigquery.dataset(bq_dataset_name) || begin
           @loggers[0].debug('BigQuery dataset %s does not exist; creating it' % bq_dataset_name)
-          @bigquery.create_dataset(bq_dataset_name)
+          @bigquery.create_dataset(bq_dataset_name, location: location)
         end
           .tap { puts '#connect end' }
       end
 
-      def disconnect_connection(
+      def disconnect_connection(_c)
         puts '#disconnect_connection'
         # c.disconnect
       end
 
-      def
+      def drop_datasets(*dataset_names_to_drop)
+        dataset_names_to_drop.each do |dataset_name_to_drop|
+          puts "Dropping dataset #{dataset_name_to_drop.inspect}"
+          dataset_to_drop = @bigquery.dataset(dataset_name_to_drop)
+          dataset_to_drop.tables.each(&:delete)
+          dataset_to_drop.delete
+        end
+      end
+      alias drop_dataset drop_datasets
+
+      def execute(sql, opts = OPTS) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
         puts '#execute'
         log_query(sql)
 
@@ -60,44 +68,37 @@ module Sequel
 
         if sql =~ /^update/i && sql !~ / where /i
           warn("Warning: Appended 'where 1 = 1' to query since BigQuery requires UPDATE statements to include a WHERE clause")
-          sql
+          sql += ' where 1 = 1'
         end
 
-        if
+        if /^begin/i.match?(sql)
           warn_transaction
           @sql_buffering = true
         end
 
        if @sql_buffering
           @sql_buffer << sql
-
-
-        else
-          return []
-        end
+          return [] unless /^commit/i.match?(sql)
+          warn("Warning: Will now execute entire buffered transaction:\n" + @sql_buffer.join("\n"))
         end
 
         synchronize(opts[:server]) do |conn|
-
-
-
-          conn.query(sql_to_execute)
-          # raw_result = conn.query(sql_to_execute)
-          # BQResult.new(raw_result)
-        end
-        require 'amazing_print'
-        ap results
-        if block_given?
-          yield results
-        else
-          results
-        end
-        # TODO
-        # rescue ::ODBC::Error, ArgumentError => e
-        rescue Google::Cloud::InvalidArgumentError, ArgumentError => e
-          raise_error(e)
+          results = log_connection_yield(sql, conn) do
+            sql_to_execute = @sql_buffer.any? ? @sql_buffer.join("\n") : sql
+            conn.query(sql_to_execute)
           end
-
+          require 'amazing_print'
+          ap results
+          if block_given?
+            yield results
+          else
+            results
+          end
+        # TODO
+        # rescue ::ODBC::Error, ArgumentError => e
+        rescue Google::Cloud::InvalidArgumentError, ArgumentError => e
+          raise_error(e)
+        end # rubocop:disable Style/MultilineBlockChain
          .tap do
             @sql_buffer = []
             @sql_buffering = false
@@ -116,34 +117,11 @@ module Sequel
         end
       end
 
-
-
-
-
-      # def execute_dui(sql, opts=OPTS)
-      # end
-
-      # def execute_dui(sql, opts=OPTS)
-      #   # require 'pry'; binding.pry
-      #   synchronize(opts[:server]) do |conn|
-      #     begin
-      #       log_connection_yield(sql, conn){conn.do(sql)}
-      #     # TODO:
-      #     # rescue ::ODBC::Error, ArgumentError => e
-      #     rescue ArgumentError => e
-      #       raise_error(e)
-      #     end
-      #   end
-      # end
+      def type_literal_generic_float(_column)
+        :float64
+      end
 
       private
-
-      def adapter_initialize
-        puts '#adapter_initialize'
-        self.extension(:identifier_mangling)
-        self.identifier_input_method = nil
-        self.quote_identifiers = false
-      end
 
       def connection_execute_method
         :query
@@ -158,18 +136,18 @@ module Sequel
         Dataset
       end
 
-      def schema_parse_table(
+      def schema_parse_table(_table_name, _opts)
         logger.debug(Paint['schema_parse_table', :red, :bold])
         # require 'pry'; binding.pry
         @bigquery.datasets.map do |dataset|
           [
             dataset.dataset_id,
-            {}
+            {},
           ]
         end
       end
 
-      def disconnect_error?(e, opts)
+      def disconnect_error?(e, opts) # rubocop:disable Lint/UselessMethodDefinition
         # super || (e.is_a?(::ODBC::Error) && /\A08S01/.match(e.message))
         super
       end
@@ -190,87 +168,64 @@ module Sequel
       end
 
       def warn_transaction
-        warn(
+        warn(
+          'Warning: Transaction detected. This only supported on BigQuery in a script or session. '\
+          'Commencing buffering to run the whole transaction at once as a script upon commit. ' \
+          'Note that no result data is returned while the transaction is open.',
+        )
       end
-    end
 
-
+      # SQL for creating a table with BigQuery specific options
+      def create_table_sql(name, generator, options)
+        "#{super}#{create_table_suffix_sql(name, options)}"
+      end
+
+      # Handle BigQuery specific table extensions (i.e. partitioning)
+      def create_table_suffix_sql(_name, options)
+        sql = +''
+
+        if (partition_by = options[:partition_by])
+          sql << " PARTITION BY #{literal(Array(partition_by))}"
+        end
+
+        sql
+      end
+    end
 
-    # end
-
     class Dataset < Sequel::Dataset
-      def fetch_rows(sql)
+      def fetch_rows(sql, &block)
         puts '#fetch_rows'
-        # execute(sql) do |s|
-        #   i = -1
-        #   cols = s.columns(true).map{|c| [output_identifier(c.name), c.type, i+=1]}
-        #   columns = cols.map{|c| c[0]}
-        #   self.columns = columns
-        #   s.each do |row|
-        #     hash = {}
-        #     cols.each{|n,t,j| hash[n] = convert_odbc_value(row[j], t)}
-        #     yield hash
-        #   end
-        # end
-        # self
 
         execute(sql) do |bq_result|
           self.columns = bq_result.fields.map { |field| field.name.to_sym }
-          bq_result.each
-            yield row
-          end
+          bq_result.each(&block)
         end
 
-        # execute(sql).each do |row|
-        #   yield row
-        # end
         self
       end
 
-      # def columns
-      #   fields.map { |field| field.name.to_sym }
-      # end
-
       private
 
-      # def convert_odbc_value(v, t)
-      #   # When fetching a result set, the Ruby ODBC driver converts all ODBC
-      #   # SQL types to an equivalent Ruby type; with the exception of
-      #   # SQL_TYPE_DATE, SQL_TYPE_TIME and SQL_TYPE_TIMESTAMP.
-      #   #
-      #   # The conversions below are consistent with the mappings in
-      #   # ODBCColumn#mapSqlTypeToGenericType and Column#klass.
-      #   case v
-      #   when ::ODBC::TimeStamp
-      #     db.to_application_timestamp([v.year, v.month, v.day, v.hour, v.minute, v.second, v.fraction])
-      #   when ::ODBC::Time
-      #     Sequel::SQLTime.create(v.hour, v.minute, v.second)
-      #   when ::ODBC::Date
-      #     Date.new(v.year, v.month, v.day)
-      #   else
-      #     if t == ::ODBC::SQL_BIT
-      #       v == 1
-      #     else
-      #       v
-      #     end
-      #   end
-      # end
-
       def literal_time(v)
         "'#{v.iso8601}'"
       end
 
-      # def literal_date(v)
-      #   v.strftime("{d '%Y-%m-%d'}")
-      # end
-
       def literal_false
         'false'
       end
-
+
       def literal_true
         'true'
       end
+
+      # Like MySQL, BigQuery uses the nonstandard ` (backtick) for quoting identifiers.
+      def quoted_identifier_append(sql, c)
+        sql << '`%s`' % c
+      end
+
+      def input_identifier(v)
+        v.to_s
+      end
     end
   end
 end
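From the caller's side, the methods added in this file imply usage roughly along these lines. This is a sketch inferred from the diff alone, given a `db` handle from `Sequel.connect` as in the README; the table, column, and dataset names are made up:

```ruby
# create_table_suffix_sql appends a PARTITION BY clause when :partition_by is given.
db.create_table(:events, partition_by: :occurred_date) do
  String :name
  Date   :occurred_date
end

# execute buffers statements from BEGIN onwards and sends the whole script to
# BigQuery when it sees COMMIT, so no result data is returned while the
# transaction is open.
db.transaction do
  db[:events].insert(name: 'signup', occurred_date: Date.today)
  db[:events].update(name: 'renamed') # no WHERE clause given, so 'where 1 = 1' is appended
end

# drop_datasets (aliased as drop_dataset) deletes every table in each named
# dataset and then the dataset itself.
db.drop_datasets('scratch_dataset_a', 'scratch_dataset_b')
```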
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: sequel-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.
+  version: 0.4.0
 platform: ruby
 authors:
 - Brendan Weibrecht
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2021-
+date: 2021-10-28 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: amazing_print
@@ -98,7 +98,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubygems_version: 3.
+rubygems_version: 3.2.16
 signing_key:
 specification_version: 4
 summary: A Sequel adapter for Google's BigQuery