sequel-bigquery 0.2.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 13d55b1e469dca990e67e526e1f9774c0e86ce9ddee8619531c6f5ad3c671f7b
4
- data.tar.gz: 10b66d4878ca799922ae38e2c847b5d4a29ade0d1b65be098744210bc9c12d51
3
+ metadata.gz: e444c2d4a5c6b54b5a1bb0efde17e1dd83761ec53f1b796f5e959646bd37f315
4
+ data.tar.gz: 41875a2325c00eb26786fbd3ab8430e9880b93d795a18b000bff3dd75ed0ea47
5
5
  SHA512:
6
- metadata.gz: 2004fdac1976b084a361524147411451f514810bf1d8c0ff50dc8f2161bb63d620a79357a70a5b761cffd8995f9b144023f64cefb40330b36a4961844b6aefbd
7
- data.tar.gz: b131f9e89bea9e6839c65220961e9d7b157b5baf6cb7761f26872d7c35750a9995d115e99454830cd90b2fe8df705b0ee90e05c8de0ce0d115ef1ff92ad8340f
6
+ metadata.gz: 725fce3447e58e75c5cf817d736322c567661752d1b2d2f4f45aa0ce712825e8792214d1f56313272d8a84facdadff51f63ee4a6b3bda1b8c55d92a80a826798
7
+ data.tar.gz: 3aa8900e8fba7400049e8f83879998b246296e1705d01b25f2a1cf20855cf8faeea09b9d7e4cb7280b2783296d0a27f61ed295275089d6de311c256c5f7ea763
data/README.md CHANGED
@@ -4,6 +4,10 @@
4
4
 
5
5
  A Sequel adapter for [Google's BigQuery](https://cloud.google.com/bigquery).
6
6
 
7
+ This gem was created in order to manage schema migrations of a BigQuery dataset at GreenSync. At the time of writing, we couldn't find any good tools in any language to manage changes to the schema as a set of migrations.
8
+
9
+ Beyond migrations, I'm unsure how useful this gem is. I haven't yet tested what the performance would be for data interactions vs. directly using the `google-cloud-bigquery` gem's native facilities. If you're inserting a bunch of data, it's probably a better idea to use an [inserter from that gem](https://googleapis.dev/ruby/google-cloud-bigquery/latest/Google/Cloud/Bigquery/Dataset.html#insert_async-instance_method) rather than going through SQL.
10
+
7
11
  ## Contents
8
12
 
9
13
  <!-- MarkdownTOC autolink=true -->
@@ -31,6 +35,7 @@ Features:
31
35
  - Updating rows, with automatic addition of `where 1 = 1` to statements (since BigQuery requires a `where` clause)
32
36
  - Querying
33
37
  - Transactions (buffered since BigQuery only supports them when you execute the whole transaction at once)
38
+ - Table partitioning
34
39
  - Ruby types:
35
40
  + String
36
41
  + Integer
@@ -39,6 +44,7 @@ Features:
39
44
  + Date
40
45
  + Float
41
46
  + BigDecimal
47
+ - Selecting the BigQuery server location
42
48
 
43
49
  ## Installation
44
50
 
@@ -66,17 +72,21 @@ Connect to BigQuery:
66
72
 
67
73
  ```
68
74
  require 'sequel-bigquery'
75
+ require 'logger'
69
76
 
70
77
  db = Sequel.connect(
71
78
  adapter: :bigquery,
72
79
  project: 'your-gcp-project',
73
80
  database: 'your_bigquery_dataset_name',
81
+ location: 'australia-southeast2',
74
82
  logger: Logger.new(STDOUT),
75
83
  )
76
84
  ```
77
85
 
78
86
  And use Sequel like normal.
79
87
 
88
 + Note that it is important to supply a logger that will at least output warning messages so you know when your queries are being modified or buffered, which may be unexpected behaviour.
89
+
80
90
  ## Contributing
81
91
 
82
92
  Pull requests welcome! =)
@@ -4,6 +4,7 @@ require 'delegate'
4
4
  require 'time'
5
5
 
6
6
  require 'google/cloud/bigquery'
7
+ require 'amazing_print'
7
8
  require 'paint'
8
9
  require 'sequel'
9
10
 
@@ -16,38 +17,34 @@ module Sequel
16
17
  class Database < Sequel::Database # rubocop:disable Metrics/ClassLength
17
18
  set_adapter_scheme :bigquery
18
19
 
19
- def initialize(*args, **kawrgs)
20
- puts '.new'
21
- @orig_opts = kawrgs.fetch(:orig_opts)
20
+ def initialize(*args, **kwargs)
21
+ @bigquery_config = kwargs.fetch(:orig_opts)
22
22
  @sql_buffer = []
23
23
  @sql_buffering = false
24
24
  super
25
25
  end
26
26
 
27
27
  def connect(*_args)
28
- puts '#connect'
29
- config = @orig_opts.dup
30
- config.delete(:adapter)
31
- config.delete(:logger)
32
- bq_dataset_name = config.delete(:dataset) || config.delete(:database)
33
- @bigquery = Google::Cloud::Bigquery.new(config)
28
+ log_each(:debug, '#connect')
29
+ get_or_create_bigquery_dataset
30
+ .tap { log_each(:debug, '#connect end') }
31
+ end
32
+
33
+ def bigquery
34
34
  # ObjectSpace.each_object(HTTPClient).each { |c| c.debug_dev = STDOUT }
35
- @bigquery.dataset(bq_dataset_name) || begin
36
- @loggers[0].debug('BigQuery dataset %s does not exist; creating it' % bq_dataset_name)
37
- @bigquery.create_dataset(bq_dataset_name)
38
- end
39
- .tap { puts '#connect end' }
35
+ @bigquery ||= Google::Cloud::Bigquery.new(google_cloud_bigquery_gem_config)
40
36
  end
41
37
 
42
38
  def disconnect_connection(_c)
43
- puts '#disconnect_connection'
39
+ log_each(:debug, '#disconnect_connection')
44
40
  # c.disconnect
45
41
  end
46
42
 
47
43
  def drop_datasets(*dataset_names_to_drop)
48
44
  dataset_names_to_drop.each do |dataset_name_to_drop|
49
- puts "Dropping dataset #{dataset_name_to_drop.inspect}"
50
- dataset_to_drop = @bigquery.dataset(dataset_name_to_drop)
45
+ log_each(:debug, "Dropping dataset #{dataset_name_to_drop.inspect}")
46
+ dataset_to_drop = bigquery.dataset(dataset_name_to_drop)
47
+ next unless dataset_to_drop
51
48
  dataset_to_drop.tables.each(&:delete)
52
49
  dataset_to_drop.delete
53
50
  end
@@ -55,7 +52,7 @@ module Sequel
55
52
  alias drop_dataset drop_datasets
56
53
 
57
54
  def execute(sql, opts = OPTS) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
58
- puts '#execute'
55
+ log_each(:debug, '#execute')
59
56
  log_query(sql)
60
57
 
61
58
  # require 'pry'; binding.pry if sql =~ /CREATE TABLE IF NOT EXISTS/i
@@ -86,15 +83,12 @@ module Sequel
86
83
  sql_to_execute = @sql_buffer.any? ? @sql_buffer.join("\n") : sql
87
84
  conn.query(sql_to_execute)
88
85
  end
89
- require 'amazing_print'
90
- ap results
86
+ log_each(:debug, results.awesome_inspect)
91
87
  if block_given?
92
88
  yield results
93
89
  else
94
90
  results
95
91
  end
96
- # TODO
97
- # rescue ::ODBC::Error, ArgumentError => e
98
92
  rescue Google::Cloud::InvalidArgumentError, ArgumentError => e
99
93
  raise_error(e)
100
94
  end # rubocop:disable Style/MultilineBlockChain
@@ -122,6 +116,33 @@ module Sequel
122
116
 
123
117
  private
124
118
 
119
+ attr_reader :bigquery_config
120
+
121
+ def google_cloud_bigquery_gem_config
122
+ bigquery_config.dup.tap do |config|
123
+ %i[
124
+ adapter
125
+ database
126
+ dataset
127
+ location
128
+ logger
129
+ ].each do |option|
130
+ config.delete(option)
131
+ end
132
+ end
133
+ end
134
+
135
+ def get_or_create_bigquery_dataset # rubocop:disable Naming/AccessorMethodName
136
+ bigquery.dataset(bigquery_dataset_name) || begin
137
+ log_each(:debug, 'BigQuery dataset %s does not exist; creating it' % bigquery_dataset_name)
138
+ bigquery.create_dataset(bigquery_dataset_name, location: bigquery_config[:location])
139
+ end
140
+ end
141
+
142
+ def bigquery_dataset_name
143
+ bigquery_config[:dataset] || bigquery_config[:database] || (raise ArgumentError, 'BigQuery dataset must be specified')
144
+ end
145
+
125
146
  def connection_execute_method
126
147
  :query
127
148
  end
@@ -136,9 +157,9 @@ module Sequel
136
157
  end
137
158
 
138
159
  def schema_parse_table(_table_name, _opts)
139
- logger.debug(Paint['schema_parse_table', :red, :bold])
160
+ log_each(:debug, Paint['schema_parse_table', :red, :bold])
140
161
  # require 'pry'; binding.pry
141
- @bigquery.datasets.map do |dataset|
162
+ bigquery.datasets.map do |dataset|
142
163
  [
143
164
  dataset.dataset_id,
144
165
  {},
@@ -153,13 +174,12 @@ module Sequel
153
174
 
154
175
  # Padded to horizontally align with post-execution log message which includes the execution time
155
176
  def log_query(sql)
156
- pad = ' '
157
- puts Paint[pad + sql, :cyan, :bold]
158
- # @loggers[0]&.debug(' ' + sql)
177
+ pad = ' ' * 12
178
+ log_each(:debug, Paint[pad + sql, :cyan, :bold])
159
179
  end
160
180
 
161
181
  def warn(msg)
162
- @loggers[0].warn(Paint[msg, '#FFA500', :bold])
182
+ log_each(:warn, Paint[msg, '#FFA500', :bold])
163
183
  end
164
184
 
165
185
  def warn_default_removal(sql)
@@ -173,11 +193,27 @@ module Sequel
173
193
  'Note that no result data is returned while the transaction is open.',
174
194
  )
175
195
  end
196
+
197
+ # SQL for creating a table with BigQuery specific options
198
+ def create_table_sql(name, generator, options)
199
+ "#{super}#{create_table_suffix_sql(name, options)}"
200
+ end
201
+
202
+ # Handle BigQuery specific table extensions (i.e. partitioning)
203
+ def create_table_suffix_sql(_name, options)
204
+ sql = +''
205
+
206
+ if (partition_by = options[:partition_by])
207
+ sql << " PARTITION BY #{literal(Array(partition_by))}"
208
+ end
209
+
210
+ sql
211
+ end
176
212
  end
177
213
 
178
214
  class Dataset < Sequel::Dataset
179
215
  def fetch_rows(sql, &block)
180
- puts '#fetch_rows'
216
+ db.send(:log_each, :debug, '#fetch_rows')
181
217
 
182
218
  execute(sql) do |bq_result|
183
219
  self.columns = bq_result.fields.map { |field| field.name.to_sym }
@@ -203,7 +239,7 @@ module Sequel
203
239
 
204
240
  # Like MySQL, BigQuery uses the nonstandard ` (backtick) for quoting identifiers.
205
241
  def quoted_identifier_append(sql, c)
206
- sql << '`%s`' % c
242
+ sql << ('`%s`' % c)
207
243
  end
208
244
 
209
245
  def input_identifier(v)
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Sequel
4
4
  module Bigquery
5
- VERSION = '0.2.0'
5
+ VERSION = '0.4.2'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sequel-bigquery
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brendan Weibrecht
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-10-26 00:00:00.000000000 Z
11
+ date: 2021-11-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: amazing_print