sequel-bigquery 0.2.0 → 0.4.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 13d55b1e469dca990e67e526e1f9774c0e86ce9ddee8619531c6f5ad3c671f7b
4
- data.tar.gz: 10b66d4878ca799922ae38e2c847b5d4a29ade0d1b65be098744210bc9c12d51
3
+ metadata.gz: e444c2d4a5c6b54b5a1bb0efde17e1dd83761ec53f1b796f5e959646bd37f315
4
+ data.tar.gz: 41875a2325c00eb26786fbd3ab8430e9880b93d795a18b000bff3dd75ed0ea47
5
5
  SHA512:
6
- metadata.gz: 2004fdac1976b084a361524147411451f514810bf1d8c0ff50dc8f2161bb63d620a79357a70a5b761cffd8995f9b144023f64cefb40330b36a4961844b6aefbd
7
- data.tar.gz: b131f9e89bea9e6839c65220961e9d7b157b5baf6cb7761f26872d7c35750a9995d115e99454830cd90b2fe8df705b0ee90e05c8de0ce0d115ef1ff92ad8340f
6
+ metadata.gz: 725fce3447e58e75c5cf817d736322c567661752d1b2d2f4f45aa0ce712825e8792214d1f56313272d8a84facdadff51f63ee4a6b3bda1b8c55d92a80a826798
7
+ data.tar.gz: 3aa8900e8fba7400049e8f83879998b246296e1705d01b25f2a1cf20855cf8faeea09b9d7e4cb7280b2783296d0a27f61ed295275089d6de311c256c5f7ea763
data/README.md CHANGED
@@ -4,6 +4,10 @@
4
4
 
5
5
  A Sequel adapter for [Google's BigQuery](https://cloud.google.com/bigquery).
6
6
 
7
+ This gem was created in order to manage schema migrations of a BigQuery dataset at GreenSync. At the time of writing, we couldn't find any good tools in any language to manage changes to the schema as a set of migrations.
8
+
9
+ Beyond migrations, I'm unsure how useful this gem is. I haven't yet tested what the performance would be for data interactions vs. directly using the `google-cloud-bigquery` gem's native facilities. If you're inserting a bunch of data, it's probably a better idea to use an [inserter from that gem](https://googleapis.dev/ruby/google-cloud-bigquery/latest/Google/Cloud/Bigquery/Dataset.html#insert_async-instance_method) rather than going through SQL.
10
+
7
11
  ## Contents
8
12
 
9
13
  <!-- MarkdownTOC autolink=true -->
@@ -31,6 +35,7 @@ Features:
31
35
  - Updating rows, with automatic addition of `where 1 = 1` to statements (since BigQuery requires a `where` clause)
32
36
  - Querying
33
37
  - Transactions (buffered since BigQuery only supports them when you execute the whole transaction at once)
38
+ - Table partitioning
34
39
  - Ruby types:
35
40
  + String
36
41
  + Integer
@@ -39,6 +44,7 @@ Features:
39
44
  + Date
40
45
  + Float
41
46
  + BigDecimal
47
+ - Selecting the BigQuery server location
42
48
 
43
49
  ## Installation
44
50
 
@@ -66,17 +72,21 @@ Connect to BigQuery:
66
72
 
67
73
  ```
68
74
  require 'sequel-bigquery'
75
+ require 'logger'
69
76
 
70
77
  db = Sequel.connect(
71
78
  adapter: :bigquery,
72
79
  project: 'your-gcp-project',
73
80
  database: 'your_bigquery_dataset_name',
81
+ location: 'australia-southeast2',
74
82
  logger: Logger.new(STDOUT),
75
83
  )
76
84
  ```
77
85
 
78
86
  And use Sequel like normal.
79
87
 
88
+ Note that it is important to supply a logger that will at least output warning messages so you know when your queries are being modified or buffered, which may be unexpected behaviour.
89
+
80
90
  ## Contributing
81
91
 
82
92
  Pull requests welcome! =)
@@ -4,6 +4,7 @@ require 'delegate'
4
4
  require 'time'
5
5
 
6
6
  require 'google/cloud/bigquery'
7
+ require 'amazing_print'
7
8
  require 'paint'
8
9
  require 'sequel'
9
10
 
@@ -16,38 +17,34 @@ module Sequel
16
17
  class Database < Sequel::Database # rubocop:disable Metrics/ClassLength
17
18
  set_adapter_scheme :bigquery
18
19
 
19
- def initialize(*args, **kawrgs)
20
- puts '.new'
21
- @orig_opts = kawrgs.fetch(:orig_opts)
20
+ def initialize(*args, **kwargs)
21
+ @bigquery_config = kwargs.fetch(:orig_opts)
22
22
  @sql_buffer = []
23
23
  @sql_buffering = false
24
24
  super
25
25
  end
26
26
 
27
27
  def connect(*_args)
28
- puts '#connect'
29
- config = @orig_opts.dup
30
- config.delete(:adapter)
31
- config.delete(:logger)
32
- bq_dataset_name = config.delete(:dataset) || config.delete(:database)
33
- @bigquery = Google::Cloud::Bigquery.new(config)
28
+ log_each(:debug, '#connect')
29
+ get_or_create_bigquery_dataset
30
+ .tap { log_each(:debug, '#connect end') }
31
+ end
32
+
33
+ def bigquery
34
34
  # ObjectSpace.each_object(HTTPClient).each { |c| c.debug_dev = STDOUT }
35
- @bigquery.dataset(bq_dataset_name) || begin
36
- @loggers[0].debug('BigQuery dataset %s does not exist; creating it' % bq_dataset_name)
37
- @bigquery.create_dataset(bq_dataset_name)
38
- end
39
- .tap { puts '#connect end' }
35
+ @bigquery ||= Google::Cloud::Bigquery.new(google_cloud_bigquery_gem_config)
40
36
  end
41
37
 
42
38
  def disconnect_connection(_c)
43
- puts '#disconnect_connection'
39
+ log_each(:debug, '#disconnect_connection')
44
40
  # c.disconnect
45
41
  end
46
42
 
47
43
  def drop_datasets(*dataset_names_to_drop)
48
44
  dataset_names_to_drop.each do |dataset_name_to_drop|
49
- puts "Dropping dataset #{dataset_name_to_drop.inspect}"
50
- dataset_to_drop = @bigquery.dataset(dataset_name_to_drop)
45
+ log_each(:debug, "Dropping dataset #{dataset_name_to_drop.inspect}")
46
+ dataset_to_drop = bigquery.dataset(dataset_name_to_drop)
47
+ next unless dataset_to_drop
51
48
  dataset_to_drop.tables.each(&:delete)
52
49
  dataset_to_drop.delete
53
50
  end
@@ -55,7 +52,7 @@ module Sequel
55
52
  alias drop_dataset drop_datasets
56
53
 
57
54
  def execute(sql, opts = OPTS) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
58
- puts '#execute'
55
+ log_each(:debug, '#execute')
59
56
  log_query(sql)
60
57
 
61
58
  # require 'pry'; binding.pry if sql =~ /CREATE TABLE IF NOT EXISTS/i
@@ -86,15 +83,12 @@ module Sequel
86
83
  sql_to_execute = @sql_buffer.any? ? @sql_buffer.join("\n") : sql
87
84
  conn.query(sql_to_execute)
88
85
  end
89
- require 'amazing_print'
90
- ap results
86
+ log_each(:debug, results.awesome_inspect)
91
87
  if block_given?
92
88
  yield results
93
89
  else
94
90
  results
95
91
  end
96
- # TODO
97
- # rescue ::ODBC::Error, ArgumentError => e
98
92
  rescue Google::Cloud::InvalidArgumentError, ArgumentError => e
99
93
  raise_error(e)
100
94
  end # rubocop:disable Style/MultilineBlockChain
@@ -122,6 +116,33 @@ module Sequel
122
116
 
123
117
  private
124
118
 
119
+ attr_reader :bigquery_config
120
+
121
+ def google_cloud_bigquery_gem_config
122
+ bigquery_config.dup.tap do |config|
123
+ %i[
124
+ adapter
125
+ database
126
+ dataset
127
+ location
128
+ logger
129
+ ].each do |option|
130
+ config.delete(option)
131
+ end
132
+ end
133
+ end
134
+
135
+ def get_or_create_bigquery_dataset # rubocop:disable Naming/AccessorMethodName
136
+ bigquery.dataset(bigquery_dataset_name) || begin
137
+ log_each(:debug, 'BigQuery dataset %s does not exist; creating it' % bigquery_dataset_name)
138
+ bigquery.create_dataset(bigquery_dataset_name, location: bigquery_config[:location])
139
+ end
140
+ end
141
+
142
+ def bigquery_dataset_name
143
+ bigquery_config[:dataset] || bigquery_config[:database] || (raise ArgumentError, 'BigQuery dataset must be specified')
144
+ end
145
+
125
146
  def connection_execute_method
126
147
  :query
127
148
  end
@@ -136,9 +157,9 @@ module Sequel
136
157
  end
137
158
 
138
159
  def schema_parse_table(_table_name, _opts)
139
- logger.debug(Paint['schema_parse_table', :red, :bold])
160
+ log_each(:debug, Paint['schema_parse_table', :red, :bold])
140
161
  # require 'pry'; binding.pry
141
- @bigquery.datasets.map do |dataset|
162
+ bigquery.datasets.map do |dataset|
142
163
  [
143
164
  dataset.dataset_id,
144
165
  {},
@@ -153,13 +174,12 @@ module Sequel
153
174
 
154
175
  # Padded to horizontally align with post-execution log message which includes the execution time
155
176
  def log_query(sql)
156
- pad = ' '
157
- puts Paint[pad + sql, :cyan, :bold]
158
- # @loggers[0]&.debug(' ' + sql)
177
+ pad = ' ' * 12
178
+ log_each(:debug, Paint[pad + sql, :cyan, :bold])
159
179
  end
160
180
 
161
181
  def warn(msg)
162
- @loggers[0].warn(Paint[msg, '#FFA500', :bold])
182
+ log_each(:warn, Paint[msg, '#FFA500', :bold])
163
183
  end
164
184
 
165
185
  def warn_default_removal(sql)
@@ -173,11 +193,27 @@ module Sequel
173
193
  'Note that no result data is returned while the transaction is open.',
174
194
  )
175
195
  end
196
+
197
+ # SQL for creating a table with BigQuery specific options
198
+ def create_table_sql(name, generator, options)
199
+ "#{super}#{create_table_suffix_sql(name, options)}"
200
+ end
201
+
202
+ # Handle BigQuery specific table extensions (i.e. partitioning)
203
+ def create_table_suffix_sql(_name, options)
204
+ sql = +''
205
+
206
+ if (partition_by = options[:partition_by])
207
+ sql << " PARTITION BY #{literal(Array(partition_by))}"
208
+ end
209
+
210
+ sql
211
+ end
176
212
  end
177
213
 
178
214
  class Dataset < Sequel::Dataset
179
215
  def fetch_rows(sql, &block)
180
- puts '#fetch_rows'
216
+ db.send(:log_each, :debug, '#fetch_rows')
181
217
 
182
218
  execute(sql) do |bq_result|
183
219
  self.columns = bq_result.fields.map { |field| field.name.to_sym }
@@ -203,7 +239,7 @@ module Sequel
203
239
 
204
240
  # Like MySQL, BigQuery uses the nonstandard ` (backtick) for quoting identifiers.
205
241
  def quoted_identifier_append(sql, c)
206
- sql << '`%s`' % c
242
+ sql << ('`%s`' % c)
207
243
  end
208
244
 
209
245
  def input_identifier(v)
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Sequel
4
4
  module Bigquery
5
- VERSION = '0.2.0'
5
+ VERSION = '0.4.2'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sequel-bigquery
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brendan Weibrecht
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-10-26 00:00:00.000000000 Z
11
+ date: 2021-11-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: amazing_print