google-cloud-bigquery 1.14.0 → 1.42.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. checksums.yaml +4 -4
  2. data/AUTHENTICATION.md +17 -54
  3. data/CHANGELOG.md +377 -0
  4. data/CONTRIBUTING.md +328 -116
  5. data/LOGGING.md +1 -1
  6. data/OVERVIEW.md +21 -20
  7. data/TROUBLESHOOTING.md +2 -8
  8. data/lib/google/cloud/bigquery/argument.rb +197 -0
  9. data/lib/google/cloud/bigquery/convert.rb +155 -173
  10. data/lib/google/cloud/bigquery/copy_job.rb +74 -26
  11. data/lib/google/cloud/bigquery/credentials.rb +5 -12
  12. data/lib/google/cloud/bigquery/data.rb +109 -18
  13. data/lib/google/cloud/bigquery/dataset/access.rb +474 -52
  14. data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
  15. data/lib/google/cloud/bigquery/dataset/tag.rb +67 -0
  16. data/lib/google/cloud/bigquery/dataset.rb +1044 -287
  17. data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
  18. data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
  19. data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
  20. data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
  21. data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
  22. data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
  23. data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
  24. data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
  25. data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
  26. data/lib/google/cloud/bigquery/external.rb +50 -2256
  27. data/lib/google/cloud/bigquery/extract_job.rb +226 -61
  28. data/lib/google/cloud/bigquery/insert_response.rb +1 -3
  29. data/lib/google/cloud/bigquery/job/list.rb +10 -14
  30. data/lib/google/cloud/bigquery/job.rb +289 -14
  31. data/lib/google/cloud/bigquery/load_job.rb +810 -136
  32. data/lib/google/cloud/bigquery/model/list.rb +5 -9
  33. data/lib/google/cloud/bigquery/model.rb +247 -16
  34. data/lib/google/cloud/bigquery/policy.rb +432 -0
  35. data/lib/google/cloud/bigquery/project/list.rb +6 -11
  36. data/lib/google/cloud/bigquery/project.rb +509 -250
  37. data/lib/google/cloud/bigquery/query_job.rb +594 -128
  38. data/lib/google/cloud/bigquery/routine/list.rb +165 -0
  39. data/lib/google/cloud/bigquery/routine.rb +1227 -0
  40. data/lib/google/cloud/bigquery/schema/field.rb +413 -63
  41. data/lib/google/cloud/bigquery/schema.rb +221 -48
  42. data/lib/google/cloud/bigquery/service.rb +204 -112
  43. data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
  44. data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -43
  45. data/lib/google/cloud/bigquery/table/list.rb +6 -11
  46. data/lib/google/cloud/bigquery/table.rb +1470 -377
  47. data/lib/google/cloud/bigquery/time.rb +6 -0
  48. data/lib/google/cloud/bigquery/version.rb +1 -1
  49. data/lib/google/cloud/bigquery.rb +4 -6
  50. data/lib/google-cloud-bigquery.rb +14 -13
  51. metadata +66 -38
data/lib/google/cloud/bigquery/extract_job.rb
@@ -20,15 +20,17 @@ module Google
  # # ExtractJob
  #
  # A {Job} subclass representing an export operation that may be performed
- # on a {Table}. A ExtractJob instance is created when you call
- # {Table#extract_job}.
+ # on a {Table} or {Model}. A ExtractJob instance is returned when you call
+ # {Project#extract_job}, {Table#extract_job} or {Model#extract_job}.
  #
  # @see https://cloud.google.com/bigquery/docs/exporting-data
- # Exporting Data From BigQuery
+ # Exporting table data
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+ # Exporting models
  # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
  # reference
  #
- # @example
+ # @example Export table data
  # require "google/cloud/bigquery"
  #
  # bigquery = Google::Cloud::Bigquery.new
@@ -40,6 +42,18 @@ module Google
  # extract_job.wait_until_done!
  # extract_job.done? #=> true
  #
+ # @example Export a model
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ # model = dataset.model "my_model"
+ #
+ # extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
+ #
+ # extract_job.wait_until_done!
+ # extract_job.done? #=> true
+ #
  class ExtractJob < Job
  ##
  # The URI or URIs representing the Google Cloud Storage files to which
@@ -49,73 +63,132 @@ module Google
  end

  ##
- # The table from which the data is exported. This is the table upon
- # which {Table#extract_job} was called.
+ # The table or model which is exported.
  #
- # @return [Table] A table instance.
+ # @param [String] view Specifies the view that determines which table information is returned.
+ # By default, basic table information and storage statistics (STORAGE_STATS) are returned.
+ # Accepted values include `:unspecified`, `:basic`, `:storage`, and
+ # `:full`. For more information, see [BigQuery Classes](@todo: Update the link).
+ # The default value is the `:unspecified` view type.
  #
- def source
- table = @gapi.configuration.extract.source_table
- return nil unless table
- retrieve_table table.project_id,
- table.dataset_id,
- table.table_id
+ # @return [Table, Model, nil] A table or model instance, or `nil`.
+ #
+ def source view: nil
+ if (table = @gapi.configuration.extract.source_table)
+ retrieve_table table.project_id, table.dataset_id, table.table_id, metadata_view: view
+ elsif (model = @gapi.configuration.extract.source_model)
+ retrieve_model model.project_id, model.dataset_id, model.model_id
+ end
  end

  ##
- # Checks if the export operation compresses the data using gzip. The
- # default is `false`.
+ # Whether the source of the export job is a table. See {#source}.
  #
- # @return [Boolean] `true` when `GZIP`, `false` otherwise.
+ # @return [Boolean] `true` when the source is a table, `false`
+ # otherwise.
  #
- def compression?
- val = @gapi.configuration.extract.compression
- val == "GZIP"
+ def table?
+ !@gapi.configuration.extract.source_table.nil?
  end

  ##
- # Checks if the destination format for the data is [newline-delimited
- # JSON](http://jsonlines.org/). The default is `false`.
+ # Whether the source of the export job is a model. See {#source}.
  #
- # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false`
+ # @return [Boolean] `true` when the source is a model, `false`
  # otherwise.
  #
+ def model?
+ !@gapi.configuration.extract.source_model.nil?
+ end
+
+ ##
+ # Checks if the export operation compresses the data using gzip. The
+ # default is `false`. Not applicable when extracting models.
+ #
+ # @return [Boolean] `true` when `GZIP`, `false` if not `GZIP` or not a
+ # table extraction.
+ def compression?
+ return false unless table?
+ @gapi.configuration.extract.compression == "GZIP"
+ end
+
+ ##
+ # Checks if the destination format for the table data is [newline-delimited
+ # JSON](https://jsonlines.org/). The default is `false`. Not applicable when
+ # extracting models.
+ #
+ # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false` if not
+ # `NEWLINE_DELIMITED_JSON` or not a table extraction.
+ #
  def json?
- val = @gapi.configuration.extract.destination_format
- val == "NEWLINE_DELIMITED_JSON"
+ return false unless table?
+ @gapi.configuration.extract.destination_format == "NEWLINE_DELIMITED_JSON"
  end

  ##
- # Checks if the destination format for the data is CSV. Tables with
+ # Checks if the destination format for the table data is CSV. Tables with
  # nested or repeated fields cannot be exported as CSV. The default is
- # `true`.
+ # `true` for tables. Not applicable when extracting models.
  #
- # @return [Boolean] `true` when `CSV`, `false` otherwise.
+ # @return [Boolean] `true` when `CSV`, or `false` if not `CSV` or not a
+ # table extraction.
  #
  def csv?
+ return false unless table?
  val = @gapi.configuration.extract.destination_format
  return true if val.nil?
  val == "CSV"
  end

  ##
- # Checks if the destination format for the data is
- # [Avro](http://avro.apache.org/). The default is `false`.
+ # Checks if the destination format for the table data is
+ # [Avro](http://avro.apache.org/). The default is `false`. Not applicable
+ # when extracting models.
  #
- # @return [Boolean] `true` when `AVRO`, `false` otherwise.
+ # @return [Boolean] `true` when `AVRO`, `false` if not `AVRO` or not a
+ # table extraction.
  #
  def avro?
+ return false unless table?
+ @gapi.configuration.extract.destination_format == "AVRO"
+ end
+
+ ##
+ # Checks if the destination format for the model is TensorFlow SavedModel.
+ # The default is `true` for models. Not applicable when extracting tables.
+ #
+ # @return [Boolean] `true` when `ML_TF_SAVED_MODEL`, `false` if not
+ # `ML_TF_SAVED_MODEL` or not a model extraction.
+ #
+ def ml_tf_saved_model?
+ return false unless model?
  val = @gapi.configuration.extract.destination_format
- val == "AVRO"
+ return true if val.nil?
+ val == "ML_TF_SAVED_MODEL"
+ end
+
+ ##
+ # Checks if the destination format for the model is XGBoost. The default
+ # is `false`. Not applicable when extracting tables.
+ #
+ # @return [Boolean] `true` when `ML_XGBOOST_BOOSTER`, `false` if not
+ # `ML_XGBOOST_BOOSTER` or not a model extraction.
+ #
+ def ml_xgboost_booster?
+ return false unless model?
+ @gapi.configuration.extract.destination_format == "ML_XGBOOST_BOOSTER"
  end

  ##
  # The character or symbol the operation uses to delimit fields in the
- # exported data. The default is a comma (,).
+ # exported data. The default is a comma (,) for tables. Not applicable
+ # when extracting models.
  #
- # @return [String] A string containing the character, such as `","`.
+ # @return [String, nil] A string containing the character, such as `","`,
+ # `nil` if not a table extraction.
  #
  def delimiter
+ return unless table?
  val = @gapi.configuration.extract.field_delimiter
  val = "," if val.nil?
  val
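The reworked `source` reader, together with the new `table?` and `model?` predicates, lets callers tell whether a finished extract job exported a table or a model before touching the format-specific flags. A minimal sketch of how this might be used; the dataset, table, and bucket names are placeholders:

  require "google/cloud/bigquery"

  bigquery = Google::Cloud::Bigquery.new
  table    = bigquery.dataset("my_dataset").table "my_table"

  extract_job = table.extract_job "gs://my-bucket/my_table.csv"
  extract_job.wait_until_done!

  if extract_job.table?
    # Table extractions expose the CSV/JSON/Avro predicates.
    puts "Exported #{extract_job.source.table_id} as CSV" if extract_job.csv?
  elsif extract_job.model?
    # Model extractions expose the ML format predicates instead.
    puts "Exported a TensorFlow SavedModel" if extract_job.ml_tf_saved_model?
  end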
@@ -123,12 +196,13 @@ module Google

  ##
  # Checks if the exported data contains a header row. The default is
- # `true`.
+ # `true` for tables. Not applicable when extracting models.
  #
  # @return [Boolean] `true` when the print header configuration is
- # present or `nil`, `false` otherwise.
+ # present or `nil`, `false` if disabled or not a table extraction.
  #
  def print_header?
+ return false unless table?
  val = @gapi.configuration.extract.print_header
  val = true if val.nil?
  val
@@ -153,7 +227,23 @@ module Google
  # and the counts as values.
  #
  def destinations_counts
- Hash[destinations.zip destinations_file_counts]
+ destinations.zip(destinations_file_counts).to_h
+ end
+
+ ##
+ # If `#avro?` (`#format` is set to `"AVRO"`), this flag indicates
+ # whether to enable extracting applicable column types (such as
+ # `TIMESTAMP`) to their corresponding AVRO logical types
+ # (`timestamp-micros`), instead of only using their raw types
+ # (`avro-long`). Not applicable when extracting models.
+ #
+ # @return [Boolean] `true` when applicable column types will use their
+ # corresponding AVRO logical types, `false` if not enabled or not a
+ # table extraction.
+ #
+ def use_avro_logical_types?
+ return false unless table?
+ @gapi.configuration.extract.use_avro_logical_types
  end

  ##
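`destinations_counts` keeps its contract (a hash mapping destination URIs to file counts); only the construction changes from `Hash[...]` to `zip(...).to_h`. A short sketch of reading it together with the new Avro logical-types reader, assuming `extract_job` is the completed table export from the sketch above:

  # Each destination URI is mapped to the number of files written there.
  extract_job.destinations_counts.each do |uri, file_count|
    puts "#{uri} => #{file_count} file(s)"
  end

  # Only meaningful for Avro table exports.
  puts extract_job.use_avro_logical_types? if extract_job.avro?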
@@ -162,6 +252,7 @@ module Google
  ##
  # @private Create an Updater object.
  def initialize gapi
+ super()
  @gapi = gapi
  end

@@ -170,32 +261,47 @@ module Google
  #
  # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
  # configuration object for setting query options.
- def self.from_options service, table, storage_files, options = {}
+ def self.from_options service, source, storage_files, options
  job_ref = service.job_ref_from options[:job_id], options[:prefix]
  storage_urls = Array(storage_files).map do |url|
  url.respond_to?(:to_gs_url) ? url.to_gs_url : url
  end
- dest_format = options[:format]
- if dest_format.nil?
- dest_format = Convert.derive_source_format storage_urls.first
+ options[:format] ||= Convert.derive_source_format storage_urls.first
+ extract_config = Google::Apis::BigqueryV2::JobConfigurationExtract.new(
+ destination_uris: Array(storage_urls)
+ )
+ case source
+ when Google::Apis::BigqueryV2::TableReference
+ extract_config.source_table = source
+ when Google::Apis::BigqueryV2::ModelReference
+ extract_config.source_model = source
  end
- req = Google::Apis::BigqueryV2::Job.new(
+ job = Google::Apis::BigqueryV2::Job.new(
  job_reference: job_ref,
  configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
- extract: Google::Apis::BigqueryV2::JobConfigurationExtract.new(
- destination_uris: Array(storage_urls),
- source_table: table
- ),
+ extract: extract_config,
  dry_run: options[:dryrun]
  )
  )

- updater = ExtractJob::Updater.new req
+ from_job_and_options job, options
+ end
+
+ ##
+ # @private Create an Updater from a Job and options hash.
+ #
+ # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
+ # configuration object for setting query options.
+ def self.from_job_and_options request, options
+ updater = ExtractJob::Updater.new request
  updater.compression = options[:compression]
  updater.delimiter = options[:delimiter]
- updater.format = dest_format
+ updater.format = options[:format]
  updater.header = options[:header]
  updater.labels = options[:labels] if options[:labels]
+ unless options[:use_avro_logical_types].nil?
+ updater.use_avro_logical_types = options[:use_avro_logical_types]
+ end
  updater
  end
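`from_options` now accepts either a `TableReference` or a `ModelReference` and hands the options hash to `from_job_and_options`, which applies it to the updater. From the caller's side these options surface as keyword arguments on the public `extract_job` methods; a hedged sketch, reusing the placeholder names from the earlier sketches (treating `use_avro_logical_types:` as a supported keyword is an assumption inferred from the updater call above):

  # Table export; the format is derived from the URI unless given explicitly.
  avro_job = table.extract_job "gs://my-bucket/my_table.avro",
                               format:                 "avro",
                               use_avro_logical_types: true  # assumed keyword, see lead-in

  # Model export; the source becomes a ModelReference instead of a TableReference.
  model     = bigquery.dataset("my_dataset").model "my_model"
  model_job = model.extract_job "gs://my-bucket/#{model.model_id}"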
@@ -232,7 +338,7 @@ module Google
  end

  ##
- # Sets the compression type.
+ # Sets the compression type. Not applicable when extracting models.
  #
  # @param [String] value The compression type to use for exported
  # files. Possible values include `GZIP` and `NONE`. The default
@@ -244,7 +350,7 @@ module Google
  end

  ##
- # Sets the field delimiter.
+ # Sets the field delimiter. Not applicable when extracting models.
  #
  # @param [String] value Delimiter to use between fields in the
  # exported data. Default is <code>,</code>.
@@ -255,25 +361,32 @@ module Google
  end

  ##
- # Sets the destination file format. The default value is `csv`.
+ # Sets the destination file format. The default value for
+ # tables is `csv`. Tables with nested or repeated fields cannot be
+ # exported as CSV. The default value for models is `ml_tf_saved_model`.
  #
- # The following values are supported:
+ # Supported values for tables:
  #
  # * `csv` - CSV
- # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
+ # * `json` - [Newline-delimited JSON](https://jsonlines.org/)
  # * `avro` - [Avro](http://avro.apache.org/)
  #
+ # Supported values for models:
+ #
+ # * `ml_tf_saved_model` - TensorFlow SavedModel
+ # * `ml_xgboost_booster` - XGBoost Booster
+ #
  # @param [String] new_format The new source format.
  #
  # @!group Attributes
  #
  def format= new_format
- @gapi.configuration.extract.update! \
- destination_format: Convert.source_format(new_format)
+ @gapi.configuration.extract.update! destination_format: Convert.source_format(new_format)
  end

  ##
- # Print a header row in the exported file.
+ # Print a header row in the exported file. Not applicable when
+ # extracting models.
  #
  # @param [Boolean] value Whether to print out a header row in the
  # results. Default is `true`.
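These setters are normally reached through the block form of the `extract_job` methods, which yields this `ExtractJob::Updater`. A minimal sketch, assuming `Model#extract_job` yields the updater just as `Table#extract_job` does, and reusing the placeholder table and model from above:

  # Table export configured through the updater block.
  table.extract_job "gs://my-bucket/my_table.json" do |job|
    job.format      = "json"
    job.compression = "GZIP"
    job.header      = false
  end

  # Model export as an XGBoost Booster instead of the default SavedModel.
  model.extract_job "gs://my-bucket/#{model.model_id}" do |job|
    job.format = "ml_xgboost_booster"
  end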
@@ -287,12 +400,21 @@ module Google
  # Sets the labels to use for the job.
  #
  # @param [Hash] value A hash of user-provided labels associated with
- # the job. You can use these to organize and group your jobs. Label
- # keys and values can be no longer than 63 characters, can only
- # contain lowercase letters, numeric characters, underscores and
- # dashes. International characters are allowed. Label values are
- # optional. Label keys must start with a letter and each label in
- # the list must have a different key.
+ # the job. You can use these to organize and group your jobs.
+ #
+ # The labels applied to a resource must meet the following requirements:
+ #
+ # * Each resource can have multiple labels, up to a maximum of 64.
+ # * Each label must be a key-value pair.
+ # * Keys have a minimum length of 1 character and a maximum length of
+ # 63 characters, and cannot be empty. Values can be empty, and have
+ # a maximum length of 63 characters.
+ # * Keys and values can contain only lowercase letters, numeric characters,
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
+ # international characters are allowed.
+ # * The key portion of a label must be unique. However, you can use the
+ # same key with multiple resources.
+ # * Keys must start with a lowercase letter or international character.
  #
  # @!group Attributes
  #
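A short sketch of labels that satisfy these rules, set through the same updater block (the label keys and values are placeholders):

  table.extract_job "gs://my-bucket/my_table.csv" do |job|
    # Keys must start with a lowercase letter; values may be empty.
    job.labels = { "team" => "analytics", "env" => "staging" }
  end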
@@ -300,6 +422,39 @@ module Google
  @gapi.configuration.update! labels: value
  end

+ ##
+ # Indicate whether to enable extracting applicable column types (such
+ # as `TIMESTAMP`) to their corresponding AVRO logical types
+ # (`timestamp-micros`), instead of only using their raw types
+ # (`avro-long`).
+ #
+ # Only used when `#format` is set to `"AVRO"` (`#avro?`).
+ #
+ # @param [Boolean] value Whether applicable column types will use
+ # their corresponding AVRO logical types.
+ #
+ # @!group Attributes
+ def use_avro_logical_types= value
+ @gapi.configuration.extract.use_avro_logical_types = value
+ end
+
+ def cancel
+ raise "not implemented in #{self.class}"
+ end
+
+ def rerun!
+ raise "not implemented in #{self.class}"
+ end
+
+ def reload!
+ raise "not implemented in #{self.class}"
+ end
+ alias refresh! reload!
+
+ def wait_until_done!
+ raise "not implemented in #{self.class}"
+ end
+
  ##
  # @private Returns the Google API client library version of this job.
  #
@@ -309,6 +464,16 @@ module Google
  @gapi
  end
  end
+
+ protected
+
+ def retrieve_model project_id, dataset_id, model_id
+ ensure_service!
+ gapi = service.get_project_model project_id, dataset_id, model_id
+ Model.from_gapi_json gapi, service
+ rescue Google::Cloud::NotFoundError
+ nil
+ end
  end
  end
  end
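The new Avro logical-types setter is used the same way inside the updater block, while the overridden `cancel`, `rerun!`, `reload!`, and `wait_until_done!` guard against treating a not-yet-submitted configuration as a running job. A minimal sketch, reusing the placeholder table from earlier:

  extract_job = table.extract_job "gs://my-bucket/my_table.avro" do |job|
    job.format                 = "avro"
    # Export TIMESTAMP columns as timestamp-micros rather than raw longs.
    job.use_avro_logical_types = true
    # Calling job.wait_until_done! here would raise: the Updater is an
    # unsubmitted configuration, not a running job.
  end

  extract_job.wait_until_done!
  extract_job.use_avro_logical_types? #=> true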
data/lib/google/cloud/bigquery/insert_response.rb
@@ -99,9 +99,7 @@ module Google
  # data.
  #
  def error_rows
- Array(@gapi.insert_errors).map do |ie|
- @rows[ie.index]
- end
+ Array(@gapi.insert_errors).map { |ie| @rows[ie.index] }
  end

  ##
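This is a behavior-preserving refactor of `InsertResponse#error_rows`: each insert error is still mapped back to the original row that produced it. A small sketch of consuming it after a streaming insert, assuming a table whose schema matches the placeholder row hashes:

  rows = [
    { "name" => "Alice", "age" => 31 },
    { "name" => "Bob",   "age" => "not a number" }
  ]
  response = table.insert rows

  unless response.success?
    # Each element is one of the original row hashes that failed.
    response.error_rows.each { |row| puts "failed row: #{row.inspect}" }
  end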
data/lib/google/cloud/bigquery/job/list.rb
@@ -71,9 +71,9 @@ module Google
  def next
  return nil unless next?
  ensure_service!
- next_options = @options.merge token: token
- next_gapi = @service.list_jobs next_options
- self.class.from_gapi next_gapi, @service, next_options
+ next_kwargs = @kwargs.merge token: token
+ next_gapi = @service.list_jobs(**next_kwargs)
+ self.class.from_gapi next_gapi, @service, **next_kwargs
  end

  ##
@@ -121,17 +121,15 @@ module Google
  # puts job.state
  # end
  #
- def all request_limit: nil
+ def all request_limit: nil, &block
  request_limit = request_limit.to_i if request_limit
- unless block_given?
- return enum_for :all, request_limit: request_limit
- end
+ return enum_for :all, request_limit: request_limit unless block_given?
  results = self
  loop do
- results.each { |r| yield r }
+ results.each(&block)
  if request_limit
  request_limit -= 1
- break if request_limit < 0
+ break if request_limit.negative?
  end
  break unless results.next?
  results = results.next
@@ -141,14 +139,12 @@ module Google
  ##
  # @private New Job::List from a Google API Client
  # Google::Apis::BigqueryV2::JobList object.
- def self.from_gapi gapi_list, service, options = {}
- jobs = List.new(Array(gapi_list.jobs).map do |gapi_object|
- Job.from_gapi gapi_object, service
- end)
+ def self.from_gapi gapi_list, service, **kwargs
+ jobs = List.new(Array(gapi_list.jobs).map { |gapi_object| Job.from_gapi gapi_object, service })
  jobs.instance_variable_set :@token, gapi_list.next_page_token
  jobs.instance_variable_set :@etag, gapi_list.etag
  jobs.instance_variable_set :@service, service
- jobs.instance_variable_set :@options, options
+ jobs.instance_variable_set :@kwargs, kwargs
  jobs
  end
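`Job::List` keeps the same public surface; pagination state simply moves from an options hash to keyword arguments. A minimal sketch of paging through jobs with the unchanged `all` API (the request limit is arbitrary):

  require "google/cloud/bigquery"

  bigquery = Google::Cloud::Bigquery.new

  # Lazily pages through jobs; request_limit caps the extra API calls made.
  bigquery.jobs.all request_limit: 3 do |job|
    puts "#{job.job_id}: #{job.state}"
  end

  # Without a block, all returns an Enumerator over the same pages.
  done_count = bigquery.jobs.all.count(&:done?)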