google-cloud-bigquery 1.14.0 → 1.42.0

Sign up to get free protection for your applications and access to all the features.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/AUTHENTICATION.md +17 -54
  3. data/CHANGELOG.md +377 -0
  4. data/CONTRIBUTING.md +328 -116
  5. data/LOGGING.md +1 -1
  6. data/OVERVIEW.md +21 -20
  7. data/TROUBLESHOOTING.md +2 -8
  8. data/lib/google/cloud/bigquery/argument.rb +197 -0
  9. data/lib/google/cloud/bigquery/convert.rb +155 -173
  10. data/lib/google/cloud/bigquery/copy_job.rb +74 -26
  11. data/lib/google/cloud/bigquery/credentials.rb +5 -12
  12. data/lib/google/cloud/bigquery/data.rb +109 -18
  13. data/lib/google/cloud/bigquery/dataset/access.rb +474 -52
  14. data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
  15. data/lib/google/cloud/bigquery/dataset/tag.rb +67 -0
  16. data/lib/google/cloud/bigquery/dataset.rb +1044 -287
  17. data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
  18. data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
  19. data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
  20. data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
  21. data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
  22. data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
  23. data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
  24. data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
  25. data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
  26. data/lib/google/cloud/bigquery/external.rb +50 -2256
  27. data/lib/google/cloud/bigquery/extract_job.rb +226 -61
  28. data/lib/google/cloud/bigquery/insert_response.rb +1 -3
  29. data/lib/google/cloud/bigquery/job/list.rb +10 -14
  30. data/lib/google/cloud/bigquery/job.rb +289 -14
  31. data/lib/google/cloud/bigquery/load_job.rb +810 -136
  32. data/lib/google/cloud/bigquery/model/list.rb +5 -9
  33. data/lib/google/cloud/bigquery/model.rb +247 -16
  34. data/lib/google/cloud/bigquery/policy.rb +432 -0
  35. data/lib/google/cloud/bigquery/project/list.rb +6 -11
  36. data/lib/google/cloud/bigquery/project.rb +509 -250
  37. data/lib/google/cloud/bigquery/query_job.rb +594 -128
  38. data/lib/google/cloud/bigquery/routine/list.rb +165 -0
  39. data/lib/google/cloud/bigquery/routine.rb +1227 -0
  40. data/lib/google/cloud/bigquery/schema/field.rb +413 -63
  41. data/lib/google/cloud/bigquery/schema.rb +221 -48
  42. data/lib/google/cloud/bigquery/service.rb +204 -112
  43. data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
  44. data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -43
  45. data/lib/google/cloud/bigquery/table/list.rb +6 -11
  46. data/lib/google/cloud/bigquery/table.rb +1470 -377
  47. data/lib/google/cloud/bigquery/time.rb +6 -0
  48. data/lib/google/cloud/bigquery/version.rb +1 -1
  49. data/lib/google/cloud/bigquery.rb +4 -6
  50. data/lib/google-cloud-bigquery.rb +14 -13
  51. metadata +66 -38
@@ -20,15 +20,17 @@ module Google
20
20
  # # ExtractJob
21
21
  #
22
22
  # A {Job} subclass representing an export operation that may be performed
23
- # on a {Table}. A ExtractJob instance is created when you call
24
- # {Table#extract_job}.
23
+ # on a {Table} or {Model}. An ExtractJob instance is returned when you call
24
+ # {Project#extract_job}, {Table#extract_job} or {Model#extract_job}.
25
25
  #
26
26
  # @see https://cloud.google.com/bigquery/docs/exporting-data
27
- # Exporting Data From BigQuery
27
+ # Exporting table data
28
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
29
+ # Exporting models
28
30
  # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
29
31
  # reference
30
32
  #
31
- # @example
33
+ # @example Export table data
32
34
  # require "google/cloud/bigquery"
33
35
  #
34
36
  # bigquery = Google::Cloud::Bigquery.new
@@ -40,6 +42,18 @@ module Google
40
42
  # extract_job.wait_until_done!
41
43
  # extract_job.done? #=> true
42
44
  #
45
+ # @example Export a model
46
+ # require "google/cloud/bigquery"
47
+ #
48
+ # bigquery = Google::Cloud::Bigquery.new
49
+ # dataset = bigquery.dataset "my_dataset"
50
+ # model = dataset.model "my_model"
51
+ #
52
+ # extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
53
+ #
54
+ # extract_job.wait_until_done!
55
+ # extract_job.done? #=> true
56
+ #
43
57
  class ExtractJob < Job
44
58
  ##
45
59
  # The URI or URIs representing the Google Cloud Storage files to which
@@ -49,73 +63,132 @@ module Google
49
63
  end
50
64
 
51
65
  ##
52
- # The table from which the data is exported. This is the table upon
53
- # which {Table#extract_job} was called.
66
+ # The table or model which is exported.
54
67
  #
55
- # @return [Table] A table instance.
68
+ # @param [String] view Specifies the view that determines which table information is returned.
69
+ # By default, basic table information and storage statistics (STORAGE_STATS) are returned.
70
+ # Accepted values include `:unspecified`, `:basic`, `:storage`, and
71
+ # `:full`. For more information, see [Tables: get](https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/get#TableMetadataView).
72
+ # The default value is the `:unspecified` view type.
56
73
  #
57
- def source
58
- table = @gapi.configuration.extract.source_table
59
- return nil unless table
60
- retrieve_table table.project_id,
61
- table.dataset_id,
62
- table.table_id
74
+ # @return [Table, Model, nil] A table or model instance, or `nil`.
75
+ #
76
+ def source view: nil
77
+ if (table = @gapi.configuration.extract.source_table)
78
+ retrieve_table table.project_id, table.dataset_id, table.table_id, metadata_view: view
79
+ elsif (model = @gapi.configuration.extract.source_model)
80
+ retrieve_model model.project_id, model.dataset_id, model.model_id
81
+ end
63
82
  end
64
83
 
65
84
  ##
66
- # Checks if the export operation compresses the data using gzip. The
67
- # default is `false`.
85
+ # Whether the source of the export job is a table. See {#source}.
68
86
  #
69
- # @return [Boolean] `true` when `GZIP`, `false` otherwise.
87
+ # @return [Boolean] `true` when the source is a table, `false`
88
+ # otherwise.
70
89
  #
71
- def compression?
72
- val = @gapi.configuration.extract.compression
73
- val == "GZIP"
90
+ def table?
91
+ !@gapi.configuration.extract.source_table.nil?
74
92
  end
75
93
 
76
94
  ##
77
- # Checks if the destination format for the data is [newline-delimited
78
- # JSON](http://jsonlines.org/). The default is `false`.
95
+ # Whether the source of the export job is a model. See {#source}.
79
96
  #
80
- # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false`
97
+ # @return [Boolean] `true` when the source is a model, `false`
81
98
  # otherwise.
82
99
  #
100
+ def model?
101
+ !@gapi.configuration.extract.source_model.nil?
102
+ end
103
+
104
+ ##
105
+ # Checks if the export operation compresses the data using gzip. The
106
+ # default is `false`. Not applicable when extracting models.
107
+ #
108
+ # @return [Boolean] `true` when `GZIP`, `false` if not `GZIP` or not a
109
+ # table extraction.
110
+ def compression?
111
+ return false unless table?
112
+ @gapi.configuration.extract.compression == "GZIP"
113
+ end
114
+
115
+ ##
116
+ # Checks if the destination format for the table data is [newline-delimited
117
+ # JSON](https://jsonlines.org/). The default is `false`. Not applicable when
118
+ # extracting models.
119
+ #
120
+ # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false` if not
121
+ # `NEWLINE_DELIMITED_JSON` or not a table extraction.
122
+ #
83
123
  def json?
84
- val = @gapi.configuration.extract.destination_format
85
- val == "NEWLINE_DELIMITED_JSON"
124
+ return false unless table?
125
+ @gapi.configuration.extract.destination_format == "NEWLINE_DELIMITED_JSON"
86
126
  end
87
127
 
88
128
  ##
89
- # Checks if the destination format for the data is CSV. Tables with
129
+ # Checks if the destination format for the table data is CSV. Tables with
90
130
  # nested or repeated fields cannot be exported as CSV. The default is
91
- # `true`.
131
+ # `true` for tables. Not applicable when extracting models.
92
132
  #
93
- # @return [Boolean] `true` when `CSV`, `false` otherwise.
133
+ # @return [Boolean] `true` when `CSV`, or `false` if not `CSV` or not a
134
+ # table extraction.
94
135
  #
95
136
  def csv?
137
+ return false unless table?
96
138
  val = @gapi.configuration.extract.destination_format
97
139
  return true if val.nil?
98
140
  val == "CSV"
99
141
  end
100
142
 
101
143
  ##
102
- # Checks if the destination format for the data is
103
- # [Avro](http://avro.apache.org/). The default is `false`.
144
+ # Checks if the destination format for the table data is
145
+ # [Avro](http://avro.apache.org/). The default is `false`. Not applicable
146
+ # when extracting models.
104
147
  #
105
- # @return [Boolean] `true` when `AVRO`, `false` otherwise.
148
+ # @return [Boolean] `true` when `AVRO`, `false` if not `AVRO` or not a
149
+ # table extraction.
106
150
  #
107
151
  def avro?
152
+ return false unless table?
153
+ @gapi.configuration.extract.destination_format == "AVRO"
154
+ end
155
+
156
+ ##
157
+ # Checks if the destination format for the model is TensorFlow SavedModel.
158
+ # The default is `true` for models. Not applicable when extracting tables.
159
+ #
160
+ # @return [Boolean] `true` when `ML_TF_SAVED_MODEL`, `false` if not
161
+ # `ML_TF_SAVED_MODEL` or not a model extraction.
162
+ #
163
+ def ml_tf_saved_model?
164
+ return false unless model?
108
165
  val = @gapi.configuration.extract.destination_format
109
- val == "AVRO"
166
+ return true if val.nil?
167
+ val == "ML_TF_SAVED_MODEL"
168
+ end
169
+
170
+ ##
171
+ # Checks if the destination format for the model is XGBoost. The default
172
+ # is `false`. Not applicable when extracting tables.
173
+ #
174
+ # @return [Boolean] `true` when `ML_XGBOOST_BOOSTER`, `false` if not
175
+ # `ML_XGBOOST_BOOSTER` or not a model extraction.
176
+ #
177
+ def ml_xgboost_booster?
178
+ return false unless model?
179
+ @gapi.configuration.extract.destination_format == "ML_XGBOOST_BOOSTER"
110
180
  end
111
181
 
112
182
  ##
113
183
  # The character or symbol the operation uses to delimit fields in the
114
- # exported data. The default is a comma (,).
184
+ # exported data. The default is a comma (,) for tables. Not applicable
185
+ # when extracting models.
115
186
  #
116
- # @return [String] A string containing the character, such as `","`.
187
+ # @return [String, nil] A string containing the character, such as `","`,
188
+ # `nil` if not a table extraction.
117
189
  #
118
190
  def delimiter
191
+ return unless table?
119
192
  val = @gapi.configuration.extract.field_delimiter
120
193
  val = "," if val.nil?
121
194
  val
@@ -123,12 +196,13 @@ module Google
123
196
 
124
197
  ##
125
198
  # Checks if the exported data contains a header row. The default is
126
- # `true`.
199
+ # `true` for tables. Not applicable when extracting models.
127
200
  #
128
201
  # @return [Boolean] `true` when the print header configuration is
129
- # present or `nil`, `false` otherwise.
202
+ # present or `nil`, `false` if disabled or not a table extraction.
130
203
  #
131
204
  def print_header?
205
+ return false unless table?
132
206
  val = @gapi.configuration.extract.print_header
133
207
  val = true if val.nil?
134
208
  val
@@ -153,7 +227,23 @@ module Google
153
227
  # and the counts as values.
154
228
  #
155
229
  def destinations_counts
156
- Hash[destinations.zip destinations_file_counts]
230
+ destinations.zip(destinations_file_counts).to_h
231
+ end
232
+
233
+ ##
234
+ # If `#avro?` (`#format` is set to `"AVRO"`), this flag indicates
235
+ # whether to enable extracting applicable column types (such as
236
+ # `TIMESTAMP`) to their corresponding AVRO logical types
237
+ # (`timestamp-micros`), instead of only using their raw types
238
+ # (`avro-long`). Not applicable when extracting models.
239
+ #
240
+ # @return [Boolean] `true` when applicable column types will use their
241
+ # corresponding AVRO logical types, `false` if not enabled or not a
242
+ # table extraction.
243
+ #
244
+ def use_avro_logical_types?
245
+ return false unless table?
246
+ @gapi.configuration.extract.use_avro_logical_types
157
247
  end
158
248
 
159
249
  ##
@@ -162,6 +252,7 @@ module Google
162
252
  ##
163
253
  # @private Create an Updater object.
164
254
  def initialize gapi
255
+ super()
165
256
  @gapi = gapi
166
257
  end
167
258
 
@@ -170,32 +261,47 @@ module Google
170
261
  #
171
262
  # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
172
263
  # configuration object for setting query options.
173
- def self.from_options service, table, storage_files, options = {}
264
+ def self.from_options service, source, storage_files, options
174
265
  job_ref = service.job_ref_from options[:job_id], options[:prefix]
175
266
  storage_urls = Array(storage_files).map do |url|
176
267
  url.respond_to?(:to_gs_url) ? url.to_gs_url : url
177
268
  end
178
- dest_format = options[:format]
179
- if dest_format.nil?
180
- dest_format = Convert.derive_source_format storage_urls.first
269
+ options[:format] ||= Convert.derive_source_format storage_urls.first
270
+ extract_config = Google::Apis::BigqueryV2::JobConfigurationExtract.new(
271
+ destination_uris: Array(storage_urls)
272
+ )
273
+ case source
274
+ when Google::Apis::BigqueryV2::TableReference
275
+ extract_config.source_table = source
276
+ when Google::Apis::BigqueryV2::ModelReference
277
+ extract_config.source_model = source
181
278
  end
182
- req = Google::Apis::BigqueryV2::Job.new(
279
+ job = Google::Apis::BigqueryV2::Job.new(
183
280
  job_reference: job_ref,
184
281
  configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
185
- extract: Google::Apis::BigqueryV2::JobConfigurationExtract.new(
186
- destination_uris: Array(storage_urls),
187
- source_table: table
188
- ),
282
+ extract: extract_config,
189
283
  dry_run: options[:dryrun]
190
284
  )
191
285
  )
192
286
 
193
- updater = ExtractJob::Updater.new req
287
+ from_job_and_options job, options
288
+ end
289
+
290
+ ##
291
+ # @private Create an Updater from a Job and options hash.
292
+ #
293
+ # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
294
+ # configuration object for setting query options.
295
+ def self.from_job_and_options request, options
296
+ updater = ExtractJob::Updater.new request
194
297
  updater.compression = options[:compression]
195
298
  updater.delimiter = options[:delimiter]
196
- updater.format = dest_format
299
+ updater.format = options[:format]
197
300
  updater.header = options[:header]
198
301
  updater.labels = options[:labels] if options[:labels]
302
+ unless options[:use_avro_logical_types].nil?
303
+ updater.use_avro_logical_types = options[:use_avro_logical_types]
304
+ end
199
305
  updater
200
306
  end
201
307
 
@@ -232,7 +338,7 @@ module Google
232
338
  end
233
339
 
234
340
  ##
235
- # Sets the compression type.
341
+ # Sets the compression type. Not applicable when extracting models.
236
342
  #
237
343
  # @param [String] value The compression type to use for exported
238
344
  # files. Possible values include `GZIP` and `NONE`. The default
@@ -244,7 +350,7 @@ module Google
244
350
  end
245
351
 
246
352
  ##
247
- # Sets the field delimiter.
353
+ # Sets the field delimiter. Not applicable when extracting models.
248
354
  #
249
355
  # @param [String] value Delimiter to use between fields in the
250
356
  # exported data. Default is <code>,</code>.
@@ -255,25 +361,32 @@ module Google
255
361
  end
256
362
 
257
363
  ##
258
- # Sets the destination file format. The default value is `csv`.
364
+ # Sets the destination file format. The default value for
365
+ # tables is `csv`. Tables with nested or repeated fields cannot be
366
+ # exported as CSV. The default value for models is `ml_tf_saved_model`.
259
367
  #
260
- # The following values are supported:
368
+ # Supported values for tables:
261
369
  #
262
370
  # * `csv` - CSV
263
- # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
371
+ # * `json` - [Newline-delimited JSON](https://jsonlines.org/)
264
372
  # * `avro` - [Avro](http://avro.apache.org/)
265
373
  #
374
+ # Supported values for models:
375
+ #
376
+ # * `ml_tf_saved_model` - TensorFlow SavedModel
377
+ # * `ml_xgboost_booster` - XGBoost Booster
378
+ #
266
379
  # @param [String] new_format The new source format.
267
380
  #
268
381
  # @!group Attributes
269
382
  #
270
383
  def format= new_format
271
- @gapi.configuration.extract.update! \
272
- destination_format: Convert.source_format(new_format)
384
+ @gapi.configuration.extract.update! destination_format: Convert.source_format(new_format)
273
385
  end
274
386
 
275
387
  ##
276
- # Print a header row in the exported file.
388
+ # Print a header row in the exported file. Not applicable when
389
+ # extracting models.
277
390
  #
278
391
  # @param [Boolean] value Whether to print out a header row in the
279
392
  # results. Default is `true`.
@@ -287,12 +400,21 @@ module Google
287
400
  # Sets the labels to use for the job.
288
401
  #
289
402
  # @param [Hash] value A hash of user-provided labels associated with
290
- # the job. You can use these to organize and group your jobs. Label
291
- # keys and values can be no longer than 63 characters, can only
292
- # contain lowercase letters, numeric characters, underscores and
293
- # dashes. International characters are allowed. Label values are
294
- # optional. Label keys must start with a letter and each label in
295
- # the list must have a different key.
403
+ # the job. You can use these to organize and group your jobs.
404
+ #
405
+ # The labels applied to a resource must meet the following requirements:
406
+ #
407
+ # * Each resource can have multiple labels, up to a maximum of 64.
408
+ # * Each label must be a key-value pair.
409
+ # * Keys have a minimum length of 1 character and a maximum length of
410
+ # 63 characters, and cannot be empty. Values can be empty, and have
411
+ # a maximum length of 63 characters.
412
+ # * Keys and values can contain only lowercase letters, numeric characters,
413
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
414
+ # international characters are allowed.
415
+ # * The key portion of a label must be unique. However, you can use the
416
+ # same key with multiple resources.
417
+ # * Keys must start with a lowercase letter or international character.
296
418
  #
297
419
  # @!group Attributes
298
420
  #
@@ -300,6 +422,39 @@ module Google
300
422
  @gapi.configuration.update! labels: value
301
423
  end
302
424
 
425
+ ##
426
+ # Indicate whether to enable extracting applicable column types (such
427
+ # as `TIMESTAMP`) to their corresponding AVRO logical types
428
+ # (`timestamp-micros`), instead of only using their raw types
429
+ # (`avro-long`).
430
+ #
431
+ # Only used when `#format` is set to `"AVRO"` (`#avro?`).
432
+ #
433
+ # @param [Boolean] value Whether applicable column types will use
434
+ # their corresponding AVRO logical types.
435
+ #
436
+ # @!group Attributes
437
+ def use_avro_logical_types= value
438
+ @gapi.configuration.extract.use_avro_logical_types = value
439
+ end
440
+
441
+ def cancel
442
+ raise "not implemented in #{self.class}"
443
+ end
444
+
445
+ def rerun!
446
+ raise "not implemented in #{self.class}"
447
+ end
448
+
449
+ def reload!
450
+ raise "not implemented in #{self.class}"
451
+ end
452
+ alias refresh! reload!
453
+
454
+ def wait_until_done!
455
+ raise "not implemented in #{self.class}"
456
+ end
457
+
303
458
  ##
304
459
  # @private Returns the Google API client library version of this job.
305
460
  #
@@ -309,6 +464,16 @@ module Google
309
464
  @gapi
310
465
  end
311
466
  end
467
+
468
+ protected
469
+
470
+ def retrieve_model project_id, dataset_id, model_id
471
+ ensure_service!
472
+ gapi = service.get_project_model project_id, dataset_id, model_id
473
+ Model.from_gapi_json gapi, service
474
+ rescue Google::Cloud::NotFoundError
475
+ nil
476
+ end
312
477
  end
313
478
  end
314
479
  end
@@ -99,9 +99,7 @@ module Google
99
99
  # data.
100
100
  #
101
101
  def error_rows
102
- Array(@gapi.insert_errors).map do |ie|
103
- @rows[ie.index]
104
- end
102
+ Array(@gapi.insert_errors).map { |ie| @rows[ie.index] }
105
103
  end
106
104
 
107
105
  ##
@@ -71,9 +71,9 @@ module Google
71
71
  def next
72
72
  return nil unless next?
73
73
  ensure_service!
74
- next_options = @options.merge token: token
75
- next_gapi = @service.list_jobs next_options
76
- self.class.from_gapi next_gapi, @service, next_options
74
+ next_kwargs = @kwargs.merge token: token
75
+ next_gapi = @service.list_jobs(**next_kwargs)
76
+ self.class.from_gapi next_gapi, @service, **next_kwargs
77
77
  end
78
78
 
79
79
  ##
@@ -121,17 +121,15 @@ module Google
121
121
  # puts job.state
122
122
  # end
123
123
  #
124
- def all request_limit: nil
124
+ def all request_limit: nil, &block
125
125
  request_limit = request_limit.to_i if request_limit
126
- unless block_given?
127
- return enum_for :all, request_limit: request_limit
128
- end
126
+ return enum_for :all, request_limit: request_limit unless block_given?
129
127
  results = self
130
128
  loop do
131
- results.each { |r| yield r }
129
+ results.each(&block)
132
130
  if request_limit
133
131
  request_limit -= 1
134
- break if request_limit < 0
132
+ break if request_limit.negative?
135
133
  end
136
134
  break unless results.next?
137
135
  results = results.next
@@ -141,14 +139,12 @@ module Google
141
139
  ##
142
140
  # @private New Job::List from a Google API Client
143
141
  # Google::Apis::BigqueryV2::JobList object.
144
- def self.from_gapi gapi_list, service, options = {}
145
- jobs = List.new(Array(gapi_list.jobs).map do |gapi_object|
146
- Job.from_gapi gapi_object, service
147
- end)
142
+ def self.from_gapi gapi_list, service, **kwargs
143
+ jobs = List.new(Array(gapi_list.jobs).map { |gapi_object| Job.from_gapi gapi_object, service })
148
144
  jobs.instance_variable_set :@token, gapi_list.next_page_token
149
145
  jobs.instance_variable_set :@etag, gapi_list.etag
150
146
  jobs.instance_variable_set :@service, service
151
- jobs.instance_variable_set :@options, options
147
+ jobs.instance_variable_set :@kwargs, kwargs
152
148
  jobs
153
149
  end
154
150