google-cloud-bigquery 1.12.0 → 1.38.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/AUTHENTICATION.md +9 -28
  3. data/CHANGELOG.md +372 -1
  4. data/CONTRIBUTING.md +328 -116
  5. data/LOGGING.md +2 -2
  6. data/OVERVIEW.md +21 -20
  7. data/TROUBLESHOOTING.md +2 -8
  8. data/lib/google/cloud/bigquery/argument.rb +197 -0
  9. data/lib/google/cloud/bigquery/convert.rb +154 -170
  10. data/lib/google/cloud/bigquery/copy_job.rb +40 -23
  11. data/lib/google/cloud/bigquery/credentials.rb +5 -12
  12. data/lib/google/cloud/bigquery/data.rb +109 -18
  13. data/lib/google/cloud/bigquery/dataset/access.rb +322 -51
  14. data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
  15. data/lib/google/cloud/bigquery/dataset.rb +960 -279
  16. data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
  17. data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
  18. data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
  19. data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
  20. data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
  21. data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
  22. data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
  23. data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
  24. data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
  25. data/lib/google/cloud/bigquery/external.rb +50 -2256
  26. data/lib/google/cloud/bigquery/extract_job.rb +217 -58
  27. data/lib/google/cloud/bigquery/insert_response.rb +1 -3
  28. data/lib/google/cloud/bigquery/job/list.rb +13 -20
  29. data/lib/google/cloud/bigquery/job.rb +286 -11
  30. data/lib/google/cloud/bigquery/load_job.rb +801 -133
  31. data/lib/google/cloud/bigquery/model/list.rb +5 -9
  32. data/lib/google/cloud/bigquery/model.rb +247 -16
  33. data/lib/google/cloud/bigquery/policy.rb +432 -0
  34. data/lib/google/cloud/bigquery/project/list.rb +6 -11
  35. data/lib/google/cloud/bigquery/project.rb +526 -243
  36. data/lib/google/cloud/bigquery/query_job.rb +584 -125
  37. data/lib/google/cloud/bigquery/routine/list.rb +165 -0
  38. data/lib/google/cloud/bigquery/routine.rb +1227 -0
  39. data/lib/google/cloud/bigquery/schema/field.rb +413 -63
  40. data/lib/google/cloud/bigquery/schema.rb +221 -48
  41. data/lib/google/cloud/bigquery/service.rb +186 -109
  42. data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
  43. data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -42
  44. data/lib/google/cloud/bigquery/table/list.rb +6 -11
  45. data/lib/google/cloud/bigquery/table.rb +1188 -326
  46. data/lib/google/cloud/bigquery/time.rb +6 -0
  47. data/lib/google/cloud/bigquery/version.rb +1 -1
  48. data/lib/google/cloud/bigquery.rb +18 -8
  49. data/lib/google-cloud-bigquery.rb +15 -13
  50. metadata +67 -40
@@ -20,15 +20,17 @@ module Google
20
20
  # # ExtractJob
21
21
  #
22
22
  # A {Job} subclass representing an export operation that may be performed
23
- # on a {Table}. A ExtractJob instance is created when you call
24
- # {Table#extract_job}.
23
+ # on a {Table} or {Model}. An ExtractJob instance is returned when you call
24
+ # {Project#extract_job}, {Table#extract_job} or {Model#extract_job}.
25
25
  #
26
26
  # @see https://cloud.google.com/bigquery/docs/exporting-data
27
- # Exporting Data From BigQuery
27
+ # Exporting table data
28
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
29
+ # Exporting models
28
30
  # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
29
31
  # reference
30
32
  #
31
- # @example
33
+ # @example Export table data
32
34
  # require "google/cloud/bigquery"
33
35
  #
34
36
  # bigquery = Google::Cloud::Bigquery.new
@@ -40,6 +42,18 @@ module Google
40
42
  # extract_job.wait_until_done!
41
43
  # extract_job.done? #=> true
42
44
  #
45
+ # @example Export a model
46
+ # require "google/cloud/bigquery"
47
+ #
48
+ # bigquery = Google::Cloud::Bigquery.new
49
+ # dataset = bigquery.dataset "my_dataset"
50
+ # model = dataset.model "my_model"
51
+ #
52
+ # extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
53
+ #
54
+ # extract_job.wait_until_done!
55
+ # extract_job.done? #=> true
56
+ #
43
57
  class ExtractJob < Job
44
58
  ##
45
59
  # The URI or URIs representing the Google Cloud Storage files to which
@@ -49,73 +63,126 @@ module Google
49
63
  end
50
64
 
51
65
  ##
52
- # The table from which the data is exported. This is the table upon
53
- # which {Table#extract_job} was called.
66
+ # The table or model which is exported.
54
67
  #
55
- # @return [Table] A table instance.
68
+ # @return [Table, Model, nil] A table or model instance, or `nil`.
56
69
  #
57
70
  def source
58
- table = @gapi.configuration.extract.source_table
59
- return nil unless table
60
- retrieve_table table.project_id,
61
- table.dataset_id,
62
- table.table_id
71
+ if (table = @gapi.configuration.extract.source_table)
72
+ retrieve_table table.project_id, table.dataset_id, table.table_id
73
+ elsif (model = @gapi.configuration.extract.source_model)
74
+ retrieve_model model.project_id, model.dataset_id, model.model_id
75
+ end
63
76
  end
64
77
 
65
78
  ##
66
- # Checks if the export operation compresses the data using gzip. The
67
- # default is `false`.
79
+ # Whether the source of the export job is a table. See {#source}.
68
80
  #
69
- # @return [Boolean] `true` when `GZIP`, `false` otherwise.
81
+ # @return [Boolean] `true` when the source is a table, `false`
82
+ # otherwise.
70
83
  #
71
- def compression?
72
- val = @gapi.configuration.extract.compression
73
- val == "GZIP"
84
+ def table?
85
+ !@gapi.configuration.extract.source_table.nil?
74
86
  end
75
87
 
76
88
  ##
77
- # Checks if the destination format for the data is [newline-delimited
78
- # JSON](http://jsonlines.org/). The default is `false`.
89
+ # Whether the source of the export job is a model. See {#source}.
79
90
  #
80
- # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false`
91
+ # @return [Boolean] `true` when the source is a model, `false`
81
92
  # otherwise.
82
93
  #
94
+ def model?
95
+ !@gapi.configuration.extract.source_model.nil?
96
+ end
97
+
98
+ ##
99
+ # Checks if the export operation compresses the data using gzip. The
100
+ # default is `false`. Not applicable when extracting models.
101
+ #
102
+ # @return [Boolean] `true` when `GZIP`, `false` if not `GZIP` or not a
103
+ # table extraction.
104
+ def compression?
105
+ return false unless table?
106
+ @gapi.configuration.extract.compression == "GZIP"
107
+ end
108
+
109
+ ##
110
+ # Checks if the destination format for the table data is [newline-delimited
111
+ # JSON](http://jsonlines.org/). The default is `false`. Not applicable when
112
+ # extracting models.
113
+ #
114
+ # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false` if not
115
+ # `NEWLINE_DELIMITED_JSON` or not a table extraction.
116
+ #
83
117
  def json?
84
- val = @gapi.configuration.extract.destination_format
85
- val == "NEWLINE_DELIMITED_JSON"
118
+ return false unless table?
119
+ @gapi.configuration.extract.destination_format == "NEWLINE_DELIMITED_JSON"
86
120
  end
87
121
 
88
122
  ##
89
- # Checks if the destination format for the data is CSV. Tables with
123
+ # Checks if the destination format for the table data is CSV. Tables with
90
124
  # nested or repeated fields cannot be exported as CSV. The default is
91
- # `true`.
125
+ # `true` for tables. Not applicable when extracting models.
92
126
  #
93
- # @return [Boolean] `true` when `CSV`, `false` otherwise.
127
+ # @return [Boolean] `true` when `CSV`, or `false` if not `CSV` or not a
128
+ # table extraction.
94
129
  #
95
130
  def csv?
131
+ return false unless table?
96
132
  val = @gapi.configuration.extract.destination_format
97
133
  return true if val.nil?
98
134
  val == "CSV"
99
135
  end
100
136
 
101
137
  ##
102
- # Checks if the destination format for the data is
103
- # [Avro](http://avro.apache.org/). The default is `false`.
138
+ # Checks if the destination format for the table data is
139
+ # [Avro](http://avro.apache.org/). The default is `false`. Not applicable
140
+ # when extracting models.
104
141
  #
105
- # @return [Boolean] `true` when `AVRO`, `false` otherwise.
142
+ # @return [Boolean] `true` when `AVRO`, `false` if not `AVRO` or not a
143
+ # table extraction.
106
144
  #
107
145
  def avro?
146
+ return false unless table?
147
+ @gapi.configuration.extract.destination_format == "AVRO"
148
+ end
149
+
150
+ ##
151
+ # Checks if the destination format for the model is TensorFlow SavedModel.
152
+ # The default is `true` for models. Not applicable when extracting tables.
153
+ #
154
+ # @return [Boolean] `true` when `ML_TF_SAVED_MODEL`, `false` if not
155
+ # `ML_TF_SAVED_MODEL` or not a model extraction.
156
+ #
157
+ def ml_tf_saved_model?
158
+ return false unless model?
108
159
  val = @gapi.configuration.extract.destination_format
109
- val == "AVRO"
160
+ return true if val.nil?
161
+ val == "ML_TF_SAVED_MODEL"
162
+ end
163
+
164
+ ##
165
+ # Checks if the destination format for the model is XGBoost. The default
166
+ # is `false`. Not applicable when extracting tables.
167
+ #
168
+ # @return [Boolean] `true` when `ML_XGBOOST_BOOSTER`, `false` if not
169
+ # `ML_XGBOOST_BOOSTER` or not a model extraction.
170
+ #
171
+ def ml_xgboost_booster?
172
+ return false unless model?
173
+ @gapi.configuration.extract.destination_format == "ML_XGBOOST_BOOSTER"
110
174
  end
111
175
 
112
176
  ##
113
177
  # The character or symbol the operation uses to delimit fields in the
114
- # exported data. The default is a comma (,).
178
+ # exported data. The default is a comma (,) for tables. Not applicable
179
+ # when extracting models.
115
180
  #
116
- # @return [String] A string containing the character, such as `","`.
181
+ # @return [String, nil] A string containing the character, such as `","`,
182
+ # `nil` if not a table extraction.
117
183
  #
118
184
  def delimiter
185
+ return unless table?
119
186
  val = @gapi.configuration.extract.field_delimiter
120
187
  val = "," if val.nil?
121
188
  val
@@ -123,12 +190,13 @@ module Google
123
190
 
124
191
  ##
125
192
  # Checks if the exported data contains a header row. The default is
126
- # `true`.
193
+ # `true` for tables. Not applicable when extracting models.
127
194
  #
128
195
  # @return [Boolean] `true` when the print header configuration is
129
- # present or `nil`, `false` otherwise.
196
+ # present or `nil`, `false` if disabled or not a table extraction.
130
197
  #
131
198
  def print_header?
199
+ return false unless table?
132
200
  val = @gapi.configuration.extract.print_header
133
201
  val = true if val.nil?
134
202
  val
@@ -156,12 +224,29 @@ module Google
156
224
  Hash[destinations.zip destinations_file_counts]
157
225
  end
158
226
 
227
+ ##
228
+ # If `#avro?` (`#format` is set to `"AVRO"`), this flag indicates
229
+ # whether to enable extracting applicable column types (such as
230
+ # `TIMESTAMP`) to their corresponding AVRO logical types
231
+ # (`timestamp-micros`), instead of only using their raw types
232
+ # (`avro-long`). Not applicable when extracting models.
233
+ #
234
+ # @return [Boolean] `true` when applicable column types will use their
235
+ # corresponding AVRO logical types, `false` if not enabled or not a
236
+ # table extraction.
237
+ #
238
+ def use_avro_logical_types?
239
+ return false unless table?
240
+ @gapi.configuration.extract.use_avro_logical_types
241
+ end
242
+
159
243
  ##
160
244
  # Yielded to a block to accumulate changes for an API request.
161
245
  class Updater < ExtractJob
162
246
  ##
163
247
  # @private Create an Updater object.
164
248
  def initialize gapi
249
+ super()
165
250
  @gapi = gapi
166
251
  end
167
252
 
@@ -170,32 +255,47 @@ module Google
170
255
  #
171
256
  # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
172
257
  # configuration object for setting extract options.
173
- def self.from_options service, table, storage_files, options = {}
258
+ def self.from_options service, source, storage_files, options
174
259
  job_ref = service.job_ref_from options[:job_id], options[:prefix]
175
260
  storage_urls = Array(storage_files).map do |url|
176
261
  url.respond_to?(:to_gs_url) ? url.to_gs_url : url
177
262
  end
178
- dest_format = options[:format]
179
- if dest_format.nil?
180
- dest_format = Convert.derive_source_format storage_urls.first
263
+ options[:format] ||= Convert.derive_source_format storage_urls.first
264
+ extract_config = Google::Apis::BigqueryV2::JobConfigurationExtract.new(
265
+ destination_uris: Array(storage_urls)
266
+ )
267
+ case source
268
+ when Google::Apis::BigqueryV2::TableReference
269
+ extract_config.source_table = source
270
+ when Google::Apis::BigqueryV2::ModelReference
271
+ extract_config.source_model = source
181
272
  end
182
- req = Google::Apis::BigqueryV2::Job.new(
273
+ job = Google::Apis::BigqueryV2::Job.new(
183
274
  job_reference: job_ref,
184
275
  configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
185
- extract: Google::Apis::BigqueryV2::JobConfigurationExtract.new(
186
- destination_uris: Array(storage_urls),
187
- source_table: table
188
- ),
276
+ extract: extract_config,
189
277
  dry_run: options[:dryrun]
190
278
  )
191
279
  )
192
280
 
193
- updater = ExtractJob::Updater.new req
281
+ from_job_and_options job, options
282
+ end
283
+
284
+ ##
285
+ # @private Create an Updater from a Job and options hash.
286
+ #
287
+ # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
288
+ # configuration object for setting extract options.
289
+ def self.from_job_and_options request, options
290
+ updater = ExtractJob::Updater.new request
194
291
  updater.compression = options[:compression]
195
292
  updater.delimiter = options[:delimiter]
196
- updater.format = dest_format
293
+ updater.format = options[:format]
197
294
  updater.header = options[:header]
198
295
  updater.labels = options[:labels] if options[:labels]
296
+ unless options[:use_avro_logical_types].nil?
297
+ updater.use_avro_logical_types = options[:use_avro_logical_types]
298
+ end
199
299
  updater
200
300
  end
201
301
 
@@ -232,7 +332,7 @@ module Google
232
332
  end
233
333
 
234
334
  ##
235
- # Sets the compression type.
335
+ # Sets the compression type. Not applicable when extracting models.
236
336
  #
237
337
  # @param [String] value The compression type to use for exported
238
338
  # files. Possible values include `GZIP` and `NONE`. The default
@@ -244,7 +344,7 @@ module Google
244
344
  end
245
345
 
246
346
  ##
247
- # Sets the field delimiter.
347
+ # Sets the field delimiter. Not applicable when extracting models.
248
348
  #
249
349
  # @param [String] value Delimiter to use between fields in the
250
350
  # exported data. Default is <code>,</code>.
@@ -255,25 +355,32 @@ module Google
255
355
  end
256
356
 
257
357
  ##
258
- # Sets the destination file format. The default value is `csv`.
358
+ # Sets the destination file format. The default value for
359
+ # tables is `csv`. Tables with nested or repeated fields cannot be
360
+ # exported as CSV. The default value for models is `ml_tf_saved_model`.
259
361
  #
260
- # The following values are supported:
362
+ # Supported values for tables:
261
363
  #
262
364
  # * `csv` - CSV
263
365
  # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
264
366
  # * `avro` - [Avro](http://avro.apache.org/)
265
367
  #
368
+ # Supported values for models:
369
+ #
370
+ # * `ml_tf_saved_model` - TensorFlow SavedModel
371
+ # * `ml_xgboost_booster` - XGBoost Booster
372
+ #
266
373
  # @param [String] new_format The new destination format.
267
374
  #
268
375
  # @!group Attributes
269
376
  #
270
377
  def format= new_format
271
- @gapi.configuration.extract.update! \
272
- destination_format: Convert.source_format(new_format)
378
+ @gapi.configuration.extract.update! destination_format: Convert.source_format(new_format)
273
379
  end
274
380
 
275
381
  ##
276
- # Print a header row in the exported file.
382
+ # Print a header row in the exported file. Not applicable when
383
+ # extracting models.
277
384
  #
278
385
  # @param [Boolean] value Whether to print out a header row in the
279
386
  # results. Default is `true`.
@@ -287,12 +394,21 @@ module Google
287
394
  # Sets the labels to use for the job.
288
395
  #
289
396
  # @param [Hash] value A hash of user-provided labels associated with
290
- # the job. You can use these to organize and group your jobs. Label
291
- # keys and values can be no longer than 63 characters, can only
292
- # contain lowercase letters, numeric characters, underscores and
293
- # dashes. International characters are allowed. Label values are
294
- # optional. Label keys must start with a letter and each label in
295
- # the list must have a different key.
397
+ # the job. You can use these to organize and group your jobs.
398
+ #
399
+ # The labels applied to a resource must meet the following requirements:
400
+ #
401
+ # * Each resource can have multiple labels, up to a maximum of 64.
402
+ # * Each label must be a key-value pair.
403
+ # * Keys have a minimum length of 1 character and a maximum length of
404
+ # 63 characters, and cannot be empty. Values can be empty, and have
405
+ # a maximum length of 63 characters.
406
+ # * Keys and values can contain only lowercase letters, numeric characters,
407
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
408
+ # international characters are allowed.
409
+ # * The key portion of a label must be unique. However, you can use the
410
+ # same key with multiple resources.
411
+ # * Keys must start with a lowercase letter or international character.
296
412
  #
297
413
  # @!group Attributes
298
414
  #
@@ -300,6 +416,39 @@ module Google
300
416
  @gapi.configuration.update! labels: value
301
417
  end
302
418
 
419
+ ##
420
+ # Indicate whether to enable extracting applicable column types (such
421
+ # as `TIMESTAMP`) to their corresponding AVRO logical types
422
+ # (`timestamp-micros`), instead of only using their raw types
423
+ # (`avro-long`).
424
+ #
425
+ # Only used when `#format` is set to `"AVRO"` (`#avro?`).
426
+ #
427
+ # @param [Boolean] value Whether applicable column types will use
428
+ # their corresponding AVRO logical types.
429
+ #
430
+ # @!group Attributes
431
+ def use_avro_logical_types= value
432
+ @gapi.configuration.extract.use_avro_logical_types = value
433
+ end
434
+
435
+ def cancel
436
+ raise "not implemented in #{self.class}"
437
+ end
438
+
439
+ def rerun!
440
+ raise "not implemented in #{self.class}"
441
+ end
442
+
443
+ def reload!
444
+ raise "not implemented in #{self.class}"
445
+ end
446
+ alias refresh! reload!
447
+
448
+ def wait_until_done!
449
+ raise "not implemented in #{self.class}"
450
+ end
451
+
303
452
  ##
304
453
  # @private Returns the Google API client library version of this job.
305
454
  #
@@ -309,6 +458,16 @@ module Google
309
458
  @gapi
310
459
  end
311
460
  end
461
+
462
+ protected
463
+
464
+ def retrieve_model project_id, dataset_id, model_id
465
+ ensure_service!
466
+ gapi = service.get_project_model project_id, dataset_id, model_id
467
+ Model.from_gapi_json gapi, service
468
+ rescue Google::Cloud::NotFoundError
469
+ nil
470
+ end
312
471
  end
313
472
  end
314
473
  end
@@ -99,9 +99,7 @@ module Google
99
99
  # data.
100
100
  #
101
101
  def error_rows
102
- Array(@gapi.insert_errors).map do |ie|
103
- @rows[ie.index]
104
- end
102
+ Array(@gapi.insert_errors).map { |ie| @rows[ie.index] }
105
103
  end
106
104
 
107
105
  ##
@@ -71,9 +71,9 @@ module Google
71
71
  def next
72
72
  return nil unless next?
73
73
  ensure_service!
74
- options = { all: @hidden, token: token, max: @max, filter: @filter }
75
- gapi = @service.list_jobs options
76
- self.class.from_gapi gapi, @service, @hidden, @max, @filter
74
+ next_kwargs = @kwargs.merge token: token
75
+ next_gapi = @service.list_jobs(**next_kwargs)
76
+ self.class.from_gapi next_gapi, @service, **next_kwargs
77
77
  end
78
78
 
79
79
  ##
@@ -121,17 +121,15 @@ module Google
121
121
  # puts job.state
122
122
  # end
123
123
  #
124
- def all request_limit: nil
124
+ def all request_limit: nil, &block
125
125
  request_limit = request_limit.to_i if request_limit
126
- unless block_given?
127
- return enum_for :all, request_limit: request_limit
128
- end
126
+ return enum_for :all, request_limit: request_limit unless block_given?
129
127
  results = self
130
128
  loop do
131
- results.each { |r| yield r }
129
+ results.each(&block)
132
130
  if request_limit
133
131
  request_limit -= 1
134
- break if request_limit < 0
132
+ break if request_limit.negative?
135
133
  end
136
134
  break unless results.next?
137
135
  results = results.next
@@ -141,17 +139,12 @@ module Google
141
139
  ##
142
140
  # @private New Job::List from a Google API Client
143
141
  # Google::Apis::BigqueryV2::JobList object.
144
- def self.from_gapi gapi_list, service, hidden = nil, max = nil,
145
- filter = nil
146
- jobs = List.new(Array(gapi_list.jobs).map do |gapi_object|
147
- Job.from_gapi gapi_object, service
148
- end)
149
- jobs.instance_variable_set :@token, gapi_list.next_page_token
150
- jobs.instance_variable_set :@etag, gapi_list.etag
151
- jobs.instance_variable_set :@service, service
152
- jobs.instance_variable_set :@hidden, hidden
153
- jobs.instance_variable_set :@max, max
154
- jobs.instance_variable_set :@filter, filter
142
+ def self.from_gapi gapi_list, service, **kwargs
143
+ jobs = List.new(Array(gapi_list.jobs).map { |gapi_object| Job.from_gapi gapi_object, service })
144
+ jobs.instance_variable_set :@token, gapi_list.next_page_token
145
+ jobs.instance_variable_set :@etag, gapi_list.etag
146
+ jobs.instance_variable_set :@service, service
147
+ jobs.instance_variable_set :@kwargs, kwargs
155
148
  jobs
156
149
  end
157
150