google-cloud-bigquery 1.20.0 → 1.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,15 +20,17 @@ module Google
20
20
  # # ExtractJob
21
21
  #
22
22
  # A {Job} subclass representing an export operation that may be performed
23
- # on a {Table}. A ExtractJob instance is created when you call
24
- # {Table#extract_job}.
23
+ # on a {Table} or {Model}. An ExtractJob instance is returned when you call
24
+ # {Project#extract_job}, {Table#extract_job} or {Model#extract_job}.
25
25
  #
26
26
  # @see https://cloud.google.com/bigquery/docs/exporting-data
27
- # Exporting Data From BigQuery
27
+ # Exporting table data
28
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
29
+ # Exporting models
28
30
  # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
29
31
  # reference
30
32
  #
31
- # @example
33
+ # @example Export table data
32
34
  # require "google/cloud/bigquery"
33
35
  #
34
36
  # bigquery = Google::Cloud::Bigquery.new
@@ -40,6 +42,18 @@ module Google
40
42
  # extract_job.wait_until_done!
41
43
  # extract_job.done? #=> true
42
44
  #
45
+ # @example Export a model
46
+ # require "google/cloud/bigquery"
47
+ #
48
+ # bigquery = Google::Cloud::Bigquery.new
49
+ # dataset = bigquery.dataset "my_dataset"
50
+ # model = dataset.model "my_model"
51
+ #
52
+ # extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
53
+ #
54
+ # extract_job.wait_until_done!
55
+ # extract_job.done? #=> true
56
+ #
43
57
  class ExtractJob < Job
44
58
  ##
45
59
  # The URI or URIs representing the Google Cloud Storage files to which
@@ -49,71 +63,130 @@ module Google
49
63
  end
50
64
 
51
65
  ##
52
- # The table from which the data is exported. This is the table upon
53
- # which {Table#extract_job} was called.
66
+ # The table or model which is exported.
54
67
  #
55
- # @return [Table] A table instance.
68
+ # @return [Table, Model, nil] A table or model instance, or `nil`.
56
69
  #
57
70
  def source
58
- table = @gapi.configuration.extract.source_table
59
- return nil unless table
60
- retrieve_table table.project_id, table.dataset_id, table.table_id
71
+ if (table = @gapi.configuration.extract.source_table)
72
+ retrieve_table table.project_id, table.dataset_id, table.table_id
73
+ elsif (model = @gapi.configuration.extract.source_model)
74
+ retrieve_model model.project_id, model.dataset_id, model.model_id
75
+ end
61
76
  end
62
77
 
63
78
  ##
64
- # Checks if the export operation compresses the data using gzip. The
65
- # default is `false`.
79
+ # Whether the source of the export job is a table. See {#source}.
66
80
  #
67
- # @return [Boolean] `true` when `GZIP`, `false` otherwise.
81
+ # @return [Boolean] `true` when the source is a table, `false`
82
+ # otherwise.
83
+ #
84
+ def table?
85
+ !@gapi.configuration.extract.source_table.nil?
86
+ end
87
+
88
+ ##
89
+ # Whether the source of the export job is a model. See {#source}.
90
+ #
91
+ # @return [Boolean] `true` when the source is a model, `false`
92
+ # otherwise.
93
+ #
94
+ def model?
95
+ !@gapi.configuration.extract.source_model.nil?
96
+ end
97
+
98
+ ##
99
+ # Checks if the export operation compresses the data using gzip. The
100
+ # default is `false`. Not applicable when extracting models.
68
101
  #
102
+ # @return [Boolean] `true` when `GZIP`, `false` if not `GZIP` or not a
103
+ # table extraction.
69
104
  def compression?
105
+ return false unless table?
70
106
  val = @gapi.configuration.extract.compression
71
107
  val == "GZIP"
72
108
  end
73
109
 
74
110
  ##
75
- # Checks if the destination format for the data is [newline-delimited
76
- # JSON](http://jsonlines.org/). The default is `false`.
111
+ # Checks if the destination format for the table data is [newline-delimited
112
+ # JSON](http://jsonlines.org/). The default is `false`. Not applicable when
113
+ # extracting models.
77
114
  #
78
- # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false`
79
- # otherwise.
115
+ # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false` if not
116
+ # `NEWLINE_DELIMITED_JSON` or not a table extraction.
80
117
  #
81
118
  def json?
119
+ return false unless table?
82
120
  val = @gapi.configuration.extract.destination_format
83
121
  val == "NEWLINE_DELIMITED_JSON"
84
122
  end
85
123
 
86
124
  ##
87
- # Checks if the destination format for the data is CSV. Tables with
125
+ # Checks if the destination format for the table data is CSV. Tables with
88
126
  # nested or repeated fields cannot be exported as CSV. The default is
89
- # `true`.
127
+ # `true` for tables. Not applicable when extracting models.
90
128
  #
91
- # @return [Boolean] `true` when `CSV`, `false` otherwise.
129
+ # @return [Boolean] `true` when `CSV`, or `false` if not `CSV` or not a
130
+ # table extraction.
92
131
  #
93
132
  def csv?
133
+ return false unless table?
94
134
  val = @gapi.configuration.extract.destination_format
95
135
  return true if val.nil?
96
136
  val == "CSV"
97
137
  end
98
138
 
99
139
  ##
100
- # Checks if the destination format for the data is
101
- # [Avro](http://avro.apache.org/). The default is `false`.
140
+ # Checks if the destination format for the table data is
141
+ # [Avro](http://avro.apache.org/). The default is `false`. Not applicable
142
+ # when extracting models.
102
143
  #
103
- # @return [Boolean] `true` when `AVRO`, `false` otherwise.
144
+ # @return [Boolean] `true` when `AVRO`, `false` if not `AVRO` or not a
145
+ # table extraction.
104
146
  #
105
147
  def avro?
148
+ return false unless table?
106
149
  val = @gapi.configuration.extract.destination_format
107
150
  val == "AVRO"
108
151
  end
109
152
 
153
+ ##
154
+ # Checks if the destination format for the model is TensorFlow SavedModel.
155
+ # The default is `true` for models. Not applicable when extracting tables.
156
+ #
157
+ # @return [Boolean] `true` when `ML_TF_SAVED_MODEL`, `false` if not
158
+ # `ML_TF_SAVED_MODEL` or not a model extraction.
159
+ #
160
+ def ml_tf_saved_model?
161
+ return false unless model?
162
+ val = @gapi.configuration.extract.destination_format
163
+ return true if val.nil?
164
+ val == "ML_TF_SAVED_MODEL"
165
+ end
166
+
167
+ ##
168
+ # Checks if the destination format for the model is XGBoost. The default
169
+ # is `false`. Not applicable when extracting tables.
170
+ #
171
+ # @return [Boolean] `true` when `ML_XGBOOST_BOOSTER`, `false` if not
172
+ # `ML_XGBOOST_BOOSTER` or not a model extraction.
173
+ #
174
+ def ml_xgboost_booster?
175
+ return false unless model?
176
+ val = @gapi.configuration.extract.destination_format
177
+ val == "ML_XGBOOST_BOOSTER"
178
+ end
179
+
110
180
  ##
111
181
  # The character or symbol the operation uses to delimit fields in the
112
- # exported data. The default is a comma (,).
182
+ # exported data. The default is a comma (,) for tables. Not applicable
183
+ # when extracting models.
113
184
  #
114
- # @return [String] A string containing the character, such as `","`.
185
+ # @return [String, nil] A string containing the character, such as `","`,
186
+ # `nil` if not a table extraction.
115
187
  #
116
188
  def delimiter
189
+ return unless table?
117
190
  val = @gapi.configuration.extract.field_delimiter
118
191
  val = "," if val.nil?
119
192
  val
@@ -121,12 +194,13 @@ module Google
121
194
 
122
195
  ##
123
196
  # Checks if the exported data contains a header row. The default is
124
- # `true`.
197
+ # `true` for tables. Not applicable when extracting models.
125
198
  #
126
199
  # @return [Boolean] `true` when the print header configuration is
127
- # present or `nil`, `false` otherwise.
200
+ # present or `nil`, `false` if disabled or not a table extraction.
128
201
  #
129
202
  def print_header?
203
+ return false unless table?
130
204
  val = @gapi.configuration.extract.print_header
131
205
  val = true if val.nil?
132
206
  val
@@ -159,12 +233,14 @@ module Google
159
233
  # whether to enable extracting applicable column types (such as
160
234
  # `TIMESTAMP`) to their corresponding AVRO logical types
161
235
  # (`timestamp-micros`), instead of only using their raw types
162
- # (`avro-long`).
236
+ # (`avro-long`). Not applicable when extracting models.
163
237
  #
164
238
  # @return [Boolean] `true` when applicable column types will use their
165
- # corresponding AVRO logical types, `false` otherwise.
239
+ # corresponding AVRO logical types, `false` if not enabled or not a
240
+ # table extraction.
166
241
  #
167
242
  def use_avro_logical_types?
243
+ return false unless table?
168
244
  @gapi.configuration.extract.use_avro_logical_types
169
245
  end
170
246
 
@@ -182,19 +258,24 @@ module Google
182
258
  #
183
259
  # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
184
260
  # configuration object for setting extract options.
185
- def self.from_options service, table, storage_files, options
261
+ def self.from_options service, source, storage_files, options
186
262
  job_ref = service.job_ref_from options[:job_id], options[:prefix]
187
263
  storage_urls = Array(storage_files).map do |url|
188
264
  url.respond_to?(:to_gs_url) ? url.to_gs_url : url
189
265
  end
190
266
  options[:format] ||= Convert.derive_source_format storage_urls.first
267
+ extract_config = Google::Apis::BigqueryV2::JobConfigurationExtract.new(
268
+ destination_uris: Array(storage_urls)
269
+ )
270
+ if source.is_a? Google::Apis::BigqueryV2::TableReference
271
+ extract_config.source_table = source
272
+ elsif source.is_a? Google::Apis::BigqueryV2::ModelReference
273
+ extract_config.source_model = source
274
+ end
191
275
  job = Google::Apis::BigqueryV2::Job.new(
192
276
  job_reference: job_ref,
193
277
  configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
194
- extract: Google::Apis::BigqueryV2::JobConfigurationExtract.new(
195
- destination_uris: Array(storage_urls),
196
- source_table: table
197
- ),
278
+ extract: extract_config,
198
279
  dry_run: options[:dryrun]
199
280
  )
200
281
  )
@@ -253,7 +334,7 @@ module Google
253
334
  end
254
335
 
255
336
  ##
256
- # Sets the compression type.
337
+ # Sets the compression type. Not applicable when extracting models.
257
338
  #
258
339
  # @param [String] value The compression type to use for exported
259
340
  # files. Possible values include `GZIP` and `NONE`. The default
@@ -265,7 +346,7 @@ module Google
265
346
  end
266
347
 
267
348
  ##
268
- # Sets the field delimiter.
349
+ # Sets the field delimiter. Not applicable when extracting models.
269
350
  #
270
351
  # @param [String] value Delimiter to use between fields in the
271
352
  # exported data. Default is <code>,</code>.
@@ -276,14 +357,21 @@ module Google
276
357
  end
277
358
 
278
359
  ##
279
- # Sets the destination file format. The default value is `csv`.
360
+ # Sets the destination file format. The default value for
361
+ # tables is `csv`. Tables with nested or repeated fields cannot be
362
+ # exported as CSV. The default value for models is `ml_tf_saved_model`.
280
363
  #
281
- # The following values are supported:
364
+ # Supported values for tables:
282
365
  #
283
366
  # * `csv` - CSV
284
367
  # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
285
368
  # * `avro` - [Avro](http://avro.apache.org/)
286
369
  #
370
+ # Supported values for models:
371
+ #
372
+ # * `ml_tf_saved_model` - TensorFlow SavedModel
373
+ # * `ml_xgboost_booster` - XGBoost Booster
374
+ #
287
375
  # @param [String] new_format The new destination format.
288
376
  #
289
377
  # @!group Attributes
@@ -293,7 +381,8 @@ module Google
293
381
  end
294
382
 
295
383
  ##
296
- # Print a header row in the exported file.
384
+ # Print a header row in the exported file. Not applicable when
385
+ # extracting models.
297
386
  #
298
387
  # @param [Boolean] value Whether to print out a header row in the
299
388
  # results. Default is `true`.
@@ -307,12 +396,21 @@ module Google
307
396
  # Sets the labels to use for the job.
308
397
  #
309
398
  # @param [Hash] value A hash of user-provided labels associated with
310
- # the job. You can use these to organize and group your jobs. Label
311
- # keys and values can be no longer than 63 characters, can only
312
- # contain lowercase letters, numeric characters, underscores and
313
- # dashes. International characters are allowed. Label values are
314
- # optional. Label keys must start with a letter and each label in
315
- # the list must have a different key.
399
+ # the job. You can use these to organize and group your jobs.
400
+ #
401
+ # The labels applied to a resource must meet the following requirements:
402
+ #
403
+ # * Each resource can have multiple labels, up to a maximum of 64.
404
+ # * Each label must be a key-value pair.
405
+ # * Keys have a minimum length of 1 character and a maximum length of
406
+ # 63 characters, and cannot be empty. Values can be empty, and have
407
+ # a maximum length of 63 characters.
408
+ # * Keys and values can contain only lowercase letters, numeric characters,
409
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
410
+ # international characters are allowed.
411
+ # * The key portion of a label must be unique. However, you can use the
412
+ # same key with multiple resources.
413
+ # * Keys must start with a lowercase letter or international character.
316
414
  #
317
415
  # @!group Attributes
318
416
  #
@@ -362,6 +460,16 @@ module Google
362
460
  @gapi
363
461
  end
364
462
  end
463
+
464
+ protected
465
+
466
+ def retrieve_model project_id, dataset_id, model_id
467
+ ensure_service!
468
+ gapi = service.get_project_model project_id, dataset_id, model_id
469
+ Model.from_gapi_json gapi, service
470
+ rescue Google::Cloud::NotFoundError
471
+ nil
472
+ end
365
473
  end
366
474
  end
367
475
  end
@@ -197,6 +197,72 @@ module Google
197
197
  Convert.millis_to_time @gapi.statistics.end_time
198
198
  end
199
199
 
200
+ ##
201
+ # The number of child jobs executed.
202
+ #
203
+ # @return [Integer] The number of child jobs executed.
204
+ #
205
+ def num_child_jobs
206
+ @gapi.statistics.num_child_jobs || 0
207
+ end
208
+
209
+ ##
210
+ # If this is a child job, the id of the parent.
211
+ #
212
+ # @return [String, nil] The ID of the parent job, or `nil` if not a child job.
213
+ #
214
+ def parent_job_id
215
+ @gapi.statistics.parent_job_id
216
+ end
217
+
218
+ ##
219
+ # The statistics including stack frames for a child job of a script.
220
+ #
221
+ # @return [Google::Cloud::Bigquery::Job::ScriptStatistics, nil] The script statistics, or `nil` if the job is
222
+ # not a child job.
223
+ #
224
+ # @example
225
+ # require "google/cloud/bigquery"
226
+ #
227
+ # bigquery = Google::Cloud::Bigquery.new
228
+ #
229
+ # multi_statement_sql = <<~SQL
230
+ # -- Declare a variable to hold names as an array.
231
+ # DECLARE top_names ARRAY<STRING>;
232
+ # -- Build an array of the top 100 names from the year 2017.
233
+ # SET top_names = (
234
+ # SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
235
+ # FROM `bigquery-public-data.usa_names.usa_1910_current`
236
+ # WHERE year = 2017
237
+ # );
238
+ # -- Which names appear as words in Shakespeare's plays?
239
+ # SELECT
240
+ # name AS shakespeare_name
241
+ # FROM UNNEST(top_names) AS name
242
+ # WHERE name IN (
243
+ # SELECT word
244
+ # FROM `bigquery-public-data.samples.shakespeare`
245
+ # );
246
+ # SQL
247
+ #
248
+ # job = bigquery.query_job multi_statement_sql
249
+ #
250
+ # job.wait_until_done!
251
+ #
252
+ # child_jobs = bigquery.jobs parent_job: job
253
+ #
254
+ # child_jobs.each do |child_job|
255
+ # script_statistics = child_job.script_statistics
256
+ # puts script_statistics.evaluation_kind
257
+ # script_statistics.stack_frames.each do |stack_frame|
258
+ # puts stack_frame.text
259
+ # end
260
+ # end
261
+ #
262
+ def script_statistics
263
+ ScriptStatistics.from_gapi @gapi.statistics.script_statistics if @gapi.statistics.script_statistics
264
+ end
265
+
200
266
  ##
201
267
  # The configuration for the job. Returns a hash.
202
268
  #
@@ -423,6 +489,138 @@ module Google
423
489
  end
424
490
  end
425
491
 
492
+ ##
493
+ # Represents statistics for a child job of a script.
494
+ #
495
+ # @attr_reader [String] evaluation_kind Indicates the type of child job. Possible values include `STATEMENT` and
496
+ # `EXPRESSION`.
497
+ # @attr_reader [Array<Google::Cloud::Bigquery::Job::ScriptStackFrame>] stack_frames Stack trace where the
498
+ # current evaluation happened. Shows line/column/procedure name of each frame on the stack at the point where
499
+ # the current evaluation happened. The leaf frame is first, the primary script is last.
500
+ #
501
+ # @example
502
+ # require "google/cloud/bigquery"
503
+ #
504
+ # bigquery = Google::Cloud::Bigquery.new
505
+ #
506
+ # multi_statement_sql = <<~SQL
507
+ # -- Declare a variable to hold names as an array.
508
+ # DECLARE top_names ARRAY<STRING>;
509
+ # -- Build an array of the top 100 names from the year 2017.
510
+ # SET top_names = (
511
+ # SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
512
+ # FROM `bigquery-public-data.usa_names.usa_1910_current`
513
+ # WHERE year = 2017
514
+ # );
515
+ # -- Which names appear as words in Shakespeare's plays?
516
+ # SELECT
517
+ # name AS shakespeare_name
518
+ # FROM UNNEST(top_names) AS name
519
+ # WHERE name IN (
520
+ # SELECT word
521
+ # FROM `bigquery-public-data.samples.shakespeare`
522
+ # );
523
+ # SQL
524
+ #
525
+ # job = bigquery.query_job multi_statement_sql
526
+ #
527
+ # job.wait_until_done!
528
+ #
529
+ # child_jobs = bigquery.jobs parent_job: job
530
+ #
531
+ # child_jobs.each do |child_job|
532
+ # script_statistics = child_job.script_statistics
533
+ # puts script_statistics.evaluation_kind
534
+ # script_statistics.stack_frames.each do |stack_frame|
535
+ # puts stack_frame.text
536
+ # end
537
+ # end
538
+ #
539
+ class ScriptStatistics
540
+ attr_reader :evaluation_kind, :stack_frames
541
+
542
+ ##
543
+ # @private Creates a new ScriptStatistics instance.
544
+ def initialize evaluation_kind, stack_frames
545
+ @evaluation_kind = evaluation_kind
546
+ @stack_frames = stack_frames
547
+ end
548
+
549
+ ##
550
+ # @private New ScriptStatistics from a statistics.script_statistics object.
551
+ def self.from_gapi gapi
552
+ frames = Array(gapi.stack_frames).map { |g| ScriptStackFrame.from_gapi g }
553
+ new gapi.evaluation_kind, frames
554
+ end
555
+ end
556
+
557
+ ##
558
+ # Represents a stack frame showing the line/column/procedure name where the current evaluation happened.
559
+ #
560
+ # @attr_reader [Integer] start_line One-based start line.
561
+ # @attr_reader [Integer] start_column One-based start column.
562
+ # @attr_reader [Integer] end_line One-based end line.
563
+ # @attr_reader [Integer] end_column One-based end column.
564
+ # @attr_reader [String] text Text of the current statement/expression.
565
+ #
566
+ # @example
567
+ # require "google/cloud/bigquery"
568
+ #
569
+ # bigquery = Google::Cloud::Bigquery.new
570
+ #
571
+ # multi_statement_sql = <<~SQL
572
+ # -- Declare a variable to hold names as an array.
573
+ # DECLARE top_names ARRAY<STRING>;
574
+ # -- Build an array of the top 100 names from the year 2017.
575
+ # SET top_names = (
576
+ # SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
577
+ # FROM `bigquery-public-data.usa_names.usa_1910_current`
578
+ # WHERE year = 2017
579
+ # );
580
+ # -- Which names appear as words in Shakespeare's plays?
581
+ # SELECT
582
+ # name AS shakespeare_name
583
+ # FROM UNNEST(top_names) AS name
584
+ # WHERE name IN (
585
+ # SELECT word
586
+ # FROM `bigquery-public-data.samples.shakespeare`
587
+ # );
588
+ # SQL
589
+ #
590
+ # job = bigquery.query_job multi_statement_sql
591
+ #
592
+ # job.wait_until_done!
593
+ #
594
+ # child_jobs = bigquery.jobs parent_job: job
595
+ #
596
+ # child_jobs.each do |child_job|
597
+ # script_statistics = child_job.script_statistics
598
+ # puts script_statistics.evaluation_kind
599
+ # script_statistics.stack_frames.each do |stack_frame|
600
+ # puts stack_frame.text
601
+ # end
602
+ # end
603
+ #
604
+ class ScriptStackFrame
605
+ attr_reader :start_line, :start_column, :end_line, :end_column, :text
606
+
607
+ ##
608
+ # @private Creates a new ScriptStackFrame instance.
609
+ def initialize start_line, start_column, end_line, end_column, text
610
+ @start_line = start_line
611
+ @start_column = start_column
612
+ @end_line = end_line
613
+ @end_column = end_column
614
+ @text = text
615
+ end
616
+
617
+ ##
618
+ # @private New ScriptStackFrame from a statistics.script_statistics.stack_frames[] element.
619
+ def self.from_gapi gapi
620
+ new gapi.start_line, gapi.start_column, gapi.end_line, gapi.end_column, gapi.text
621
+ end
622
+ end
623
+
426
624
  protected
427
625
 
428
626
  ##