google-cloud-bigquery 1.21.0 → 1.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -45,9 +45,12 @@ module Google
45
45
  # data = bigquery.query "SELECT * FROM my_ext_table",
46
46
  # external: { my_ext_table: csv_table }
47
47
  #
48
+ # # Iterate over the first page of results
48
49
  # data.each do |row|
49
50
  # puts row[:name]
50
51
  # end
52
+ # # Retrieve the next page of results
53
+ # data = data.next if data.next?
51
54
  #
52
55
  module External
53
56
  ##
@@ -138,9 +141,12 @@ module Google
138
141
  # data = bigquery.query "SELECT * FROM my_ext_table",
139
142
  # external: { my_ext_table: avro_table }
140
143
  #
144
+ # # Iterate over the first page of results
141
145
  # data.each do |row|
142
146
  # puts row[:name]
143
147
  # end
148
+ # # Retrieve the next page of results
149
+ # data = data.next if data.next?
144
150
  #
145
151
  class DataSource
146
152
  ##
@@ -575,9 +581,12 @@ module Google
575
581
  # data = bigquery.query "SELECT * FROM my_ext_table",
576
582
  # external: { my_ext_table: csv_table }
577
583
  #
584
+ # # Iterate over the first page of results
578
585
  # data.each do |row|
579
586
  # puts row[:name]
580
587
  # end
588
+ # # Retrieve the next page of results
589
+ # data = data.next if data.next?
581
590
  #
582
591
  class CsvSource < External::DataSource
583
592
  ##
@@ -1037,9 +1046,12 @@ module Google
1037
1046
  # data = bigquery.query "SELECT * FROM my_ext_table",
1038
1047
  # external: { my_ext_table: json_table }
1039
1048
  #
1049
+ # # Iterate over the first page of results
1040
1050
  # data.each do |row|
1041
1051
  # puts row[:name]
1042
1052
  # end
1053
+ # # Retrieve the next page of results
1054
+ # data = data.next if data.next?
1043
1055
  #
1044
1056
  class JsonSource < External::DataSource
1045
1057
  ##
@@ -1173,9 +1185,12 @@ module Google
1173
1185
  # data = bigquery.query "SELECT * FROM my_ext_table",
1174
1186
  # external: { my_ext_table: sheets_table }
1175
1187
  #
1188
+ # # Iterate over the first page of results
1176
1189
  # data.each do |row|
1177
1190
  # puts row[:name]
1178
1191
  # end
1192
+ # # Retrieve the next page of results
1193
+ # data = data.next if data.next?
1179
1194
  #
1180
1195
  class SheetsSource < External::DataSource
1181
1196
  ##
@@ -1318,9 +1333,12 @@ module Google
1318
1333
  # data = bigquery.query "SELECT * FROM my_ext_table",
1319
1334
  # external: { my_ext_table: bigtable_table }
1320
1335
  #
1336
+ # # Iterate over the first page of results
1321
1337
  # data.each do |row|
1322
1338
  # puts row[:name]
1323
1339
  # end
1340
+ # # Retrieve the next page of results
1341
+ # data = data.next if data.next?
1324
1342
  #
1325
1343
  class BigtableSource < External::DataSource
1326
1344
  ##
@@ -1516,9 +1534,12 @@ module Google
1516
1534
  # data = bigquery.query "SELECT * FROM my_ext_table",
1517
1535
  # external: { my_ext_table: bigtable_table }
1518
1536
  #
1537
+ # # Iterate over the first page of results
1519
1538
  # data.each do |row|
1520
1539
  # puts row[:name]
1521
1540
  # end
1541
+ # # Retrieve the next page of results
1542
+ # data = data.next if data.next?
1522
1543
  #
1523
1544
  class ColumnFamily
1524
1545
  ##
@@ -2053,9 +2074,12 @@ module Google
2053
2074
  # data = bigquery.query "SELECT * FROM my_ext_table",
2054
2075
  # external: { my_ext_table: bigtable_table }
2055
2076
  #
2077
+ # # Iterate over the first page of results
2056
2078
  # data.each do |row|
2057
2079
  # puts row[:name]
2058
2080
  # end
2081
+ # # Retrieve the next page of results
2082
+ # data = data.next if data.next?
2059
2083
  #
2060
2084
  class Column
2061
2085
  ##
@@ -20,15 +20,17 @@ module Google
20
20
  # # ExtractJob
21
21
  #
22
22
  # A {Job} subclass representing an export operation that may be performed
23
- # on a {Table}. A ExtractJob instance is created when you call
24
- # {Table#extract_job}.
23
+ # on a {Table} or {Model}. An ExtractJob instance is returned when you call
24
+ # {Project#extract_job}, {Table#extract_job} or {Model#extract_job}.
25
25
  #
26
26
  # @see https://cloud.google.com/bigquery/docs/exporting-data
27
- # Exporting Data From BigQuery
27
+ # Exporting table data
28
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
29
+ # Exporting models
28
30
  # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
29
31
  # reference
30
32
  #
31
- # @example
33
+ # @example Export table data
32
34
  # require "google/cloud/bigquery"
33
35
  #
34
36
  # bigquery = Google::Cloud::Bigquery.new
@@ -40,6 +42,18 @@ module Google
40
42
  # extract_job.wait_until_done!
41
43
  # extract_job.done? #=> true
42
44
  #
45
+ # @example Export a model
46
+ # require "google/cloud/bigquery"
47
+ #
48
+ # bigquery = Google::Cloud::Bigquery.new
49
+ # dataset = bigquery.dataset "my_dataset"
50
+ # model = dataset.model "my_model"
51
+ #
52
+ # extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
53
+ #
54
+ # extract_job.wait_until_done!
55
+ # extract_job.done? #=> true
56
+ #
43
57
  class ExtractJob < Job
44
58
  ##
45
59
  # The URI or URIs representing the Google Cloud Storage files to which
@@ -49,71 +63,130 @@ module Google
49
63
  end
50
64
 
51
65
  ##
52
- # The table from which the data is exported. This is the table upon
53
- # which {Table#extract_job} was called.
66
+ # The table or model which is exported.
54
67
  #
55
- # @return [Table] A table instance.
68
+ # @return [Table, Model, nil] A table or model instance, or `nil`.
56
69
  #
57
70
  def source
58
- table = @gapi.configuration.extract.source_table
59
- return nil unless table
60
- retrieve_table table.project_id, table.dataset_id, table.table_id
71
+ if (table = @gapi.configuration.extract.source_table)
72
+ retrieve_table table.project_id, table.dataset_id, table.table_id
73
+ elsif (model = @gapi.configuration.extract.source_model)
74
+ retrieve_model model.project_id, model.dataset_id, model.model_id
75
+ end
61
76
  end
62
77
 
63
78
  ##
64
- # Checks if the export operation compresses the data using gzip. The
65
- # default is `false`.
79
+ # Whether the source of the export job is a table. See {#source}.
66
80
  #
67
- # @return [Boolean] `true` when `GZIP`, `false` otherwise.
81
+ # @return [Boolean] `true` when the source is a table, `false`
82
+ # otherwise.
83
+ #
84
+ def table?
85
+ !@gapi.configuration.extract.source_table.nil?
86
+ end
87
+
88
+ ##
89
+ # Whether the source of the export job is a model. See {#source}.
90
+ #
91
+ # @return [Boolean] `true` when the source is a model, `false`
92
+ # otherwise.
93
+ #
94
+ def model?
95
+ !@gapi.configuration.extract.source_model.nil?
96
+ end
97
+
98
+ ##
99
+ # Checks if the export operation compresses the data using gzip. The
100
+ # default is `false`. Not applicable when extracting models.
68
101
  #
102
+ # @return [Boolean] `true` when `GZIP`, `false` if not `GZIP` or not a
103
+ # table extraction.
69
104
  def compression?
105
+ return false unless table?
70
106
  val = @gapi.configuration.extract.compression
71
107
  val == "GZIP"
72
108
  end
73
109
 
74
110
  ##
75
- # Checks if the destination format for the data is [newline-delimited
76
- # JSON](http://jsonlines.org/). The default is `false`.
111
+ # Checks if the destination format for the table data is [newline-delimited
112
+ # JSON](http://jsonlines.org/). The default is `false`. Not applicable when
113
+ # extracting models.
77
114
  #
78
- # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false`
79
- # otherwise.
115
+ # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false` if not
116
+ # `NEWLINE_DELIMITED_JSON` or not a table extraction.
80
117
  #
81
118
  def json?
119
+ return false unless table?
82
120
  val = @gapi.configuration.extract.destination_format
83
121
  val == "NEWLINE_DELIMITED_JSON"
84
122
  end
85
123
 
86
124
  ##
87
- # Checks if the destination format for the data is CSV. Tables with
125
+ # Checks if the destination format for the table data is CSV. Tables with
88
126
  # nested or repeated fields cannot be exported as CSV. The default is
89
- # `true`.
127
+ # `true` for tables. Not applicable when extracting models.
90
128
  #
91
- # @return [Boolean] `true` when `CSV`, `false` otherwise.
129
+ # @return [Boolean] `true` when `CSV`, or `false` if not `CSV` or not a
130
+ # table extraction.
92
131
  #
93
132
  def csv?
133
+ return false unless table?
94
134
  val = @gapi.configuration.extract.destination_format
95
135
  return true if val.nil?
96
136
  val == "CSV"
97
137
  end
98
138
 
99
139
  ##
100
- # Checks if the destination format for the data is
101
- # [Avro](http://avro.apache.org/). The default is `false`.
140
+ # Checks if the destination format for the table data is
141
+ # [Avro](http://avro.apache.org/). The default is `false`. Not applicable
142
+ # when extracting models.
102
143
  #
103
- # @return [Boolean] `true` when `AVRO`, `false` otherwise.
144
+ # @return [Boolean] `true` when `AVRO`, `false` if not `AVRO` or not a
145
+ # table extraction.
104
146
  #
105
147
  def avro?
148
+ return false unless table?
106
149
  val = @gapi.configuration.extract.destination_format
107
150
  val == "AVRO"
108
151
  end
109
152
 
153
+ ##
154
+ # Checks if the destination format for the model is TensorFlow SavedModel.
155
+ # The default is `true` for models. Not applicable when extracting tables.
156
+ #
157
+ # @return [Boolean] `true` when `ML_TF_SAVED_MODEL`, `false` if not
158
+ # `ML_TF_SAVED_MODEL` or not a model extraction.
159
+ #
160
+ def ml_tf_saved_model?
161
+ return false unless model?
162
+ val = @gapi.configuration.extract.destination_format
163
+ return true if val.nil?
164
+ val == "ML_TF_SAVED_MODEL"
165
+ end
166
+
167
+ ##
168
+ # Checks if the destination format for the model is XGBoost. The default
169
+ # is `false`. Not applicable when extracting tables.
170
+ #
171
+ # @return [Boolean] `true` when `ML_XGBOOST_BOOSTER`, `false` if not
172
+ # `ML_XGBOOST_BOOSTER` or not a model extraction.
173
+ #
174
+ def ml_xgboost_booster?
175
+ return false unless model?
176
+ val = @gapi.configuration.extract.destination_format
177
+ val == "ML_XGBOOST_BOOSTER"
178
+ end
179
+
110
180
  ##
111
181
  # The character or symbol the operation uses to delimit fields in the
112
- # exported data. The default is a comma (,).
182
+ # exported data. The default is a comma (,) for tables. Not applicable
183
+ # when extracting models.
113
184
  #
114
- # @return [String] A string containing the character, such as `","`.
185
+ # @return [String, nil] A string containing the character, such as `","`,
186
+ # `nil` if not a table extraction.
115
187
  #
116
188
  def delimiter
189
+ return unless table?
117
190
  val = @gapi.configuration.extract.field_delimiter
118
191
  val = "," if val.nil?
119
192
  val
@@ -121,12 +194,13 @@ module Google
121
194
 
122
195
  ##
123
196
  # Checks if the exported data contains a header row. The default is
124
- # `true`.
197
+ # `true` for tables. Not applicable when extracting models.
125
198
  #
126
199
  # @return [Boolean] `true` when the print header configuration is
127
- # present or `nil`, `false` otherwise.
200
+ # present or `nil`, `false` if disabled or not a table extraction.
128
201
  #
129
202
  def print_header?
203
+ return false unless table?
130
204
  val = @gapi.configuration.extract.print_header
131
205
  val = true if val.nil?
132
206
  val
@@ -159,12 +233,14 @@ module Google
159
233
  # whether to enable extracting applicable column types (such as
160
234
  # `TIMESTAMP`) to their corresponding AVRO logical types
161
235
  # (`timestamp-micros`), instead of only using their raw types
162
- # (`avro-long`).
236
+ # (`avro-long`). Not applicable when extracting models.
163
237
  #
164
238
  # @return [Boolean] `true` when applicable column types will use their
165
- # corresponding AVRO logical types, `false` otherwise.
239
+ # corresponding AVRO logical types, `false` if not enabled or not a
240
+ # table extraction.
166
241
  #
167
242
  def use_avro_logical_types?
243
+ return false unless table?
168
244
  @gapi.configuration.extract.use_avro_logical_types
169
245
  end
170
246
 
@@ -182,19 +258,24 @@ module Google
182
258
  #
183
259
  # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
184
260
  # configuration object for setting extract options.
185
- def self.from_options service, table, storage_files, options
261
+ def self.from_options service, source, storage_files, options
186
262
  job_ref = service.job_ref_from options[:job_id], options[:prefix]
187
263
  storage_urls = Array(storage_files).map do |url|
188
264
  url.respond_to?(:to_gs_url) ? url.to_gs_url : url
189
265
  end
190
266
  options[:format] ||= Convert.derive_source_format storage_urls.first
267
+ extract_config = Google::Apis::BigqueryV2::JobConfigurationExtract.new(
268
+ destination_uris: Array(storage_urls)
269
+ )
270
+ if source.is_a? Google::Apis::BigqueryV2::TableReference
271
+ extract_config.source_table = source
272
+ elsif source.is_a? Google::Apis::BigqueryV2::ModelReference
273
+ extract_config.source_model = source
274
+ end
191
275
  job = Google::Apis::BigqueryV2::Job.new(
192
276
  job_reference: job_ref,
193
277
  configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
194
- extract: Google::Apis::BigqueryV2::JobConfigurationExtract.new(
195
- destination_uris: Array(storage_urls),
196
- source_table: table
197
- ),
278
+ extract: extract_config,
198
279
  dry_run: options[:dryrun]
199
280
  )
200
281
  )
@@ -253,7 +334,7 @@ module Google
253
334
  end
254
335
 
255
336
  ##
256
- # Sets the compression type.
337
+ # Sets the compression type. Not applicable when extracting models.
257
338
  #
258
339
  # @param [String] value The compression type to use for exported
259
340
  # files. Possible values include `GZIP` and `NONE`. The default
@@ -265,7 +346,7 @@ module Google
265
346
  end
266
347
 
267
348
  ##
268
- # Sets the field delimiter.
349
+ # Sets the field delimiter. Not applicable when extracting models.
269
350
  #
270
351
  # @param [String] value Delimiter to use between fields in the
271
352
  # exported data. Default is <code>,</code>.
@@ -276,14 +357,21 @@ module Google
276
357
  end
277
358
 
278
359
  ##
279
- # Sets the destination file format. The default value is `csv`.
360
+ # Sets the destination file format. The default value for
361
+ # tables is `csv`. Tables with nested or repeated fields cannot be
362
+ # exported as CSV. The default value for models is `ml_tf_saved_model`.
280
363
  #
281
- # The following values are supported:
364
+ # Supported values for tables:
282
365
  #
283
366
  # * `csv` - CSV
284
367
  # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
285
368
  # * `avro` - [Avro](http://avro.apache.org/)
286
369
  #
370
+ # Supported values for models:
371
+ #
372
+ # * `ml_tf_saved_model` - TensorFlow SavedModel
373
+ # * `ml_xgboost_booster` - XGBoost Booster
374
+ #
287
375
  # @param [String] new_format The new destination format.
288
376
  #
289
377
  # @!group Attributes
@@ -293,7 +381,8 @@ module Google
293
381
  end
294
382
 
295
383
  ##
296
- # Print a header row in the exported file.
384
+ # Print a header row in the exported file. Not applicable when
385
+ # extracting models.
297
386
  #
298
387
  # @param [Boolean] value Whether to print out a header row in the
299
388
  # results. Default is `true`.
@@ -307,12 +396,21 @@ module Google
307
396
  # Sets the labels to use for the job.
308
397
  #
309
398
  # @param [Hash] value A hash of user-provided labels associated with
310
- # the job. You can use these to organize and group your jobs. Label
311
- # keys and values can be no longer than 63 characters, can only
312
- # contain lowercase letters, numeric characters, underscores and
313
- # dashes. International characters are allowed. Label values are
314
- # optional. Label keys must start with a letter and each label in
315
- # the list must have a different key.
399
+ # the job. You can use these to organize and group your jobs.
400
+ #
401
+ # The labels applied to a resource must meet the following requirements:
402
+ #
403
+ # * Each resource can have multiple labels, up to a maximum of 64.
404
+ # * Each label must be a key-value pair.
405
+ # * Keys have a minimum length of 1 character and a maximum length of
406
+ # 63 characters, and cannot be empty. Values can be empty, and have
407
+ # a maximum length of 63 characters.
408
+ # * Keys and values can contain only lowercase letters, numeric characters,
409
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
410
+ # international characters are allowed.
411
+ # * The key portion of a label must be unique. However, you can use the
412
+ # same key with multiple resources.
413
+ # * Keys must start with a lowercase letter or international character.
316
414
  #
317
415
  # @!group Attributes
318
416
  #
@@ -362,6 +460,16 @@ module Google
362
460
  @gapi
363
461
  end
364
462
  end
463
+
464
+ protected
465
+
466
+ def retrieve_model project_id, dataset_id, model_id
467
+ ensure_service!
468
+ gapi = service.get_project_model project_id, dataset_id, model_id
469
+ Model.from_gapi_json gapi, service
470
+ rescue Google::Cloud::NotFoundError
471
+ nil
472
+ end
365
473
  end
366
474
  end
367
475
  end
@@ -428,8 +428,9 @@ module Google
428
428
  # The period for which the destination table will be time partitioned, if
429
429
  # any. See [Partitioned Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
430
430
  #
431
- # @return [String, nil] The time partition type. Currently the only supported
432
- # value is "DAY", or `nil` if not present.
431
+ # @return [String, nil] The time partition type. The supported types are `DAY`,
432
+ # `HOUR`, `MONTH`, and `YEAR`, which will generate one partition per day,
433
+ # hour, month, and year, respectively; or `nil` if not present.
433
434
  #
434
435
  # @!group Attributes
435
436
  #
@@ -1303,12 +1304,21 @@ module Google
1303
1304
  # Sets the labels to use for the load job.
1304
1305
  #
1305
1306
  # @param [Hash] val A hash of user-provided labels associated with
1306
- # the job. You can use these to organize and group your jobs. Label
1307
- # keys and values can be no longer than 63 characters, can only
1308
- # contain lowercase letters, numeric characters, underscores and
1309
- # dashes. International characters are allowed. Label values are
1310
- # optional. Label keys must start with a letter and each label in
1311
- # the list must have a different key.
1307
+ # the job. You can use these to organize and group your jobs.
1308
+ #
1309
+ # The labels applied to a resource must meet the following requirements:
1310
+ #
1311
+ # * Each resource can have multiple labels, up to a maximum of 64.
1312
+ # * Each label must be a key-value pair.
1313
+ # * Keys have a minimum length of 1 character and a maximum length of
1314
+ # 63 characters, and cannot be empty. Values can be empty, and have
1315
+ # a maximum length of 63 characters.
1316
+ # * Keys and values can contain only lowercase letters, numeric characters,
1317
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
1318
+ # international characters are allowed.
1319
+ # * The key portion of a label must be unique. However, you can use the
1320
+ # same key with multiple resources.
1321
+ # * Keys must start with a lowercase letter or international character.
1312
1322
  #
1313
1323
  # @!group Attributes
1314
1324
  #
@@ -1490,8 +1500,9 @@ module Google
1490
1500
  # BigQuery does not allow you to change partitioning on an existing
1491
1501
  # table.
1492
1502
  #
1493
- # @param [String] type The time partition type. Currently the only
1494
- # supported value is "DAY".
1503
+ # @param [String] type The time partition type. The supported types are `DAY`,
1504
+ # `HOUR`, `MONTH`, and `YEAR`, which will generate one partition per day,
1505
+ # hour, month, and year, respectively.
1495
1506
  #
1496
1507
  # @example
1497
1508
  # require "google/cloud/bigquery"