google-cloud-bigquery 1.21.1 → 1.25.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -45,9 +45,12 @@ module Google
45
45
  # data = bigquery.query "SELECT * FROM my_ext_table",
46
46
  # external: { my_ext_table: csv_table }
47
47
  #
48
+ # # Iterate over the first page of results
48
49
  # data.each do |row|
49
50
  # puts row[:name]
50
51
  # end
52
+ # # Retrieve the next page of results
53
+ # data = data.next if data.next?
51
54
  #
52
55
  module External
53
56
  ##
@@ -138,9 +141,12 @@ module Google
138
141
  # data = bigquery.query "SELECT * FROM my_ext_table",
139
142
  # external: { my_ext_table: avro_table }
140
143
  #
144
+ # # Iterate over the first page of results
141
145
  # data.each do |row|
142
146
  # puts row[:name]
143
147
  # end
148
+ # # Retrieve the next page of results
149
+ # data = data.next if data.next?
144
150
  #
145
151
  class DataSource
146
152
  ##
@@ -575,9 +581,12 @@ module Google
575
581
  # data = bigquery.query "SELECT * FROM my_ext_table",
576
582
  # external: { my_ext_table: csv_table }
577
583
  #
584
+ # # Iterate over the first page of results
578
585
  # data.each do |row|
579
586
  # puts row[:name]
580
587
  # end
588
+ # # Retrieve the next page of results
589
+ # data = data.next if data.next?
581
590
  #
582
591
  class CsvSource < External::DataSource
583
592
  ##
@@ -1037,9 +1046,12 @@ module Google
1037
1046
  # data = bigquery.query "SELECT * FROM my_ext_table",
1038
1047
  # external: { my_ext_table: json_table }
1039
1048
  #
1049
+ # # Iterate over the first page of results
1040
1050
  # data.each do |row|
1041
1051
  # puts row[:name]
1042
1052
  # end
1053
+ # # Retrieve the next page of results
1054
+ # data = data.next if data.next?
1043
1055
  #
1044
1056
  class JsonSource < External::DataSource
1045
1057
  ##
@@ -1173,9 +1185,12 @@ module Google
1173
1185
  # data = bigquery.query "SELECT * FROM my_ext_table",
1174
1186
  # external: { my_ext_table: sheets_table }
1175
1187
  #
1188
+ # # Iterate over the first page of results
1176
1189
  # data.each do |row|
1177
1190
  # puts row[:name]
1178
1191
  # end
1192
+ # # Retrieve the next page of results
1193
+ # data = data.next if data.next?
1179
1194
  #
1180
1195
  class SheetsSource < External::DataSource
1181
1196
  ##
@@ -1318,9 +1333,12 @@ module Google
1318
1333
  # data = bigquery.query "SELECT * FROM my_ext_table",
1319
1334
  # external: { my_ext_table: bigtable_table }
1320
1335
  #
1336
+ # # Iterate over the first page of results
1321
1337
  # data.each do |row|
1322
1338
  # puts row[:name]
1323
1339
  # end
1340
+ # # Retrieve the next page of results
1341
+ # data = data.next if data.next?
1324
1342
  #
1325
1343
  class BigtableSource < External::DataSource
1326
1344
  ##
@@ -1516,9 +1534,12 @@ module Google
1516
1534
  # data = bigquery.query "SELECT * FROM my_ext_table",
1517
1535
  # external: { my_ext_table: bigtable_table }
1518
1536
  #
1537
+ # # Iterate over the first page of results
1519
1538
  # data.each do |row|
1520
1539
  # puts row[:name]
1521
1540
  # end
1541
+ # # Retrieve the next page of results
1542
+ # data = data.next if data.next?
1522
1543
  #
1523
1544
  class ColumnFamily
1524
1545
  ##
@@ -2053,9 +2074,12 @@ module Google
2053
2074
  # data = bigquery.query "SELECT * FROM my_ext_table",
2054
2075
  # external: { my_ext_table: bigtable_table }
2055
2076
  #
2077
+ # # Iterate over the first page of results
2056
2078
  # data.each do |row|
2057
2079
  # puts row[:name]
2058
2080
  # end
2081
+ # # Retrieve the next page of results
2082
+ # data = data.next if data.next?
2059
2083
  #
2060
2084
  class Column
2061
2085
  ##
@@ -20,15 +20,17 @@ module Google
20
20
  # # ExtractJob
21
21
  #
22
22
  # A {Job} subclass representing an export operation that may be performed
23
- # on a {Table}. A ExtractJob instance is created when you call
24
- # {Table#extract_job}.
23
+ # on a {Table} or {Model}. An ExtractJob instance is returned when you call
24
+ # {Project#extract_job}, {Table#extract_job} or {Model#extract_job}.
25
25
  #
26
26
  # @see https://cloud.google.com/bigquery/docs/exporting-data
27
- # Exporting Data From BigQuery
27
+ # Exporting table data
28
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
29
+ # Exporting models
28
30
  # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
29
31
  # reference
30
32
  #
31
- # @example
33
+ # @example Export table data
32
34
  # require "google/cloud/bigquery"
33
35
  #
34
36
  # bigquery = Google::Cloud::Bigquery.new
@@ -40,6 +42,18 @@ module Google
40
42
  # extract_job.wait_until_done!
41
43
  # extract_job.done? #=> true
42
44
  #
45
+ # @example Export a model
46
+ # require "google/cloud/bigquery"
47
+ #
48
+ # bigquery = Google::Cloud::Bigquery.new
49
+ # dataset = bigquery.dataset "my_dataset"
50
+ # model = dataset.model "my_model"
51
+ #
52
+ # extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
53
+ #
54
+ # extract_job.wait_until_done!
55
+ # extract_job.done? #=> true
56
+ #
43
57
  class ExtractJob < Job
44
58
  ##
45
59
  # The URI or URIs representing the Google Cloud Storage files to which
@@ -49,71 +63,130 @@ module Google
49
63
  end
50
64
 
51
65
  ##
52
- # The table from which the data is exported. This is the table upon
53
- # which {Table#extract_job} was called.
66
+ # The table or model which is exported.
54
67
  #
55
- # @return [Table] A table instance.
68
+ # @return [Table, Model, nil] A table or model instance, or `nil`.
56
69
  #
57
70
  def source
58
- table = @gapi.configuration.extract.source_table
59
- return nil unless table
60
- retrieve_table table.project_id, table.dataset_id, table.table_id
71
+ if (table = @gapi.configuration.extract.source_table)
72
+ retrieve_table table.project_id, table.dataset_id, table.table_id
73
+ elsif (model = @gapi.configuration.extract.source_model)
74
+ retrieve_model model.project_id, model.dataset_id, model.model_id
75
+ end
61
76
  end
62
77
 
63
78
  ##
64
- # Checks if the export operation compresses the data using gzip. The
65
- # default is `false`.
79
+ # Whether the source of the export job is a table. See {#source}.
66
80
  #
67
- # @return [Boolean] `true` when `GZIP`, `false` otherwise.
81
+ # @return [Boolean] `true` when the source is a table, `false`
82
+ # otherwise.
83
+ #
84
+ def table?
85
+ !@gapi.configuration.extract.source_table.nil?
86
+ end
87
+
88
+ ##
89
+ # Whether the source of the export job is a model. See {#source}.
90
+ #
91
+ # @return [Boolean] `true` when the source is a model, `false`
92
+ # otherwise.
93
+ #
94
+ def model?
95
+ !@gapi.configuration.extract.source_model.nil?
96
+ end
97
+
98
+ ##
99
+ # Checks if the export operation compresses the data using gzip. The
100
+ # default is `false`. Not applicable when extracting models.
68
101
  #
102
+ # @return [Boolean] `true` when `GZIP`, `false` if not `GZIP` or not a
103
+ # table extraction.
69
104
  def compression?
105
+ return false unless table?
70
106
  val = @gapi.configuration.extract.compression
71
107
  val == "GZIP"
72
108
  end
73
109
 
74
110
  ##
75
- # Checks if the destination format for the data is [newline-delimited
76
- # JSON](http://jsonlines.org/). The default is `false`.
111
+ # Checks if the destination format for the table data is [newline-delimited
112
+ # JSON](http://jsonlines.org/). The default is `false`. Not applicable when
113
+ # extracting models.
77
114
  #
78
- # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false`
79
- # otherwise.
115
+ # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false` if not
116
+ # `NEWLINE_DELIMITED_JSON` or not a table extraction.
80
117
  #
81
118
  def json?
119
+ return false unless table?
82
120
  val = @gapi.configuration.extract.destination_format
83
121
  val == "NEWLINE_DELIMITED_JSON"
84
122
  end
85
123
 
86
124
  ##
87
- # Checks if the destination format for the data is CSV. Tables with
125
+ # Checks if the destination format for the table data is CSV. Tables with
88
126
  # nested or repeated fields cannot be exported as CSV. The default is
89
- # `true`.
127
+ # `true` for tables. Not applicable when extracting models.
90
128
  #
91
- # @return [Boolean] `true` when `CSV`, `false` otherwise.
129
+ # @return [Boolean] `true` when `CSV`, or `false` if not `CSV` or not a
130
+ # table extraction.
92
131
  #
93
132
  def csv?
133
+ return false unless table?
94
134
  val = @gapi.configuration.extract.destination_format
95
135
  return true if val.nil?
96
136
  val == "CSV"
97
137
  end
98
138
 
99
139
  ##
100
- # Checks if the destination format for the data is
101
- # [Avro](http://avro.apache.org/). The default is `false`.
140
+ # Checks if the destination format for the table data is
141
+ # [Avro](http://avro.apache.org/). The default is `false`. Not applicable
142
+ # when extracting models.
102
143
  #
103
- # @return [Boolean] `true` when `AVRO`, `false` otherwise.
144
+ # @return [Boolean] `true` when `AVRO`, `false` if not `AVRO` or not a
145
+ # table extraction.
104
146
  #
105
147
  def avro?
148
+ return false unless table?
106
149
  val = @gapi.configuration.extract.destination_format
107
150
  val == "AVRO"
108
151
  end
109
152
 
153
+ ##
154
+ # Checks if the destination format for the model is TensorFlow SavedModel.
155
+ # The default is `true` for models. Not applicable when extracting tables.
156
+ #
157
+ # @return [Boolean] `true` when `ML_TF_SAVED_MODEL`, `false` if not
158
+ # `ML_TF_SAVED_MODEL` or not a model extraction.
159
+ #
160
+ def ml_tf_saved_model?
161
+ return false unless model?
162
+ val = @gapi.configuration.extract.destination_format
163
+ return true if val.nil?
164
+ val == "ML_TF_SAVED_MODEL"
165
+ end
166
+
167
+ ##
168
+ # Checks if the destination format for the model is XGBoost. The default
169
+ # is `false`. Not applicable when extracting tables.
170
+ #
171
+ # @return [Boolean] `true` when `ML_XGBOOST_BOOSTER`, `false` if not
172
+ # `ML_XGBOOST_BOOSTER` or not a model extraction.
173
+ #
174
+ def ml_xgboost_booster?
175
+ return false unless model?
176
+ val = @gapi.configuration.extract.destination_format
177
+ val == "ML_XGBOOST_BOOSTER"
178
+ end
179
+
110
180
  ##
111
181
  # The character or symbol the operation uses to delimit fields in the
112
- # exported data. The default is a comma (,).
182
+ # exported data. The default is a comma (,) for tables. Not applicable
183
+ # when extracting models.
113
184
  #
114
- # @return [String] A string containing the character, such as `","`.
185
+ # @return [String, nil] A string containing the character, such as `","`,
186
+ # `nil` if not a table extraction.
115
187
  #
116
188
  def delimiter
189
+ return unless table?
117
190
  val = @gapi.configuration.extract.field_delimiter
118
191
  val = "," if val.nil?
119
192
  val
@@ -121,12 +194,13 @@ module Google
121
194
 
122
195
  ##
123
196
  # Checks if the exported data contains a header row. The default is
124
- # `true`.
197
+ # `true` for tables. Not applicable when extracting models.
125
198
  #
126
199
  # @return [Boolean] `true` when the print header configuration is
127
- # present or `nil`, `false` otherwise.
200
+ # present or `nil`, `false` if disabled or not a table extraction.
128
201
  #
129
202
  def print_header?
203
+ return false unless table?
130
204
  val = @gapi.configuration.extract.print_header
131
205
  val = true if val.nil?
132
206
  val
@@ -159,12 +233,14 @@ module Google
159
233
  # whether to enable extracting applicable column types (such as
160
234
  # `TIMESTAMP`) to their corresponding AVRO logical types
161
235
  # (`timestamp-micros`), instead of only using their raw types
162
- # (`avro-long`).
236
+ # (`avro-long`). Not applicable when extracting models.
163
237
  #
164
238
  # @return [Boolean] `true` when applicable column types will use their
165
- # corresponding AVRO logical types, `false` otherwise.
239
+ # corresponding AVRO logical types, `false` if not enabled or not a
240
+ # table extraction.
166
241
  #
167
242
  def use_avro_logical_types?
243
+ return false unless table?
168
244
  @gapi.configuration.extract.use_avro_logical_types
169
245
  end
170
246
 
@@ -182,19 +258,24 @@ module Google
182
258
  #
183
259
  # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
184
260
  # configuration object for setting query options.
185
- def self.from_options service, table, storage_files, options
261
+ def self.from_options service, source, storage_files, options
186
262
  job_ref = service.job_ref_from options[:job_id], options[:prefix]
187
263
  storage_urls = Array(storage_files).map do |url|
188
264
  url.respond_to?(:to_gs_url) ? url.to_gs_url : url
189
265
  end
190
266
  options[:format] ||= Convert.derive_source_format storage_urls.first
267
+ extract_config = Google::Apis::BigqueryV2::JobConfigurationExtract.new(
268
+ destination_uris: Array(storage_urls)
269
+ )
270
+ if source.is_a? Google::Apis::BigqueryV2::TableReference
271
+ extract_config.source_table = source
272
+ elsif source.is_a? Google::Apis::BigqueryV2::ModelReference
273
+ extract_config.source_model = source
274
+ end
191
275
  job = Google::Apis::BigqueryV2::Job.new(
192
276
  job_reference: job_ref,
193
277
  configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
194
- extract: Google::Apis::BigqueryV2::JobConfigurationExtract.new(
195
- destination_uris: Array(storage_urls),
196
- source_table: table
197
- ),
278
+ extract: extract_config,
198
279
  dry_run: options[:dryrun]
199
280
  )
200
281
  )
@@ -253,7 +334,7 @@ module Google
253
334
  end
254
335
 
255
336
  ##
256
- # Sets the compression type.
337
+ # Sets the compression type. Not applicable when extracting models.
257
338
  #
258
339
  # @param [String] value The compression type to use for exported
259
340
  # files. Possible values include `GZIP` and `NONE`. The default
@@ -265,7 +346,7 @@ module Google
265
346
  end
266
347
 
267
348
  ##
268
- # Sets the field delimiter.
349
+ # Sets the field delimiter. Not applicable when extracting models.
269
350
  #
270
351
  # @param [String] value Delimiter to use between fields in the
271
352
  # exported data. Default is <code>,</code>.
@@ -276,14 +357,21 @@ module Google
276
357
  end
277
358
 
278
359
  ##
279
- # Sets the destination file format. The default value is `csv`.
360
+ # Sets the destination file format. The default value for
361
+ # tables is `csv`. Tables with nested or repeated fields cannot be
362
+ # exported as CSV. The default value for models is `ml_tf_saved_model`.
280
363
  #
281
- # The following values are supported:
364
+ # Supported values for tables:
282
365
  #
283
366
  # * `csv` - CSV
284
367
  # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
285
368
  # * `avro` - [Avro](http://avro.apache.org/)
286
369
  #
370
+ # Supported values for models:
371
+ #
372
+ # * `ml_tf_saved_model` - TensorFlow SavedModel
373
+ # * `ml_xgboost_booster` - XGBoost Booster
374
+ #
287
375
  # @param [String] new_format The new destination format.
288
376
  #
289
377
  # @!group Attributes
@@ -293,7 +381,8 @@ module Google
293
381
  end
294
382
 
295
383
  ##
296
- # Print a header row in the exported file.
384
+ # Print a header row in the exported file. Not applicable when
385
+ # extracting models.
297
386
  #
298
387
  # @param [Boolean] value Whether to print out a header row in the
299
388
  # results. Default is `true`.
@@ -307,12 +396,21 @@ module Google
307
396
  # Sets the labels to use for the job.
308
397
  #
309
398
  # @param [Hash] value A hash of user-provided labels associated with
310
- # the job. You can use these to organize and group your jobs. Label
311
- # keys and values can be no longer than 63 characters, can only
312
- # contain lowercase letters, numeric characters, underscores and
313
- # dashes. International characters are allowed. Label values are
314
- # optional. Label keys must start with a letter and each label in
315
- # the list must have a different key.
399
+ # the job. You can use these to organize and group your jobs.
400
+ #
401
+ # The labels applied to a resource must meet the following requirements:
402
+ #
403
+ # * Each resource can have multiple labels, up to a maximum of 64.
404
+ # * Each label must be a key-value pair.
405
+ # * Keys have a minimum length of 1 character and a maximum length of
406
+ # 63 characters, and cannot be empty. Values can be empty, and have
407
+ # a maximum length of 63 characters.
408
+ # * Keys and values can contain only lowercase letters, numeric characters,
409
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
410
+ # international characters are allowed.
411
+ # * The key portion of a label must be unique. However, you can use the
412
+ # same key with multiple resources.
413
+ # * Keys must start with a lowercase letter or international character.
316
414
  #
317
415
  # @!group Attributes
318
416
  #
@@ -362,6 +460,16 @@ module Google
362
460
  @gapi
363
461
  end
364
462
  end
463
+
464
+ protected
465
+
466
+ def retrieve_model project_id, dataset_id, model_id
467
+ ensure_service!
468
+ gapi = service.get_project_model project_id, dataset_id, model_id
469
+ Model.from_gapi_json gapi, service
470
+ rescue Google::Cloud::NotFoundError
471
+ nil
472
+ end
365
473
  end
366
474
  end
367
475
  end
@@ -428,8 +428,9 @@ module Google
428
428
  # The period for which the destination table will be time partitioned, if
429
429
  # any. See [Partitioned Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
430
430
  #
431
- # @return [String, nil] The time partition type. Currently the only supported
432
- # value is "DAY", or `nil` if not present.
431
+ # @return [String, nil] The time partition type. The supported types are `DAY`,
432
+ # `HOUR`, `MONTH`, and `YEAR`, which will generate one partition per day,
433
+ # hour, month, and year, respectively; or `nil` if not present.
433
434
  #
434
435
  # @!group Attributes
435
436
  #
@@ -1303,12 +1304,21 @@ module Google
1303
1304
  # Sets the labels to use for the load job.
1304
1305
  #
1305
1306
  # @param [Hash] val A hash of user-provided labels associated with
1306
- # the job. You can use these to organize and group your jobs. Label
1307
- # keys and values can be no longer than 63 characters, can only
1308
- # contain lowercase letters, numeric characters, underscores and
1309
- # dashes. International characters are allowed. Label values are
1310
- # optional. Label keys must start with a letter and each label in
1311
- # the list must have a different key.
1307
+ # the job. You can use these to organize and group your jobs.
1308
+ #
1309
+ # The labels applied to a resource must meet the following requirements:
1310
+ #
1311
+ # * Each resource can have multiple labels, up to a maximum of 64.
1312
+ # * Each label must be a key-value pair.
1313
+ # * Keys have a minimum length of 1 character and a maximum length of
1314
+ # 63 characters, and cannot be empty. Values can be empty, and have
1315
+ # a maximum length of 63 characters.
1316
+ # * Keys and values can contain only lowercase letters, numeric characters,
1317
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
1318
+ # international characters are allowed.
1319
+ # * The key portion of a label must be unique. However, you can use the
1320
+ # same key with multiple resources.
1321
+ # * Keys must start with a lowercase letter or international character.
1312
1322
  #
1313
1323
  # @!group Attributes
1314
1324
  #
@@ -1490,8 +1500,9 @@ module Google
1490
1500
  # BigQuery does not allow you to change partitioning on an existing
1491
1501
  # table.
1492
1502
  #
1493
- # @param [String] type The time partition type. Currently the only
1494
- # supported value is "DAY".
1503
+ # @param [String] type The time partition type. The supported types are `DAY`,
1504
+ # `HOUR`, `MONTH`, and `YEAR`, which will generate one partition per day,
1505
+ # hour, month, and year, respectively.
1495
1506
  #
1496
1507
  # @example
1497
1508
  # require "google/cloud/bigquery"