google-cloud-bigquery 1.21.2 → 1.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f38543236358fc319ecfcc2058ffa499e24034027aa56644924d2cf496815550
4
- data.tar.gz: 37196fa1c3db03e48df4cb0ae5ae5b9b1ee07b9d112063467ebc57d25b34551a
3
+ metadata.gz: 5fadedabb79035a0b983765731a3aee2af5ca5480671e4255e8644cfb6e8c68c
4
+ data.tar.gz: 9ae351ed3987f2a81e8d9046c88ab776e3ac39f37cac5c6961fa3c2daf33b8bb
5
5
  SHA512:
6
- metadata.gz: a234359fb8a04b42f22725f5540cf0378b7e51b3d847ddd4f24ba41f2a54b4de85068bb9aee10089995c29794c0a64694223ac6bf88202eda6a646ce02c85275
7
- data.tar.gz: 99c6c915df70afde18de06907802c60d18df253238e6f3ab752827ea3cfc0aee0ab9bef8dc0816f5980db26334dd2b15ce0a2ecf7db3b026e2f0caa101dcafc8
6
+ metadata.gz: eec1a19be0af9113a94183b9c0eeaa4edf1e08b177be10c449396541d8c6970e4ec666f3fbbb0613107989afc078799beb44655e085f360e3c97846e797bba15
7
+ data.tar.gz: 5f89e947e34384131026fdafd0bdd05f05ed742f6a52b4c18ae0da83f433da2f2f090a924ff82bdf480b79eac9267b923732144a0b32773e12e8b3b846b61a28
@@ -1,5 +1,16 @@
1
1
  # Release History
2
2
 
3
+ ### 1.22.0 / 2020-09-10
4
+
5
+ #### Features
6
+
7
+ * Add support for ML model export
8
+ * Add model support to Project#extract and #extract_job
9
+ * Add ExtractJob#model?
10
+ * Add ExtractJob#ml_tf_saved_model?
11
+ * Add ExtractJob#ml_xgboost_booster?
12
+ * Add Model#extract and #extract_job
13
+
3
14
  ### 1.21.2 / 2020-07-21
4
15
 
5
16
  #### Documentation
@@ -318,7 +318,9 @@ module Google
318
318
  "parquet" => "PARQUET",
319
319
  "datastore" => "DATASTORE_BACKUP",
320
320
  "backup" => "DATASTORE_BACKUP",
321
- "datastore_backup" => "DATASTORE_BACKUP"
321
+ "datastore_backup" => "DATASTORE_BACKUP",
322
+ "ml_tf_saved_model" => "ML_TF_SAVED_MODEL",
323
+ "ml_xgboost_booster" => "ML_XGBOOST_BOOSTER"
322
324
  }[format.to_s.downcase]
323
325
  return val unless val.nil?
324
326
  format
@@ -272,12 +272,21 @@ module Google
272
272
  # Sets the labels to use for the job.
273
273
  #
274
274
  # @param [Hash] value A hash of user-provided labels associated with
275
- # the job. You can use these to organize and group your jobs. Label
276
- # keys and values can be no longer than 63 characters, can only
277
- # contain lowercase letters, numeric characters, underscores and
278
- # dashes. International characters are allowed. Label values are
279
- # optional. Label keys must start with a letter and each label in
280
- # the list must have a different key.
275
+ # the job. You can use these to organize and group your jobs.
276
+ #
277
+ # The labels applied to a resource must meet the following requirements:
278
+ #
279
+ # * Each resource can have multiple labels, up to a maximum of 64.
280
+ # * Each label must be a key-value pair.
281
+ # * Keys have a minimum length of 1 character and a maximum length of
282
+ # 63 characters, and cannot be empty. Values can be empty, and have
283
+ # a maximum length of 63 characters.
284
+ # * Keys and values can contain only lowercase letters, numeric characters,
285
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
286
+ # international characters are allowed.
287
+ # * The key portion of a label must be unique. However, you can use the
288
+ # same key with multiple resources.
289
+ # * Keys must start with a lowercase letter or international character.
281
290
  #
282
291
  # @!group Attributes
283
292
  def labels= value
@@ -313,12 +313,19 @@ module Google
313
313
  # @param [Hash<String, String>] labels A hash containing key/value
314
314
  # pairs.
315
315
  #
316
- # * Label keys and values can be no longer than 63 characters.
317
- # * Label keys and values can contain only lowercase letters, numbers,
318
- # underscores, hyphens, and international characters.
319
- # * Label keys and values cannot exceed 128 bytes in size.
320
- # * Label keys must begin with a letter.
321
- # * Label keys must be unique within a dataset.
316
+ # The labels applied to a resource must meet the following requirements:
317
+ #
318
+ # * Each resource can have multiple labels, up to a maximum of 64.
319
+ # * Each label must be a key-value pair.
320
+ # * Keys have a minimum length of 1 character and a maximum length of
321
+ # 63 characters, and cannot be empty. Values can be empty, and have
322
+ # a maximum length of 63 characters.
323
+ # * Keys and values can contain only lowercase letters, numeric characters,
324
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
325
+ # international characters are allowed.
326
+ # * The key portion of a label must be unique. However, you can use the
327
+ # same key with multiple resources.
328
+ # * Keys must start with a lowercase letter or international character.
322
329
  #
323
330
  # @example
324
331
  # require "google/cloud/bigquery"
@@ -1171,13 +1178,21 @@ module Google
1171
1178
  # is 1,024 characters. If `job_id` is provided, then `prefix` will not
1172
1179
  # be used.
1173
1180
  # @param [Hash] labels A hash of user-provided labels associated with
1174
- # the job. You can use these to organize and group your jobs. Label
1175
- # keys and values can be no longer than 63 characters, can only
1176
- # contain lowercase letters, numeric characters, underscores and
1177
- # dashes. International characters are allowed. Label values are
1178
- # optional. Label keys must start with a letter and each label in the
1179
- # list must have a different key. See [Requirements for
1180
- # labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
1181
+ # the job. You can use these to organize and group your jobs.
1182
+ #
1183
+ # The labels applied to a resource must meet the following requirements:
1184
+ #
1185
+ # * Each resource can have multiple labels, up to a maximum of 64.
1186
+ # * Each label must be a key-value pair.
1187
+ # * Keys have a minimum length of 1 character and a maximum length of
1188
+ # 63 characters, and cannot be empty. Values can be empty, and have
1189
+ # a maximum length of 63 characters.
1190
+ # * Keys and values can contain only lowercase letters, numeric characters,
1191
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
1192
+ # international characters are allowed.
1193
+ # * The key portion of a label must be unique. However, you can use the
1194
+ # same key with multiple resources.
1195
+ # * Keys must start with a lowercase letter or international character.
1181
1196
  # @param [Array<String>, String] udfs User-defined function resources
1182
1197
  # used in a legacy SQL query. May be either a code resource to load from
1183
1198
  # a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
@@ -1792,13 +1807,21 @@ module Google
1792
1807
  # is 1,024 characters. If `job_id` is provided, then `prefix` will not
1793
1808
  # be used.
1794
1809
  # @param [Hash] labels A hash of user-provided labels associated with
1795
- # the job. You can use these to organize and group your jobs. Label
1796
- # keys and values can be no longer than 63 characters, can only
1797
- # contain lowercase letters, numeric characters, underscores and
1798
- # dashes. International characters are allowed. Label values are
1799
- # optional. Label keys must start with a letter and each label in the
1800
- # list must have a different key. See [Requirements for
1801
- # labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
1810
+ # the job. You can use these to organize and group your jobs.
1811
+ #
1812
+ # The labels applied to a resource must meet the following requirements:
1813
+ #
1814
+ # * Each resource can have multiple labels, up to a maximum of 64.
1815
+ # * Each label must be a key-value pair.
1816
+ # * Keys have a minimum length of 1 character and a maximum length of
1817
+ # 63 characters, and cannot be empty. Values can be empty, and have
1818
+ # a maximum length of 63 characters.
1819
+ # * Keys and values can contain only lowercase letters, numeric characters,
1820
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
1821
+ # international characters are allowed.
1822
+ # * The key portion of a label must be unique. However, you can use the
1823
+ # same key with multiple resources.
1824
+ # * Keys must start with a lowercase letter or international character.
1802
1825
  # @yield [updater] A block for setting the schema and other
1803
1826
  # options for the destination table. The schema can be omitted if the
1804
1827
  # destination table already exists, or if you're loading data from a
@@ -20,15 +20,17 @@ module Google
20
20
  # # ExtractJob
21
21
  #
22
22
  # A {Job} subclass representing an export operation that may be performed
23
- # on a {Table}. A ExtractJob instance is created when you call
24
- # {Table#extract_job}.
23
+ # on a {Table} or {Model}. An ExtractJob instance is returned when you call
24
+ # {Project#extract_job}, {Table#extract_job} or {Model#extract_job}.
25
25
  #
26
26
  # @see https://cloud.google.com/bigquery/docs/exporting-data
27
- # Exporting Data From BigQuery
27
+ # Exporting table data
28
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
29
+ # Exporting models
28
30
  # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
29
31
  # reference
30
32
  #
31
- # @example
33
+ # @example Export table data
32
34
  # require "google/cloud/bigquery"
33
35
  #
34
36
  # bigquery = Google::Cloud::Bigquery.new
@@ -40,6 +42,18 @@ module Google
40
42
  # extract_job.wait_until_done!
41
43
  # extract_job.done? #=> true
42
44
  #
45
+ # @example Export a model
46
+ # require "google/cloud/bigquery"
47
+ #
48
+ # bigquery = Google::Cloud::Bigquery.new
49
+ # dataset = bigquery.dataset "my_dataset"
50
+ # model = dataset.model "my_model"
51
+ #
52
+ # extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
53
+ #
54
+ # extract_job.wait_until_done!
55
+ # extract_job.done? #=> true
56
+ #
43
57
  class ExtractJob < Job
44
58
  ##
45
59
  # The URI or URIs representing the Google Cloud Storage files to which
@@ -49,71 +63,130 @@ module Google
49
63
  end
50
64
 
51
65
  ##
52
- # The table from which the data is exported. This is the table upon
53
- # which {Table#extract_job} was called.
66
+ # The table or model which is exported.
54
67
  #
55
- # @return [Table] A table instance.
68
+ # @return [Table, Model, nil] A table or model instance, or `nil`.
56
69
  #
57
70
  def source
58
- table = @gapi.configuration.extract.source_table
59
- return nil unless table
60
- retrieve_table table.project_id, table.dataset_id, table.table_id
71
+ if (table = @gapi.configuration.extract.source_table)
72
+ retrieve_table table.project_id, table.dataset_id, table.table_id
73
+ elsif (model = @gapi.configuration.extract.source_model)
74
+ retrieve_model model.project_id, model.dataset_id, model.model_id
75
+ end
61
76
  end
62
77
 
63
78
  ##
64
- # Checks if the export operation compresses the data using gzip. The
65
- # default is `false`.
79
+ # Whether the source of the export job is a table. See {#source}.
66
80
  #
67
- # @return [Boolean] `true` when `GZIP`, `false` otherwise.
81
+ # @return [Boolean] `true` when the source is a table, `false`
82
+ # otherwise.
83
+ #
84
+ def table?
85
+ !@gapi.configuration.extract.source_table.nil?
86
+ end
87
+
88
+ ##
89
+ # Whether the source of the export job is a model. See {#source}.
90
+ #
91
+ # @return [Boolean] `true` when the source is a model, `false`
92
+ # otherwise.
93
+ #
94
+ def model?
95
+ !@gapi.configuration.extract.source_model.nil?
96
+ end
97
+
98
+ ##
99
+ # Checks if the export operation compresses the data using gzip. The
100
+ # default is `false`. Not applicable when extracting models.
68
101
  #
102
+ # @return [Boolean] `true` when `GZIP`, `false` if not `GZIP` or not a
103
+ # table extraction.
69
104
  def compression?
105
+ return false unless table?
70
106
  val = @gapi.configuration.extract.compression
71
107
  val == "GZIP"
72
108
  end
73
109
 
74
110
  ##
75
- # Checks if the destination format for the data is [newline-delimited
76
- # JSON](http://jsonlines.org/). The default is `false`.
111
+ # Checks if the destination format for the table data is [newline-delimited
112
+ # JSON](http://jsonlines.org/). The default is `false`. Not applicable when
113
+ # extracting models.
77
114
  #
78
- # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false`
79
- # otherwise.
115
+ # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false` if not
116
+ # `NEWLINE_DELIMITED_JSON` or not a table extraction.
80
117
  #
81
118
  def json?
119
+ return false unless table?
82
120
  val = @gapi.configuration.extract.destination_format
83
121
  val == "NEWLINE_DELIMITED_JSON"
84
122
  end
85
123
 
86
124
  ##
87
- # Checks if the destination format for the data is CSV. Tables with
125
+ # Checks if the destination format for the table data is CSV. Tables with
88
126
  # nested or repeated fields cannot be exported as CSV. The default is
89
- # `true`.
127
+ # `true` for tables. Not applicable when extracting models.
90
128
  #
91
- # @return [Boolean] `true` when `CSV`, `false` otherwise.
129
+ # @return [Boolean] `true` when `CSV`, or `false` if not `CSV` or not a
130
+ # table extraction.
92
131
  #
93
132
  def csv?
133
+ return false unless table?
94
134
  val = @gapi.configuration.extract.destination_format
95
135
  return true if val.nil?
96
136
  val == "CSV"
97
137
  end
98
138
 
99
139
  ##
100
- # Checks if the destination format for the data is
101
- # [Avro](http://avro.apache.org/). The default is `false`.
140
+ # Checks if the destination format for the table data is
141
+ # [Avro](http://avro.apache.org/). The default is `false`. Not applicable
142
+ # when extracting models.
102
143
  #
103
- # @return [Boolean] `true` when `AVRO`, `false` otherwise.
144
+ # @return [Boolean] `true` when `AVRO`, `false` if not `AVRO` or not a
145
+ # table extraction.
104
146
  #
105
147
  def avro?
148
+ return false unless table?
106
149
  val = @gapi.configuration.extract.destination_format
107
150
  val == "AVRO"
108
151
  end
109
152
 
153
+ ##
154
+ # Checks if the destination format for the model is TensorFlow SavedModel.
155
+ # The default is `true` for models. Not applicable when extracting tables.
156
+ #
157
+ # @return [Boolean] `true` when `ML_TF_SAVED_MODEL`, `false` if not
158
+ # `ML_TF_SAVED_MODEL` or not a model extraction.
159
+ #
160
+ def ml_tf_saved_model?
161
+ return false unless model?
162
+ val = @gapi.configuration.extract.destination_format
163
+ return true if val.nil?
164
+ val == "ML_TF_SAVED_MODEL"
165
+ end
166
+
167
+ ##
168
+ # Checks if the destination format for the model is XGBoost. The default
169
+ # is `false`. Not applicable when extracting tables.
170
+ #
171
+ # @return [Boolean] `true` when `ML_XGBOOST_BOOSTER`, `false` if not
172
+ # `ML_XGBOOST_BOOSTER` or not a model extraction.
173
+ #
174
+ def ml_xgboost_booster?
175
+ return false unless model?
176
+ val = @gapi.configuration.extract.destination_format
177
+ val == "ML_XGBOOST_BOOSTER"
178
+ end
179
+
110
180
  ##
111
181
  # The character or symbol the operation uses to delimit fields in the
112
- # exported data. The default is a comma (,).
182
+ # exported data. The default is a comma (,) for tables. Not applicable
183
+ # when extracting models.
113
184
  #
114
- # @return [String] A string containing the character, such as `","`.
185
+ # @return [String, nil] A string containing the character, such as `","`,
186
+ # `nil` if not a table extraction.
115
187
  #
116
188
  def delimiter
189
+ return unless table?
117
190
  val = @gapi.configuration.extract.field_delimiter
118
191
  val = "," if val.nil?
119
192
  val
@@ -121,12 +194,13 @@ module Google
121
194
 
122
195
  ##
123
196
  # Checks if the exported data contains a header row. The default is
124
- # `true`.
197
+ # `true` for tables. Not applicable when extracting models.
125
198
  #
126
199
  # @return [Boolean] `true` when the print header configuration is
127
- # present or `nil`, `false` otherwise.
200
+ # present or `nil`, `false` if disabled or not a table extraction.
128
201
  #
129
202
  def print_header?
203
+ return false unless table?
130
204
  val = @gapi.configuration.extract.print_header
131
205
  val = true if val.nil?
132
206
  val
@@ -159,12 +233,14 @@ module Google
159
233
  # whether to enable extracting applicable column types (such as
160
234
  # `TIMESTAMP`) to their corresponding AVRO logical types
161
235
  # (`timestamp-micros`), instead of only using their raw types
162
- # (`avro-long`).
236
+ # (`avro-long`). Not applicable when extracting models.
163
237
  #
164
238
  # @return [Boolean] `true` when applicable column types will use their
165
- # corresponding AVRO logical types, `false` otherwise.
239
+ # corresponding AVRO logical types, `false` if not enabled or not a
240
+ # table extraction.
166
241
  #
167
242
  def use_avro_logical_types?
243
+ return false unless table?
168
244
  @gapi.configuration.extract.use_avro_logical_types
169
245
  end
170
246
 
@@ -182,19 +258,24 @@ module Google
182
258
  #
183
259
  # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
184
260
  # configuration object for setting extract options.
185
- def self.from_options service, table, storage_files, options
261
+ def self.from_options service, source, storage_files, options
186
262
  job_ref = service.job_ref_from options[:job_id], options[:prefix]
187
263
  storage_urls = Array(storage_files).map do |url|
188
264
  url.respond_to?(:to_gs_url) ? url.to_gs_url : url
189
265
  end
190
266
  options[:format] ||= Convert.derive_source_format storage_urls.first
267
+ extract_config = Google::Apis::BigqueryV2::JobConfigurationExtract.new(
268
+ destination_uris: Array(storage_urls)
269
+ )
270
+ if source.is_a? Google::Apis::BigqueryV2::TableReference
271
+ extract_config.source_table = source
272
+ elsif source.is_a? Google::Apis::BigqueryV2::ModelReference
273
+ extract_config.source_model = source
274
+ end
191
275
  job = Google::Apis::BigqueryV2::Job.new(
192
276
  job_reference: job_ref,
193
277
  configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
194
- extract: Google::Apis::BigqueryV2::JobConfigurationExtract.new(
195
- destination_uris: Array(storage_urls),
196
- source_table: table
197
- ),
278
+ extract: extract_config,
198
279
  dry_run: options[:dryrun]
199
280
  )
200
281
  )
@@ -253,7 +334,7 @@ module Google
253
334
  end
254
335
 
255
336
  ##
256
- # Sets the compression type.
337
+ # Sets the compression type. Not applicable when extracting models.
257
338
  #
258
339
  # @param [String] value The compression type to use for exported
259
340
  # files. Possible values include `GZIP` and `NONE`. The default
@@ -265,7 +346,7 @@ module Google
265
346
  end
266
347
 
267
348
  ##
268
- # Sets the field delimiter.
349
+ # Sets the field delimiter. Not applicable when extracting models.
269
350
  #
270
351
  # @param [String] value Delimiter to use between fields in the
271
352
  # exported data. Default is <code>,</code>.
@@ -276,14 +357,21 @@ module Google
276
357
  end
277
358
 
278
359
  ##
279
- # Sets the destination file format. The default value is `csv`.
360
+ # Sets the destination file format. The default value for
361
+ # tables is `csv`. Tables with nested or repeated fields cannot be
362
+ # exported as CSV. The default value for models is `ml_tf_saved_model`.
280
363
  #
281
- # The following values are supported:
364
+ # Supported values for tables:
282
365
  #
283
366
  # * `csv` - CSV
284
367
  # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
285
368
  # * `avro` - [Avro](http://avro.apache.org/)
286
369
  #
370
+ # Supported values for models:
371
+ #
372
+ # * `ml_tf_saved_model` - TensorFlow SavedModel
373
+ # * `ml_xgboost_booster` - XGBoost Booster
374
+ #
287
375
  # @param [String] new_format The new destination format.
288
376
  #
289
377
  # @!group Attributes
@@ -293,7 +381,8 @@ module Google
293
381
  end
294
382
 
295
383
  ##
296
- # Print a header row in the exported file.
384
+ # Print a header row in the exported file. Not applicable when
385
+ # extracting models.
297
386
  #
298
387
  # @param [Boolean] value Whether to print out a header row in the
299
388
  # results. Default is `true`.
@@ -307,12 +396,21 @@ module Google
307
396
  # Sets the labels to use for the job.
308
397
  #
309
398
  # @param [Hash] value A hash of user-provided labels associated with
310
- # the job. You can use these to organize and group your jobs. Label
311
- # keys and values can be no longer than 63 characters, can only
312
- # contain lowercase letters, numeric characters, underscores and
313
- # dashes. International characters are allowed. Label values are
314
- # optional. Label keys must start with a letter and each label in
315
- # the list must have a different key.
399
+ # the job. You can use these to organize and group your jobs.
400
+ #
401
+ # The labels applied to a resource must meet the following requirements:
402
+ #
403
+ # * Each resource can have multiple labels, up to a maximum of 64.
404
+ # * Each label must be a key-value pair.
405
+ # * Keys have a minimum length of 1 character and a maximum length of
406
+ # 63 characters, and cannot be empty. Values can be empty, and have
407
+ # a maximum length of 63 characters.
408
+ # * Keys and values can contain only lowercase letters, numeric characters,
409
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
410
+ # international characters are allowed.
411
+ # * The key portion of a label must be unique. However, you can use the
412
+ # same key with multiple resources.
413
+ # * Keys must start with a lowercase letter or international character.
316
414
  #
317
415
  # @!group Attributes
318
416
  #
@@ -362,6 +460,16 @@ module Google
362
460
  @gapi
363
461
  end
364
462
  end
463
+
464
+ protected
465
+
466
+ def retrieve_model project_id, dataset_id, model_id
467
+ ensure_service!
468
+ gapi = service.get_project_model project_id, dataset_id, model_id
469
+ Model.from_gapi_json gapi, service
470
+ rescue Google::Cloud::NotFoundError
471
+ nil
472
+ end
365
473
  end
366
474
  end
367
475
  end