google-cloud-bigquery 1.21.2 → 1.22.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f38543236358fc319ecfcc2058ffa499e24034027aa56644924d2cf496815550
4
- data.tar.gz: 37196fa1c3db03e48df4cb0ae5ae5b9b1ee07b9d112063467ebc57d25b34551a
3
+ metadata.gz: 5fadedabb79035a0b983765731a3aee2af5ca5480671e4255e8644cfb6e8c68c
4
+ data.tar.gz: 9ae351ed3987f2a81e8d9046c88ab776e3ac39f37cac5c6961fa3c2daf33b8bb
5
5
  SHA512:
6
- metadata.gz: a234359fb8a04b42f22725f5540cf0378b7e51b3d847ddd4f24ba41f2a54b4de85068bb9aee10089995c29794c0a64694223ac6bf88202eda6a646ce02c85275
7
- data.tar.gz: 99c6c915df70afde18de06907802c60d18df253238e6f3ab752827ea3cfc0aee0ab9bef8dc0816f5980db26334dd2b15ce0a2ecf7db3b026e2f0caa101dcafc8
6
+ metadata.gz: eec1a19be0af9113a94183b9c0eeaa4edf1e08b177be10c449396541d8c6970e4ec666f3fbbb0613107989afc078799beb44655e085f360e3c97846e797bba15
7
+ data.tar.gz: 5f89e947e34384131026fdafd0bdd05f05ed742f6a52b4c18ae0da83f433da2f2f090a924ff82bdf480b79eac9267b923732144a0b32773e12e8b3b846b61a28
@@ -1,5 +1,16 @@
1
1
  # Release History
2
2
 
3
+ ### 1.22.0 / 2020-09-10
4
+
5
+ #### Features
6
+
7
+ * Add support for ML model export
8
+ * Add model support to Project#extract and #extract_job
9
+ * Add ExtractJob#model?
10
+ * Add ExtractJob#ml_tf_saved_model?
11
+ * Add ExtractJob#ml_xgboost_booster?
12
+ * Add Model#extract and #extract_job
13
+
3
14
  ### 1.21.2 / 2020-07-21
4
15
 
5
16
  #### Documentation
@@ -318,7 +318,9 @@ module Google
318
318
  "parquet" => "PARQUET",
319
319
  "datastore" => "DATASTORE_BACKUP",
320
320
  "backup" => "DATASTORE_BACKUP",
321
- "datastore_backup" => "DATASTORE_BACKUP"
321
+ "datastore_backup" => "DATASTORE_BACKUP",
322
+ "ml_tf_saved_model" => "ML_TF_SAVED_MODEL",
323
+ "ml_xgboost_booster" => "ML_XGBOOST_BOOSTER"
322
324
  }[format.to_s.downcase]
323
325
  return val unless val.nil?
324
326
  format
@@ -272,12 +272,21 @@ module Google
272
272
  # Sets the labels to use for the job.
273
273
  #
274
274
  # @param [Hash] value A hash of user-provided labels associated with
275
- # the job. You can use these to organize and group your jobs. Label
276
- # keys and values can be no longer than 63 characters, can only
277
- # contain lowercase letters, numeric characters, underscores and
278
- # dashes. International characters are allowed. Label values are
279
- # optional. Label keys must start with a letter and each label in
280
- # the list must have a different key.
275
+ # the job. You can use these to organize and group your jobs.
276
+ #
277
+ # The labels applied to a resource must meet the following requirements:
278
+ #
279
+ # * Each resource can have multiple labels, up to a maximum of 64.
280
+ # * Each label must be a key-value pair.
281
+ # * Keys have a minimum length of 1 character and a maximum length of
282
+ # 63 characters, and cannot be empty. Values can be empty, and have
283
+ # a maximum length of 63 characters.
284
+ # * Keys and values can contain only lowercase letters, numeric characters,
285
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
286
+ # international characters are allowed.
287
+ # * The key portion of a label must be unique. However, you can use the
288
+ # same key with multiple resources.
289
+ # * Keys must start with a lowercase letter or international character.
281
290
  #
282
291
  # @!group Attributes
283
292
  def labels= value
@@ -313,12 +313,19 @@ module Google
313
313
  # @param [Hash<String, String>] labels A hash containing key/value
314
314
  # pairs.
315
315
  #
316
- # * Label keys and values can be no longer than 63 characters.
317
- # * Label keys and values can contain only lowercase letters, numbers,
318
- # underscores, hyphens, and international characters.
319
- # * Label keys and values cannot exceed 128 bytes in size.
320
- # * Label keys must begin with a letter.
321
- # * Label keys must be unique within a dataset.
316
+ # The labels applied to a resource must meet the following requirements:
317
+ #
318
+ # * Each resource can have multiple labels, up to a maximum of 64.
319
+ # * Each label must be a key-value pair.
320
+ # * Keys have a minimum length of 1 character and a maximum length of
321
+ # 63 characters, and cannot be empty. Values can be empty, and have
322
+ # a maximum length of 63 characters.
323
+ # * Keys and values can contain only lowercase letters, numeric characters,
324
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
325
+ # international characters are allowed.
326
+ # * The key portion of a label must be unique. However, you can use the
327
+ # same key with multiple resources.
328
+ # * Keys must start with a lowercase letter or international character.
322
329
  #
323
330
  # @example
324
331
  # require "google/cloud/bigquery"
@@ -1171,13 +1178,21 @@ module Google
1171
1178
  # is 1,024 characters. If `job_id` is provided, then `prefix` will not
1172
1179
  # be used.
1173
1180
  # @param [Hash] labels A hash of user-provided labels associated with
1174
- # the job. You can use these to organize and group your jobs. Label
1175
- # keys and values can be no longer than 63 characters, can only
1176
- # contain lowercase letters, numeric characters, underscores and
1177
- # dashes. International characters are allowed. Label values are
1178
- # optional. Label keys must start with a letter and each label in the
1179
- # list must have a different key. See [Requirements for
1180
- # labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
1181
+ # the job. You can use these to organize and group your jobs.
1182
+ #
1183
+ # The labels applied to a resource must meet the following requirements:
1184
+ #
1185
+ # * Each resource can have multiple labels, up to a maximum of 64.
1186
+ # * Each label must be a key-value pair.
1187
+ # * Keys have a minimum length of 1 character and a maximum length of
1188
+ # 63 characters, and cannot be empty. Values can be empty, and have
1189
+ # a maximum length of 63 characters.
1190
+ # * Keys and values can contain only lowercase letters, numeric characters,
1191
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
1192
+ # international characters are allowed.
1193
+ # * The key portion of a label must be unique. However, you can use the
1194
+ # same key with multiple resources.
1195
+ # * Keys must start with a lowercase letter or international character.
1181
1196
  # @param [Array<String>, String] udfs User-defined function resources
1182
1197
  # used in a legacy SQL query. May be either a code resource to load from
1183
1198
  # a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
@@ -1792,13 +1807,21 @@ module Google
1792
1807
  # is 1,024 characters. If `job_id` is provided, then `prefix` will not
1793
1808
  # be used.
1794
1809
  # @param [Hash] labels A hash of user-provided labels associated with
1795
- # the job. You can use these to organize and group your jobs. Label
1796
- # keys and values can be no longer than 63 characters, can only
1797
- # contain lowercase letters, numeric characters, underscores and
1798
- # dashes. International characters are allowed. Label values are
1799
- # optional. Label keys must start with a letter and each label in the
1800
- # list must have a different key. See [Requirements for
1801
- # labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
1810
+ # the job. You can use these to organize and group your jobs.
1811
+ #
1812
+ # The labels applied to a resource must meet the following requirements:
1813
+ #
1814
+ # * Each resource can have multiple labels, up to a maximum of 64.
1815
+ # * Each label must be a key-value pair.
1816
+ # * Keys have a minimum length of 1 character and a maximum length of
1817
+ # 63 characters, and cannot be empty. Values can be empty, and have
1818
+ # a maximum length of 63 characters.
1819
+ # * Keys and values can contain only lowercase letters, numeric characters,
1820
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
1821
+ # international characters are allowed.
1822
+ # * The key portion of a label must be unique. However, you can use the
1823
+ # same key with multiple resources.
1824
+ # * Keys must start with a lowercase letter or international character.
1802
1825
  # @yield [updater] A block for setting the schema and other
1803
1826
  # options for the destination table. The schema can be omitted if the
1804
1827
  # destination table already exists, or if you're loading data from a
@@ -20,15 +20,17 @@ module Google
20
20
  # # ExtractJob
21
21
  #
22
22
  # A {Job} subclass representing an export operation that may be performed
23
- # on a {Table}. A ExtractJob instance is created when you call
24
- # {Table#extract_job}.
23
+ # on a {Table} or {Model}. An ExtractJob instance is returned when you call
24
+ # {Project#extract_job}, {Table#extract_job} or {Model#extract_job}.
25
25
  #
26
26
  # @see https://cloud.google.com/bigquery/docs/exporting-data
27
- # Exporting Data From BigQuery
27
+ # Exporting table data
28
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
29
+ # Exporting models
28
30
  # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
29
31
  # reference
30
32
  #
31
- # @example
33
+ # @example Export table data
32
34
  # require "google/cloud/bigquery"
33
35
  #
34
36
  # bigquery = Google::Cloud::Bigquery.new
@@ -40,6 +42,18 @@ module Google
40
42
  # extract_job.wait_until_done!
41
43
  # extract_job.done? #=> true
42
44
  #
45
+ # @example Export a model
46
+ # require "google/cloud/bigquery"
47
+ #
48
+ # bigquery = Google::Cloud::Bigquery.new
49
+ # dataset = bigquery.dataset "my_dataset"
50
+ # model = dataset.model "my_model"
51
+ #
52
+ # extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
53
+ #
54
+ # extract_job.wait_until_done!
55
+ # extract_job.done? #=> true
56
+ #
43
57
  class ExtractJob < Job
44
58
  ##
45
59
  # The URI or URIs representing the Google Cloud Storage files to which
@@ -49,71 +63,130 @@ module Google
49
63
  end
50
64
 
51
65
  ##
52
- # The table from which the data is exported. This is the table upon
53
- # which {Table#extract_job} was called.
66
+ # The table or model which is exported.
54
67
  #
55
- # @return [Table] A table instance.
68
+ # @return [Table, Model, nil] A table or model instance, or `nil`.
56
69
  #
57
70
  def source
58
- table = @gapi.configuration.extract.source_table
59
- return nil unless table
60
- retrieve_table table.project_id, table.dataset_id, table.table_id
71
+ if (table = @gapi.configuration.extract.source_table)
72
+ retrieve_table table.project_id, table.dataset_id, table.table_id
73
+ elsif (model = @gapi.configuration.extract.source_model)
74
+ retrieve_model model.project_id, model.dataset_id, model.model_id
75
+ end
61
76
  end
62
77
 
63
78
  ##
64
- # Checks if the export operation compresses the data using gzip. The
65
- # default is `false`.
79
+ # Whether the source of the export job is a table. See {#source}.
66
80
  #
67
- # @return [Boolean] `true` when `GZIP`, `false` otherwise.
81
+ # @return [Boolean] `true` when the source is a table, `false`
82
+ # otherwise.
83
+ #
84
+ def table?
85
+ !@gapi.configuration.extract.source_table.nil?
86
+ end
87
+
88
+ ##
89
+ # Whether the source of the export job is a model. See {#source}.
90
+ #
91
+ # @return [Boolean] `true` when the source is a model, `false`
92
+ # otherwise.
93
+ #
94
+ def model?
95
+ !@gapi.configuration.extract.source_model.nil?
96
+ end
97
+
98
+ ##
99
+ # Checks if the export operation compresses the data using gzip. The
100
+ # default is `false`. Not applicable when extracting models.
68
101
  #
102
+ # @return [Boolean] `true` when `GZIP`, `false` if not `GZIP` or not a
103
+ # table extraction.
69
104
  def compression?
105
+ return false unless table?
70
106
  val = @gapi.configuration.extract.compression
71
107
  val == "GZIP"
72
108
  end
73
109
 
74
110
  ##
75
- # Checks if the destination format for the data is [newline-delimited
76
- # JSON](http://jsonlines.org/). The default is `false`.
111
+ # Checks if the destination format for the table data is [newline-delimited
112
+ # JSON](http://jsonlines.org/). The default is `false`. Not applicable when
113
+ # extracting models.
77
114
  #
78
- # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false`
79
- # otherwise.
115
+ # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false` if not
116
+ # `NEWLINE_DELIMITED_JSON` or not a table extraction.
80
117
  #
81
118
  def json?
119
+ return false unless table?
82
120
  val = @gapi.configuration.extract.destination_format
83
121
  val == "NEWLINE_DELIMITED_JSON"
84
122
  end
85
123
 
86
124
  ##
87
- # Checks if the destination format for the data is CSV. Tables with
125
+ # Checks if the destination format for the table data is CSV. Tables with
88
126
  # nested or repeated fields cannot be exported as CSV. The default is
89
- # `true`.
127
+ # `true` for tables. Not applicable when extracting models.
90
128
  #
91
- # @return [Boolean] `true` when `CSV`, `false` otherwise.
129
+ # @return [Boolean] `true` when `CSV`, or `false` if not `CSV` or not a
130
+ # table extraction.
92
131
  #
93
132
  def csv?
133
+ return false unless table?
94
134
  val = @gapi.configuration.extract.destination_format
95
135
  return true if val.nil?
96
136
  val == "CSV"
97
137
  end
98
138
 
99
139
  ##
100
- # Checks if the destination format for the data is
101
- # [Avro](http://avro.apache.org/). The default is `false`.
140
+ # Checks if the destination format for the table data is
141
+ # [Avro](http://avro.apache.org/). The default is `false`. Not applicable
142
+ # when extracting models.
102
143
  #
103
- # @return [Boolean] `true` when `AVRO`, `false` otherwise.
144
+ # @return [Boolean] `true` when `AVRO`, `false` if not `AVRO` or not a
145
+ # table extraction.
104
146
  #
105
147
  def avro?
148
+ return false unless table?
106
149
  val = @gapi.configuration.extract.destination_format
107
150
  val == "AVRO"
108
151
  end
109
152
 
153
+ ##
154
+ # Checks if the destination format for the model is TensorFlow SavedModel.
155
+ # The default is `true` for models. Not applicable when extracting tables.
156
+ #
157
+ # @return [Boolean] `true` when `ML_TF_SAVED_MODEL`, `false` if not
158
+ # `ML_TF_SAVED_MODEL` or not a model extraction.
159
+ #
160
+ def ml_tf_saved_model?
161
+ return false unless model?
162
+ val = @gapi.configuration.extract.destination_format
163
+ return true if val.nil?
164
+ val == "ML_TF_SAVED_MODEL"
165
+ end
166
+
167
+ ##
168
+ # Checks if the destination format for the model is XGBoost. The default
169
+ # is `false`. Not applicable when extracting tables.
170
+ #
171
+ # @return [Boolean] `true` when `ML_XGBOOST_BOOSTER`, `false` if not
172
+ # `ML_XGBOOST_BOOSTER` or not a model extraction.
173
+ #
174
+ def ml_xgboost_booster?
175
+ return false unless model?
176
+ val = @gapi.configuration.extract.destination_format
177
+ val == "ML_XGBOOST_BOOSTER"
178
+ end
179
+
110
180
  ##
111
181
  # The character or symbol the operation uses to delimit fields in the
112
- # exported data. The default is a comma (,).
182
+ # exported data. The default is a comma (,) for tables. Not applicable
183
+ # when extracting models.
113
184
  #
114
- # @return [String] A string containing the character, such as `","`.
185
+ # @return [String, nil] A string containing the character, such as `","`,
186
+ # `nil` if not a table extraction.
115
187
  #
116
188
  def delimiter
189
+ return unless table?
117
190
  val = @gapi.configuration.extract.field_delimiter
118
191
  val = "," if val.nil?
119
192
  val
@@ -121,12 +194,13 @@ module Google
121
194
 
122
195
  ##
123
196
  # Checks if the exported data contains a header row. The default is
124
- # `true`.
197
+ # `true` for tables. Not applicable when extracting models.
125
198
  #
126
199
  # @return [Boolean] `true` when the print header configuration is
127
- # present or `nil`, `false` otherwise.
200
+ # present or `nil`, `false` if disabled or not a table extraction.
128
201
  #
129
202
  def print_header?
203
+ return false unless table?
130
204
  val = @gapi.configuration.extract.print_header
131
205
  val = true if val.nil?
132
206
  val
@@ -159,12 +233,14 @@ module Google
159
233
  # whether to enable extracting applicable column types (such as
160
234
  # `TIMESTAMP`) to their corresponding AVRO logical types
161
235
  # (`timestamp-micros`), instead of only using their raw types
162
- # (`avro-long`).
236
+ # (`avro-long`). Not applicable when extracting models.
163
237
  #
164
238
  # @return [Boolean] `true` when applicable column types will use their
165
- # corresponding AVRO logical types, `false` otherwise.
239
+ # corresponding AVRO logical types, `false` if not enabled or not a
240
+ # table extraction.
166
241
  #
167
242
  def use_avro_logical_types?
243
+ return false unless table?
168
244
  @gapi.configuration.extract.use_avro_logical_types
169
245
  end
170
246
 
@@ -182,19 +258,24 @@ module Google
182
258
  #
183
259
  # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
184
260
  # configuration object for setting extract options.
185
- def self.from_options service, table, storage_files, options
261
+ def self.from_options service, source, storage_files, options
186
262
  job_ref = service.job_ref_from options[:job_id], options[:prefix]
187
263
  storage_urls = Array(storage_files).map do |url|
188
264
  url.respond_to?(:to_gs_url) ? url.to_gs_url : url
189
265
  end
190
266
  options[:format] ||= Convert.derive_source_format storage_urls.first
267
+ extract_config = Google::Apis::BigqueryV2::JobConfigurationExtract.new(
268
+ destination_uris: Array(storage_urls)
269
+ )
270
+ if source.is_a? Google::Apis::BigqueryV2::TableReference
271
+ extract_config.source_table = source
272
+ elsif source.is_a? Google::Apis::BigqueryV2::ModelReference
273
+ extract_config.source_model = source
274
+ end
191
275
  job = Google::Apis::BigqueryV2::Job.new(
192
276
  job_reference: job_ref,
193
277
  configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
194
- extract: Google::Apis::BigqueryV2::JobConfigurationExtract.new(
195
- destination_uris: Array(storage_urls),
196
- source_table: table
197
- ),
278
+ extract: extract_config,
198
279
  dry_run: options[:dryrun]
199
280
  )
200
281
  )
@@ -253,7 +334,7 @@ module Google
253
334
  end
254
335
 
255
336
  ##
256
- # Sets the compression type.
337
+ # Sets the compression type. Not applicable when extracting models.
257
338
  #
258
339
  # @param [String] value The compression type to use for exported
259
340
  # files. Possible values include `GZIP` and `NONE`. The default
@@ -265,7 +346,7 @@ module Google
265
346
  end
266
347
 
267
348
  ##
268
- # Sets the field delimiter.
349
+ # Sets the field delimiter. Not applicable when extracting models.
269
350
  #
270
351
  # @param [String] value Delimiter to use between fields in the
271
352
  # exported data. Default is <code>,</code>.
@@ -276,14 +357,21 @@ module Google
276
357
  end
277
358
 
278
359
  ##
279
- # Sets the destination file format. The default value is `csv`.
360
+ # Sets the destination file format. The default value for
361
+ # tables is `csv`. Tables with nested or repeated fields cannot be
362
+ # exported as CSV. The default value for models is `ml_tf_saved_model`.
280
363
  #
281
- # The following values are supported:
364
+ # Supported values for tables:
282
365
  #
283
366
  # * `csv` - CSV
284
367
  # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
285
368
  # * `avro` - [Avro](http://avro.apache.org/)
286
369
  #
370
+ # Supported values for models:
371
+ #
372
+ # * `ml_tf_saved_model` - TensorFlow SavedModel
373
+ # * `ml_xgboost_booster` - XGBoost Booster
374
+ #
287
375
  # @param [String] new_format The new destination format.
288
376
  #
289
377
  # @!group Attributes
@@ -293,7 +381,8 @@ module Google
293
381
  end
294
382
 
295
383
  ##
296
- # Print a header row in the exported file.
384
+ # Print a header row in the exported file. Not applicable when
385
+ # extracting models.
297
386
  #
298
387
  # @param [Boolean] value Whether to print out a header row in the
299
388
  # results. Default is `true`.
@@ -307,12 +396,21 @@ module Google
307
396
  # Sets the labels to use for the job.
308
397
  #
309
398
  # @param [Hash] value A hash of user-provided labels associated with
310
- # the job. You can use these to organize and group your jobs. Label
311
- # keys and values can be no longer than 63 characters, can only
312
- # contain lowercase letters, numeric characters, underscores and
313
- # dashes. International characters are allowed. Label values are
314
- # optional. Label keys must start with a letter and each label in
315
- # the list must have a different key.
399
+ # the job. You can use these to organize and group your jobs.
400
+ #
401
+ # The labels applied to a resource must meet the following requirements:
402
+ #
403
+ # * Each resource can have multiple labels, up to a maximum of 64.
404
+ # * Each label must be a key-value pair.
405
+ # * Keys have a minimum length of 1 character and a maximum length of
406
+ # 63 characters, and cannot be empty. Values can be empty, and have
407
+ # a maximum length of 63 characters.
408
+ # * Keys and values can contain only lowercase letters, numeric characters,
409
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
410
+ # international characters are allowed.
411
+ # * The key portion of a label must be unique. However, you can use the
412
+ # same key with multiple resources.
413
+ # * Keys must start with a lowercase letter or international character.
316
414
  #
317
415
  # @!group Attributes
318
416
  #
@@ -362,6 +460,16 @@ module Google
362
460
  @gapi
363
461
  end
364
462
  end
463
+
464
+ protected
465
+
466
+ def retrieve_model project_id, dataset_id, model_id
467
+ ensure_service!
468
+ gapi = service.get_project_model project_id, dataset_id, model_id
469
+ Model.from_gapi_json gapi, service
470
+ rescue Google::Cloud::NotFoundError
471
+ nil
472
+ end
365
473
  end
366
474
  end
367
475
  end