google-cloud-bigquery 0.28.0 → 0.29.0

@@ -22,13 +22,33 @@ module Google
  # # LoadJob
  #
  # A {Job} subclass representing a load operation that may be performed
- # on a {Table}. A LoadJob instance is created when you call {Table#load}.
+ # on a {Table}. A LoadJob instance is created when you call
+ # {Table#load_job}.
  #
- # @see https://cloud.google.com/bigquery/loading-data-into-bigquery
+ # @see https://cloud.google.com/bigquery/loading-data
  #   Loading Data Into BigQuery
  # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
  #   reference
  #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
+ #
+ #   gs_url = "gs://my-bucket/file-name.csv"
+ #   load_job = dataset.load_job "my_new_table", gs_url do |schema|
+ #     schema.string "first_name", mode: :required
+ #     schema.record "cities_lived", mode: :repeated do |nested_schema|
+ #       nested_schema.string "place", mode: :required
+ #       nested_schema.integer "number_of_years", mode: :required
+ #     end
+ #   end
+ #
+ #   load_job.wait_until_done!
+ #   load_job.done? #=> true
+ #
+ #
  class LoadJob < Job
    ##
    # The URI or URIs representing the Google Cloud Storage files from which
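The rename above is part of the 0.29.0 split between job-returning and blocking calls: `load_job` returns the asynchronous LoadJob, while `load` waits for completion. A minimal sketch of the two styles, assuming a `my_dataset` dataset and placeholder bucket/table names:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset" # placeholder dataset

gs_url = "gs://my-bucket/file-name.csv"  # placeholder source file

# Asynchronous style: returns a LoadJob to poll or wait on.
job = dataset.load_job "my_new_table", gs_url
job.wait_until_done!

# Blocking style (new in 0.29.0): returns once the load completes.
dataset.load "my_new_table", gs_url
```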
@@ -39,7 +59,10 @@ module Google

  ##
  # The table into which the operation loads data. This is the table on
- # which {Table#load} was invoked. Returns a {Table} instance.
+ # which {Table#load_job} was invoked.
+ #
+ # @return [Table] A table instance.
+ #
  def destination
    table = @gapi.configuration.load.destination_table
    return nil unless table
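Since `#destination` now documents a {Table} return value, the loaded table can be inspected in place. A short sketch, reusing the `load_job` from the example above:

```ruby
table = load_job.destination  # a Google::Cloud::Bigquery::Table
puts table.table_id
puts table.rows_count         # populated once the load job is done
```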
@@ -51,13 +74,21 @@ module Google
  ##
  # The delimiter used between fields in the source data. The default is a
  # comma (,).
+ #
+ # @return [String] A string containing the character, such as `","`.
+ #
  def delimiter
    @gapi.configuration.load.field_delimiter || ","
  end

  ##
- # The number of header rows at the top of a CSV file to skip. The
- # default value is `0`.
+ # The number of rows at the top of a CSV file that BigQuery will skip
+ # when loading the data. The default value is 0. This property is useful
+ # if you have header rows in the file that should be skipped.
+ #
+ # @return [Integer] The number of header rows at the top of a CSV file
+ #   to skip.
+ #
  def skip_leading_rows
    @gapi.configuration.load.skip_leading_rows || 0
  end
@@ -65,6 +96,10 @@ module Google
  ##
  # Checks if the character encoding of the data is UTF-8. This is the
  # default.
+ #
+ # @return [Boolean] `true` when the character encoding is UTF-8,
+ #   `false` otherwise.
+ #
  def utf8?
    val = @gapi.configuration.load.encoding
    return true if val.nil?
@@ -73,6 +108,10 @@ module Google

  ##
  # Checks if the character encoding of the data is ISO-8859-1.
+ #
+ # @return [Boolean] `true` when the character encoding is ISO-8859-1,
+ #   `false` otherwise.
+ #
  def iso8859_1?
    val = @gapi.configuration.load.encoding
    val == "ISO-8859-1"
@@ -84,6 +123,9 @@ module Google
  # quoted sections, the value should be an empty string. If your data
  # contains quoted newline characters, {#quoted_newlines?} should return
  # `true`.
+ #
+ # @return [String] A string containing the character, such as `"\""`.
+ #
  def quote
    val = @gapi.configuration.load.quote
    val = "\"" if val.nil?
@@ -94,24 +136,65 @@ module Google
  # The maximum number of bad records that the load operation can ignore.
  # If the number of bad records exceeds this value, an error is returned.
  # The default value is `0`, which requires that all records be valid.
+ #
+ # @return [Integer] The maximum number of bad records.
+ #
  def max_bad_records
    val = @gapi.configuration.load.max_bad_records
    val = 0 if val.nil?
    val
  end

+ ##
+ # Specifies a string that represents a null value in a CSV file. For
+ # example, if you specify `\N`, BigQuery interprets `\N` as a null value
+ # when loading a CSV file. The default value is the empty string. If you
+ # set this property to a custom value, BigQuery throws an error if an
+ # empty string is present for all data types except for STRING and BYTE.
+ # For STRING and BYTE columns, BigQuery interprets the empty string as
+ # an empty value.
+ #
+ # @return [String] A string representing a null value in a CSV file.
+ #
+ def null_marker
+   val = @gapi.configuration.load.null_marker
+   val = "" if val.nil?
+   val
+ end
+
  ##
  # Checks if quoted data sections may contain newline characters in a CSV
  # file. The default is `false`.
+ #
+ # @return [Boolean] `true` when quoted newlines are allowed, `false`
+ #   otherwise.
+ #
  def quoted_newlines?
    val = @gapi.configuration.load.allow_quoted_newlines
-   val = true if val.nil?
+   val = false if val.nil?
+   val
+ end
+
+ ##
+ # Checks if BigQuery should automatically infer the options and schema
+ # for CSV and JSON sources. The default is `false`.
+ #
+ # @return [Boolean] `true` when autodetect is enabled, `false`
+ #   otherwise.
+ #
+ def autodetect?
+   val = @gapi.configuration.load.autodetect
+   val = false if val.nil?
    val
  end

  ##
  # Checks if the format of the source data is [newline-delimited
  # JSON](http://jsonlines.org/). The default is `false`.
+ #
+ # @return [Boolean] `true` when the source format is
+ #   `NEWLINE_DELIMITED_JSON`, `false` otherwise.
+ #
  def json?
    val = @gapi.configuration.load.source_format
    val == "NEWLINE_DELIMITED_JSON"
@@ -119,6 +202,10 @@ module Google

  ##
  # Checks if the format of the source data is CSV. The default is `true`.
+ #
+ # @return [Boolean] `true` when the source format is `CSV`, `false`
+ #   otherwise.
+ #
  def csv?
    val = @gapi.configuration.load.source_format
    return true if val.nil?
@@ -127,6 +214,10 @@ module Google

  ##
  # Checks if the source data is a Google Cloud Datastore backup.
+ #
+ # @return [Boolean] `true` when the source format is `DATASTORE_BACKUP`,
+ #   `false` otherwise.
+ #
  def backup?
    val = @gapi.configuration.load.source_format
    val == "DATASTORE_BACKUP"
@@ -138,6 +229,10 @@ module Google
  # records with missing trailing columns are treated as bad records, and
  # if there are too many bad records, an error is returned. The default
  # value is `false`. Only applicable to CSV, ignored for other formats.
+ #
+ # @return [Boolean] `true` when jagged rows are allowed, `false`
+ #   otherwise.
+ #
  def allow_jagged_rows?
    val = @gapi.configuration.load.allow_jagged_rows
    val = false if val.nil?
@@ -150,6 +245,10 @@ module Google
  # ignored. If `false`, records with extra columns are treated as bad
  # records, and if there are too many bad records, an invalid error is
  # returned. The default is `false`.
+ #
+ # @return [Boolean] `true` when unknown values are ignored, `false`
+ #   otherwise.
+ #
  def ignore_unknown_values?
    val = @gapi.configuration.load.ignore_unknown_values
    val = false if val.nil?
@@ -157,15 +256,24 @@ module Google
  end

  ##
- # The schema for the data. Returns a hash. Can be empty if the table has
- # already has the correct schema (see {Table#schema}), or if the schema
- # can be inferred from the loaded data.
+ # The schema for the destination table. The schema can be omitted if the
+ # destination table already exists, or if you're loading data from
+ # Google Cloud Datastore.
+ #
+ # The returned object is frozen and changes are not allowed. Use
+ # {Table#schema} to update the schema.
+ #
+ # @return [Schema, nil] A schema object, or `nil`.
+ #
  def schema
    Schema.from_gapi(@gapi.configuration.load.schema).freeze
  end

  ##
- # The number of source files.
+ # The number of source data files in the load job.
+ #
+ # @return [Integer] The number of source files.
+ #
  def input_files
    Integer @gapi.statistics.load.input_files
  rescue
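Because the schema returned by the job is frozen, changes go through {Table#schema} on the destination table instead. A sketch, assuming the destination table exists:

```ruby
load_job.schema.frozen? #=> true

# Update the schema on the table itself, not on the job.
load_job.destination.schema do |schema|
  schema.string "middle_name", mode: :nullable
end
```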
@@ -173,7 +281,10 @@ module Google
  end

  ##
- # The number of bytes of source data.
+ # The number of bytes of source data in the load job.
+ #
+ # @return [Integer] The number of bytes.
+ #
  def input_file_bytes
    Integer @gapi.statistics.load.input_file_bytes
  rescue
@@ -183,6 +294,9 @@ module Google
  ##
  # The number of rows that have been loaded into the table. While an
  # import job is in the running state, this value may change.
+ #
+ # @return [Integer] The number of rows that have been loaded.
+ #
  def output_rows
    Integer @gapi.statistics.load.output_rows
  rescue
@@ -192,6 +306,9 @@ module Google
  ##
  # The number of bytes that have been loaded into the table. While an
  # import job is in the running state, this value may change.
+ #
+ # @return [Integer] The number of bytes that have been loaded.
+ #
  def output_bytes
    Integer @gapi.statistics.load.output_bytes
  rescue
@@ -19,7 +19,7 @@ require "google/cloud/bigquery/service"
  require "google/cloud/bigquery/credentials"
  require "google/cloud/bigquery/dataset"
  require "google/cloud/bigquery/job"
- require "google/cloud/bigquery/query_data"
+ require "google/cloud/bigquery/external"
  require "google/cloud/bigquery/project/list"
  require "google/cloud/bigquery/time"
  require "google/cloud/bigquery/schema"
@@ -128,6 +128,10 @@ module Google
  #   passed is a hash `{ myparam: "foo" }`, the query must use named
  #   query parameters. When set, `legacy_sql` will automatically be set
  #   to false and `standard_sql` to true.
+ # @param [Hash<String|Symbol, External::DataSource>] external A Hash
+ #   that represents the mapping of the external tables to the table
+ #   names used in the SQL query. The hash keys are the table names, and
+ #   the hash values are the external table objects. See {Project#query}.
  # @param [String] priority Specifies a priority for the query. Possible
  #   values include `INTERACTIVE` and `BATCH`. The default value is
  #   `INTERACTIVE`.
@@ -158,9 +162,9 @@ module Google
  #   table exists and contains data.
  # @param [Dataset, String] dataset The default dataset to use for
  #   unqualified table names in the query. Optional.
- # @param [Boolean] large_results If `true`, allows the query to produce
- #   arbitrarily large result tables at a slight cost in performance.
- #   Requires `table` parameter to be set.
+ # @param [String] project Specifies the default projectId to assume for
+ #   any unqualified table names in the query. Only used if `dataset`
+ #   option is set.
  # @param [Boolean] standard_sql Specifies whether to use BigQuery's
  #   [standard
  #   SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
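The new `project` option only takes effect together with `dataset`. A hedged sketch with placeholder IDs showing how an unqualified table name is resolved:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# "my_table" resolves against the given dataset and project
# (both IDs below are placeholders).
job = bigquery.query_job "SELECT * FROM my_table",
                         dataset: "my_dataset",
                         project: "other-project"
job.wait_until_done!
```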
@@ -192,6 +196,38 @@ module Google
  #   job. Queries that will have bytes billed beyond this limit will fail
  #   (without incurring a charge). Optional. If unspecified, this will be
  #   set to your project default.
+ # @param [String] job_id A user-defined ID for the query job. The ID
+ #   must contain only letters (a-z, A-Z), numbers (0-9), underscores
+ #   (_), or dashes (-). The maximum length is 1,024 characters. If
+ #   `job_id` is provided, then `prefix` will not be used.
+ #
+ #   See [Generating a job
+ #   ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+ # @param [String] prefix A string, usually human-readable, that will be
+ #   prepended to a generated value to produce a unique job ID. For
+ #   example, the prefix `daily_import_job_` can be given to generate a
+ #   job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
+ #   prefix must contain only letters (a-z, A-Z), numbers (0-9),
+ #   underscores (_), or dashes (-). The maximum length of the entire ID
+ #   is 1,024 characters. If `job_id` is provided, then `prefix` will not
+ #   be used.
+ #
+ #   See [Generating a job
+ #   ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+ # @param [Hash] labels A hash of user-provided labels associated with
+ #   the job. You can use these to organize and group your jobs. Label
+ #   keys and values can be no longer than 63 characters, can only
+ #   contain lowercase letters, numeric characters, underscores and
+ #   dashes. International characters are allowed. Label values are
+ #   optional. Label keys must start with a letter and each label in the
+ #   list must have a different key.
+ # @param [Array<String>, String] udfs User-defined function resources
+ #   used in the query. May be either a code resource to load from a
+ #   Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
+ #   that contains code for a user-defined function (UDF). Providing an
+ #   inline code resource is equivalent to providing a URI for a file
+ #   containing the same code. See [User-Defined
+ #   Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
  #
  # @return [Google::Cloud::Bigquery::QueryJob]
  #
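A sketch combining the new job-identity options; the prefix and label values below are illustrative:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

job = bigquery.query_job "SELECT COUNT(*) FROM `my_dataset.my_table`",
                         prefix: "daily_import_job_",
                         labels: { "department" => "receiving" }

job.job_id #=> e.g. "daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh"
```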
@@ -205,7 +241,7 @@ module Google
  #
  #   job.wait_until_done!
  #   if !job.failed?
- #     job.query_results.each do |row|
+ #     job.data.each do |row|
  #       puts row[:name]
  #     end
  #   end
@@ -221,7 +257,7 @@ module Google
  #
  #   job.wait_until_done!
  #   if !job.failed?
- #     job.query_results.each do |row|
+ #     job.data.each do |row|
  #       puts row[:name]
  #     end
  #   end
@@ -238,7 +274,7 @@ module Google
  #
  #   job.wait_until_done!
  #   if !job.failed?
- #     job.query_results.each do |row|
+ #     job.data.each do |row|
  #       puts row[:name]
  #     end
  #   end
@@ -255,32 +291,58 @@ module Google
  #
  #   job.wait_until_done!
  #   if !job.failed?
- #     job.query_results.each do |row|
+ #     job.data.each do |row|
+ #       puts row[:name]
+ #     end
+ #   end
+ #
+ # @example Query using external data source:
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   csv_url = "gs://bucket/path/to/data.csv"
+ #   csv_table = bigquery.external csv_url do |csv|
+ #     csv.autodetect = true
+ #     csv.skip_leading_rows = 1
+ #   end
+ #
+ #   job = bigquery.query_job "SELECT * FROM my_ext_table",
+ #                            external: { my_ext_table: csv_table }
+ #
+ #   job.wait_until_done!
+ #   if !job.failed?
+ #     job.data.each do |row|
  #       puts row[:name]
  #     end
  #   end
  #
- def query_job query, params: nil, priority: "INTERACTIVE", cache: true,
-               table: nil, create: nil, write: nil, dataset: nil,
+ def query_job query, params: nil, external: nil,
+               priority: "INTERACTIVE", cache: true, table: nil,
+               create: nil, write: nil, dataset: nil, project: nil,
                standard_sql: nil, legacy_sql: nil, large_results: nil,
                flatten: nil, maximum_billing_tier: nil,
-               maximum_bytes_billed: nil
+               maximum_bytes_billed: nil, job_id: nil, prefix: nil,
+               labels: nil, udfs: nil
    ensure_service!
    options = { priority: priority, cache: cache, table: table,
                create: create, write: write,
                large_results: large_results, flatten: flatten,
-               dataset: dataset, legacy_sql: legacy_sql,
-               standard_sql: standard_sql,
+               dataset: dataset, project: project,
+               legacy_sql: legacy_sql, standard_sql: standard_sql,
                maximum_billing_tier: maximum_billing_tier,
                maximum_bytes_billed: maximum_bytes_billed,
-               params: params }
+               params: params, external: external, labels: labels,
+               job_id: job_id, prefix: prefix, udfs: udfs }
    gapi = service.query_job query, options
    Job.from_gapi gapi, service
  end

  ##
- # Queries data using the [synchronous
- # method](https://cloud.google.com/bigquery/querying-data).
+ # Queries data using a synchronous method that blocks for a response. In
+ # this method, a {QueryJob} is created and its results are saved
+ # to a temporary table, then read from the table. Timeouts and transient
+ # errors are generally handled as needed to complete the query.
  #
  # When using standard SQL and passing arguments using `params`, Ruby
  # types are mapped to BigQuery types as follows:
@@ -302,6 +364,8 @@ module Google
  # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types)
  # for an overview of each BigQuery data type, including allowed values.
  #
+ # @see https://cloud.google.com/bigquery/querying-data Querying Data
+ #
  # @param [String] query A query string, following the BigQuery [query
  #   syntax](https://cloud.google.com/bigquery/query-reference), of the
  #   query to execute. Example: "SELECT count(f1) FROM
@@ -313,22 +377,16 @@ module Google
  #   passed is a hash `{ myparam: "foo" }`, the query must use named
  #   query parameters. When set, `legacy_sql` will automatically be set
  #   to false and `standard_sql` to true.
+ # @param [Hash<String|Symbol, External::DataSource>] external A Hash
+ #   that represents the mapping of the external tables to the table
+ #   names used in the SQL query. The hash keys are the table names, and
+ #   the hash values are the external table objects. See {Project#query}.
  # @param [Integer] max The maximum number of rows of data to return per
  #   page of results. Setting this flag to a small value such as 1000 and
  #   then paging through results might improve reliability when the query
  #   result set is large. In addition to this limit, responses are also
  #   limited to 10 MB. By default, there is no maximum row count, and
  #   only the byte limit applies.
- # @param [Integer] timeout How long to wait for the query to complete,
- #   in milliseconds, before the request times out and returns. Note that
- #   this is only a timeout for the request, not the query. If the query
- #   takes longer to run than the timeout value, the call returns without
- #   any results and with QueryData#complete? set to false. The default
- #   value is 10000 milliseconds (10 seconds).
- # @param [Boolean] dryrun If set to `true`, BigQuery doesn't run the
- #   job. Instead, if the query is valid, BigQuery returns statistics
- #   about the job such as how many bytes would be processed. If the
- #   query is invalid, an error returns. The default value is `false`.
  # @param [Boolean] cache Whether to look for the result in the query
  #   cache. The query cache is a best-effort cache that will be flushed
  #   whenever tables in the query are modified. The default value is
@@ -361,7 +419,7 @@ module Google
  #   ignored; the query will be run as if `large_results` is true and
  #   `flatten` is false. Optional. The default value is false.
  #
- # @return [Google::Cloud::Bigquery::QueryData]
+ # @return [Google::Cloud::Bigquery::Data]
  #
  # @example Query using standard SQL:
  #   require "google/cloud/bigquery"
@@ -387,7 +445,7 @@ module Google
  #     puts row[:name]
  #   end
  #
- # @example Retrieve all rows: (See {QueryData#all})
+ # @example Retrieve all rows: (See {Data#all})
  #   require "google/cloud/bigquery"
  #
  #   bigquery = Google::Cloud::Bigquery.new
@@ -426,16 +484,96 @@ module Google
  #     puts row[:name]
  #   end
  #
- def query query, params: nil, max: nil, timeout: 10000, dryrun: nil,
-           cache: true, dataset: nil, project: nil, standard_sql: nil,
-           legacy_sql: nil
+ # @example Query using external data source:
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   csv_url = "gs://bucket/path/to/data.csv"
+ #   csv_table = bigquery.external csv_url do |csv|
+ #     csv.autodetect = true
+ #     csv.skip_leading_rows = 1
+ #   end
+ #
+ #   data = bigquery.query "SELECT * FROM my_ext_table",
+ #                         external: { my_ext_table: csv_table }
+ #
+ #   data.each do |row|
+ #     puts row[:name]
+ #   end
+ #
+ def query query, params: nil, external: nil, max: nil, cache: true,
+           dataset: nil, project: nil, standard_sql: nil, legacy_sql: nil
    ensure_service!
-   options = { max: max, timeout: timeout, dryrun: dryrun, cache: cache,
-               dataset: dataset, project: project,
+   options = { cache: cache, dataset: dataset, project: project,
                legacy_sql: legacy_sql, standard_sql: standard_sql,
-               params: params }
-   gapi = service.query query, options
-   QueryData.from_gapi gapi, service
+               params: params, external: external }
+
+   job = query_job query, options
+   job.wait_until_done!
+
+   if job.failed?
+     begin
+       # raise to activate ruby exception cause handling
+       fail job.gapi_error
+     rescue => e
+       # wrap Google::Apis::Error with Google::Cloud::Error
+       raise Google::Cloud::Error.from_error(e)
+     end
+   end
+
+   job.data max: max
+ end
+
+ ##
+ # Creates a new External::DataSource (or subclass) object that
+ # represents the external data source that can be queried directly,
+ # even though the data is not stored in BigQuery. Instead of loading or
+ # streaming the data, this object references the external data source.
+ #
+ # @see https://cloud.google.com/bigquery/external-data-sources Querying
+ #   External Data Sources
+ #
+ # @param [String, Array<String>] url The fully-qualified URL(s) that
+ #   point to your data in Google Cloud. An attempt will be made to
+ #   derive the format from the URLs provided.
+ # @param [String|Symbol] format The data format. This value will be used
+ #   even if the provided URLs are recognized as a different format.
+ #   Optional.
+ #
+ #   The following values are supported:
+ #
+ #   * `csv` - CSV
+ #   * `json` - [Newline-delimited JSON](http://jsonlines.org/)
+ #   * `avro` - [Avro](http://avro.apache.org/)
+ #   * `sheets` - Google Sheets
+ #   * `datastore_backup` - Cloud Datastore backup
+ #   * `bigtable` - Bigtable
+ #
+ # @return [External::DataSource] External data source.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   csv_url = "gs://bucket/path/to/data.csv"
+ #   csv_table = bigquery.external csv_url do |csv|
+ #     csv.autodetect = true
+ #     csv.skip_leading_rows = 1
+ #   end
+ #
+ #   data = bigquery.query "SELECT * FROM my_ext_table",
+ #                         external: { my_ext_table: csv_table }
+ #
+ #   data.each do |row|
+ #     puts row[:name]
+ #   end
+ #
+ def external url, format: nil
+   ext = External.from_urls url, format
+   yield ext if block_given?
+   ext
  end

  ##
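Since `query` is now `query_job` plus `wait_until_done!`, a failed query raises a wrapped `Google::Cloud::Error` instead of returning a result object. A sketch of handling that, with a placeholder table name:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

begin
  data = bigquery.query "SELECT name FROM `my_dataset.my_table`"
  data.each { |row| puts row[:name] }
rescue Google::Cloud::Error => e
  # The failed job's error is re-raised, wrapped as shown in the diff.
  puts "Query failed: #{e.message}"
end
```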
@@ -539,6 +677,11 @@ module Google
  #
  # @param [Boolean] all Whether to list all datasets, including hidden
  #   ones. The default is `false`.
+ # @param [String] filter An expression for filtering the results of the
+ #   request by label. The syntax is `labels.<name>[:<value>]`.
+ #   Multiple filters can be `AND`ed together by connecting with a space.
+ #   Example: `labels.department:receiving labels.active`. See [Filtering
+ #   datasets using labels](https://cloud.google.com/bigquery/docs/labeling-datasets#filtering_datasets_using_labels).
  # @param [String] token A previously-returned page token representing
  #   part of the larger set of results to view.
  # @param [Integer] max Maximum number of datasets to return.
@@ -573,11 +716,11 @@ module Google
  #     puts dataset.name
  #   end
  #
- def datasets all: nil, token: nil, max: nil
+ def datasets all: nil, filter: nil, token: nil, max: nil
    ensure_service!
-   options = { all: all, token: token, max: max }
+   options = { all: all, filter: filter, token: token, max: max }
    gapi = service.list_datasets options
-   Dataset::List.from_gapi gapi, service, all, max
+   Dataset::List.from_gapi gapi, service, all, filter, max
  end

  ##
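A short sketch of the new label filter, reusing the example expression from the parameter docs:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# Datasets labeled department=receiving that also carry an "active" label.
datasets = bigquery.datasets filter: "labels.department:receiving labels.active"
datasets.each do |dataset|
  puts dataset.dataset_id
end
```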
@@ -757,10 +900,10 @@ module Google
  #   configure the schema, otherwise the schema is returned empty and may
  #   be configured directly.
  #
- #   The returned schema can be passed to {Dataset#load} using the `schema`
- #   option. However, for most use cases, the block yielded by
- #   {Dataset#load} is a more convenient way to configure the schema for
- #   the destination table.
+ #   The returned schema can be passed to {Dataset#load} using the
+ #   `schema` option. However, for most use cases, the block yielded by
+ #   {Dataset#load} is a more convenient way to configure the schema
+ #   for the destination table.
  #
  # @yield [schema] a block for setting the schema
  # @yieldparam [Schema] schema the object accepting the schema
@@ -783,7 +926,7 @@ module Google
  #   dataset = bigquery.dataset "my_dataset"
  #
  #   gs_url = "gs://my-bucket/file-name.csv"
- #   load_job = dataset.load "my_new_table", gs_url, schema: schema
+ #   load_job = dataset.load_job "my_new_table", gs_url, schema: schema
  #
  def schema
    s = Schema.from_gapi