google-cloud-bigquery 0.28.0 → 0.29.0

This diff shows the changes between two package versions as they were publicly released to their registry. It is provided for informational purposes only.
@@ -22,13 +22,33 @@ module Google
 # # LoadJob
 #
 # A {Job} subclass representing a load operation that may be performed
-# on a {Table}. A LoadJob instance is created when you call {Table#load}.
+# on a {Table}. A LoadJob instance is created when you call
+# {Table#load_job}.
 #
-# @see https://cloud.google.com/bigquery/loading-data-into-bigquery
+# @see https://cloud.google.com/bigquery/loading-data
 #   Loading Data Into BigQuery
 # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
 #   reference
 #
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#
+#   gs_url = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gs_url do |schema|
+#     schema.string "first_name", mode: :required
+#     schema.record "cities_lived", mode: :repeated do |nested_schema|
+#       nested_schema.string "place", mode: :required
+#       nested_schema.integer "number_of_years", mode: :required
+#     end
+#   end
+#
+#   load_job.wait_until_done!
+#   load_job.done? #=> true
+#
+#
 class LoadJob < Job
   ##
   # The URI or URIs representing the Google Cloud Storage files from which
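Outside the diff, the same job can also be started and then checked on later instead of blocking. A minimal, hypothetical sketch (the table and bucket names are placeholders; `Project#job`, `Job#wait_until_done!`, `Job#failed?`, and `Job#error` are existing APIs in this gem):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset  = bigquery.dataset "my_dataset"

    # Start the load and keep the generated job ID rather than blocking.
    load_job = dataset.load_job "my_new_table", "gs://my-bucket/file-name.csv"
    job_id = load_job.job_id

    # ...later, look the job up again and wait for it.
    job = bigquery.job job_id
    job.wait_until_done!
    puts job.failed? ? job.error : "load complete"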
@@ -39,7 +59,10 @@ module Google

   ##
   # The table into which the operation loads data. This is the table on
-  # which {Table#load} was invoked. Returns a {Table} instance.
+  # which {Table#load_job} was invoked.
+  #
+  # @return [Table] A table instance.
+  #
   def destination
     table = @gapi.configuration.load.destination_table
     return nil unless table
@@ -51,13 +74,21 @@ module Google
   ##
   # The delimiter used between fields in the source data. The default is a
   # comma (,).
+  #
+  # @return [String] A string containing the character, such as `","`.
+  #
   def delimiter
     @gapi.configuration.load.field_delimiter || ","
   end

   ##
-  # The number of header rows at the top of a CSV file to skip. The
-  # default value is `0`.
+  # The number of rows at the top of a CSV file that BigQuery will skip
+  # when loading the data. The default value is 0. This property is useful
+  # if you have header rows in the file that should be skipped.
+  #
+  # @return [Integer] The number of header rows at the top of a CSV file
+  #   to skip.
+  #
   def skip_leading_rows
     @gapi.configuration.load.skip_leading_rows || 0
   end
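For illustration, reading these options back from a job that did not set them returns the documented defaults, a sketch assuming the `load_job` from the example above:

    load_job.delimiter         #=> ","
    load_job.skip_leading_rows #=> 0
    load_job.destination       # the target Google::Cloud::Bigquery::Table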
@@ -65,6 +96,10 @@ module Google
   ##
   # Checks if the character encoding of the data is UTF-8. This is the
   # default.
+  #
+  # @return [Boolean] `true` when the character encoding is UTF-8,
+  #   `false` otherwise.
+  #
   def utf8?
     val = @gapi.configuration.load.encoding
     return true if val.nil?
@@ -73,6 +108,10 @@ module Google

   ##
   # Checks if the character encoding of the data is ISO-8859-1.
+  #
+  # @return [Boolean] `true` when the character encoding is ISO-8859-1,
+  #   `false` otherwise.
+  #
   def iso8859_1?
     val = @gapi.configuration.load.encoding
     val == "ISO-8859-1"
@@ -84,6 +123,9 @@ module Google
   # quoted sections, the value should be an empty string. If your data
   # contains quoted newline characters, {#quoted_newlines?} should return
   # `true`.
+  #
+  # @return [String] A string containing the character, such as `"\""`.
+  #
   def quote
     val = @gapi.configuration.load.quote
     val = "\"" if val.nil?
@@ -94,24 +136,65 @@ module Google
   # The maximum number of bad records that the load operation can ignore.
   # If the number of bad records exceeds this value, an error is returned.
   # The default value is `0`, which requires that all records be valid.
+  #
+  # @return [Integer] The maximum number of bad records.
+  #
   def max_bad_records
     val = @gapi.configuration.load.max_bad_records
     val = 0 if val.nil?
     val
   end

+  ##
+  # Specifies a string that represents a null value in a CSV file. For
+  # example, if you specify `\N`, BigQuery interprets `\N` as a null value
+  # when loading a CSV file. The default value is the empty string. If you
+  # set this property to a custom value, BigQuery throws an error if an
+  # empty string is present for all data types except for STRING and BYTE.
+  # For STRING and BYTE columns, BigQuery interprets the empty string as
+  # an empty value.
+  #
+  # @return [String] A string representing null value in a CSV file.
+  #
+  def null_marker
+    val = @gapi.configuration.load.null_marker
+    val = "" if val.nil?
+    val
+  end
+
   ##
   # Checks if quoted data sections may contain newline characters in a CSV
   # file. The default is `false`.
+  #
+  # @return [Boolean] `true` when quoted newlines are allowed, `false`
+  #   otherwise.
+  #
   def quoted_newlines?
     val = @gapi.configuration.load.allow_quoted_newlines
-    val = true if val.nil?
+    val = false if val.nil?
+    val
+  end
+
+  ##
+  # Checks if BigQuery should automatically infer the options and schema
+  # for CSV and JSON sources. The default is `false`.
+  #
+  # @return [Boolean] `true` when autodetect is enabled, `false`
+  #   otherwise.
+  #
+  def autodetect?
+    val = @gapi.configuration.load.autodetect
+    val = false if val.nil?
     val
   end

   ##
   # Checks if the format of the source data is [newline-delimited
   # JSON](http://jsonlines.org/). The default is `false`.
+  #
+  # @return [Boolean] `true` when the source format is
+  #   `NEWLINE_DELIMITED_JSON`, `false` otherwise.
+  #
   def json?
     val = @gapi.configuration.load.source_format
     val == "NEWLINE_DELIMITED_JSON"
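A quick sketch of the CSV-option readers touched by this hunk, including the new `null_marker` and `autodetect?`; with no custom options on the job, the documented defaults come back:

    load_job.null_marker      #=> ""
    load_job.autodetect?      #=> false
    load_job.quoted_newlines? #=> false
    load_job.max_bad_records  #=> 0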
@@ -119,6 +202,10 @@ module Google

   ##
   # Checks if the format of the source data is CSV. The default is `true`.
+  #
+  # @return [Boolean] `true` when the source format is `CSV`, `false`
+  #   otherwise.
+  #
   def csv?
     val = @gapi.configuration.load.source_format
     return true if val.nil?
@@ -127,6 +214,10 @@ module Google

   ##
   # Checks if the source data is a Google Cloud Datastore backup.
+  #
+  # @return [Boolean] `true` when the source format is `DATASTORE_BACKUP`,
+  #   `false` otherwise.
+  #
   def backup?
     val = @gapi.configuration.load.source_format
     val == "DATASTORE_BACKUP"
@@ -138,6 +229,10 @@ module Google
   # records with missing trailing columns are treated as bad records, and
   # if there are too many bad records, an error is returned. The default
   # value is `false`. Only applicable to CSV, ignored for other formats.
+  #
+  # @return [Boolean] `true` when jagged rows are allowed, `false`
+  #   otherwise.
+  #
   def allow_jagged_rows?
     val = @gapi.configuration.load.allow_jagged_rows
     val = false if val.nil?
@@ -150,6 +245,10 @@ module Google
   # ignored. If `false`, records with extra columns are treated as bad
   # records, and if there are too many bad records, an invalid error is
   # returned. The default is `false`.
+  #
+  # @return [Boolean] `true` when unknown values are ignored, `false`
+  #   otherwise.
+  #
   def ignore_unknown_values?
     val = @gapi.configuration.load.ignore_unknown_values
     val = false if val.nil?
@@ -157,15 +256,24 @@ module Google
   end

   ##
-  # The schema for the data. Returns a hash. Can be empty if the table has
-  # already has the correct schema (see {Table#schema}), or if the schema
-  # can be inferred from the loaded data.
+  # The schema for the destination table. The schema can be omitted if the
+  # destination table already exists, or if you're loading data from
+  # Google Cloud Datastore.
+  #
+  # The returned object is frozen and changes are not allowed. Use
+  # {Table#schema} to update the schema.
+  #
+  # @return [Schema, nil] A schema object, or `nil`.
+  #
   def schema
     Schema.from_gapi(@gapi.configuration.load.schema).freeze
   end

   ##
-  # The number of source files.
+  # The number of source data files in the load job.
+  #
+  # @return [Integer] The number of source files.
+  #
   def input_files
     Integer @gapi.statistics.load.input_files
   rescue
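Because the returned schema is now frozen, edits go through the destination table instead. A hypothetical sketch (the added column is a placeholder; `Table#schema` with a block is the update path the new docs point to):

    load_job.schema.frozen?             #=> true
    load_job.schema.fields.map(&:name)  #=> ["first_name", "cities_lived"]

    # Modify the schema on the table itself, as the new docs direct.
    load_job.destination.schema do |s|
      s.string "middle_name", mode: :nullable
    end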
@@ -173,7 +281,10 @@ module Google
   end

   ##
-  # The number of bytes of source data.
+  # The number of bytes of source data in the load job.
+  #
+  # @return [Integer] The number of bytes.
+  #
   def input_file_bytes
     Integer @gapi.statistics.load.input_file_bytes
   rescue
@@ -183,6 +294,9 @@ module Google
   ##
   # The number of rows that have been loaded into the table. While an
   # import job is in the running state, this value may change.
+  #
+  # @return [Integer] The number of rows that have been loaded.
+  #
   def output_rows
     Integer @gapi.statistics.load.output_rows
   rescue
@@ -192,6 +306,9 @@ module Google
   ##
   # The number of bytes that have been loaded into the table. While an
   # import job is in the running state, this value may change.
+  #
+  # @return [Integer] The number of bytes that have been loaded.
+  #
   def output_bytes
     Integer @gapi.statistics.load.output_bytes
   rescue
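The four statistics readers pair up as input/output counts. A sketch with made-up values, valid once `load_job.done?` is `true`:

    load_job.input_files      #=> 1
    load_job.input_file_bytes #=> 205
    load_job.output_rows      #=> 173
    load_job.output_bytes     #=> 17376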
@@ -19,7 +19,7 @@ require "google/cloud/bigquery/service"
 require "google/cloud/bigquery/credentials"
 require "google/cloud/bigquery/dataset"
 require "google/cloud/bigquery/job"
-require "google/cloud/bigquery/query_data"
+require "google/cloud/bigquery/external"
 require "google/cloud/bigquery/project/list"
 require "google/cloud/bigquery/time"
 require "google/cloud/bigquery/schema"
@@ -128,6 +128,10 @@ module Google
 #   passed is a hash `{ myparam: "foo" }`, the query must use named
 #   query parameters. When set, `legacy_sql` will automatically be set
 #   to false and `standard_sql` to true.
+# @param [Hash<String|Symbol, External::DataSource>] external A Hash
+#   that represents the mapping of the external tables to the table
+#   names used in the SQL query. The hash keys are the table names, and
+#   the hash values are the external table objects. See {Project#query}.
 # @param [String] priority Specifies a priority for the query. Possible
 #   values include `INTERACTIVE` and `BATCH`. The default value is
 #   `INTERACTIVE`.
@@ -158,9 +162,9 @@ module Google
 #   table exists and contains data.
 # @param [Dataset, String] dataset The default dataset to use for
 #   unqualified table names in the query. Optional.
-# @param [Boolean] large_results If `true`, allows the query to produce
-#   arbitrarily large result tables at a slight cost in performance.
-#   Requires `table` parameter to be set.
+# @param [String] project Specifies the default projectId to assume for
+#   any unqualified table names in the query. Only used if `dataset`
+#   option is set.
 # @param [Boolean] standard_sql Specifies whether to use BigQuery's
 #   [standard
 #   SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
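The new `project` option only takes effect together with `dataset`. A hypothetical sketch (both IDs are placeholders):

    job = bigquery.query_job "SELECT * FROM my_table",
                             dataset: "my_dataset",
                             project: "other-project-id"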
@@ -192,6 +196,38 @@ module Google
 #   job. Queries that will have bytes billed beyond this limit will fail
 #   (without incurring a charge). Optional. If unspecified, this will be
 #   set to your project default.
+# @param [String] job_id A user-defined ID for the query job. The ID
+#   must contain only letters (a-z, A-Z), numbers (0-9), underscores
+#   (_), or dashes (-). The maximum length is 1,024 characters. If
+#   `job_id` is provided, then `prefix` will not be used.
+#
+#   See [Generating a job
+#   ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+# @param [String] prefix A string, usually human-readable, that will be
+#   prepended to a generated value to produce a unique job ID. For
+#   example, the prefix `daily_import_job_` can be given to generate a
+#   job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
+#   prefix must contain only letters (a-z, A-Z), numbers (0-9),
+#   underscores (_), or dashes (-). The maximum length of the entire ID
+#   is 1,024 characters. If `job_id` is provided, then `prefix` will not
+#   be used.
+#
+#   See [Generating a job
+#   ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+# @param [Hash] labels A hash of user-provided labels associated with
+#   the job. You can use these to organize and group your jobs. Label
+#   keys and values can be no longer than 63 characters, can only
+#   contain lowercase letters, numeric characters, underscores and
+#   dashes. International characters are allowed. Label values are
+#   optional. Label keys must start with a letter and each label in the
+#   list must have a different key.
+# @param [Array<String>, String] udfs User-defined function resources
+#   used in the query. May be either a code resource to load from a
+#   Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
+#   that contains code for a user-defined function (UDF). Providing an
+#   inline code resource is equivalent to providing a URI for a file
+#   containing the same code. See [User-Defined
+#   Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
 #
 # @return [Google::Cloud::Bigquery::QueryJob]
 #
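A sketch combining the new job-identity options (the prefix and label values are placeholders):

    job = bigquery.query_job "SELECT COUNT(*) FROM `my_dataset.my_table`",
                             prefix: "daily_import_job_",
                             labels: { "owner" => "analytics" }
    job.job_id #=> e.g. "daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh"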
@@ -205,7 +241,7 @@ module Google
 #
 #   job.wait_until_done!
 #   if !job.failed?
-#     job.query_results.each do |row|
+#     job.data.each do |row|
 #       puts row[:name]
 #     end
 #   end
@@ -221,7 +257,7 @@ module Google
 #
 #   job.wait_until_done!
 #   if !job.failed?
-#     job.query_results.each do |row|
+#     job.data.each do |row|
 #       puts row[:name]
 #     end
 #   end
@@ -238,7 +274,7 @@ module Google
 #
 #   job.wait_until_done!
 #   if !job.failed?
-#     job.query_results.each do |row|
+#     job.data.each do |row|
 #       puts row[:name]
 #     end
 #   end
@@ -255,32 +291,58 @@ module Google
 #
 #   job.wait_until_done!
 #   if !job.failed?
-#     job.query_results.each do |row|
+#     job.data.each do |row|
+#       puts row[:name]
+#     end
+#   end
+#
+# @example Query using external data source:
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   csv_url = "gs://bucket/path/to/data.csv"
+#   csv_table = bigquery.external csv_url do |csv|
+#     csv.autodetect = true
+#     csv.skip_leading_rows = 1
+#   end
+#
+#   job = bigquery.query_job "SELECT * FROM my_ext_table",
+#                            external: { my_ext_table: csv_table }
+#
+#   job.wait_until_done!
+#   if !job.failed?
+#     job.data.each do |row|
 #       puts row[:name]
 #     end
 #   end
 #
-def query_job query, params: nil, priority: "INTERACTIVE", cache: true,
-              table: nil, create: nil, write: nil, dataset: nil,
+def query_job query, params: nil, external: nil,
+              priority: "INTERACTIVE", cache: true, table: nil,
+              create: nil, write: nil, dataset: nil, project: nil,
               standard_sql: nil, legacy_sql: nil, large_results: nil,
               flatten: nil, maximum_billing_tier: nil,
-              maximum_bytes_billed: nil
+              maximum_bytes_billed: nil, job_id: nil, prefix: nil,
+              labels: nil, udfs: nil
   ensure_service!
   options = { priority: priority, cache: cache, table: table,
               create: create, write: write,
               large_results: large_results, flatten: flatten,
-              dataset: dataset, legacy_sql: legacy_sql,
-              standard_sql: standard_sql,
+              dataset: dataset, project: project,
+              legacy_sql: legacy_sql, standard_sql: standard_sql,
               maximum_billing_tier: maximum_billing_tier,
               maximum_bytes_billed: maximum_bytes_billed,
-              params: params }
+              params: params, external: external, labels: labels,
+              job_id: job_id, prefix: prefix, udfs: udfs }
   gapi = service.query_job query, options
   Job.from_gapi gapi, service
 end

 ##
-# Queries data using the [synchronous
-# method](https://cloud.google.com/bigquery/querying-data).
+# Queries data using a synchronous method that blocks for a response. In
+# this method, a {QueryJob} is created and its results are saved
+# to a temporary table, then read from the table. Timeouts and transient
+# errors are generally handled as needed to complete the query.
 #
 # When using standard SQL and passing arguments using `params`, Ruby
 # types are mapped to BigQuery types as follows:
@@ -302,6 +364,8 @@ module Google
 #   See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types)
 #   for an overview of each BigQuery data type, including allowed values.
 #
+# @see https://cloud.google.com/bigquery/querying-data Querying Data
+#
 # @param [String] query A query string, following the BigQuery [query
 #   syntax](https://cloud.google.com/bigquery/query-reference), of the
 #   query to execute. Example: "SELECT count(f1) FROM
@@ -313,22 +377,16 @@ module Google
 #   passed is a hash `{ myparam: "foo" }`, the query must use named
 #   query parameters. When set, `legacy_sql` will automatically be set
 #   to false and `standard_sql` to true.
+# @param [Hash<String|Symbol, External::DataSource>] external A Hash
+#   that represents the mapping of the external tables to the table
+#   names used in the SQL query. The hash keys are the table names, and
+#   the hash values are the external table objects. See {Project#query}.
 # @param [Integer] max The maximum number of rows of data to return per
 #   page of results. Setting this flag to a small value such as 1000 and
 #   then paging through results might improve reliability when the query
 #   result set is large. In addition to this limit, responses are also
 #   limited to 10 MB. By default, there is no maximum row count, and
 #   only the byte limit applies.
-# @param [Integer] timeout How long to wait for the query to complete,
-#   in milliseconds, before the request times out and returns. Note that
-#   this is only a timeout for the request, not the query. If the query
-#   takes longer to run than the timeout value, the call returns without
-#   any results and with QueryData#complete? set to false. The default
-#   value is 10000 milliseconds (10 seconds).
-# @param [Boolean] dryrun If set to `true`, BigQuery doesn't run the
-#   job. Instead, if the query is valid, BigQuery returns statistics
-#   about the job such as how many bytes would be processed. If the
-#   query is invalid, an error returns. The default value is `false`.
 # @param [Boolean] cache Whether to look for the result in the query
 #   cache. The query cache is a best-effort cache that will be flushed
 #   whenever tables in the query are modified. The default value is
@@ -361,7 +419,7 @@ module Google
 #   ignored; the query will be run as if `large_results` is true and
 #   `flatten` is false. Optional. The default value is false.
 #
-# @return [Google::Cloud::Bigquery::QueryData]
+# @return [Google::Cloud::Bigquery::Data]
 #
 # @example Query using standard SQL:
 #   require "google/cloud/bigquery"
@@ -387,7 +445,7 @@ module Google
 #     puts row[:name]
 #   end
 #
-# @example Retrieve all rows: (See {QueryData#all})
+# @example Retrieve all rows: (See {Data#all})
 #   require "google/cloud/bigquery"
 #
 #   bigquery = Google::Cloud::Bigquery.new
@@ -426,16 +484,96 @@ module Google
 #     puts row[:name]
 #   end
 #
-def query query, params: nil, max: nil, timeout: 10000, dryrun: nil,
-          cache: true, dataset: nil, project: nil, standard_sql: nil,
-          legacy_sql: nil
+# @example Query using external data source:
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   csv_url = "gs://bucket/path/to/data.csv"
+#   csv_table = bigquery.external csv_url do |csv|
+#     csv.autodetect = true
+#     csv.skip_leading_rows = 1
+#   end
+#
+#   data = bigquery.query "SELECT * FROM my_ext_table",
+#                         external: { my_ext_table: csv_table }
+#
+#   data.each do |row|
+#     puts row[:name]
+#   end
+#
+def query query, params: nil, external: nil, max: nil, cache: true,
+          dataset: nil, project: nil, standard_sql: nil, legacy_sql: nil
   ensure_service!
-  options = { max: max, timeout: timeout, dryrun: dryrun, cache: cache,
-              dataset: dataset, project: project,
+  options = { cache: cache, dataset: dataset, project: project,
               legacy_sql: legacy_sql, standard_sql: standard_sql,
-              params: params }
-  gapi = service.query query, options
-  QueryData.from_gapi gapi, service
+              params: params, external: external }
+
+  job = query_job query, options
+  job.wait_until_done!
+
+  if job.failed?
+    begin
+      # raise to activate ruby exception cause handling
+      fail job.gapi_error
+    rescue => e
+      # wrap Google::Apis::Error with Google::Cloud::Error
+      raise Google::Cloud::Error.from_error(e)
+    end
+  end
+
+  job.data max: max
+end
+
+##
+# Creates a new External::DataSource (or subclass) object that
+# represents the external data source that can be queried from directly,
+# even though the data is not stored in BigQuery. Instead of loading or
+# streaming the data, this object references the external data source.
+#
+# @see https://cloud.google.com/bigquery/external-data-sources Querying
+#   External Data Sources
+#
+# @param [String, Array<String>] url The fully-qualified URL(s) that
+#   point to your data in Google Cloud. An attempt will be made to
+#   derive the format from the URLs provided.
+# @param [String|Symbol] format The data format. This value will be used
+#   even if the provided URLs are recognized as a different format.
+#   Optional.
+#
+#   The following values are supported:
+#
+#   * `csv` - CSV
+#   * `json` - [Newline-delimited JSON](http://jsonlines.org/)
+#   * `avro` - [Avro](http://avro.apache.org/)
+#   * `sheets` - Google Sheets
+#   * `datastore_backup` - Cloud Datastore backup
+#   * `bigtable` - Bigtable
+#
+# @return [External::DataSource] External data source.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   csv_url = "gs://bucket/path/to/data.csv"
+#   csv_table = bigquery.external csv_url do |csv|
+#     csv.autodetect = true
+#     csv.skip_leading_rows = 1
+#   end
+#
+#   data = bigquery.query "SELECT * FROM my_ext_table",
+#                         external: { my_ext_table: csv_table }
+#
+#   data.each do |row|
+#     puts row[:name]
+#   end
+#
+def external url, format: nil
+  ext = External.from_urls url, format
+  yield ext if block_given?
+  ext
 end

 ##
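Since `query` now runs a {QueryJob} and re-raises its error, callers that previously inspected `QueryData#complete?` would rescue instead. A minimal sketch:

    begin
      data = bigquery.query "SELECT no_such_column FROM `my_dataset.my_table`"
      data.each { |row| puts row }
    rescue Google::Cloud::Error => e
      # A failed query now surfaces here instead of returning an
      # incomplete result object.
      puts "Query failed: #{e.message}"
    end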
@@ -539,6 +677,11 @@ module Google
 #
 # @param [Boolean] all Whether to list all datasets, including hidden
 #   ones. The default is `false`.
+# @param [String] filter An expression for filtering the results of the
+#   request by label. The syntax is `labels.<name>[:<value>]`.
+#   Multiple filters can be `AND`ed together by connecting with a space.
+#   Example: `labels.department:receiving labels.active`. See [Filtering
+#   datasets using labels](https://cloud.google.com/bigquery/docs/labeling-datasets#filtering_datasets_using_labels).
 # @param [String] token A previously-returned page token representing
 #   part of the larger set of results to view.
 # @param [Integer] max Maximum number of datasets to return.
@@ -573,11 +716,11 @@ module Google
 #     puts dataset.name
 #   end
 #
-def datasets all: nil, token: nil, max: nil
+def datasets all: nil, filter: nil, token: nil, max: nil
   ensure_service!
-  options = { all: all, token: token, max: max }
+  options = { all: all, filter: filter, token: token, max: max }
   gapi = service.list_datasets options
-  Dataset::List.from_gapi gapi, service, all, max
+  Dataset::List.from_gapi gapi, service, all, filter, max
 end

 ##
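A sketch of the new label filter (the label name is a placeholder):

    bigquery.datasets(filter: "labels.department:receiving").each do |dataset|
      puts dataset.dataset_id
    end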
@@ -757,10 +900,10 @@ module Google
 # configure the schema, otherwise the schema is returned empty and may
 # be configured directly.
 #
-# The returned schema can be passed to {Dataset#load} using the `schema`
-# option. However, for most use cases, the block yielded by
-# {Dataset#load} is a more convenient way to configure the schema for
-# the destination table.
+# The returned schema can be passed to {Dataset#load} using the
+# `schema` option. However, for most use cases, the block yielded by
+# {Dataset#load} is a more convenient way to configure the schema
+# for the destination table.
 #
 # @yield [schema] a block for setting the schema
 # @yieldparam [Schema] schema the object accepting the schema
@@ -783,7 +926,7 @@ module Google
 #   dataset = bigquery.dataset "my_dataset"
 #
 #   gs_url = "gs://my-bucket/file-name.csv"
-#   load_job = dataset.load "my_new_table", gs_url, schema: schema
+#   load_job = dataset.load_job "my_new_table", gs_url, schema: schema
 #
 def schema
   s = Schema.from_gapi
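For context, a sketch of the flow the corrected example belongs to: build the schema with this method, then hand it to the renamed `Dataset#load_job` (identifiers are placeholders):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    schema = bigquery.schema do |s|
      s.string "first_name", mode: :required
    end

    dataset = bigquery.dataset "my_dataset"
    dataset.load_job "my_new_table", "gs://my-bucket/file-name.csv",
                     schema: schema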