google-cloud-bigquery 0.28.0 → 0.29.0
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/google-cloud-bigquery.rb +2 -2
- data/lib/google/cloud/bigquery.rb +10 -12
- data/lib/google/cloud/bigquery/copy_job.rb +42 -6
- data/lib/google/cloud/bigquery/data.rb +129 -23
- data/lib/google/cloud/bigquery/dataset.rb +708 -66
- data/lib/google/cloud/bigquery/dataset/access.rb +533 -27
- data/lib/google/cloud/bigquery/dataset/list.rb +5 -3
- data/lib/google/cloud/bigquery/external.rb +2353 -0
- data/lib/google/cloud/bigquery/extract_job.rb +52 -11
- data/lib/google/cloud/bigquery/insert_response.rb +90 -2
- data/lib/google/cloud/bigquery/job.rb +160 -21
- data/lib/google/cloud/bigquery/load_job.rb +128 -11
- data/lib/google/cloud/bigquery/project.rb +187 -44
- data/lib/google/cloud/bigquery/query_job.rb +323 -13
- data/lib/google/cloud/bigquery/schema.rb +57 -1
- data/lib/google/cloud/bigquery/schema/field.rb +118 -17
- data/lib/google/cloud/bigquery/service.rb +196 -43
- data/lib/google/cloud/bigquery/table.rb +739 -49
- data/lib/google/cloud/bigquery/table/async_inserter.rb +280 -0
- data/lib/google/cloud/bigquery/version.rb +1 -1
- data/lib/google/cloud/bigquery/view.rb +306 -69
- metadata +18 -3
- data/lib/google/cloud/bigquery/query_data.rb +0 -234
@@ -22,13 +22,33 @@ module Google
 # # LoadJob
 #
 # A {Job} subclass representing a load operation that may be performed
-# on a {Table}. A LoadJob instance is created when you call
+# on a {Table}. A LoadJob instance is created when you call
+# {Table#load_job}.
 #
-# @see https://cloud.google.com/bigquery/loading-data
+# @see https://cloud.google.com/bigquery/loading-data
 #   Loading Data Into BigQuery
 # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
 #   reference
 #
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#
+#   gs_url = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gs_url do |schema|
+#     schema.string "first_name", mode: :required
+#     schema.record "cities_lived", mode: :repeated do |nested_schema|
+#       nested_schema.string "place", mode: :required
+#       nested_schema.integer "number_of_years", mode: :required
+#     end
+#   end
+#
+#   load_job.wait_until_done!
+#   load_job.done? #=> true
+#
+#
 class LoadJob < Job
 ##
 # The URI or URIs representing the Google Cloud Storage files from which
@@ -39,7 +59,10 @@ module Google

 ##
 # The table into which the operation loads data. This is the table on
-# which {Table#
+# which {Table#load_job} was invoked.
+#
+# @return [Table] A table instance.
+#
 def destination
   table = @gapi.configuration.load.destination_table
   return nil unless table
@@ -51,13 +74,21 @@ module Google
 ##
 # The delimiter used between fields in the source data. The default is a
 # comma (,).
+#
+# @return [String] A string containing the character, such as `","`.
+#
 def delimiter
   @gapi.configuration.load.field_delimiter || ","
 end

 ##
-# The number of
-# default value is
+# The number of rows at the top of a CSV file that BigQuery will skip
+# when loading the data. The default value is 0. This property is useful
+# if you have header rows in the file that should be skipped.
+#
+# @return [Integer] The number of header rows at the top of a CSV file
+#   to skip.
+#
 def skip_leading_rows
   @gapi.configuration.load.skip_leading_rows || 0
 end
@@ -65,6 +96,10 @@ module Google
 ##
 # Checks if the character encoding of the data is UTF-8. This is the
 # default.
+#
+# @return [Boolean] `true` when the character encoding is UTF-8,
+#   `false` otherwise.
+#
 def utf8?
   val = @gapi.configuration.load.encoding
   return true if val.nil?
@@ -73,6 +108,10 @@ module Google

 ##
 # Checks if the character encoding of the data is ISO-8859-1.
+#
+# @return [Boolean] `true` when the character encoding is ISO-8859-1,
+#   `false` otherwise.
+#
 def iso8859_1?
   val = @gapi.configuration.load.encoding
   val == "ISO-8859-1"
@@ -84,6 +123,9 @@ module Google
 # quoted sections, the value should be an empty string. If your data
 # contains quoted newline characters, {#quoted_newlines?} should return
 # `true`.
+#
+# @return [String] A string containing the character, such as `"\""`.
+#
 def quote
   val = @gapi.configuration.load.quote
   val = "\"" if val.nil?
@@ -94,24 +136,65 @@ module Google
 # The maximum number of bad records that the load operation can ignore.
 # If the number of bad records exceeds this value, an error is returned.
 # The default value is `0`, which requires that all records be valid.
+#
+# @return [Integer] The maximum number of bad records.
+#
 def max_bad_records
   val = @gapi.configuration.load.max_bad_records
   val = 0 if val.nil?
   val
 end

+##
+# Specifies a string that represents a null value in a CSV file. For
+# example, if you specify `\N`, BigQuery interprets `\N` as a null value
+# when loading a CSV file. The default value is the empty string. If you
+# set this property to a custom value, BigQuery throws an error if an
+# empty string is present for all data types except for STRING and BYTE.
+# For STRING and BYTE columns, BigQuery interprets the empty string as
+# an empty value.
+#
+# @return [String] A string representing null value in a CSV file.
+#
+def null_marker
+  val = @gapi.configuration.load.null_marker
+  val = "" if val.nil?
+  val
+end
+
 ##
 # Checks if quoted data sections may contain newline characters in a CSV
 # file. The default is `false`.
+#
+# @return [Boolean] `true` when quoted newlines are allowed, `false`
+#   otherwise.
+#
 def quoted_newlines?
   val = @gapi.configuration.load.allow_quoted_newlines
-  val =
+  val = false if val.nil?
+  val
+end
+
+##
+# Checks if BigQuery should automatically infer the options and schema
+# for CSV and JSON sources. The default is `false`.
+#
+# @return [Boolean] `true` when autodetect is enabled, `false`
+#   otherwise.
+#
+def autodetect?
+  val = @gapi.configuration.load.autodetect
+  val = false if val.nil?
   val
 end

 ##
 # Checks if the format of the source data is [newline-delimited
 # JSON](http://jsonlines.org/). The default is `false`.
+#
+# @return [Boolean] `true` when the source format is
+#   `NEWLINE_DELIMITED_JSON`, `false` otherwise.
+#
 def json?
   val = @gapi.configuration.load.source_format
   val == "NEWLINE_DELIMITED_JSON"
@@ -119,6 +202,10 @@ module Google

 ##
 # Checks if the format of the source data is CSV. The default is `true`.
+#
+# @return [Boolean] `true` when the source format is `CSV`, `false`
+#   otherwise.
+#
 def csv?
   val = @gapi.configuration.load.source_format
   return true if val.nil?
@@ -127,6 +214,10 @@ module Google

 ##
 # Checks if the source data is a Google Cloud Datastore backup.
+#
+# @return [Boolean] `true` when the source format is `DATASTORE_BACKUP`,
+#   `false` otherwise.
+#
 def backup?
   val = @gapi.configuration.load.source_format
   val == "DATASTORE_BACKUP"
@@ -138,6 +229,10 @@ module Google
 # records with missing trailing columns are treated as bad records, and
 # if there are too many bad records, an error is returned. The default
 # value is `false`. Only applicable to CSV, ignored for other formats.
+#
+# @return [Boolean] `true` when jagged rows are allowed, `false`
+#   otherwise.
+#
 def allow_jagged_rows?
   val = @gapi.configuration.load.allow_jagged_rows
   val = false if val.nil?
@@ -150,6 +245,10 @@ module Google
 # ignored. If `false`, records with extra columns are treated as bad
 # records, and if there are too many bad records, an invalid error is
 # returned. The default is `false`.
+#
+# @return [Boolean] `true` when unknown values are ignored, `false`
+#   otherwise.
+#
 def ignore_unknown_values?
   val = @gapi.configuration.load.ignore_unknown_values
   val = false if val.nil?
@@ -157,15 +256,24 @@ module Google
 end

 ##
-# The schema for the
-#
-#
+# The schema for the destination table. The schema can be omitted if the
+# destination table already exists, or if you're loading data from
+# Google Cloud Datastore.
+#
+# The returned object is frozen and changes are not allowed. Use
+# {Table#schema} to update the schema.
+#
+# @return [Schema, nil] A schema object, or `nil`.
+#
 def schema
   Schema.from_gapi(@gapi.configuration.load.schema).freeze
 end

 ##
-# The number of source files.
+# The number of source data files in the load job.
+#
+# @return [Integer] The number of source files.
+#
 def input_files
   Integer @gapi.statistics.load.input_files
 rescue
@@ -173,7 +281,10 @@ module Google
 end

 ##
-# The number of bytes of source data.
+# The number of bytes of source data in the load job.
+#
+# @return [Integer] The number of bytes.
+#
 def input_file_bytes
   Integer @gapi.statistics.load.input_file_bytes
 rescue
@@ -183,6 +294,9 @@ module Google
 ##
 # The number of rows that have been loaded into the table. While an
 # import job is in the running state, this value may change.
+#
+# @return [Integer] The number of rows that have been loaded.
+#
 def output_rows
   Integer @gapi.statistics.load.output_rows
 rescue
@@ -192,6 +306,9 @@ module Google
 ##
 # The number of bytes that have been loaded into the table. While an
 # import job is in the running state, this value may change.
+#
+# @return [Integer] The number of bytes that have been loaded.
+#
 def output_bytes
   Integer @gapi.statistics.load.output_bytes
 rescue
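The accessors added to LoadJob above can be read back off any load job. A minimal sketch of the 0.29.0 surface, assuming a dataset named my_dataset exists, the destination table already exists (so the schema is omitted), and a CSV sits at the bucket URL shown; all names are placeholders:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset  = bigquery.dataset "my_dataset" # placeholder dataset name

    # Start a CSV load, then inspect the CSV options through the new
    # readers documented in the hunks above. Values shown are the defaults.
    load_job = dataset.load_job "my_new_table", "gs://my-bucket/file-name.csv"

    load_job.null_marker       #=> ""    (empty string represents null)
    load_job.autodetect?       #=> false (no schema/options autodetection)
    load_job.skip_leading_rows #=> 0     (no header rows skipped)
    load_job.quoted_newlines?  #=> false (no newlines in quoted sections)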
@@ -19,7 +19,7 @@ require "google/cloud/bigquery/service"
 require "google/cloud/bigquery/credentials"
 require "google/cloud/bigquery/dataset"
 require "google/cloud/bigquery/job"
-require "google/cloud/bigquery/query_data"
+require "google/cloud/bigquery/external"
 require "google/cloud/bigquery/project/list"
 require "google/cloud/bigquery/time"
 require "google/cloud/bigquery/schema"
@@ -128,6 +128,10 @@ module Google
 # passed is a hash `{ myparam: "foo" }`, the query must use named
 # query parameters. When set, `legacy_sql` will automatically be set
 # to false and `standard_sql` to true.
+# @param [Hash<String|Symbol, External::DataSource>] external A Hash
+#   that represents the mapping of the external tables to the table
+#   names used in the SQL query. The hash keys are the table names, and
+#   the hash values are the external table objects. See {Project#query}.
 # @param [String] priority Specifies a priority for the query. Possible
 #   values include `INTERACTIVE` and `BATCH`. The default value is
 #   `INTERACTIVE`.
@@ -158,9 +162,9 @@ module Google
 #   table exists and contains data.
 # @param [Dataset, String] dataset The default dataset to use for
 #   unqualified table names in the query. Optional.
-# @param [
-#
-#
+# @param [String] project Specifies the default projectId to assume for
+#   any unqualified table names in the query. Only used if `dataset`
+#   option is set.
 # @param [Boolean] standard_sql Specifies whether to use BigQuery's
 #   [standard
 #   SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
@@ -192,6 +196,38 @@ module Google
 # job. Queries that will have bytes billed beyond this limit will fail
 # (without incurring a charge). Optional. If unspecified, this will be
 # set to your project default.
+# @param [String] job_id A user-defined ID for the query job. The ID
+#   must contain only letters (a-z, A-Z), numbers (0-9), underscores
+#   (_), or dashes (-). The maximum length is 1,024 characters. If
+#   `job_id` is provided, then `prefix` will not be used.
+#
+#   See [Generating a job
+#   ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+# @param [String] prefix A string, usually human-readable, that will be
+#   prepended to a generated value to produce a unique job ID. For
+#   example, the prefix `daily_import_job_` can be given to generate a
+#   job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
+#   prefix must contain only letters (a-z, A-Z), numbers (0-9),
+#   underscores (_), or dashes (-). The maximum length of the entire ID
+#   is 1,024 characters. If `job_id` is provided, then `prefix` will not
+#   be used.
+#
+#   See [Generating a job
+#   ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+# @param [Hash] labels A hash of user-provided labels associated with
+#   the job. You can use these to organize and group your jobs. Label
+#   keys and values can be no longer than 63 characters, can only
+#   contain lowercase letters, numeric characters, underscores and
+#   dashes. International characters are allowed. Label values are
+#   optional. Label keys must start with a letter and each label in the
+#   list must have a different key.
+# @param [Array<String>, String] udfs User-defined function resources
+#   used in the query. May be either a code resource to load from a
+#   Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
+#   that contains code for a user-defined function (UDF). Providing an
+#   inline code resource is equivalent to providing a URI for a file
+#   containing the same code. See [User-Defined
+#   Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
 #
 # @return [Google::Cloud::Bigquery::QueryJob]
 #
@@ -205,7 +241,7 @@ module Google
 #
 #   job.wait_until_done!
 #   if !job.failed?
-#     job.
+#     job.data.each do |row|
 #       puts row[:name]
 #     end
 #   end
@@ -221,7 +257,7 @@ module Google
 #
 #   job.wait_until_done!
 #   if !job.failed?
-#     job.
+#     job.data.each do |row|
 #       puts row[:name]
 #     end
 #   end
@@ -238,7 +274,7 @@ module Google
 #
 #   job.wait_until_done!
 #   if !job.failed?
-#     job.
+#     job.data.each do |row|
 #       puts row[:name]
 #     end
 #   end
@@ -255,32 +291,58 @@ module Google
 #
 #   job.wait_until_done!
 #   if !job.failed?
-#     job.
+#     job.data.each do |row|
+#       puts row[:name]
+#     end
+#   end
+#
+# @example Query using external data source:
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   csv_url = "gs://bucket/path/to/data.csv"
+#   csv_table = bigquery.external csv_url do |csv|
+#     csv.autodetect = true
+#     csv.skip_leading_rows = 1
+#   end
+#
+#   job = bigquery.query_job "SELECT * FROM my_ext_table",
+#                            external: { my_ext_table: csv_table }
+#
+#   job.wait_until_done!
+#   if !job.failed?
+#     job.data.each do |row|
 #       puts row[:name]
 #     end
 #   end
 #
-def query_job query, params: nil,
-
+def query_job query, params: nil, external: nil,
+              priority: "INTERACTIVE", cache: true, table: nil,
+              create: nil, write: nil, dataset: nil, project: nil,
               standard_sql: nil, legacy_sql: nil, large_results: nil,
               flatten: nil, maximum_billing_tier: nil,
-              maximum_bytes_billed: nil
+              maximum_bytes_billed: nil, job_id: nil, prefix: nil,
+              labels: nil, udfs: nil
   ensure_service!
   options = { priority: priority, cache: cache, table: table,
               create: create, write: write,
               large_results: large_results, flatten: flatten,
-              dataset: dataset,
-              standard_sql: standard_sql,
+              dataset: dataset, project: project,
+              legacy_sql: legacy_sql, standard_sql: standard_sql,
               maximum_billing_tier: maximum_billing_tier,
               maximum_bytes_billed: maximum_bytes_billed,
-              params: params
+              params: params, external: external, labels: labels,
+              job_id: job_id, prefix: prefix, udfs: udfs }
   gapi = service.query_job query, options
   Job.from_gapi gapi, service
 end

 ##
-# Queries data using
-# method
+# Queries data using a synchronous method that blocks for a response. In
+# this method, a {QueryJob} is created and its results are saved
+# to a temporary table, then read from the table. Timeouts and transient
+# errors are generally handled as needed to complete the query.
 #
 # When using standard SQL and passing arguments using `params`, Ruby
 # types are mapped to BigQuery types as follows:
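Putting the query_job changes together: external:, job_id/prefix, labels, and udfs are new options, and results are now read through Job#data. A hedged sketch of the new options; the table, prefix, and label values are illustrative placeholders:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    # `prefix` seeds a generated job ID; `labels` tags the job so it can
    # be found later. Both follow the constraints documented above.
    job = bigquery.query_job "SELECT name FROM my_dataset.my_table",
                             prefix: "daily_import_job_",
                             labels: { "department" => "receiving" }

    job.wait_until_done!
    unless job.failed?
      job.data.each do |row| # 0.29.0 reads results via Job#data
        puts row[:name]
      end
    end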
@@ -302,6 +364,8 @@ module Google
 # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types)
 # for an overview of each BigQuery data type, including allowed values.
 #
+# @see https://cloud.google.com/bigquery/querying-data Querying Data
+#
 # @param [String] query A query string, following the BigQuery [query
 #   syntax](https://cloud.google.com/bigquery/query-reference), of the
 #   query to execute. Example: "SELECT count(f1) FROM
@@ -313,22 +377,16 @@ module Google
 # passed is a hash `{ myparam: "foo" }`, the query must use named
 # query parameters. When set, `legacy_sql` will automatically be set
 # to false and `standard_sql` to true.
+# @param [Hash<String|Symbol, External::DataSource>] external A Hash
+#   that represents the mapping of the external tables to the table
+#   names used in the SQL query. The hash keys are the table names, and
+#   the hash values are the external table objects. See {Project#query}.
 # @param [Integer] max The maximum number of rows of data to return per
 #   page of results. Setting this flag to a small value such as 1000 and
 #   then paging through results might improve reliability when the query
 #   result set is large. In addition to this limit, responses are also
 #   limited to 10 MB. By default, there is no maximum row count, and
 #   only the byte limit applies.
-# @param [Integer] timeout How long to wait for the query to complete,
-#   in milliseconds, before the request times out and returns. Note that
-#   this is only a timeout for the request, not the query. If the query
-#   takes longer to run than the timeout value, the call returns without
-#   any results and with QueryData#complete? set to false. The default
-#   value is 10000 milliseconds (10 seconds).
-# @param [Boolean] dryrun If set to `true`, BigQuery doesn't run the
-#   job. Instead, if the query is valid, BigQuery returns statistics
-#   about the job such as how many bytes would be processed. If the
-#   query is invalid, an error returns. The default value is `false`.
 # @param [Boolean] cache Whether to look for the result in the query
 #   cache. The query cache is a best-effort cache that will be flushed
 #   whenever tables in the query are modified. The default value is
@@ -361,7 +419,7 @@ module Google
 #   ignored; the query will be run as if `large_results` is true and
 #   `flatten` is false. Optional. The default value is false.
 #
-# @return [Google::Cloud::Bigquery::
+# @return [Google::Cloud::Bigquery::Data]
 #
 # @example Query using standard SQL:
 #   require "google/cloud/bigquery"
@@ -387,7 +445,7 @@ module Google
 #     puts row[:name]
 #   end
 #
-# @example Retrieve all rows: (See {
+# @example Retrieve all rows: (See {Data#all})
 #   require "google/cloud/bigquery"
 #
 #   bigquery = Google::Cloud::Bigquery.new
@@ -426,16 +484,96 @@ module Google
 #     puts row[:name]
 #   end
 #
-
-
-
+# @example Query using external data source:
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   csv_url = "gs://bucket/path/to/data.csv"
+#   csv_table = bigquery.external csv_url do |csv|
+#     csv.autodetect = true
+#     csv.skip_leading_rows = 1
+#   end
+#
+#   data = bigquery.query "SELECT * FROM my_ext_table",
+#                         external: { my_ext_table: csv_table }
+#
+#   data.each do |row|
+#     puts row[:name]
+#   end
+#
+def query query, params: nil, external: nil, max: nil, cache: true,
+          dataset: nil, project: nil, standard_sql: nil, legacy_sql: nil
   ensure_service!
-  options = {
-              dataset: dataset, project: project,
+  options = { cache: cache, dataset: dataset, project: project,
               legacy_sql: legacy_sql, standard_sql: standard_sql,
-              params: params }
-
-
+              params: params, external: external }
+
+  job = query_job query, options
+  job.wait_until_done!
+
+  if job.failed?
+    begin
+      # raise to activate ruby exception cause handling
+      fail job.gapi_error
+    rescue => e
+      # wrap Google::Apis::Error with Google::Cloud::Error
+      raise Google::Cloud::Error.from_error(e)
+    end
+  end
+
+  job.data max: max
+end
+
+##
+# Creates a new External::DataSource (or subclass) object that
+# represents the external data source that can be queried from directly,
+# even though the data is not stored in BigQuery. Instead of loading or
+# streaming the data, this object references the external data source.
+#
+# @see https://cloud.google.com/bigquery/external-data-sources Querying
+#   External Data Sources
+#
+# @param [String, Array<String>] url The fully-qualified URL(s) that
+#   point to your data in Google Cloud. An attempt will be made to
+#   derive the format from the URLs provided.
+# @param [String|Symbol] format The data format. This value will be used
+#   even if the provided URLs are recognized as a different format.
+#   Optional.
+#
+#   The following values are supported:
+#
+#   * `csv` - CSV
+#   * `json` - [Newline-delimited JSON](http://jsonlines.org/)
+#   * `avro` - [Avro](http://avro.apache.org/)
+#   * `sheets` - Google Sheets
+#   * `datastore_backup` - Cloud Datastore backup
+#   * `bigtable` - Bigtable
+#
+# @return [External::DataSource] External data source.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   csv_url = "gs://bucket/path/to/data.csv"
+#   csv_table = bigquery.external csv_url do |csv|
+#     csv.autodetect = true
+#     csv.skip_leading_rows = 1
+#   end
+#
+#   data = bigquery.query "SELECT * FROM my_ext_table",
+#                         external: { my_ext_table: csv_table }
+#
+#   data.each do |row|
+#     puts row[:name]
+#   end
+#
+def external url, format: nil
+  ext = External.from_urls url, format
+  yield ext if block_given?
+  ext
 end

 ##
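The new external method also accepts an explicit format: for URLs whose extension does not identify the data, per the supported-values list above. A small sketch; the bucket path and the logs alias are placeholders:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    # A .txt object will not be recognized as CSV, so force the format.
    ext = bigquery.external "gs://my-bucket/data.txt", format: :csv do |csv|
      csv.skip_leading_rows = 1
    end

    # Reference the external source under the alias used in the SQL.
    data = bigquery.query "SELECT * FROM logs", external: { logs: ext }
    data.each { |row| p row }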
@@ -539,6 +677,11 @@ module Google
 #
 # @param [Boolean] all Whether to list all datasets, including hidden
 #   ones. The default is `false`.
+# @param [String] filter An expression for filtering the results of the
+#   request by label. The syntax is `labels.<name>[:<value>]`.
+#   Multiple filters can be `AND`ed together by connecting with a space.
+#   Example: `labels.department:receiving labels.active`. See [Filtering
+#   datasets using labels](https://cloud.google.com/bigquery/docs/labeling-datasets#filtering_datasets_using_labels).
 # @param [String] token A previously-returned page token representing
 #   part of the larger set of results to view.
 # @param [Integer] max Maximum number of datasets to return.
@@ -573,11 +716,11 @@ module Google
 #     puts dataset.name
 #   end
 #
-def datasets all: nil, token: nil, max: nil
+def datasets all: nil, filter: nil, token: nil, max: nil
   ensure_service!
-  options = { all: all, token: token, max: max }
+  options = { all: all, filter: filter, token: token, max: max }
   gapi = service.list_datasets options
-  Dataset::List.from_gapi gapi, service, all, max
+  Dataset::List.from_gapi gapi, service, all, filter, max
 end

 ##
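The new filter option threads from the datasets method through Dataset::List, as the hunk above shows. A usage sketch; the label names are placeholders:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    # Datasets labeled department:receiving that also carry an `active`
    # label; joining filters with a space ANDs them together.
    datasets = bigquery.datasets filter: "labels.department:receiving labels.active"
    datasets.all do |dataset|
      puts dataset.dataset_id
    end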
@@ -757,10 +900,10 @@ module Google
 # configure the schema, otherwise the schema is returned empty and may
 #   be configured directly.
 #
-# The returned schema can be passed to {Dataset#load} using the
-# option. However, for most use cases, the block yielded by
-# {Dataset#load} is a more convenient way to configure the schema
-# the destination table.
+# The returned schema can be passed to {Dataset#load} using the
+# `schema` option. However, for most use cases, the block yielded by
+# {Dataset#load} is a more convenient way to configure the schema
+# for the destination table.
 #
 # @yield [schema] a block for setting the schema
 # @yieldparam [Schema] schema the object accepting the schema
@@ -783,7 +926,7 @@ module Google
 #   dataset = bigquery.dataset "my_dataset"
 #
 #   gs_url = "gs://my-bucket/file-name.csv"
-#   load_job = dataset.
+#   load_job = dataset.load_job "my_new_table", gs_url, schema: schema
 #
 def schema
   s = Schema.from_gapi