google-cloud-bigquery 0.28.0 → 0.29.0
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/google-cloud-bigquery.rb +2 -2
- data/lib/google/cloud/bigquery.rb +10 -12
- data/lib/google/cloud/bigquery/copy_job.rb +42 -6
- data/lib/google/cloud/bigquery/data.rb +129 -23
- data/lib/google/cloud/bigquery/dataset.rb +708 -66
- data/lib/google/cloud/bigquery/dataset/access.rb +533 -27
- data/lib/google/cloud/bigquery/dataset/list.rb +5 -3
- data/lib/google/cloud/bigquery/external.rb +2353 -0
- data/lib/google/cloud/bigquery/extract_job.rb +52 -11
- data/lib/google/cloud/bigquery/insert_response.rb +90 -2
- data/lib/google/cloud/bigquery/job.rb +160 -21
- data/lib/google/cloud/bigquery/load_job.rb +128 -11
- data/lib/google/cloud/bigquery/project.rb +187 -44
- data/lib/google/cloud/bigquery/query_job.rb +323 -13
- data/lib/google/cloud/bigquery/schema.rb +57 -1
- data/lib/google/cloud/bigquery/schema/field.rb +118 -17
- data/lib/google/cloud/bigquery/service.rb +196 -43
- data/lib/google/cloud/bigquery/table.rb +739 -49
- data/lib/google/cloud/bigquery/table/async_inserter.rb +280 -0
- data/lib/google/cloud/bigquery/version.rb +1 -1
- data/lib/google/cloud/bigquery/view.rb +306 -69
- metadata +18 -3
- data/lib/google/cloud/bigquery/query_data.rb +0 -234
--- a/data/lib/google/cloud/bigquery/load_job.rb
+++ b/data/lib/google/cloud/bigquery/load_job.rb
@@ -22,13 +22,33 @@ module Google
 # # LoadJob
 #
 # A {Job} subclass representing a load operation that may be performed
-# on a {Table}. A LoadJob instance is created when you call
+# on a {Table}. A LoadJob instance is created when you call
+# {Table#load_job}.
 #
-# @see https://cloud.google.com/bigquery/loading-data
+# @see https://cloud.google.com/bigquery/loading-data
 #   Loading Data Into BigQuery
 # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
 #   reference
 #
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#
+#   gs_url = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gs_url do |schema|
+#     schema.string "first_name", mode: :required
+#     schema.record "cities_lived", mode: :repeated do |nested_schema|
+#       nested_schema.string "place", mode: :required
+#       nested_schema.integer "number_of_years", mode: :required
+#     end
+#   end
+#
+#   load_job.wait_until_done!
+#   load_job.done? #=> true
+#
+#
 class LoadJob < Job
   ##
   # The URI or URIs representing the Google Cloud Storage files from which
@@ -39,7 +59,10 @@ module Google
 
 ##
 # The table into which the operation loads data. This is the table on
-# which {Table#load} was invoked.
+# which {Table#load_job} was invoked.
+#
+# @return [Table] A table instance.
+#
 def destination
   table = @gapi.configuration.load.destination_table
   return nil unless table
@@ -51,13 +74,21 @@ module Google
 ##
 # The delimiter used between fields in the source data. The default is a
 # comma (,).
+#
+# @return [String] A string containing the character, such as `","`.
+#
 def delimiter
   @gapi.configuration.load.field_delimiter || ","
 end
 
 ##
-# The number of
-# default value is
+# The number of rows at the top of a CSV file that BigQuery will skip
+# when loading the data. The default value is 0. This property is useful
+# if you have header rows in the file that should be skipped.
+#
+# @return [Integer] The number of header rows at the top of a CSV file
+#   to skip.
+#
 def skip_leading_rows
   @gapi.configuration.load.skip_leading_rows || 0
 end
@@ -65,6 +96,10 @@ module Google
 ##
 # Checks if the character encoding of the data is UTF-8. This is the
 # default.
+#
+# @return [Boolean] `true` when the character encoding is UTF-8,
+#   `false` otherwise.
+#
 def utf8?
   val = @gapi.configuration.load.encoding
   return true if val.nil?
@@ -73,6 +108,10 @@ module Google
 
 ##
 # Checks if the character encoding of the data is ISO-8859-1.
+#
+# @return [Boolean] `true` when the character encoding is ISO-8859-1,
+#   `false` otherwise.
+#
 def iso8859_1?
   val = @gapi.configuration.load.encoding
   val == "ISO-8859-1"
@@ -84,6 +123,9 @@ module Google
 # quoted sections, the value should be an empty string. If your data
 # contains quoted newline characters, {#quoted_newlines?} should return
 # `true`.
+#
+# @return [String] A string containing the character, such as `"\""`.
+#
 def quote
   val = @gapi.configuration.load.quote
   val = "\"" if val.nil?
@@ -94,24 +136,65 @@ module Google
 # The maximum number of bad records that the load operation can ignore.
 # If the number of bad records exceeds this value, an error is returned.
 # The default value is `0`, which requires that all records be valid.
+#
+# @return [Integer] The maximum number of bad records.
+#
 def max_bad_records
   val = @gapi.configuration.load.max_bad_records
   val = 0 if val.nil?
   val
 end
 
+##
+# Specifies a string that represents a null value in a CSV file. For
+# example, if you specify `\N`, BigQuery interprets `\N` as a null value
+# when loading a CSV file. The default value is the empty string. If you
+# set this property to a custom value, BigQuery throws an error if an
+# empty string is present for all data types except for STRING and BYTE.
+# For STRING and BYTE columns, BigQuery interprets the empty string as
+# an empty value.
+#
+# @return [String] A string representing null value in a CSV file.
+#
+def null_marker
+  val = @gapi.configuration.load.null_marker
+  val = "" if val.nil?
+  val
+end
+
 ##
 # Checks if quoted data sections may contain newline characters in a CSV
 # file. The default is `false`.
+#
+# @return [Boolean] `true` when quoted newlines are allowed, `false`
+#   otherwise.
+#
 def quoted_newlines?
   val = @gapi.configuration.load.allow_quoted_newlines
-  val = true if val.nil?
+  val = false if val.nil?
+  val
+end
+
+##
+# Checks if BigQuery should automatically infer the options and schema
+# for CSV and JSON sources. The default is `false`.
+#
+# @return [Boolean] `true` when autodetect is enabled, `false`
+#   otherwise.
+#
+def autodetect?
+  val = @gapi.configuration.load.autodetect
+  val = false if val.nil?
   val
 end
 
 ##
 # Checks if the format of the source data is [newline-delimited
 # JSON](http://jsonlines.org/). The default is `false`.
+#
+# @return [Boolean] `true` when the source format is
+#   `NEWLINE_DELIMITED_JSON`, `false` otherwise.
+#
 def json?
   val = @gapi.configuration.load.source_format
   val == "NEWLINE_DELIMITED_JSON"
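The new `null_marker` and `autodetect?` readers round out the CSV options a LoadJob exposes. A minimal sketch of reading them back, assuming a dataset named `my_dataset` and assuming `Dataset#load_job` accepts matching `null_marker` and `autodetect` options in 0.29.0:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset  = bigquery.dataset "my_dataset"

    # Assumed options; they mirror the reader methods on LoadJob.
    load_job = dataset.load_job "my_new_table", "gs://my-bucket/file-name.csv",
                                null_marker: "\\N", autodetect: true

    load_job.null_marker      #=> "\\N" (the two-character string)
    load_job.autodetect?      #=> true
    load_job.quoted_newlines? #=> false, the default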
@@ -119,6 +202,10 @@ module Google
 
 ##
 # Checks if the format of the source data is CSV. The default is `true`.
+#
+# @return [Boolean] `true` when the source format is `CSV`, `false`
+#   otherwise.
+#
 def csv?
   val = @gapi.configuration.load.source_format
   return true if val.nil?
@@ -127,6 +214,10 @@ module Google
 
 ##
 # Checks if the source data is a Google Cloud Datastore backup.
+#
+# @return [Boolean] `true` when the source format is `DATASTORE_BACKUP`,
+#   `false` otherwise.
+#
 def backup?
   val = @gapi.configuration.load.source_format
   val == "DATASTORE_BACKUP"
@@ -138,6 +229,10 @@ module Google
 # records with missing trailing columns are treated as bad records, and
 # if there are too many bad records, an error is returned. The default
 # value is `false`. Only applicable to CSV, ignored for other formats.
+#
+# @return [Boolean] `true` when jagged rows are allowed, `false`
+#   otherwise.
+#
 def allow_jagged_rows?
   val = @gapi.configuration.load.allow_jagged_rows
   val = false if val.nil?
@@ -150,6 +245,10 @@ module Google
 # ignored. If `false`, records with extra columns are treated as bad
 # records, and if there are too many bad records, an invalid error is
 # returned. The default is `false`.
+#
+# @return [Boolean] `true` when unknown values are ignored, `false`
+#   otherwise.
+#
 def ignore_unknown_values?
   val = @gapi.configuration.load.ignore_unknown_values
   val = false if val.nil?
@@ -157,15 +256,24 @@ module Google
 end
 
 ##
-# The schema for the
-#
-#
+# The schema for the destination table. The schema can be omitted if the
+# destination table already exists, or if you're loading data from
+# Google Cloud Datastore.
+#
+# The returned object is frozen and changes are not allowed. Use
+# {Table#schema} to update the schema.
+#
+# @return [Schema, nil] A schema object, or `nil`.
+#
 def schema
   Schema.from_gapi(@gapi.configuration.load.schema).freeze
 end
 
 ##
-# The number of source files.
+# The number of source data files in the load job.
+#
+# @return [Integer] The number of source files.
+#
 def input_files
   Integer @gapi.statistics.load.input_files
 rescue
@@ -173,7 +281,10 @@ module Google
 end
 
 ##
-# The number of bytes of source data.
+# The number of bytes of source data in the load job.
+#
+# @return [Integer] The number of bytes.
+#
 def input_file_bytes
   Integer @gapi.statistics.load.input_file_bytes
 rescue
@@ -183,6 +294,9 @@ module Google
 ##
 # The number of rows that have been loaded into the table. While an
 # import job is in the running state, this value may change.
+#
+# @return [Integer] The number of rows that have been loaded.
+#
 def output_rows
   Integer @gapi.statistics.load.output_rows
 rescue
@@ -192,6 +306,9 @@ module Google
 ##
 # The number of bytes that have been loaded into the table. While an
 # import job is in the running state, this value may change.
+#
+# @return [Integer] The number of bytes that have been loaded.
+#
 def output_bytes
   Integer @gapi.statistics.load.output_bytes
 rescue
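Taken together, the statistics readers documented above can be checked once a job finishes. A sketch against the 0.29.0 API, assuming the dataset and bucket exist:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset  = bigquery.dataset "my_dataset"

    load_job = dataset.load_job "my_new_table", "gs://my-bucket/file-name.csv"
    load_job.wait_until_done!

    load_job.input_files      # number of source files read
    load_job.input_file_bytes # bytes read from Cloud Storage
    load_job.output_rows      # rows written to the destination table
    load_job.output_bytes     # bytes written to the destination table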
--- a/data/lib/google/cloud/bigquery/project.rb
+++ b/data/lib/google/cloud/bigquery/project.rb
@@ -19,7 +19,7 @@ require "google/cloud/bigquery/service"
 require "google/cloud/bigquery/credentials"
 require "google/cloud/bigquery/dataset"
 require "google/cloud/bigquery/job"
-require "google/cloud/bigquery/query_data"
+require "google/cloud/bigquery/external"
 require "google/cloud/bigquery/project/list"
 require "google/cloud/bigquery/time"
 require "google/cloud/bigquery/schema"
@@ -128,6 +128,10 @@ module Google
 #   passed is a hash `{ myparam: "foo" }`, the query must use named
 #   query parameters. When set, `legacy_sql` will automatically be set
 #   to false and `standard_sql` to true.
+# @param [Hash<String|Symbol, External::DataSource>] external A Hash
+#   that represents the mapping of the external tables to the table
+#   names used in the SQL query. The hash keys are the table names, and
+#   the hash values are the external table objects. See {Project#query}.
 # @param [String] priority Specifies a priority for the query. Possible
 #   values include `INTERACTIVE` and `BATCH`. The default value is
 #   `INTERACTIVE`.
@@ -158,9 +162,9 @@ module Google
 #   table exists and contains data.
 # @param [Dataset, String] dataset The default dataset to use for
 #   unqualified table names in the query. Optional.
-# @param [
-#
-#
+# @param [String] project Specifies the default projectId to assume for
+#   any unqualified table names in the query. Only used if `dataset`
+#   option is set.
 # @param [Boolean] standard_sql Specifies whether to use BigQuery's
 #   [standard
 #   SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
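With the documented `project` option, unqualified table names can resolve against a dataset in another project. A sketch; the project and dataset IDs are illustrative:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    # `project` is only consulted when `dataset` is also given.
    job = bigquery.query_job "SELECT * FROM my_table",
                             dataset: "their_dataset",
                             project: "their-project-id"
    job.wait_until_done!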
@@ -192,6 +196,38 @@ module Google
 #   job. Queries that will have bytes billed beyond this limit will fail
 #   (without incurring a charge). Optional. If unspecified, this will be
 #   set to your project default.
+# @param [String] job_id A user-defined ID for the query job. The ID
+#   must contain only letters (a-z, A-Z), numbers (0-9), underscores
+#   (_), or dashes (-). The maximum length is 1,024 characters. If
+#   `job_id` is provided, then `prefix` will not be used.
+#
+#   See [Generating a job
+#   ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+# @param [String] prefix A string, usually human-readable, that will be
+#   prepended to a generated value to produce a unique job ID. For
+#   example, the prefix `daily_import_job_` can be given to generate a
+#   job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
+#   prefix must contain only letters (a-z, A-Z), numbers (0-9),
+#   underscores (_), or dashes (-). The maximum length of the entire ID
+#   is 1,024 characters. If `job_id` is provided, then `prefix` will not
+#   be used.
+#
+#   See [Generating a job
+#   ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+# @param [Hash] labels A hash of user-provided labels associated with
+#   the job. You can use these to organize and group your jobs. Label
+#   keys and values can be no longer than 63 characters, can only
+#   contain lowercase letters, numeric characters, underscores and
+#   dashes. International characters are allowed. Label values are
+#   optional. Label keys must start with a letter and each label in the
+#   list must have a different key.
+# @param [Array<String>, String] udfs User-defined function resources
+#   used in the query. May be either a code resource to load from a
+#   Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
+#   that contains code for a user-defined function (UDF). Providing an
+#   inline code resource is equivalent to providing a URI for a file
+#   containing the same code. See [User-Defined
+#   Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
 #
 # @return [Google::Cloud::Bigquery::QueryJob]
 #
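The new job-control parameters compose in a single call. A sketch; the job ID, label, and UDF URI are illustrative values, not part of the diff:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    job = bigquery.query_job "SELECT name FROM my_dataset.my_table",
                             job_id: "daily_report_0001",
                             labels: { "team" => "analytics" },
                             udfs:   "gs://my-bucket/my_udf.js"

    job.wait_until_done!
    job.data.each { |row| puts row[:name] } unless job.failed?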
@@ -205,7 +241,7 @@ module Google
 #
 #   job.wait_until_done!
 #   if !job.failed?
-#     job.query_results.each do |row|
+#     job.data.each do |row|
 #       puts row[:name]
 #     end
 #   end
@@ -221,7 +257,7 @@ module Google
 #
 #   job.wait_until_done!
 #   if !job.failed?
-#     job.query_results.each do |row|
+#     job.data.each do |row|
 #       puts row[:name]
 #     end
 #   end
@@ -238,7 +274,7 @@ module Google
 #
 #   job.wait_until_done!
 #   if !job.failed?
-#     job.query_results.each do |row|
+#     job.data.each do |row|
 #       puts row[:name]
 #     end
 #   end
@@ -255,32 +291,58 @@ module Google
 #
 #   job.wait_until_done!
 #   if !job.failed?
-#     job.query_results.each do |row|
+#     job.data.each do |row|
+#       puts row[:name]
+#     end
+#   end
+#
+# @example Query using external data source:
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   csv_url = "gs://bucket/path/to/data.csv"
+#   csv_table = bigquery.external csv_url do |csv|
+#     csv.autodetect = true
+#     csv.skip_leading_rows = 1
+#   end
+#
+#   job = bigquery.query_job "SELECT * FROM my_ext_table",
+#                            external: { my_ext_table: csv_table }
+#
+#   job.wait_until_done!
+#   if !job.failed?
+#     job.data.each do |row|
 #       puts row[:name]
 #     end
 #   end
 #
-def query_job query, params: nil,
-
+def query_job query, params: nil, external: nil,
+              priority: "INTERACTIVE", cache: true, table: nil,
+              create: nil, write: nil, dataset: nil, project: nil,
               standard_sql: nil, legacy_sql: nil, large_results: nil,
               flatten: nil, maximum_billing_tier: nil,
-              maximum_bytes_billed: nil
+              maximum_bytes_billed: nil, job_id: nil, prefix: nil,
+              labels: nil, udfs: nil
   ensure_service!
   options = { priority: priority, cache: cache, table: table,
               create: create, write: write,
               large_results: large_results, flatten: flatten,
-              dataset: dataset,
-              standard_sql:
+              dataset: dataset, project: project,
+              legacy_sql: legacy_sql, standard_sql: standard_sql,
               maximum_billing_tier: maximum_billing_tier,
               maximum_bytes_billed: maximum_bytes_billed,
-              params: params
+              params: params, external: external, labels: labels,
+              job_id: job_id, prefix: prefix, udfs: udfs }
   gapi = service.query_job query, options
   Job.from_gapi gapi, service
 end
 
 ##
-# Queries data using
-# method
+# Queries data using a synchronous method that blocks for a response. In
+# this method, a {QueryJob} is created and its results are saved
+# to a temporary table, then read from the table. Timeouts and transient
+# errors are generally handled as needed to complete the query.
 #
 # When using standard SQL and passing arguments using `params`, Ruby
 # types are mapped to BigQuery types as follows:
@@ -302,6 +364,8 @@ module Google
 # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types)
 # for an overview of each BigQuery data type, including allowed values.
 #
+# @see https://cloud.google.com/bigquery/querying-data Querying Data
+#
 # @param [String] query A query string, following the BigQuery [query
 #   syntax](https://cloud.google.com/bigquery/query-reference), of the
 #   query to execute. Example: "SELECT count(f1) FROM
@@ -313,22 +377,16 @@ module Google
 #   passed is a hash `{ myparam: "foo" }`, the query must use named
 #   query parameters. When set, `legacy_sql` will automatically be set
 #   to false and `standard_sql` to true.
+# @param [Hash<String|Symbol, External::DataSource>] external A Hash
+#   that represents the mapping of the external tables to the table
+#   names used in the SQL query. The hash keys are the table names, and
+#   the hash values are the external table objects. See {Project#query}.
 # @param [Integer] max The maximum number of rows of data to return per
 #   page of results. Setting this flag to a small value such as 1000 and
 #   then paging through results might improve reliability when the query
 #   result set is large. In addition to this limit, responses are also
 #   limited to 10 MB. By default, there is no maximum row count, and
 #   only the byte limit applies.
-# @param [Integer] timeout How long to wait for the query to complete,
-#   in milliseconds, before the request times out and returns. Note that
-#   this is only a timeout for the request, not the query. If the query
-#   takes longer to run than the timeout value, the call returns without
-#   any results and with QueryData#complete? set to false. The default
-#   value is 10000 milliseconds (10 seconds).
-# @param [Boolean] dryrun If set to `true`, BigQuery doesn't run the
-#   job. Instead, if the query is valid, BigQuery returns statistics
-#   about the job such as how many bytes would be processed. If the
-#   query is invalid, an error returns. The default value is `false`.
 # @param [Boolean] cache Whether to look for the result in the query
 #   cache. The query cache is a best-effort cache that will be flushed
 #   whenever tables in the query are modified. The default value is
@@ -361,7 +419,7 @@ module Google
 #   ignored; the query will be run as if `large_results` is true and
 #   `flatten` is false. Optional. The default value is false.
 #
-# @return [Google::Cloud::Bigquery::
+# @return [Google::Cloud::Bigquery::Data]
 #
 # @example Query using standard SQL:
 #   require "google/cloud/bigquery"
@@ -387,7 +445,7 @@ module Google
 #     puts row[:name]
 #   end
 #
-# @example Retrieve all rows: (See {QueryData#all})
+# @example Retrieve all rows: (See {Data#all})
 #   require "google/cloud/bigquery"
 #
 #   bigquery = Google::Cloud::Bigquery.new
@@ -426,16 +484,96 @@ module Google
 #     puts row[:name]
 #   end
 #
-
-
-
+# @example Query using external data source:
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   csv_url = "gs://bucket/path/to/data.csv"
+#   csv_table = bigquery.external csv_url do |csv|
+#     csv.autodetect = true
+#     csv.skip_leading_rows = 1
+#   end
+#
+#   data = bigquery.query "SELECT * FROM my_ext_table",
+#                         external: { my_ext_table: csv_table }
+#
+#   data.each do |row|
+#     puts row[:name]
+#   end
+#
+def query query, params: nil, external: nil, max: nil, cache: true,
+          dataset: nil, project: nil, standard_sql: nil, legacy_sql: nil
   ensure_service!
-  options = {
-  dataset: dataset, project: project,
+  options = { cache: cache, dataset: dataset, project: project,
               legacy_sql: legacy_sql, standard_sql: standard_sql,
-              params: params }
-
-
+              params: params, external: external }
+
+  job = query_job query, options
+  job.wait_until_done!
+
+  if job.failed?
+    begin
+      # raise to activate ruby exception cause handling
+      fail job.gapi_error
+    rescue => e
+      # wrap Google::Apis::Error with Google::Cloud::Error
+      raise Google::Cloud::Error.from_error(e)
+    end
+  end
+
+  job.data max: max
+end
+
+##
+# Creates a new External::DataSource (or subclass) object that
+# represents the external data source that can be queried from directly,
+# even though the data is not stored in BigQuery. Instead of loading or
+# streaming the data, this object references the external data source.
+#
+# @see https://cloud.google.com/bigquery/external-data-sources Querying
+#   External Data Sources
+#
+# @param [String, Array<String>] url The fully-qualified URL(s) that
+#   point to your data in Google Cloud. An attempt will be made to
+#   derive the format from the URLs provided.
+# @param [String|Symbol] format The data format. This value will be used
+#   even if the provided URLs are recognized as a different format.
+#   Optional.
+#
+#   The following values are supported:
+#
+#   * `csv` - CSV
+#   * `json` - [Newline-delimited JSON](http://jsonlines.org/)
+#   * `avro` - [Avro](http://avro.apache.org/)
+#   * `sheets` - Google Sheets
+#   * `datastore_backup` - Cloud Datastore backup
+#   * `bigtable` - Bigtable
+#
+# @return [External::DataSource] External data source.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   csv_url = "gs://bucket/path/to/data.csv"
+#   csv_table = bigquery.external csv_url do |csv|
+#     csv.autodetect = true
+#     csv.skip_leading_rows = 1
+#   end
+#
+#   data = bigquery.query "SELECT * FROM my_ext_table",
+#                         external: { my_ext_table: csv_table }
+#
+#   data.each do |row|
+#     puts row[:name]
+#   end
+#
+def external url, format: nil
+  ext = External.from_urls url, format
+  yield ext if block_given?
+  ext
 end
 
 ##
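When a URL's extension does not reveal its format, the `format` argument forces one. A sketch; the bucket and object names are illustrative:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    # A .txt extension hides the real format, so declare it explicitly.
    csv_table = bigquery.external "gs://my-bucket/data.txt", format: :csv do |csv|
      csv.skip_leading_rows = 1
    end

    data = bigquery.query "SELECT * FROM my_ext_table",
                          external: { my_ext_table: csv_table }
    data.each { |row| puts row }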
@@ -539,6 +677,11 @@ module Google
 #
 # @param [Boolean] all Whether to list all datasets, including hidden
 #   ones. The default is `false`.
+# @param [String] filter An expression for filtering the results of the
+#   request by label. The syntax is `labels.<name>[:<value>]`.
+#   Multiple filters can be `AND`ed together by connecting with a space.
+#   Example: `labels.department:receiving labels.active`. See [Filtering
+#   datasets using labels](https://cloud.google.com/bigquery/docs/labeling-datasets#filtering_datasets_using_labels).
 # @param [String] token A previously-returned page token representing
 #   part of the larger set of results to view.
 # @param [Integer] max Maximum number of datasets to return.
@@ -573,11 +716,11 @@ module Google
 #     puts dataset.name
 #   end
 #
-def datasets all: nil, token: nil, max: nil
+def datasets all: nil, filter: nil, token: nil, max: nil
   ensure_service!
-  options = { all: all, token: token, max: max }
+  options = { all: all, filter: filter, token: token, max: max }
   gapi = service.list_datasets options
-  Dataset::List.from_gapi gapi, service, all, max
+  Dataset::List.from_gapi gapi, service, all, filter, max
 end
 
 ##
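The label filter composes with the existing listing options. A sketch; the label names are illustrative:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    # Datasets labeled department=receiving that also carry an `active` label.
    datasets = bigquery.datasets filter: "labels.department:receiving labels.active"
    datasets.each { |dataset| puts dataset.dataset_id }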
@@ -757,10 +900,10 @@ module Google
 # configure the schema, otherwise the schema is returned empty and may
 # be configured directly.
 #
-# The returned schema can be passed to {Dataset#load} using the
-# option. However, for most use cases, the block yielded by
-# {Dataset#load} is a more convenient way to configure the schema
-# the destination table.
+# The returned schema can be passed to {Dataset#load} using the
+# `schema` option. However, for most use cases, the block yielded by
+# {Dataset#load} is a more convenient way to configure the schema
+# for the destination table.
 #
 # @yield [schema] a block for setting the schema
 # @yieldparam [Schema] schema the object accepting the schema
@@ -783,7 +926,7 @@ module Google
 #   dataset = bigquery.dataset "my_dataset"
 #
 #   gs_url = "gs://my-bucket/file-name.csv"
-#   load_job = dataset.load "my_new_table", gs_url, schema: schema
+#   load_job = dataset.load_job "my_new_table", gs_url, schema: schema
 #
 def schema
   s = Schema.from_gapi