google-cloud-bigquery 0.20.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/google-cloud-bigquery.rb +122 -0
- data/lib/google/cloud/bigquery.rb +353 -0
- data/lib/google/cloud/bigquery/copy_job.rb +99 -0
- data/lib/google/cloud/bigquery/credentials.rb +31 -0
- data/lib/google/cloud/bigquery/data.rb +244 -0
- data/lib/google/cloud/bigquery/dataset.rb +758 -0
- data/lib/google/cloud/bigquery/dataset/access.rb +509 -0
- data/lib/google/cloud/bigquery/dataset/list.rb +171 -0
- data/lib/google/cloud/bigquery/extract_job.rb +120 -0
- data/lib/google/cloud/bigquery/insert_response.rb +83 -0
- data/lib/google/cloud/bigquery/job.rb +301 -0
- data/lib/google/cloud/bigquery/job/list.rb +174 -0
- data/lib/google/cloud/bigquery/load_job.rb +203 -0
- data/lib/google/cloud/bigquery/project.rb +481 -0
- data/lib/google/cloud/bigquery/query_data.rb +238 -0
- data/lib/google/cloud/bigquery/query_job.rb +139 -0
- data/lib/google/cloud/bigquery/schema.rb +361 -0
- data/lib/google/cloud/bigquery/service.rb +502 -0
- data/lib/google/cloud/bigquery/table.rb +1141 -0
- data/lib/google/cloud/bigquery/table/list.rb +182 -0
- data/lib/google/cloud/bigquery/version.rb +22 -0
- data/lib/google/cloud/bigquery/view.rb +478 -0
- metadata +208 -0
@@ -0,0 +1,174 @@
|
|
1
|
+
# Copyright 2015 Google Inc. All rights reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
require "delegate"
|
17
|
+
|
18
|
+
module Google
  module Cloud
    module Bigquery
      class Job
        ##
        # Job::List is a special case Array with additional values.
        class List < DelegateClass(::Array)
          ##
          # If not empty, indicates that there are more records that match
          # the request and this value should be passed to continue.
          attr_accessor :token

          # A hash of this page of results.
          attr_accessor :etag

          ##
          # @private Create a new Job::List with an array of jobs.
          def initialize jobs = []
            super jobs
          end

          ##
          # Whether there is a next page of jobs.
          #
          # @return [Boolean]
          #
          # @example
          #   require "google/cloud"
          #
          #   gcloud = Google::Cloud.new
          #   bigquery = gcloud.bigquery
          #
          #   jobs = bigquery.jobs
          #   if jobs.next?
          #     next_jobs = jobs.next
          #   end
          def next?
            !token.nil?
          end

          ##
          # Retrieve the next page of jobs.
          #
          # @return [Job::List, nil] The next page of jobs, or `nil` when
          #   this is the last page.
          #
          # @example
          #   require "google/cloud"
          #
          #   gcloud = Google::Cloud.new
          #   bigquery = gcloud.bigquery
          #
          #   jobs = bigquery.jobs
          #   if jobs.next?
          #     next_jobs = jobs.next
          #   end
          def next
            return nil unless next?
            ensure_service!
            next_options = { all: @hidden, token: token, max: @max,
                             filter: @filter }
            next_gapi = @service.list_jobs next_options
            self.class.from_gapi next_gapi, @service, @hidden, @max, @filter
          end

          ##
          # Retrieves all jobs by repeatedly loading {#next} until {#next?}
          # returns `false`. Calls the given block once for each job, which
          # is passed as the parameter.
          #
          # An Enumerator is returned if no block is given.
          #
          # This method may make several API calls until all jobs are
          # retrieved. Be sure to use as narrow a search criteria as
          # possible. Please use with caution.
          #
          # @param [Integer] request_limit The upper limit of API requests to
          #   make to load all jobs. Default is no limit.
          # @yield [job] The block for accessing each job.
          # @yieldparam [Job] job The job object.
          #
          # @return [Enumerator]
          #
          # @example Iterating each job by passing a block:
          #   require "google/cloud"
          #
          #   gcloud = Google::Cloud.new
          #   bigquery = gcloud.bigquery
          #
          #   bigquery.jobs.all do |job|
          #     puts job.state
          #   end
          #
          # @example Using the enumerator by not passing a block:
          #   require "google/cloud"
          #
          #   gcloud = Google::Cloud.new
          #   bigquery = gcloud.bigquery
          #
          #   all_states = bigquery.jobs.all.map do |job|
          #     job.state
          #   end
          #
          # @example Limit the number of API calls made:
          #   require "google/cloud"
          #
          #   gcloud = Google::Cloud.new
          #   bigquery = gcloud.bigquery
          #
          #   bigquery.jobs.all(request_limit: 10) do |job|
          #     puts job.state
          #   end
          #
          def all request_limit: nil
            request_limit = request_limit.to_i if request_limit
            return enum_for :all, request_limit: request_limit unless
              block_given?
            page = self
            loop do
              page.each { |job| yield job }
              # Count this request against the limit only when one was given.
              if request_limit
                request_limit -= 1
                break if request_limit < 0
              end
              break unless page.next?
              page = page.next
            end
          end

          ##
          # @private New Job::List from a Google API Client
          # Google::Apis::BigqueryV2::JobList object.
          def self.from_gapi gapi_list, service, hidden = nil, max = nil,
                             filter = nil
            items = Array(gapi_list.jobs).map do |gapi_object|
              Job.from_gapi gapi_object, service
            end
            list = List.new items
            list.instance_variable_set :@token, gapi_list.next_page_token
            list.instance_variable_set :@etag, gapi_list.etag
            list.instance_variable_set :@service, service
            list.instance_variable_set :@hidden, hidden
            list.instance_variable_set :@max, max
            list.instance_variable_set :@filter, filter
            list
          end

          protected

          ##
          # Raise an error unless an active service is available.
          def ensure_service!
            raise "Must have active connection" unless @service
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,203 @@
|
|
1
|
+
# Copyright 2015 Google Inc. All rights reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
require "google/cloud/bigquery/service"
|
17
|
+
|
18
|
+
module Google
  module Cloud
    module Bigquery
      ##
      # # LoadJob
      #
      # A {Job} subclass representing a load operation that may be performed
      # on a {Table}. A LoadJob instance is created when you call
      # {Table#load}.
      #
      # @see https://cloud.google.com/bigquery/loading-data-into-bigquery
      #   Loading Data Into BigQuery
      # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
      #   reference
      #
      class LoadJob < Job
        ##
        # The URI or URIs representing the Google Cloud Storage files from
        # which the operation loads data.
        #
        # @return [Array<String>]
        def sources
          Array @gapi.configuration.load.source_uris
        end

        ##
        # The table into which the operation loads data. This is the table on
        # which {Table#load} was invoked.
        #
        # @return [Table, nil] Returns `nil` if the job configuration has no
        #   destination table.
        def destination
          table = @gapi.configuration.load.destination_table
          return nil unless table
          retrieve_table table.project_id,
                         table.dataset_id,
                         table.table_id
        end

        ##
        # The delimiter used between fields in the source data. The default
        # is a comma (,).
        #
        # @return [String]
        def delimiter
          @gapi.configuration.load.field_delimiter || ","
        end

        ##
        # The number of header rows at the top of a CSV file to skip. The
        # default value is `0`.
        #
        # @return [Integer]
        def skip_leading_rows
          @gapi.configuration.load.skip_leading_rows || 0
        end

        ##
        # Checks if the character encoding of the data is UTF-8. This is the
        # default.
        #
        # @return [Boolean]
        def utf8?
          val = @gapi.configuration.load.encoding
          return true if val.nil?
          val == "UTF-8"
        end

        ##
        # Checks if the character encoding of the data is ISO-8859-1.
        #
        # @return [Boolean]
        def iso8859_1?
          val = @gapi.configuration.load.encoding
          val == "ISO-8859-1"
        end

        ##
        # The value that is used to quote data sections in a CSV file. The
        # default value is a double-quote (`"`). If your data does not
        # contain quoted sections, the value should be an empty string. If
        # your data contains quoted newline characters, {#quoted_newlines?}
        # should return `true`.
        #
        # @return [String]
        def quote
          val = @gapi.configuration.load.quote
          val = "\"" if val.nil?
          val
        end

        ##
        # The maximum number of bad records that the load operation can
        # ignore. If the number of bad records exceeds this value, an error
        # is returned. The default value is `0`, which requires that all
        # records be valid.
        #
        # @return [Integer]
        def max_bad_records
          val = @gapi.configuration.load.max_bad_records
          val = 0 if val.nil?
          val
        end

        ##
        # Checks if quoted data sections may contain newline characters in a
        # CSV file. The default is `false`.
        #
        # @return [Boolean]
        def quoted_newlines?
          val = @gapi.configuration.load.allow_quoted_newlines
          # The BigQuery API default for allowQuotedNewlines is false, so an
          # unset value must report false. (Previously this defaulted to
          # true, contradicting both this documentation and the service.)
          val = false if val.nil?
          val
        end

        ##
        # Checks if the format of the source data is [newline-delimited
        # JSON](http://jsonlines.org/). The default is `false`.
        #
        # @return [Boolean]
        def json?
          val = @gapi.configuration.load.source_format
          val == "NEWLINE_DELIMITED_JSON"
        end

        ##
        # Checks if the format of the source data is CSV. The default is
        # `true`.
        #
        # @return [Boolean]
        def csv?
          val = @gapi.configuration.load.source_format
          return true if val.nil?
          val == "CSV"
        end

        ##
        # Checks if the source data is a Google Cloud Datastore backup.
        #
        # @return [Boolean]
        def backup?
          val = @gapi.configuration.load.source_format
          val == "DATASTORE_BACKUP"
        end

        ##
        # Checks if the load operation accepts rows that are missing trailing
        # optional columns. The missing values are treated as nulls. If
        # `false`, records with missing trailing columns are treated as bad
        # records, and if there are too many bad records, an error is
        # returned. The default value is `false`. Only applicable to CSV,
        # ignored for other formats.
        #
        # @return [Boolean]
        def allow_jagged_rows?
          val = @gapi.configuration.load.allow_jagged_rows
          val = false if val.nil?
          val
        end

        ##
        # Checks if the load operation allows extra values that are not
        # represented in the table schema. If `true`, the extra values are
        # ignored. If `false`, records with extra columns are treated as bad
        # records, and if there are too many bad records, an invalid error is
        # returned. The default is `false`.
        #
        # @return [Boolean]
        def ignore_unknown_values?
          val = @gapi.configuration.load.ignore_unknown_values
          val = false if val.nil?
          val
        end

        ##
        # The schema for the data. Returns a hash. Can be empty if the table
        # already has the correct schema (see {Table#schema}), or if the
        # schema can be inferred from the loaded data.
        def schema
          Schema.from_gapi(@gapi.configuration.load.schema).freeze
        end

        ##
        # The number of source files.
        #
        # @return [Integer, nil] Returns `nil` if the statistic is not yet
        #   available or not parseable as an integer.
        def input_files
          Integer @gapi.statistics.load.input_files
        rescue StandardError
          nil
        end

        ##
        # The number of bytes of source data.
        #
        # @return [Integer, nil] Returns `nil` if the statistic is not yet
        #   available or not parseable as an integer.
        def input_file_bytes
          Integer @gapi.statistics.load.input_file_bytes
        rescue StandardError
          nil
        end

        ##
        # The number of rows that have been loaded into the table. While an
        # import job is in the running state, this value may change.
        #
        # @return [Integer, nil] Returns `nil` if the statistic is not yet
        #   available or not parseable as an integer.
        def output_rows
          Integer @gapi.statistics.load.output_rows
        rescue StandardError
          nil
        end

        ##
        # The number of bytes that have been loaded into the table. While an
        # import job is in the running state, this value may change.
        #
        # @return [Integer, nil] Returns `nil` if the statistic is not yet
        #   available or not parseable as an integer.
        def output_bytes
          Integer @gapi.statistics.load.output_bytes
        rescue StandardError
          nil
        end
      end
    end
  end
end
|
@@ -0,0 +1,481 @@
|
|
1
|
+
# Copyright 2015 Google Inc. All rights reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
require "google/cloud/core/gce"
|
17
|
+
require "google/cloud/errors"
|
18
|
+
require "google/cloud/bigquery/service"
|
19
|
+
require "google/cloud/bigquery/credentials"
|
20
|
+
require "google/cloud/bigquery/dataset"
|
21
|
+
require "google/cloud/bigquery/job"
|
22
|
+
require "google/cloud/bigquery/query_data"
|
23
|
+
|
24
|
+
module Google
  module Cloud
    module Bigquery
      ##
      # # Project
      #
      # Projects are top-level containers in Google Cloud Platform. They store
      # information about billing and authorized users, and they contain
      # BigQuery data. Each project has a friendly name and a unique ID.
      #
      # Google::Cloud::Bigquery::Project is the main object for interacting
      # with Google BigQuery. {Google::Cloud::Bigquery::Dataset} objects are
      # created, accessed, and deleted by Google::Cloud::Bigquery::Project.
      #
      # See {Google::Cloud#bigquery}
      #
      # @example
      #   require "google/cloud"
      #
      #   gcloud = Google::Cloud.new
      #   bigquery = gcloud.bigquery
      #   dataset = bigquery.dataset "my_dataset"
      #   table = dataset.table "my_table"
      #
      class Project
        ##
        # @private The Service object.
        attr_accessor :service

        ##
        # @private Creates a new Project instance backed by the given
        # Service.
        #
        # See {Google::Cloud.bigquery}
        def initialize service
          @service = service
        end

        ##
        # The BigQuery project connected to.
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new "my-todo-project",
        #                              "/path/to/keyfile.json"
        #   bigquery = gcloud.bigquery
        #
        #   bigquery.project #=> "my-todo-project"
        #
        def project
          service.project
        end

        ##
        # @private Default project, resolved from environment variables or,
        # failing those, Google Compute Engine instance metadata.
        def self.default_project
          ENV["BIGQUERY_PROJECT"] ||
            ENV["GOOGLE_CLOUD_PROJECT"] ||
            ENV["GCLOUD_PROJECT"] ||
            Google::Cloud::Core::GCE.project_id
        end

        ##
        # Queries data using the [asynchronous
        # method](https://cloud.google.com/bigquery/querying-data).
        #
        # @param [String] query A query string, following the BigQuery [query
        #   syntax](https://cloud.google.com/bigquery/query-reference), of the
        #   query to execute. Example: "SELECT count(f1) FROM
        #   [myProjectId:myDatasetId.myTableId]".
        # @param [String] priority Specifies a priority for the query.
        #   Possible values include `INTERACTIVE` and `BATCH`. The default
        #   value is `INTERACTIVE`.
        # @param [Boolean] cache Whether to look for the result in the query
        #   cache. The query cache is a best-effort cache that will be
        #   flushed whenever tables in the query are modified. The default
        #   value is true. For more information, see [query
        #   caching](https://developers.google.com/bigquery/querying-data).
        # @param [Table] table The destination table where the query results
        #   should be stored. If not present, a new table will be created to
        #   store the results.
        # @param [String] create Specifies whether the job is allowed to
        #   create new tables.
        #
        #   The following values are supported:
        #
        #   * `needed` - Create the table if it does not exist.
        #   * `never` - The table must already exist. A 'notFound' error is
        #     raised if the table does not exist.
        # @param [String] write Specifies the action that occurs if the
        #   destination table already exists.
        #
        #   The following values are supported:
        #
        #   * `truncate` - BigQuery overwrites the table data.
        #   * `append` - BigQuery appends the data to the table.
        #   * `empty` - A 'duplicate' error is returned in the job result if
        #     the table exists and contains data.
        # @param [Boolean] large_results If `true`, allows the query to
        #   produce arbitrarily large result tables at a slight cost in
        #   performance. Requires `table` parameter to be set.
        # @param [Boolean] flatten Flattens all nested and repeated fields in
        #   the query results. The default value is `true`. `large_results`
        #   parameter must be `true` if this is set to `false`.
        # @param [Dataset, String] dataset Specifies the default dataset to
        #   use for unqualified table names in the query.
        #
        # @return [Google::Cloud::Bigquery::QueryJob]
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   job = bigquery.query_job "SELECT name FROM " \
        #                            "[my_proj:my_data.my_table]"
        #
        #   job.wait_until_done!
        #   if !job.failed?
        #     job.query_results.each do |row|
        #       puts row["name"]
        #     end
        #   end
        #
        def query_job query, priority: "INTERACTIVE", cache: true, table: nil,
                      create: nil, write: nil, large_results: nil,
                      flatten: nil, dataset: nil
          ensure_service!
          options = { priority: priority, cache: cache, table: table,
                      create: create, write: write,
                      large_results: large_results, flatten: flatten,
                      dataset: dataset }
          gapi = service.query_job query, options
          Job.from_gapi gapi, service
        end

        ##
        # Queries data using the [synchronous
        # method](https://cloud.google.com/bigquery/querying-data).
        #
        # @param [String] query A query string, following the BigQuery [query
        #   syntax](https://cloud.google.com/bigquery/query-reference), of the
        #   query to execute. Example: "SELECT count(f1) FROM
        #   [myProjectId:myDatasetId.myTableId]".
        # @param [Integer] max The maximum number of rows of data to return
        #   per page of results. Setting this flag to a small value such as
        #   1000 and then paging through results might improve reliability
        #   when the query result set is large. In addition to this limit,
        #   responses are also limited to 10 MB. By default, there is no
        #   maximum row count, and only the byte limit applies.
        # @param [Integer] timeout How long to wait for the query to
        #   complete, in milliseconds, before the request times out and
        #   returns. Note that this is only a timeout for the request, not
        #   the query. If the query takes longer to run than the timeout
        #   value, the call returns without any results and with
        #   QueryData#complete? set to false. The default value is 10000
        #   milliseconds (10 seconds).
        # @param [Boolean] dryrun If set to `true`, BigQuery doesn't run the
        #   job. Instead, if the query is valid, BigQuery returns statistics
        #   about the job such as how many bytes would be processed. If the
        #   query is invalid, an error returns. The default value is `false`.
        # @param [Boolean] cache Whether to look for the result in the query
        #   cache. The query cache is a best-effort cache that will be
        #   flushed whenever tables in the query are modified. The default
        #   value is true. For more information, see [query
        #   caching](https://developers.google.com/bigquery/querying-data).
        # @param [String] dataset Specifies the default datasetId and
        #   projectId to assume for any unqualified table names in the query.
        #   If not set, all table names in the query string must be qualified
        #   in the format 'datasetId.tableId'.
        # @param [String] project Specifies the default projectId to assume
        #   for any unqualified table names in the query. Only used if
        #   `dataset` option is set.
        #
        # @return [Google::Cloud::Bigquery::QueryData]
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   data = bigquery.query "SELECT name FROM [my_proj:my_data.my_table]"
        #   data.each do |row|
        #     puts row["name"]
        #   end
        #
        # @example Retrieve all rows: (See {QueryData#all})
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   data = bigquery.query "SELECT name FROM [my_proj:my_data.my_table]"
        #   data.all do |row|
        #     puts row["name"]
        #   end
        #
        def query query, max: nil, timeout: 10000, dryrun: nil, cache: true,
                  dataset: nil, project: nil
          ensure_service!
          options = { max: max, timeout: timeout, dryrun: dryrun,
                      cache: cache, dataset: dataset, project: project }
          gapi = service.query query, options
          QueryData.from_gapi gapi, service
        end

        ##
        # Retrieves an existing dataset by ID.
        #
        # @param [String] dataset_id The ID of a dataset.
        #
        # @return [Google::Cloud::Bigquery::Dataset, nil] Returns `nil` if
        #   the dataset does not exist.
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   dataset = bigquery.dataset "my_dataset"
        #   puts dataset.name
        #
        def dataset dataset_id
          ensure_service!
          gapi = service.get_dataset dataset_id
          Dataset.from_gapi gapi, service
        rescue Google::Cloud::NotFoundError
          nil
        end

        ##
        # Creates a new dataset.
        #
        # @param [String] dataset_id A unique ID for this dataset, without
        #   the project name. The ID must contain only letters (a-z, A-Z),
        #   numbers (0-9), or underscores (_). The maximum length is 1,024
        #   characters.
        # @param [String] name A descriptive name for the dataset.
        # @param [String] description A user-friendly description of the
        #   dataset.
        # @param [Integer] expiration The default lifetime of all tables in
        #   the dataset, in milliseconds. The minimum value is 3600000
        #   milliseconds (one hour).
        # @param [String] location The geographic location where the dataset
        #   should reside. Possible values include `EU` and `US`. The default
        #   value is `US`.
        # @yield [access] a block for setting rules
        # @yieldparam [Dataset::Access] access the object accepting rules
        #
        # @return [Google::Cloud::Bigquery::Dataset]
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   dataset = bigquery.create_dataset "my_dataset"
        #
        # @example A name and description can be provided:
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   dataset = bigquery.create_dataset "my_dataset",
        #                                     name: "My Dataset",
        #                                     description: "This is my Dataset"
        #
        # @example Access rules are configured with a block: (See {Dataset::Access})
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   dataset = bigquery.create_dataset "my_dataset" do |access|
        #     access.add_writer_user "writers@example.com"
        #   end
        #
        def create_dataset dataset_id, name: nil, description: nil,
                           expiration: nil, location: nil
          ensure_service!

          new_ds = Google::Apis::BigqueryV2::Dataset.new(
            dataset_reference: Google::Apis::BigqueryV2::DatasetReference.new(
              project_id: project, dataset_id: dataset_id))

          # Can set location only on creation, no Dataset#location method
          new_ds.update! location: location unless location.nil?

          updater = Dataset::Updater.new(new_ds).tap do |b|
            b.name = name unless name.nil?
            b.description = description unless description.nil?
            b.default_expiration = expiration unless expiration.nil?
          end

          if block_given?
            yield updater
            updater.check_for_mutated_access!
          end

          gapi = service.insert_dataset new_ds
          Dataset.from_gapi gapi, service
        end

        ##
        # Retrieves the list of datasets belonging to the project.
        #
        # @param [Boolean] all Whether to list all datasets, including hidden
        #   ones. The default is `false`.
        # @param [String] token A previously-returned page token representing
        #   part of the larger set of results to view.
        # @param [Integer] max Maximum number of datasets to return.
        #
        # @return [Array<Google::Cloud::Bigquery::Dataset>] (See
        #   {Google::Cloud::Bigquery::Dataset::List})
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   datasets = bigquery.datasets
        #   datasets.each do |dataset|
        #     puts dataset.name
        #   end
        #
        # @example Retrieve hidden datasets with the `all` optional arg:
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   all_datasets = bigquery.datasets all: true
        #
        # @example Retrieve all datasets: (See {Dataset::List#all})
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   datasets = bigquery.datasets
        #   datasets.all do |dataset|
        #     puts dataset.name
        #   end
        #
        def datasets all: nil, token: nil, max: nil
          ensure_service!
          options = { all: all, token: token, max: max }
          gapi = service.list_datasets options
          Dataset::List.from_gapi gapi, service, all, max
        end

        ##
        # Retrieves an existing job by ID.
        #
        # @param [String] job_id The ID of a job.
        #
        # @return [Google::Cloud::Bigquery::Job, nil] Returns `nil` if the
        #   job does not exist.
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   job = bigquery.job "my_job"
        #
        def job job_id
          ensure_service!
          gapi = service.get_job job_id
          Job.from_gapi gapi, service
        rescue Google::Cloud::NotFoundError
          nil
        end

        ##
        # Retrieves the list of jobs belonging to the project.
        #
        # @param [Boolean] all Whether to display jobs owned by all users in
        #   the project. The default is `false`.
        # @param [String] token A previously-returned page token representing
        #   part of the larger set of results to view.
        # @param [Integer] max Maximum number of jobs to return.
        # @param [String] filter A filter for job state.
        #
        #   Acceptable values are:
        #
        #   * `done` - Finished jobs
        #   * `pending` - Pending jobs
        #   * `running` - Running jobs
        #
        # @return [Array<Google::Cloud::Bigquery::Job>] (See
        #   {Google::Cloud::Bigquery::Job::List})
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   jobs = bigquery.jobs
        #   jobs.each do |job|
        #     # process job
        #   end
        #
        # @example Retrieve only running jobs using the `filter` optional arg:
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   running_jobs = bigquery.jobs filter: "running"
        #   running_jobs.each do |job|
        #     # process job
        #   end
        #
        # @example Retrieve all jobs: (See {Job::List#all})
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   jobs = bigquery.jobs
        #   jobs.all do |job|
        #     # process job
        #   end
        #
        def jobs all: nil, token: nil, max: nil, filter: nil
          ensure_service!
          options = { all: all, token: token, max: max, filter: filter }
          gapi = service.list_jobs options
          Job::List.from_gapi gapi, service, all, max, filter
        end

        protected

        ##
        # Raise an error unless an active service is available.
        def ensure_service!
          raise "Must have active connection" unless service
        end
      end
    end
  end
end
|