google-cloud-bigquery 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/google-cloud-bigquery.rb +122 -0
- data/lib/google/cloud/bigquery.rb +353 -0
- data/lib/google/cloud/bigquery/copy_job.rb +99 -0
- data/lib/google/cloud/bigquery/credentials.rb +31 -0
- data/lib/google/cloud/bigquery/data.rb +244 -0
- data/lib/google/cloud/bigquery/dataset.rb +758 -0
- data/lib/google/cloud/bigquery/dataset/access.rb +509 -0
- data/lib/google/cloud/bigquery/dataset/list.rb +171 -0
- data/lib/google/cloud/bigquery/extract_job.rb +120 -0
- data/lib/google/cloud/bigquery/insert_response.rb +83 -0
- data/lib/google/cloud/bigquery/job.rb +301 -0
- data/lib/google/cloud/bigquery/job/list.rb +174 -0
- data/lib/google/cloud/bigquery/load_job.rb +203 -0
- data/lib/google/cloud/bigquery/project.rb +481 -0
- data/lib/google/cloud/bigquery/query_data.rb +238 -0
- data/lib/google/cloud/bigquery/query_job.rb +139 -0
- data/lib/google/cloud/bigquery/schema.rb +361 -0
- data/lib/google/cloud/bigquery/service.rb +502 -0
- data/lib/google/cloud/bigquery/table.rb +1141 -0
- data/lib/google/cloud/bigquery/table/list.rb +182 -0
- data/lib/google/cloud/bigquery/version.rb +22 -0
- data/lib/google/cloud/bigquery/view.rb +478 -0
- metadata +208 -0
@@ -0,0 +1,174 @@
|
|
1
|
+
# Copyright 2015 Google Inc. All rights reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
require "delegate"
|
17
|
+
|
18
|
+
module Google
|
19
|
+
module Cloud
|
20
|
+
module Bigquery
|
21
|
+
class Job
|
22
|
+
##
|
23
|
+
# Job::List is a special case Array with additional values.
|
24
|
+
class List < DelegateClass(::Array)
|
25
|
+
##
|
26
|
+
# If not empty, indicates that there are more records that match
|
27
|
+
# the request and this value should be passed to continue.
|
28
|
+
attr_accessor :token
|
29
|
+
|
30
|
+
# A hash of this page of results.
|
31
|
+
attr_accessor :etag
|
32
|
+
|
33
|
+
##
|
34
|
+
# @private Create a new Job::List with an array of jobs.
|
35
|
+
def initialize arr = []
|
36
|
+
super arr
|
37
|
+
end
|
38
|
+
|
39
|
+
##
|
40
|
+
# Whether there is a next page of jobs.
|
41
|
+
#
|
42
|
+
# @return [Boolean]
|
43
|
+
#
|
44
|
+
# @example
|
45
|
+
# require "google/cloud"
|
46
|
+
#
|
47
|
+
# gcloud = Google::Cloud.new
|
48
|
+
# bigquery = gcloud.bigquery
|
49
|
+
#
|
50
|
+
# jobs = bigquery.jobs
|
51
|
+
# if jobs.next?
|
52
|
+
# next_jobs = jobs.next
|
53
|
+
# end
|
54
|
+
def next?
|
55
|
+
!token.nil?
|
56
|
+
end
|
57
|
+
|
58
|
+
##
|
59
|
+
# Retrieve the next page of jobs.
|
60
|
+
#
|
61
|
+
# @return [Job::List]
|
62
|
+
#
|
63
|
+
# @example
|
64
|
+
# require "google/cloud"
|
65
|
+
#
|
66
|
+
# gcloud = Google::Cloud.new
|
67
|
+
# bigquery = gcloud.bigquery
|
68
|
+
#
|
69
|
+
# jobs = bigquery.jobs
|
70
|
+
# if jobs.next?
|
71
|
+
# next_jobs = jobs.next
|
72
|
+
# end
|
73
|
+
def next
|
74
|
+
return nil unless next?
|
75
|
+
ensure_service!
|
76
|
+
options = { all: @hidden, token: token, max: @max, filter: @filter }
|
77
|
+
gapi = @service.list_jobs options
|
78
|
+
self.class.from_gapi gapi, @service, @hidden, @max, @filter
|
79
|
+
end
|
80
|
+
|
81
|
+
##
|
82
|
+
# Retrieves all jobs by repeatedly loading {#next} until {#next?}
|
83
|
+
# returns `false`. Calls the given block once for each job, which is
|
84
|
+
# passed as the parameter.
|
85
|
+
#
|
86
|
+
# An Enumerator is returned if no block is given.
|
87
|
+
#
|
88
|
+
# This method may make several API calls until all jobs are retrieved.
|
89
|
+
# Be sure to use as narrow a search criteria as possible. Please use
|
90
|
+
# with caution.
|
91
|
+
#
|
92
|
+
# @param [Integer] request_limit The upper limit of API requests to
|
93
|
+
# make to load all jobs. Default is no limit.
|
94
|
+
# @yield [job] The block for accessing each job.
|
95
|
+
# @yieldparam [Job] job The job object.
|
96
|
+
#
|
97
|
+
# @return [Enumerator]
|
98
|
+
#
|
99
|
+
# @example Iterating each job by passing a block:
|
100
|
+
# require "google/cloud"
|
101
|
+
#
|
102
|
+
# gcloud = Google::Cloud.new
|
103
|
+
# bigquery = gcloud.bigquery
|
104
|
+
#
|
105
|
+
# bigquery.jobs.all do |job|
|
106
|
+
# puts job.state
|
107
|
+
# end
|
108
|
+
#
|
109
|
+
# @example Using the enumerator by not passing a block:
|
110
|
+
# require "google/cloud"
|
111
|
+
#
|
112
|
+
# gcloud = Google::Cloud.new
|
113
|
+
# bigquery = gcloud.bigquery
|
114
|
+
#
|
115
|
+
# all_states = bigquery.jobs.all.map do |job|
|
116
|
+
# job.state
|
117
|
+
# end
|
118
|
+
#
|
119
|
+
# @example Limit the number of API calls made:
|
120
|
+
# require "google/cloud"
|
121
|
+
#
|
122
|
+
# gcloud = Google::Cloud.new
|
123
|
+
# bigquery = gcloud.bigquery
|
124
|
+
#
|
125
|
+
# bigquery.jobs.all(request_limit: 10) do |job|
|
126
|
+
# puts job.state
|
127
|
+
# end
|
128
|
+
#
|
129
|
+
def all request_limit: nil
|
130
|
+
request_limit = request_limit.to_i if request_limit
|
131
|
+
unless block_given?
|
132
|
+
return enum_for(:all, request_limit: request_limit)
|
133
|
+
end
|
134
|
+
results = self
|
135
|
+
loop do
|
136
|
+
results.each { |r| yield r }
|
137
|
+
if request_limit
|
138
|
+
request_limit -= 1
|
139
|
+
break if request_limit < 0
|
140
|
+
end
|
141
|
+
break unless results.next?
|
142
|
+
results = results.next
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
##
|
147
|
+
# @private New Job::List from a Google API Client
|
148
|
+
# Google::Apis::BigqueryV2::JobList object.
|
149
|
+
def self.from_gapi gapi_list, service, hidden = nil, max = nil,
|
150
|
+
filter = nil
|
151
|
+
jobs = List.new(Array(gapi_list.jobs).map do |gapi_object|
|
152
|
+
Job.from_gapi gapi_object, service
|
153
|
+
end)
|
154
|
+
jobs.instance_variable_set :@token, gapi_list.next_page_token
|
155
|
+
jobs.instance_variable_set :@etag, gapi_list.etag
|
156
|
+
jobs.instance_variable_set :@service, service
|
157
|
+
jobs.instance_variable_set :@hidden, hidden
|
158
|
+
jobs.instance_variable_set :@max, max
|
159
|
+
jobs.instance_variable_set :@filter, filter
|
160
|
+
jobs
|
161
|
+
end
|
162
|
+
|
163
|
+
protected
|
164
|
+
|
165
|
+
##
|
166
|
+
# Raise an error unless an active service is available.
|
167
|
+
def ensure_service!
|
168
|
+
fail "Must have active connection" unless @service
|
169
|
+
end
|
170
|
+
end
|
171
|
+
end
|
172
|
+
end
|
173
|
+
end
|
174
|
+
end
|
@@ -0,0 +1,203 @@
|
|
1
|
+
# Copyright 2015 Google Inc. All rights reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
require "google/cloud/bigquery/service"
|
17
|
+
|
18
|
+
module Google
  module Cloud
    module Bigquery
      ##
      # # LoadJob
      #
      # A {Job} subclass representing a load operation that may be performed
      # on a {Table}. A LoadJob instance is created when you call {Table#load}.
      #
      # @see https://cloud.google.com/bigquery/loading-data-into-bigquery
      #   Loading Data Into BigQuery
      # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
      #   reference
      #
      class LoadJob < Job
        ##
        # The URI or URIs representing the Google Cloud Storage files from which
        # the operation loads data.
        def sources
          Array @gapi.configuration.load.source_uris
        end

        ##
        # The table into which the operation loads data. This is the table on
        # which {Table#load} was invoked. Returns a {Table} instance, or `nil`
        # if the job configuration has no destination table.
        def destination
          table = @gapi.configuration.load.destination_table
          return nil unless table
          retrieve_table table.project_id,
                         table.dataset_id,
                         table.table_id
        end

        ##
        # The delimiter used between fields in the source data. The default is a
        # comma (,).
        def delimiter
          @gapi.configuration.load.field_delimiter || ","
        end

        ##
        # The number of header rows at the top of a CSV file to skip. The
        # default value is `0`.
        def skip_leading_rows
          @gapi.configuration.load.skip_leading_rows || 0
        end

        ##
        # Checks if the character encoding of the data is UTF-8. This is the
        # default.
        def utf8?
          val = @gapi.configuration.load.encoding
          return true if val.nil?
          val == "UTF-8"
        end

        ##
        # Checks if the character encoding of the data is ISO-8859-1.
        def iso8859_1?
          val = @gapi.configuration.load.encoding
          val == "ISO-8859-1"
        end

        ##
        # The value that is used to quote data sections in a CSV file. The
        # default value is a double-quote (`"`). If your data does not contain
        # quoted sections, the value should be an empty string. If your data
        # contains quoted newline characters, {#quoted_newlines?} should return
        # `true`.
        def quote
          val = @gapi.configuration.load.quote
          val = "\"" if val.nil?
          val
        end

        ##
        # The maximum number of bad records that the load operation can ignore.
        # If the number of bad records exceeds this value, an error is returned.
        # The default value is `0`, which requires that all records be valid.
        def max_bad_records
          val = @gapi.configuration.load.max_bad_records
          val = 0 if val.nil?
          val
        end

        ##
        # Checks if quoted data sections may contain newline characters in a CSV
        # file. The default is `false`.
        def quoted_newlines?
          val = @gapi.configuration.load.allow_quoted_newlines
          # The BigQuery API default for allowQuotedNewlines is false, so a
          # nil (unset) value must report false, matching the documentation.
          val = false if val.nil?
          val
        end

        ##
        # Checks if the format of the source data is [newline-delimited
        # JSON](http://jsonlines.org/). The default is `false`.
        def json?
          val = @gapi.configuration.load.source_format
          val == "NEWLINE_DELIMITED_JSON"
        end

        ##
        # Checks if the format of the source data is CSV. The default is `true`.
        def csv?
          val = @gapi.configuration.load.source_format
          return true if val.nil?
          val == "CSV"
        end

        ##
        # Checks if the source data is a Google Cloud Datastore backup.
        def backup?
          val = @gapi.configuration.load.source_format
          val == "DATASTORE_BACKUP"
        end

        ##
        # Checks if the load operation accepts rows that are missing trailing
        # optional columns. The missing values are treated as nulls. If `false`,
        # records with missing trailing columns are treated as bad records, and
        # if there are too many bad records, an error is returned. The default
        # value is `false`. Only applicable to CSV, ignored for other formats.
        def allow_jagged_rows?
          val = @gapi.configuration.load.allow_jagged_rows
          val = false if val.nil?
          val
        end

        ##
        # Checks if the load operation allows extra values that are not
        # represented in the table schema. If `true`, the extra values are
        # ignored. If `false`, records with extra columns are treated as bad
        # records, and if there are too many bad records, an invalid error is
        # returned. The default is `false`.
        def ignore_unknown_values?
          val = @gapi.configuration.load.ignore_unknown_values
          val = false if val.nil?
          val
        end

        ##
        # The schema for the data. Returns a hash. Can be empty if the table
        # already has the correct schema (see {Table#schema}), or if the schema
        # can be inferred from the loaded data.
        def schema
          Schema.from_gapi(@gapi.configuration.load.schema).freeze
        end

        ##
        # The number of source files. Returns `nil` if the statistic is not
        # yet available from the service.
        def input_files
          Integer @gapi.statistics.load.input_files
        rescue
          nil
        end

        ##
        # The number of bytes of source data. Returns `nil` if the statistic
        # is not yet available from the service.
        def input_file_bytes
          Integer @gapi.statistics.load.input_file_bytes
        rescue
          nil
        end

        ##
        # The number of rows that have been loaded into the table. While an
        # import job is in the running state, this value may change. Returns
        # `nil` if the statistic is not yet available from the service.
        def output_rows
          Integer @gapi.statistics.load.output_rows
        rescue
          nil
        end

        ##
        # The number of bytes that have been loaded into the table. While an
        # import job is in the running state, this value may change. Returns
        # `nil` if the statistic is not yet available from the service.
        def output_bytes
          Integer @gapi.statistics.load.output_bytes
        rescue
          nil
        end
      end
    end
  end
end
|
@@ -0,0 +1,481 @@
|
|
1
|
+
# Copyright 2015 Google Inc. All rights reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
require "google/cloud/core/gce"
|
17
|
+
require "google/cloud/errors"
|
18
|
+
require "google/cloud/bigquery/service"
|
19
|
+
require "google/cloud/bigquery/credentials"
|
20
|
+
require "google/cloud/bigquery/dataset"
|
21
|
+
require "google/cloud/bigquery/job"
|
22
|
+
require "google/cloud/bigquery/query_data"
|
23
|
+
|
24
|
+
module Google
  module Cloud
    module Bigquery
      ##
      # # Project
      #
      # Projects are top-level containers in Google Cloud Platform. They store
      # information about billing and authorized users, and they contain
      # BigQuery data. Each project has a friendly name and a unique ID.
      #
      # Google::Cloud::Bigquery::Project is the main object for interacting
      # with Google BigQuery. {Google::Cloud::Bigquery::Dataset} objects are
      # created, accessed, and deleted by Google::Cloud::Bigquery::Project.
      #
      # See {Google::Cloud#bigquery}
      #
      # @example
      #   require "google/cloud"
      #
      #   gcloud = Google::Cloud.new
      #   bigquery = gcloud.bigquery
      #   dataset = bigquery.dataset "my_dataset"
      #   table = dataset.table "my_table"
      #
      class Project
        ##
        # @private The Service object.
        attr_accessor :service

        ##
        # Creates a new Service instance.
        #
        # See {Google::Cloud.bigquery}
        def initialize service
          @service = service
        end

        ##
        # The ID of the BigQuery project connected to.
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new "my-todo-project",
        #                              "/path/to/keyfile.json"
        #   bigquery = gcloud.bigquery
        #
        #   bigquery.project #=> "my-todo-project"
        #
        def project
          service.project
        end

        ##
        # @private Default project, taken from well-known environment
        # variables, falling back to GCE instance metadata.
        def self.default_project
          ENV["BIGQUERY_PROJECT"] ||
            ENV["GOOGLE_CLOUD_PROJECT"] ||
            ENV["GCLOUD_PROJECT"] ||
            Google::Cloud::Core::GCE.project_id
        end

        ##
        # Queries data using the [asynchronous
        # method](https://cloud.google.com/bigquery/querying-data).
        #
        # @param [String] query A query string, following the BigQuery [query
        #   syntax](https://cloud.google.com/bigquery/query-reference), of the
        #   query to execute. Example: "SELECT count(f1) FROM
        #   [myProjectId:myDatasetId.myTableId]".
        # @param [String] priority Specifies a priority for the query. Possible
        #   values include `INTERACTIVE` and `BATCH`. The default value is
        #   `INTERACTIVE`.
        # @param [Boolean] cache Whether to look for the result in the query
        #   cache. The query cache is a best-effort cache that will be flushed
        #   whenever tables in the query are modified. The default value is
        #   true.
        # @param [Table] table The destination table where the query results
        #   should be stored. If not present, a new table will be created to
        #   store the results.
        # @param [String] create Specifies whether the job is allowed to create
        #   new tables. Supported values: `needed` (create the table if it does
        #   not exist) and `never` (the table must already exist; a 'notFound'
        #   error is raised if it does not).
        # @param [String] write Specifies the action that occurs if the
        #   destination table already exists. Supported values: `truncate`
        #   (overwrite the table data), `append` (append the data to the
        #   table), and `empty` (a 'duplicate' error is returned in the job
        #   result if the table exists and contains data).
        # @param [Boolean] large_results If `true`, allows the query to produce
        #   arbitrarily large result tables at a slight cost in performance.
        #   Requires `table` parameter to be set.
        # @param [Boolean] flatten Flattens all nested and repeated fields in
        #   the query results. The default value is `true`. `large_results`
        #   parameter must be `true` if this is set to `false`.
        # @param [Dataset, String] dataset Specifies the default dataset to use
        #   for unqualified table names in the query.
        #
        # @return [Google::Cloud::Bigquery::QueryJob]
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   job = bigquery.query_job "SELECT name FROM " \
        #                            "[my_proj:my_data.my_table]"
        #
        #   job.wait_until_done!
        #   if !job.failed?
        #     job.query_results.each do |row|
        #       puts row["name"]
        #     end
        #   end
        #
        def query_job query, priority: "INTERACTIVE", cache: true, table: nil,
                      create: nil, write: nil, large_results: nil, flatten: nil,
                      dataset: nil
          ensure_service!
          job_opts = { priority: priority, cache: cache, table: table,
                       create: create, write: write,
                       large_results: large_results, flatten: flatten,
                       dataset: dataset }
          resp = service.query_job query, job_opts
          Job.from_gapi resp, service
        end

        ##
        # Queries data using the [synchronous
        # method](https://cloud.google.com/bigquery/querying-data).
        #
        # @param [String] query A query string, following the BigQuery [query
        #   syntax](https://cloud.google.com/bigquery/query-reference), of the
        #   query to execute. Example: "SELECT count(f1) FROM
        #   [myProjectId:myDatasetId.myTableId]".
        # @param [Integer] max The maximum number of rows of data to return per
        #   page of results. Setting this flag to a small value such as 1000
        #   and then paging through results might improve reliability when the
        #   query result set is large. In addition to this limit, responses
        #   are also limited to 10 MB. By default, there is no maximum row
        #   count, and only the byte limit applies.
        # @param [Integer] timeout How long to wait for the query to complete,
        #   in milliseconds, before the request times out and returns. Note
        #   that this is only a timeout for the request, not the query. If the
        #   query takes longer to run than the timeout value, the call returns
        #   without any results and with QueryData#complete? set to false. The
        #   default value is 10000 milliseconds (10 seconds).
        # @param [Boolean] dryrun If set to `true`, BigQuery doesn't run the
        #   job. Instead, if the query is valid, BigQuery returns statistics
        #   about the job such as how many bytes would be processed. If the
        #   query is invalid, an error returns. The default value is `false`.
        # @param [Boolean] cache Whether to look for the result in the query
        #   cache. The query cache is a best-effort cache that will be flushed
        #   whenever tables in the query are modified. The default value is
        #   true.
        # @param [String] dataset Specifies the default datasetId and projectId
        #   to assume for any unqualified table names in the query. If not set,
        #   all table names in the query string must be qualified in the format
        #   'datasetId.tableId'.
        # @param [String] project Specifies the default projectId to assume for
        #   any unqualified table names in the query. Only used if `dataset`
        #   option is set.
        #
        # @return [Google::Cloud::Bigquery::QueryData]
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   data = bigquery.query "SELECT name FROM [my_proj:my_data.my_table]"
        #   data.each do |row|
        #     puts row["name"]
        #   end
        #
        # @example Retrieve all rows: (See {QueryData#all})
        #   data = bigquery.query "SELECT name FROM [my_proj:my_data.my_table]"
        #   data.all do |row|
        #     puts row["name"]
        #   end
        #
        def query query, max: nil, timeout: 10000, dryrun: nil, cache: true,
                  dataset: nil, project: nil
          ensure_service!
          query_opts = { max: max, timeout: timeout, dryrun: dryrun,
                         cache: cache, dataset: dataset, project: project }
          resp = service.query query, query_opts
          QueryData.from_gapi resp, service
        end

        ##
        # Retrieves an existing dataset by ID.
        #
        # @param [String] dataset_id The ID of a dataset.
        #
        # @return [Google::Cloud::Bigquery::Dataset, nil] Returns `nil` if the
        #   dataset does not exist.
        #
        # @example
        #   require "google/cloud"
        #
        #   gcloud = Google::Cloud.new
        #   bigquery = gcloud.bigquery
        #
        #   dataset = bigquery.dataset "my_dataset"
        #   puts dataset.name
        #
        def dataset dataset_id
          ensure_service!
          Dataset.from_gapi service.get_dataset(dataset_id), service
        rescue Google::Cloud::NotFoundError
          nil
        end

        ##
        # Creates a new dataset.
        #
        # @param [String] dataset_id A unique ID for this dataset, without the
        #   project name. The ID must contain only letters (a-z, A-Z), numbers
        #   (0-9), or underscores (_). The maximum length is 1,024 characters.
        # @param [String] name A descriptive name for the dataset.
        # @param [String] description A user-friendly description of the
        #   dataset.
        # @param [Integer] expiration The default lifetime of all tables in the
        #   dataset, in milliseconds. The minimum value is 3600000 milliseconds
        #   (one hour).
        # @param [String] location The geographic location where the dataset
        #   should reside. Possible values include `EU` and `US`. The default
        #   value is `US`.
        # @yield [access] a block for setting rules
        # @yieldparam [Dataset::Access] access the object accepting rules
        #
        # @return [Google::Cloud::Bigquery::Dataset]
        #
        # @example
        #   dataset = bigquery.create_dataset "my_dataset"
        #
        # @example A name and description can be provided:
        #   dataset = bigquery.create_dataset "my_dataset",
        #                                     name: "My Dataset",
        #                                     description: "This is my Dataset"
        #
        # @example Or, configure access with a block: (See {Dataset::Access})
        #   dataset = bigquery.create_dataset "my_dataset" do |access|
        #     access.add_writer_user "writers@example.com"
        #   end
        #
        def create_dataset dataset_id, name: nil, description: nil,
                           expiration: nil, location: nil
          ensure_service!

          ds_ref = Google::Apis::BigqueryV2::DatasetReference.new(
            project_id: project, dataset_id: dataset_id)
          new_ds = Google::Apis::BigqueryV2::Dataset.new(
            dataset_reference: ds_ref)

          # Can set location only on creation, no Dataset#location method
          new_ds.update! location: location unless location.nil?

          updater = Dataset::Updater.new(new_ds).tap do |u|
            u.name = name unless name.nil?
            u.description = description unless description.nil?
            u.default_expiration = expiration unless expiration.nil?
          end

          if block_given?
            yield updater
            updater.check_for_mutated_access!
          end

          gapi = service.insert_dataset new_ds
          Dataset.from_gapi gapi, service
        end

        ##
        # Retrieves the list of datasets belonging to the project.
        #
        # @param [Boolean] all Whether to list all datasets, including hidden
        #   ones. The default is `false`.
        # @param [String] token A previously-returned page token representing
        #   part of the larger set of results to view.
        # @param [Integer] max Maximum number of datasets to return.
        #
        # @return [Array<Google::Cloud::Bigquery::Dataset>] (See
        #   {Google::Cloud::Bigquery::Dataset::List})
        #
        # @example
        #   datasets = bigquery.datasets
        #   datasets.each do |dataset|
        #     puts dataset.name
        #   end
        #
        # @example Retrieve hidden datasets with the `all` optional arg:
        #   all_datasets = bigquery.datasets all: true
        #
        # @example Retrieve all datasets: (See {Dataset::List#all})
        #   bigquery.datasets.all do |dataset|
        #     puts dataset.name
        #   end
        #
        def datasets all: nil, token: nil, max: nil
          ensure_service!
          list_opts = { all: all, token: token, max: max }
          resp = service.list_datasets list_opts
          Dataset::List.from_gapi resp, service, all, max
        end

        ##
        # Retrieves an existing job by ID.
        #
        # @param [String] job_id The ID of a job.
        #
        # @return [Google::Cloud::Bigquery::Job, nil] Returns `nil` if the job
        #   does not exist.
        #
        # @example
        #   job = bigquery.job "my_job"
        #
        def job job_id
          ensure_service!
          Job.from_gapi service.get_job(job_id), service
        rescue Google::Cloud::NotFoundError
          nil
        end

        ##
        # Retrieves the list of jobs belonging to the project.
        #
        # @param [Boolean] all Whether to display jobs owned by all users in
        #   the project. The default is `false`.
        # @param [String] token A previously-returned page token representing
        #   part of the larger set of results to view.
        # @param [Integer] max Maximum number of jobs to return.
        # @param [String] filter A filter for job state. Acceptable values
        #   are: `done` (finished jobs), `pending` (pending jobs), and
        #   `running` (running jobs).
        #
        # @return [Array<Google::Cloud::Bigquery::Job>] (See
        #   {Google::Cloud::Bigquery::Job::List})
        #
        # @example
        #   bigquery.jobs.each do |job|
        #     # process job
        #   end
        #
        # @example Retrieve only running jobs using the `filter` optional arg:
        #   running_jobs = bigquery.jobs filter: "running"
        #
        # @example Retrieve all jobs: (See {Job::List#all})
        #   bigquery.jobs.all do |job|
        #     # process job
        #   end
        #
        def jobs all: nil, token: nil, max: nil, filter: nil
          ensure_service!
          list_opts = { all: all, token: token, max: max, filter: filter }
          resp = service.list_jobs list_opts
          Job::List.from_gapi resp, service, all, max, filter
        end

        protected

        ##
        # Raise an error unless an active service is available.
        def ensure_service!
          fail "Must have active connection" unless service
        end
      end
    end
  end
end
|