gcloud 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/AUTHENTICATION.md +3 -3
- data/CHANGELOG.md +12 -0
- data/OVERVIEW.md +30 -0
- data/lib/gcloud.rb +126 -9
- data/lib/gcloud/bigquery.rb +399 -0
- data/lib/gcloud/bigquery/connection.rb +592 -0
- data/lib/gcloud/bigquery/copy_job.rb +98 -0
- data/lib/gcloud/bigquery/credentials.rb +29 -0
- data/lib/gcloud/bigquery/data.rb +134 -0
- data/lib/gcloud/bigquery/dataset.rb +662 -0
- data/lib/gcloud/bigquery/dataset/list.rb +51 -0
- data/lib/gcloud/bigquery/errors.rb +62 -0
- data/lib/gcloud/bigquery/extract_job.rb +117 -0
- data/lib/gcloud/bigquery/insert_response.rb +80 -0
- data/lib/gcloud/bigquery/job.rb +283 -0
- data/lib/gcloud/bigquery/job/list.rb +55 -0
- data/lib/gcloud/bigquery/load_job.rb +199 -0
- data/lib/gcloud/bigquery/project.rb +512 -0
- data/lib/gcloud/bigquery/query_data.rb +135 -0
- data/lib/gcloud/bigquery/query_job.rb +151 -0
- data/lib/gcloud/bigquery/table.rb +827 -0
- data/lib/gcloud/bigquery/table/list.rb +55 -0
- data/lib/gcloud/bigquery/view.rb +419 -0
- data/lib/gcloud/credentials.rb +3 -3
- data/lib/gcloud/datastore.rb +15 -3
- data/lib/gcloud/datastore/credentials.rb +3 -2
- data/lib/gcloud/datastore/dataset.rb +5 -1
- data/lib/gcloud/datastore/transaction.rb +1 -1
- data/lib/gcloud/pubsub.rb +14 -3
- data/lib/gcloud/pubsub/credentials.rb +4 -4
- data/lib/gcloud/pubsub/project.rb +5 -1
- data/lib/gcloud/pubsub/topic.rb +5 -0
- data/lib/gcloud/storage.rb +14 -24
- data/lib/gcloud/storage/bucket.rb +10 -4
- data/lib/gcloud/storage/credentials.rb +3 -2
- data/lib/gcloud/storage/file.rb +8 -1
- data/lib/gcloud/storage/project.rb +5 -1
- data/lib/gcloud/upload.rb +54 -0
- data/lib/gcloud/version.rb +1 -1
- metadata +78 -2
@@ -0,0 +1,55 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright 2015 Google Inc. All rights reserved.
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
|
16
|
+
module Gcloud
  module Bigquery
    class Job
      ##
      # Job::List is a special case Array with additional values.
      class List < DelegateClass(::Array)
        ##
        # If not empty, indicates that there are more records that match
        # the request and this value should be passed to continue.
        attr_accessor :token

        # A hash of this page of results.
        attr_accessor :etag

        # Total number of jobs in this collection.
        attr_accessor :total

        ##
        # Create a new Job::List with an array of jobs.
        def initialize arr = []
          super arr
        end

        ##
        # New Job::List from a response object.
        def self.from_resp resp, conn #:nodoc:
          # Wrap each raw API job hash in a Job before building the list.
          gapi_jobs = Array(resp.data["jobs"])
          list = new(gapi_jobs.map { |gapi_object| Job.from_gapi gapi_object, conn })
          # Carry the paging/caching metadata over from the API response.
          list.token = resp.data["nextPageToken"]
          list.etag  = resp.data["etag"]
          list.total = resp.data["totalItems"]
          list
        end
      end
    end
  end
end
|
@@ -0,0 +1,199 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright 2015 Google Inc. All rights reserved.
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
|
16
|
+
module Gcloud
  module Bigquery
    ##
    # = LoadJob
    #
    # A Job subclass representing a load operation that may be performed
    # on a Table. A LoadJob instance is created when you call Table#load.
    #
    # See {Loading Data Into
    # BigQuery}[https://cloud.google.com/bigquery/loading-data-into-bigquery]
    # and the {Jobs API
    # reference}[https://cloud.google.com/bigquery/docs/reference/v2/jobs]
    # for details.
    #
    class LoadJob < Job
      ##
      # The URI or URIs representing the Google Cloud Storage files from which
      # the operation loads data.
      def sources
        Array config["load"]["sourceUris"]
      end

      ##
      # The table into which the operation loads data. This is the table on
      # which Table#load was invoked. Returns a Table instance.
      def destination
        table = config["load"]["destinationTable"]
        return nil unless table
        retrieve_table table["projectId"],
                       table["datasetId"],
                       table["tableId"]
      end

      ##
      # The delimiter used between fields in the source data. The default is a
      # comma (,).
      def delimiter
        val = config["load"]["fieldDelimiter"]
        val = "," if val.nil?
        val
      end

      ##
      # The number of header rows at the top of a CSV file to skip. The default
      # value is +0+.
      def skip_leading_rows
        val = config["load"]["skipLeadingRows"]
        val = 0 if val.nil?
        val
      end

      ##
      # Checks if the character encoding of the data is UTF-8. This is the
      # default.
      def utf8?
        val = config["load"]["encoding"]
        return true if val.nil?
        val == "UTF-8"
      end

      ##
      # Checks if the character encoding of the data is ISO-8859-1.
      def iso8859_1?
        val = config["load"]["encoding"]
        val == "ISO-8859-1"
      end

      ##
      # The value that is used to quote data sections in a CSV file.
      # The default value is a double-quote (+"+). If your data does not contain
      # quoted sections, the value should be an empty string. If your data
      # contains quoted newline characters, #quoted_newlines? should return
      # +true+.
      def quote
        val = config["load"]["quote"]
        val = "\"" if val.nil?
        val
      end

      ##
      # The maximum number of bad records that the load operation can ignore. If
      # the number of bad records exceeds this value, an error is
      # returned. The default value is +0+, which requires that all records be
      # valid.
      def max_bad_records
        val = config["load"]["maxBadRecords"]
        val = 0 if val.nil?
        val
      end

      ##
      # Checks if quoted data sections may contain newline characters in a CSV
      # file. The default is +false+.
      def quoted_newlines?
        val = config["load"]["allowQuotedNewlines"]
        # The BigQuery API default for allowQuotedNewlines is false; the
        # previous implementation incorrectly defaulted to true when unset.
        val = false if val.nil?
        val
      end

      ##
      # Checks if the format of the source data is
      # {newline-delimited JSON}[http://jsonlines.org/]. The default is +false+.
      def json?
        val = config["load"]["sourceFormat"]
        val == "NEWLINE_DELIMITED_JSON"
      end

      ##
      # Checks if the format of the source data is CSV. The default is +true+.
      def csv?
        val = config["load"]["sourceFormat"]
        return true if val.nil?
        val == "CSV"
      end

      ##
      # Checks if the source data is a Google Cloud Datastore backup.
      def backup?
        val = config["load"]["sourceFormat"]
        val == "DATASTORE_BACKUP"
      end

      ##
      # Checks if the load operation accepts rows that are missing trailing
      # optional columns. The missing values are treated as nulls. If +false+,
      # records with missing trailing columns are treated as bad records, and
      # if there are too many bad records, an error is returned. The default
      # value is +false+. Only applicable to CSV, ignored for other formats.
      def allow_jagged_rows?
        val = config["load"]["allowJaggedRows"]
        val = false if val.nil?
        val
      end

      ##
      # Checks if the load operation allows extra values that are not
      # represented in the table schema. If +true+, the extra values are
      # ignored. If +false+, records with extra columns are treated as bad
      # records, and if there are too many bad records, an invalid error is
      # returned. The default is +false+.
      def ignore_unknown_values?
        val = config["load"]["ignoreUnknownValues"]
        val = false if val.nil?
        val
      end

      ##
      # The schema for the data. Returns a hash. Can be empty if the table
      # already has the correct schema (see Table#schema= and Table#schema),
      # or if the schema can be inferred from the loaded data.
      def schema
        val = config["load"]["schema"]
        val = {} if val.nil?
        val
      end

      ##
      # The number of source files.
      def input_files
        stats["load"]["inputFiles"]
      end

      ##
      # The number of bytes of source data.
      def input_file_bytes
        stats["load"]["inputFileBytes"]
      end

      ##
      # The number of rows that have been loaded into the table. While an
      # import job is in the running state, this value may change.
      def output_rows
        stats["load"]["outputRows"]
      end

      ##
      # The number of bytes that have been loaded into the table. While an
      # import job is in the running state, this value may change.
      def output_bytes
        stats["load"]["outputBytes"]
      end
    end
  end
end
|
@@ -0,0 +1,512 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright 2015 Google Inc. All rights reserved.
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
|
16
|
+
require "gcloud/bigquery/connection"
|
17
|
+
require "gcloud/bigquery/credentials"
|
18
|
+
require "gcloud/bigquery/errors"
|
19
|
+
require "gcloud/bigquery/dataset"
|
20
|
+
require "gcloud/bigquery/job"
|
21
|
+
require "gcloud/bigquery/query_data"
|
22
|
+
|
23
|
+
module Gcloud
  module Bigquery
    ##
    # = Project
    #
    # Projects are top-level containers in Google Cloud Platform. They store
    # information about billing and authorized users, and they contain BigQuery
    # data. Each project has a friendly name and a unique ID.
    #
    # Gcloud::Bigquery::Project is the main object for interacting with
    # Google BigQuery. Gcloud::Bigquery::Dataset objects are created,
    # accessed, and deleted by Gcloud::Bigquery::Project.
    #
    #   require "gcloud"
    #
    #   gcloud = Gcloud.new
    #   bigquery = gcloud.bigquery
    #   dataset = bigquery.dataset "my_dataset"
    #   table = dataset.table "my_table"
    #
    # See Gcloud#bigquery
    class Project
      ##
      # The Connection object.
      attr_accessor :connection #:nodoc:

      ##
      # Creates a new Connection instance.
      #
      # See Gcloud.bigquery
      def initialize project, credentials
        project = project.to_s # Always cast to a string
        fail ArgumentError, "project is missing" if project.empty?
        @connection = Connection.new project, credentials
      end

      ##
      # The BigQuery project connected to.
      #
      # === Example
      #
      #   require "gcloud"
      #
      #   gcloud = Gcloud.new "my-todo-project", "/path/to/keyfile.json"
      #   bigquery = gcloud.bigquery
      #
      #   bigquery.project #=> "my-todo-project"
      #
      def project
        connection.project
      end

      ##
      # Default project.
      def self.default_project #:nodoc:
        ENV["BIGQUERY_PROJECT"] ||
          ENV["GCLOUD_PROJECT"] ||
          ENV["GOOGLE_CLOUD_PROJECT"]
      end

      ##
      # Queries data using the {asynchronous
      # method}[https://cloud.google.com/bigquery/querying-data].
      #
      # === Parameters
      #
      # +query+::
      #   A query string, following the BigQuery {query
      #   syntax}[https://cloud.google.com/bigquery/query-reference], of the
      #   query to execute. Example: "SELECT count(f1) FROM
      #   [myProjectId:myDatasetId.myTableId]". (+String+)
      # <code>options[:priority]</code>::
      #   Specifies a priority for the query. Possible values include
      #   +INTERACTIVE+ and +BATCH+. The default value is +INTERACTIVE+.
      #   (+String+)
      # <code>options[:cache]</code>::
      #   Whether to look for the result in the query cache. The query cache is
      #   a best-effort cache that will be flushed whenever tables in the query
      #   are modified. The default value is +true+. (+Boolean+)
      # <code>options[:table]</code>::
      #   The destination table where the query results should be stored. If not
      #   present, a new table will be created to store the results. (+Table+)
      # <code>options[:create]</code>::
      #   Specifies whether the job is allowed to create new tables. (+String+)
      #
      #   The following values are supported:
      #   * +needed+ - Create the table if it does not exist.
      #   * +never+ - The table must already exist. A 'notFound' error is
      #     raised if the table does not exist.
      # <code>options[:write]</code>::
      #   Specifies the action that occurs if the destination table already
      #   exists. (+String+)
      #
      #   The following values are supported:
      #   * +truncate+ - BigQuery overwrites the table data.
      #   * +append+ - BigQuery appends the data to the table.
      #   * +empty+ - A 'duplicate' error is returned in the job result if the
      #     table exists and contains data.
      # <code>options[:large_results]</code>::
      #   If +true+, allows the query to produce arbitrarily large result tables
      #   at a slight cost in performance. Requires <code>options[:table]</code>
      #   to be set. (+Boolean+)
      # <code>options[:flatten]</code>::
      #   Flattens all nested and repeated fields in the query results. The
      #   default value is +true+. <code>options[:large_results]</code> must be
      #   +true+ if this is set to +false+. (+Boolean+)
      # <code>options[:dataset]</code>::
      #   Specifies the default dataset to use for unqualified table names in
      #   the query. (+Dataset+ or +String+)
      #
      # === Returns
      #
      # Gcloud::Bigquery::QueryJob
      #
      # === Example
      #
      #   require "gcloud"
      #
      #   gcloud = Gcloud.new
      #   bigquery = gcloud.bigquery
      #
      #   job = bigquery.query_job "SELECT name FROM [my_proj:my_data.my_table]"
      #
      #   loop do
      #     break if job.done?
      #     sleep 1
      #     job.refresh!
      #   end
      #   if !job.failed?
      #     job.query_results.each do |row|
      #       puts row["name"]
      #     end
      #   end
      #
      def query_job query, options = {}
        ensure_connection!
        resp = connection.query_job query, options
        if resp.success?
          Job.from_gapi resp.data, connection
        else
          fail ApiError.from_response(resp)
        end
      end

      ##
      # Queries data using the {synchronous
      # method}[https://cloud.google.com/bigquery/querying-data].
      #
      # === Parameters
      #
      # +query+::
      #   A query string, following the BigQuery {query
      #   syntax}[https://cloud.google.com/bigquery/query-reference], of the
      #   query to execute. Example: "SELECT count(f1) FROM
      #   [myProjectId:myDatasetId.myTableId]". (+String+)
      # <code>options[:max]</code>::
      #   The maximum number of rows of data to return per page of results.
      #   Setting this flag to a small value such as 1000 and then paging
      #   through results might improve reliability when the query result set is
      #   large. In addition to this limit, responses are also limited to 10 MB.
      #   By default, there is no maximum row count, and only the byte limit
      #   applies. (+Integer+)
      # <code>options[:timeout]</code>::
      #   How long to wait for the query to complete, in milliseconds, before
      #   the request times out and returns. Note that this is only a timeout
      #   for the request, not the query. If the query takes longer to run than
      #   the timeout value, the call returns without any results and with
      #   QueryData#complete? set to false. The default value is 10000
      #   milliseconds (10 seconds). (+Integer+)
      # <code>options[:dryrun]</code>::
      #   If set to +true+, BigQuery doesn't run the job. Instead, if the query
      #   is valid, BigQuery returns statistics about the job such as how many
      #   bytes would be processed. If the query is invalid, an error returns.
      #   The default value is +false+. (+Boolean+)
      # <code>options[:cache]</code>::
      #   Whether to look for the result in the query cache. The query cache is
      #   a best-effort cache that will be flushed whenever tables in the query
      #   are modified. The default value is true. For more information, see
      #   {query caching}[https://developers.google.com/bigquery/querying-data].
      #   (+Boolean+)
      # <code>options[:dataset]</code>::
      #   Specifies the default datasetId and projectId to assume for any
      #   unqualified table names in the query. If not set, all table names in
      #   the query string must be qualified in the format 'datasetId.tableId'.
      #   (+String+)
      # <code>options[:project]</code>::
      #   Specifies the default projectId to assume for any unqualified table
      #   names in the query. Only used if +dataset+ option is set. (+String+)
      #
      # === Returns
      #
      # Gcloud::Bigquery::QueryData
      #
      # === Example
      #
      #   require "gcloud"
      #
      #   gcloud = Gcloud.new
      #   bigquery = gcloud.bigquery
      #
      #   data = bigquery.query "SELECT name FROM [my_proj:my_data.my_table]"
      #   data.each do |row|
      #     puts row["name"]
      #   end
      #
      def query query, options = {}
        ensure_connection!
        resp = connection.query query, options
        if resp.success?
          QueryData.from_gapi resp.data, connection
        else
          fail ApiError.from_response(resp)
        end
      end

      ##
      # Retrieves an existing dataset by ID.
      #
      # === Parameters
      #
      # +dataset_id+::
      #   The ID of a dataset. (+String+)
      #
      # === Returns
      #
      # Gcloud::Bigquery::Dataset or nil if dataset does not exist
      #
      # === Example
      #
      #   require "gcloud"
      #
      #   gcloud = Gcloud.new
      #   bigquery = gcloud.bigquery
      #
      #   dataset = bigquery.dataset "my_dataset"
      #   puts dataset.name
      #
      def dataset dataset_id
        ensure_connection!
        resp = connection.get_dataset dataset_id
        if resp.success?
          Dataset.from_gapi resp.data, connection
        else
          # A missing dataset is an expected outcome, not an error.
          return nil if resp.status == 404
          fail ApiError.from_response(resp)
        end
      end

      ##
      # Creates a new dataset.
      #
      # === Parameters
      #
      # +dataset_id+::
      #   A unique ID for this dataset, without the project name.
      #   The ID must contain only letters (a-z, A-Z), numbers (0-9), or
      #   underscores (_). The maximum length is 1,024 characters. (+String+)
      # +options+::
      #   An optional Hash for controlling additional behavior. (+Hash+)
      # <code>options[:name]</code>::
      #   A descriptive name for the dataset. (+String+)
      # <code>options[:description]</code>::
      #   A user-friendly description of the dataset. (+String+)
      # <code>options[:expiration]</code>::
      #   The default lifetime of all tables in the dataset, in milliseconds.
      #   The minimum value is 3600000 milliseconds (one hour). (+Integer+)
      #
      # === Returns
      #
      # Gcloud::Bigquery::Dataset
      #
      # === Examples
      #
      #   require "gcloud"
      #
      #   gcloud = Gcloud.new
      #   bigquery = gcloud.bigquery
      #
      #   dataset = bigquery.create_dataset "my_dataset"
      #
      # A name and description can be provided:
      #
      #   require "gcloud"
      #
      #   gcloud = Gcloud.new
      #   bigquery = gcloud.bigquery
      #
      #   dataset = bigquery.create_dataset "my_dataset",
      #                                     name: "My Dataset",
      #                                     description: "This is my Dataset"
      #
      def create_dataset dataset_id, options = {}
        ensure_connection!
        resp = connection.insert_dataset dataset_id, options
        if resp.success?
          Dataset.from_gapi resp.data, connection
        else
          fail ApiError.from_response(resp)
        end
      end

      ##
      # Retrieves the list of datasets belonging to the project.
      #
      # === Parameters
      #
      # +options+::
      #   An optional Hash for controlling additional behavior. (+Hash+)
      # <code>options[:all]</code>::
      #   Whether to list all datasets, including hidden ones. The default is
      #   +false+. (+Boolean+)
      # <code>options[:token]</code>::
      #   A previously-returned page token representing part of the larger set
      #   of results to view. (+String+)
      # <code>options[:max]</code>::
      #   Maximum number of datasets to return. (+Integer+)
      #
      # === Returns
      #
      # Array of Gcloud::Bigquery::Dataset (Gcloud::Bigquery::Dataset::List)
      #
      # === Examples
      #
      #   require "gcloud"
      #
      #   gcloud = Gcloud.new
      #   bigquery = gcloud.bigquery
      #
      #   datasets = bigquery.datasets
      #   datasets.each do |dataset|
      #     puts dataset.name
      #   end
      #
      # You can also retrieve all datasets, including hidden ones, by providing
      # the +:all+ option:
      #
      #   require "gcloud"
      #
      #   gcloud = Gcloud.new
      #   bigquery = gcloud.bigquery
      #
      #   all_datasets = bigquery.datasets all: true
      #
      # If you have a significant number of datasets, you may need to paginate
      # through them: (See Dataset::List#token)
      #
      #   require "gcloud"
      #
      #   gcloud = Gcloud.new
      #   bigquery = gcloud.bigquery
      #
      #   all_datasets = []
      #   tmp_datasets = bigquery.datasets
      #   while tmp_datasets.any? do
      #     tmp_datasets.each do |dataset|
      #       all_datasets << dataset
      #     end
      #     # break loop if no more datasets available
      #     break if tmp_datasets.token.nil?
      #     # get the next group of datasets
      #     tmp_datasets = bigquery.datasets token: tmp_datasets.token
      #   end
      #
      def datasets options = {}
        ensure_connection!
        resp = connection.list_datasets options
        if resp.success?
          Dataset::List.from_resp resp, connection
        else
          fail ApiError.from_response(resp)
        end
      end

      ##
      # Retrieves an existing job by ID.
      #
      # === Parameters
      #
      # +job_id+::
      #   The ID of a job. (+String+)
      #
      # === Returns
      #
      # Gcloud::Bigquery::Job or nil if job does not exist
      #
      # === Example
      #
      #   require "gcloud"
      #
      #   gcloud = Gcloud.new
      #   bigquery = gcloud.bigquery
      #
      #   job = bigquery.job "my_job"
      #
      def job job_id
        ensure_connection!
        resp = connection.get_job job_id
        if resp.success?
          Job.from_gapi resp.data, connection
        else
          # A missing job is an expected outcome, not an error.
          return nil if resp.status == 404
          fail ApiError.from_response(resp)
        end
      end

      ##
      # Retrieves the list of jobs belonging to the project.
      #
      # === Parameters
      #
      # +options+::
      #   An optional Hash for controlling additional behavior. (+Hash+)
      # <code>options[:all]</code>::
      #   Whether to display jobs owned by all users in the project.
      #   The default is +false+. (+Boolean+)
      # <code>options[:token]</code>::
      #   A previously-returned page token representing part of the larger set
      #   of results to view. (+String+)
      # <code>options[:max]</code>::
      #   Maximum number of jobs to return. (+Integer+)
      # <code>options[:filter]</code>::
      #   A filter for job state. (+String+)
      #
      #   Acceptable values are:
      #   * +done+ - Finished jobs
      #   * +pending+ - Pending jobs
      #   * +running+ - Running jobs
      #
      # === Returns
      #
      # Array of Gcloud::Bigquery::Job (Gcloud::Bigquery::Job::List)
      #
      # === Examples
      #
      #   require "gcloud"
      #
      #   gcloud = Gcloud.new
      #   bigquery = gcloud.bigquery
      #
      #   jobs = bigquery.jobs
      #
      # You can also retrieve only running jobs using the +:filter+ option:
      #
      #   require "gcloud"
      #
      #   gcloud = Gcloud.new
      #   bigquery = gcloud.bigquery
      #
      #   running_jobs = bigquery.jobs filter: "running"
      #
      # If you have a significant number of jobs, you may need to paginate
      # through them: (See Job::List#token)
      #
      #   require "gcloud"
      #
      #   gcloud = Gcloud.new
      #   bigquery = gcloud.bigquery
      #
      #   all_jobs = []
      #   tmp_jobs = bigquery.jobs
      #   while tmp_jobs.any? do
      #     tmp_jobs.each do |job|
      #       all_jobs << job
      #     end
      #     # break loop if no more jobs available
      #     break if tmp_jobs.token.nil?
      #     # get the next group of jobs
      #     tmp_jobs = bigquery.jobs token: tmp_jobs.token
      #   end
      #
      def jobs options = {}
        ensure_connection!
        resp = connection.list_jobs options
        if resp.success?
          Job::List.from_resp resp, connection
        else
          fail ApiError.from_response(resp)
        end
      end

      protected

      ##
      # Raise an error unless an active connection is available.
      def ensure_connection!
        fail "Must have active connection" unless connection
      end
    end
  end
end