gcloud 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/AUTHENTICATION.md +3 -3
- data/CHANGELOG.md +12 -0
- data/OVERVIEW.md +30 -0
- data/lib/gcloud.rb +126 -9
- data/lib/gcloud/bigquery.rb +399 -0
- data/lib/gcloud/bigquery/connection.rb +592 -0
- data/lib/gcloud/bigquery/copy_job.rb +98 -0
- data/lib/gcloud/bigquery/credentials.rb +29 -0
- data/lib/gcloud/bigquery/data.rb +134 -0
- data/lib/gcloud/bigquery/dataset.rb +662 -0
- data/lib/gcloud/bigquery/dataset/list.rb +51 -0
- data/lib/gcloud/bigquery/errors.rb +62 -0
- data/lib/gcloud/bigquery/extract_job.rb +117 -0
- data/lib/gcloud/bigquery/insert_response.rb +80 -0
- data/lib/gcloud/bigquery/job.rb +283 -0
- data/lib/gcloud/bigquery/job/list.rb +55 -0
- data/lib/gcloud/bigquery/load_job.rb +199 -0
- data/lib/gcloud/bigquery/project.rb +512 -0
- data/lib/gcloud/bigquery/query_data.rb +135 -0
- data/lib/gcloud/bigquery/query_job.rb +151 -0
- data/lib/gcloud/bigquery/table.rb +827 -0
- data/lib/gcloud/bigquery/table/list.rb +55 -0
- data/lib/gcloud/bigquery/view.rb +419 -0
- data/lib/gcloud/credentials.rb +3 -3
- data/lib/gcloud/datastore.rb +15 -3
- data/lib/gcloud/datastore/credentials.rb +3 -2
- data/lib/gcloud/datastore/dataset.rb +5 -1
- data/lib/gcloud/datastore/transaction.rb +1 -1
- data/lib/gcloud/pubsub.rb +14 -3
- data/lib/gcloud/pubsub/credentials.rb +4 -4
- data/lib/gcloud/pubsub/project.rb +5 -1
- data/lib/gcloud/pubsub/topic.rb +5 -0
- data/lib/gcloud/storage.rb +14 -24
- data/lib/gcloud/storage/bucket.rb +10 -4
- data/lib/gcloud/storage/credentials.rb +3 -2
- data/lib/gcloud/storage/file.rb +8 -1
- data/lib/gcloud/storage/project.rb +5 -1
- data/lib/gcloud/upload.rb +54 -0
- data/lib/gcloud/version.rb +1 -1
- metadata +78 -2
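
The headline change in this release is the new BigQuery service (all of the `data/lib/gcloud/bigquery/*` files above). Pieced together from the `Project` documentation in the hunks below, a minimal round trip looks like this sketch; the project ID, keyfile path, and dataset/table names are placeholders, and credentials can also be picked up from the environment:

    require "gcloud"

    # Gcloud#bigquery (new in 0.3.0) returns a Gcloud::Bigquery::Project
    # wrapping an authenticated Connection.
    gcloud   = Gcloud.new "my-todo-project", "/path/to/keyfile.json"
    bigquery = gcloud.bigquery

    # Synchronous query; returns Gcloud::Bigquery::QueryData.
    data = bigquery.query "SELECT name FROM [my-todo-project:my_dataset.my_table]"
    data.each do |row|
      puts row["name"]
    end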
data/lib/gcloud/bigquery/job/list.rb
@@ -0,0 +1,55 @@
+#--
+# Copyright 2015 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+module Gcloud
+  module Bigquery
+    class Job
+      ##
+      # Job::List is a special case Array with additional values.
+      class List < DelegateClass(::Array)
+        ##
+        # If not empty, indicates that there are more records that match
+        # the request and this value should be passed to continue.
+        attr_accessor :token
+
+        # A hash of this page of results.
+        attr_accessor :etag
+
+        # Total number of jobs in this collection.
+        attr_accessor :total
+
+        ##
+        # Create a new Job::List with an array of jobs.
+        def initialize arr = []
+          super arr
+        end
+
+        ##
+        # New Job::List from a response object.
+        def self.from_resp resp, conn #:nodoc:
+          jobs = List.new(Array(resp.data["jobs"]).map do |gapi_object|
+            Job.from_gapi gapi_object, conn
+          end)
+          jobs.instance_eval do
+            @token = resp.data["nextPageToken"]
+            @etag = resp.data["etag"]
+            @total = resp.data["totalItems"]
+          end
+          jobs
+        end
+      end
+    end
+  end
+end
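
Because `List` wraps the page array with `DelegateClass`, a page returned by `Project#jobs` behaves like a plain `Array` while still exposing the response metadata captured in `from_resp`. A short sketch, assuming `bigquery` is a `Gcloud::Bigquery::Project` as above:

    jobs = bigquery.jobs max: 10
    jobs.class  #=> Gcloud::Bigquery::Job::List
    jobs.count  #=> plain Array behavior, at most 10 here
    jobs.total  #=> "totalItems" from the API response
    jobs.etag   #=> "etag" for this page of results
    jobs.token  #=> pass as bigquery.jobs(token: jobs.token) to fetch the next page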
data/lib/gcloud/bigquery/load_job.rb
@@ -0,0 +1,199 @@
+#--
+# Copyright 2015 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+module Gcloud
+  module Bigquery
+    ##
+    # = LoadJob
+    #
+    # A Job subclass representing a load operation that may be performed
+    # on a Table. A LoadJob instance is created when you call Table#load.
+    #
+    # See {Loading Data Into
+    # BigQuery}[https://cloud.google.com/bigquery/loading-data-into-bigquery]
+    # and the {Jobs API
+    # reference}[https://cloud.google.com/bigquery/docs/reference/v2/jobs]
+    # for details.
+    #
+    class LoadJob < Job
+      ##
+      # The URI or URIs representing the Google Cloud Storage files from which
+      # the operation loads data.
+      def sources
+        Array config["load"]["sourceUris"]
+      end
+
+      ##
+      # The table into which the operation loads data. This is the table on
+      # which Table#load was invoked. Returns a Table instance.
+      def destination
+        table = config["load"]["destinationTable"]
+        return nil unless table
+        retrieve_table table["projectId"],
+                       table["datasetId"],
+                       table["tableId"]
+      end
+
+      ##
+      # The delimiter used between fields in the source data. The default is a
+      # comma (,).
+      def delimiter
+        val = config["load"]["fieldDelimiter"]
+        val = "," if val.nil?
+        val
+      end
+
+      ##
+      # The number of header rows at the top of a CSV file to skip. The default
+      # value is +0+.
+      def skip_leading_rows
+        val = config["load"]["skipLeadingRows"]
+        val = 0 if val.nil?
+        val
+      end
+
+      ##
+      # Checks if the character encoding of the data is UTF-8. This is the
+      # default.
+      def utf8?
+        val = config["load"]["encoding"]
+        return true if val.nil?
+        val == "UTF-8"
+      end
+
+      ##
+      # Checks if the character encoding of the data is ISO-8859-1.
+      def iso8859_1?
+        val = config["load"]["encoding"]
+        val == "ISO-8859-1"
+      end
+
+      ##
+      # The value that is used to quote data sections in a CSV file.
+      # The default value is a double-quote (+"+). If your data does not contain
+      # quoted sections, the value should be an empty string. If your data
+      # contains quoted newline characters, #quoted_newlines? should return
+      # +true+.
+      def quote
+        val = config["load"]["quote"]
+        val = "\"" if val.nil?
+        val
+      end
+
+      ##
+      # The maximum number of bad records that the load operation can ignore. If
+      # the number of bad records exceeds this value, an error is
+      # returned. The default value is +0+, which requires that all records be
+      # valid.
+      def max_bad_records
+        val = config["load"]["maxBadRecords"]
+        val = 0 if val.nil?
+        val
+      end
+
+      ##
+      # Checks if quoted data sections may contain newline characters in a CSV
+      # file. The default is +false+.
+      def quoted_newlines?
+        val = config["load"]["allowQuotedNewlines"]
+        val = false if val.nil?
+        val
+      end
+
+      ##
+      # Checks if the format of the source data is
+      # {newline-delimited JSON}[http://jsonlines.org/]. The default is +false+.
+      def json?
+        val = config["load"]["sourceFormat"]
+        val == "NEWLINE_DELIMITED_JSON"
+      end
+
+      ##
+      # Checks if the format of the source data is CSV. The default is +true+.
+      def csv?
+        val = config["load"]["sourceFormat"]
+        return true if val.nil?
+        val == "CSV"
+      end
+
+      ##
+      # Checks if the source data is a Google Cloud Datastore backup.
+      def backup?
+        val = config["load"]["sourceFormat"]
+        val == "DATASTORE_BACKUP"
+      end
+
+      ##
+      # Checks if the load operation accepts rows that are missing trailing
+      # optional columns. The missing values are treated as nulls. If +false+,
+      # records with missing trailing columns are treated as bad records, and
+      # if there are too many bad records, an error is returned. The default
+      # value is +false+. Only applicable to CSV, ignored for other formats.
+      def allow_jagged_rows?
+        val = config["load"]["allowJaggedRows"]
+        val = false if val.nil?
+        val
+      end
+
+      ##
+      # Checks if the load operation allows extra values that are not
+      # represented in the table schema. If +true+, the extra values are
+      # ignored. If +false+, records with extra columns are treated as bad
+      # records, and if there are too many bad records, an invalid error is
+      # returned. The default is +false+.
+      def ignore_unknown_values?
+        val = config["load"]["ignoreUnknownValues"]
+        val = false if val.nil?
+        val
+      end
+
+      ##
+      # The schema for the data. Returns a hash. Can be empty if the table
+      # already has the correct schema (see Table#schema= and Table#schema),
+      # or if the schema can be inferred from the loaded data.
+      def schema
+        val = config["load"]["schema"]
+        val = {} if val.nil?
+        val
+      end
+
+      ##
+      # The number of source files.
+      def input_files
+        stats["load"]["inputFiles"]
+      end
+
+      ##
+      # The number of bytes of source data.
+      def input_file_bytes
+        stats["load"]["inputFileBytes"]
+      end
+
+      ##
+      # The number of rows that have been loaded into the table. While an
+      # import job is in the running state, this value may change.
+      def output_rows
+        stats["load"]["outputRows"]
+      end
+
+      ##
+      # The number of bytes that have been loaded into the table. While an
+      # import job is in the running state, this value may change.
+      def output_bytes
+        stats["load"]["outputBytes"]
+      end
+    end
+  end
+end
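
As the class comment says, a `LoadJob` is what `Table#load` returns (`Table#load` lives in `table.rb`, not shown in this hunk, so its exact signature here is an assumption). A sketch of starting a load from Cloud Storage and reading its configuration back through the accessors above; the bucket, dataset, and table names are hypothetical:

    table = bigquery.dataset("my_dataset").table("my_table")
    job   = table.load "gs://my-bucket/data.csv"  # hypothetical source URL

    job.sources          #=> ["gs://my-bucket/data.csv"]
    job.csv?             #=> true, since sourceFormat is unset and CSV is assumed
    job.delimiter        #=> ",", the documented default for fieldDelimiter
    job.max_bad_records  #=> 0

Each reader follows the same pattern: pull a key out of the job's `config` hash and substitute the documented API default when the field is absent.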
data/lib/gcloud/bigquery/project.rb
@@ -0,0 +1,512 @@
+#--
+# Copyright 2015 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+require "gcloud/bigquery/connection"
+require "gcloud/bigquery/credentials"
+require "gcloud/bigquery/errors"
+require "gcloud/bigquery/dataset"
+require "gcloud/bigquery/job"
+require "gcloud/bigquery/query_data"
+
+module Gcloud
+  module Bigquery
+    ##
+    # = Project
+    #
+    # Projects are top-level containers in Google Cloud Platform. They store
+    # information about billing and authorized users, and they contain BigQuery
+    # data. Each project has a friendly name and a unique ID.
+    #
+    # Gcloud::Bigquery::Project is the main object for interacting with
+    # Google BigQuery. Gcloud::Bigquery::Dataset objects are created,
+    # accessed, and deleted by Gcloud::Bigquery::Project.
+    #
+    #   require "gcloud"
+    #
+    #   gcloud = Gcloud.new
+    #   bigquery = gcloud.bigquery
+    #   dataset = bigquery.dataset "my_dataset"
+    #   table = dataset.table "my_table"
+    #
+    # See Gcloud#bigquery
+    class Project
+      ##
+      # The Connection object.
+      attr_accessor :connection #:nodoc:
+
+      ##
+      # Creates a new Connection instance.
+      #
+      # See Gcloud.bigquery
+      def initialize project, credentials
+        project = project.to_s # Always cast to a string
+        fail ArgumentError, "project is missing" if project.empty?
+        @connection = Connection.new project, credentials
+      end
+
+      ##
+      # The BigQuery project connected to.
+      #
+      # === Example
+      #
+      #   require "gcloud"
+      #
+      #   gcloud = Gcloud.new "my-todo-project", "/path/to/keyfile.json"
+      #   bigquery = gcloud.bigquery
+      #
+      #   bigquery.project #=> "my-todo-project"
+      #
+      def project
+        connection.project
+      end
+
+      ##
+      # Default project.
+      def self.default_project #:nodoc:
+        ENV["BIGQUERY_PROJECT"] ||
+          ENV["GCLOUD_PROJECT"] ||
+          ENV["GOOGLE_CLOUD_PROJECT"]
+      end
+
+      ##
+      # Queries data using the {asynchronous
+      # method}[https://cloud.google.com/bigquery/querying-data].
+      #
+      # === Parameters
+      #
+      # +query+::
+      #   A query string, following the BigQuery {query
+      #   syntax}[https://cloud.google.com/bigquery/query-reference], of the
+      #   query to execute. Example: "SELECT count(f1) FROM
+      #   [myProjectId:myDatasetId.myTableId]". (+String+)
+      # <code>options[:priority]</code>::
+      #   Specifies a priority for the query. Possible values include
+      #   +INTERACTIVE+ and +BATCH+. The default value is +INTERACTIVE+.
+      #   (+String+)
+      # <code>options[:cache]</code>::
+      #   Whether to look for the result in the query cache. The query cache is
+      #   a best-effort cache that will be flushed whenever tables in the query
+      #   are modified. The default value is +true+. (+Boolean+)
+      # <code>options[:table]</code>::
+      #   The destination table where the query results should be stored. If not
+      #   present, a new table will be created to store the results. (+Table+)
+      # <code>options[:create]</code>::
+      #   Specifies whether the job is allowed to create new tables. (+String+)
+      #
+      #   The following values are supported:
+      #   * +needed+ - Create the table if it does not exist.
+      #   * +never+ - The table must already exist. A 'notFound' error is
+      #     raised if the table does not exist.
+      # <code>options[:write]</code>::
+      #   Specifies the action that occurs if the destination table already
+      #   exists. (+String+)
+      #
+      #   The following values are supported:
+      #   * +truncate+ - BigQuery overwrites the table data.
+      #   * +append+ - BigQuery appends the data to the table.
+      #   * +empty+ - A 'duplicate' error is returned in the job result if the
+      #     table exists and contains data.
+      # <code>options[:large_results]</code>::
+      #   If +true+, allows the query to produce arbitrarily large result tables
+      #   at a slight cost in performance. Requires <code>options[:table]</code>
+      #   to be set. (+Boolean+)
+      # <code>options[:flatten]</code>::
+      #   Flattens all nested and repeated fields in the query results. The
+      #   default value is +true+. <code>options[:large_results]</code> must be
+      #   +true+ if this is set to +false+. (+Boolean+)
+      # <code>options[:dataset]</code>::
+      #   Specifies the default dataset to use for unqualified table names in
+      #   the query. (+Dataset+ or +String+)
+      #
+      # === Returns
+      #
+      # Gcloud::Bigquery::QueryJob
+      #
+      # === Example
+      #
+      #   require "gcloud"
+      #
+      #   gcloud = Gcloud.new
+      #   bigquery = gcloud.bigquery
+      #
+      #   job = bigquery.query_job "SELECT name FROM [my_proj:my_data.my_table]"
+      #
+      #   loop do
+      #     break if job.done?
+      #     sleep 1
+      #     job.refresh!
+      #   end
+      #   if !job.failed?
+      #     job.query_results.each do |row|
+      #       puts row["name"]
+      #     end
+      #   end
+      #
+      def query_job query, options = {}
+        ensure_connection!
+        resp = connection.query_job query, options
+        if resp.success?
+          Job.from_gapi resp.data, connection
+        else
+          fail ApiError.from_response(resp)
+        end
+      end
+
+      ##
+      # Queries data using the {synchronous
+      # method}[https://cloud.google.com/bigquery/querying-data].
+      #
+      # === Parameters
+      #
+      # +query+::
+      #   A query string, following the BigQuery {query
+      #   syntax}[https://cloud.google.com/bigquery/query-reference], of the
+      #   query to execute. Example: "SELECT count(f1) FROM
+      #   [myProjectId:myDatasetId.myTableId]". (+String+)
+      # <code>options[:max]</code>::
+      #   The maximum number of rows of data to return per page of results.
+      #   Setting this flag to a small value such as 1000 and then paging
+      #   through results might improve reliability when the query result set is
+      #   large. In addition to this limit, responses are also limited to 10 MB.
+      #   By default, there is no maximum row count, and only the byte limit
+      #   applies. (+Integer+)
+      # <code>options[:timeout]</code>::
+      #   How long to wait for the query to complete, in milliseconds, before
+      #   the request times out and returns. Note that this is only a timeout
+      #   for the request, not the query. If the query takes longer to run than
+      #   the timeout value, the call returns without any results and with
+      #   QueryData#complete? set to false. The default value is 10000
+      #   milliseconds (10 seconds). (+Integer+)
+      # <code>options[:dryrun]</code>::
+      #   If set to +true+, BigQuery doesn't run the job. Instead, if the query
+      #   is valid, BigQuery returns statistics about the job such as how many
+      #   bytes would be processed. If the query is invalid, an error returns.
+      #   The default value is +false+. (+Boolean+)
+      # <code>options[:cache]</code>::
+      #   Whether to look for the result in the query cache. The query cache is
+      #   a best-effort cache that will be flushed whenever tables in the query
+      #   are modified. The default value is true. For more information, see
+      #   {query caching}[https://developers.google.com/bigquery/querying-data].
+      #   (+Boolean+)
+      # <code>options[:dataset]</code>::
+      #   Specifies the default datasetId and projectId to assume for any
+      #   unqualified table names in the query. If not set, all table names in
+      #   the query string must be qualified in the format 'datasetId.tableId'.
+      #   (+String+)
+      # <code>options[:project]</code>::
+      #   Specifies the default projectId to assume for any unqualified table
+      #   names in the query. Only used if +dataset+ option is set. (+String+)
+      #
+      # === Returns
+      #
+      # Gcloud::Bigquery::QueryData
+      #
+      # === Example
+      #
+      #   require "gcloud"
+      #
+      #   gcloud = Gcloud.new
+      #   bigquery = gcloud.bigquery
+      #
+      #   data = bigquery.query "SELECT name FROM [my_proj:my_data.my_table]"
+      #   data.each do |row|
+      #     puts row["name"]
+      #   end
+      #
+      def query query, options = {}
+        ensure_connection!
+        resp = connection.query query, options
+        if resp.success?
+          QueryData.from_gapi resp.data, connection
+        else
+          fail ApiError.from_response(resp)
+        end
+      end
+
+      ##
+      # Retrieves an existing dataset by ID.
+      #
+      # === Parameters
+      #
+      # +dataset_id+::
+      #   The ID of a dataset. (+String+)
+      #
+      # === Returns
+      #
+      # Gcloud::Bigquery::Dataset or nil if dataset does not exist
+      #
+      # === Example
+      #
+      #   require "gcloud"
+      #
+      #   gcloud = Gcloud.new
+      #   bigquery = gcloud.bigquery
+      #
+      #   dataset = bigquery.dataset "my_dataset"
+      #   puts dataset.name
+      #
+      def dataset dataset_id
+        ensure_connection!
+        resp = connection.get_dataset dataset_id
+        if resp.success?
+          Dataset.from_gapi resp.data, connection
+        else
+          return nil if resp.status == 404
+          fail ApiError.from_response(resp)
+        end
+      end
+
+      ##
+      # Creates a new dataset.
+      #
+      # === Parameters
+      #
+      # +dataset_id+::
+      #   A unique ID for this dataset, without the project name.
+      #   The ID must contain only letters (a-z, A-Z), numbers (0-9), or
+      #   underscores (_). The maximum length is 1,024 characters. (+String+)
+      # +options+::
+      #   An optional Hash for controlling additional behavior. (+Hash+)
+      # <code>options[:name]</code>::
+      #   A descriptive name for the dataset. (+String+)
+      # <code>options[:description]</code>::
+      #   A user-friendly description of the dataset. (+String+)
+      # <code>options[:expiration]</code>::
+      #   The default lifetime of all tables in the dataset, in milliseconds.
+      #   The minimum value is 3600000 milliseconds (one hour). (+Integer+)
+      #
+      # === Returns
+      #
+      # Gcloud::Bigquery::Dataset
+      #
+      # === Examples
+      #
+      #   require "gcloud"
+      #
+      #   gcloud = Gcloud.new
+      #   bigquery = gcloud.bigquery
+      #
+      #   dataset = bigquery.create_dataset "my_dataset"
+      #
+      # A name and description can be provided:
+      #
+      #   require "gcloud"
+      #
+      #   gcloud = Gcloud.new
+      #   bigquery = gcloud.bigquery
+      #
+      #   dataset = bigquery.create_dataset "my_dataset",
+      #                                     name: "My Dataset",
+      #                                     description: "This is my Dataset"
+      #
+      def create_dataset dataset_id, options = {}
+        ensure_connection!
+        resp = connection.insert_dataset dataset_id, options
+        if resp.success?
+          Dataset.from_gapi resp.data, connection
+        else
+          fail ApiError.from_response(resp)
+        end
+      end
+
+      ##
+      # Retrieves the list of datasets belonging to the project.
+      #
+      # === Parameters
+      #
+      # +options+::
+      #   An optional Hash for controlling additional behavior. (+Hash+)
+      # <code>options[:all]</code>::
+      #   Whether to list all datasets, including hidden ones. The default is
+      #   +false+. (+Boolean+)
+      # <code>options[:token]</code>::
+      #   A previously-returned page token representing part of the larger set
+      #   of results to view. (+String+)
+      # <code>options[:max]</code>::
+      #   Maximum number of datasets to return. (+Integer+)
+      #
+      # === Returns
+      #
+      # Array of Gcloud::Bigquery::Dataset (Gcloud::Bigquery::Dataset::List)
+      #
+      # === Examples
+      #
+      #   require "gcloud"
+      #
+      #   gcloud = Gcloud.new
+      #   bigquery = gcloud.bigquery
+      #
+      #   datasets = bigquery.datasets
+      #   datasets.each do |dataset|
+      #     puts dataset.name
+      #   end
+      #
+      # You can also retrieve all datasets, including hidden ones, by providing
+      # the +:all+ option:
+      #
+      #   require "gcloud"
+      #
+      #   gcloud = Gcloud.new
+      #   bigquery = gcloud.bigquery
+      #
+      #   all_datasets = bigquery.datasets all: true
+      #
+      # If you have a significant number of datasets, you may need to paginate
+      # through them: (See Dataset::List#token)
+      #
+      #   require "gcloud"
+      #
+      #   gcloud = Gcloud.new
+      #   bigquery = gcloud.bigquery
+      #
+      #   all_datasets = []
+      #   tmp_datasets = bigquery.datasets
+      #   while tmp_datasets.any? do
+      #     tmp_datasets.each do |dataset|
+      #       all_datasets << dataset
+      #     end
+      #     # break loop if no more datasets available
+      #     break if tmp_datasets.token.nil?
+      #     # get the next group of datasets
+      #     tmp_datasets = bigquery.datasets token: tmp_datasets.token
+      #   end
+      #
+      def datasets options = {}
+        ensure_connection!
+        resp = connection.list_datasets options
+        if resp.success?
+          Dataset::List.from_resp resp, connection
+        else
+          fail ApiError.from_response(resp)
+        end
+      end
+
+      ##
+      # Retrieves an existing job by ID.
+      #
+      # === Parameters
+      #
+      # +job_id+::
+      #   The ID of a job. (+String+)
+      #
+      # === Returns
+      #
+      # Gcloud::Bigquery::Job or nil if job does not exist
+      #
+      # === Example
+      #
+      #   require "gcloud"
+      #
+      #   gcloud = Gcloud.new
+      #   bigquery = gcloud.bigquery
+      #
+      #   job = bigquery.job "my_job"
+      #
+      def job job_id
+        ensure_connection!
+        resp = connection.get_job job_id
+        if resp.success?
+          Job.from_gapi resp.data, connection
+        else
+          return nil if resp.status == 404
+          fail ApiError.from_response(resp)
+        end
+      end
+
+      ##
+      # Retrieves the list of jobs belonging to the project.
+      #
+      # === Parameters
+      #
+      # +options+::
+      #   An optional Hash for controlling additional behavior. (+Hash+)
+      # <code>options[:all]</code>::
+      #   Whether to display jobs owned by all users in the project.
+      #   The default is +false+. (+Boolean+)
+      # <code>options[:token]</code>::
+      #   A previously-returned page token representing part of the larger set
+      #   of results to view. (+String+)
+      # <code>options[:max]</code>::
+      #   Maximum number of jobs to return. (+Integer+)
+      # <code>options[:filter]</code>::
+      #   A filter for job state. (+String+)
+      #
+      #   Acceptable values are:
+      #   * +done+ - Finished jobs
+      #   * +pending+ - Pending jobs
+      #   * +running+ - Running jobs
+      #
+      # === Returns
+      #
+      # Array of Gcloud::Bigquery::Job (Gcloud::Bigquery::Job::List)
+      #
+      # === Examples
+      #
+      #   require "gcloud"
+      #
+      #   gcloud = Gcloud.new
+      #   bigquery = gcloud.bigquery
+      #
+      #   jobs = bigquery.jobs
+      #
+      # You can also retrieve only running jobs using the +:filter+ option:
+      #
+      #   require "gcloud"
+      #
+      #   gcloud = Gcloud.new
+      #   bigquery = gcloud.bigquery
+      #
+      #   running_jobs = bigquery.jobs filter: "running"
+      #
+      # If you have a significant number of jobs, you may need to paginate
+      # through them: (See Job::List#token)
+      #
+      #   require "gcloud"
+      #
+      #   gcloud = Gcloud.new
+      #   bigquery = gcloud.bigquery
+      #
+      #   all_jobs = []
+      #   tmp_jobs = bigquery.jobs
+      #   while tmp_jobs.any? do
+      #     tmp_jobs.each do |job|
+      #       all_jobs << job
+      #     end
+      #     # break loop if no more jobs available
+      #     break if tmp_jobs.token.nil?
+      #     # get the next group of jobs
+      #     tmp_jobs = bigquery.jobs token: tmp_jobs.token
+      #   end
+      #
+      def jobs options = {}
+        ensure_connection!
+        resp = connection.list_jobs options
+        if resp.success?
+          Job::List.from_resp resp, connection
+        else
+          fail ApiError.from_response(resp)
+        end
+      end
+
+      protected
+
+      ##
+      # Raise an error unless an active connection is available.
+      def ensure_connection!
+        fail "Must have active connection" unless connection
+      end
+    end
+  end
+end
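
Tying the `query_job` options together: `:table`, `:write`, and `:large_results` map onto the Jobs API configuration fields described above. A sketch of an asynchronous query written to an explicit destination table, reusing only calls documented in this hunk; dataset and table names are hypothetical:

    dataset = bigquery.dataset "my_dataset"
    dest    = dataset.table "query_results"

    job = bigquery.query_job "SELECT name FROM [my_proj:my_data.my_table]",
                             table: dest,          # destination for the results
                             write: "truncate",    # overwrite any existing rows
                             large_results: true   # requires :table to be set

    # Poll until the job finishes, then read the results.
    loop do
      break if job.done?
      sleep 1
      job.refresh!
    end
    job.query_results.each { |row| puts row["name"] } unless job.failed?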