google-cloud-bigquery 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 59023c44b0659ebbbfe3154585a39e06177f690f
4
+ data.tar.gz: 8577d120217e1134d3e9077f94982edb64882780
5
+ SHA512:
6
+ metadata.gz: c265d96170b9ff9080617bab8c14e2ae0a2a0093a2fe526664545bc8c97d1ac159000d71d3b56616ee125f0f63f9ae24f51828b280c6a2bd06e045d08c6dc5db
7
+ data.tar.gz: bd886ccf5f8a66ce36249e28e81e2a5f24299ba3537331d88852abb95812653e2bff5661b65fe450c9598598425af52b2c6cba063762ce4c741ce6594de1450e
lib/google-cloud-bigquery.rb ADDED
@@ -0,0 +1,122 @@
1
+ # Copyright 2016 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ ##
16
+ # This file is here to be autorequired by bundler, so that the .bigquery and
17
+ # #bigquery methods can be available, but the library and all dependencies won't
18
+ # be loaded until required and used.
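+ #
+ # A minimal sketch of that lazy loading (the heavy google/cloud/bigquery
+ # code is only required the first time #bigquery is called):
+ #
+ #   require "google/cloud"
+ #
+ #   # No BigQuery classes are loaded yet.
+ #   gcloud = Google::Cloud.new
+ #
+ #   # This call requires "google/cloud/bigquery" on demand and returns a
+ #   # Google::Cloud::Bigquery::Project.
+ #   bigquery = gcloud.bigquery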
19
+
20
+
21
+ gem "google-cloud-core"
22
+ require "google/cloud"
23
+
24
+ module Google
25
+ module Cloud
26
+ ##
27
+ # Creates a new object for connecting to the BigQuery service.
28
+ # Each call creates a new connection.
29
+ #
30
+ # For more information on connecting to Google Cloud see the [Authentication
31
+ # Guide](https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/guides/authentication).
32
+ #
33
+ # @param [String, Array<String>] scope The OAuth 2.0 scopes controlling the
34
+ # set of resources and operations that the connection can access. See
35
+ # [Using OAuth 2.0 to Access Google
36
+ # APIs](https://developers.google.com/identity/protocols/OAuth2).
37
+ #
38
+ # The default scope is:
39
+ #
40
+ # * `https://www.googleapis.com/auth/bigquery`
41
+ # @param [Integer] retries Number of times to retry requests on server
42
+ # error. The default value is `3`. Optional.
43
+ # @param [Integer] timeout Default request timeout in seconds. Optional.
44
+ #
45
+ # @return [Google::Cloud::Bigquery::Project]
46
+ #
47
+ # @example
48
+ # require "google/cloud"
49
+ #
50
+ # gcloud = Google::Cloud.new
51
+ # bigquery = gcloud.bigquery
52
+ # dataset = bigquery.dataset "my_dataset"
53
+ # table = dataset.table "my_table"
54
+ # table.data.each do |row|
55
+ # puts row
56
+ # end
57
+ #
58
+ # @example The default scope can be overridden with the `scope` option:
59
+ # require "google/cloud"
60
+ #
61
+ # gcloud = Google::Cloud.new
62
+ # platform_scope = "https://www.googleapis.com/auth/cloud-platform"
63
+ # bigquery = gcloud.bigquery scope: platform_scope
64
+ #
65
+ def bigquery scope: nil, retries: nil, timeout: nil
66
+ Google::Cloud.bigquery @project, @keyfile, scope: scope,
67
+ retries: (retries || @retries),
68
+ timeout: (timeout || @timeout)
69
+ end
70
+
71
+ ##
72
+ # Creates a new `Project` instance connected to the BigQuery service.
73
+ # Each call creates a new connection.
74
+ #
75
+ # For more information on connecting to Google Cloud see the [Authentication
76
+ # Guide](https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/guides/authentication).
77
+ #
78
+ # @param [String] project Identifier for a BigQuery project. If not present,
79
+ # the default project for the credentials is used.
80
+ # @param [String, Hash] keyfile Keyfile downloaded from Google Cloud. If
81
+ # file path the file must be readable.
82
+ # @param [String, Array<String>] scope The OAuth 2.0 scopes controlling the
83
+ # set of resources and operations that the connection can access. See
84
+ # [Using OAuth 2.0 to Access Google
85
+ # APIs](https://developers.google.com/identity/protocols/OAuth2).
86
+ #
87
+ # The default scope is:
88
+ #
89
+ # * `https://www.googleapis.com/auth/bigquery`
90
+ # @param [Integer] retries Number of times to retry requests on server
91
+ # error. The default value is `3`. Optional.
92
+ # @param [Integer] timeout Default request timeout in seconds. Optional.
93
+ #
94
+ # @return [Google::Cloud::Bigquery::Project]
95
+ #
96
+ # @example
97
+ # require "google/cloud/bigquery"
98
+ #
99
+ # bigquery = Google::Cloud.bigquery
100
+ # dataset = bigquery.dataset "my_dataset"
101
+ # table = dataset.table "my_table"
102
+ #
103
+ def self.bigquery project = nil, keyfile = nil, scope: nil, retries: nil,
104
+ timeout: nil
105
+ require "google/cloud/bigquery"
106
+ project ||= Google::Cloud::Bigquery::Project.default_project
107
+ project = project.to_s # Always cast to a string
108
+ fail ArgumentError, "project is missing" if project.empty?
109
+
110
+ if keyfile.nil?
111
+ credentials = Google::Cloud::Bigquery::Credentials.default scope: scope
112
+ else
113
+ credentials = Google::Cloud::Bigquery::Credentials.new(
114
+ keyfile, scope: scope)
115
+ end
116
+
117
+ Google::Cloud::Bigquery::Project.new(
118
+ Google::Cloud::Bigquery::Service.new(
119
+ project, credentials, retries: retries, timeout: timeout))
120
+ end
121
+ end
122
+ end
lib/google/cloud/bigquery.rb ADDED
@@ -0,0 +1,353 @@
1
+ # Copyright 2015 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google-cloud-bigquery"
17
+ require "google/cloud/bigquery/project"
18
+
19
+ module Google
20
+ module Cloud
21
+ ##
22
+ # # Google Cloud BigQuery
23
+ #
24
+ # Google Cloud BigQuery enables super-fast, SQL-like queries against massive
25
+ # datasets, using the processing power of Google's infrastructure. To learn
26
+ # more, read [What is
27
+ # BigQuery?](https://cloud.google.com/bigquery/what-is-bigquery).
28
+ #
29
+ # The goal of google-cloud is to provide an API that is comfortable to
30
+ # Rubyists. Authentication is handled by {Google::Cloud#bigquery}. You can
31
+ # provide the project and credential information to connect to the BigQuery
32
+ # service, or if you are running on Google Compute Engine this configuration
33
+ # is taken care of for you. You can read more about the options for
34
+ # connecting in the [Authentication
35
+ # Guide](https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/guides/authentication).
36
+ #
37
+ # To help you get started quickly, the first few examples below use a public
38
+ # dataset provided by Google. As soon as you have [signed
39
+ # up](https://cloud.google.com/bigquery/sign-up) to use BigQuery, and
40
+ # provided that you stay in the free tier for queries, you should be able to
41
+ # run these first examples without the need to set up billing or to load
42
+ # data (although we'll show you how to do that too).
43
+ #
44
+ # ## Listing Datasets and Tables
45
+ #
46
+ # A BigQuery project holds datasets, which in turn hold tables. Assuming
47
+ # that you have not yet created datasets or tables in your own project,
48
+ # let's connect to Google's `publicdata` project, and see what you find.
49
+ #
50
+ # ```ruby
51
+ # require "google/cloud"
52
+ #
53
+ # gcloud = Google::Cloud.new "publicdata"
54
+ # bigquery = gcloud.bigquery
55
+ #
56
+ # bigquery.datasets.count #=> 1
57
+ # bigquery.datasets.first.dataset_id #=> "samples"
58
+ #
59
+ # dataset = bigquery.datasets.first
60
+ # tables = dataset.tables
61
+ #
62
+ # tables.count #=> 7
63
+ # tables.map &:table_id #=> [..., "shakespeare", "trigrams", "wikipedia"]
64
+ # ```
65
+ #
66
+ # In addition to listing all datasets and tables in the project, you can also
67
+ # retrieve individual datasets and tables by ID. Let's look at the structure
68
+ # of the `shakespeare` table, which contains an entry for every word in
69
+ # every play written by Shakespeare.
70
+ #
71
+ # ```ruby
72
+ # require "google/cloud"
73
+ #
74
+ # gcloud = Google::Cloud.new "publicdata"
75
+ # bigquery = gcloud.bigquery
76
+ #
77
+ # dataset = bigquery.dataset "samples"
78
+ # table = dataset.table "shakespeare"
79
+ #
80
+ # table.headers #=> ["word", "word_count", "corpus", "corpus_date"]
81
+ # table.rows_count #=> 164656
82
+ # ```
83
+ #
84
+ # Now that you know the column names for the Shakespeare table, you can
85
+ # write and run a query.
86
+ #
87
+ # ## Running queries
88
+ #
89
+ # BigQuery offers both synchronous and asynchronous methods, as explained in
90
+ # [Querying Data](https://cloud.google.com/bigquery/querying-data).
91
+ #
92
+ # ### Synchronous queries
93
+ #
94
+ # Let's start with the simpler synchronous approach. Notice that this time
95
+ # you are connecting using your own default project. This is necessary for
96
+ # running a query, since queries need to be able to create tables to hold
97
+ # results.
98
+ #
99
+ # ```ruby
100
+ # require "google/cloud"
101
+ #
102
+ # gcloud = Google::Cloud.new
103
+ # bigquery = gcloud.bigquery
104
+ #
105
+ # sql = "SELECT TOP(word, 50) as word, COUNT(*) as count " +
106
+ # "FROM publicdata:samples.shakespeare"
107
+ # data = bigquery.query sql
108
+ #
109
+ # data.count #=> 50
110
+ # data.next? #=> false
111
+ # data.first #=> {"word"=>"you", "count"=>42}
112
+ # ```
113
+ #
114
+ # The `TOP` function shown above is just one of a variety of functions
115
+ # offered by BigQuery. See the [Query
116
+ # Reference](https://cloud.google.com/bigquery/query-reference) for a full
117
+ # listing.
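+ #
+ # As a further illustration, here is a minimal sketch of an aggregate query
+ # using `GROUP BY` against the same public table:
+ #
+ # ```ruby
+ # require "google/cloud"
+ #
+ # gcloud = Google::Cloud.new
+ # bigquery = gcloud.bigquery
+ #
+ # sql = "SELECT corpus, SUM(word_count) as total_words " +
+ #       "FROM publicdata:samples.shakespeare " +
+ #       "GROUP BY corpus ORDER BY total_words DESC LIMIT 5"
+ # data = bigquery.query sql
+ #
+ # data.each do |row|
+ #   puts "#{row["corpus"]}: #{row["total_words"]}"
+ # end
+ # ```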
118
+ #
119
+ # ### Asynchronous queries
120
+ #
121
+ # Because you probably should not block for most BigQuery operations,
122
+ # including querying as well as importing, exporting, and copying data, the
123
+ # BigQuery API enables you to manage longer-running jobs. In the
124
+ # asynchronous approach to running a query, an instance of
125
+ # {Google::Cloud::Bigquery::QueryJob} is returned, rather than an instance
126
+ # of {Google::Cloud::Bigquery::QueryData}.
127
+ #
128
+ # ```ruby
129
+ # require "google/cloud"
130
+ #
131
+ # gcloud = Google::Cloud.new
132
+ # bigquery = gcloud.bigquery
133
+ #
134
+ # sql = "SELECT TOP(word, 50) as word, COUNT(*) as count " +
135
+ # "FROM publicdata:samples.shakespeare"
136
+ # job = bigquery.query_job sql
137
+ #
138
+ # job.wait_until_done!
139
+ # if !job.failed?
140
+ # job.query_results.each do |row|
141
+ # puts row["word"]
142
+ # end
143
+ # end
144
+ # ```
145
+ #
146
+ # Once you have determined that the job is done and has not failed, you can
147
+ # obtain an instance of {Google::Cloud::Bigquery::QueryData} by calling
148
+ # {Google::Cloud::Bigquery::QueryJob#query_results}. The query results for
149
+ # both of the above examples are stored in temporary tables with a lifetime
150
+ # of about 24 hours. See the final example below for a demonstration of how
151
+ # to store query results in a permanent table.
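+ #
+ # A minimal sketch of paging through all of a job's results, assuming the
+ # returned data object responds to `next?` and `next` as in the synchronous
+ # example above:
+ #
+ # ```ruby
+ # require "google/cloud"
+ #
+ # gcloud = Google::Cloud.new
+ # bigquery = gcloud.bigquery
+ #
+ # job = bigquery.query_job "SELECT word FROM publicdata:samples.shakespeare"
+ # job.wait_until_done!
+ #
+ # unless job.failed?
+ #   data = job.query_results
+ #   loop do
+ #     data.each { |row| puts row["word"] }
+ #     break unless data.next?
+ #     data = data.next
+ #   end
+ # end
+ # ```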
152
+ #
153
+ # ## Creating Datasets and Tables
154
+ #
155
+ # The first thing you need to do in a new BigQuery project is to create a
156
+ # {Google::Cloud::Bigquery::Dataset}. Datasets hold tables and control
157
+ # access to them.
158
+ #
159
+ # ```ruby
160
+ # require "google/cloud/bigquery"
161
+ #
162
+ # gcloud = Google::Cloud.new
163
+ # bigquery = gcloud.bigquery
164
+ # dataset = bigquery.create_dataset "my_dataset"
165
+ # ```
166
+ #
167
+ # Now that you have a dataset, you can use it to create a table. Every table
168
+ # is defined by a schema that may contain nested and repeated fields. The
169
+ # example below shows a schema with a repeated record field named
170
+ # `cities_lived`. (For more information about nested and repeated fields,
171
+ # see [Preparing Data for
172
+ # BigQuery](https://cloud.google.com/bigquery/preparing-data-for-bigquery).)
173
+ #
174
+ # ```ruby
175
+ # require "google/cloud"
176
+ #
177
+ # gcloud = Google::Cloud.new
178
+ # bigquery = gcloud.bigquery
179
+ # dataset = bigquery.dataset "my_dataset"
180
+ #
181
+ # table = dataset.create_table "people" do |schema|
182
+ # schema.string "first_name", mode: :required
183
+ # schema.record "cities_lived", mode: :repeated do |nested_schema|
184
+ # nested_schema.string "place", mode: :required
185
+ # nested_schema.integer "number_of_years", mode: :required
186
+ # end
187
+ # end
188
+ # ```
189
+ #
190
+ # Because of the repeated field in this schema, we cannot use the CSV format
191
+ # to load data into the table.
192
+ #
193
+ # ## Loading records
194
+ #
195
+ # In addition to CSV, data can be imported from files that are formatted as
196
+ # [Newline-delimited JSON](http://jsonlines.org/) or
197
+ # [Avro](http://avro.apache.org/), or from a Google Cloud Datastore backup.
198
+ # It can also be "streamed" into BigQuery.
199
+ #
200
+ # To follow along with these examples, you will need to set up billing on
201
+ # the [Google Developers Console](https://console.developers.google.com).
202
+ #
203
+ # ### Streaming records
204
+ #
205
+ # For situations in which you want new data to be available for querying as
206
+ # soon as possible, inserting individual records directly from your Ruby
207
+ # application is a great approach.
208
+ #
209
+ # ```ruby
210
+ # require "google/cloud"
211
+ #
212
+ # gcloud = Google::Cloud.new
213
+ # bigquery = gcloud.bigquery
214
+ # dataset = bigquery.dataset "my_dataset"
215
+ # table = dataset.table "people"
216
+ #
217
+ # rows = [
218
+ # {
219
+ # "first_name" => "Anna",
220
+ # "cities_lived" => [
221
+ # {
222
+ # "place" => "Stockholm",
223
+ # "number_of_years" => 2
224
+ # }
225
+ # ]
226
+ # },
227
+ # {
228
+ # "first_name" => "Bob",
229
+ # "cities_lived" => [
230
+ # {
231
+ # "place" => "Seattle",
232
+ # "number_of_years" => 5
233
+ # },
234
+ # {
235
+ # "place" => "Austin",
236
+ # "number_of_years" => 6
237
+ # }
238
+ # ]
239
+ # }
240
+ # ]
241
+ # table.insert rows
242
+ # ```
243
+ #
244
+ # There are some trade-offs involved with streaming, so be sure to read the
245
+ # discussion of data consistency in [Streaming Data Into
246
+ # BigQuery](https://cloud.google.com/bigquery/streaming-data-into-bigquery).
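+ #
+ # Because a streaming insert is not all-or-nothing, it can also be worth
+ # inspecting the returned response. A minimal sketch, assuming the response
+ # object exposes `success?` and `insert_errors`:
+ #
+ # ```ruby
+ # require "google/cloud"
+ #
+ # gcloud = Google::Cloud.new
+ # bigquery = gcloud.bigquery
+ # dataset = bigquery.dataset "my_dataset"
+ # table = dataset.table "people"
+ #
+ # response = table.insert [{ "first_name" => "Carol", "cities_lived" => [] }]
+ #
+ # unless response.success?
+ #   # Print whatever detail the response carries about the failed rows.
+ #   response.insert_errors.each { |insert_error| puts insert_error.inspect }
+ # end
+ # ```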
247
+ #
248
+ # ### Uploading a file
249
+ #
250
+ # To follow along with this example, please download the
251
+ # [names.zip](http://www.ssa.gov/OACT/babynames/names.zip) archive from the
252
+ # U.S. Social Security Administration. Inside the archive you will find over
253
+ # 100 files containing baby name records since the year 1880. A PDF file
254
+ # also contained in the archive specifies the schema used below.
255
+ #
256
+ # ```ruby
257
+ # require "google/cloud"
258
+ #
259
+ # gcloud = Google::Cloud.new
260
+ # bigquery = gcloud.bigquery
261
+ # dataset = bigquery.dataset "my_dataset"
262
+ # table = dataset.create_table "baby_names" do |schema|
263
+ # schema.string "name", mode: :required
264
+ # schema.string "sex", mode: :required
265
+ # schema.integer "number", mode: :required
266
+ # end
267
+ #
268
+ # file = File.open "names/yob2014.txt"
269
+ # load_job = table.load file, format: "csv"
270
+ # ```
271
+ #
272
+ # Because the names data, although formatted as CSV, is distributed in files
273
+ # with a `.txt` extension, this example explicitly passes the `format`
274
+ # option in order to demonstrate how to handle such situations. Because CSV
275
+ # is the default format for load operations, the option is not actually
276
+ # necessary. For JSON saved with a `.txt` extension, however, it would be.
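+ #
+ # A minimal sketch of that JSON case, assuming a local `people.txt` file
+ # holding one JSON record per line that matches the `people` schema created
+ # earlier:
+ #
+ # ```ruby
+ # require "google/cloud"
+ #
+ # gcloud = Google::Cloud.new
+ # bigquery = gcloud.bigquery
+ # dataset = bigquery.dataset "my_dataset"
+ # table = dataset.table "people"
+ #
+ # file = File.open "people.txt"
+ # load_job = table.load file, format: "json"
+ # load_job.wait_until_done!
+ # ```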
277
+ #
278
+ # ## Exporting query results to Google Cloud Storage
279
+ #
280
+ # The example below shows how to pass the `table` option with a query in
281
+ # order to store results in a permanent table. It also shows how to export
282
+ # the result data to a Google Cloud Storage file. In order to follow along,
283
+ # you will need to enable the Google Cloud Storage API in addition to
284
+ # setting up billing.
285
+ #
286
+ # ```ruby
287
+ # require "google/cloud"
288
+ #
289
+ # gcloud = Google::Cloud.new
290
+ # bigquery = gcloud.bigquery
291
+ # dataset = bigquery.dataset "my_dataset"
292
+ # source_table = dataset.table "baby_names"
293
+ # result_table = dataset.create_table "baby_names_results"
294
+ #
295
+ # sql = "SELECT name, number as count " +
296
+ # "FROM baby_names " +
297
+ # "WHERE name CONTAINS 'Sam' " +
298
+ # "ORDER BY count DESC"
299
+ # query_job = dataset.query_job sql, table: result_table
300
+ #
301
+ # query_job.wait_until_done!
302
+ #
303
+ # if !query_job.failed?
304
+ #
305
+ # storage = gcloud.storage
306
+ # bucket_id = "bigquery-exports-#{SecureRandom.uuid}"
307
+ # bucket = storage.create_bucket bucket_id
308
+ # extract_url = "gs://#{bucket.id}/baby-names-sam.csv"
309
+ #
310
+ # extract_job = result_table.extract extract_url
311
+ #
312
+ # extract_job.wait_until_done!
313
+ #
314
+ # # Download to local filesystem
315
+ # bucket.files.first.download "baby-names-sam.csv"
316
+ #
317
+ # end
318
+ # ```
319
+ #
320
+ # If a table you wish to export contains a large amount of data, you can
321
+ # pass a wildcard URI to export to multiple files (for sharding), or an
322
+ # array of URIs (for partitioning), or both. See [Exporting Data From
323
+ # BigQuery](https://cloud.google.com/bigquery/exporting-data-from-bigquery)
324
+ # for details.
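+ #
+ # A minimal sketch of a sharded export, assuming a Cloud Storage bucket
+ # named `my-export-bucket` already exists:
+ #
+ # ```ruby
+ # require "google/cloud"
+ #
+ # gcloud = Google::Cloud.new
+ # bigquery = gcloud.bigquery
+ # dataset = bigquery.dataset "my_dataset"
+ # table = dataset.table "baby_names"
+ #
+ # # The wildcard lets BigQuery split the export across numbered files.
+ # extract_job = table.extract "gs://my-export-bucket/baby-names-*.csv"
+ # extract_job.wait_until_done!
+ # ```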
325
+ #
326
+ # ## Configuring retries and timeout
327
+ #
328
+ # You can configure how many times API requests may be automatically
329
+ # retried. When an API request fails, the response will be inspected to see
330
+ # if the request meets criteria indicating that it may succeed on retry,
331
+ # such as `500` and `503` status codes or a specific internal error code
332
+ # such as `rateLimitExceeded`. If it meets the criteria, the request will be
333
+ # retried after a delay. If another error occurs, the delay will be
334
+ # increased before a subsequent attempt, until the `retries` limit is
335
+ # reached.
336
+ #
337
+ # You can also set the request `timeout` value in seconds.
338
+ #
339
+ # ```ruby
340
+ # require "google/cloud"
341
+ #
342
+ # gcloud = Google::Cloud.new
343
+ # bigquery = gcloud.bigquery retries: 10, timeout: 120
344
+ # ```
345
+ #
346
+ # See the [BigQuery error
347
+ # table](https://cloud.google.com/bigquery/troubleshooting-errors#errortable)
348
+ # for a list of error conditions.
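+ #
+ # The same options can be passed to {Google::Cloud.bigquery} when connecting
+ # with an explicit project and keyfile (both values below are placeholders):
+ #
+ # ```ruby
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud.bigquery "my-project-id",
+ #                                   "/path/to/keyfile.json",
+ #                                   retries: 10, timeout: 120
+ # ```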
349
+ #
350
+ module Bigquery
351
+ end
352
+ end
353
+ end