bigquery 0.2.9 → 0.2.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +3 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +67 -0
- data/README.md +151 -0
- data/Rakefile +9 -0
- data/bigquery.gemspec +24 -0
- data/lib/big_query.rb +9 -0
- data/lib/big_query/client.rb +73 -0
- data/lib/big_query/client/errors.rb +27 -0
- data/lib/big_query/client/jobs.rb +38 -0
- data/lib/big_query/client/load.rb +21 -0
- data/lib/big_query/client/query.rb +49 -0
- data/lib/big_query/client/tables.rb +124 -0
- data/lib/big_query/errors.rb +8 -0
- data/lib/big_query/version.rb +3 -0
- data/test/bigquery.rb +91 -0
- metadata +102 -91
- data/lib/bigquery.rb +0 -130
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 2a6b70cb84d6ff943f8b0df09c7d78aafdddb60f
|
4
|
+
data.tar.gz: 31bf68de60cb4f6e10773605f007df26a35d7ae3
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 16f52a1058e60c9cf7a9dc89da0c1fd7a0b4ddc5685912cedefef4d68432658eb0aaf9001611e07c97eef8757d529e53ccfbd1e1ce5762488fdec5f1f7e79a26
|
7
|
+
data.tar.gz: fce727fc5ebfcbe1ef232fc8c32f79a0e7df0351fcb70f131b2c3755e4cc2d8f52347db40d9011d7b29dde8521c39a338c35977edf84640ba8a089218ba747e3
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
bigquery (0.2.9)
|
5
|
+
google-api-client (>= 0.4.6)
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: https://rubygems.org/
|
9
|
+
specs:
|
10
|
+
addressable (2.3.6)
|
11
|
+
autoparse (0.3.3)
|
12
|
+
addressable (>= 2.3.1)
|
13
|
+
extlib (>= 0.9.15)
|
14
|
+
multi_json (>= 1.0.0)
|
15
|
+
byebug (2.7.0)
|
16
|
+
columnize (~> 0.3)
|
17
|
+
debugger-linecache (~> 1.2)
|
18
|
+
coderay (1.1.0)
|
19
|
+
columnize (0.8.9)
|
20
|
+
debugger-linecache (1.2.0)
|
21
|
+
extlib (0.9.16)
|
22
|
+
faraday (0.9.0)
|
23
|
+
multipart-post (>= 1.2, < 3)
|
24
|
+
google-api-client (0.7.1)
|
25
|
+
addressable (>= 2.3.2)
|
26
|
+
autoparse (>= 0.3.3)
|
27
|
+
extlib (>= 0.9.15)
|
28
|
+
faraday (>= 0.9.0)
|
29
|
+
jwt (>= 0.1.5)
|
30
|
+
launchy (>= 2.1.1)
|
31
|
+
multi_json (>= 1.0.0)
|
32
|
+
retriable (>= 1.4)
|
33
|
+
signet (>= 0.5.0)
|
34
|
+
uuidtools (>= 2.1.0)
|
35
|
+
jwt (1.0.0)
|
36
|
+
launchy (2.4.2)
|
37
|
+
addressable (~> 2.3)
|
38
|
+
method_source (0.8.2)
|
39
|
+
minitest (5.3.4)
|
40
|
+
multi_json (1.10.1)
|
41
|
+
multipart-post (2.0.0)
|
42
|
+
pry (0.9.12.6)
|
43
|
+
coderay (~> 1.0)
|
44
|
+
method_source (~> 0.8)
|
45
|
+
slop (~> 3.4)
|
46
|
+
pry-byebug (1.3.2)
|
47
|
+
byebug (~> 2.7)
|
48
|
+
pry (~> 0.9.12)
|
49
|
+
rake (10.3.2)
|
50
|
+
retriable (1.4.1)
|
51
|
+
signet (0.5.1)
|
52
|
+
addressable (>= 2.2.3)
|
53
|
+
faraday (>= 0.9.0.rc5)
|
54
|
+
jwt (>= 0.1.5)
|
55
|
+
multi_json (>= 1.0.0)
|
56
|
+
slop (3.5.0)
|
57
|
+
uuidtools (2.1.4)
|
58
|
+
|
59
|
+
PLATFORMS
|
60
|
+
ruby
|
61
|
+
|
62
|
+
DEPENDENCIES
|
63
|
+
bigquery!
|
64
|
+
bundler
|
65
|
+
minitest
|
66
|
+
pry-byebug
|
67
|
+
rake
|
data/README.md
ADDED
@@ -0,0 +1,151 @@
|
|
1
|
+
# BigQuery
|
2
|
+
|
3
|
+
BigQuery is a wrapper around the Google api ruby gem designed to make interacting with BigQuery easier.
|
4
|
+
|
5
|
+
## Install
|
6
|
+
|
7
|
+
gem install bigquery
|
8
|
+
|
9
|
+
## Authorization
|
10
|
+
|
11
|
+
Only service accounts are supported right now. https://developers.google.com/accounts/docs/OAuth2#serviceaccount
|
12
|
+
|
13
|
+
## Available methods
|
14
|
+
|
15
|
+
* query
|
16
|
+
* tables
|
17
|
+
* load
|
18
|
+
* tables_formatted
|
19
|
+
* job
|
20
|
+
* jobs
|
21
|
+
* refresh_auth
|
22
|
+
|
23
|
+
## Example
|
24
|
+
|
25
|
+
require 'big_query'
|
26
|
+
|
27
|
+
opts = {}
|
28
|
+
opts['client_id'] = '1234.apps.googleusercontent.com'
|
29
|
+
opts['service_email'] = '1234@developer.gserviceaccount.com'
|
30
|
+
opts['key'] = '/path/to/somekeyfile-privatekey.p12'
|
31
|
+
opts['project_id'] = '54321'
|
32
|
+
opts['dataset'] = 'yourdataset'
|
33
|
+
|
34
|
+
bq = BigQuery::Client.new(opts)
|
35
|
+
|
36
|
+
puts bq.tables
|
37
|
+
|
38
|
+
## Tables
|
39
|
+
|
40
|
+
List tables in dataset
|
41
|
+
|
42
|
+
bq.tables
|
43
|
+
|
44
|
+
List table names
|
45
|
+
|
46
|
+
bq.tables_formatted
|
47
|
+
|
48
|
+
Fetch table data
|
49
|
+
|
50
|
+
bq.table_data('table_name')
|
51
|
+
|
52
|
+
Delete an existing table
|
53
|
+
|
54
|
+
bq.delete_table('test123')
|
55
|
+
|
56
|
+
Create a table. The first param is the table name; the second is the table schema, defined in the following format:
|
57
|
+
|
58
|
+
{
|
59
|
+
field_name: {
|
60
|
+
type: 'TYPE_VALUE BETWEEN (STRING, INTEGER, FLOAT, BOOLEAN, RECORD)',
|
61
|
+
mode: 'MODE_VALUE BETWEEN (NULLABLE, REQUIRED, REPEATED)'
|
62
|
+
},
|
63
|
+
other_field_name: { ... }
|
64
|
+
}
|
65
|
+
|
66
|
+
|
67
|
+
As this example defines
|
68
|
+
|
69
|
+
table_name = 'test123'
|
70
|
+
table_schema = { id: { type: 'INTEGER' },
|
71
|
+
name: { type: 'STRING' } }
|
72
|
+
bq.create_table(table_name, table_schema)
|
73
|
+
|
74
|
+
Describe table schema
|
75
|
+
|
76
|
+
bq.describe_table('table_name')
|
77
|
+
|
78
|
+
## Query
|
79
|
+
|
80
|
+
You can either select
|
81
|
+
|
82
|
+
bq.query("SELECT * FROM [#{config['dataset']}.table_name] LIMIT 1")
|
83
|
+
|
84
|
+
Or insert
|
85
|
+
|
86
|
+
bq.insert('table_name', 'id' => 123, 'type' => 'Task')
|
87
|
+
|
88
|
+
## Keys
|
89
|
+
|
90
|
+
To get the keys you need to have a:
|
91
|
+
|
92
|
+
* Google API project: [link](https://console.developers.google.com/project)
|
93
|
+
* BigQuery activated: [link](https://bigquery.cloud.google.com)
|
94
|
+
* a BigQuery dataset created in the project: [link](https://bigquery.cloud.google.com)
|
95
|
+
|
96
|
+
1- Go to your project's Google API access page
|
97
|
+
|
98
|
+
https://code.google.com/apis/console/b/0/?noredirect&pli=1#project:YOUR_PROJECT_ID:access
|
99
|
+
|
100
|
+
2- Create a new client-ID for service_account
|
101
|
+
3- Download the key file
|
102
|
+
|
103
|
+
Now you have everything:
|
104
|
+
|
105
|
+
* client_id: API access client-ID
|
106
|
+
* service_email: API access Email address
|
107
|
+
* key: API access key file path
|
108
|
+
* project_id: your google API project id
|
109
|
+
* dataset: your big query dataset name
|
110
|
+
|
111
|
+
## Troubleshooting
|
112
|
+
|
113
|
+
If you're getting an "invalid_grant" error it usually means your system clock is off.
|
114
|
+
|
115
|
+
If you're getting unauthorized request errors but you've been able to connect successfully before, you need to refresh your auth by running the "refresh_auth" method.
|
116
|
+
|
117
|
+
## How to run test
|
118
|
+
|
119
|
+
Before running the tests, you must create a file named `.bigquery_settings.yml` in the root of this repository. `.bigquery_settings.yml` must include the following information.
|
120
|
+
|
121
|
+
```yaml
|
122
|
+
client_id: '1234.apps.googleusercontent.com'
|
123
|
+
service_email: '1234@developer.gserviceaccount.com'
|
124
|
+
key: '/path/to/somekeyfile-privatekey.p12'
|
125
|
+
project_id: '54321'
|
126
|
+
dataset: 'yourdataset'
|
127
|
+
```
|
128
|
+
|
129
|
+
Then run tests via rake.
|
130
|
+
|
131
|
+
```
|
132
|
+
$ bundle install && bundle exec rake test
|
133
|
+
```
|
134
|
+
|
135
|
+
## Contributing
|
136
|
+
|
137
|
+
Fork and submit a pull request and make sure you add a test for any feature you add.
|
138
|
+
|
139
|
+
## License
|
140
|
+
|
141
|
+
LICENSE:
|
142
|
+
|
143
|
+
(The MIT License)
|
144
|
+
|
145
|
+
Copyright © 2012 Adam Bronte
|
146
|
+
|
147
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the ‘Software’), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
148
|
+
|
149
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
150
|
+
|
151
|
+
THE SOFTWARE IS PROVIDED ‘AS IS’, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
data/bigquery.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "big_query/version"
|
4
|
+
|
5
|
+
# Gem specification for the bigquery gem.
Gem::Specification.new do |s|
  s.name          = "bigquery"
  s.version       = BigQuery::VERSION
  s.authors       = ["Adam Bronte", "Andres Bravo"]
  s.email         = ["adam@brontesaurus.com", "andresbravog@gmail.com"]
  s.description   = "This library is a wrapper around the google-api-client ruby gem.
It's meant to make calls to BigQuery easier and streamlined."
  s.require_paths = ["lib"]
  s.summary       = "A nice wrapper for Google Big Query"
  s.homepage      = "https://github.com/abronte/BigQuery"
  # The README ships the MIT license text; declare it so the built gem's
  # metadata no longer reports an empty license list.
  s.license       = "MIT"
  # Package whatever git tracks; test files are the tracked files under
  # test/, spec/ or features/.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")

  s.add_dependency "google-api-client", ">= 0.4.6"

  s.add_development_dependency "bundler"
  s.add_development_dependency "rake"
  s.add_development_dependency "minitest"
  s.add_development_dependency "pry-byebug"
end
|
data/lib/big_query.rb
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
require 'big_query/client/errors'
require 'big_query/client/query'
require 'big_query/client/jobs'
require 'big_query/client/tables'
require 'big_query/client/load'
|
5
|
+
|
6
|
+
module BigQuery
  # Wrapper around Google::APIClient that authenticates with a service
  # account key and exposes the query, job, table and load helpers mixed in
  # below.
  class Client
    include BigQuery::Client::Errors
    include BigQuery::Client::Query
    include BigQuery::Client::Jobs
    include BigQuery::Client::Tables
    # The load mixin lives in big_query/client/load.rb under the constant
    # Insert; without this include the `load` method is not available on
    # the client.
    include BigQuery::Client::Insert

    attr_accessor :dataset, :project_id

    # Builds the API client, authorizes it and discovers the bigquery v2 api.
    #
    # @param opts [Hash] string-keyed settings; the keys read here are
    #   'key' (path of the PKCS12 key file), 'service_email', 'project_id'
    #   and 'dataset'
    def initialize(opts = {})
      @client = Google::APIClient.new(
        application_name: 'BigQuery ruby app',
        application_version: BigQuery::VERSION
      )

      # Use the block form so the key file handle is closed as soon as the
      # key has been loaded instead of staying open for the process lifetime.
      key = File.open(opts['key'], mode: 'rb') do |key_file|
        Google::APIClient::PKCS12.load_key(key_file, "notasecret")
      end

      @asserter = Google::APIClient::JWTAsserter.new(
        opts['service_email'],
        "https://www.googleapis.com/auth/bigquery",
        key
      )

      refresh_auth

      @bq = @client.discovered_api("bigquery", "v2")

      @project_id = opts['project_id']
      @dataset = opts['dataset']
    end

    # Requests a fresh authorization from the asserter and installs it on
    # the underlying API client.
    def refresh_auth
      @client.authorization = @asserter.authorize
    end

    private

    # Performs the api call with the given params, always setting the
    # "projectId" parameter to the configured project.
    #
    # @param opts [Hash] options for the api call (:api_method, :parameters,
    #   :body_object)
    # @return [Hash, nil] parsed json response, nil when the body is empty
    # @raise [BigQuery::Errors::BigQueryError] when the response carries an
    #   "error" entry (raised by handle_error)
    def api(opts)
      opts[:parameters] = (opts[:parameters] || {}).merge("projectId" => @project_id)

      resp = @client.execute(opts)
      data = parse_body(resp)
      handle_error(data) if data && is_error?(data)
      data
    end

    # Parses the response body as json when one is present.
    #
    # @param resp [#body] response object returned by @client.execute
    # @return [Hash, nil] nil when the body is missing or empty
    def parse_body(resp)
      return nil unless resp.body && !resp.body.empty?
      JSON.parse(resp.body)
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module BigQuery
  class Client
    # Helpers that detect an error payload in a parsed response and turn it
    # into a Ruby exception.
    module Errors
      # Tells whether the parsed response carries an "error" entry.
      #
      # Non-Hash payloads (e.g. a JSON array) are never errors; indexing
      # them with a String key would raise a TypeError.
      #
      # @param response [Object] parsed json response
      # @return [Boolean]
      def is_error?(response)
        response.is_a?(Hash) && !response['error'].nil?
      end

      # Raises an exception built from the response's "error" entry.
      #
      # @param response [Hash] parsed json response containing "error"
      # @raise [BigQuery::Errors::NotFound] when the error code is 404
      # @raise [BigQuery::Errors::BigQueryError] for every other error
      def handle_error(response)
        error = response['error']

        # The error entry is normally a Hash with "code" and "message";
        # when it is a plain value, use it as the message so it is not lost.
        if error.is_a?(Hash)
          code = error['code']
          message = error['message']
        else
          code = nil
          message = error.to_s
        end

        case code
        when 404
          fail BigQuery::Errors::NotFound, message
        else
          fail BigQuery::Errors::BigQueryError, message
        end
      end
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module BigQuery
  class Client
    # Job related api calls (jobs.get, jobs.list, jobs.getQueryResults).
    module Jobs
      # Fetches a bigquery job by id
      #
      # The caller's hash is not modified; a 'jobId' already present in
      # opts takes precedence over id.
      #
      # @param id [Object] job id to fetch (e.g. a response's
      #   jobReference jobId)
      # @param opts [Hash] bigquery opts accepted
      # @return [Hash] json api response
      def job(id, opts = {})
        api(api_method: @bq.jobs.get,
            parameters: opts.merge('jobId' => opts['jobId'] || id))
      end

      # Lists all the jobs
      #
      # @param opts [Hash] bigquery opts accepted
      # @return [Hash] json api response
      def jobs(opts = {})
        api(api_method: @bq.jobs.list,
            parameters: opts)
      end

      # Gets the results of a given job
      #
      # The caller's hash is not modified; a 'jobId' already present in
      # opts takes precedence over id.
      #
      # @param id [Object] job id to fetch results for
      # @param opts [Hash] bigquery opts accepted
      # @return [Hash] json api response
      def get_query_results(id, opts = {})
        api(api_method: @bq.jobs.get_query_results,
            parameters: opts.merge('jobId' => opts['jobId'] || id))
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module BigQuery
  class Client
    # Load-job api call (jobs.insert with a "load" configuration).
    module Load
      # Loads file content into a table
      #
      # see https://developers.google.com/bigquery/loading-data-into-bigquery for possible opts
      # @param opts [Hash] load configuration, sent as configuration.load
      # @return [Hash] json api response
      def load(opts)
        api(
          api_method: @bq.jobs.insert,
          body_object: {
            'configuration' => {
              'load' => opts
            }
          }
        )
      end
    end

    # The module was previously named Insert although it lives in load.rb
    # and only defines `load`; keep the old constant so existing references
    # still resolve.
    Insert = Load
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module BigQuery
  class Client
    # Query related api calls (jobs.query) and row iteration.
    module Query
      # Performs the given query in the bigquery api
      #
      # @param given_query [String] query to perform
      # @param options [Hash] query options
      # @option options [Integer] :timeout (90 * 1000) timeout in milliseconds
      # @return [Hash] json api response
      def query(given_query, options = {})
        timeout = options.fetch(:timeout, 90 * 1000)

        api(
          api_method: @bq.jobs.query,
          body_object: { 'query' => given_query,
                         'timeoutMs' => timeout }
        )
      end

      # Performs a query synchronously and fetches all result rows, even
      # when that takes more than one request, invoking the block once for
      # each row.
      #
      # @param q [String] query to be executed
      # @param options [Hash] query options
      # @option options [Integer] :timeout (90 * 1000) timeout in milliseconds
      # @yield [row] each entry of every page's 'rows'
      # @return [nil]
      def each_row(q, options = {}, &block)
        res = query(q, options)
        # Nothing to iterate when the api call produced no body.
        return if res.nil?

        current_row = 0

        # call the block on the first page of results
        if res['rows']
          res['rows'].each(&block)
          current_row += res['rows'].size
        end

        # Further pages can only be requested with a job id.
        job_id = res['jobReference'] && res['jobReference']['jobId']
        return unless job_id

        # Keep grabbing pages starting at current_row. Stop when a page has
        # no rows (an empty page would never advance current_row) or when
        # totalRows have been delivered.
        while (res = get_query_results(job_id, :startIndex => current_row)) &&
              res['rows'] && !res['rows'].empty? &&
              current_row < res['totalRows'].to_i
          res['rows'].each(&block)
          current_row += res['rows'].size
        end
      end
    end
  end
end
|
@@ -0,0 +1,124 @@
|
|
1
|
+
|
2
|
+
# Module to handle table actions
|
3
|
+
# https://developers.google.com/bigquery/docs/tables
|
4
|
+
module BigQuery
  class Client
    # Table related api calls (tables.* and tabledata.*).
    module Tables
      ALLOWED_FIELD_TYPES = ['STRING', 'INTEGER', 'FLOAT', 'BOOLEAN', 'RECORD']
      ALLOWED_FIELD_MODES = ['NULLABLE', 'REQUIRED', 'REPEATED']

      # Lists the tables
      #
      # @param dataset [String] dataset to look for
      # @return [Array] the response's 'tables' entry, [] when absent
      def tables(dataset = @dataset)
        response = api({
          :api_method => @bq.tables.list,
          :parameters => { "datasetId" => dataset }
        })

        response['tables'] || []
      end

      # Lists the tables returning only the tableId
      #
      # @param dataset [String] dataset to look for
      # @return [Array] tableId of every listed table
      def tables_formatted(dataset = @dataset)
        tables(dataset).map { |t| t['tableReference']['tableId'] }
      end

      # Returns the rows of table data
      #
      # @param tableId [String] id of the table to look for
      # @param dataset [String] dataset to look for
      # @return [Array] the response's 'rows' entry, [] when absent
      def table_data(tableId, dataset = @dataset)
        response = api(api_method: @bq.tabledata.list,
                       parameters: { 'datasetId' => dataset,
                                     'tableId' => tableId })
        response['rows'] || []
      end

      # Inserts a row into a table of the configured dataset
      #
      # @param tableId [String] table id to insert into
      # @param opts [Hash] field value hash to be inserted
      # @return [Hash] json api response
      def insert(tableId, opts)
        api(
          api_method: @bq.tabledata.insert_all,
          parameters: { 'tableId' => tableId,
                        'datasetId' => @dataset },
          body_object: { 'rows' => [{ 'json' => opts }] }
        )
      end

      # Creates a new table in the configured dataset
      #
      # @param tableId [String] id of the table to create
      # @param schema [Hash] name => opts hash for the schema
      # @return [Hash] json api response
      #
      # examples:
      #
      #   @bq.create_table('new_table', id: { type: 'INTEGER', mode: 'required' })
      #   @bq.create_table('new_table', price: { type: 'FLOAT' })
      def create_table(tableId, schema = {})
        api(
          api_method: @bq.tables.insert,
          parameters: { "datasetId" => @dataset },
          body_object: { "tableReference" => {
                           "tableId" => tableId,
                           "projectId" => @project_id,
                           "datasetId" => @dataset
                         },
                         "schema" => {
                           "fields" => validate_schema(schema)
                         }
                       }
        )
      end

      # Deletes the given tableId from the configured dataset
      #
      # @param tableId [String] id of the table to delete
      def delete_table(tableId)
        api(api_method: @bq.tables.delete,
            parameters: { 'tableId' => tableId,
                          'datasetId' => @dataset }
        )
      end

      # Describes the schema of the given tableId
      #
      # @param tableId [String] table id to describe
      # @param dataset [String] dataset to look for
      # @return [Hash] json api response
      def describe_table(tableId, dataset = @dataset)
        api(
          api_method: @bq.tables.get,
          # Honour the dataset argument; it used to be ignored in favour of
          # the configured dataset.
          parameters: { 'tableId' => tableId,
                        'datasetId' => dataset }
        )
      end

      protected

      # Translates the given schema into the field list sent to bigquery
      #
      # Types and modes are matched case-insensitively against the allowed
      # lists. An unknown mode is omitted; an unknown type yields a nil
      # "type".
      #
      # @param schema [Hash] schema like { field_name => { type: 'TYPE', mode: 'MODE' }, ... }
      # @return [Array<Hash>]
      def validate_schema(schema)
        schema.map do |name, options|
          type = (ALLOWED_FIELD_TYPES & [options[:type].to_s.upcase]).first
          mode = (ALLOWED_FIELD_MODES & [options[:mode].to_s.upcase]).first

          field = { "name" => name.to_s, "type" => type }
          field["mode"] = mode if mode
          field
        end
      end
    end
  end
end
|
data/test/bigquery.rb
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require 'minitest/autorun'
|
3
|
+
require 'yaml'
|
4
|
+
require 'big_query'
|
5
|
+
require 'pry-byebug'
|
6
|
+
|
7
|
+
# Integration tests for BigQuery::Client. They talk to the real API using
# the settings in .bigquery_settings.yml at the repository root.
class BigQueryTest < Minitest::Test
  # Recreates the 'test' table before every test.
  def setup
    @bq = BigQuery::Client.new(config)
    @bq.delete_table('test') if @bq.tables_formatted.include?('test')
    @bq.create_table('test', id: { type: 'INTEGER', mode: 'REQUIRED' }, type: { type: 'STRING', mode: 'NULLABLE' })
  end

  # Settings loaded once from .bigquery_settings.yml next to the test dir.
  def config
    @config ||= YAML.load_file(File.expand_path(File.dirname(__FILE__) + "/../.bigquery_settings.yml"))
  end

  def test_for_tables
    tables = @bq.tables

    assert_equal "bigquery#table", tables[0]['kind']
    assert_equal "#{config['project_id']}:#{config['dataset']}.test", tables[0]['id']
    assert_equal 'test', tables[0]['tableReference']['tableId']
  end

  def test_for_tables_formatted
    result = @bq.tables_formatted

    assert_includes result, 'test'
  end

  def test_for_table_data
    result = @bq.table_data('test')

    assert_kind_of Array, result
  end

  def test_for_create_table
    @bq.delete_table('test123') if @bq.tables_formatted.include?('test123')
    result = @bq.create_table('test123', id: { type: 'INTEGER' })

    assert_equal "bigquery#table", result['kind']
    assert_equal "test123", result['tableReference']['tableId']
    assert_equal [{ "name" => "id", "type" => "INTEGER" }], result['schema']['fields']
  end

  def test_for_delete_table
    unless @bq.tables_formatted.include?('test123')
      @bq.create_table('test123', id: { type: 'INTEGER' })
    end
    @bq.delete_table('test123')

    tables = @bq.tables_formatted

    refute_includes tables, 'test123'
  end

  def test_for_describe_table
    result = @bq.describe_table('test')

    assert_equal "bigquery#table", result['kind']
    assert_equal "TABLE", result['type']
    assert_equal "#{config['project_id']}:#{config['dataset']}.test", result['id']
    assert_equal 'test', result['tableReference']['tableId']
    assert_equal 'id', result['schema']['fields'][0]['name']
    assert_equal 'INTEGER', result['schema']['fields'][0]['type']
    assert_equal 'REQUIRED', result['schema']['fields'][0]['mode']
    assert_equal 'type', result['schema']['fields'][1]['name']
    assert_equal 'STRING', result['schema']['fields'][1]['type']
    assert_equal 'NULLABLE', result['schema']['fields'][1]['mode']
  end

  def test_for_query
    result = @bq.query("SELECT * FROM [#{config['dataset']}.test] LIMIT 1")

    assert_equal "bigquery#queryResponse", result['kind']
    assert_equal true, result['jobComplete']
  end

  def test_for_insert
    result = @bq.insert('test', "id" => 123, "type" => "Task")

    assert_equal "bigquery#tableDataInsertAllResponse", result['kind']
  end
end
|
metadata
CHANGED
@@ -1,124 +1,135 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: bigquery
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 2
|
9
|
-
- 9
|
10
|
-
version: 0.2.9
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.10
|
11
5
|
platform: ruby
|
12
|
-
authors:
|
6
|
+
authors:
|
13
7
|
- Adam Bronte
|
8
|
+
- Andres Bravo
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
requirements:
|
12
|
+
date: 2014-08-23 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: google-api-client
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
24
18
|
- - ">="
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
hash: 3
|
27
|
-
segments:
|
28
|
-
- 0
|
29
|
-
- 4
|
30
|
-
- 6
|
19
|
+
- !ruby/object:Gem::Version
|
31
20
|
version: 0.4.6
|
32
|
-
prerelease: false
|
33
21
|
type: :runtime
|
34
|
-
name: google-api-client
|
35
|
-
requirement: *id001
|
36
|
-
- !ruby/object:Gem::Dependency
|
37
|
-
version_requirements: &id002 !ruby/object:Gem::Requirement
|
38
|
-
none: false
|
39
|
-
requirements:
|
40
|
-
- - ">="
|
41
|
-
- !ruby/object:Gem::Version
|
42
|
-
hash: 3
|
43
|
-
segments:
|
44
|
-
- 0
|
45
|
-
version: "0"
|
46
22
|
prerelease: false
|
47
|
-
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: 0.4.6
|
28
|
+
- !ruby/object:Gem::Dependency
|
48
29
|
name: bundler
|
49
|
-
requirement:
|
50
|
-
|
51
|
-
version_requirements: &id003 !ruby/object:Gem::Requirement
|
52
|
-
none: false
|
53
|
-
requirements:
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
54
32
|
- - ">="
|
55
|
-
- !ruby/object:Gem::Version
|
56
|
-
|
57
|
-
segments:
|
58
|
-
- 0
|
59
|
-
version: "0"
|
60
|
-
prerelease: false
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
61
35
|
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
42
|
+
- !ruby/object:Gem::Dependency
|
62
43
|
name: rake
|
63
|
-
requirement:
|
64
|
-
|
65
|
-
version_requirements: &id004 !ruby/object:Gem::Requirement
|
66
|
-
none: false
|
67
|
-
requirements:
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
68
46
|
- - ">="
|
69
|
-
- !ruby/object:Gem::Version
|
70
|
-
|
71
|
-
segments:
|
72
|
-
- 0
|
73
|
-
version: "0"
|
74
|
-
prerelease: false
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '0'
|
75
49
|
type: :development
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
- !ruby/object:Gem::Dependency
|
76
57
|
name: minitest
|
77
|
-
requirement:
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
type: :development
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: pry-byebug
|
72
|
+
requirement: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
type: :development
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
78
84
|
description: |-
|
79
85
|
This library is a wrapper around the google-api-client ruby gem.
|
80
|
-
|
81
|
-
email:
|
86
|
+
It's meant to make calls to BigQuery easier and streamlined.
|
87
|
+
email:
|
88
|
+
- adam@brontesaurus.com
|
89
|
+
- andresbravog@gmail.com
|
82
90
|
executables: []
|
83
|
-
|
84
91
|
extensions: []
|
85
|
-
|
86
92
|
extra_rdoc_files: []
|
87
|
-
|
88
|
-
|
89
|
-
-
|
93
|
+
files:
|
94
|
+
- ".gitignore"
|
95
|
+
- Gemfile
|
96
|
+
- Gemfile.lock
|
97
|
+
- README.md
|
98
|
+
- Rakefile
|
99
|
+
- bigquery.gemspec
|
100
|
+
- lib/big_query.rb
|
101
|
+
- lib/big_query/client.rb
|
102
|
+
- lib/big_query/client/errors.rb
|
103
|
+
- lib/big_query/client/jobs.rb
|
104
|
+
- lib/big_query/client/load.rb
|
105
|
+
- lib/big_query/client/query.rb
|
106
|
+
- lib/big_query/client/tables.rb
|
107
|
+
- lib/big_query/errors.rb
|
108
|
+
- lib/big_query/version.rb
|
109
|
+
- test/bigquery.rb
|
90
110
|
homepage: https://github.com/abronte/BigQuery
|
91
111
|
licenses: []
|
92
|
-
|
112
|
+
metadata: {}
|
93
113
|
post_install_message:
|
94
114
|
rdoc_options: []
|
95
|
-
|
96
|
-
require_paths:
|
115
|
+
require_paths:
|
97
116
|
- lib
|
98
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
99
|
-
|
100
|
-
requirements:
|
117
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
118
|
+
requirements:
|
101
119
|
- - ">="
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
version: "0"
|
107
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
108
|
-
none: false
|
109
|
-
requirements:
|
120
|
+
- !ruby/object:Gem::Version
|
121
|
+
version: '0'
|
122
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
123
|
+
requirements:
|
110
124
|
- - ">="
|
111
|
-
- !ruby/object:Gem::Version
|
112
|
-
|
113
|
-
segments:
|
114
|
-
- 0
|
115
|
-
version: "0"
|
125
|
+
- !ruby/object:Gem::Version
|
126
|
+
version: '0'
|
116
127
|
requirements: []
|
117
|
-
|
118
128
|
rubyforge_project:
|
119
|
-
rubygems_version:
|
129
|
+
rubygems_version: 2.2.2
|
120
130
|
signing_key:
|
121
|
-
specification_version:
|
131
|
+
specification_version: 4
|
122
132
|
summary: A nice wrapper for Google Big Query
|
123
|
-
test_files:
|
124
|
-
|
133
|
+
test_files:
|
134
|
+
- test/bigquery.rb
|
135
|
+
has_rdoc:
|
data/lib/bigquery.rb
DELETED
@@ -1,130 +0,0 @@
|
|
1
|
-
require 'google/api_client'
|
2
|
-
|
3
|
-
class BigQuery
|
4
|
-
|
5
|
-
attr_accessor :dataset, :project_id
|
6
|
-
|
7
|
-
def initialize(opts = {})
|
8
|
-
@client = Google::APIClient.new
|
9
|
-
|
10
|
-
key = Google::APIClient::PKCS12.load_key(
|
11
|
-
opts['key'],
|
12
|
-
"notasecret"
|
13
|
-
)
|
14
|
-
|
15
|
-
@asserter = Google::APIClient::JWTAsserter.new(
|
16
|
-
opts['service_email'],
|
17
|
-
"https://www.googleapis.com/auth/bigquery",
|
18
|
-
key
|
19
|
-
)
|
20
|
-
|
21
|
-
refresh_auth
|
22
|
-
|
23
|
-
@bq = @client.discovered_api("bigquery", "v2")
|
24
|
-
|
25
|
-
@project_id = opts['project_id']
|
26
|
-
@dataset = opts['dataset']
|
27
|
-
end
|
28
|
-
|
29
|
-
def query(q)
|
30
|
-
res = api({
|
31
|
-
:api_method => @bq.jobs.query,
|
32
|
-
:body_object => { "query" => q, 'timeoutMs' => 90 * 1000}
|
33
|
-
})
|
34
|
-
|
35
|
-
if res.has_key? "errors"
|
36
|
-
raise BigQueryError, "BigQuery has returned an error :: #{res['errors'].inspect}"
|
37
|
-
else
|
38
|
-
res
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
def load(opts)
|
43
|
-
api({
|
44
|
-
:api_method => @bq.jobs.insert,
|
45
|
-
:body_object => {
|
46
|
-
"configuration" => {
|
47
|
-
"load" => opts
|
48
|
-
}
|
49
|
-
}
|
50
|
-
})
|
51
|
-
end
|
52
|
-
|
53
|
-
def job(id, opts = {})
|
54
|
-
opts['jobId'] = id
|
55
|
-
|
56
|
-
api({
|
57
|
-
:api_method => @bq.jobs.get,
|
58
|
-
:parameters => opts
|
59
|
-
})
|
60
|
-
end
|
61
|
-
|
62
|
-
def jobs(opts = {})
|
63
|
-
api({
|
64
|
-
:api_method => @bq.jobs.list,
|
65
|
-
:parameters => opts
|
66
|
-
})
|
67
|
-
end
|
68
|
-
|
69
|
-
def get_query_results(jobId, opts = {})
|
70
|
-
opts['jobId'] = jobId
|
71
|
-
api({
|
72
|
-
:api_method => @bq.jobs.get_query_results,
|
73
|
-
:parameters => opts
|
74
|
-
})
|
75
|
-
end
|
76
|
-
|
77
|
-
# perform a query synchronously
|
78
|
-
# fetch all result rows, even when that takes >1 query
|
79
|
-
# invoke /block/ once for each row, passing the row
|
80
|
-
def each_row(q, &block)
|
81
|
-
current_row = 0
|
82
|
-
# repeatedly fetch results, starting from current_row
|
83
|
-
# invoke the block on each one, then grab next page if there is one
|
84
|
-
# it'll terminate when res has no 'rows' key or when we've done enough rows
|
85
|
-
# perform query...
|
86
|
-
res = query(q)
|
87
|
-
job_id = res['jobReference']['jobId']
|
88
|
-
# call the block on the first page of results
|
89
|
-
if( res && res['rows'] )
|
90
|
-
res['rows'].each(&block)
|
91
|
-
current_row += res['rows'].size
|
92
|
-
end
|
93
|
-
# keep grabbing pages from the API and calling the block on each row
|
94
|
-
while(( res = get_query_results(job_id, :startIndex => current_row) ) && res['rows'] && current_row < res['totalRows'].to_i ) do
|
95
|
-
res['rows'].each(&block)
|
96
|
-
current_row += res['rows'].size
|
97
|
-
end
|
98
|
-
end
|
99
|
-
|
100
|
-
def tables(dataset = @dataset)
|
101
|
-
api({
|
102
|
-
:api_method => @bq.tables.list,
|
103
|
-
:parameters => {"datasetId" => dataset}
|
104
|
-
})['tables']
|
105
|
-
end
|
106
|
-
|
107
|
-
def tables_formatted(dataset = @dataset)
|
108
|
-
tables(dataset).map {|t| "[#{dataset}.#{t['tableReference']['tableId']}]"}
|
109
|
-
end
|
110
|
-
|
111
|
-
def refresh_auth
|
112
|
-
@client.authorization = @asserter.authorize
|
113
|
-
end
|
114
|
-
|
115
|
-
private
|
116
|
-
|
117
|
-
def api(opts)
|
118
|
-
if opts[:parameters]
|
119
|
-
opts[:parameters] = opts[:parameters].merge({"projectId" => @project_id})
|
120
|
-
else
|
121
|
-
opts[:parameters] = {"projectId" => @project_id}
|
122
|
-
end
|
123
|
-
|
124
|
-
resp = @client.execute(opts)
|
125
|
-
JSON.parse(resp.body)
|
126
|
-
end
|
127
|
-
end
|
128
|
-
|
129
|
-
class BigQueryError < StandardError
|
130
|
-
end
|