google-cloud-bigquery 0.23.0 → 0.24.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/google/cloud/bigquery.rb +33 -32
- data/lib/google/cloud/bigquery/convert.rb +258 -0
- data/lib/google/cloud/bigquery/data.rb +20 -44
- data/lib/google/cloud/bigquery/dataset.rb +49 -65
- data/lib/google/cloud/bigquery/project.rb +42 -44
- data/lib/google/cloud/bigquery/query_data.rb +9 -8
- data/lib/google/cloud/bigquery/schema.rb +151 -179
- data/lib/google/cloud/bigquery/schema/field.rb +498 -0
- data/lib/google/cloud/bigquery/service.rb +11 -105
- data/lib/google/cloud/bigquery/table.rb +130 -6
- data/lib/google/cloud/bigquery/time.rb +2 -2
- data/lib/google/cloud/bigquery/version.rb +1 -1
- data/lib/google/cloud/bigquery/view.rb +36 -7
- metadata +11 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e8cba429d574299afb4e2a230f071e13417c954c
|
4
|
+
data.tar.gz: ca363404d3ec1d319ca4aa3738170a924c3f20fb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c4aa08940ba60adcc8058e4c1787227d0b40314db0c92e1c40a01dada7a5a4d973311430591d3e786dced51ce7ae65bb554220041ef6c36a5cfc27e930b866b4
|
7
|
+
data.tar.gz: 8ce5bc5f122cb1b90f617ed6c334df9ad7eade436e7727771ff4bad4687e4d712a97c0d4f64db4cafcb55a93e0ee72bc869f58387ac846816968e06f37a09b4b
|
@@ -75,7 +75,7 @@ module Google
|
|
75
75
|
# dataset = bigquery.dataset "samples"
|
76
76
|
# table = dataset.table "shakespeare"
|
77
77
|
#
|
78
|
-
# table.headers #=> ["word", "word_count", "corpus", "corpus_date"]
|
78
|
+
# table.headers #=> [:word, :word_count, :corpus, :corpus_date]
|
79
79
|
# table.rows_count #=> 164656
|
80
80
|
# ```
|
81
81
|
#
|
@@ -95,33 +95,12 @@ module Google
|
|
95
95
|
# explained in [Querying
|
96
96
|
# Data](https://cloud.google.com/bigquery/querying-data).
|
97
97
|
#
|
98
|
-
# ### Legacy SQL (formerly BigQuery SQL)
|
99
|
-
#
|
100
|
-
# Before version 2.0, BigQuery executed queries using a non-standard SQL
|
101
|
-
# dialect known as BigQuery SQL. This variant is still the default, and will
|
102
|
-
# be used unless you pass the flag `standard_sql: true` with your query.
|
103
|
-
# (If you get an SQL syntax error with a query that may be written in
|
104
|
-
# standard SQL, be sure that you are passing this option.)
|
105
|
-
#
|
106
|
-
# ```ruby
|
107
|
-
# require "google/cloud/bigquery"
|
108
|
-
#
|
109
|
-
# bigquery = Google::Cloud::Bigquery.new
|
110
|
-
#
|
111
|
-
# sql = "SELECT TOP(word, 50) as word, COUNT(*) as count " \
|
112
|
-
# "FROM [publicdata:samples.shakespeare]"
|
113
|
-
# data = bigquery.query sql
|
114
|
-
# ```
|
115
|
-
#
|
116
|
-
# Notice that in legacy SQL, a fully-qualified table name uses the following
|
117
|
-
# format: `[my-dashed-project:dataset1.tableName]`.
|
118
|
-
#
|
119
98
|
# ### Standard SQL
|
120
99
|
#
|
121
100
|
# Standard SQL is the preferred SQL dialect for querying data stored in
|
122
101
|
# BigQuery. It is compliant with the SQL 2011 standard, and has extensions
|
123
|
-
# that support querying nested and repeated data. It has several advantages
|
124
|
-
# over Legacy SQL, including:
|
102
|
+
# that support querying nested and repeated data. This is the default
|
103
|
+
# syntax. It has several advantages over Legacy SQL, including:
|
125
104
|
#
|
126
105
|
# * Composability using `WITH` clauses and SQL functions
|
127
106
|
# * Subqueries in the `SELECT` list and `WHERE` clause
|
@@ -136,8 +115,7 @@ module Google
|
|
136
115
|
# For examples that demonstrate some of these features, see [Standard SQL
|
137
116
|
# highlights](https://cloud.google.com/bigquery/docs/reference/standard-sql/migrating-from-legacy-sql#standard_sql_highlights).
|
138
117
|
#
|
139
|
-
# To use standard SQL, pass the
|
140
|
-
# option `standard_sql: true` with your query.
|
118
|
+
# Standard SQL is the default.
|
141
119
|
#
|
142
120
|
# ```ruby
|
143
121
|
# require "google/cloud/bigquery"
|
@@ -147,12 +125,35 @@ module Google
|
|
147
125
|
# sql = "SELECT word, SUM(word_count) AS word_count " \
|
148
126
|
# "FROM `bigquery-public-data.samples.shakespeare`" \
|
149
127
|
# "WHERE word IN ('me', 'I', 'you') GROUP BY word"
|
150
|
-
# data = bigquery.query sql
|
128
|
+
# data = bigquery.query sql
|
129
|
+
# ```
|
130
|
+
#
|
131
|
+
# Notice that in standard SQL, a fully-qualified table name uses the
|
132
|
+
# following format: <code>`my-dashed-project.dataset1.tableName`</code>.
|
133
|
+
#
|
134
|
+
# ### Legacy SQL (formerly BigQuery SQL)
|
135
|
+
#
|
136
|
+
# Before version 2.0, BigQuery executed queries using a non-standard SQL
|
137
|
+
# dialect known as BigQuery SQL. This variant is optional, and can be
|
138
|
+
# enabled by passing the flag `legacy_sql: true` with your query. (If you
|
139
|
+
# get an SQL syntax error with a query that may be written in legacy SQL,
|
140
|
+
# be sure that you are passing this option.)
|
141
|
+
#
|
142
|
+
# To use legacy SQL, pass the option `legacy_sql: true` with your query.
|
143
|
+
#
|
144
|
+
# ```ruby
|
145
|
+
# require "google/cloud/bigquery"
|
146
|
+
#
|
147
|
+
# bigquery = Google::Cloud::Bigquery.new
|
148
|
+
#
|
149
|
+
# sql = "SELECT TOP(word, 50) as word, COUNT(*) as count " \
|
150
|
+
# "FROM [publicdata:samples.shakespeare]"
|
151
|
+
# data = bigquery.query sql, legacy_sql: true
|
151
152
|
# ```
|
152
153
|
#
|
153
|
-
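# Notice that in standard SQL, a fully-qualified table name uses the
|
154
|
-
# following format: <code>`my-dashed-project.dataset1.tableName`</code>.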
|
155
|
-
#
|
154
|
+
# Notice that in legacy SQL, a fully-qualified table name uses brackets
|
155
|
+
# instead of back-ticks, and a colon instead of a dot to separate the
|
156
|
+
# project and the dataset: `[my-dashed-project:dataset1.tableName]`.
|
156
157
|
#
|
157
158
|
# #### Query parameters
|
158
159
|
#
|
@@ -166,7 +167,7 @@ module Google
|
|
166
167
|
#
|
167
168
|
# sql = "SELECT word, SUM(word_count) AS word_count " \
|
168
169
|
# "FROM `bigquery-public-data.samples.shakespeare`" \
|
169
|
-
# "WHERE word IN (@words) GROUP BY word"
|
170
|
+
# "WHERE word IN UNNEST(@words) GROUP BY word"
|
170
171
|
# data = bigquery.query sql, params: { words: ['me', 'I', 'you'] }
|
171
172
|
# ```
|
172
173
|
#
|
@@ -244,7 +245,7 @@ module Google
|
|
244
245
|
# job.wait_until_done!
|
245
246
|
# if !job.failed?
|
246
247
|
# job.query_results.each do |row|
|
247
|
-
# puts row["word"]
|
248
|
+
# puts row[:word]
|
248
249
|
# end
|
249
250
|
# end
|
250
251
|
# ```
|
@@ -0,0 +1,258 @@
|
|
1
|
+
# Copyright 2017 Google Inc. All rights reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
require "google/apis/bigquery_v2"
|
17
|
+
require "stringio"
|
18
|
+
require "base64"
|
19
|
+
require "time"
|
20
|
+
require "date"
|
21
|
+
|
22
|
+
module Google
|
23
|
+
module Cloud
|
24
|
+
module Bigquery
|
25
|
+
# rubocop:disable all
|
26
|
+
|
27
|
+
##
|
28
|
+
# @private
|
29
|
+
#
|
30
|
+
# Internal conversion of raw data values to/from Bigquery values
|
31
|
+
#
|
32
|
+
# | BigQuery | Ruby | Notes |
|
33
|
+
# |-------------|----------------|---|
|
34
|
+
# | `BOOL` | `true`/`false` | |
|
35
|
+
# | `INT64` | `Integer` | |
|
36
|
+
# | `FLOAT64` | `Float` | |
|
37
|
+
# | `STRING` | `String` | |
|
38
|
+
# | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
|
39
|
+
# | `DATE` | `Date` | |
|
40
|
+
# | `TIMESTAMP` | `Time` | |
|
41
|
+
# | `TIME` | `Google::Cloud::Bigquery::Time` | |
|
42
|
+
# | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
|
43
|
+
# | `ARRAY` | `Array` | Nested arrays and `nil` values are not supported. |
|
44
|
+
# | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
|
45
|
+
|
46
|
+
module Convert
|
47
|
+
##
|
48
|
+
# @private
|
49
|
+
def self.format_rows rows, fields
|
50
|
+
Array(rows).map do |row|
|
51
|
+
# convert TableRow to hash to handle nested TableCell values
|
52
|
+
format_row row.to_h, fields
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
##
|
57
|
+
# @private
|
58
|
+
def self.format_row row, fields
|
59
|
+
row_pairs = fields.zip(row[:f]).map do |f, v|
|
60
|
+
[f.name.to_sym, format_value(v, f)]
|
61
|
+
end
|
62
|
+
Hash[row_pairs]
|
63
|
+
end
|
64
|
+
|
65
|
+
def self.format_value value, field
|
66
|
+
if value.nil?
|
67
|
+
nil
|
68
|
+
elsif value.empty?
|
69
|
+
nil
|
70
|
+
elsif value[:v].nil?
|
71
|
+
nil
|
72
|
+
elsif Array === value[:v]
|
73
|
+
value[:v].map { |v| format_value v, field }
|
74
|
+
elsif Hash === value[:v]
|
75
|
+
if value[:v].empty?
|
76
|
+
nil
|
77
|
+
else
|
78
|
+
format_row value[:v], field.fields
|
79
|
+
end
|
80
|
+
elsif field.type == "STRING"
|
81
|
+
String value[:v]
|
82
|
+
elsif field.type == "INTEGER"
|
83
|
+
Integer value[:v]
|
84
|
+
elsif field.type == "FLOAT"
|
85
|
+
Float value[:v]
|
86
|
+
elsif field.type == "BOOLEAN"
|
87
|
+
(value[:v] == "true" ? true : (value[:v] == "false" ? false : nil))
|
88
|
+
elsif field.type == "BYTES"
|
89
|
+
StringIO.new Base64.decode64 value[:v]
|
90
|
+
elsif field.type == "TIMESTAMP"
|
91
|
+
::Time.at Float(value[:v])
|
92
|
+
elsif field.type == "TIME"
|
93
|
+
Bigquery::Time.new value[:v]
|
94
|
+
elsif field.type == "DATETIME"
|
95
|
+
::Time.parse("#{value[:v]} UTC").to_datetime
|
96
|
+
elsif field.type == "DATE"
|
97
|
+
Date.parse value[:v]
|
98
|
+
else
|
99
|
+
value[:v]
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
##
|
104
|
+
# @private
|
105
|
+
def self.to_query_param value
|
106
|
+
if TrueClass === value
|
107
|
+
return Google::Apis::BigqueryV2::QueryParameter.new(
|
108
|
+
parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
|
109
|
+
type: "BOOL"),
|
110
|
+
parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
|
111
|
+
value: true)
|
112
|
+
)
|
113
|
+
elsif FalseClass === value
|
114
|
+
return Google::Apis::BigqueryV2::QueryParameter.new(
|
115
|
+
parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
|
116
|
+
type: "BOOL"),
|
117
|
+
parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
|
118
|
+
value: false)
|
119
|
+
)
|
120
|
+
elsif Integer === value
|
121
|
+
return Google::Apis::BigqueryV2::QueryParameter.new(
|
122
|
+
parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
|
123
|
+
type: "INT64"),
|
124
|
+
parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
|
125
|
+
value: value)
|
126
|
+
)
|
127
|
+
elsif Float === value
|
128
|
+
return Google::Apis::BigqueryV2::QueryParameter.new(
|
129
|
+
parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
|
130
|
+
type: "FLOAT64"),
|
131
|
+
parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
|
132
|
+
value: value)
|
133
|
+
)
|
134
|
+
elsif String === value
|
135
|
+
return Google::Apis::BigqueryV2::QueryParameter.new(
|
136
|
+
parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
|
137
|
+
type: "STRING"),
|
138
|
+
parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
|
139
|
+
value: value)
|
140
|
+
)
|
141
|
+
elsif DateTime === value
|
142
|
+
return Google::Apis::BigqueryV2::QueryParameter.new(
|
143
|
+
parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
|
144
|
+
type: "DATETIME"),
|
145
|
+
parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
|
146
|
+
value: value.strftime("%Y-%m-%d %H:%M:%S.%6N"))
|
147
|
+
)
|
148
|
+
elsif Date === value
|
149
|
+
return Google::Apis::BigqueryV2::QueryParameter.new(
|
150
|
+
parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
|
151
|
+
type: "DATE"),
|
152
|
+
parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
|
153
|
+
value: value.to_s)
|
154
|
+
)
|
155
|
+
elsif ::Time === value
|
156
|
+
return Google::Apis::BigqueryV2::QueryParameter.new(
|
157
|
+
parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
|
158
|
+
type: "TIMESTAMP"),
|
159
|
+
parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
|
160
|
+
value: value.strftime("%Y-%m-%d %H:%M:%S.%6N%:z"))
|
161
|
+
)
|
162
|
+
elsif Bigquery::Time === value
|
163
|
+
return Google::Apis::BigqueryV2::QueryParameter.new(
|
164
|
+
parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
|
165
|
+
type: "TIME"),
|
166
|
+
parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
|
167
|
+
value: value.value)
|
168
|
+
)
|
169
|
+
elsif value.respond_to?(:read) && value.respond_to?(:rewind)
|
170
|
+
value.rewind
|
171
|
+
return Google::Apis::BigqueryV2::QueryParameter.new(
|
172
|
+
parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
|
173
|
+
type: "BYTES"),
|
174
|
+
parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
|
175
|
+
value: Base64.strict_encode64(
|
176
|
+
value.read.force_encoding("ASCII-8BIT")))
|
177
|
+
)
|
178
|
+
elsif Array === value
|
179
|
+
array_params = value.map { |param| Convert.to_query_param param }
|
180
|
+
return Google::Apis::BigqueryV2::QueryParameter.new(
|
181
|
+
parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
|
182
|
+
type: "ARRAY",
|
183
|
+
array_type: array_params.first.parameter_type
|
184
|
+
),
|
185
|
+
parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
|
186
|
+
array_values: array_params.map(&:parameter_value)
|
187
|
+
)
|
188
|
+
)
|
189
|
+
elsif Hash === value
|
190
|
+
struct_pairs = value.map do |name, param|
|
191
|
+
struct_param = Convert.to_query_param param
|
192
|
+
[Google::Apis::BigqueryV2::QueryParameterType::StructType.new(
|
193
|
+
name: String(name),
|
194
|
+
type: struct_param.parameter_type
|
195
|
+
), struct_param.parameter_value]
|
196
|
+
end
|
197
|
+
struct_values = Hash[struct_pairs.map do |type, value|
|
198
|
+
[type.name.to_sym, value]
|
199
|
+
end]
|
200
|
+
|
201
|
+
return Google::Apis::BigqueryV2::QueryParameter.new(
|
202
|
+
parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
|
203
|
+
type: "STRUCT",
|
204
|
+
struct_types: struct_pairs.map(&:first)
|
205
|
+
),
|
206
|
+
parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
|
207
|
+
struct_values: struct_values
|
208
|
+
)
|
209
|
+
)
|
210
|
+
else
|
211
|
+
fail "A query parameter of type #{value.class} is not supported."
|
212
|
+
end
|
213
|
+
end
|
214
|
+
|
215
|
+
##
|
216
|
+
# @private
|
217
|
+
def self.to_json_rows rows
|
218
|
+
rows.map { |row| to_json_row row }
|
219
|
+
end
|
220
|
+
##
|
221
|
+
# @private
|
222
|
+
def self.to_json_row row
|
223
|
+
Hash[row.map { |k, v| [k.to_s, to_json_value(v)] }]
|
224
|
+
end
|
225
|
+
##
|
226
|
+
# @private
|
227
|
+
def self.to_json_value value
|
228
|
+
if DateTime === value
|
229
|
+
value.strftime "%Y-%m-%d %H:%M:%S.%6N"
|
230
|
+
elsif Date === value
|
231
|
+
value.to_s
|
232
|
+
elsif ::Time === value
|
233
|
+
value.strftime "%Y-%m-%d %H:%M:%S.%6N%:z"
|
234
|
+
elsif Bigquery::Time === value
|
235
|
+
value.value
|
236
|
+
elsif value.respond_to?(:read) && value.respond_to?(:rewind)
|
237
|
+
value.rewind
|
238
|
+
Base64.strict_encode64(value.read.force_encoding("ASCII-8BIT"))
|
239
|
+
elsif Array === value
|
240
|
+
value.map { |v| to_json_value v }
|
241
|
+
elsif Hash === value
|
242
|
+
Hash[value.map { |k, v| [k.to_s, to_json_value(v)] }]
|
243
|
+
else
|
244
|
+
value
|
245
|
+
end
|
246
|
+
end
|
247
|
+
|
248
|
+
def self.resolve_legacy_sql standard_sql, legacy_sql
|
249
|
+
return !standard_sql unless standard_sql.nil?
|
250
|
+
return legacy_sql unless legacy_sql.nil?
|
251
|
+
false
|
252
|
+
end
|
253
|
+
|
254
|
+
# rubocop:enable all
|
255
|
+
end
|
256
|
+
end
|
257
|
+
end
|
258
|
+
end
|
@@ -65,6 +65,24 @@ module Google
|
|
65
65
|
nil
|
66
66
|
end
|
67
67
|
|
68
|
+
##
|
69
|
+
# The schema of the data.
|
70
|
+
def schema
|
71
|
+
table.schema
|
72
|
+
end
|
73
|
+
|
74
|
+
##
|
75
|
+
# The fields of the data.
|
76
|
+
def fields
|
77
|
+
schema.fields
|
78
|
+
end
|
79
|
+
|
80
|
+
##
|
81
|
+
# The names of the columns in the data.
|
82
|
+
def headers
|
83
|
+
schema.headers
|
84
|
+
end
|
85
|
+
|
68
86
|
##
|
69
87
|
# Whether there is a next page of data.
|
70
88
|
#
|
@@ -177,18 +195,11 @@ module Google
|
|
177
195
|
end
|
178
196
|
end
|
179
197
|
|
180
|
-
##
|
181
|
-
# Represents Table Data as a list of positional values (array of
|
182
|
-
# arrays). No type conversion is made, e.g. numbers are formatted as
|
183
|
-
# strings.
|
184
|
-
def raw
|
185
|
-
Array(gapi.rows).map { |row| row.f.map(&:v) }
|
186
|
-
end
|
187
|
-
|
188
198
|
##
|
189
199
|
# @private New Data from a response object.
|
190
200
|
def self.from_gapi gapi, table
|
191
|
-
formatted_rows = format_rows
|
201
|
+
formatted_rows = Convert.format_rows(gapi.rows,
|
202
|
+
table.gapi.schema.fields)
|
192
203
|
|
193
204
|
data = new formatted_rows
|
194
205
|
data.table = table
|
@@ -196,41 +207,6 @@ module Google
|
|
196
207
|
data
|
197
208
|
end
|
198
209
|
|
199
|
-
# rubocop:disable all
|
200
|
-
# Disabled rubocop because this implementation will not last.
|
201
|
-
|
202
|
-
def self.format_rows rows, fields
|
203
|
-
headers = Array(fields).map { |f| f.name }
|
204
|
-
field_types = Array(fields).map { |f| f.type }
|
205
|
-
|
206
|
-
Array(rows).map do |row|
|
207
|
-
values = row.f.map { |f| f.v }
|
208
|
-
formatted_values = format_values field_types, values
|
209
|
-
Hash[headers.zip formatted_values]
|
210
|
-
end
|
211
|
-
end
|
212
|
-
|
213
|
-
def self.format_values field_types, values
|
214
|
-
field_types.zip(values).map do |type, value|
|
215
|
-
begin
|
216
|
-
if value.nil?
|
217
|
-
nil
|
218
|
-
elsif type == "INTEGER"
|
219
|
-
Integer value
|
220
|
-
elsif type == "FLOAT"
|
221
|
-
Float value
|
222
|
-
elsif type == "BOOLEAN"
|
223
|
-
(value == "true" ? true : (value == "false" ? false : nil))
|
224
|
-
else
|
225
|
-
value
|
226
|
-
end
|
227
|
-
rescue
|
228
|
-
value
|
229
|
-
end
|
230
|
-
end
|
231
|
-
end
|
232
|
-
# rubocop:enable all
|
233
|
-
|
234
210
|
protected
|
235
211
|
|
236
212
|
##
|