google-cloud-bigquery 0.23.0 → 0.24.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b8c37453c0300ca5c4bddd798fb4e34dbb0afeb7
4
- data.tar.gz: f9a9842d943e303c92e8f90a95403d05d8f5ba73
3
+ metadata.gz: e8cba429d574299afb4e2a230f071e13417c954c
4
+ data.tar.gz: ca363404d3ec1d319ca4aa3738170a924c3f20fb
5
5
  SHA512:
6
- metadata.gz: e0046330e19fede666ad09614981e0010787c52b59a1cf5675bf994e04b08f64033ac565789d180e34c26f59d5cd72e8d35e4fe722a42407ad92625fa2887280
7
- data.tar.gz: 2989d1c9d3cc203561c26181b8549eb25ffadbff7a0aa941a4ece3a2b628778949b29225999193b46efcf3b92c9f6afb7b2a7322d1af1a6ce3d742dd23e3d6ab
6
+ metadata.gz: c4aa08940ba60adcc8058e4c1787227d0b40314db0c92e1c40a01dada7a5a4d973311430591d3e786dced51ce7ae65bb554220041ef6c36a5cfc27e930b866b4
7
+ data.tar.gz: 8ce5bc5f122cb1b90f617ed6c334df9ad7eade436e7727771ff4bad4687e4d712a97c0d4f64db4cafcb55a93e0ee72bc869f58387ac846816968e06f37a09b4b
@@ -75,7 +75,7 @@ module Google
75
75
  # dataset = bigquery.dataset "samples"
76
76
  # table = dataset.table "shakespeare"
77
77
  #
78
- # table.headers #=> ["word", "word_count", "corpus", "corpus_date"]
78
+ # table.headers #=> [:word, :word_count, :corpus, :corpus_date]
79
79
  # table.rows_count #=> 164656
80
80
  # ```
81
81
  #
@@ -95,33 +95,12 @@ module Google
95
95
  # explained in [Querying
96
96
  # Data](https://cloud.google.com/bigquery/querying-data).
97
97
  #
98
- # ### Legacy SQL (formerly BigQuery SQL)
99
- #
100
- # Before version 2.0, BigQuery executed queries using a non-standard SQL
101
- # dialect known as BigQuery SQL. This variant is still the default, and will
102
- # be used unless you pass the flag `standard_sql: true` with your query.
103
- # (If you get an SQL syntax error with a query that may be written in
104
- # standard SQL, be sure that you are passing this option.)
105
- #
106
- # ```ruby
107
- # require "google/cloud/bigquery"
108
- #
109
- # bigquery = Google::Cloud::Bigquery.new
110
- #
111
- # sql = "SELECT TOP(word, 50) as word, COUNT(*) as count " \
112
- # "FROM [publicdata:samples.shakespeare]"
113
- # data = bigquery.query sql
114
- # ```
115
- #
116
- # Notice that in legacy SQL, a fully-qualified table name uses the following
117
- # format: `[my-dashed-project:dataset1.tableName]`.
118
- #
119
98
  # ### Standard SQL
120
99
  #
121
100
  # Standard SQL is the preferred SQL dialect for querying data stored in
122
101
  # BigQuery. It is compliant with the SQL 2011 standard, and has extensions
123
- # that support querying nested and repeated data. It has several advantages
124
- # over legacy SQL, including:
102
+ # that support querying nested and repeated data. This is the default
103
+ # syntax. It has several advantages over legacy SQL, including:
125
104
  #
126
105
  # * Composability using `WITH` clauses and SQL functions
127
106
  # * Subqueries in the `SELECT` list and `WHERE` clause
@@ -136,8 +115,7 @@ module Google
136
115
  # For examples that demonstrate some of these features, see [Standard SQL
137
116
  # highlights](https://cloud.google.com/bigquery/docs/reference/standard-sql/migrating-from-legacy-sql#standard_sql_highlights).
138
117
  #
139
- # Legacy SQL is still the default. To use standard SQL instead, pass the
140
- # option `standard_sql: true` with your query.
118
+ # Standard SQL is the default.
141
119
  #
142
120
  # ```ruby
143
121
  # require "google/cloud/bigquery"
@@ -147,12 +125,35 @@ module Google
147
125
  # sql = "SELECT word, SUM(word_count) AS word_count " \
148
126
  # "FROM `bigquery-public-data.samples.shakespeare`" \
149
127
  # "WHERE word IN ('me', 'I', 'you') GROUP BY word"
150
- # data = bigquery.query sql, standard_sql: true
128
+ # data = bigquery.query sql
129
+ # ```
130
+ #
131
+ # Notice that in standard SQL, a fully-qualified table name uses the
132
+ # following format: <code>`my-dashed-project.dataset1.tableName`</code>.
133
+ #
134
+ # ### Legacy SQL (formerly BigQuery SQL)
135
+ #
136
+ # Before version 2.0, BigQuery executed queries using a non-standard SQL
137
+ # dialect known as BigQuery SQL. This variant is optional, and can be
138
+ # enabled by passing the flag `legacy_sql: true` with your query. (If you
139
+ # get an SQL syntax error with a query that may be written in legacy SQL,
140
+ # be sure that you are passing this option.)
141
+ #
142
+ # To use legacy SQL, pass the option `legacy_sql: true` with your query.
143
+ #
144
+ # ```ruby
145
+ # require "google/cloud/bigquery"
146
+ #
147
+ # bigquery = Google::Cloud::Bigquery.new
148
+ #
149
+ # sql = "SELECT TOP(word, 50) as word, COUNT(*) as count " \
150
+ # "FROM [publicdata:samples.shakespeare]"
151
+ # data = bigquery.query sql, legacy_sql: true
151
152
  # ```
152
153
  #
153
- # Notice that in standard SQL, the format for a fully-qualified table name
154
- # uses back-ticks instead of brackets, and a dot instead of a semi-colon:
155
- # <code>`my-dashed-project.dataset1.tableName`</code>.
154
+ # Notice that in legacy SQL, a fully-qualified table name uses brackets
155
+ # instead of back-ticks, and a colon instead of a dot to separate the
156
+ # project and the dataset: `[my-dashed-project:dataset1.tableName]`.
156
157
  #
157
158
  # #### Query parameters
158
159
  #
@@ -166,7 +167,7 @@ module Google
166
167
  #
167
168
  # sql = "SELECT word, SUM(word_count) AS word_count " \
168
169
  # "FROM `bigquery-public-data.samples.shakespeare`" \
169
- # "WHERE word IN (@words) GROUP BY word"
170
+ # "WHERE word IN UNNEST(@words) GROUP BY word"
170
171
  # data = bigquery.query sql, params: { words: ['me', 'I', 'you'] }
171
172
  # ```
172
173
  #
@@ -244,7 +245,7 @@ module Google
244
245
  # job.wait_until_done!
245
246
  # if !job.failed?
246
247
  # job.query_results.each do |row|
247
- # puts row["word"]
248
+ # puts row[:word]
248
249
  # end
249
250
  # end
250
251
  # ```
@@ -0,0 +1,258 @@
1
+ # Copyright 2017 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/apis/bigquery_v2"
17
+ require "stringio"
18
+ require "base64"
19
+ require "time"
20
+ require "date"
21
+
22
+ module Google
23
+ module Cloud
24
+ module Bigquery
25
+ # rubocop:disable all
26
+
27
+ ##
28
+ # @private
29
+ #
30
+ # Internal conversion of raw data values to/from Bigquery values
31
+ #
32
+ # | BigQuery | Ruby | Notes |
33
+ # |-------------|----------------|---|
34
+ # | `BOOL` | `true`/`false` | |
35
+ # | `INT64` | `Integer` | |
36
+ # | `FLOAT64` | `Float` | |
37
+ # | `STRING` | `String` | |
38
+ # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
39
+ # | `DATE` | `Date` | |
40
+ # | `TIMESTAMP` | `Time` | |
41
+ # | `TIME` | `Google::Cloud::Bigquery::Time` | |
42
+ # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
43
+ # | `ARRAY` | `Array` | Nested arrays and `nil` values are not supported. |
44
+ # | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
45
+
46
+ module Convert
47
+ ##
48
+ # @private
49
+ def self.format_rows rows, fields
50
+ Array(rows).map do |row|
51
+ # convert TableRow to hash to handle nested TableCell values
52
+ format_row row.to_h, fields
53
+ end
54
+ end
55
+
56
+ ##
57
+ # @private
58
+ def self.format_row row, fields
59
+ row_pairs = fields.zip(row[:f]).map do |f, v|
60
+ [f.name.to_sym, format_value(v, f)]
61
+ end
62
+ Hash[row_pairs]
63
+ end
64
+
65
+ def self.format_value value, field
66
+ if value.nil?
67
+ nil
68
+ elsif value.empty?
69
+ nil
70
+ elsif value[:v].nil?
71
+ nil
72
+ elsif Array === value[:v]
73
+ value[:v].map { |v| format_value v, field }
74
+ elsif Hash === value[:v]
75
+ if value[:v].empty?
76
+ nil
77
+ else
78
+ format_row value[:v], field.fields
79
+ end
80
+ elsif field.type == "STRING"
81
+ String value[:v]
82
+ elsif field.type == "INTEGER"
83
+ Integer value[:v]
84
+ elsif field.type == "FLOAT"
85
+ Float value[:v]
86
+ elsif field.type == "BOOLEAN"
87
+ (value[:v] == "true" ? true : (value[:v] == "false" ? false : nil))
88
+ elsif field.type == "BYTES"
89
+ StringIO.new Base64.decode64 value[:v]
90
+ elsif field.type == "TIMESTAMP"
91
+ ::Time.at Float(value[:v])
92
+ elsif field.type == "TIME"
93
+ Bigquery::Time.new value[:v]
94
+ elsif field.type == "DATETIME"
95
+ ::Time.parse("#{value[:v]} UTC").to_datetime
96
+ elsif field.type == "DATE"
97
+ Date.parse value[:v]
98
+ else
99
+ value[:v]
100
+ end
101
+ end
102
+
103
+ ##
104
+ # @private
105
+ def self.to_query_param value
106
+ if TrueClass === value
107
+ return Google::Apis::BigqueryV2::QueryParameter.new(
108
+ parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
109
+ type: "BOOL"),
110
+ parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
111
+ value: true)
112
+ )
113
+ elsif FalseClass === value
114
+ return Google::Apis::BigqueryV2::QueryParameter.new(
115
+ parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
116
+ type: "BOOL"),
117
+ parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
118
+ value: false)
119
+ )
120
+ elsif Integer === value
121
+ return Google::Apis::BigqueryV2::QueryParameter.new(
122
+ parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
123
+ type: "INT64"),
124
+ parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
125
+ value: value)
126
+ )
127
+ elsif Float === value
128
+ return Google::Apis::BigqueryV2::QueryParameter.new(
129
+ parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
130
+ type: "FLOAT64"),
131
+ parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
132
+ value: value)
133
+ )
134
+ elsif String === value
135
+ return Google::Apis::BigqueryV2::QueryParameter.new(
136
+ parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
137
+ type: "STRING"),
138
+ parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
139
+ value: value)
140
+ )
141
+ elsif DateTime === value
142
+ return Google::Apis::BigqueryV2::QueryParameter.new(
143
+ parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
144
+ type: "DATETIME"),
145
+ parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
146
+ value: value.strftime("%Y-%m-%d %H:%M:%S.%6N"))
147
+ )
148
+ elsif Date === value
149
+ return Google::Apis::BigqueryV2::QueryParameter.new(
150
+ parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
151
+ type: "DATE"),
152
+ parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
153
+ value: value.to_s)
154
+ )
155
+ elsif ::Time === value
156
+ return Google::Apis::BigqueryV2::QueryParameter.new(
157
+ parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
158
+ type: "TIMESTAMP"),
159
+ parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
160
+ value: value.strftime("%Y-%m-%d %H:%M:%S.%6N%:z"))
161
+ )
162
+ elsif Bigquery::Time === value
163
+ return Google::Apis::BigqueryV2::QueryParameter.new(
164
+ parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
165
+ type: "TIME"),
166
+ parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
167
+ value: value.value)
168
+ )
169
+ elsif value.respond_to?(:read) && value.respond_to?(:rewind)
170
+ value.rewind
171
+ return Google::Apis::BigqueryV2::QueryParameter.new(
172
+ parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
173
+ type: "BYTES"),
174
+ parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
175
+ value: Base64.strict_encode64(
176
+ value.read.force_encoding("ASCII-8BIT")))
177
+ )
178
+ elsif Array === value
179
+ array_params = value.map { |param| Convert.to_query_param param }
180
+ return Google::Apis::BigqueryV2::QueryParameter.new(
181
+ parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
182
+ type: "ARRAY",
183
+ array_type: array_params.first.parameter_type
184
+ ),
185
+ parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
186
+ array_values: array_params.map(&:parameter_value)
187
+ )
188
+ )
189
+ elsif Hash === value
190
+ struct_pairs = value.map do |name, param|
191
+ struct_param = Convert.to_query_param param
192
+ [Google::Apis::BigqueryV2::QueryParameterType::StructType.new(
193
+ name: String(name),
194
+ type: struct_param.parameter_type
195
+ ), struct_param.parameter_value]
196
+ end
197
+ struct_values = Hash[struct_pairs.map do |type, value|
198
+ [type.name.to_sym, value]
199
+ end]
200
+
201
+ return Google::Apis::BigqueryV2::QueryParameter.new(
202
+ parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
203
+ type: "STRUCT",
204
+ struct_types: struct_pairs.map(&:first)
205
+ ),
206
+ parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
207
+ struct_values: struct_values
208
+ )
209
+ )
210
+ else
211
+ fail "A query parameter of type #{value.class} is not supported."
212
+ end
213
+ end
214
+
215
+ ##
216
+ # @private
217
+ def self.to_json_rows rows
218
+ rows.map { |row| to_json_row row }
219
+ end
220
+ ##
221
+ # @private
222
+ def self.to_json_row row
223
+ Hash[row.map { |k, v| [k.to_s, to_json_value(v)] }]
224
+ end
225
+ ##
226
+ # @private
227
+ def self.to_json_value value
228
+ if DateTime === value
229
+ value.strftime "%Y-%m-%d %H:%M:%S.%6N"
230
+ elsif Date === value
231
+ value.to_s
232
+ elsif ::Time === value
233
+ value.strftime "%Y-%m-%d %H:%M:%S.%6N%:z"
234
+ elsif Bigquery::Time === value
235
+ value.value
236
+ elsif value.respond_to?(:read) && value.respond_to?(:rewind)
237
+ value.rewind
238
+ Base64.strict_encode64(value.read.force_encoding("ASCII-8BIT"))
239
+ elsif Array === value
240
+ value.map { |v| to_json_value v }
241
+ elsif Hash === value
242
+ Hash[value.map { |k, v| [k.to_s, to_json_value(v)] }]
243
+ else
244
+ value
245
+ end
246
+ end
247
+
248
+ def self.resolve_legacy_sql standard_sql, legacy_sql
249
+ return !standard_sql unless standard_sql.nil?
250
+ return legacy_sql unless legacy_sql.nil?
251
+ false
252
+ end
253
+
254
+ # rubocop:enable all
255
+ end
256
+ end
257
+ end
258
+ end
@@ -65,6 +65,24 @@ module Google
65
65
  nil
66
66
  end
67
67
 
68
+ ##
69
+ # The schema of the data.
70
+ def schema
71
+ table.schema
72
+ end
73
+
74
+ ##
75
+ # The fields of the data.
76
+ def fields
77
+ schema.fields
78
+ end
79
+
80
+ ##
81
+ # The names of the columns in the data.
82
+ def headers
83
+ schema.headers
84
+ end
85
+
68
86
  ##
69
87
  # Whether there is a next page of data.
70
88
  #
@@ -177,18 +195,11 @@ module Google
177
195
  end
178
196
  end
179
197
 
180
- ##
181
- # Represents Table Data as a list of positional values (array of
182
- # arrays). No type conversion is made, e.g. numbers are formatted as
183
- # strings.
184
- def raw
185
- Array(gapi.rows).map { |row| row.f.map(&:v) }
186
- end
187
-
188
198
  ##
189
199
  # @private New Data from a response object.
190
200
  def self.from_gapi gapi, table
191
- formatted_rows = format_rows gapi.rows, table.fields
201
+ formatted_rows = Convert.format_rows(gapi.rows,
202
+ table.gapi.schema.fields)
192
203
 
193
204
  data = new formatted_rows
194
205
  data.table = table
@@ -196,41 +207,6 @@ module Google
196
207
  data
197
208
  end
198
209
 
199
- # rubocop:disable all
200
- # Disabled rubocop because this implementation will not last.
201
-
202
- def self.format_rows rows, fields
203
- headers = Array(fields).map { |f| f.name }
204
- field_types = Array(fields).map { |f| f.type }
205
-
206
- Array(rows).map do |row|
207
- values = row.f.map { |f| f.v }
208
- formatted_values = format_values field_types, values
209
- Hash[headers.zip formatted_values]
210
- end
211
- end
212
-
213
- def self.format_values field_types, values
214
- field_types.zip(values).map do |type, value|
215
- begin
216
- if value.nil?
217
- nil
218
- elsif type == "INTEGER"
219
- Integer value
220
- elsif type == "FLOAT"
221
- Float value
222
- elsif type == "BOOLEAN"
223
- (value == "true" ? true : (value == "false" ? false : nil))
224
- else
225
- value
226
- end
227
- rescue
228
- value
229
- end
230
- end
231
- end
232
- # rubocop:enable all
233
-
234
210
  protected
235
211
 
236
212
  ##