google-cloud-bigquery 0.23.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b8c37453c0300ca5c4bddd798fb4e34dbb0afeb7
4
- data.tar.gz: f9a9842d943e303c92e8f90a95403d05d8f5ba73
3
+ metadata.gz: e8cba429d574299afb4e2a230f071e13417c954c
4
+ data.tar.gz: ca363404d3ec1d319ca4aa3738170a924c3f20fb
5
5
  SHA512:
6
- metadata.gz: e0046330e19fede666ad09614981e0010787c52b59a1cf5675bf994e04b08f64033ac565789d180e34c26f59d5cd72e8d35e4fe722a42407ad92625fa2887280
7
- data.tar.gz: 2989d1c9d3cc203561c26181b8549eb25ffadbff7a0aa941a4ece3a2b628778949b29225999193b46efcf3b92c9f6afb7b2a7322d1af1a6ce3d742dd23e3d6ab
6
+ metadata.gz: c4aa08940ba60adcc8058e4c1787227d0b40314db0c92e1c40a01dada7a5a4d973311430591d3e786dced51ce7ae65bb554220041ef6c36a5cfc27e930b866b4
7
+ data.tar.gz: 8ce5bc5f122cb1b90f617ed6c334df9ad7eade436e7727771ff4bad4687e4d712a97c0d4f64db4cafcb55a93e0ee72bc869f58387ac846816968e06f37a09b4b
@@ -75,7 +75,7 @@ module Google
75
75
  # dataset = bigquery.dataset "samples"
76
76
  # table = dataset.table "shakespeare"
77
77
  #
78
- # table.headers #=> ["word", "word_count", "corpus", "corpus_date"]
78
+ # table.headers #=> [:word, :word_count, :corpus, :corpus_date]
79
79
  # table.rows_count #=> 164656
80
80
  # ```
81
81
  #
@@ -95,33 +95,12 @@ module Google
95
95
  # explained in [Querying
96
96
  # Data](https://cloud.google.com/bigquery/querying-data).
97
97
  #
98
- # ### Legacy SQL (formerly BigQuery SQL)
99
- #
100
- # Before version 2.0, BigQuery executed queries using a non-standard SQL
101
- # dialect known as BigQuery SQL. This variant is still the default, and will
102
- # be used unless you pass the flag `standard_sql: true` with your query.
103
- # (If you get an SQL syntax error with a query that may be written in
104
- # standard SQL, be sure that you are passing this option.)
105
- #
106
- # ```ruby
107
- # require "google/cloud/bigquery"
108
- #
109
- # bigquery = Google::Cloud::Bigquery.new
110
- #
111
- # sql = "SELECT TOP(word, 50) as word, COUNT(*) as count " \
112
- # "FROM [publicdata:samples.shakespeare]"
113
- # data = bigquery.query sql
114
- # ```
115
- #
116
- # Notice that in legacy SQL, a fully-qualified table name uses the following
117
- # format: `[my-dashed-project:dataset1.tableName]`.
118
- #
119
98
  # ### Standard SQL
120
99
  #
121
100
  # Standard SQL is the preferred SQL dialect for querying data stored in
122
101
  # BigQuery. It is compliant with the SQL 2011 standard, and has extensions
123
- # that support querying nested and repeated data. It has several advantages
124
- # over legacy SQL, including:
102
+ # that support querying nested and repeated data. This is the default
103
+ # syntax. It has several advantages over legacy SQL, including:
125
104
  #
126
105
  # * Composability using `WITH` clauses and SQL functions
127
106
  # * Subqueries in the `SELECT` list and `WHERE` clause
@@ -136,8 +115,7 @@ module Google
136
115
  # For examples that demonstrate some of these features, see [Standard SQL
137
116
  # highlights](https://cloud.google.com/bigquery/docs/reference/standard-sql/migrating-from-legacy-sql#standard_sql_highlights).
138
117
  #
139
- # Legacy SQL is still the default. To use standard SQL instead, pass the
140
- # option `standard_sql: true` with your query.
118
+ # Standard SQL is the default.
141
119
  #
142
120
  # ```ruby
143
121
  # require "google/cloud/bigquery"
@@ -147,12 +125,35 @@ module Google
147
125
  # sql = "SELECT word, SUM(word_count) AS word_count " \
148
126
  # "FROM `bigquery-public-data.samples.shakespeare`" \
149
127
  # "WHERE word IN ('me', 'I', 'you') GROUP BY word"
150
- # data = bigquery.query sql, standard_sql: true
128
+ # data = bigquery.query sql
129
+ # ```
130
+ #
131
+ # Notice that in standard SQL, a fully-qualified table name uses the
132
+ # following format: <code>`my-dashed-project.dataset1.tableName`</code>.
133
+ #
134
+ # ### Legacy SQL (formerly BigQuery SQL)
135
+ #
136
+ # Before version 2.0, BigQuery executed queries using a non-standard SQL
137
+ # dialect known as BigQuery SQL. This variant is optional, and can be
138
+ # enabled by passing the flag `legacy_sql: true` with your query. (If you
139
+ # get an SQL syntax error with a query that may be written in legacy SQL,
140
+ # be sure that you are passing this option.)
141
+ #
142
+ # To use legacy SQL, pass the option `legacy_sql: true` with your query.
143
+ #
144
+ # ```ruby
145
+ # require "google/cloud/bigquery"
146
+ #
147
+ # bigquery = Google::Cloud::Bigquery.new
148
+ #
149
+ # sql = "SELECT TOP(word, 50) as word, COUNT(*) as count " \
150
+ # "FROM [publicdata:samples.shakespeare]"
151
+ # data = bigquery.query sql, legacy_sql: true
151
152
  # ```
152
153
  #
153
- # Notice that in standard SQL, the format for a fully-qualified table name
154
- # uses back-ticks instead of brackets, and a dot instead of a semi-colon:
155
- # <code>`my-dashed-project.dataset1.tableName`</code>.
154
+ # Notice that in legacy SQL, a fully-qualified table name uses brackets
155
+ # instead of back-ticks, and a colon instead of a dot to separate the
156
+ # project and the dataset: `[my-dashed-project:dataset1.tableName]`.
156
157
  #
157
158
  # #### Query parameters
158
159
  #
@@ -166,7 +167,7 @@ module Google
166
167
  #
167
168
  # sql = "SELECT word, SUM(word_count) AS word_count " \
168
169
  # "FROM `bigquery-public-data.samples.shakespeare`" \
169
- # "WHERE word IN (@words) GROUP BY word"
170
+ # "WHERE word IN UNNEST(@words) GROUP BY word"
170
171
  # data = bigquery.query sql, params: { words: ['me', 'I', 'you'] }
171
172
  # ```
172
173
  #
@@ -244,7 +245,7 @@ module Google
244
245
  # job.wait_until_done!
245
246
  # if !job.failed?
246
247
  # job.query_results.each do |row|
247
- # puts row["word"]
248
+ # puts row[:word]
248
249
  # end
249
250
  # end
250
251
  # ```
@@ -0,0 +1,258 @@
1
+ # Copyright 2017 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/apis/bigquery_v2"
17
+ require "stringio"
18
+ require "base64"
19
+ require "time"
20
+ require "date"
21
+
22
+ module Google
23
+ module Cloud
24
+ module Bigquery
25
+ # rubocop:disable all
26
+
27
+ ##
28
+ # @private
29
+ #
30
+ # Internal conversion of raw data values to/from BigQuery values
31
+ #
32
+ # | BigQuery | Ruby | Notes |
33
+ # |-------------|----------------|---|
34
+ # | `BOOL` | `true`/`false` | |
35
+ # | `INT64` | `Integer` | |
36
+ # | `FLOAT64` | `Float` | |
37
+ # | `STRING` | `STRING` | |
38
+ # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
39
+ # | `DATE` | `Date` | |
40
+ # | `TIMESTAMP` | `Time` | |
41
+ # | `TIME` | `Google::Cloud::Bigquery::Time` | |
42
+ # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
43
+ # | `ARRAY` | `Array` | Nested arrays and `nil` values are not supported. |
44
+ # | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
45
+
46
+ module Convert
47
+ ##
48
+ # @private
49
+ def self.format_rows rows, fields
50
+ Array(rows).map do |row|
51
+ # convert TableRow to hash to handle nested TableCell values
52
+ format_row row.to_h, fields
53
+ end
54
+ end
55
+
56
+ ##
57
+ # @private
58
+ def self.format_row row, fields
59
+ row_pairs = fields.zip(row[:f]).map do |f, v|
60
+ [f.name.to_sym, format_value(v, f)]
61
+ end
62
+ Hash[row_pairs]
63
+ end
64
+
65
+ def self.format_value value, field
66
+ if value.nil?
67
+ nil
68
+ elsif value.empty?
69
+ nil
70
+ elsif value[:v].nil?
71
+ nil
72
+ elsif Array === value[:v]
73
+ value[:v].map { |v| format_value v, field }
74
+ elsif Hash === value[:v]
75
+ if value[:v].empty?
76
+ nil
77
+ else
78
+ format_row value[:v], field.fields
79
+ end
80
+ elsif field.type == "STRING"
81
+ String value[:v]
82
+ elsif field.type == "INTEGER"
83
+ Integer value[:v]
84
+ elsif field.type == "FLOAT"
85
+ Float value[:v]
86
+ elsif field.type == "BOOLEAN"
87
+ (value[:v] == "true" ? true : (value[:v] == "false" ? false : nil))
88
+ elsif field.type == "BYTES"
89
+ StringIO.new Base64.decode64 value[:v]
90
+ elsif field.type == "TIMESTAMP"
91
+ ::Time.at Float(value[:v])
92
+ elsif field.type == "TIME"
93
+ Bigquery::Time.new value[:v]
94
+ elsif field.type == "DATETIME"
95
+ ::Time.parse("#{value[:v]} UTC").to_datetime
96
+ elsif field.type == "DATE"
97
+ Date.parse value[:v]
98
+ else
99
+ value[:v]
100
+ end
101
+ end
102
+
103
+ ##
104
+ # @private
105
+ def self.to_query_param value
106
+ if TrueClass === value
107
+ return Google::Apis::BigqueryV2::QueryParameter.new(
108
+ parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
109
+ type: "BOOL"),
110
+ parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
111
+ value: true)
112
+ )
113
+ elsif FalseClass === value
114
+ return Google::Apis::BigqueryV2::QueryParameter.new(
115
+ parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
116
+ type: "BOOL"),
117
+ parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
118
+ value: false)
119
+ )
120
+ elsif Integer === value
121
+ return Google::Apis::BigqueryV2::QueryParameter.new(
122
+ parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
123
+ type: "INT64"),
124
+ parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
125
+ value: value)
126
+ )
127
+ elsif Float === value
128
+ return Google::Apis::BigqueryV2::QueryParameter.new(
129
+ parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
130
+ type: "FLOAT64"),
131
+ parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
132
+ value: value)
133
+ )
134
+ elsif String === value
135
+ return Google::Apis::BigqueryV2::QueryParameter.new(
136
+ parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
137
+ type: "STRING"),
138
+ parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
139
+ value: value)
140
+ )
141
+ elsif DateTime === value
142
+ return Google::Apis::BigqueryV2::QueryParameter.new(
143
+ parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
144
+ type: "DATETIME"),
145
+ parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
146
+ value: value.strftime("%Y-%m-%d %H:%M:%S.%6N"))
147
+ )
148
+ elsif Date === value
149
+ return Google::Apis::BigqueryV2::QueryParameter.new(
150
+ parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
151
+ type: "DATE"),
152
+ parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
153
+ value: value.to_s)
154
+ )
155
+ elsif ::Time === value
156
+ return Google::Apis::BigqueryV2::QueryParameter.new(
157
+ parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
158
+ type: "TIMESTAMP"),
159
+ parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
160
+ value: value.strftime("%Y-%m-%d %H:%M:%S.%6N%:z"))
161
+ )
162
+ elsif Bigquery::Time === value
163
+ return Google::Apis::BigqueryV2::QueryParameter.new(
164
+ parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
165
+ type: "TIME"),
166
+ parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
167
+ value: value.value)
168
+ )
169
+ elsif value.respond_to?(:read) && value.respond_to?(:rewind)
170
+ value.rewind
171
+ return Google::Apis::BigqueryV2::QueryParameter.new(
172
+ parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
173
+ type: "BYTES"),
174
+ parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
175
+ value: Base64.strict_encode64(
176
+ value.read.force_encoding("ASCII-8BIT")))
177
+ )
178
+ elsif Array === value
179
+ array_params = value.map { |param| Convert.to_query_param param }
180
+ return Google::Apis::BigqueryV2::QueryParameter.new(
181
+ parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
182
+ type: "ARRAY",
183
+ array_type: array_params.first.parameter_type
184
+ ),
185
+ parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
186
+ array_values: array_params.map(&:parameter_value)
187
+ )
188
+ )
189
+ elsif Hash === value
190
+ struct_pairs = value.map do |name, param|
191
+ struct_param = Convert.to_query_param param
192
+ [Google::Apis::BigqueryV2::QueryParameterType::StructType.new(
193
+ name: String(name),
194
+ type: struct_param.parameter_type
195
+ ), struct_param.parameter_value]
196
+ end
197
+ struct_values = Hash[struct_pairs.map do |type, value|
198
+ [type.name.to_sym, value]
199
+ end]
200
+
201
+ return Google::Apis::BigqueryV2::QueryParameter.new(
202
+ parameter_type: Google::Apis::BigqueryV2::QueryParameterType.new(
203
+ type: "STRUCT",
204
+ struct_types: struct_pairs.map(&:first)
205
+ ),
206
+ parameter_value: Google::Apis::BigqueryV2::QueryParameterValue.new(
207
+ struct_values: struct_values
208
+ )
209
+ )
210
+ else
211
+ fail "A query parameter of type #{value.class} is not supported."
212
+ end
213
+ end
214
+
215
+ ##
216
+ # @private
217
+ def self.to_json_rows rows
218
+ rows.map { |row| to_json_row row }
219
+ end
220
+ ##
221
+ # @private
222
+ def self.to_json_row row
223
+ Hash[row.map { |k, v| [k.to_s, to_json_value(v)] }]
224
+ end
225
+ ##
226
+ # @private
227
+ def self.to_json_value value
228
+ if DateTime === value
229
+ value.strftime "%Y-%m-%d %H:%M:%S.%6N"
230
+ elsif Date === value
231
+ value.to_s
232
+ elsif ::Time === value
233
+ value.strftime "%Y-%m-%d %H:%M:%S.%6N%:z"
234
+ elsif Bigquery::Time === value
235
+ value.value
236
+ elsif value.respond_to?(:read) && value.respond_to?(:rewind)
237
+ value.rewind
238
+ Base64.strict_encode64(value.read.force_encoding("ASCII-8BIT"))
239
+ elsif Array === value
240
+ value.map { |v| to_json_value v }
241
+ elsif Hash === value
242
+ Hash[value.map { |k, v| [k.to_s, to_json_value(v)] }]
243
+ else
244
+ value
245
+ end
246
+ end
247
+
248
+ def self.resolve_legacy_sql standard_sql, legacy_sql
249
+ return !standard_sql unless standard_sql.nil?
250
+ return legacy_sql unless legacy_sql.nil?
251
+ false
252
+ end
253
+
254
+ # rubocop:enable all
255
+ end
256
+ end
257
+ end
258
+ end
@@ -65,6 +65,24 @@ module Google
65
65
  nil
66
66
  end
67
67
 
68
+ ##
69
+ # The schema of the data.
70
+ def schema
71
+ table.schema
72
+ end
73
+
74
+ ##
75
+ # The fields of the data.
76
+ def fields
77
+ schema.fields
78
+ end
79
+
80
+ ##
81
+ # The names of the columns in the data.
82
+ def headers
83
+ schema.headers
84
+ end
85
+
68
86
  ##
69
87
  # Whether there is a next page of data.
70
88
  #
@@ -177,18 +195,11 @@ module Google
177
195
  end
178
196
  end
179
197
 
180
- ##
181
- # Represents Table Data as a list of positional values (array of
182
- # arrays). No type conversion is made, e.g. numbers are formatted as
183
- # strings.
184
- def raw
185
- Array(gapi.rows).map { |row| row.f.map(&:v) }
186
- end
187
-
188
198
  ##
189
199
  # @private New Data from a response object.
190
200
  def self.from_gapi gapi, table
191
- formatted_rows = format_rows gapi.rows, table.fields
201
+ formatted_rows = Convert.format_rows(gapi.rows,
202
+ table.gapi.schema.fields)
192
203
 
193
204
  data = new formatted_rows
194
205
  data.table = table
@@ -196,41 +207,6 @@ module Google
196
207
  data
197
208
  end
198
209
 
199
- # rubocop:disable all
200
- # Disabled rubocop because this implementation will not last.
201
-
202
- def self.format_rows rows, fields
203
- headers = Array(fields).map { |f| f.name }
204
- field_types = Array(fields).map { |f| f.type }
205
-
206
- Array(rows).map do |row|
207
- values = row.f.map { |f| f.v }
208
- formatted_values = format_values field_types, values
209
- Hash[headers.zip formatted_values]
210
- end
211
- end
212
-
213
- def self.format_values field_types, values
214
- field_types.zip(values).map do |type, value|
215
- begin
216
- if value.nil?
217
- nil
218
- elsif type == "INTEGER"
219
- Integer value
220
- elsif type == "FLOAT"
221
- Float value
222
- elsif type == "BOOLEAN"
223
- (value == "true" ? true : (value == "false" ? false : nil))
224
- else
225
- value
226
- end
227
- rescue
228
- value
229
- end
230
- end
231
- end
232
- # rubocop:enable all
233
-
234
210
  protected
235
211
 
236
212
  ##