google-cloud-bigquery 1.31.0 → 1.32.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,230 @@
1
+ # Copyright 2021 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/apis/bigquery_v2"
17
+ require "google/cloud/bigquery/external/bigtable_source/column_family"
18
+
19
+ module Google
20
+ module Cloud
21
+ module Bigquery
22
+ module External
23
+ ##
24
+ # # BigtableSource
25
+ #
26
+ # {External::BigtableSource} is a subclass of {External::DataSource} and
27
+ # represents a Bigtable external data source that can be queried from
28
+ # directly, even though the data is not stored in BigQuery. Instead of
29
+ # loading or streaming the data, this object references the external
30
+ # data source.
31
+ #
32
+ # @example
33
+ # require "google/cloud/bigquery"
34
+ #
35
+ # bigquery = Google::Cloud::Bigquery.new
36
+ #
37
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
38
+ # bigtable_table = bigquery.external bigtable_url do |bt|
39
+ # bt.rowkey_as_string = true
40
+ # bt.add_family "user" do |u|
41
+ # u.add_string "name"
42
+ # u.add_string "email"
43
+ # u.add_integer "age"
44
+ # u.add_boolean "active"
45
+ # end
46
+ # end
47
+ #
48
+ # data = bigquery.query "SELECT * FROM my_ext_table",
49
+ # external: { my_ext_table: bigtable_table }
50
+ #
51
+ # # Iterate over the first page of results
52
+ # data.each do |row|
53
+ # puts row[:name]
54
+ # end
55
+ # # Retrieve the next page of results
56
+ # data = data.next if data.next?
57
+ #
58
+ class BigtableSource < External::DataSource
59
+ ##
60
+ # @private Create an empty BigtableSource object.
61
def initialize
  super
  # Begin with no column families and a blank set of Bigtable options.
  # NOTE(review): @gapi is presumably created by the superclass
  # initializer; confirm against External::DataSource.
  @families = []
  @gapi.bigtable_options = Google::Apis::BigqueryV2::BigtableOptions.new
end
66
+
67
+ ##
68
+ # List of column families to expose in the table schema along with
69
+ # their types. This list restricts the column families that can be
70
+ # referenced in queries and specifies their value types. You can use
71
+ # this list to do type conversions - see
72
+ # {BigtableSource::ColumnFamily#type} for more details. If you leave
73
+ # this list empty, all column families are present in the table schema
74
+ # and their values are read as `BYTES`. During a query only the column
75
+ # families referenced in that query are read from Bigtable.
76
+ #
77
+ # @return [Array<BigtableSource::ColumnFamily>]
78
+ #
79
+ # @example
80
+ # require "google/cloud/bigquery"
81
+ #
82
+ # bigquery = Google::Cloud::Bigquery.new
83
+ #
84
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
85
+ # bigtable_table = bigquery.external bigtable_url do |bt|
86
+ # bt.rowkey_as_string = true
87
+ # bt.add_family "user" do |u|
88
+ # u.add_string "name"
89
+ # u.add_string "email"
90
+ # u.add_integer "age"
91
+ # u.add_boolean "active"
92
+ # end
93
+ # end
94
+ #
95
+ # bigtable_table.families.count #=> 1
96
+ #
97
def families
  # Hands back the stored array itself rather than a copy; #add_family
  # appends to this same array.
  @families
end
100
+
101
+ ##
102
+ # Add a column family to expose in the table schema along with its
103
+ # types. Columns belonging to the column family may also be exposed.
104
+ #
105
+ # @param [String] family_id Identifier of the column family. See
106
+ # {BigtableSource::ColumnFamily#family_id}.
107
+ # @param [String] encoding The encoding of the values when the type is
108
+ # not `STRING`. See {BigtableSource::ColumnFamily#encoding}.
109
+ # @param [Boolean] latest Whether only the latest version of value are
110
+ # exposed for all columns in this column family. See
111
+ # {BigtableSource::ColumnFamily#latest}.
112
+ # @param [String] type The type to convert the value in cells of this
113
+ # column. See {BigtableSource::ColumnFamily#type}.
114
+ #
115
+ # @yield [family] a block for setting the family
116
+ # @yieldparam [BigtableSource::ColumnFamily] family the family object
117
+ #
118
+ # @return [BigtableSource::ColumnFamily]
119
+ #
120
+ # @example
121
+ # require "google/cloud/bigquery"
122
+ #
123
+ # bigquery = Google::Cloud::Bigquery.new
124
+ #
125
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
126
+ # bigtable_table = bigquery.external bigtable_url do |bt|
127
+ # bt.rowkey_as_string = true
128
+ # bt.add_family "user" do |u|
129
+ # u.add_string "name"
130
+ # u.add_string "email"
131
+ # u.add_integer "age"
132
+ # u.add_boolean "active"
133
+ # end
134
+ # end
135
+ #
136
def add_family family_id, encoding: nil, latest: nil, type: nil
  frozen_check!

  fam = BigtableSource::ColumnFamily.new
  fam.family_id = family_id
  fam.encoding = encoding if encoding
  # Compare against nil rather than truthiness so that an explicit
  # `latest: false` is passed on to the family instead of being dropped.
  fam.latest = latest unless latest.nil?
  fam.type = type if type

  # Let the caller finish configuring the family (e.g. add columns)
  # before it is registered on this source.
  yield fam if block_given?

  @families << fam
  fam
end
147
+
148
+ ##
149
+ # Whether the rowkey column families will be read and converted to
150
+ # string. Otherwise they are read with `BYTES` type values and users
151
+ # need to manually cast them with `CAST` if necessary. The default
152
+ # value is `false`.
153
+ #
154
+ # @return [Boolean]
155
+ #
156
+ # @example
157
+ # require "google/cloud/bigquery"
158
+ #
159
+ # bigquery = Google::Cloud::Bigquery.new
160
+ #
161
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
162
+ # bigtable_table = bigquery.external bigtable_url do |bt|
163
+ # bt.rowkey_as_string = true
164
+ # end
165
+ #
166
+ # bigtable_table.rowkey_as_string #=> true
167
+ #
168
def rowkey_as_string
  # The flag is stored on the Bigtable options of the wrapped API object.
  bigtable_opts = @gapi.bigtable_options
  bigtable_opts.read_rowkey_as_string
end
171
+
172
+ ##
173
+ # Set whether the rowkey column families will be read and converted to
174
+ # string. Otherwise they are read with `BYTES` type values.
175
+ #
176
+ # @param [Boolean] row_rowkey New rowkey_as_string value
177
+ #
178
+ # @example
179
+ # require "google/cloud/bigquery"
180
+ #
181
+ # bigquery = Google::Cloud::Bigquery.new
182
+ #
183
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
184
+ # bigtable_table = bigquery.external bigtable_url do |bt|
185
+ # bt.rowkey_as_string = true
186
+ # end
187
+ #
188
+ # bigtable_table.rowkey_as_string #=> true
189
+ #
190
def rowkey_as_string= row_rowkey
  # Refuse the write once this source has been frozen.
  frozen_check!

  bigtable_opts = @gapi.bigtable_options
  bigtable_opts.read_rowkey_as_string = row_rowkey
end
194
+
195
+ ##
196
+ # @private Google API Client object.
197
def to_gapi
  # Copy the current families onto the API object's Bigtable options
  # before handing the API object out.
  family_gapis = @families.map(&:to_gapi)
  @gapi.bigtable_options.column_families = family_gapis
  @gapi
end
201
+
202
+ ##
203
+ # @private Google API Client object.
204
def self.from_gapi gapi
  source = super
  # Array() turns a missing (nil) family list into an empty array.
  family_gapis = Array gapi.bigtable_options.column_families
  wrapped = family_gapis.map do |family_gapi|
    BigtableSource::ColumnFamily.from_gapi family_gapi
  end
  source.instance_variable_set :@families, wrapped
  source
end
211
+
212
+ ##
213
+ # @private
214
def freeze
  # Ruby core defines #freeze, not #freeze!, so the earlier
  # `@families.freeze!` call raised NoMethodError on the Array and the
  # source could never be frozen. Freeze each family, then the list
  # itself, then this object.
  @families.each(&:freeze)
  @families.freeze
  super
end
219
+
220
+ protected
221
+
222
def frozen_check!
  # Called by the mutating methods of this class before they change state.
  raise ArgumentError, "Cannot modify external data source when frozen" if frozen?
end
226
+ end
227
+ end
228
+ end
229
+ end
230
+ end
@@ -0,0 +1,404 @@
1
+ # Copyright 2021 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/apis/bigquery_v2"
17
+ require "base64"
18
+
19
+ module Google
20
+ module Cloud
21
+ module Bigquery
22
+ module External
23
+ class BigtableSource < External::DataSource
24
+ ##
25
+ # # BigtableSource::Column
26
+ #
27
+ # A Bigtable column to expose in the table schema along with its
28
+ # types.
29
+ #
30
+ # @example
31
+ # require "google/cloud/bigquery"
32
+ #
33
+ # bigquery = Google::Cloud::Bigquery.new
34
+ #
35
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
36
+ # bigtable_table = bigquery.external bigtable_url do |bt|
37
+ # bt.rowkey_as_string = true
38
+ # bt.add_family "user" do |u|
39
+ # u.add_string "name"
40
+ # u.add_string "email"
41
+ # u.add_integer "age"
42
+ # u.add_boolean "active"
43
+ # end
44
+ # end
45
+ #
46
+ # data = bigquery.query "SELECT * FROM my_ext_table",
47
+ # external: { my_ext_table: bigtable_table }
48
+ #
49
+ # # Iterate over the first page of results
50
+ # data.each do |row|
51
+ # puts row[:name]
52
+ # end
53
+ # # Retrieve the next page of results
54
+ # data = data.next if data.next?
55
+ #
56
+ class Column
57
+ ##
58
+ # @private Create an empty BigtableSource::Column object.
59
def initialize
  # Every column wraps its own, initially blank, API column object.
  blank_column = Google::Apis::BigqueryV2::BigtableColumn.new
  @gapi = blank_column
end
62
+
63
+ ##
64
+ # Qualifier of the column. Columns in the parent column family that
65
+ # has this exact qualifier are exposed as `.` field. If the
66
+ # qualifier is valid UTF-8 string, it will be represented as a UTF-8
67
+ # string. Otherwise, it will be represented as an ASCII-8BIT string.
68
+ #
69
+ # If the qualifier is not a valid BigQuery field identifier (does
70
+ # not match `[a-zA-Z][a-zA-Z0-9_]*`) a valid identifier must be
71
+ # provided as `field_name`.
72
+ #
73
+ # @return [String]
74
+ #
75
+ # @example
76
+ # require "google/cloud/bigquery"
77
+ #
78
+ # bigquery = Google::Cloud::Bigquery.new
79
+ #
80
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
81
+ # bigtable_table = bigquery.external bigtable_url do |bt|
82
+ # bt.add_family "user" do |u|
83
+ # u.add_string "name" do |col|
84
+ # col.qualifier # "name"
85
+ # col.qualifier = "User"
86
+ # col.qualifier # "User"
87
+ # end
88
+ # end
89
+ # end
90
+ #
91
def qualifier
  # Prefer the plain-string form when it is set.
  plain = @gapi.qualifier_string
  return plain if plain

  # Otherwise decode the Base64 form; a missing value decodes to "".
  Base64.strict_decode64 @gapi.qualifier_encoded.to_s
end
94
+
95
+ ##
96
+ # Set the qualifier of the column. Columns in the parent column
97
+ # family that has this exact qualifier are exposed as `.` field.
98
+ # Values that are valid UTF-8 strings will be treated as such. All
99
+ # other values will be treated as `BINARY`.
100
+ #
101
+ # @param [String] new_qualifier New qualifier value
102
+ #
103
+ # @example
104
+ # require "google/cloud/bigquery"
105
+ #
106
+ # bigquery = Google::Cloud::Bigquery.new
107
+ #
108
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
109
+ # bigtable_table = bigquery.external bigtable_url do |bt|
110
+ # bt.add_family "user" do |u|
111
+ # u.add_string "name" do |col|
112
+ # col.qualifier # "name"
113
+ # col.qualifier = "User"
114
+ # col.qualifier # "User"
115
+ # end
116
+ # end
117
+ # end
118
+ #
119
def qualifier= new_qualifier
  frozen_check!
  raise ArgumentError if new_qualifier.nil?

  # Try to obtain a valid UTF-8 rendering of the qualifier. A failed
  # transcoding and an invalid byte sequence both count as "not text".
  text = begin
    candidate = new_qualifier.encode Encoding::UTF_8
    candidate if candidate.valid_encoding?
  rescue EncodingError
    nil
  end

  if text
    @gapi.qualifier_string = text
    stale = :@qualifier_encoded
  else
    @gapi.qualifier_encoded = Base64.strict_encode64 new_qualifier
    stale = :@qualifier_string
  end

  # Only one representation may remain set on the API object, so drop
  # the other one if it was assigned earlier.
  @gapi.remove_instance_variable stale if @gapi.instance_variables.include? stale
end
139
+
140
+ ##
141
+ # The encoding of the values when the type is not `STRING`.
142
+ #
143
+ # @return [String]
144
+ #
145
+ # @example
146
+ # require "google/cloud/bigquery"
147
+ #
148
+ # bigquery = Google::Cloud::Bigquery.new
149
+ #
150
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
151
+ # bigtable_table = bigquery.external bigtable_url do |bt|
152
+ # bt.add_family "user" do |u|
153
+ # u.add_bytes "name" do |col|
154
+ # col.encoding = "TEXT"
155
+ # col.encoding # "TEXT"
156
+ # end
157
+ # end
158
+ # end
159
+ #
160
def encoding
  # Straight pass-through to the wrapped API column.
  api_column = @gapi
  api_column.encoding
end
163
+
164
+ ##
165
+ # Set the encoding of the values when the type is not `STRING`.
166
+ # Acceptable encoding values are:
167
+ #
168
+ # * `TEXT` - indicates values are alphanumeric text strings.
169
+ # * `BINARY` - indicates values are encoded using HBase
170
+ # `Bytes.toBytes` family of functions. This can be overridden on a
171
+ # column.
172
+ #
173
+ # @param [String] new_encoding New encoding value
174
+ #
175
+ # @example
176
+ # require "google/cloud/bigquery"
177
+ #
178
+ # bigquery = Google::Cloud::Bigquery.new
179
+ #
180
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
181
+ # bigtable_table = bigquery.external bigtable_url do |bt|
182
+ # bt.add_family "user" do |u|
183
+ # u.add_bytes "name" do |col|
184
+ # col.encoding = "TEXT"
185
+ # col.encoding # "TEXT"
186
+ # end
187
+ # end
188
+ # end
189
+ #
190
def encoding= new_encoding
  # Guard first: a frozen column must not reach the assignment below.
  frozen_check!

  @gapi.encoding = new_encoding
end
194
+
195
+ ##
196
+ # If the qualifier is not a valid BigQuery field identifier (does
197
+ # not match `[a-zA-Z][a-zA-Z0-9_]*`) a valid identifier must be
198
+ # provided as the column field name and is used as field name in
199
+ # queries.
200
+ #
201
+ # @return [String]
202
+ #
203
+ # @example
204
+ # require "google/cloud/bigquery"
205
+ #
206
+ # bigquery = Google::Cloud::Bigquery.new
207
+ #
208
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
209
+ # bigtable_table = bigquery.external bigtable_url do |bt|
210
+ # bt.add_family "user" do |u|
211
+ # u.add_string "001_name", as: "user" do |col|
212
+ # col.field_name # "user"
213
+ # col.field_name = "User"
214
+ # col.field_name # "User"
215
+ # end
216
+ # end
217
+ # end
218
+ #
219
def field_name
  # Read directly from the wrapped API column.
  api_column = @gapi
  api_column.field_name
end
222
+
223
+ ##
224
+ # Sets the identifier to be used as the column field name in queries
225
+ # when the qualifier is not a valid BigQuery field identifier (does
226
+ # not match `[a-zA-Z][a-zA-Z0-9_]*`).
227
+ #
228
+ # @param [String] new_field_name New field_name value
229
+ #
230
+ # @example
231
+ # require "google/cloud/bigquery"
232
+ #
233
+ # bigquery = Google::Cloud::Bigquery.new
234
+ #
235
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
236
+ # bigtable_table = bigquery.external bigtable_url do |bt|
237
+ # bt.add_family "user" do |u|
238
+ # u.add_string "001_name", as: "user" do |col|
239
+ # col.field_name # "user"
240
+ # col.field_name = "User"
241
+ # col.field_name # "User"
242
+ # end
243
+ # end
244
+ # end
245
+ #
246
def field_name= new_field_name
  # Writes are rejected once the column is frozen.
  frozen_check!

  @gapi.field_name = new_field_name
end
250
+
251
+ ##
252
+ # Whether only the latest version of value in this column are
253
+ # exposed. Can also be set at the column family level. However, this
254
+ # value takes precedence when set at both levels.
255
+ #
256
+ # @return [Boolean]
257
+ #
258
+ # @example
259
+ # require "google/cloud/bigquery"
260
+ #
261
+ # bigquery = Google::Cloud::Bigquery.new
262
+ #
263
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
264
+ # bigtable_table = bigquery.external bigtable_url do |bt|
265
+ # bt.add_family "user" do |u|
266
+ # u.add_string "name" do |col|
267
+ # col.latest = true
268
+ # col.latest # true
269
+ # end
270
+ # end
271
+ # end
272
+ #
273
def latest
  # Backed by the API column's `only_read_latest` attribute.
  api_column = @gapi
  api_column.only_read_latest
end
276
+
277
+ ##
278
+ # Set whether only the latest version of value in this column are
279
+ # exposed. Can also be set at the column family level. However, this
280
+ # value takes precedence when set at both levels.
281
+ #
282
+ # @param [Boolean] new_latest New latest value
283
+ #
284
+ # @example
285
+ # require "google/cloud/bigquery"
286
+ #
287
+ # bigquery = Google::Cloud::Bigquery.new
288
+ #
289
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
290
+ # bigtable_table = bigquery.external bigtable_url do |bt|
291
+ # bt.add_family "user" do |u|
292
+ # u.add_string "name" do |col|
293
+ # col.latest = true
294
+ # col.latest # true
295
+ # end
296
+ # end
297
+ # end
298
+ #
299
def latest= new_latest
  # Writes are rejected once the column is frozen.
  frozen_check!

  # Stored on the API column as `only_read_latest`.
  @gapi.only_read_latest = new_latest
end
303
+
304
+ ##
305
+ # The type to convert the value in cells of this column. The values
306
+ # are expected to be encoded using HBase `Bytes.toBytes` function
307
+ # when using the `BINARY` encoding value. The following BigQuery
308
+ # types are allowed:
309
+ #
310
+ # * `BYTES`
311
+ # * `STRING`
312
+ # * `INTEGER`
313
+ # * `FLOAT`
314
+ # * `BOOLEAN`
315
+ #
316
+ # Default type is `BYTES`. Can also be set at the column family
317
+ # level. However, this value takes precedence when set at both
318
+ # levels.
319
+ #
320
+ # @return [String]
321
+ #
322
+ # @example
323
+ # require "google/cloud/bigquery"
324
+ #
325
+ # bigquery = Google::Cloud::Bigquery.new
326
+ #
327
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
328
+ # bigtable_table = bigquery.external bigtable_url do |bt|
329
+ # bt.add_family "user" do |u|
330
+ # u.add_string "name" do |col|
331
+ # col.type # "STRING"
332
+ # end
333
+ # end
334
+ # end
335
+ #
336
def type
  # Read directly from the wrapped API column.
  api_column = @gapi
  api_column.type
end
339
+
340
+ ##
341
+ # Set the type to convert the value in cells of this column. The
342
+ # values are expected to be encoded using HBase `Bytes.toBytes`
343
+ # function when using the `BINARY` encoding value. The following
344
+ # BigQuery types are allowed:
345
+ #
346
+ # * `BYTES`
347
+ # * `STRING`
348
+ # * `INTEGER`
349
+ # * `FLOAT`
350
+ # * `BOOLEAN`
351
+ #
352
+ # Default type is `BYTES`. Can also be set at the column family
353
+ # level. However, this value takes precedence when set at both
354
+ # levels.
355
+ #
356
+ # @param [String] new_type New type value
357
+ #
358
+ # @example
359
+ # require "google/cloud/bigquery"
360
+ #
361
+ # bigquery = Google::Cloud::Bigquery.new
362
+ #
363
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
364
+ # bigtable_table = bigquery.external bigtable_url do |bt|
365
+ # bt.add_family "user" do |u|
366
+ # u.add_string "name" do |col|
367
+ # col.type # "STRING"
368
+ # col.type = "BYTES"
369
+ # col.type # "BYTES"
370
+ # end
371
+ # end
372
+ # end
373
+ #
374
def type= new_type
  # Writes are rejected once the column is frozen.
  frozen_check!

  @gapi.type = new_type
end
378
+
379
+ ##
380
+ # @private Google API Client object.
381
def to_gapi
  # Exposes the wrapped API column itself, not a copy.
  @gapi
end
384
+
385
+ ##
386
+ # @private Google API Client object.
387
def self.from_gapi gapi
  # Build through the normal constructor, then swap in the supplied API
  # object in place of the blank one the constructor created.
  new.tap { |column| column.instance_variable_set :@gapi, gapi }
end
392
+
393
+ protected
394
+
395
def frozen_check!
  # Called by every setter of this class before it changes state.
  raise ArgumentError, "Cannot modify external data source when frozen" if frozen?
end
399
+ end
400
+ end
401
+ end
402
+ end
403
+ end
404
+ end