google-cloud-bigquery 1.31.0 → 1.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,230 @@
1
+ # Copyright 2021 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/apis/bigquery_v2"
17
+ require "google/cloud/bigquery/external/bigtable_source/column_family"
18
+
19
+ module Google
20
+ module Cloud
21
+ module Bigquery
22
+ module External
23
+ ##
24
+ # # BigtableSource
25
+ #
26
+ # {External::BigtableSource} is a subclass of {External::DataSource} and
27
+ # represents a Bigtable external data source that can be queried from
28
+ # directly, even though the data is not stored in BigQuery. Instead of
29
+ # loading or streaming the data, this object references the external
30
+ # data source.
31
+ #
32
+ # @example
33
+ # require "google/cloud/bigquery"
34
+ #
35
+ # bigquery = Google::Cloud::Bigquery.new
36
+ #
37
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
38
+ # bigtable_table = bigquery.external bigtable_url do |bt|
39
+ # bt.rowkey_as_string = true
40
+ # bt.add_family "user" do |u|
41
+ # u.add_string "name"
42
+ # u.add_string "email"
43
+ # u.add_integer "age"
44
+ # u.add_boolean "active"
45
+ # end
46
+ # end
47
+ #
48
+ # data = bigquery.query "SELECT * FROM my_ext_table",
49
+ # external: { my_ext_table: bigtable_table }
50
+ #
51
+ # # Iterate over the first page of results
52
+ # data.each do |row|
53
+ # puts row[:name]
54
+ # end
55
+ # # Retrieve the next page of results
56
+ # data = data.next if data.next?
57
+ #
58
+ class BigtableSource < External::DataSource
59
+ ##
60
+ # @private Create an empty BigtableSource object.
61
+ def initialize
62
+ super # must run first: the next line dereferences @gapi, which this method never assigns itself
63
+ @gapi.bigtable_options = Google::Apis::BigqueryV2::BigtableOptions.new
64
+ @families = [] # wrappers collected by #add_family; copied into the API object by #to_gapi
65
+ end
66
+
67
+ ##
68
+ # List of column families to expose in the table schema along with
69
+ # their types. This list restricts the column families that can be
70
+ # referenced in queries and specifies their value types. You can use
71
+ # this list to do type conversions - see
72
+ # {BigtableSource::ColumnFamily#type} for more details. If you leave
73
+ # this list empty, all column families are present in the table schema
74
+ # and their values are read as `BYTES`. During a query only the column
75
+ # families referenced in that query are read from Bigtable.
76
+ #
77
+ # @return [Array<BigtableSource::ColumnFamily>]
78
+ #
79
+ # @example
80
+ # require "google/cloud/bigquery"
81
+ #
82
+ # bigquery = Google::Cloud::Bigquery.new
83
+ #
84
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
85
+ # bigtable_table = bigquery.external bigtable_url do |bt|
86
+ # bt.rowkey_as_string = true
87
+ # bt.add_family "user" do |u|
88
+ # u.add_string "name"
89
+ # u.add_string "email"
90
+ # u.add_integer "age"
91
+ # u.add_boolean "active"
92
+ # end
93
+ # end
94
+ #
95
+ # bigtable_table.families.count #=> 1
96
+ #
97
+ def families
98
+ @families # the internal array itself, not a copy; #add_family appends to this same object
99
+ end
100
+
101
+ ##
102
+ # Add a column family to expose in the table schema along with its
103
+ # types. Columns belonging to the column family may also be exposed.
104
+ #
105
+ # @param [String] family_id Identifier of the column family. See
106
+ # {BigtableSource::ColumnFamily#family_id}.
107
+ # @param [String] encoding The encoding of the values when the type is
108
+ # not `STRING`. See {BigtableSource::ColumnFamily#encoding}.
109
+ # @param [Boolean] latest Whether only the latest version of value are
110
+ # exposed for all columns in this column family. See
111
+ # {BigtableSource::ColumnFamily#latest}.
112
+ # @param [String] type The type to convert the value in cells of this
113
+ # column. See {BigtableSource::ColumnFamily#type}.
114
+ #
115
+ # @yield [family] a block for setting the family
116
+ # @yieldparam [BigtableSource::ColumnFamily] family the family object
117
+ #
118
+ # @return [BigtableSource::ColumnFamily]
119
+ #
120
+ # @example
121
+ # require "google/cloud/bigquery"
122
+ #
123
+ # bigquery = Google::Cloud::Bigquery.new
124
+ #
125
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
126
+ # bigtable_table = bigquery.external bigtable_url do |bt|
127
+ # bt.rowkey_as_string = true
128
+ # bt.add_family "user" do |u|
129
+ # u.add_string "name"
130
+ # u.add_string "email"
131
+ # u.add_integer "age"
132
+ # u.add_boolean "active"
133
+ # end
134
+ # end
135
+ #
136
+ def add_family family_id, encoding: nil, latest: nil, type: nil
137
+ frozen_check!
138
+ fam = BigtableSource::ColumnFamily.new
139
+ fam.family_id = family_id
140
+ fam.encoding = encoding if encoding
141
+ fam.latest = latest if latest
142
+ fam.type = type if type
143
+ yield fam if block_given?
144
+ @families << fam
145
+ fam
146
+ end
147
+
148
+ ##
149
+ # Whether the rowkey column families will be read and converted to
150
+ # string. Otherwise they are read with `BYTES` type values and users
151
+ # need to manually cast them with `CAST` if necessary. The default
152
+ # value is `false`.
153
+ #
154
+ # @return [Boolean]
155
+ #
156
+ # @example
157
+ # require "google/cloud/bigquery"
158
+ #
159
+ # bigquery = Google::Cloud::Bigquery.new
160
+ #
161
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
162
+ # bigtable_table = bigquery.external bigtable_url do |bt|
163
+ # bt.rowkey_as_string = true
164
+ # end
165
+ #
166
+ # bigtable_table.rowkey_as_string #=> true
167
+ #
168
+ def rowkey_as_string
169
+ @gapi.bigtable_options.read_rowkey_as_string # lives on the nested bigtable_options object, not on @gapi directly
170
+ end
171
+
172
+ ##
173
+ # Set whether the rowkey column families will be read and converted to
174
+ # string. Otherwise they are read with `BYTES` type values.
175
+ #
176
+ # @param [Boolean] row_rowkey New rowkey_as_string value
177
+ #
178
+ # @example
179
+ # require "google/cloud/bigquery"
180
+ #
181
+ # bigquery = Google::Cloud::Bigquery.new
182
+ #
183
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
184
+ # bigtable_table = bigquery.external bigtable_url do |bt|
185
+ # bt.rowkey_as_string = true
186
+ # end
187
+ #
188
+ # bigtable_table.rowkey_as_string #=> true
189
+ #
190
+ def rowkey_as_string= row_rowkey
191
+ frozen_check! # raises ArgumentError once this source has been frozen
192
+ @gapi.bigtable_options.read_rowkey_as_string = row_rowkey # stored as given; no Boolean coercion happens here
193
+ end
194
+
195
+ ##
196
+ # @private Google API Client object.
197
+ def to_gapi
198
+ @gapi.bigtable_options.column_families = @families.map(&:to_gapi)
199
+ @gapi
200
+ end
201
+
202
+ ##
203
+ # @private Google API Client object.
204
+ def self.from_gapi gapi
205
+ new_table = super
206
+ families = Array gapi.bigtable_options.column_families
207
+ families = families.map { |fam_gapi| BigtableSource::ColumnFamily.from_gapi fam_gapi }
208
+ new_table.instance_variable_set :@families, families
209
+ new_table
210
+ end
211
+
212
+ ##
213
+ # @private
214
+ def freeze
215
+ @families.map(&:freeze!)
216
+ @families.freeze!
217
+ super
218
+ end
219
+
220
+ protected
221
+
222
+ def frozen_check!
223
+ return unless frozen?
224
+ raise ArgumentError, "Cannot modify external data source when frozen"
225
+ end
226
+ end
227
+ end
228
+ end
229
+ end
230
+ end
@@ -0,0 +1,404 @@
1
+ # Copyright 2021 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/apis/bigquery_v2"
17
+ require "base64"
18
+
19
+ module Google
20
+ module Cloud
21
+ module Bigquery
22
+ module External
23
+ class BigtableSource < External::DataSource
24
+ ##
25
+ # # BigtableSource::Column
26
+ #
27
+ # A Bigtable column to expose in the table schema along with its
28
+ # types.
29
+ #
30
+ # @example
31
+ # require "google/cloud/bigquery"
32
+ #
33
+ # bigquery = Google::Cloud::Bigquery.new
34
+ #
35
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
36
+ # bigtable_table = bigquery.external bigtable_url do |bt|
37
+ # bt.rowkey_as_string = true
38
+ # bt.add_family "user" do |u|
39
+ # u.add_string "name"
40
+ # u.add_string "email"
41
+ # u.add_integer "age"
42
+ # u.add_boolean "active"
43
+ # end
44
+ # end
45
+ #
46
+ # data = bigquery.query "SELECT * FROM my_ext_table",
47
+ # external: { my_ext_table: bigtable_table }
48
+ #
49
+ # # Iterate over the first page of results
50
+ # data.each do |row|
51
+ # puts row[:name]
52
+ # end
53
+ # # Retrieve the next page of results
54
+ # data = data.next if data.next?
55
+ #
56
+ class Column
57
+ ##
58
+ # @private Create an empty BigtableSource::Column object.
59
+ def initialize
60
+ @gapi = Google::Apis::BigqueryV2::BigtableColumn.new # blank API object that every accessor below reads or writes
61
+ end
62
+
63
+ ##
64
+ # Qualifier of the column. Columns in the parent column family that
65
+ # has this exact qualifier are exposed as `.` field. If the
66
+ # qualifier is a valid UTF-8 string, it will be represented as a UTF-8
67
+ # string. Otherwise, it will be represented as an ASCII-8BIT string.
68
+ #
69
+ # If the qualifier is not a valid BigQuery field identifier (does
70
+ # not match `[a-zA-Z][a-zA-Z0-9_]*`) a valid identifier must be
71
+ # provided as `field_name`.
72
+ #
73
+ # @return [String]
74
+ #
75
+ # @example
76
+ # require "google/cloud/bigquery"
77
+ #
78
+ # bigquery = Google::Cloud::Bigquery.new
79
+ #
80
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
81
+ # bigtable_table = bigquery.external bigtable_url do |bt|
82
+ # bt.add_family "user" do |u|
83
+ # u.add_string "name" do |col|
84
+ # col.qualifier # "name"
85
+ # col.qualifier = "User"
86
+ # col.qualifier # "User"
87
+ # end
88
+ # end
89
+ # end
90
+ #
91
+ def qualifier
92
+ @gapi.qualifier_string || Base64.strict_decode64(@gapi.qualifier_encoded.to_s) # plain form wins; else decode Base64 ("" if neither is set)
93
+ end
94
+
95
+ ##
96
+ # Set the qualifier of the column. Columns in the parent column
97
+ # family that has this exact qualifier are exposed as `.` field.
98
+ # Values that are valid UTF-8 strings will be treated as such. All
99
+ # other values will be treated as `BINARY`.
100
+ #
101
+ # @param [String] new_qualifier New qualifier value
102
+ #
103
+ # @example
104
+ # require "google/cloud/bigquery"
105
+ #
106
+ # bigquery = Google::Cloud::Bigquery.new
107
+ #
108
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
109
+ # bigtable_table = bigquery.external bigtable_url do |bt|
110
+ # bt.add_family "user" do |u|
111
+ # u.add_string "name" do |col|
112
+ # col.qualifier # "name"
113
+ # col.qualifier = "User"
114
+ # col.qualifier # "User"
115
+ # end
116
+ # end
117
+ # end
118
+ #
119
+ def qualifier= new_qualifier
120
+ frozen_check!
121
+ raise ArgumentError if new_qualifier.nil?
122
+
123
+ utf8_qualifier = new_qualifier.encode Encoding::UTF_8
124
+ if utf8_qualifier.valid_encoding?
125
+ @gapi.qualifier_string = utf8_qualifier
126
+ if @gapi.instance_variables.include? :@qualifier_encoded
127
+ @gapi.remove_instance_variable :@qualifier_encoded
128
+ end
129
+ else
130
+ @gapi.qualifier_encoded = Base64.strict_encode64 new_qualifier
131
+ if @gapi.instance_variables.include? :@qualifier_string
132
+ @gapi.remove_instance_variable :@qualifier_string
133
+ end
134
+ end
135
+ rescue EncodingError
136
+ @gapi.qualifier_encoded = Base64.strict_encode64 new_qualifier
137
+ @gapi.remove_instance_variable :@qualifier_string if @gapi.instance_variables.include? :@qualifier_string
138
+ end
139
+
140
+ ##
141
+ # The encoding of the values when the type is not `STRING`.
142
+ #
143
+ # @return [String]
144
+ #
145
+ # @example
146
+ # require "google/cloud/bigquery"
147
+ #
148
+ # bigquery = Google::Cloud::Bigquery.new
149
+ #
150
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
151
+ # bigtable_table = bigquery.external bigtable_url do |bt|
152
+ # bt.add_family "user" do |u|
153
+ # u.add_bytes "name" do |col|
154
+ # col.encoding = "TEXT"
155
+ # col.encoding # "TEXT"
156
+ # end
157
+ # end
158
+ # end
159
+ #
160
+ def encoding
161
+ @gapi.encoding # returned exactly as held by the wrapped API object
162
+ end
163
+
164
+ ##
165
+ # Set the encoding of the values when the type is not `STRING`.
166
+ # Acceptable encoding values are:
167
+ #
168
+ # * `TEXT` - indicates values are alphanumeric text strings.
169
+ # * `BINARY` - indicates values are encoded using HBase
170
+ # `Bytes.toBytes` family of functions. This can be overridden on a
171
+ # column.
172
+ #
173
+ # @param [String] new_encoding New encoding value
174
+ #
175
+ # @example
176
+ # require "google/cloud/bigquery"
177
+ #
178
+ # bigquery = Google::Cloud::Bigquery.new
179
+ #
180
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
181
+ # bigtable_table = bigquery.external bigtable_url do |bt|
182
+ # bt.add_family "user" do |u|
183
+ # u.add_bytes "name" do |col|
184
+ # col.encoding = "TEXT"
185
+ # col.encoding # "TEXT"
186
+ # end
187
+ # end
188
+ # end
189
+ #
190
+ def encoding= new_encoding
191
+ frozen_check! # raises ArgumentError if this column is frozen
192
+ @gapi.encoding = new_encoding # not validated against TEXT/BINARY in this method
193
+ end
194
+
195
+ ##
196
+ # If the qualifier is not a valid BigQuery field identifier (does
197
+ # not match `[a-zA-Z][a-zA-Z0-9_]*`) a valid identifier must be
198
+ # provided as the column field name and is used as field name in
199
+ # queries.
200
+ #
201
+ # @return [String]
202
+ #
203
+ # @example
204
+ # require "google/cloud/bigquery"
205
+ #
206
+ # bigquery = Google::Cloud::Bigquery.new
207
+ #
208
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
209
+ # bigtable_table = bigquery.external bigtable_url do |bt|
210
+ # bt.add_family "user" do |u|
211
+ # u.add_string "001_name", as: "user" do |col|
212
+ # col.field_name # "user"
213
+ # col.field_name = "User"
214
+ # col.field_name # "User"
215
+ # end
216
+ # end
217
+ # end
218
+ #
219
+ def field_name
220
+ @gapi.field_name # read through to the wrapped API object
221
+ end
222
+
223
+ ##
224
+ # Sets the identifier to be used as the column field name in queries
225
+ # when the qualifier is not a valid BigQuery field identifier (does
226
+ # not match `[a-zA-Z][a-zA-Z0-9_]*`).
227
+ #
228
+ # @param [String] new_field_name New field_name value
229
+ #
230
+ # @example
231
+ # require "google/cloud/bigquery"
232
+ #
233
+ # bigquery = Google::Cloud::Bigquery.new
234
+ #
235
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
236
+ # bigtable_table = bigquery.external bigtable_url do |bt|
237
+ # bt.add_family "user" do |u|
238
+ # u.add_string "001_name", as: "user" do |col|
239
+ # col.field_name # "user"
240
+ # col.field_name = "User"
241
+ # col.field_name # "User"
242
+ # end
243
+ # end
244
+ # end
245
+ #
246
+ def field_name= new_field_name
247
+ frozen_check! # a frozen column rejects the change with ArgumentError
248
+ @gapi.field_name = new_field_name # the identifier pattern is not checked in this method
249
+ end
250
+
251
+ ##
252
+ # Whether only the latest version of value in this column are
253
+ # exposed. Can also be set at the column family level. However, this
254
+ # value takes precedence when set at both levels.
255
+ #
256
+ # @return [Boolean]
257
+ #
258
+ # @example
259
+ # require "google/cloud/bigquery"
260
+ #
261
+ # bigquery = Google::Cloud::Bigquery.new
262
+ #
263
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
264
+ # bigtable_table = bigquery.external bigtable_url do |bt|
265
+ # bt.add_family "user" do |u|
266
+ # u.add_string "name" do |col|
267
+ # col.latest = true
268
+ # col.latest # true
269
+ # end
270
+ # end
271
+ # end
272
+ #
273
+ def latest
274
+ @gapi.only_read_latest # exposed as #latest; the API attribute is named only_read_latest
275
+ end
276
+
277
+ ##
278
+ # Set whether only the latest version of value in this column are
279
+ # exposed. Can also be set at the column family level. However, this
280
+ # value takes precedence when set at both levels.
281
+ #
282
+ # @param [Boolean] new_latest New latest value
283
+ #
284
+ # @example
285
+ # require "google/cloud/bigquery"
286
+ #
287
+ # bigquery = Google::Cloud::Bigquery.new
288
+ #
289
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
290
+ # bigtable_table = bigquery.external bigtable_url do |bt|
291
+ # bt.add_family "user" do |u|
292
+ # u.add_string "name" do |col|
293
+ # col.latest = true
294
+ # col.latest # true
295
+ # end
296
+ # end
297
+ # end
298
+ #
299
+ def latest= new_latest
300
+ frozen_check! # raises ArgumentError if this column is frozen
301
+ @gapi.only_read_latest = new_latest # written to the API attribute only_read_latest, as given
302
+ end
303
+
304
+ ##
305
+ # The type to convert the value in cells of this column. The values
306
+ # are expected to be encoded using HBase `Bytes.toBytes` function
307
+ # when using the `BINARY` encoding value. The following BigQuery
308
+ # types are allowed:
309
+ #
310
+ # * `BYTES`
311
+ # * `STRING`
312
+ # * `INTEGER`
313
+ # * `FLOAT`
314
+ # * `BOOLEAN`
315
+ #
316
+ # Default type is `BYTES`. Can also be set at the column family
317
+ # level. However, this value takes precedence when set at both
318
+ # levels.
319
+ #
320
+ # @return [String]
321
+ #
322
+ # @example
323
+ # require "google/cloud/bigquery"
324
+ #
325
+ # bigquery = Google::Cloud::Bigquery.new
326
+ #
327
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
328
+ # bigtable_table = bigquery.external bigtable_url do |bt|
329
+ # bt.add_family "user" do |u|
330
+ # u.add_string "name" do |col|
331
+ # col.type # "STRING"
332
+ # end
333
+ # end
334
+ # end
335
+ #
336
+ def type
337
+ @gapi.type # type name as held by the wrapped API object
338
+ end
339
+
340
+ ##
341
+ # Set the type to convert the value in cells of this column. The
342
+ # values are expected to be encoded using HBase `Bytes.toBytes`
343
+ # function when using the `BINARY` encoding value. The following
344
+ # BigQuery types are allowed:
345
+ #
346
+ # * `BYTES`
347
+ # * `STRING`
348
+ # * `INTEGER`
349
+ # * `FLOAT`
350
+ # * `BOOLEAN`
351
+ #
352
+ # Default type is `BYTES`. Can also be set at the column family
353
+ # level. However, this value takes precedence when set at both
354
+ # levels.
355
+ #
356
+ # @param [String] new_type New type value
357
+ #
358
+ # @example
359
+ # require "google/cloud/bigquery"
360
+ #
361
+ # bigquery = Google::Cloud::Bigquery.new
362
+ #
363
+ # bigtable_url = "https://googleapis.com/bigtable/projects/..."
364
+ # bigtable_table = bigquery.external bigtable_url do |bt|
365
+ # bt.add_family "user" do |u|
366
+ # u.add_string "name" do |col|
367
+ # col.type # "STRING"
368
+ # col.type = "BYTES"
369
+ # col.type # "BYTES"
370
+ # end
371
+ # end
372
+ # end
373
+ #
374
+ def type= new_type
375
+ frozen_check! # raises ArgumentError if this column is frozen
376
+ @gapi.type = new_type # stored as given; the allowed-type list is not enforced in this method
377
+ end
378
+
379
+ ##
380
+ # @private Google API Client object.
381
+ def to_gapi
382
+ @gapi # the wrapped object itself, not a copy, so changes made through it show up in this column
383
+ end
384
+
385
+ ##
386
+ # @private Google API Client object.
387
+ def self.from_gapi gapi
388
+ new_col = new
389
+ new_col.instance_variable_set :@gapi, gapi
390
+ new_col
391
+ end
392
+
393
+ protected
394
+
395
+ def frozen_check!
396
+ return unless frozen?
397
+ raise ArgumentError, "Cannot modify external data source when frozen"
398
+ end
399
+ end
400
+ end
401
+ end
402
+ end
403
+ end
404
+ end