google-cloud-bigquery 1.31.0 → 1.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,481 @@
1
+ # Copyright 2021 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/apis/bigquery_v2"
17
+
18
+ module Google
19
+ module Cloud
20
+ module Bigquery
21
+ module External
22
+ ##
23
+ # # CsvSource
24
+ #
25
+ # {External::CsvSource} is a subclass of {External::DataSource} and
26
+ # represents a CSV external data source, such as a file in Google
27
+ # Cloud Storage or Google Drive, that can be queried directly even
28
+ # though the data is not stored in BigQuery. Instead of loading or
29
+ # streaming the data, this object references the external data source.
30
+ #
31
+ # @example
32
+ # require "google/cloud/bigquery"
33
+ #
34
+ # bigquery = Google::Cloud::Bigquery.new
35
+ #
36
+ # csv_url = "gs://bucket/path/to/data.csv"
37
+ # csv_table = bigquery.external csv_url do |csv|
38
+ # csv.autodetect = true
39
+ # csv.skip_leading_rows = 1
40
+ # end
41
+ #
42
+ # data = bigquery.query "SELECT * FROM my_ext_table",
43
+ # external: { my_ext_table: csv_table }
44
+ #
45
+ # # Iterate over the first page of results
46
+ # data.each do |row|
47
+ # puts row[:name]
48
+ # end
49
+ # # Retrieve the next page of results
50
+ # data = data.next if data.next?
51
+ #
52
+ class CsvSource < External::DataSource
53
+ ##
54
+ # @private Create an empty CsvSource object.
55
+ def initialize
56
+ super # zero-argument super runs the parent initializer first (presumably it creates @gapi - confirm in External::DataSource)
57
+ @gapi.csv_options = Google::Apis::BigqueryV2::CsvOptions.new # attach an empty CsvOptions so the CSV accessors have an object to read and write
58
+ end
59
+
60
+ ##
61
+ # Indicates if BigQuery should accept rows that are missing trailing
62
+ # optional columns.
63
+ #
64
+ # @return [Boolean]
65
+ #
66
+ # @example
67
+ # require "google/cloud/bigquery"
68
+ #
69
+ # bigquery = Google::Cloud::Bigquery.new
70
+ #
71
+ # csv_url = "gs://bucket/path/to/data.csv"
72
+ # csv_table = bigquery.external csv_url do |csv|
73
+ # csv.jagged_rows = true
74
+ # end
75
+ #
76
+ # csv_table.jagged_rows #=> true
77
+ #
78
+ def jagged_rows
79
+ @gapi.csv_options.allow_jagged_rows # public name is jagged_rows; the underlying API field is allow_jagged_rows
80
+ end
81
+
82
+ ##
83
+ # Set whether BigQuery should accept rows that are missing trailing
84
+ # optional columns.
85
+ #
86
+ # @param [Boolean] new_jagged_rows New jagged_rows value
87
+ #
88
+ # @example
89
+ # require "google/cloud/bigquery"
90
+ #
91
+ # bigquery = Google::Cloud::Bigquery.new
92
+ #
93
+ # csv_url = "gs://bucket/path/to/data.csv"
94
+ # csv_table = bigquery.external csv_url do |csv|
95
+ # csv.jagged_rows = true
96
+ # end
97
+ #
98
+ # csv_table.jagged_rows #=> true
99
+ #
100
+ def jagged_rows= new_jagged_rows
101
+ frozen_check! # defined outside this file; presumably rejects writes on a frozen source - confirm
102
+ @gapi.csv_options.allow_jagged_rows = new_jagged_rows # stored under the API field name allow_jagged_rows
103
+ end
104
+
105
+ ##
106
+ # Indicates if BigQuery should allow quoted data sections that contain
107
+ # newline characters in a CSV file.
108
+ #
109
+ # @return [Boolean]
110
+ #
111
+ # @example
112
+ # require "google/cloud/bigquery"
113
+ #
114
+ # bigquery = Google::Cloud::Bigquery.new
115
+ #
116
+ # csv_url = "gs://bucket/path/to/data.csv"
117
+ # csv_table = bigquery.external csv_url do |csv|
118
+ # csv.quoted_newlines = true
119
+ # end
120
+ #
121
+ # csv_table.quoted_newlines #=> true
122
+ #
123
+ def quoted_newlines
124
+ @gapi.csv_options.allow_quoted_newlines # public name is quoted_newlines; the underlying API field is allow_quoted_newlines
125
+ end
126
+
127
+ ##
128
+ # Set whether BigQuery should allow quoted data sections that contain
129
+ # newline characters in a CSV file.
130
+ #
131
+ # @param [Boolean] new_quoted_newlines New quoted_newlines value
132
+ #
133
+ # @example
134
+ # require "google/cloud/bigquery"
135
+ #
136
+ # bigquery = Google::Cloud::Bigquery.new
137
+ #
138
+ # csv_url = "gs://bucket/path/to/data.csv"
139
+ # csv_table = bigquery.external csv_url do |csv|
140
+ # csv.quoted_newlines = true
141
+ # end
142
+ #
143
+ # csv_table.quoted_newlines #=> true
144
+ #
145
+ def quoted_newlines= new_quoted_newlines
146
+ frozen_check! # defined outside this file; presumably rejects writes on a frozen source - confirm
147
+ @gapi.csv_options.allow_quoted_newlines = new_quoted_newlines # stored under the API field name allow_quoted_newlines
148
+ end
149
+
150
+ ##
151
+ # The character encoding of the data.
152
+ #
153
+ # @return [String]
154
+ #
155
+ # @example
156
+ # require "google/cloud/bigquery"
157
+ #
158
+ # bigquery = Google::Cloud::Bigquery.new
159
+ #
160
+ # csv_url = "gs://bucket/path/to/data.csv"
161
+ # csv_table = bigquery.external csv_url do |csv|
162
+ # csv.encoding = "UTF-8"
163
+ # end
164
+ #
165
+ # csv_table.encoding #=> "UTF-8"
166
+ #
167
+ def encoding
168
+ @gapi.csv_options.encoding # returned exactly as stored on csv_options; no default is substituted here
169
+ end
170
+
171
+ ##
172
+ # Set the character encoding of the data.
173
+ #
174
+ # @param [String] new_encoding New encoding value
175
+ #
176
+ # @example
177
+ # require "google/cloud/bigquery"
178
+ #
179
+ # bigquery = Google::Cloud::Bigquery.new
180
+ #
181
+ # csv_url = "gs://bucket/path/to/data.csv"
182
+ # csv_table = bigquery.external csv_url do |csv|
183
+ # csv.encoding = "UTF-8"
184
+ # end
185
+ #
186
+ # csv_table.encoding #=> "UTF-8"
187
+ #
188
+ def encoding= new_encoding
189
+ frozen_check! # defined outside this file; presumably rejects writes on a frozen source - confirm
190
+ @gapi.csv_options.encoding = new_encoding # stored as given; no validation of the value happens in this method
191
+ end
192
+
193
+ ##
194
+ # Checks if the character encoding of the data is "UTF-8". This is the
195
+ # default.
196
+ #
197
+ # @return [Boolean]
198
+ #
199
+ # @example
200
+ # require "google/cloud/bigquery"
201
+ #
202
+ # bigquery = Google::Cloud::Bigquery.new
203
+ #
204
+ # csv_url = "gs://bucket/path/to/data.csv"
205
+ # csv_table = bigquery.external csv_url do |csv|
206
+ # csv.encoding = "UTF-8"
207
+ # end
208
+ #
209
+ # csv_table.encoding #=> "UTF-8"
210
+ # csv_table.utf8? #=> true
211
+ #
212
+ def utf8?
213
+ return true if encoding.nil?
214
+ encoding == "UTF-8"
215
+ end
216
+
217
+ ##
218
+ # Checks if the character encoding of the data is "ISO-8859-1".
219
+ #
220
+ # @return [Boolean]
221
+ #
222
+ # @example
223
+ # require "google/cloud/bigquery"
224
+ #
225
+ # bigquery = Google::Cloud::Bigquery.new
226
+ #
227
+ # csv_url = "gs://bucket/path/to/data.csv"
228
+ # csv_table = bigquery.external csv_url do |csv|
229
+ # csv.encoding = "ISO-8859-1"
230
+ # end
231
+ #
232
+ # csv_table.encoding #=> "ISO-8859-1"
233
+ # csv_table.iso8859_1? #=> true
234
+ #
235
+ def iso8859_1?
236
+ encoding == "ISO-8859-1" # exact string comparison; unlike utf8?, an unset (nil) encoding gives false here
237
+ end
238
+
239
+ ##
240
+ # The separator for fields in a CSV file.
241
+ #
242
+ # @return [String]
243
+ #
244
+ # @example
245
+ # require "google/cloud/bigquery"
246
+ #
247
+ # bigquery = Google::Cloud::Bigquery.new
248
+ #
249
+ # csv_url = "gs://bucket/path/to/data.csv"
250
+ # csv_table = bigquery.external csv_url do |csv|
251
+ # csv.delimiter = "|"
252
+ # end
253
+ #
254
+ # csv_table.delimiter #=> "|"
255
+ #
256
+ def delimiter
257
+ @gapi.csv_options.field_delimiter # public name is delimiter; the underlying API field is field_delimiter
258
+ end
259
+
260
+ ##
261
+ # Set the separator for fields in a CSV file.
262
+ #
263
+ # @param [String] new_delimiter New delimiter value
264
+ #
265
+ # @example
266
+ # require "google/cloud/bigquery"
267
+ #
268
+ # bigquery = Google::Cloud::Bigquery.new
269
+ #
270
+ # csv_url = "gs://bucket/path/to/data.csv"
271
+ # csv_table = bigquery.external csv_url do |csv|
272
+ # csv.delimiter = "|"
273
+ # end
274
+ #
275
+ # csv_table.delimiter #=> "|"
276
+ #
277
+ def delimiter= new_delimiter
278
+ frozen_check! # defined outside this file; presumably rejects writes on a frozen source - confirm
279
+ @gapi.csv_options.field_delimiter = new_delimiter # stored under the API field name field_delimiter
280
+ end
281
+
282
+ ##
283
+ # The value that is used to quote data sections in a CSV file.
284
+ #
285
+ # @return [String]
286
+ #
287
+ # @example
288
+ # require "google/cloud/bigquery"
289
+ #
290
+ # bigquery = Google::Cloud::Bigquery.new
291
+ #
292
+ # csv_url = "gs://bucket/path/to/data.csv"
293
+ # csv_table = bigquery.external csv_url do |csv|
294
+ # csv.quote = "'"
295
+ # end
296
+ #
297
+ # csv_table.quote #=> "'"
298
+ #
299
+ def quote
300
+ @gapi.csv_options.quote # returned exactly as stored on csv_options
301
+ end
302
+
303
+ ##
304
+ # Set the value that is used to quote data sections in a CSV file.
305
+ #
306
+ # @param [String] new_quote New quote value
307
+ #
308
+ # @example
309
+ # require "google/cloud/bigquery"
310
+ #
311
+ # bigquery = Google::Cloud::Bigquery.new
312
+ #
313
+ # csv_url = "gs://bucket/path/to/data.csv"
314
+ # csv_table = bigquery.external csv_url do |csv|
315
+ # csv.quote = "'"
316
+ # end
317
+ #
318
+ # csv_table.quote #=> "'"
319
+ #
320
+ def quote= new_quote
321
+ frozen_check! # defined outside this file; presumably rejects writes on a frozen source - confirm
322
+ @gapi.csv_options.quote = new_quote # stored as given; no validation of the value happens in this method
323
+ end
324
+
325
+ ##
326
+ # The number of rows at the top of a CSV file that BigQuery will skip
327
+ # when reading the data.
328
+ #
329
+ # @return [Integer]
330
+ #
331
+ # @example
332
+ # require "google/cloud/bigquery"
333
+ #
334
+ # bigquery = Google::Cloud::Bigquery.new
335
+ #
336
+ # csv_url = "gs://bucket/path/to/data.csv"
337
+ # csv_table = bigquery.external csv_url do |csv|
338
+ # csv.skip_leading_rows = 1
339
+ # end
340
+ #
341
+ # csv_table.skip_leading_rows #=> 1
342
+ #
343
+ def skip_leading_rows
344
+ @gapi.csv_options.skip_leading_rows # returned exactly as stored on csv_options
345
+ end
346
+
347
+ ##
348
+ # Set the number of rows at the top of a CSV file that BigQuery will
349
+ # skip when reading the data.
350
+ #
351
+ # @param [Integer] row_count New skip_leading_rows value
352
+ #
353
+ # @example
354
+ # require "google/cloud/bigquery"
355
+ #
356
+ # bigquery = Google::Cloud::Bigquery.new
357
+ #
358
+ # csv_url = "gs://bucket/path/to/data.csv"
359
+ # csv_table = bigquery.external csv_url do |csv|
360
+ # csv.skip_leading_rows = 1
361
+ # end
362
+ #
363
+ # csv_table.skip_leading_rows #=> 1
364
+ #
365
+ def skip_leading_rows= row_count
366
+ frozen_check! # defined outside this file; presumably rejects writes on a frozen source - confirm
367
+ @gapi.csv_options.skip_leading_rows = row_count # stored as given; no range or type check happens in this method
368
+ end
369
+
370
+ ##
371
+ # The schema for the data.
372
+ #
373
+ # @param [Boolean] replace Whether to replace the existing schema with
374
+ # the new schema. If `true`, the fields will replace the existing
375
+ # schema. If `false`, the fields will be added to the existing
376
+ # schema. The default value is `false`.
377
+ # @yield [schema] a block for setting the schema
378
+ # @yieldparam [Schema] schema the object accepting the schema
379
+ #
380
+ # @return [Google::Cloud::Bigquery::Schema]
381
+ #
382
+ # @example
383
+ # require "google/cloud/bigquery"
384
+ #
385
+ # bigquery = Google::Cloud::Bigquery.new
386
+ #
387
+ # csv_url = "gs://bucket/path/to/data.csv"
388
+ # csv_table = bigquery.external csv_url do |csv|
389
+ # csv.schema do |schema|
390
+ # schema.string "name", mode: :required
391
+ # schema.string "email", mode: :required
392
+ # schema.integer "age", mode: :required
393
+ # schema.boolean "active", mode: :required
394
+ # end
395
+ # end
396
+ #
397
+ def schema replace: false
398
+ @schema ||= Schema.from_gapi @gapi.schema # build the wrapper from the API object's schema on first use, then reuse it
399
+ if replace
400
+ frozen_check! # checked only on the replace path, before the existing schema is discarded
401
+ @schema = Schema.from_gapi # called with no argument - presumably produces an empty schema; confirm in Schema.from_gapi
402
+ end
403
+ @schema.freeze if frozen? # a frozen source hands out a frozen schema
404
+ yield @schema if block_given? # the block receives the schema object itself, not a copy
405
+ @schema
406
+ end
407
+
408
+ ##
409
+ # Set the schema for the data.
410
+ #
411
+ # @param [Schema] new_schema The schema object.
412
+ #
413
+ # @example
414
+ # require "google/cloud/bigquery"
415
+ #
416
+ # bigquery = Google::Cloud::Bigquery.new
417
+ #
418
+ # csv_schema = bigquery.schema do |schema|
419
+ # schema.string "name", mode: :required
420
+ # schema.string "email", mode: :required
421
+ # schema.integer "age", mode: :required
422
+ # schema.boolean "active", mode: :required
423
+ # end
424
+ #
425
+ # csv_url = "gs://bucket/path/to/data.csv"
426
+ # csv_table = bigquery.external csv_url
427
+ # csv_table.schema = csv_schema
428
+ #
429
+ def schema= new_schema
430
+ frozen_check! # defined outside this file; presumably rejects writes on a frozen source - confirm
431
+ @schema = new_schema # held on this object only; to_gapi copies it onto @gapi.schema when called
432
+ end
433
+
434
+ ##
435
+ # The fields of the schema.
436
+ #
437
+ # @return [Array<Schema::Field>] An array of field objects.
438
+ #
439
+ def fields
440
+ schema.fields # goes through #schema, so the Schema wrapper is built on first use if needed
441
+ end
442
+
443
+ ##
444
+ # The names of the columns in the schema.
445
+ #
446
+ # @return [Array<Symbol>] An array of column names.
447
+ #
448
+ def headers
449
+ schema.headers # goes through #schema, so the Schema wrapper is built on first use if needed
450
+ end
451
+
452
+ ##
453
+ # The types of the fields in the data in the schema, using the same
454
+ # format as the optional query parameter types.
455
+ #
456
+ # @return [Hash] A hash with field names as keys, and types as values.
457
+ #
458
+ def param_types
459
+ schema.param_types # goes through #schema, so the Schema wrapper is built on first use if needed
460
+ end
461
+
462
+ ##
463
+ # @private Google API Client object.
464
+ def to_gapi
465
+ @gapi.schema = @schema.to_gapi if @schema # sync a locally held schema onto the API object; skipped when @schema is unset
466
+ @gapi
467
+ end
468
+
469
+ ##
470
+ # @private Google API Client object.
471
+ def self.from_gapi gapi
472
+ new_table = super
473
+ schema = Schema.from_gapi gapi.schema
474
+ new_table.instance_variable_set :@schema, schema
475
+ new_table
476
+ end
477
+ end
478
+ end
479
+ end
480
+ end
481
+ end