google-cloud-bigquery 1.31.0 → 1.34.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,481 @@
1
+ # Copyright 2021 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/apis/bigquery_v2"
17
+
18
+ module Google
19
+ module Cloud
20
+ module Bigquery
21
+ module External
22
+ ##
23
+ # # CsvSource
24
+ #
25
+ # {External::CsvSource} is a subclass of {External::DataSource} and
26
+ # represents a CSV external data source that can be queried
27
+ # directly, such as Google Cloud Storage or Google Drive, even though
28
+ # the data is not stored in BigQuery. Instead of loading or streaming
29
+ # the data, this object references the external data source.
30
+ #
31
+ # @example
32
+ # require "google/cloud/bigquery"
33
+ #
34
+ # bigquery = Google::Cloud::Bigquery.new
35
+ #
36
+ # csv_url = "gs://bucket/path/to/data.csv"
37
+ # csv_table = bigquery.external csv_url do |csv|
38
+ # csv.autodetect = true
39
+ # csv.skip_leading_rows = 1
40
+ # end
41
+ #
42
+ # data = bigquery.query "SELECT * FROM my_ext_table",
43
+ # external: { my_ext_table: csv_table }
44
+ #
45
+ # # Iterate over the first page of results
46
+ # data.each do |row|
47
+ # puts row[:name]
48
+ # end
49
+ # # Retrieve the next page of results
50
+ # data = data.next if data.next?
51
+ #
52
+ class CsvSource < External::DataSource
53
+ ##
54
+ # @private Create an empty CsvSource object.
55
# @private Create an empty CsvSource object.
#
# Restored from diff-viewer residue; logic is unchanged.
def initialize
  super
  # Assumes the superclass initializer has already populated @gapi -- confirm
  # against External::DataSource. A blank CsvOptions is attached so the
  # accessors in this class always have an object to read from or write to.
  @gapi.csv_options = Google::Apis::BigqueryV2::CsvOptions.new
end
59
+
60
+ ##
61
+ # Indicates if BigQuery should accept rows that are missing trailing
62
+ # optional columns.
63
+ #
64
+ # @return [Boolean]
65
+ #
66
+ # @example
67
+ # require "google/cloud/bigquery"
68
+ #
69
+ # bigquery = Google::Cloud::Bigquery.new
70
+ #
71
+ # csv_url = "gs://bucket/path/to/data.csv"
72
+ # csv_table = bigquery.external csv_url do |csv|
73
+ # csv.jagged_rows = true
74
+ # end
75
+ #
76
+ # csv_table.jagged_rows #=> true
77
+ #
78
# Reads the `allow_jagged_rows` flag from the CSV options held on @gapi.
#
# @return [Boolean]
def jagged_rows
  @gapi.csv_options.allow_jagged_rows
end
81
+
82
+ ##
83
+ # Set whether BigQuery should accept rows that are missing trailing
84
+ # optional columns.
85
+ #
86
+ # @param [Boolean] new_jagged_rows New jagged_rows value
87
+ #
88
+ # @example
89
+ # require "google/cloud/bigquery"
90
+ #
91
+ # bigquery = Google::Cloud::Bigquery.new
92
+ #
93
+ # csv_url = "gs://bucket/path/to/data.csv"
94
+ # csv_table = bigquery.external csv_url do |csv|
95
+ # csv.jagged_rows = true
96
+ # end
97
+ #
98
+ # csv_table.jagged_rows #=> true
99
+ #
100
# Stores the `allow_jagged_rows` flag on the CSV options held on @gapi.
#
# @param [Boolean] new_jagged_rows New jagged_rows value
def jagged_rows= new_jagged_rows
  # frozen_check! is defined outside this file; presumably it rejects
  # modification of a frozen source -- confirm against External::DataSource.
  frozen_check!
  @gapi.csv_options.allow_jagged_rows = new_jagged_rows
end
104
+
105
+ ##
106
+ # Indicates if BigQuery should allow quoted data sections that contain
107
+ # newline characters in a CSV file.
108
+ #
109
+ # @return [Boolean]
110
+ #
111
+ # @example
112
+ # require "google/cloud/bigquery"
113
+ #
114
+ # bigquery = Google::Cloud::Bigquery.new
115
+ #
116
+ # csv_url = "gs://bucket/path/to/data.csv"
117
+ # csv_table = bigquery.external csv_url do |csv|
118
+ # csv.quoted_newlines = true
119
+ # end
120
+ #
121
+ # csv_table.quoted_newlines #=> true
122
+ #
123
# Reads the `allow_quoted_newlines` flag from the CSV options held on @gapi.
#
# @return [Boolean]
def quoted_newlines
  @gapi.csv_options.allow_quoted_newlines
end
126
+
127
+ ##
128
+ # Set whether BigQuery should allow quoted data sections that contain
129
+ # newline characters in a CSV file.
130
+ #
131
+ # @param [Boolean] new_quoted_newlines New quoted_newlines value
132
+ #
133
+ # @example
134
+ # require "google/cloud/bigquery"
135
+ #
136
+ # bigquery = Google::Cloud::Bigquery.new
137
+ #
138
+ # csv_url = "gs://bucket/path/to/data.csv"
139
+ # csv_table = bigquery.external csv_url do |csv|
140
+ # csv.quoted_newlines = true
141
+ # end
142
+ #
143
+ # csv_table.quoted_newlines #=> true
144
+ #
145
# Stores the `allow_quoted_newlines` flag on the CSV options held on @gapi.
#
# @param [Boolean] new_quoted_newlines New quoted_newlines value
def quoted_newlines= new_quoted_newlines
  # frozen_check! is defined outside this file; presumably it rejects
  # modification of a frozen source -- confirm against External::DataSource.
  frozen_check!
  @gapi.csv_options.allow_quoted_newlines = new_quoted_newlines
end
149
+
150
+ ##
151
+ # The character encoding of the data.
152
+ #
153
+ # @return [String]
154
+ #
155
+ # @example
156
+ # require "google/cloud/bigquery"
157
+ #
158
+ # bigquery = Google::Cloud::Bigquery.new
159
+ #
160
+ # csv_url = "gs://bucket/path/to/data.csv"
161
+ # csv_table = bigquery.external csv_url do |csv|
162
+ # csv.encoding = "UTF-8"
163
+ # end
164
+ #
165
+ # csv_table.encoding #=> "UTF-8"
166
+ #
167
# Reads the character encoding name from the CSV options held on @gapi.
#
# @return [String]
def encoding
  @gapi.csv_options.encoding
end
170
+
171
+ ##
172
+ # Set the character encoding of the data.
173
+ #
174
+ # @param [String] new_encoding New encoding value
175
+ #
176
+ # @example
177
+ # require "google/cloud/bigquery"
178
+ #
179
+ # bigquery = Google::Cloud::Bigquery.new
180
+ #
181
+ # csv_url = "gs://bucket/path/to/data.csv"
182
+ # csv_table = bigquery.external csv_url do |csv|
183
+ # csv.encoding = "UTF-8"
184
+ # end
185
+ #
186
+ # csv_table.encoding #=> "UTF-8"
187
+ #
188
# Stores the character encoding name on the CSV options held on @gapi.
#
# @param [String] new_encoding New encoding value
def encoding= new_encoding
  # frozen_check! is defined outside this file; presumably it rejects
  # modification of a frozen source -- confirm against External::DataSource.
  frozen_check!
  @gapi.csv_options.encoding = new_encoding
end
192
+
193
+ ##
194
+ # Checks if the character encoding of the data is "UTF-8". This is the
195
+ # default.
196
+ #
197
+ # @return [Boolean]
198
+ #
199
+ # @example
200
+ # require "google/cloud/bigquery"
201
+ #
202
+ # bigquery = Google::Cloud::Bigquery.new
203
+ #
204
+ # csv_url = "gs://bucket/path/to/data.csv"
205
+ # csv_table = bigquery.external csv_url do |csv|
206
+ # csv.encoding = "UTF-8"
207
+ # end
208
+ #
209
+ # csv_table.encoding #=> "UTF-8"
210
+ # csv_table.utf8? #=> true
211
+ #
212
# Checks whether the encoding is "UTF-8". An unset (nil) encoding also
# counts as UTF-8.
#
# @return [Boolean]
def utf8?
  encoding.nil? || encoding == "UTF-8"
end
216
+
217
+ ##
218
+ # Checks if the character encoding of the data is "ISO-8859-1".
219
+ #
220
+ # @return [Boolean]
221
+ #
222
+ # @example
223
+ # require "google/cloud/bigquery"
224
+ #
225
+ # bigquery = Google::Cloud::Bigquery.new
226
+ #
227
+ # csv_url = "gs://bucket/path/to/data.csv"
228
+ # csv_table = bigquery.external csv_url do |csv|
229
+ # csv.encoding = "ISO-8859-1"
230
+ # end
231
+ #
232
+ # csv_table.encoding #=> "ISO-8859-1"
233
+ # csv_table.iso8859_1? #=> true
234
+ #
235
# Checks whether the encoding is exactly "ISO-8859-1". Unlike utf8?, an
# unset (nil) encoding yields false here.
#
# @return [Boolean]
def iso8859_1?
  encoding == "ISO-8859-1"
end
238
+
239
+ ##
240
+ # The separator for fields in a CSV file.
241
+ #
242
+ # @return [String]
243
+ #
244
+ # @example
245
+ # require "google/cloud/bigquery"
246
+ #
247
+ # bigquery = Google::Cloud::Bigquery.new
248
+ #
249
+ # csv_url = "gs://bucket/path/to/data.csv"
250
+ # csv_table = bigquery.external csv_url do |csv|
251
+ # csv.delimiter = "|"
252
+ # end
253
+ #
254
+ # csv_table.delimiter #=> "|"
255
+ #
256
# Reads the field separator (`field_delimiter`) from the CSV options held
# on @gapi.
#
# @return [String]
def delimiter
  @gapi.csv_options.field_delimiter
end
259
+
260
+ ##
261
+ # Set the separator for fields in a CSV file.
262
+ #
263
+ # @param [String] new_delimiter New delimiter value
264
+ #
265
+ # @example
266
+ # require "google/cloud/bigquery"
267
+ #
268
+ # bigquery = Google::Cloud::Bigquery.new
269
+ #
270
+ # csv_url = "gs://bucket/path/to/data.csv"
271
+ # csv_table = bigquery.external csv_url do |csv|
272
+ # csv.delimiter = "|"
273
+ # end
274
+ #
275
+ # csv_table.delimiter #=> "|"
276
+ #
277
# Stores the field separator as `field_delimiter` on the CSV options held
# on @gapi.
#
# @param [String] new_delimiter New delimiter value
def delimiter= new_delimiter
  # frozen_check! is defined outside this file; presumably it rejects
  # modification of a frozen source -- confirm against External::DataSource.
  frozen_check!
  @gapi.csv_options.field_delimiter = new_delimiter
end
281
+
282
+ ##
283
+ # The value that is used to quote data sections in a CSV file.
284
+ #
285
+ # @return [String]
286
+ #
287
+ # @example
288
+ # require "google/cloud/bigquery"
289
+ #
290
+ # bigquery = Google::Cloud::Bigquery.new
291
+ #
292
+ # csv_url = "gs://bucket/path/to/data.csv"
293
+ # csv_table = bigquery.external csv_url do |csv|
294
+ # csv.quote = "'"
295
+ # end
296
+ #
297
+ # csv_table.quote #=> "'"
298
+ #
299
# Reads the quote value from the CSV options held on @gapi.
#
# @return [String]
def quote
  @gapi.csv_options.quote
end
302
+
303
+ ##
304
+ # Set the value that is used to quote data sections in a CSV file.
305
+ #
306
+ # @param [String] new_quote New quote value
307
+ #
308
+ # @example
309
+ # require "google/cloud/bigquery"
310
+ #
311
+ # bigquery = Google::Cloud::Bigquery.new
312
+ #
313
+ # csv_url = "gs://bucket/path/to/data.csv"
314
+ # csv_table = bigquery.external csv_url do |csv|
315
+ # csv.quote = "'"
316
+ # end
317
+ #
318
+ # csv_table.quote #=> "'"
319
+ #
320
# Stores the quote value on the CSV options held on @gapi.
#
# @param [String] new_quote New quote value
def quote= new_quote
  # frozen_check! is defined outside this file; presumably it rejects
  # modification of a frozen source -- confirm against External::DataSource.
  frozen_check!
  @gapi.csv_options.quote = new_quote
end
324
+
325
+ ##
326
+ # The number of rows at the top of a CSV file that BigQuery will skip
327
+ # when reading the data.
328
+ #
329
+ # @return [Integer]
330
+ #
331
+ # @example
332
+ # require "google/cloud/bigquery"
333
+ #
334
+ # bigquery = Google::Cloud::Bigquery.new
335
+ #
336
+ # csv_url = "gs://bucket/path/to/data.csv"
337
+ # csv_table = bigquery.external csv_url do |csv|
338
+ # csv.skip_leading_rows = 1
339
+ # end
340
+ #
341
+ # csv_table.skip_leading_rows #=> 1
342
+ #
343
# Reads the number of leading rows to skip from the CSV options held on
# @gapi.
#
# @return [Integer]
def skip_leading_rows
  @gapi.csv_options.skip_leading_rows
end
346
+
347
+ ##
348
+ # Set the number of rows at the top of a CSV file that BigQuery will
349
+ # skip when reading the data.
350
+ #
351
+ # @param [Integer] row_count New skip_leading_rows value
352
+ #
353
+ # @example
354
+ # require "google/cloud/bigquery"
355
+ #
356
+ # bigquery = Google::Cloud::Bigquery.new
357
+ #
358
+ # csv_url = "gs://bucket/path/to/data.csv"
359
+ # csv_table = bigquery.external csv_url do |csv|
360
+ # csv.skip_leading_rows = 1
361
+ # end
362
+ #
363
+ # csv_table.skip_leading_rows #=> 1
364
+ #
365
# Stores the number of leading rows to skip on the CSV options held on
# @gapi.
#
# @param [Integer] row_count New skip_leading_rows value
def skip_leading_rows= row_count
  # frozen_check! is defined outside this file; presumably it rejects
  # modification of a frozen source -- confirm against External::DataSource.
  frozen_check!
  @gapi.csv_options.skip_leading_rows = row_count
end
369
+
370
+ ##
371
+ # The schema for the data.
372
+ #
373
+ # @param [Boolean] replace Whether to replace the existing schema with
374
+ # the new schema. If `true`, the fields will replace the existing
375
+ # schema. If `false`, the fields will be added to the existing
376
+ # schema. The default value is `false`.
377
+ # @yield [schema] a block for setting the schema
378
+ # @yieldparam [Schema] schema the object accepting the schema
379
+ #
380
+ # @return [Google::Cloud::Bigquery::Schema]
381
+ #
382
+ # @example
383
+ # require "google/cloud/bigquery"
384
+ #
385
+ # bigquery = Google::Cloud::Bigquery.new
386
+ #
387
+ # csv_url = "gs://bucket/path/to/data.csv"
388
+ # csv_table = bigquery.external csv_url do |csv|
389
+ # csv.schema do |schema|
390
+ # schema.string "name", mode: :required
391
+ # schema.string "email", mode: :required
392
+ # schema.integer "age", mode: :required
393
+ # schema.boolean "active", mode: :required
394
+ # end
395
+ # end
396
+ #
397
# Returns the schema wrapper for this source, building it from
# `@gapi.schema` on first use and caching it in @schema.
#
# @param [Boolean] replace When true, the cached schema is discarded and
#   replaced by a fresh one from `Schema.from_gapi` called with no argument.
# @yield [schema] optional block for configuring the schema
# @yieldparam [Schema] schema the cached schema object
# @return [Google::Cloud::Bigquery::Schema]
def schema replace: false
  @schema ||= Schema.from_gapi @gapi.schema
  if replace
    # Replacing is a mutation, so the frozen guard runs before the swap.
    # frozen_check! is defined outside this file -- confirm its behaviour.
    frozen_check!
    @schema = Schema.from_gapi
  end
  # A frozen source hands out a frozen schema as well.
  @schema.freeze if frozen?
  yield @schema if block_given?
  @schema
end
407
+
408
+ ##
409
+ # Set the schema for the data.
410
+ #
411
+ # @param [Schema] new_schema The schema object.
412
+ #
413
+ # @example
414
+ # require "google/cloud/bigquery"
415
+ #
416
+ # bigquery = Google::Cloud::Bigquery.new
417
+ #
418
+ # csv_schema = bigquery.schema do |schema|
419
+ # schema.string "name", mode: :required
420
+ # schema.string "email", mode: :required
421
+ # schema.integer "age", mode: :required
422
+ # schema.boolean "active", mode: :required
423
+ # end
424
+ #
425
+ # csv_url = "gs://bucket/path/to/data.csv"
426
+ # csv_table = bigquery.external csv_url
427
+ # csv_table.schema = csv_schema
428
+ #
429
# Replaces the cached schema object. The value is only stored in @schema
# here; it is not written to @gapi by this method.
#
# @param [Schema] new_schema The schema object.
def schema= new_schema
  # frozen_check! is defined outside this file; presumably it rejects
  # modification of a frozen source -- confirm against External::DataSource.
  frozen_check!
  @schema = new_schema
end
433
+
434
+ ##
435
+ # The fields of the schema.
436
+ #
437
+ # @return [Array<Schema::Field>] An array of field objects.
438
+ #
439
# Delegates to `schema.fields`.
#
# @return [Array<Schema::Field>] An array of field objects.
def fields
  schema.fields
end
442
+
443
+ ##
444
+ # The names of the columns in the schema.
445
+ #
446
+ # @return [Array<Symbol>] An array of column names.
447
+ #
448
# Delegates to `schema.headers`.
#
# @return [Array<Symbol>] An array of column names.
def headers
  schema.headers
end
451
+
452
+ ##
453
+ # The types of the fields in the data in the schema, using the same
454
+ # format as the optional query parameter types.
455
+ #
456
+ # @return [Hash] A hash with field names as keys, and types as values.
457
+ #
458
# Delegates to `schema.param_types`.
#
# @return [Hash] A hash with field names as keys, and types as values.
def param_types
  schema.param_types
end
461
+
462
+ ##
463
+ # @private Google API Client object.
464
# @private Google API Client object.
#
# Copies the cached schema (if any) onto @gapi before returning @gapi, so
# schema edits made through #schema or #schema= reach the API object.
def to_gapi
  # When no schema has been cached, @gapi.schema is left as it is.
  @gapi.schema = @schema.to_gapi if @schema
  @gapi
end
468
+
469
+ ##
470
+ # @private Google API Client object.
471
# @private Google API Client object.
#
# Builds the instance through the superclass `from_gapi`, then seeds its
# @schema with a wrapper built from `gapi.schema`.
def self.from_gapi gapi
  # Bare `super` forwards `gapi` unchanged to the parent implementation.
  new_table = super
  schema = Schema.from_gapi gapi.schema
  # @schema has no public writer that skips frozen_check!, hence the direct set.
  new_table.instance_variable_set :@schema, schema
  new_table
end
477
+ end
478
+ end
479
+ end
480
+ end
481
+ end