google-cloud-bigquery 1.21.2

Files changed (44)
  1. checksums.yaml +7 -0
  2. data/.yardopts +16 -0
  3. data/AUTHENTICATION.md +158 -0
  4. data/CHANGELOG.md +397 -0
  5. data/CODE_OF_CONDUCT.md +40 -0
  6. data/CONTRIBUTING.md +188 -0
  7. data/LICENSE +201 -0
  8. data/LOGGING.md +27 -0
  9. data/OVERVIEW.md +463 -0
  10. data/TROUBLESHOOTING.md +31 -0
  11. data/lib/google-cloud-bigquery.rb +139 -0
  12. data/lib/google/cloud/bigquery.rb +145 -0
  13. data/lib/google/cloud/bigquery/argument.rb +197 -0
  14. data/lib/google/cloud/bigquery/convert.rb +383 -0
  15. data/lib/google/cloud/bigquery/copy_job.rb +316 -0
  16. data/lib/google/cloud/bigquery/credentials.rb +50 -0
  17. data/lib/google/cloud/bigquery/data.rb +526 -0
  18. data/lib/google/cloud/bigquery/dataset.rb +2845 -0
  19. data/lib/google/cloud/bigquery/dataset/access.rb +1021 -0
  20. data/lib/google/cloud/bigquery/dataset/list.rb +162 -0
  21. data/lib/google/cloud/bigquery/encryption_configuration.rb +123 -0
  22. data/lib/google/cloud/bigquery/external.rb +2432 -0
  23. data/lib/google/cloud/bigquery/extract_job.rb +368 -0
  24. data/lib/google/cloud/bigquery/insert_response.rb +180 -0
  25. data/lib/google/cloud/bigquery/job.rb +657 -0
  26. data/lib/google/cloud/bigquery/job/list.rb +162 -0
  27. data/lib/google/cloud/bigquery/load_job.rb +1704 -0
  28. data/lib/google/cloud/bigquery/model.rb +740 -0
  29. data/lib/google/cloud/bigquery/model/list.rb +164 -0
  30. data/lib/google/cloud/bigquery/project.rb +1655 -0
  31. data/lib/google/cloud/bigquery/project/list.rb +161 -0
  32. data/lib/google/cloud/bigquery/query_job.rb +1695 -0
  33. data/lib/google/cloud/bigquery/routine.rb +1108 -0
  34. data/lib/google/cloud/bigquery/routine/list.rb +165 -0
  35. data/lib/google/cloud/bigquery/schema.rb +564 -0
  36. data/lib/google/cloud/bigquery/schema/field.rb +668 -0
  37. data/lib/google/cloud/bigquery/service.rb +589 -0
  38. data/lib/google/cloud/bigquery/standard_sql.rb +495 -0
  39. data/lib/google/cloud/bigquery/table.rb +3340 -0
  40. data/lib/google/cloud/bigquery/table/async_inserter.rb +520 -0
  41. data/lib/google/cloud/bigquery/table/list.rb +172 -0
  42. data/lib/google/cloud/bigquery/time.rb +65 -0
  43. data/lib/google/cloud/bigquery/version.rb +22 -0
  44. metadata +297 -0
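
The two hunks that follow are the new source files for credential handling and result paging. For context only (not part of the diff itself), a minimal way to exercise this exact release might look like the sketch below; the project ID is a placeholder and Application Default Credentials are assumed to be configured:

    # Gemfile -- pin the release shown in this diff
    gem "google-cloud-bigquery", "1.21.2"

    # smoke test (placeholder project ID, assumes Application Default Credentials)
    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new project_id: "my-project"
    data = bigquery.query "SELECT 1 AS one"
    data.each { |row| puts row[:one] }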
data/lib/google/cloud/bigquery/credentials.rb
@@ -0,0 +1,50 @@
+ # Copyright 2015 Google LLC
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     https://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+
+ require "googleauth"
+
+ module Google
+   module Cloud
+     module Bigquery
+       ##
+       # # Credentials
+       #
+       # Represents the authentication and authorization used to connect to the
+       # BigQuery API.
+       #
+       # @example
+       #   require "google/cloud/bigquery"
+       #
+       #   keyfile = "/path/to/keyfile.json"
+       #   creds = Google::Cloud::Bigquery::Credentials.new keyfile
+       #
+       #   bigquery = Google::Cloud::Bigquery.new(
+       #     project_id: "my-project",
+       #     credentials: creds
+       #   )
+       #
+       #   bigquery.project_id #=> "my-project"
+       #
+       class Credentials < Google::Auth::Credentials
+         SCOPE = ["https://www.googleapis.com/auth/bigquery"].freeze
+         PATH_ENV_VARS = ["BIGQUERY_CREDENTIALS", "GOOGLE_CLOUD_CREDENTIALS", "BIGQUERY_KEYFILE", "GOOGLE_CLOUD_KEYFILE",
+                          "GCLOUD_KEYFILE"].freeze
+         JSON_ENV_VARS = ["BIGQUERY_CREDENTIALS_JSON", "GOOGLE_CLOUD_CREDENTIALS_JSON", "BIGQUERY_KEYFILE_JSON",
+                          "GOOGLE_CLOUD_KEYFILE_JSON", "GCLOUD_KEYFILE_JSON"].freeze
+         DEFAULT_PATHS = ["~/.config/gcloud/application_default_credentials.json"].freeze
+       end
+     end
+   end
+ end
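
The constants above drive credential discovery: `SCOPE` is the OAuth scope requested, `PATH_ENV_VARS` and `JSON_ENV_VARS` name environment variables checked for a keyfile path or inline JSON, and `DEFAULT_PATHS` falls back to the gcloud application default credentials file. A minimal sketch of the two usual entry points, with placeholder keyfile path and project ID:

    require "google/cloud/bigquery"

    # Either point one of the documented variables at a keyfile
    # (picked up when no :credentials argument is given)...
    ENV["BIGQUERY_CREDENTIALS"] = "/path/to/keyfile.json" # placeholder
    bigquery = Google::Cloud::Bigquery.new project_id: "my-project"

    # ...or build the Credentials object explicitly, as in the class example above.
    creds = Google::Cloud::Bigquery::Credentials.new "/path/to/keyfile.json"
    bigquery = Google::Cloud::Bigquery.new project_id: "my-project",
                                           credentials: creds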
data/lib/google/cloud/bigquery/data.rb
@@ -0,0 +1,526 @@
+ # Copyright 2015 Google LLC
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     https://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+
+ require "delegate"
+ require "google/cloud/bigquery/service"
+
+ module Google
+   module Cloud
+     module Bigquery
+       ##
+       # # Data
+       #
+       # Represents a page of results (rows) as an array of hashes. Because Data
+       # delegates to Array, methods such as `Array#count` represent the number
+       # of rows in the page. In addition, methods of this class include result
+       # set metadata such as `total` and provide access to the schema of the
+       # query or table. See {Project#query}, {Dataset#query} and {Table#data}.
+       #
+       # @example
+       #   require "google/cloud/bigquery"
+       #
+       #   bigquery = Google::Cloud::Bigquery.new
+       #
+       #   sql = "SELECT word FROM `bigquery-public-data.samples.shakespeare`"
+       #   job = bigquery.query_job sql
+       #
+       #   job.wait_until_done!
+       #   data = job.data
+       #
+       #   data.count # 100000
+       #   data.total # 164656
+       #
+       #   # Iterate over the first page of results
+       #   data.each do |row|
+       #     puts row[:word]
+       #   end
+       #   # Retrieve the next page of results
+       #   data = data.next if data.next?
+       #
+       class Data < DelegateClass(::Array)
+         ##
+         # @private The Service object.
+         attr_accessor :service
+
+         ##
+         # @private The {Table} object the data belongs to.
+         attr_accessor :table_gapi
+
+         ##
+         # @private The Google API Client object in JSON Hash.
+         attr_accessor :gapi_json
+
+         ##
+         # @private The query Job gapi object, or nil if from Table#data.
+         attr_accessor :job_gapi
+
+         # @private
+         def initialize arr = []
+           @service = nil
+           @table_gapi = nil
+           @gapi_json = nil
+           super arr
+         end
+
+         ##
+         # The resource type of the API response.
+         #
+         # @return [String] The resource type.
+         #
+         def kind
+           @gapi_json[:kind]
+         end
+
+         ##
+         # An ETag hash for the page of results represented by the data instance.
+         #
+         # @return [String] The ETag hash.
+         #
+         def etag
+           @gapi_json[:etag]
+         end
+
+         ##
+         # A token used for paging results. Used by the data instance to retrieve
+         # subsequent pages. See {#next}.
+         #
+         # @return [String] The pagination token.
+         #
+         def token
+           @gapi_json[:pageToken]
+         end
+
+         ##
+         # The total number of rows in the complete table.
+         #
+         # @return [Integer] The number of rows.
+         #
+         # @example
+         #   require "google/cloud/bigquery"
+         #
+         #   bigquery = Google::Cloud::Bigquery.new
+         #
+         #   sql = "SELECT word FROM `bigquery-public-data.samples.shakespeare`"
+         #   job = bigquery.query_job sql
+         #
+         #   job.wait_until_done!
+         #   data = job.data
+         #
+         #   data.count # 100000
+         #   data.total # 164656
+         #
+         #   # Iterate over the first page of results
+         #   data.each do |row|
+         #     puts row[:word]
+         #   end
+         #   # Retrieve the next page of results
+         #   data = data.next if data.next?
+         #
+         def total
+           Integer @gapi_json[:totalRows]
+         rescue StandardError
+           nil
+         end
+
+         ##
+         # The schema of the table from which the data was read.
+         #
+         # The returned object is frozen and changes are not allowed. Use
+         # {Table#schema} to update the schema.
+         #
+         # @return [Schema] A schema object.
+         #
+         # @example
+         #   require "google/cloud/bigquery"
+         #
+         #   bigquery = Google::Cloud::Bigquery.new
+         #   dataset = bigquery.dataset "my_dataset"
+         #   table = dataset.table "my_table"
+         #
+         #   data = table.data
+         #
+         #   schema = data.schema
+         #   field = schema.field "name"
+         #   field.required? #=> true
+         #
+         def schema
+           return nil unless @table_gapi
+           Schema.from_gapi(@table_gapi.schema).freeze
+         end
+
+         ##
+         # The fields of the data, obtained from the schema of the table from
+         # which the data was read.
+         #
+         # @return [Array<Schema::Field>] An array of field objects.
+         #
+         # @example
+         #   require "google/cloud/bigquery"
+         #
+         #   bigquery = Google::Cloud::Bigquery.new
+         #   dataset = bigquery.dataset "my_dataset"
+         #   table = dataset.table "my_table"
+         #
+         #   data = table.data
+         #
+         #   data.fields.each do |field|
+         #     puts field.name
+         #   end
+         #
+         def fields
+           schema.fields
+         end
+
+         ##
+         # The names of the columns in the data, obtained from the schema of the
+         # table from which the data was read.
+         #
+         # @return [Array<Symbol>] An array of column names.
+         #
+         # @example
+         #   require "google/cloud/bigquery"
+         #
+         #   bigquery = Google::Cloud::Bigquery.new
+         #   dataset = bigquery.dataset "my_dataset"
+         #   table = dataset.table "my_table"
+         #
+         #   data = table.data
+         #
+         #   data.headers.each do |header|
+         #     puts header
+         #   end
+         #
+         def headers
+           schema.headers
+         end
+
+         ##
+         # The types of the fields in the data, obtained from the schema of the
+         # table from which the data was read. Types use the same format as the
+         # optional query parameter types.
+         #
+         # @return [Hash] A hash with field names as keys, and types as values.
+         #
+         # @example
+         #   require "google/cloud/bigquery"
+         #
+         #   bigquery = Google::Cloud::Bigquery.new
+         #   dataset = bigquery.dataset "my_dataset"
+         #   table = dataset.table "my_table"
+         #
+         #   data = table.data
+         #
+         #   data.param_types
+         #
+         def param_types
+           schema.param_types
+         end
+
+         ##
+         # The type of query statement, if valid. Possible values (new values
+         # might be added in the future):
+         #
+         # * "CREATE_MODEL": DDL statement, see [Using Data Definition Language
+         #   Statements](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language)
+         # * "CREATE_TABLE": DDL statement, see [Using Data Definition Language
+         #   Statements](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language)
+         # * "CREATE_TABLE_AS_SELECT": DDL statement, see [Using Data Definition
+         #   Language Statements](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language)
+         # * "CREATE_VIEW": DDL statement, see [Using Data Definition Language
+         #   Statements](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language)
+         # * "DELETE": DML statement, see [Data Manipulation Language Syntax](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax)
+         # * "DROP_MODEL": DDL statement, see [Using Data Definition Language
+         #   Statements](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language)
+         # * "DROP_TABLE": DDL statement, see [Using Data Definition Language
+         #   Statements](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language)
+         # * "DROP_VIEW": DDL statement, see [Using Data Definition Language
+         #   Statements](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language)
+         # * "INSERT": DML statement, see [Data Manipulation Language Syntax](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax)
+         # * "MERGE": DML statement, see [Data Manipulation Language Syntax](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax)
+         # * "SELECT": SQL query, see [Standard SQL Query Syntax](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax)
+         # * "UPDATE": DML statement, see [Data Manipulation Language Syntax](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax)
+         #
+         # @return [String, nil] The type of query statement.
+         #
+         def statement_type
+           job_gapi&.statistics&.query&.statement_type
+         end
+
+         ##
+         # Whether the query that created this data was a DDL statement.
+         #
+         # @see https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language
+         #   Using Data Definition Language Statements
+         #
+         # @return [Boolean]
+         #
+         # @example
+         #   require "google/cloud/bigquery"
+         #
+         #   bigquery = Google::Cloud::Bigquery.new
+         #   data = bigquery.query "CREATE TABLE my_table (x INT64)"
+         #
+         #   data.statement_type #=> "CREATE_TABLE"
+         #   data.ddl? #=> true
+         #
+         def ddl?
+           ["CREATE_MODEL", "CREATE_TABLE", "CREATE_TABLE_AS_SELECT", "CREATE_VIEW", "DROP_MODEL", "DROP_TABLE",
+            "DROP_VIEW"].include? statement_type
+         end
+
+         ##
+         # Whether the query that created this data was a DML statement.
+         #
+         # @see https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax
+         #   Data Manipulation Language Syntax
+         #
+         # @return [Boolean]
+         #
+         # @example
+         #   require "google/cloud/bigquery"
+         #
+         #   bigquery = Google::Cloud::Bigquery.new
+         #   data = bigquery.query "UPDATE my_table " \
+         #                         "SET x = x + 1 " \
+         #                         "WHERE x IS NOT NULL"
+         #
+         #   data.statement_type #=> "UPDATE"
+         #   data.dml? #=> true
+         #
+         def dml?
+           ["INSERT", "UPDATE", "MERGE", "DELETE"].include? statement_type
+         end
+
+         ##
+         # The DDL operation performed, possibly dependent on the pre-existence
+         # of the DDL target. (See {#ddl_target_table}.) Possible values (new
+         # values might be added in the future):
+         #
+         # * "CREATE": The query created the DDL target.
+         # * "SKIP": No-op. Example cases: the query is
+         #   `CREATE TABLE IF NOT EXISTS` while the table already exists, or the
+         #   query is `DROP TABLE IF EXISTS` while the table does not exist.
+         # * "REPLACE": The query replaced the DDL target. Example case: the
+         #   query is `CREATE OR REPLACE TABLE`, and the table already exists.
+         # * "DROP": The query deleted the DDL target.
+         #
+         # @return [String, nil] The DDL operation performed.
+         #
+         def ddl_operation_performed
+           job_gapi&.statistics&.query&.ddl_operation_performed
+         end
+
+         ##
+         # The DDL target routine, in reference state. (See {Routine#reference?}.)
+         # Present only for `CREATE/DROP FUNCTION/PROCEDURE` queries. (See
+         # {#statement_type}.)
+         #
+         # @return [Google::Cloud::Bigquery::Routine, nil] The DDL target routine, in
+         #   reference state.
+         #
+         def ddl_target_routine
+           ensure_service!
+           routine = job_gapi&.statistics&.query&.ddl_target_routine
+           return nil if routine.nil?
+           Google::Cloud::Bigquery::Routine.new_reference_from_gapi routine, service
+         end
+
+         ##
+         # The DDL target table, in reference state. (See {Table#reference?}.)
+         # Present only for `CREATE/DROP TABLE/VIEW` queries. (See
+         # {#statement_type}.)
+         #
+         # @return [Google::Cloud::Bigquery::Table, nil] The DDL target table, in
+         #   reference state.
+         #
+         def ddl_target_table
+           ensure_service!
+           table = job_gapi&.statistics&.query&.ddl_target_table
+           return nil if table.nil?
+           Google::Cloud::Bigquery::Table.new_reference_from_gapi table, service
+         end
+
+         ##
+         # The number of rows affected by a DML statement. Present only for DML
+         # statements `INSERT`, `UPDATE` or `DELETE`. (See {#statement_type}.)
+         #
+         # @return [Integer, nil] The number of rows affected by a DML statement,
+         #   or `nil` if the query is not a DML statement.
+         #
+         def num_dml_affected_rows
+           job_gapi&.statistics&.query&.num_dml_affected_rows
+         end
+
+         ##
+         # Whether there is a next page of data.
+         #
+         # @return [Boolean] `true` when there is a next page, `false` otherwise.
+         #
+         # @example
+         #   require "google/cloud/bigquery"
+         #
+         #   bigquery = Google::Cloud::Bigquery.new
+         #
+         #   sql = "SELECT word FROM `bigquery-public-data.samples.shakespeare`"
+         #   job = bigquery.query_job sql
+         #
+         #   job.wait_until_done!
+         #   data = job.data
+         #
+         #   data.count # 100000
+         #   data.total # 164656
+         #
+         #   # Iterate over the first page of results
+         #   data.each do |row|
+         #     puts row[:word]
+         #   end
+         #   # Retrieve the next page of results
+         #   data = data.next if data.next?
+         #
+         def next?
+           !token.nil?
+         end
+
+         ##
+         # Retrieves the next page of data.
+         #
+         # @return [Data] A new instance providing the next page of data.
+         #
+         # @example
+         #   require "google/cloud/bigquery"
+         #
+         #   bigquery = Google::Cloud::Bigquery.new
+         #
+         #   sql = "SELECT word FROM `bigquery-public-data.samples.shakespeare`"
+         #   job = bigquery.query_job sql
+         #
+         #   job.wait_until_done!
+         #   data = job.data
+         #
+         #   data.count # 100000
+         #   data.total # 164656
+         #
+         #   # Iterate over the first page of results
+         #   data.each do |row|
+         #     puts row[:word]
+         #   end
+         #   # Retrieve the next page of results
+         #   data = data.next if data.next?
+         #
+         def next
+           return nil unless next?
+           ensure_service!
+           data_json = service.list_tabledata \
+             @table_gapi.table_reference.dataset_id,
+             @table_gapi.table_reference.table_id,
+             token: token
+           self.class.from_gapi_json data_json, @table_gapi, job_gapi, @service
+         end
+
+         ##
+         # Retrieves all rows by repeatedly loading {#next} until {#next?}
+         # returns `false`. Calls the given block once for each row, which is
+         # passed as the parameter.
+         #
+         # An enumerator is returned if no block is given.
+         #
+         # This method may make several API calls until all rows are retrieved.
+         # Be sure to use as narrow a search criteria as possible. Please use
+         # with caution.
+         #
+         # @param [Integer] request_limit The upper limit of API requests to make
+         #   to load all data. Default is no limit.
+         # @yield [row] The block for accessing each row of data.
+         # @yieldparam [Hash] row The row object.
+         #
+         # @return [Enumerator] An enumerator providing access to all of the
+         #   data.
+         #
+         # @example Iterating over each row by passing a block:
+         #   require "google/cloud/bigquery"
+         #
+         #   bigquery = Google::Cloud::Bigquery.new
+         #   dataset = bigquery.dataset "my_dataset"
+         #   table = dataset.table "my_table"
+         #
+         #   table.data.all do |row|
+         #     puts row[:word]
+         #   end
+         #
+         # @example Using the enumerator by not passing a block:
+         #   require "google/cloud/bigquery"
+         #
+         #   bigquery = Google::Cloud::Bigquery.new
+         #   dataset = bigquery.dataset "my_dataset"
+         #   table = dataset.table "my_table"
+         #
+         #   words = table.data.all.map do |row|
+         #     row[:word]
+         #   end
+         #
+         #
+         # @example Limit the number of API calls made:
+         #   require "google/cloud/bigquery"
+         #
+         #   bigquery = Google::Cloud::Bigquery.new
+         #   dataset = bigquery.dataset "my_dataset"
+         #   table = dataset.table "my_table"
+         #
+         #   table.data.all(request_limit: 10) do |row|
+         #     puts row[:word]
+         #   end
+         #
+         def all request_limit: nil
+           request_limit = request_limit.to_i if request_limit
+
+           return enum_for :all, request_limit: request_limit unless block_given?
+
+           results = self
+           loop do
+             results.each { |r| yield r }
+             if request_limit
+               request_limit -= 1
+               break if request_limit.negative?
+             end
+             break unless results.next?
+             results = results.next
+           end
+         end
+
+         ##
+         # @private New Data from a response object.
+         def self.from_gapi_json gapi_json, table_gapi, job_gapi, service
+           rows = gapi_json[:rows] || []
+           rows = Convert.format_rows rows, table_gapi.schema.fields unless rows.empty?
+
+           data = new rows
+           data.table_gapi = table_gapi
+           data.gapi_json = gapi_json
+           data.job_gapi = job_gapi
+           data.service = service
+           data
+         end
+
+         protected
+
+         ##
+         # Raise an error unless an active service is available.
+         def ensure_service!
+           raise "Must have active connection" unless service
+         end
+       end
+     end
+   end
+ end
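
Taken together, `Data` behaves like an `Array` of row hashes plus paging helpers (`#next?`, `#next`, `#all`) and schema accessors (`#headers`, `#fields`, `#param_types`). A short usage sketch combining them; the dataset and table names are placeholders:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    table = bigquery.dataset("my_dataset").table("my_table")

    data = table.data
    puts data.headers.inspect # column names from the table schema
    puts data.total           # rows in the whole table, not just this page

    # Manual paging with #next?/#next...
    while data
      data.each { |row| puts row }
      data = data.next? ? data.next : nil
    end

    # ...or let #all page automatically, capping the number of API calls.
    table.data.all(request_limit: 10) { |row| puts row }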