google-cloud-bigquery 0.20.0

This diff shows the content of a publicly released package version as it appears in its public registry. It is provided for informational purposes only and reflects the changes between released versions of the package exactly as published.
@@ -0,0 +1,502 @@
+ # Copyright 2015 Google Inc. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+
+ require "google/cloud/bigquery/version"
+ require "google/cloud/errors"
+ require "google/apis/bigquery_v2"
+ require "pathname"
+ require "digest/md5"
+ require "mime/types"
+
+ module Google
+   module Cloud
+     module Bigquery
+       ##
+       # @private Represents the Bigquery service and API calls.
+       class Service
+         ##
+         # Alias to the Google Client API module
+         API = Google::Apis::BigqueryV2
+
+         # @private
+         attr_accessor :project
+
+         # @private
+         attr_accessor :credentials
+
+         ##
+         # Creates a new Service instance.
+         def initialize project, credentials, retries: nil, timeout: nil
+           @project = project
+           @credentials = credentials
+           @service = API::BigqueryService.new
+           @service.client_options.application_name = "google-cloud-bigquery"
+           @service.client_options.application_version = \
+             Google::Cloud::Bigquery::VERSION
+           @service.request_options.retries = retries || 3
+           @service.request_options.timeout_sec = timeout if timeout
+           @service.authorization = @credentials.client
+         end
+
+         def service
+           return mocked_service if mocked_service
+           @service
+         end
+         attr_accessor :mocked_service
+
+         ##
+         # Lists all datasets in the specified project to which you have
+         # been granted the READER dataset role.
+         def list_datasets options = {}
+           execute do
+             service.list_datasets \
+               @project, all: options[:all], max_results: options[:max],
+                         page_token: options[:token]
+           end
+         end
+
+         ##
+         # Returns the dataset specified by datasetID.
+         def get_dataset dataset_id
+           execute { service.get_dataset @project, dataset_id }
+         end
+
+         ##
+         # Creates a new empty dataset.
+         def insert_dataset new_dataset_gapi
+           execute { service.insert_dataset @project, new_dataset_gapi }
+         end
+
+         ##
+         # Updates information in an existing dataset, only replacing
+         # fields that are provided in the submitted dataset resource.
+         def patch_dataset dataset_id, patched_dataset_gapi
+           execute do
+             service.patch_dataset @project, dataset_id, patched_dataset_gapi
+           end
+         end
+
+         ##
+         # Deletes the dataset specified by the datasetId value.
+         # Before you can delete a dataset, you must delete all its tables,
+         # either manually or by specifying force: true in options.
+         # Immediately after deletion, you can create another dataset with
+         # the same name.
+         def delete_dataset dataset_id, force = nil
+           execute do
+             service.delete_dataset @project, dataset_id, delete_contents: force
+           end
+         end
+
+         ##
+         # Lists all tables in the specified dataset.
+         # Requires the READER dataset role.
+         def list_tables dataset_id, options = {}
+           execute do
+             service.list_tables @project, dataset_id,
+                                 max_results: options[:max],
+                                 page_token: options[:token]
+           end
+         end
+
+         def get_project_table project_id, dataset_id, table_id
+           execute { service.get_table project_id, dataset_id, table_id }
+         end
+
+         ##
+         # Gets the specified table resource by table ID.
+         # This method does not return the data in the table,
+         # it only returns the table resource,
+         # which describes the structure of this table.
+         def get_table dataset_id, table_id
+           execute { get_project_table @project, dataset_id, table_id }
+         end
+
+         ##
+         # Creates a new, empty table in the dataset.
+         def insert_table dataset_id, new_table_gapi
+           execute { service.insert_table @project, dataset_id, new_table_gapi }
+         end
+
+         ##
+         # Updates information in an existing table, replacing fields that
+         # are provided in the submitted table resource.
+         def patch_table dataset_id, table_id, patched_table_gapi
+           execute do
+             service.patch_table @project, dataset_id, table_id,
+                                 patched_table_gapi
+           end
+         end
+
+         ##
+         # Deletes the table specified by tableId from the dataset.
+         # If the table contains data, all the data will be deleted.
+         def delete_table dataset_id, table_id
+           execute { service.delete_table @project, dataset_id, table_id }
+         end
+
+         ##
+         # Retrieves data from the table.
+         def list_tabledata dataset_id, table_id, options = {}
+           execute do
+             service.list_table_data @project, dataset_id, table_id,
+                                     max_results: options.delete(:max),
+                                     page_token: options.delete(:token),
+                                     start_index: options.delete(:start)
+           end
+         end
+
+         def insert_tabledata dataset_id, table_id, rows, options = {}
+           insert_rows = Array(rows).map do |row|
+             Google::Apis::BigqueryV2::InsertAllTableDataRequest::Row.new(
+               insert_id: Digest::MD5.base64digest(row.inspect),
+               # Hash[row.map{|(k,v)| [k.to_s,v]}] for Hash<String,Object>
+               json: row
+             )
+           end
+           insert_req = Google::Apis::BigqueryV2::InsertAllTableDataRequest.new(
+             rows: insert_rows,
+             ignore_unknown_values: options[:ignore_unknown],
+             skip_invalid_rows: options[:skip_invalid]
+           )
+
+           execute do
+             service.insert_all_table_data(
+               @project, dataset_id, table_id, insert_req)
+           end
+         end
+
+         ##
+         # Lists all jobs in the specified project to which you have
+         # been granted the READER job role.
+         def list_jobs options = {}
+           execute do
+             service.list_jobs \
+               @project, all_users: options[:all], max_results: options[:max],
+                         page_token: options[:token], projection: "full",
+                         state_filter: options[:filter]
+           end
+         end
+
+         ##
+         # Returns the job specified by jobID.
+         def get_job job_id
+           execute { service.get_job @project, job_id }
+         end
+
+         def insert_job config
+           job_object = API::Job.new(
+             configuration: config
+           )
+           execute { service.insert_job @project, job_object }
+         end
+
+         def query_job query, options = {}
+           config = query_table_config(query, options)
+           execute { service.insert_job @project, config }
+         end
+
+         def query query, options = {}
+           execute { service.query_job @project, query_config(query, options) }
+         end
+
+         ##
+         # Returns the query data for the job
+         def job_query_results job_id, options = {}
+           execute do
+             service.get_job_query_results @project,
+                                           job_id,
+                                           max_results: options.delete(:max),
+                                           page_token: options.delete(:token),
+                                           start_index: options.delete(:start),
+                                           timeout_ms: options.delete(:timeout)
+           end
+         end
+
+         def copy_table source, target, options = {}
+           execute do
+             service.insert_job @project, copy_table_config(
+               source, target, options)
+           end
+         end
+
+         def extract_table table, storage_files, options = {}
+           execute do
+             service.insert_job \
+               @project, extract_table_config(table, storage_files, options)
+           end
+         end
+
+         def load_table_gs_url dataset_id, table_id, url, options = {}
+           execute do
+             service.insert_job \
+               @project, load_table_url_config(dataset_id, table_id,
+                                               url, options)
+           end
+         end
+
+         def load_table_file dataset_id, table_id, file, options = {}
+           execute do
+             service.insert_job \
+               @project, load_table_file_config(
+                 dataset_id, table_id, file, options),
+               upload_source: file, content_type: mime_type_for(file)
+           end
+         end
+
+         ##
+         # Extracts at least `tbl` group, and possibly `dts` and `prj` groups,
+         # from strings in the formats: "my_table", "my_dataset.my_table", or
+         # "my-project:my_dataset.my_table". Then merges project_id and
+         # dataset_id from the default table if they are missing.
+         def self.table_ref_from_s str, default_table_ref
+           str = str.to_s
+           m = /\A(((?<prj>\S*):)?(?<dts>\S*)\.)?(?<tbl>\S*)\z/.match str
+           unless m
+             fail ArgumentError, "unable to identify table from #{str.inspect}"
+           end
+           str_table_ref_hash = {
+             project_id: m["prj"],
+             dataset_id: m["dts"],
+             table_id: m["tbl"]
+           }.delete_if { |_, v| v.nil? }
+           new_table_ref_hash = default_table_ref.to_h.merge str_table_ref_hash
+           Google::Apis::BigqueryV2::TableReference.new new_table_ref_hash
+         end
+
+         def inspect
+           "#{self.class}(#{@project})"
+         end
+
+         protected
+
+         def table_ref_from tbl
+           return nil if tbl.nil?
+           API::TableReference.new(
+             project_id: tbl.project_id,
+             dataset_id: tbl.dataset_id,
+             table_id: tbl.table_id
+           )
+         end
+
+         def dataset_ref_from dts, pjt = nil
+           return nil if dts.nil?
+           if dts.respond_to? :dataset_id
+             API::DatasetReference.new(
+               project_id: (pjt || dts.project_id || @project),
+               dataset_id: dts.dataset_id
+             )
+           else
+             API::DatasetReference.new(
+               project_id: (pjt || @project),
+               dataset_id: dts
+             )
+           end
+         end
+
+         def load_table_file_opts dataset_id, table_id, file, options = {}
+           path = Pathname(file).to_path
+           {
+             destination_table: Google::Apis::BigqueryV2::TableReference.new(
+               project_id: @project, dataset_id: dataset_id, table_id: table_id),
+             create_disposition: create_disposition(options[:create]),
+             write_disposition: write_disposition(options[:write]),
+             source_format: source_format(path, options[:format]),
+             projection_fields: projection_fields(options[:projection_fields]),
+             allow_jagged_rows: options[:jagged_rows],
+             allow_quoted_newlines: options[:quoted_newlines],
+             encoding: options[:encoding], field_delimiter: options[:delimiter],
+             ignore_unknown_values: options[:ignore_unknown],
+             max_bad_records: options[:max_bad_records], quote: options[:quote],
+             schema: options[:schema], skip_leading_rows: options[:skip_leading]
+           }.delete_if { |_, v| v.nil? }
+         end
+
+         def load_table_file_config dataset_id, table_id, file, options = {}
+           load_opts = load_table_file_opts dataset_id, table_id, file, options
+           API::Job.new(
+             configuration: API::JobConfiguration.new(
+               load: API::JobConfigurationLoad.new(load_opts),
+               dry_run: options[:dryrun]
+             )
+           )
+         end
+
+         def load_table_url_opts dataset_id, table_id, url, options = {}
+           {
+             destination_table: Google::Apis::BigqueryV2::TableReference.new(
+               project_id: @project, dataset_id: dataset_id, table_id: table_id),
+             source_uris: Array(url),
+             create_disposition: create_disposition(options[:create]),
+             write_disposition: write_disposition(options[:write]),
+             source_format: source_format(url, options[:format]),
+             projection_fields: projection_fields(options[:projection_fields]),
+             allow_jagged_rows: options[:jagged_rows],
+             allow_quoted_newlines: options[:quoted_newlines],
+             encoding: options[:encoding], field_delimiter: options[:delimiter],
+             ignore_unknown_values: options[:ignore_unknown],
+             max_bad_records: options[:max_bad_records], quote: options[:quote],
+             schema: options[:schema], skip_leading_rows: options[:skip_leading]
+           }.delete_if { |_, v| v.nil? }
+         end
+
+         def load_table_url_config dataset_id, table_id, url, options = {}
+           load_opts = load_table_url_opts dataset_id, table_id, url, options
+           API::Job.new(
+             configuration: API::JobConfiguration.new(
+               load: API::JobConfigurationLoad.new(load_opts),
+               dry_run: options[:dryrun]
+             )
+           )
+         end
+
+         ##
+         # Job description for query job
+         def query_table_config query, options
+           dest_table = table_ref_from options[:table]
+           default_dataset = dataset_ref_from options[:dataset]
+           API::Job.new(
+             configuration: API::JobConfiguration.new(
+               query: API::JobConfigurationQuery.new(
+                 query: query,
+                 # tableDefinitions: { ... },
+                 priority: priority_value(options[:priority]),
+                 use_query_cache: options[:cache],
+                 destination_table: dest_table,
+                 create_disposition: create_disposition(options[:create]),
+                 write_disposition: write_disposition(options[:write]),
+                 allow_large_results: options[:large_results],
+                 flatten_results: options[:flatten],
+                 default_dataset: default_dataset
+               )
+             )
+           )
+         end
+
+         def query_config query, options = {}
+           dataset_config = dataset_ref_from options[:dataset], options[:project]
+
+           API::QueryRequest.new(
+             query: query,
+             max_results: options[:max],
+             default_dataset: dataset_config,
+             timeout_ms: options[:timeout],
+             dry_run: options[:dryrun],
+             use_query_cache: options[:cache]
+           )
+         end
+
+         ##
+         # Job description for copy job
+         def copy_table_config source, target, options = {}
+           API::Job.new(
+             configuration: API::JobConfiguration.new(
+               copy: API::JobConfigurationTableCopy.new(
+                 source_table: source,
+                 destination_table: target,
+                 create_disposition: create_disposition(options[:create]),
+                 write_disposition: write_disposition(options[:write])
+               ),
+               dry_run: options[:dryrun]
+             )
+           )
+         end
+
+         def extract_table_config table, storage_files, options = {}
+           storage_urls = Array(storage_files).map do |url|
+             url.respond_to?(:to_gs_url) ? url.to_gs_url : url
+           end
+           dest_format = source_format storage_urls.first, options[:format]
+           API::Job.new(
+             configuration: API::JobConfiguration.new(
+               extract: API::JobConfigurationExtract.new(
+                 destination_uris: Array(storage_urls),
+                 source_table: table,
+                 destination_format: dest_format,
+                 compression: options[:compression],
+                 field_delimiter: options[:delimiter],
+                 print_header: options[:header]
+               ),
+               dry_run: options[:dryrun]
+             )
+           )
+         end
+
+         def create_disposition str
+           { "create_if_needed" => "CREATE_IF_NEEDED",
+             "createifneeded" => "CREATE_IF_NEEDED",
+             "if_needed" => "CREATE_IF_NEEDED",
+             "needed" => "CREATE_IF_NEEDED",
+             "create_never" => "CREATE_NEVER",
+             "createnever" => "CREATE_NEVER",
+             "never" => "CREATE_NEVER" }[str.to_s.downcase]
+         end
+
+         def write_disposition str
+           { "write_truncate" => "WRITE_TRUNCATE",
+             "writetruncate" => "WRITE_TRUNCATE",
+             "truncate" => "WRITE_TRUNCATE",
+             "write_append" => "WRITE_APPEND",
+             "writeappend" => "WRITE_APPEND",
+             "append" => "WRITE_APPEND",
+             "write_empty" => "WRITE_EMPTY",
+             "writeempty" => "WRITE_EMPTY",
+             "empty" => "WRITE_EMPTY" }[str.to_s.downcase]
+         end
+
+         def priority_value str
+           { "batch" => "BATCH",
+             "interactive" => "INTERACTIVE" }[str.to_s.downcase]
+         end
+
+         def source_format path, format
+           val = { "csv" => "CSV",
+                   "json" => "NEWLINE_DELIMITED_JSON",
+                   "newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
+                   "avro" => "AVRO",
+                   "datastore" => "DATASTORE_BACKUP",
+                   "datastore_backup" => "DATASTORE_BACKUP"
+                 }[format.to_s.downcase]
+           return val unless val.nil?
+           return nil if path.nil?
+           return "CSV" if path.end_with? ".csv"
+           return "NEWLINE_DELIMITED_JSON" if path.end_with? ".json"
+           return "AVRO" if path.end_with? ".avro"
+           return "DATASTORE_BACKUP" if path.end_with? ".backup_info"
+           nil
+         end
+
+         def projection_fields array_or_str
+           Array(array_or_str) unless array_or_str.nil?
+         end
+
+         def mime_type_for file
+           mime_type = MIME::Types.of(Pathname(file).to_path).first.to_s
+           return nil if mime_type.empty?
+           mime_type
+         rescue
+           nil
+         end
+
+         def execute
+           yield
+         rescue Google::Apis::Error => e
+           raise Google::Cloud::Error.from_error(e)
+         end
+       end
+     end
+   end
+ end
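
The Service class above is marked `@private` and is normally constructed for you by the gem, but a minimal usage sketch makes the wiring clearer. The sketch below is illustrative only and not part of the released file: the project ID is a placeholder, the credentials stand-in merely satisfies the `#client` call made in `initialize`, and the hand-rolled double relies on the `mocked_service` accessor shown in the diff, which is how every wrapper method can be routed away from the real generated `BigqueryService`.

    require "google/cloud/bigquery"

    # Stand-in credentials for illustration: Service#initialize only calls
    # #client on whatever object it is given.
    FakeCredentials = Struct.new(:client)

    service = Google::Cloud::Bigquery::Service.new "my-project",
                                                   FakeCredentials.new(nil),
                                                   retries: 5, timeout: 120

    # A hand-rolled double for the generated BigqueryService; because every
    # wrapper method goes through #service, the double receives exactly the
    # arguments the real API client would.
    class FakeBigqueryService
      def list_datasets project_id, all: nil, max_results: nil, page_token: nil
        Google::Apis::BigqueryV2::DatasetList.new datasets: []
      end
    end

    service.mocked_service = FakeBigqueryService.new
    service.list_datasets(max: 10).datasets # => []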
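`table_ref_from_s` is the one public class method here, and its regex is easy to misread. A quick sketch of its behavior, assuming the gem is loaded as above (all IDs are placeholders):

    default = Google::Apis::BigqueryV2::TableReference.new(
      project_id: "my-project", dataset_id: "my_dataset", table_id: "unused")

    ref = Google::Cloud::Bigquery::Service.table_ref_from_s(
      "other-project:other_dataset.events", default)
    [ref.project_id, ref.dataset_id, ref.table_id]
    # => ["other-project", "other_dataset", "events"]

    # Anything missing from the string falls back to the default reference.
    ref = Google::Cloud::Bigquery::Service.table_ref_from_s "events", default
    [ref.project_id, ref.dataset_id, ref.table_id]
    # => ["my-project", "my_dataset", "events"]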
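`insert_tabledata` derives a deduplication key for each streamed row rather than asking the caller for one. A small sketch of that derivation, using only the standard-library call the method itself uses:

    require "digest/md5"

    row = { "name" => "Alice", "score" => 10 }

    # Same derivation as insert_tabledata: an MD5 of the row's #inspect
    # output, Base64-encoded. Identical rows produce identical insert_ids,
    # so a retried streaming insert can be deduplicated by the backend.
    insert_id = Digest::MD5.base64digest row.inspect
    insert_id.length # => 24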
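The disposition, priority, and format helpers normalize friendly option values into the enum strings the API expects; unrecognized values come back as nil, which `delete_if` then drops so the API default applies. They are protected, so this sketch pokes at them with `send` purely for illustration, reusing the `service` built in the first sketch:

    service.send :create_disposition, :needed                      # => "CREATE_IF_NEEDED"
    service.send :write_disposition, "truncate"                    # => "WRITE_TRUNCATE"
    service.send :priority_value, :batch                           # => "BATCH"
    service.send :source_format, nil, :json                        # => "NEWLINE_DELIMITED_JSON"
    service.send :source_format, "gs://bucket/events.avro", nil    # => "AVRO"
    service.send :source_format, "gs://bucket/events.parquet", nil # => nil (API default)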