gcloud 0.2.0 → 0.3.0

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
Files changed (41)
  1. checksums.yaml +8 -8
  2. data/AUTHENTICATION.md +3 -3
  3. data/CHANGELOG.md +12 -0
  4. data/OVERVIEW.md +30 -0
  5. data/lib/gcloud.rb +126 -9
  6. data/lib/gcloud/bigquery.rb +399 -0
  7. data/lib/gcloud/bigquery/connection.rb +592 -0
  8. data/lib/gcloud/bigquery/copy_job.rb +98 -0
  9. data/lib/gcloud/bigquery/credentials.rb +29 -0
  10. data/lib/gcloud/bigquery/data.rb +134 -0
  11. data/lib/gcloud/bigquery/dataset.rb +662 -0
  12. data/lib/gcloud/bigquery/dataset/list.rb +51 -0
  13. data/lib/gcloud/bigquery/errors.rb +62 -0
  14. data/lib/gcloud/bigquery/extract_job.rb +117 -0
  15. data/lib/gcloud/bigquery/insert_response.rb +80 -0
  16. data/lib/gcloud/bigquery/job.rb +283 -0
  17. data/lib/gcloud/bigquery/job/list.rb +55 -0
  18. data/lib/gcloud/bigquery/load_job.rb +199 -0
  19. data/lib/gcloud/bigquery/project.rb +512 -0
  20. data/lib/gcloud/bigquery/query_data.rb +135 -0
  21. data/lib/gcloud/bigquery/query_job.rb +151 -0
  22. data/lib/gcloud/bigquery/table.rb +827 -0
  23. data/lib/gcloud/bigquery/table/list.rb +55 -0
  24. data/lib/gcloud/bigquery/view.rb +419 -0
  25. data/lib/gcloud/credentials.rb +3 -3
  26. data/lib/gcloud/datastore.rb +15 -3
  27. data/lib/gcloud/datastore/credentials.rb +3 -2
  28. data/lib/gcloud/datastore/dataset.rb +5 -1
  29. data/lib/gcloud/datastore/transaction.rb +1 -1
  30. data/lib/gcloud/pubsub.rb +14 -3
  31. data/lib/gcloud/pubsub/credentials.rb +4 -4
  32. data/lib/gcloud/pubsub/project.rb +5 -1
  33. data/lib/gcloud/pubsub/topic.rb +5 -0
  34. data/lib/gcloud/storage.rb +14 -24
  35. data/lib/gcloud/storage/bucket.rb +10 -4
  36. data/lib/gcloud/storage/credentials.rb +3 -2
  37. data/lib/gcloud/storage/file.rb +8 -1
  38. data/lib/gcloud/storage/project.rb +5 -1
  39. data/lib/gcloud/upload.rb +54 -0
  40. data/lib/gcloud/version.rb +1 -1
  41. metadata +78 -2
--- /dev/null
+++ data/lib/gcloud/bigquery/connection.rb
@@ -0,0 +1,592 @@
+#--
+# Copyright 2015 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+require "gcloud/version"
+require "google/api_client"
+require "digest/md5"
+
+module Gcloud
+  module Bigquery
+    ##
+    # Represents the connection to Bigquery and exposes the API calls.
+    class Connection #:nodoc:
+      API_VERSION = "v2"
+
+      attr_accessor :project
+      attr_accessor :credentials #:nodoc:
+
+      ##
+      # Creates a new Connection instance.
+      def initialize project, credentials #:nodoc:
+        @project = project
+        @credentials = credentials
+        @client = Google::APIClient.new application_name: "gcloud-ruby",
+                                        application_version: Gcloud::VERSION
+        @client.authorization = @credentials.client
+        @bigquery = @client.discovered_api "bigquery", API_VERSION
+      end
+
+      ##
+      # Lists all datasets in the specified project to which you have
+      # been granted the READER dataset role.
+      def list_datasets options = {}
+        params = { projectId: @project,
+                   all: options.delete(:all),
+                   pageToken: options.delete(:token),
+                   maxResults: options.delete(:max)
+                 }.delete_if { |_, v| v.nil? }
+
+        @client.execute(
+          api_method: @bigquery.datasets.list,
+          parameters: params
+        )
+      end
+
+      ##
+      # Returns the dataset specified by datasetId.
+      def get_dataset dataset_id
+        @client.execute(
+          api_method: @bigquery.datasets.get,
+          parameters: { projectId: @project, datasetId: dataset_id }
+        )
+      end
+
+      ##
+      # Creates a new empty dataset.
+      def insert_dataset dataset_id, options = {}
+        @client.execute(
+          api_method: @bigquery.datasets.insert,
+          parameters: { projectId: @project },
+          body_object: insert_dataset_request(dataset_id, options)
+        )
+      end
+
+      ##
+      # Updates information in an existing dataset, only replacing
+      # fields that are provided in the submitted dataset resource.
+      def patch_dataset dataset_id, options = {}
+        project_id = options[:project_id] || @project
+        body = { friendlyName: options[:name],
+                 description: options[:description],
+                 defaultTableExpirationMs: options[:default_expiration]
+               }.delete_if { |_, v| v.nil? }
+
+        @client.execute(
+          api_method: @bigquery.datasets.patch,
+          parameters: { projectId: project_id, datasetId: dataset_id },
+          body_object: body
+        )
+      end
+
+      ##
+      # Deletes the dataset specified by the datasetId value.
+      # Before you can delete a dataset, you must delete all its tables,
+      # either manually or by specifying force: true in options.
+      # Immediately after deletion, you can create another dataset with
+      # the same name.
+      def delete_dataset dataset_id, options = {}
+        @client.execute(
+          api_method: @bigquery.datasets.delete,
+          parameters: { projectId: @project, datasetId: dataset_id,
+                        deleteContents: options[:force]
+                      }.delete_if { |_, v| v.nil? }
+        )
+      end
+
+      ##
+      # Lists all tables in the specified dataset.
+      # Requires the READER dataset role.
+      def list_tables dataset_id, options = {}
+        params = { projectId: @project,
+                   datasetId: dataset_id,
+                   pageToken: options.delete(:token),
+                   maxResults: options.delete(:max)
+                 }.delete_if { |_, v| v.nil? }
+
+        @client.execute(
+          api_method: @bigquery.tables.list,
+          parameters: params
+        )
+      end
+
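+      ##
+      # Gets the specified table resource, allowing the project to
+      # differ from this connection's project.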
+      def get_project_table project_id, dataset_id, table_id
+        @client.execute(
+          api_method: @bigquery.tables.get,
+          parameters: { projectId: project_id, datasetId: dataset_id,
+                        tableId: table_id }
+        )
+      end
+
+      ##
+      # Gets the specified table resource by table ID.
+      # This method does not return the data in the table,
+      # it only returns the table resource,
+      # which describes the structure of this table.
+      def get_table dataset_id, table_id
+        get_project_table @project, dataset_id, table_id
+      end
+
+      ##
+      # Creates a new, empty table in the dataset.
+      def insert_table dataset_id, table_id, options = {}
+        @client.execute(
+          api_method: @bigquery.tables.insert,
+          parameters: { projectId: @project, datasetId: dataset_id },
+          body_object: insert_table_request(dataset_id, table_id, options)
+        )
+      end
+
+      ##
+      # Updates information in an existing table, replacing fields that
+      # are provided in the submitted table resource.
+      def patch_table dataset_id, table_id, options = {}
+        @client.execute(
+          api_method: @bigquery.tables.patch,
+          parameters: { projectId: @project, datasetId: dataset_id,
+                        tableId: table_id },
+          body_object: patch_table_request(options)
+        )
+      end
+
+      ##
+      # Deletes the table specified by tableId from the dataset.
+      # If the table contains data, all the data will be deleted.
+      def delete_table dataset_id, table_id
+        @client.execute(
+          api_method: @bigquery.tables.delete,
+          parameters: { projectId: @project, datasetId: dataset_id,
+                        tableId: table_id }
+        )
+      end
+
+      ##
+      # Retrieves data from the table.
+      def list_tabledata dataset_id, table_id, options = {}
+        params = { projectId: @project,
+                   datasetId: dataset_id, tableId: table_id,
+                   pageToken: options.delete(:token),
+                   maxResults: options.delete(:max),
+                   startIndex: options.delete(:start)
+                 }.delete_if { |_, v| v.nil? }
+
+        @client.execute(
+          api_method: @bigquery.tabledata.list,
+          parameters: params
+        )
+      end
+
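+      ##
+      # Streams rows of data into the table using the insertAll API.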
+      def insert_tabledata dataset_id, table_id, rows, options = {}
+        @client.execute(
+          api_method: @bigquery.tabledata.insert_all,
+          parameters: { projectId: @project,
+                        datasetId: dataset_id,
+                        tableId: table_id },
+          body_object: insert_tabledata_rows(rows, options)
+        )
+      end
+
+      ##
+      # Lists all jobs in the specified project to which you have
+      # been granted the READER job role.
+      def list_jobs options = {}
+        @client.execute(
+          api_method: @bigquery.jobs.list,
+          parameters: list_jobs_params(options)
+        )
+      end
+
+      ##
+      # Returns the job specified by jobId.
+      def get_job job_id
+        @client.execute(
+          api_method: @bigquery.jobs.get,
+          parameters: { projectId: @project, jobId: job_id }
+        )
+      end
+
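+      ##
+      # Starts an asynchronous job with the given configuration.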
+      def insert_job config
+        @client.execute(
+          api_method: @bigquery.jobs.insert,
+          parameters: { projectId: @project },
+          body_object: { "configuration" => config }
+        )
+      end
+
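+      ##
+      # Starts an asynchronous query job. Results are retrieved
+      # later with job_query_results.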
+      def query_job query, options = {}
+        @client.execute(
+          api_method: @bigquery.jobs.insert,
+          parameters: { projectId: @project },
+          body_object: query_table_config(query, options)
+        )
+      end
+
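+      ##
+      # Runs a query synchronously and returns the results if the
+      # query completes within the specified timeout.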
+      def query query, options = {}
+        @client.execute(
+          api_method: @bigquery.jobs.query,
+          parameters: { projectId: @project },
+          body_object: query_config(query, options)
+        )
+      end
+
+      ##
+      # Returns the query data for the job
+      def job_query_results job_id, options = {}
+        params = { projectId: @project, jobId: job_id,
+                   pageToken: options.delete(:token),
+                   maxResults: options.delete(:max),
+                   startIndex: options.delete(:start),
+                   timeoutMs: options.delete(:timeout)
+                 }.delete_if { |_, v| v.nil? }
+
+        @client.execute(
+          api_method: @bigquery.jobs.get_query_results,
+          parameters: params
+        )
+      end
+
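+      ##
+      # Starts a job that copies the source table to the target table.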
+      def copy_table source, target, options = {}
+        @client.execute(
+          api_method: @bigquery.jobs.insert,
+          parameters: { projectId: @project },
+          body_object: copy_table_config(source, target, options)
+        )
+      end
+
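+      ##
+      # Starts a link job that populates the table from the given
+      # source URLs.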
+      def link_table table, urls, options = {}
+        @client.execute(
+          api_method: @bigquery.jobs.insert,
+          parameters: { projectId: @project },
+          body_object: link_table_config(table, urls, options)
+        )
+      end
+
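+      ##
+      # Starts a job that extracts table data into the given Google
+      # Cloud Storage files.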
+      def extract_table table, storage_files, options = {}
+        @client.execute(
+          api_method: @bigquery.jobs.insert,
+          parameters: { projectId: @project },
+          body_object: extract_table_config(table, storage_files, options)
+        )
+      end
+
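+      ##
+      # Starts a job that loads data into the table from the given
+      # Google Cloud Storage URL or URLs.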
+      def load_table table, storage_url, options = {}
+        @client.execute(
+          api_method: @bigquery.jobs.insert,
+          parameters: { projectId: @project },
+          body_object: load_table_config(table, storage_url,
+                                         Array(storage_url).first, options)
+        )
+      end
+
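+      ##
+      # Loads a local file into the table in a single multipart
+      # upload request.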
+      def load_multipart table, file, options = {}
+        media = load_media file
+
+        @client.execute(
+          api_method: @bigquery.jobs.insert,
+          media: media,
+          parameters: { projectId: @project, uploadType: "multipart" },
+          body_object: load_table_config(table, nil, file, options)
+        )
+      end
+
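+      ##
+      # Loads a local file into the table using a resumable upload,
+      # optionally in chunks of the given size, continuing until the
+      # upload is complete.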
+      def load_resumable table, file, chunk_size = nil, options = {}
+        media = load_media file, chunk_size
+
+        result = @client.execute(
+          api_method: @bigquery.jobs.insert,
+          media: media,
+          parameters: { projectId: @project, uploadType: "resumable" },
+          body_object: load_table_config(table, nil, file, options)
+        )
+        upload = result.resumable_upload
+        result = @client.execute upload while upload.resumable?
+        result
+      end
+
+      protected
+
+      ##
+      # Make sure the object is converted to a hash
+      # Ruby 1.9.3 doesn't support to_h, so here we are.
+      def hashify hash
+        if hash.respond_to? :to_h
+          hash.to_h
+        else
+          Hash.try_convert(hash) || {}
+        end
+      end
+
+      ##
+      # Create the HTTP body for insert dataset
+      def insert_dataset_request dataset_id, options = {}
+        {
+          "kind" => "bigquery#dataset",
+          "datasetReference" => {
+            "projectId" => @project,
+            "datasetId" => dataset_id
+          },
+          "friendlyName" => options[:name],
+          "description" => options[:description],
+          "defaultTableExpirationMs" => options[:expiration]
+        }.delete_if { |_, v| v.nil? }
+      end
+
+      ##
+      # The parameters for the list_jobs call.
+      def list_jobs_params options = {}
+        params = { projectId: @project,
+                   allUsers: options.delete(:all),
+                   pageToken: options.delete(:token),
+                   maxResults: options.delete(:max),
+                   stateFilter: options.delete(:filter),
+                   projection: "full"
+                 }.delete_if { |_, v| v.nil? }
+        params
+      end
+
+      ##
+      # Create the HTTP body for insert table
+      def insert_table_request dataset_id, table_id, options = {}
+        hash = {
+          tableReference: {
+            projectId: @project, datasetId: dataset_id, tableId: table_id
+          },
+          friendlyName: options[:name],
+          description: options[:description],
+          schema: options[:schema]
+        }.delete_if { |_, v| v.nil? }
+        hash["view"] = { "query" => options[:query] } if options[:query]
+        hash
+      end
+
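+      ##
+      # Create the HTTP body for patch table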
+      def patch_table_request options = {}
+        body = { friendlyName: options[:name],
+                 description: options[:description],
+                 schema: options[:schema]
+               }.delete_if { |_, v| v.nil? }
+        body["view"] = { "query" => options[:query] } if options[:query]
+        body
+      end
+
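+      ##
+      # Create the HTTP body for insert tabledata. Each row is given
+      # an insertId derived from an MD5 digest of its contents, so
+      # BigQuery can de-duplicate retried inserts.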
+      def insert_tabledata_rows rows, options = {}
+        {
+          "kind" => "bigquery#tableDataInsertAllRequest",
+          "skipInvalidRows" => options[:skip_invalid],
+          "ignoreUnknownValues" => options[:ignore_unknown],
+          "rows" => rows.map do |row|
+            { "insertId" => Digest::MD5.base64digest(row.inspect),
+              "json" => row }
+          end
+        }.delete_if { |_, v| v.nil? }
+      end
+
+      # rubocop:disable all
+      # Disabled rubocop because the API is verbose and so these methods
+      # are going to be verbose.
+
+      ##
+      # Job description for query job
+      def query_table_config query, options
+        dest_table = nil
+        if options[:table]
+          dest_table = { "projectId" => options[:table].project_id,
+                         "datasetId" => options[:table].dataset_id,
+                         "tableId" => options[:table].table_id }
+        end
+        default_dataset = nil
+        if dataset = options[:dataset]
+          if dataset.respond_to? :dataset_id
+            default_dataset = { "projectId" => dataset.project_id,
+                                "datasetId" => dataset.dataset_id }
+          else
+            default_dataset = { "datasetId" => dataset }
+          end
+        end
+        {
+          "configuration" => {
+            "query" => {
+              "query" => query,
+              # "tableDefinitions" => { ... },
+              "priority" => priority_value(options[:priority]),
+              "useQueryCache" => options[:cache],
+              "destinationTable" => dest_table,
+              "createDisposition" => create_disposition(options[:create]),
+              "writeDisposition" => write_disposition(options[:write]),
+              "allowLargeResults" => options[:large_results],
+              "flattenResults" => options[:flatten],
+              "defaultDataset" => default_dataset
+            }.delete_if { |_, v| v.nil? }
+          }.delete_if { |_, v| v.nil? }
+        }
+      end
+
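+      ##
+      # Request body for the synchronous query call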
+      def query_config query, options = {}
+        dataset_config = nil
+        dataset_config = {
+          "datasetId" => options[:dataset],
+          "projectId" => options[:project] || @project
+        } if options[:dataset]
+
+        {
+          "kind" => "bigquery#queryRequest",
+          "query" => query,
+          "maxResults" => options[:max],
+          "defaultDataset" => dataset_config,
+          "timeoutMs" => options[:timeout],
+          "dryRun" => options[:dryrun],
+          "preserveNulls" => options[:preserve_nulls],
+          "useQueryCache" => options[:cache]
+        }.delete_if { |_, v| v.nil? }
+      end
+
+      ##
+      # Job description for copy job
+      def copy_table_config source, target, options = {}
+        {
+          "configuration" => {
+            "copy" => {
+              "sourceTable" => {
+                "projectId" => source["tableReference"]["projectId"],
+                "datasetId" => source["tableReference"]["datasetId"],
+                "tableId" => source["tableReference"]["tableId"]
+              }.delete_if { |_, v| v.nil? },
+              "destinationTable" => {
+                "projectId" => target["tableReference"]["projectId"],
+                "datasetId" => target["tableReference"]["datasetId"],
+                "tableId" => target["tableReference"]["tableId"]
+              }.delete_if { |_, v| v.nil? },
+              "createDisposition" => create_disposition(options[:create]),
+              "writeDisposition" => write_disposition(options[:write])
+            }.delete_if { |_, v| v.nil? },
+            "dryRun" => options[:dryrun]
+          }.delete_if { |_, v| v.nil? }
+        }
+      end
+
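+      ##
+      # Job description for link job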
+      def link_table_config table, urls, options = {}
+        path = Array(urls).first
+        {
+          "configuration" => {
+            "link" => {
+              "sourceUri" => Array(urls),
+              "destinationTable" => {
+                "projectId" => table["tableReference"]["projectId"],
+                "datasetId" => table["tableReference"]["datasetId"],
+                "tableId" => table["tableReference"]["tableId"]
+              }.delete_if { |_, v| v.nil? },
+              "createDisposition" => create_disposition(options[:create]),
+              "writeDisposition" => write_disposition(options[:write]),
+              "sourceFormat" => source_format(path, options[:format])
+            }.delete_if { |_, v| v.nil? },
+            "dryRun" => options[:dryrun]
+          }.delete_if { |_, v| v.nil? }
+        }
+      end
+
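+      ##
+      # Job description for extract job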
+      def extract_table_config table, storage_files, options = {}
+        storage_urls = Array(storage_files).map do |url|
+          url.respond_to?(:to_gs_url) ? url.to_gs_url : url
+        end
+        dest_format = source_format storage_urls.first, options[:format]
+        {
+          "configuration" => {
+            "extract" => {
+              "destinationUris" => Array(storage_urls),
+              "sourceTable" => {
+                "projectId" => table["tableReference"]["projectId"],
+                "datasetId" => table["tableReference"]["datasetId"],
+                "tableId" => table["tableReference"]["tableId"]
+              }.delete_if { |_, v| v.nil? },
+              "destinationFormat" => dest_format
+            }.delete_if { |_, v| v.nil? },
+            "dryRun" => options[:dryrun]
+          }.delete_if { |_, v| v.nil? }
+        }
+      end
+
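+      ##
+      # Job description for load job. The source format is taken
+      # from the local file path when a file is given, otherwise
+      # from the first URL.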
+      def load_table_config table, urls, file, options = {}
+        path = Array(urls).first
+        path = Pathname(file).to_path unless file.nil?
+        {
+          "configuration" => {
+            "load" => {
+              "sourceUris" => Array(urls),
+              "destinationTable" => {
+                "projectId" => table["tableReference"]["projectId"],
+                "datasetId" => table["tableReference"]["datasetId"],
+                "tableId" => table["tableReference"]["tableId"]
+              }.delete_if { |_, v| v.nil? },
+              "createDisposition" => create_disposition(options[:create]),
+              "writeDisposition" => write_disposition(options[:write]),
+              "sourceFormat" => source_format(path, options[:format])
+            }.delete_if { |_, v| v.nil? },
+            "dryRun" => options[:dryrun]
+          }.delete_if { |_, v| v.nil? }
+        }
+      end
+
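+      ##
+      # Converts a friendly option value to the API's
+      # createDisposition string; returns nil when unrecognized.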
+      def create_disposition str #:nodoc:
+        { "create_if_needed" => "CREATE_IF_NEEDED",
+          "createifneeded" => "CREATE_IF_NEEDED",
+          "if_needed" => "CREATE_IF_NEEDED",
+          "needed" => "CREATE_IF_NEEDED",
+          "create_never" => "CREATE_NEVER",
+          "createnever" => "CREATE_NEVER",
+          "never" => "CREATE_NEVER" }[str.to_s.downcase]
+      end
+
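+      ##
+      # Converts a friendly option value to the API's
+      # writeDisposition string; returns nil when unrecognized.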
+      def write_disposition str #:nodoc:
+        { "write_truncate" => "WRITE_TRUNCATE",
+          "writetruncate" => "WRITE_TRUNCATE",
+          "truncate" => "WRITE_TRUNCATE",
+          "write_append" => "WRITE_APPEND",
+          "writeappend" => "WRITE_APPEND",
+          "append" => "WRITE_APPEND",
+          "write_empty" => "WRITE_EMPTY",
+          "writeempty" => "WRITE_EMPTY",
+          "empty" => "WRITE_EMPTY" }[str.to_s.downcase]
+      end
+
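+      ##
+      # Converts the :priority option to the API's BATCH or
+      # INTERACTIVE value.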
+      def priority_value str
+        { "batch" => "BATCH",
+          "interactive" => "INTERACTIVE" }[str.to_s.downcase]
+      end
+
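+      ##
+      # Resolves the source format from the :format option, falling
+      # back to the extension of the given path.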
+      def source_format path, format
+        val = { "csv" => "CSV",
+                "json" => "NEWLINE_DELIMITED_JSON",
+                "newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
+                "avro" => "AVRO" }[format.to_s.downcase]
+        return val unless val.nil?
+        return nil if path.nil?
+        return "CSV" if path.end_with? ".csv"
+        return "NEWLINE_DELIMITED_JSON" if path.end_with? ".json"
+        return "AVRO" if path.end_with? ".avro"
+        nil
+      end
+
+      # rubocop:enable all
+
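+      ##
+      # Wraps a local file in an UploadIO object, optionally setting
+      # the chunk size used for resumable uploads.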
+      def load_media file, chunk_size = nil
+        local_path = Pathname(file).to_path
+        mime_type = "application/octet-stream"
+
+        media = Google::APIClient::UploadIO.new local_path, mime_type
+        media.chunk_size = chunk_size unless chunk_size.nil?
+        media
+      end
+    end
+  end
+end