gcloud 0.2.0 → 0.3.0

Files changed (41)
  1. checksums.yaml +8 -8
  2. data/AUTHENTICATION.md +3 -3
  3. data/CHANGELOG.md +12 -0
  4. data/OVERVIEW.md +30 -0
  5. data/lib/gcloud.rb +126 -9
  6. data/lib/gcloud/bigquery.rb +399 -0
  7. data/lib/gcloud/bigquery/connection.rb +592 -0
  8. data/lib/gcloud/bigquery/copy_job.rb +98 -0
  9. data/lib/gcloud/bigquery/credentials.rb +29 -0
  10. data/lib/gcloud/bigquery/data.rb +134 -0
  11. data/lib/gcloud/bigquery/dataset.rb +662 -0
  12. data/lib/gcloud/bigquery/dataset/list.rb +51 -0
  13. data/lib/gcloud/bigquery/errors.rb +62 -0
  14. data/lib/gcloud/bigquery/extract_job.rb +117 -0
  15. data/lib/gcloud/bigquery/insert_response.rb +80 -0
  16. data/lib/gcloud/bigquery/job.rb +283 -0
  17. data/lib/gcloud/bigquery/job/list.rb +55 -0
  18. data/lib/gcloud/bigquery/load_job.rb +199 -0
  19. data/lib/gcloud/bigquery/project.rb +512 -0
  20. data/lib/gcloud/bigquery/query_data.rb +135 -0
  21. data/lib/gcloud/bigquery/query_job.rb +151 -0
  22. data/lib/gcloud/bigquery/table.rb +827 -0
  23. data/lib/gcloud/bigquery/table/list.rb +55 -0
  24. data/lib/gcloud/bigquery/view.rb +419 -0
  25. data/lib/gcloud/credentials.rb +3 -3
  26. data/lib/gcloud/datastore.rb +15 -3
  27. data/lib/gcloud/datastore/credentials.rb +3 -2
  28. data/lib/gcloud/datastore/dataset.rb +5 -1
  29. data/lib/gcloud/datastore/transaction.rb +1 -1
  30. data/lib/gcloud/pubsub.rb +14 -3
  31. data/lib/gcloud/pubsub/credentials.rb +4 -4
  32. data/lib/gcloud/pubsub/project.rb +5 -1
  33. data/lib/gcloud/pubsub/topic.rb +5 -0
  34. data/lib/gcloud/storage.rb +14 -24
  35. data/lib/gcloud/storage/bucket.rb +10 -4
  36. data/lib/gcloud/storage/credentials.rb +3 -2
  37. data/lib/gcloud/storage/file.rb +8 -1
  38. data/lib/gcloud/storage/project.rb +5 -1
  39. data/lib/gcloud/upload.rb +54 -0
  40. data/lib/gcloud/version.rb +1 -1
  41. metadata +78 -2

data/lib/gcloud/bigquery/connection.rb
@@ -0,0 +1,592 @@
+ #--
+ # Copyright 2015 Google Inc. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #      http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ require "gcloud/version"
+ require "google/api_client"
+ require "digest/md5"
+
+ module Gcloud
+   module Bigquery
+     ##
+     # Represents the connection to BigQuery,
+     # as well as exposing the API calls.
+     class Connection #:nodoc:
+       API_VERSION = "v2"
+
+       attr_accessor :project
+       attr_accessor :credentials #:nodoc:
+
+       ##
+       # Creates a new Connection instance.
+       def initialize project, credentials #:nodoc:
+         @project = project
+         @credentials = credentials
+         @client = Google::APIClient.new application_name: "gcloud-ruby",
+                                         application_version: Gcloud::VERSION
+         @client.authorization = @credentials.client
+         @bigquery = @client.discovered_api "bigquery", API_VERSION
+       end
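
The constructor wires a Google::APIClient up to the discovered bigquery v2 API and takes its authorization from credentials.client, so any object that responds to #client with an OAuth2 client will do. A minimal construction sketch; the Credentials.default call is an assumption about the credentials class added elsewhere in this release (data/lib/gcloud/bigquery/credentials.rb), not something defined in this file:

    require "gcloud/bigquery"

    # Assumed helper: the gem's own credentials loader.
    creds = Gcloud::Bigquery::Credentials.default
    conn  = Gcloud::Bigquery::Connection.new "my-todo-project", creds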
+
+       ##
+       # Lists all datasets in the specified project to which you have
+       # been granted the READER dataset role.
+       def list_datasets options = {}
+         params = { projectId: @project,
+                    all: options.delete(:all),
+                    pageToken: options.delete(:token),
+                    maxResults: options.delete(:max)
+                  }.delete_if { |_, v| v.nil? }
+
+         @client.execute(
+           api_method: @bigquery.datasets.list,
+           parameters: params
+         )
+       end
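
Note the option-to-parameter translation here (:all to all, :token to pageToken, :max to maxResults) and how delete_if strips entries that were left nil. A paging sketch, assuming conn from above and hash-style access on the result data:

    resp = conn.list_datasets max: 10
    Array(resp.data["datasets"]).each { |ds| puts ds["id"] }

    # Fetch the next page only if the server reports one.
    token = resp.data["nextPageToken"]
    resp = conn.list_datasets max: 10, token: token if token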
+
+       ##
+       # Returns the dataset specified by datasetID.
+       def get_dataset dataset_id
+         @client.execute(
+           api_method: @bigquery.datasets.get,
+           parameters: { projectId: @project, datasetId: dataset_id }
+         )
+       end
+
+       ##
+       # Creates a new empty dataset.
+       def insert_dataset dataset_id, options = {}
+         @client.execute(
+           api_method: @bigquery.datasets.insert,
+           parameters: { projectId: @project },
+           body_object: insert_dataset_request(dataset_id, options)
+         )
+       end
+
+       ##
+       # Updates information in an existing dataset, only replacing
+       # fields that are provided in the submitted dataset resource.
+       def patch_dataset dataset_id, options = {}
+         project_id = options[:project_id] || @project
+         body = { friendlyName: options[:name],
+                  description: options[:description],
+                  defaultTableExpirationMs: options[:default_expiration]
+                }.delete_if { |_, v| v.nil? }
+
+         @client.execute(
+           api_method: @bigquery.datasets.patch,
+           parameters: { projectId: project_id, datasetId: dataset_id },
+           body_object: body
+         )
+       end
+
+       ##
+       # Deletes the dataset specified by the datasetId value.
+       # Before you can delete a dataset, you must delete all its tables,
+       # either manually or by specifying force: true in options.
+       # Immediately after deletion, you can create another dataset with
+       # the same name.
+       def delete_dataset dataset_id, options = {}
+         @client.execute(
+           api_method: @bigquery.datasets.delete,
+           parameters: { projectId: @project, datasetId: dataset_id,
+                         deleteContents: options[:force]
+                       }.delete_if { |_, v| v.nil? }
+         )
+       end
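
One inconsistency worth flagging: insert_dataset_request (defined later in this file) reads the expiration from options[:expiration], while patch_dataset reads options[:default_expiration]. The dataset lifecycle in one short sketch, using only the methods above:

    conn.insert_dataset "my_dataset", name: "My Dataset"
    conn.patch_dataset "my_dataset", description: "Scratch space"
    conn.delete_dataset "my_dataset", force: true  # sends deleteContents=true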
+
+       ##
+       # Lists all tables in the specified dataset.
+       # Requires the READER dataset role.
+       def list_tables dataset_id, options = {}
+         params = { projectId: @project,
+                    datasetId: dataset_id,
+                    pageToken: options.delete(:token),
+                    maxResults: options.delete(:max)
+                  }.delete_if { |_, v| v.nil? }
+
+         @client.execute(
+           api_method: @bigquery.tables.list,
+           parameters: params
+         )
+       end
+
+       def get_project_table project_id, dataset_id, table_id
+         @client.execute(
+           api_method: @bigquery.tables.get,
+           parameters: { projectId: project_id, datasetId: dataset_id,
+                         tableId: table_id }
+         )
+       end
+
+       ##
+       # Gets the specified table resource by table ID.
+       # This method does not return the data in the table,
+       # it only returns the table resource,
+       # which describes the structure of this table.
+       def get_table dataset_id, table_id
+         get_project_table @project, dataset_id, table_id
+       end
+
+       ##
+       # Creates a new, empty table in the dataset.
+       def insert_table dataset_id, table_id, options = {}
+         @client.execute(
+           api_method: @bigquery.tables.insert,
+           parameters: { projectId: @project, datasetId: dataset_id },
+           body_object: insert_table_request(dataset_id, table_id, options)
+         )
+       end
+
+       ##
+       # Updates information in an existing table, replacing fields that
+       # are provided in the submitted table resource.
+       def patch_table dataset_id, table_id, options = {}
+         @client.execute(
+           api_method: @bigquery.tables.patch,
+           parameters: { projectId: @project, datasetId: dataset_id,
+                         tableId: table_id },
+           body_object: patch_table_request(options)
+         )
+       end
+
+       ##
+       # Deletes the table specified by tableId from the dataset.
+       # If the table contains data, all the data will be deleted.
+       def delete_table dataset_id, table_id
+         @client.execute(
+           api_method: @bigquery.tables.delete,
+           parameters: { projectId: @project, datasetId: dataset_id,
+                         tableId: table_id }
+         )
+       end
+
+       ##
+       # Retrieves data from the table.
+       def list_tabledata dataset_id, table_id, options = {}
+         params = { projectId: @project,
+                    datasetId: dataset_id, tableId: table_id,
+                    pageToken: options.delete(:token),
+                    maxResults: options.delete(:max),
+                    startIndex: options.delete(:start)
+                  }.delete_if { |_, v| v.nil? }
+
+         @client.execute(
+           api_method: @bigquery.tabledata.list,
+           parameters: params
+         )
+       end
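
list_tabledata supports both token paging (pageToken) and offset paging (startIndex) alongside maxResults. A read sketch, assuming hash-style access on the result; rows come back in the API's cell format, an "f" array of "v" values:

    resp = conn.list_tabledata "my_dataset", "my_table", max: 100, start: 0
    Array(resp.data["rows"]).each do |row|
      puts row["f"].map { |cell| cell["v"] }.inspect
    end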
+
+       def insert_tabledata dataset_id, table_id, rows, options = {}
+         @client.execute(
+           api_method: @bigquery.tabledata.insert_all,
+           parameters: { projectId: @project,
+                         datasetId: dataset_id,
+                         tableId: table_id },
+           body_object: insert_tabledata_rows(rows, options)
+         )
+       end
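
Rows are passed through insert_tabledata_rows (defined below), which wraps them in a bigquery#tableDataInsertAllRequest and derives an insertId for each row. A streaming-insert sketch:

    rows = [{ "name" => "Heidi", "breed" => "Golden Retriever" },
            { "name" => "Aaron", "breed" => "German Shepherd" }]
    conn.insert_tabledata "my_dataset", "my_table", rows, skip_invalid: true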
+
+       ##
+       # Lists all jobs in the specified project to which you have
+       # been granted the READER job role.
+       def list_jobs options = {}
+         @client.execute(
+           api_method: @bigquery.jobs.list,
+           parameters: list_jobs_params(options)
+         )
+       end
+
+       ##
+       # Returns the job specified by jobID.
+       def get_job job_id
+         @client.execute(
+           api_method: @bigquery.jobs.get,
+           parameters: { projectId: @project, jobId: job_id }
+         )
+       end
+
+       def insert_job config
+         @client.execute(
+           api_method: @bigquery.jobs.insert,
+           parameters: { projectId: @project },
+           body_object: { "configuration" => config }
+         )
+       end
+
+       def query_job query, options = {}
+         @client.execute(
+           api_method: @bigquery.jobs.insert,
+           parameters: { projectId: @project },
+           body_object: query_table_config(query, options)
+         )
+       end
+
+       def query query, options = {}
+         @client.execute(
+           api_method: @bigquery.jobs.query,
+           parameters: { projectId: @project },
+           body_object: query_config(query, options)
+         )
+       end
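
Two query paths: #query calls jobs.query, which blocks server-side for up to timeoutMs and returns rows directly, while #query_job calls jobs.insert and returns immediately with a job resource to poll. A sketch of both, using the legacy [dataset.table] SQL of this era:

    # Synchronous, with a 10 second server-side timeout.
    resp = conn.query "SELECT name FROM [my_dataset.my_table]",
                      dataset: "my_dataset", timeout: 10_000

    # Asynchronous batch job; poll it via get_job (see below).
    job_resp = conn.query_job "SELECT COUNT(*) FROM [my_dataset.my_table]",
                              priority: "batch"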
+
+       ##
+       # Returns the query data for the job
+       def job_query_results job_id, options = {}
+         params = { projectId: @project, jobId: job_id,
+                    pageToken: options.delete(:token),
+                    maxResults: options.delete(:max),
+                    startIndex: options.delete(:start),
+                    timeoutMs: options.delete(:timeout)
+                  }.delete_if { |_, v| v.nil? }
+
+         @client.execute(
+           api_method: @bigquery.jobs.get_query_results,
+           parameters: params
+         )
+       end
+
+       def copy_table source, target, options = {}
+         @client.execute(
+           api_method: @bigquery.jobs.insert,
+           parameters: { projectId: @project },
+           body_object: copy_table_config(source, target, options)
+         )
+       end
+
+       def link_table table, urls, options = {}
+         @client.execute(
+           api_method: @bigquery.jobs.insert,
+           parameters: { projectId: @project },
+           body_object: link_table_config(table, urls, options)
+         )
+       end
+
+       def extract_table table, storage_files, options = {}
+         @client.execute(
+           api_method: @bigquery.jobs.insert,
+           parameters: { projectId: @project },
+           body_object: extract_table_config(table, storage_files, options)
+         )
+       end
+
+       def load_table table, storage_url, options = {}
+         @client.execute(
+           api_method: @bigquery.jobs.insert,
+           parameters: { projectId: @project },
+           body_object: load_table_config(table, storage_url,
+                                          Array(storage_url).first, options)
+         )
+       end
+
+       def load_multipart table, file, options = {}
+         media = load_media file
+
+         @client.execute(
+           api_method: @bigquery.jobs.insert,
+           media: media,
+           parameters: { projectId: @project, uploadType: "multipart" },
+           body_object: load_table_config(table, nil, file, options)
+         )
+       end
+
+       def load_resumable table, file, chunk_size = nil, options = {}
+         media = load_media file, chunk_size
+
+         result = @client.execute(
+           api_method: @bigquery.jobs.insert,
+           media: media,
+           parameters: { projectId: @project, uploadType: "resumable" },
+           body_object: load_table_config(table, nil, file, options)
+         )
+         upload = result.resumable_upload
+         result = @client.execute upload while upload.resumable?
+         result
+       end
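
load_resumable is the only method here that loops: after the initial insert it keeps re-executing the resumable upload until it is finished. For the async job methods in general, the pattern is to poll get_job and then pull rows with job_query_results. A polling sketch, assuming job_resp from the earlier query example and hash-style result access:

    job_id = job_resp.data["jobReference"]["jobId"]
    sleep 1 until conn.get_job(job_id).data["status"]["state"] == "DONE"
    results = conn.job_query_results job_id, max: 1_000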
+
+       protected
+
+       ##
+       # Make sure the object is converted to a hash
+       # Ruby 1.9.3 doesn't support to_h, so here we are.
+       def hashify hash
+         if hash.respond_to? :to_h
+           hash.to_h
+         else
+           Hash.try_convert(hash) || {}
+         end
+       end
+
+       ##
+       # Create the HTTP body for insert dataset
+       def insert_dataset_request dataset_id, options = {}
+         {
+           "kind" => "bigquery#dataset",
+           "datasetReference" => {
+             "projectId" => @project,
+             "datasetId" => dataset_id
+           },
+           "friendlyName" => options[:name],
+           "description" => options[:description],
+           "defaultTableExpirationMs" => options[:expiration]
+         }.delete_if { |_, v| v.nil? }
+       end
+
+       ##
+       # The parameters for the list_jobs call.
+       def list_jobs_params options = {}
+         params = { projectId: @project,
+                    allUsers: options.delete(:all),
+                    pageToken: options.delete(:token),
+                    maxResults: options.delete(:max),
+                    stateFilter: options.delete(:filter),
+                    projection: "full"
+                  }.delete_if { |_, v| v.nil? }
+         params
+       end
+
+       ##
+       # Create the HTTP body for insert table
+       def insert_table_request dataset_id, table_id, options = {}
+         hash = {
+           tableReference: {
+             projectId: @project, datasetId: dataset_id, tableId: table_id
+           },
+           friendlyName: options[:name],
+           description: options[:description],
+           schema: options[:schema]
+         }.delete_if { |_, v| v.nil? }
+         hash["view"] = { "query" => options[:query] } if options[:query]
+         hash
+       end
+
+       def patch_table_request options = {}
+         body = { friendlyName: options[:name],
+                  description: options[:description],
+                  schema: options[:schema]
+                }.delete_if { |_, v| v.nil? }
+         body["view"] = { "query" => options[:query] } if options[:query]
+         body
+       end
+
+       def insert_tabledata_rows rows, options = {}
+         {
+           "kind" => "bigquery#tableDataInsertAllRequest",
+           "skipInvalidRows" => options[:skip_invalid],
+           "ignoreUnknownValues" => options[:ignore_unknown],
+           "rows" => rows.map do |row|
+             { "insertId" => Digest::MD5.base64digest(row.inspect),
+               "json" => row }
+           end
+         }.delete_if { |_, v| v.nil? }
+       end
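
Because insertId is Digest::MD5.base64digest(row.inspect), identical rows in a request share the same insertId and BigQuery will deduplicate them; there is no option to supply your own id. Calling the protected builder via #send, purely to illustrate:

    body = conn.send :insert_tabledata_rows, [{ "name" => "Heidi" }]
    body["rows"].first["insertId"]  # stable for identical row content
    body["rows"].first["json"]      # => { "name" => "Heidi" }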
+
+       # rubocop:disable all
+       # Disabled rubocop because the API is verbose and so these methods
+       # are going to be verbose.
+
+       ##
+       # Job description for query job
+       def query_table_config query, options
+         dest_table = nil
+         if options[:table]
+           dest_table = { "projectId" => options[:table].project_id,
+                          "datasetId" => options[:table].dataset_id,
+                          "tableId" => options[:table].table_id }
+         end
+         default_dataset = nil
+         if dataset = options[:dataset]
+           if dataset.respond_to? :dataset_id
+             default_dataset = { "projectId" => dataset.project_id,
+                                 "datasetId" => dataset.dataset_id }
+           else
+             default_dataset = { "datasetId" => dataset }
+           end
+         end
+         {
+           "configuration" => {
+             "query" => {
+               "query" => query,
+               # "tableDefinitions" => { ... },
+               "priority" => priority_value(options[:priority]),
+               "useQueryCache" => options[:cache],
+               "destinationTable" => dest_table,
+               "createDisposition" => create_disposition(options[:create]),
+               "writeDisposition" => write_disposition(options[:write]),
+               "allowLargeResults" => options[:large_results],
+               "flattenResults" => options[:flatten],
+               "defaultDataset" => default_dataset
+             }.delete_if { |_, v| v.nil? }
+           }.delete_if { |_, v| v.nil? }
+         }
+       end
+
+       def query_config query, options = {}
+         dataset_config = nil
+         dataset_config = {
+           "datasetId" => options[:dataset],
+           "projectId" => options[:project] || @project
+         } if options[:dataset]
+
+         {
+           "kind" => "bigquery#queryRequest",
+           "query" => query,
+           "maxResults" => options[:max],
+           "defaultDataset" => dataset_config,
+           "timeoutMs" => options[:timeout],
+           "dryRun" => options[:dryrun],
+           "preserveNulls" => options[:preserve_nulls],
+           "useQueryCache" => options[:cache]
+         }.delete_if { |_, v| v.nil? }
+       end
+
+       ##
+       # Job description for copy job
+       def copy_table_config source, target, options = {}
+         {
+           "configuration" => {
+             "copy" => {
+               "sourceTable" => {
+                 "projectId" => source["tableReference"]["projectId"],
+                 "datasetId" => source["tableReference"]["datasetId"],
+                 "tableId" => source["tableReference"]["tableId"]
+               }.delete_if { |_, v| v.nil? },
+               "destinationTable" => {
+                 "projectId" => target["tableReference"]["projectId"],
+                 "datasetId" => target["tableReference"]["datasetId"],
+                 "tableId" => target["tableReference"]["tableId"]
+               }.delete_if { |_, v| v.nil? },
+               "createDisposition" => create_disposition(options[:create]),
+               "writeDisposition" => write_disposition(options[:write])
+             }.delete_if { |_, v| v.nil? },
+             "dryRun" => options[:dryrun]
+           }.delete_if { |_, v| v.nil? }
+         }
+       end
+
+       def link_table_config table, urls, options = {}
+         path = Array(urls).first
+         {
+           "configuration" => {
+             "link" => {
+               "sourceUri" => Array(urls),
+               "destinationTable" => {
+                 "projectId" => table["tableReference"]["projectId"],
+                 "datasetId" => table["tableReference"]["datasetId"],
+                 "tableId" => table["tableReference"]["tableId"]
+               }.delete_if { |_, v| v.nil? },
+               "createDisposition" => create_disposition(options[:create]),
+               "writeDisposition" => write_disposition(options[:write]),
+               "sourceFormat" => source_format(path, options[:format])
+             }.delete_if { |_, v| v.nil? },
+             "dryRun" => options[:dryrun]
+           }.delete_if { |_, v| v.nil? }
+         }
+       end
+
+       def extract_table_config table, storage_files, options = {}
+         storage_urls = Array(storage_files).map do |url|
+           url.respond_to?(:to_gs_url) ? url.to_gs_url : url
+         end
+         dest_format = source_format storage_urls.first, options[:format]
+         {
+           "configuration" => {
+             "extract" => {
+               "destinationUris" => Array(storage_urls),
+               "sourceTable" => {
+                 "projectId" => table["tableReference"]["projectId"],
+                 "datasetId" => table["tableReference"]["datasetId"],
+                 "tableId" => table["tableReference"]["tableId"]
+               }.delete_if { |_, v| v.nil? },
+               "destinationFormat" => dest_format
+             }.delete_if { |_, v| v.nil? },
+             "dryRun" => options[:dryrun]
+           }.delete_if { |_, v| v.nil? }
+         }
+       end
+
+       def load_table_config table, urls, file, options = {}
+         path = Array(urls).first
+         path = Pathname(file).to_path unless file.nil?
+         {
+           "configuration" => {
+             "load" => {
+               "sourceUris" => Array(urls),
+               "destinationTable" => {
+                 "projectId" => table["tableReference"]["projectId"],
+                 "datasetId" => table["tableReference"]["datasetId"],
+                 "tableId" => table["tableReference"]["tableId"]
+               }.delete_if { |_, v| v.nil? },
+               "createDisposition" => create_disposition(options[:create]),
+               "writeDisposition" => write_disposition(options[:write]),
+               "sourceFormat" => source_format(path, options[:format])
+             }.delete_if { |_, v| v.nil? },
+             "dryRun" => options[:dryrun]
+           }.delete_if { |_, v| v.nil? }
+         }
+       end
+
+       def create_disposition str #:nodoc:
+         { "create_if_needed" => "CREATE_IF_NEEDED",
+           "createifneeded" => "CREATE_IF_NEEDED",
+           "if_needed" => "CREATE_IF_NEEDED",
+           "needed" => "CREATE_IF_NEEDED",
+           "create_never" => "CREATE_NEVER",
+           "createnever" => "CREATE_NEVER",
+           "never" => "CREATE_NEVER" }[str.to_s.downcase]
+       end
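
The disposition helpers accept several spellings, as symbols or strings, and return nil for anything unrecognized; the config builders then drop the nil key via delete_if, so the API default applies. Again via #send, for illustration only:

    conn.send :create_disposition, :needed      # => "CREATE_IF_NEEDED"
    conn.send :write_disposition, "truncate"    # => "WRITE_TRUNCATE"
    conn.send :write_disposition, "bogus"       # => nil, key is dropped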
+
+       def write_disposition str #:nodoc:
+         { "write_truncate" => "WRITE_TRUNCATE",
+           "writetruncate" => "WRITE_TRUNCATE",
+           "truncate" => "WRITE_TRUNCATE",
+           "write_append" => "WRITE_APPEND",
+           "writeappend" => "WRITE_APPEND",
+           "append" => "WRITE_APPEND",
+           "write_empty" => "WRITE_EMPTY",
+           "writeempty" => "WRITE_EMPTY",
+           "empty" => "WRITE_EMPTY" }[str.to_s.downcase]
+       end
+
+       def priority_value str
+         { "batch" => "BATCH",
+           "interactive" => "INTERACTIVE" }[str.to_s.downcase]
+       end
+
+       def source_format path, format
+         val = { "csv" => "CSV",
+                 "json" => "NEWLINE_DELIMITED_JSON",
+                 "newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
+                 "avro" => "AVRO" }[format.to_s.downcase]
+         return val unless val.nil?
+         return nil if path.nil?
+         return "CSV" if path.end_with? ".csv"
+         return "NEWLINE_DELIMITED_JSON" if path.end_with? ".json"
+         return "AVRO" if path.end_with? ".avro"
+         nil
+       end
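
source_format gives an explicit :format option priority over the file extension, and falls through to nil when neither matches, leaving the API default in force. For example:

    conn.send :source_format, "data.json", nil   # => "NEWLINE_DELIMITED_JSON"
    conn.send :source_format, "data.txt", :csv   # => "CSV" (explicit wins)
    conn.send :source_format, "data.txt", nil    # => nil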
+
+       # rubocop:enable all
+
+       def load_media file, chunk_size = nil
+         local_path = Pathname(file).to_path
+         mime_type = "application/octet-stream"
+
+         media = Google::APIClient::UploadIO.new local_path, mime_type
+         media.chunk_size = chunk_size unless chunk_size.nil?
+         media
+       end
+     end
+   end
+ end
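
Note that load_media and load_table_config call Pathname() without this file requiring "pathname", so it leans on the rest of the gem having loaded it. Putting the pieces together in a final sketch: the table argument to the load methods only needs to support hash access to "tableReference", which the data of a get_table response does:

    require "pathname"  # be explicit in standalone scripts

    table = conn.get_table("my_dataset", "my_table").data
    conn.load_multipart table, "rows.csv", create: :needed, write: :append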