google-cloud-bigquery 0.20.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,1141 @@
1
+ # Copyright 2015 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/cloud/errors"
17
+ require "google/cloud/bigquery/service"
18
+ require "google/cloud/bigquery/view"
19
+ require "google/cloud/bigquery/data"
20
+ require "google/cloud/bigquery/table/list"
21
+ require "google/cloud/bigquery/schema"
22
+ require "google/cloud/bigquery/insert_response"
23
+ require "google/apis/bigquery_v2"
24
+
25
+ module Google
26
+ module Cloud
27
+ module Bigquery
28
+ ##
29
+ # # Table
30
+ #
31
+ # A named resource representing a BigQuery table that holds zero or more
32
+ # records. Every table is defined by a schema that may contain nested and
33
+ # repeated fields.
34
+ #
35
+ # @see https://cloud.google.com/bigquery/preparing-data-for-bigquery
36
+ # Preparing Data for BigQuery
37
+ #
38
+ # @example
39
+ # require "google/cloud"
40
+ #
41
+ # gcloud = Google::Cloud.new
42
+ # bigquery = gcloud.bigquery
43
+ # dataset = bigquery.dataset "my_dataset"
44
+ #
45
+ # table = dataset.create_table "my_table" do |schema|
46
+ # schema.string "first_name", mode: :required
47
+ # schema.record "cities_lived", mode: :repeated do |nested_schema|
48
+ # nested_schema.string "place", mode: :required
49
+ # nested_schema.integer "number_of_years", mode: :required
50
+ # end
51
+ # end
52
+ #
53
+ # row = {
54
+ # "first_name" => "Alice",
55
+ # "cities_lived" => [
56
+ # {
57
+ # "place" => "Seattle",
58
+ # "number_of_years" => 5
59
+ # },
60
+ # {
61
+ # "place" => "Stockholm",
62
+ # "number_of_years" => 6
63
+ # }
64
+ # ]
65
+ # }
66
+ # table.insert row
67
+ #
68
+ class Table
69
+ ##
70
+ # @private The Service object.
71
+ attr_accessor :service
72
+
73
+ ##
74
+ # @private The Google API Client object.
75
+ attr_accessor :gapi
76
+
77
+ ##
78
+ # @private Create an empty Table object.
79
##
# @private Builds a detached Table: no service connection and a
# blank underlying API resource.
def initialize
  @gapi = Google::Apis::BigqueryV2::Table.new
  @service = nil
end
83
+
84
+ ##
85
+ # A unique ID for this table.
86
+ # The ID must contain only letters (a-z, A-Z), numbers (0-9),
87
+ # or underscores (_). The maximum length is 1,024 characters.
88
+ #
89
+ # @!group Attributes
90
+ #
91
##
# The unique ID of this table (letters, numbers, underscores; up to
# 1,024 characters), read from the table reference.
def table_id
  reference = @gapi.table_reference
  reference.table_id
end
94
+
95
+ ##
96
+ # The ID of the `Dataset` containing this table.
97
+ #
98
+ # @!group Attributes
99
+ #
100
##
# The ID of the dataset this table belongs to, read from the table
# reference.
def dataset_id
  reference = @gapi.table_reference
  reference.dataset_id
end
103
+
104
+ ##
105
+ # The ID of the `Project` containing this table.
106
+ #
107
+ # @!group Attributes
108
+ #
109
##
# The ID of the project this table belongs to, read from the table
# reference.
def project_id
  reference = @gapi.table_reference
  reference.project_id
end
112
+
113
+ ##
114
+ # @private The gapi fragment containing the Project ID, Dataset ID, and
115
+ # Table ID as a camel-cased hash.
116
##
# @private The table reference fragment (project, dataset, and table
# IDs), converted to a hash when the underlying object supports it.
def table_ref
  ref = @gapi.table_reference
  ref.respond_to?(:to_hash) ? ref.to_hash : ref
end
121
+
122
+ ##
123
+ # The combined Project ID, Dataset ID, and Table ID for this table, in
124
+ # the format specified by the [Query
125
+ # Reference](https://cloud.google.com/bigquery/query-reference#from):
126
+ # `project_name:datasetId.tableId`. To use this value in queries see
127
+ # {#query_id}.
128
+ #
129
+ # @!group Attributes
130
+ #
131
##
# The combined `project:dataset.table` identifier as reported by the
# API resource. For a query-safe form see {#query_id}.
def id
  @gapi.id
end
134
+
135
+ ##
136
+ # The value returned by {#id}, wrapped in square brackets if the Project
137
+ # ID contains dashes, as specified by the [Query
138
+ # Reference](https://cloud.google.com/bigquery/query-reference#from).
139
+ # Useful in queries.
140
+ #
141
+ # @example
142
+ # require "google/cloud"
143
+ #
144
+ # gcloud = Google::Cloud.new
145
+ # bigquery = gcloud.bigquery
146
+ # dataset = bigquery.dataset "my_dataset"
147
+ # table = dataset.table "my_table"
148
+ #
149
+ # data = bigquery.query "SELECT name FROM #{table.query_id}"
150
+ #
151
+ # @!group Attributes
152
+ #
153
##
# The table identifier for use in queries: {#id} wrapped in square
# brackets when the project ID contains a dash, as required by the
# BigQuery query reference syntax.
def query_id
  return "[#{id}]" if project_id.include? "-"
  id
end
156
+
157
+ ##
158
+ # The name of the table.
159
+ #
160
+ # @!group Attributes
161
+ #
162
##
# The user-friendly name of the table.
def name
  @gapi.friendly_name
end
165
+
166
+ ##
167
+ # Updates the name of the table.
168
+ #
169
+ # @!group Attributes
170
+ #
171
##
# Sets the user-friendly name of the table and immediately persists
# the change with a patch request.
def name= new_name
  @gapi.update! friendly_name: new_name
  patch_gapi! :friendly_name
end
175
+
176
+ ##
177
+ # A string hash of the dataset.
178
+ #
179
+ # @!group Attributes
180
+ #
181
##
# The ETag hash of the table resource; forces a full load of the
# resource first, since list results omit this field.
def etag
  ensure_full_data!
  @gapi.etag
end
185
+
186
+ ##
187
+ # A URL that can be used to access the dataset using the REST API.
188
+ #
189
+ # @!group Attributes
190
+ #
191
##
# The REST URL of this table resource; forces a full load of the
# resource first, since list results omit this field.
def api_url
  ensure_full_data!
  @gapi.self_link
end
195
+
196
+ ##
197
+ # The description of the table.
198
+ #
199
+ # @!group Attributes
200
+ #
201
##
# The description of the table; forces a full load of the resource
# first, since list results omit this field.
def description
  ensure_full_data!
  @gapi.description
end
205
+
206
+ ##
207
+ # Updates the description of the table.
208
+ #
209
+ # @!group Attributes
210
+ #
211
##
# Sets the description of the table and immediately persists the
# change with a patch request.
def description= new_description
  @gapi.update! description: new_description
  patch_gapi! :description
end
215
+
216
+ ##
217
+ # The number of bytes in the table.
218
+ #
219
+ # @!group Data
220
+ #
221
##
# The number of bytes in the table, or `nil` when the value is absent
# or non-numeric.
#
# Fix: the original used a bare `rescue`, which silently swallowed
# every StandardError (including genuine bugs such as NoMethodError).
# Kernel#Integer raises only ArgumentError (malformed string) or
# TypeError (nil/unsupported type), so rescue exactly those.
def bytes_count
  ensure_full_data!
  begin
    Integer @gapi.num_bytes
  rescue ArgumentError, TypeError
    nil
  end
end
229
+
230
+ ##
231
+ # The number of rows in the table.
232
+ #
233
+ # @!group Data
234
+ #
235
##
# The number of rows in the table, or `nil` when the value is absent
# or non-numeric.
#
# Fix: narrowed the original bare `rescue` to the errors Kernel#Integer
# actually raises (ArgumentError/TypeError), so unrelated failures are
# no longer silently converted to `nil`.
def rows_count
  ensure_full_data!
  begin
    Integer @gapi.num_rows
  rescue ArgumentError, TypeError
    nil
  end
end
243
+
244
+ ##
245
+ # The time when this table was created.
246
+ #
247
+ # @!group Attributes
248
+ #
249
##
# The time the table was created, converted from the API's epoch
# milliseconds, or `nil` when the value is absent or non-numeric.
#
# Fix: narrowed the original bare `rescue` to ArgumentError/TypeError,
# the errors Kernel#Integer raises on bad input, so real bugs are no
# longer masked as `nil`.
def created_at
  ensure_full_data!
  begin
    Time.at(Integer(@gapi.creation_time) / 1000.0)
  rescue ArgumentError, TypeError
    nil
  end
end
257
+
258
+ ##
259
+ # The time when this table expires.
260
+ # If not present, the table will persist indefinitely.
261
+ # Expired tables will be deleted and their storage reclaimed.
262
+ #
263
+ # @!group Attributes
264
+ #
265
##
# The time the table expires (after which it is deleted and storage
# reclaimed), converted from epoch milliseconds. Returns `nil` when no
# expiration is set, meaning the table persists indefinitely.
#
# Fix: narrowed the original bare `rescue` to ArgumentError/TypeError,
# the errors Kernel#Integer raises on bad input.
def expires_at
  ensure_full_data!
  begin
    Time.at(Integer(@gapi.expiration_time) / 1000.0)
  rescue ArgumentError, TypeError
    nil
  end
end
273
+
274
+ ##
275
+ # The date when this table was last modified.
276
+ #
277
+ # @!group Attributes
278
+ #
279
##
# The time the table was last modified, converted from epoch
# milliseconds, or `nil` when the value is absent or non-numeric.
#
# Fix: narrowed the original bare `rescue` to ArgumentError/TypeError,
# the errors Kernel#Integer raises on bad input.
def modified_at
  ensure_full_data!
  begin
    Time.at(Integer(@gapi.last_modified_time) / 1000.0)
  rescue ArgumentError, TypeError
    nil
  end
end
287
+
288
+ ##
289
+ # Checks if the table's type is "TABLE".
290
+ #
291
+ # @!group Attributes
292
+ #
293
##
# Whether this resource is a regular table (type `"TABLE"`).
def table?
  @gapi.type.eql? "TABLE"
end
296
+
297
+ ##
298
+ # Checks if the table's type is "VIEW".
299
+ #
300
+ # @!group Attributes
301
+ #
302
##
# Whether this resource is a logical view (type `"VIEW"`).
def view?
  @gapi.type.eql? "VIEW"
end
305
+
306
+ ##
307
+ # The geographic location where the table should reside. Possible
308
+ # values include EU and US. The default value is US.
309
+ #
310
+ # @!group Attributes
311
+ #
312
##
# The geographic location of the table (e.g. `US`, `EU`); forces a
# full load of the resource first, since list results omit this field.
def location
  ensure_full_data!
  @gapi.location
end
316
+
317
+ ##
318
+ # Returns the table's schema. This method can also be used to set,
319
+ # replace, or add to the schema by passing a block. See {Schema} for
320
+ # available methods.
321
+ #
322
+ # @param [Boolean] replace Whether to replace the existing schema with
323
+ # the new schema. If `true`, the fields will replace the existing
324
+ # schema. If `false`, the fields will be added to the existing schema.
325
+ # When a table already contains data, schema changes must be additive.
326
+ # Thus, the default value is `false`.
327
+ # @yield [schema] a block for setting the schema
328
+ # @yieldparam [Schema] schema the object accepting the schema
329
+ #
330
+ # @return [Google::Cloud::Bigquery::Schema]
331
+ #
332
+ # @example
333
+ # require "google/cloud"
334
+ #
335
+ # gcloud = Google::Cloud.new
336
+ # bigquery = gcloud.bigquery
337
+ # dataset = bigquery.dataset "my_dataset"
338
+ # table = dataset.create_table "my_table"
339
+ #
340
+ # table.schema do |schema|
341
+ # schema.string "first_name", mode: :required
342
+ # schema.record "cities_lived", mode: :repeated do |nested_schema|
343
+ # nested_schema.string "place", mode: :required
344
+ # nested_schema.integer "number_of_years", mode: :required
345
+ # end
346
+ # end
347
+ #
348
+ # @!group Attributes
349
+ #
350
##
# Returns the table's schema as a frozen {Schema}. With a block, the
# schema may be modified (or, with `replace: true`, rebuilt from
# scratch); any change detected after the block is persisted with a
# patch request before the frozen schema is returned.
def schema replace: false
  ensure_full_data!
  builder = Schema.from_gapi @gapi.schema
  if block_given?
    # Start from an empty schema when the caller asked to replace it.
    if replace
      builder = Schema.from_gapi(
        Google::Apis::BigqueryV2::TableSchema.new(fields: []))
    end
    yield builder
    builder.check_for_mutated_schema!
    if builder.changed?
      @gapi.schema = builder.to_gapi
      patch_gapi! :schema
    end
  end
  builder.freeze
end
368
+
369
+ ##
370
+ # The fields of the table.
371
+ #
372
+ # @!group Attributes
373
+ #
374
##
# The field definitions of the table's schema.
def fields
  schema.fields
end
377
+
378
+ ##
379
+ # The names of the columns in the table.
380
+ #
381
+ # @!group Attributes
382
+ #
383
##
# The column names of the table, in schema order.
def headers
  fields.map { |field| field.name }
end
386
+
387
+ ##
388
+ # Retrieves data from the table.
389
+ #
390
+ # @param [String] token Page token, returned by a previous call,
391
+ # identifying the result set.
392
+ #
393
+ # @param [Integer] max Maximum number of results to return.
394
+ # @param [Integer] start Zero-based index of the starting row to read.
395
+ #
396
+ # @return [Google::Cloud::Bigquery::Data]
397
+ #
398
+ # @example Paginate rows of data: (See {Data#next})
399
+ # require "google/cloud"
400
+ #
401
+ # gcloud = Google::Cloud.new
402
+ # bigquery = gcloud.bigquery
403
+ # dataset = bigquery.dataset "my_dataset"
404
+ # table = dataset.table "my_table"
405
+ #
406
+ # data = table.data
407
+ # data.each do |row|
408
+ # puts row["first_name"]
409
+ # end
410
+ # if data.next?
411
+ # more_data = data.next if data.next?
412
+ # end
413
+ #
414
+ # @example Retrieve all rows of data: (See {Data#all})
415
+ # require "google/cloud"
416
+ #
417
+ # gcloud = Google::Cloud.new
418
+ # bigquery = gcloud.bigquery
419
+ # dataset = bigquery.dataset "my_dataset"
420
+ # table = dataset.table "my_table"
421
+ #
422
+ # data = table.data
423
+ # data.all do |row|
424
+ # puts row["first_name"]
425
+ # end
426
+ #
427
+ # @!group Data
428
+ #
429
##
# Fetches a page of rows from the table.
#
# @param [String] token page token from a previous call
# @param [Integer] max maximum number of rows to return
# @param [Integer] start zero-based index of the first row to read
# @return [Google::Cloud::Bigquery::Data]
def data token: nil, max: nil, start: nil
  ensure_service!
  opts = { token: token, max: max, start: start }
  page = service.list_tabledata dataset_id, table_id, opts
  Data.from_gapi page, self
end
435
+
436
+ ##
437
+ # Copies the data from the table to another table. The destination table
438
+ # argument can also be a string identifier as specified by the [Query
439
+ # Reference](https://cloud.google.com/bigquery/query-reference#from):
440
+ # `project_name:datasetId.tableId`. This is useful for referencing
441
+ # tables in other projects and datasets.
442
+ #
443
+ # @param [Table, String] destination_table The destination for the
444
+ # copied data.
445
+ # @param [String] create Specifies whether the job is allowed to create
446
+ # new tables.
447
+ #
448
+ # The following values are supported:
449
+ #
450
+ # * `needed` - Create the table if it does not exist.
451
+ # * `never` - The table must already exist. A 'notFound' error is
452
+ # raised if the table does not exist.
453
+ # @param [String] write Specifies how to handle data already present in
454
+ # the destination table. The default value is `empty`.
455
+ #
456
+ # The following values are supported:
457
+ #
458
+ # * `truncate` - BigQuery overwrites the table data.
459
+ # * `append` - BigQuery appends the data to the table.
460
+ # * `empty` - An error will be returned if the destination table
461
+ # already contains data.
462
+ #
463
+ # @return [Google::Cloud::Bigquery::CopyJob]
464
+ #
465
+ # @example
466
+ # require "google/cloud"
467
+ #
468
+ # gcloud = Google::Cloud.new
469
+ # bigquery = gcloud.bigquery
470
+ # dataset = bigquery.dataset "my_dataset"
471
+ # table = dataset.table "my_table"
472
+ # destination_table = dataset.table "my_destination_table"
473
+ #
474
+ # copy_job = table.copy destination_table
475
+ #
476
+ # @example Passing a string identifier for the destination table:
477
+ # require "google/cloud"
478
+ #
479
+ # gcloud = Google::Cloud.new
480
+ # bigquery = gcloud.bigquery
481
+ # dataset = bigquery.dataset "my_dataset"
482
+ # table = dataset.table "my_table"
483
+ #
484
+ # copy_job = table.copy "other-project:other_dataset.other_table"
485
+ #
486
+ # @!group Data
487
+ #
488
##
# Starts an asynchronous copy job from this table to
# +destination_table+ (a Table or a `project:dataset.table` string).
#
# @param [String] create whether the job may create the destination
# @param [String] write how to handle existing destination data
# @return [Google::Cloud::Bigquery::CopyJob]
def copy destination_table, create: nil, write: nil, dryrun: nil
  ensure_service!
  opts = { create: create, write: write, dryrun: dryrun }
  dest_ref = get_table_ref destination_table
  job_gapi = service.copy_table table_ref, dest_ref, opts
  Job.from_gapi job_gapi, service
end
496
+
497
+ ##
498
+ # Extract the data from the table to a Google Cloud Storage file.
499
+ #
500
+ # @see https://cloud.google.com/bigquery/exporting-data-from-bigquery
501
+ # Exporting Data From BigQuery
502
+ #
503
+ # @param [Google::Cloud::Storage::File, String, Array<String>]
504
+ # extract_url The Google Storage file or file URI pattern(s) to which
505
+ # BigQuery should extract the table data.
506
+ # @param [String] format The exported file format. The default value is
507
+ # `csv`.
508
+ #
509
+ # The following values are supported:
510
+ #
511
+ # * `csv` - CSV
512
+ # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
513
+ # * `avro` - [Avro](http://avro.apache.org/)
514
+ # @param [String] compression The compression type to use for exported
515
+ # files. Possible values include `GZIP` and `NONE`. The default value
516
+ # is `NONE`.
517
+ # @param [String] delimiter Delimiter to use between fields in the
518
+ # exported data. Default is <code>,</code>.
519
+ # @param [Boolean] header Whether to print out a header row in the
520
+ # results. Default is `true`.
521
+ #
522
+ #
523
+ # @return [Google::Cloud::Bigquery::ExtractJob]
524
+ #
525
+ # @example
526
+ # require "google/cloud"
527
+ #
528
+ # gcloud = Google::Cloud.new
529
+ # bigquery = gcloud.bigquery
530
+ # dataset = bigquery.dataset "my_dataset"
531
+ # table = dataset.table "my_table"
532
+ #
533
+ # extract_job = table.extract "gs://my-bucket/file-name.json",
534
+ # format: "json"
535
+ #
536
+ # @!group Data
537
+ #
538
##
# Starts an asynchronous job extracting the table's data to Google
# Cloud Storage at +extract_url+.
#
# @param [String] format exported file format (`csv`, `json`, `avro`)
# @param [String] compression `GZIP` or `NONE`
# @param [String] delimiter field delimiter for CSV output
# @param [Boolean] header whether to emit a header row
# @return [Google::Cloud::Bigquery::ExtractJob]
def extract extract_url, format: nil, compression: nil, delimiter: nil,
            header: nil, dryrun: nil
  ensure_service!
  opts = { format: format, compression: compression,
           delimiter: delimiter, header: header, dryrun: dryrun }
  job_gapi = service.extract_table table_ref, extract_url, opts
  Job.from_gapi job_gapi, service
end
546
+
547
+ ##
548
+ # Loads data into the table. You can pass a google-cloud storage file
549
+ # path or a google-cloud storage file instance. Or, you can upload a
550
+ # file directly. See [Loading Data with a POST Request](
551
+ # https://cloud.google.com/bigquery/loading-data-post-request#multipart).
552
+ #
553
+ # @param [File, Google::Cloud::Storage::File, String] file A file or the
554
+ # URI of a Google Cloud Storage file containing data to load into the
555
+ # table.
556
+ # @param [String] format The exported file format. The default value is
557
+ # `csv`.
558
+ #
559
+ # The following values are supported:
560
+ #
561
+ # * `csv` - CSV
562
+ # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
563
+ # * `avro` - [Avro](http://avro.apache.org/)
564
+ # * `datastore_backup` - Cloud Datastore backup
565
+ # @param [String] create Specifies whether the job is allowed to create
566
+ # new tables.
567
+ #
568
+ # The following values are supported:
569
+ #
570
+ # * `needed` - Create the table if it does not exist.
571
+ # * `never` - The table must already exist. A 'notFound' error is
572
+ # raised if the table does not exist.
573
+ # @param [String] write Specifies how to handle data already present in
574
+ # the table. The default value is `empty`.
575
+ #
576
+ # The following values are supported:
577
+ #
578
+ # * `truncate` - BigQuery overwrites the table data.
579
+ # * `append` - BigQuery appends the data to the table.
580
+ # * `empty` - An error will be returned if the table already contains
581
+ # data.
582
+ # @param [Array<String>] projection_fields If the `format` option is set
583
+ # to `datastore_backup`, indicates which entity properties to load
584
+ # from a Cloud Datastore backup. Property names are case sensitive and
585
+ # must be top-level properties. If not set, BigQuery loads all
586
+ # properties. If any named property isn't found in the Cloud Datastore
587
+ # backup, an invalid error is returned.
588
+ # @param [Boolean] jagged_rows Accept rows that are missing trailing
589
+ # optional columns. The missing values are treated as nulls. If
590
+ # `false`, records with missing trailing columns are treated as bad
591
+ # records, and if there are too many bad records, an invalid error is
592
+ # returned in the job result. The default value is `false`. Only
593
+ # applicable to CSV, ignored for other formats.
594
+ # @param [Boolean] quoted_newlines Indicates if BigQuery should allow
595
+ # quoted data sections that contain newline characters in a CSV file.
596
+ # The default value is `false`.
597
+ # @param [String] encoding The character encoding of the data. The
598
+ # supported values are `UTF-8` or `ISO-8859-1`. The default value is
599
+ # `UTF-8`.
600
+ # @param [String] delimiter Specifices the separator for fields in a CSV
601
+ # file. BigQuery converts the string to `ISO-8859-1` encoding, and
602
+ # then uses the first byte of the encoded string to split the data in
603
+ # its raw, binary state. Default is <code>,</code>.
604
+ # @param [Boolean] ignore_unknown Indicates if BigQuery should allow
605
+ # extra values that are not represented in the table schema. If true,
606
+ # the extra values are ignored. If false, records with extra columns
607
+ # are treated as bad records, and if there are too many bad records,
608
+ # an invalid error is returned in the job result. The default value is
609
+ # `false`.
610
+ #
611
+ # The `format` property determines what BigQuery treats as an extra
612
+ # value:
613
+ #
614
+ # * `CSV`: Trailing columns
615
+ # * `JSON`: Named values that don't match any column names
616
+ # @param [Integer] max_bad_records The maximum number of bad records
617
+ # that BigQuery can ignore when running the job. If the number of bad
618
+ # records exceeds this value, an invalid error is returned in the job
619
+ # result. The default value is `0`, which requires that all records
620
+ # are valid.
621
+ # @param [String] quote The value that is used to quote data sections in
622
+ # a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
623
+ # then uses the first byte of the encoded string to split the data in
624
+ # its raw, binary state. The default value is a double-quote
625
+ # <code>"</code>. If your data does not contain quoted sections, set
626
+ # the property value to an empty string. If your data contains quoted
627
+ # newline characters, you must also set the allowQuotedNewlines
628
+ # property to true.
629
+ # @param [Integer] skip_leading The number of rows at the top of a CSV
630
+ # file that BigQuery will skip when loading the data. The default
631
+ # value is `0`. This property is useful if you have header rows in the
632
+ # file that should be skipped.
633
+ #
634
+ # @return [Google::Cloud::Bigquery::LoadJob]
635
+ #
636
+ # @example
637
+ # require "google/cloud"
638
+ #
639
+ # gcloud = Google::Cloud.new
640
+ # bigquery = gcloud.bigquery
641
+ # dataset = bigquery.dataset "my_dataset"
642
+ # table = dataset.table "my_table"
643
+ #
644
+ # load_job = table.load "gs://my-bucket/file-name.csv"
645
+ #
646
+ # @example Pass a google-cloud storage file instance:
647
+ # require "google/cloud"
648
+ # require "google/cloud/storage"
649
+ #
650
+ # gcloud = Google::Cloud.new
651
+ # bigquery = gcloud.bigquery
652
+ # dataset = bigquery.dataset "my_dataset"
653
+ # table = dataset.table "my_table"
654
+ #
655
+ # storage = gcloud.storage
656
+ # bucket = storage.bucket "my-bucket"
657
+ # file = bucket.file "file-name.csv"
658
+ # load_job = table.load file
659
+ #
660
+ # @example Upload a file directly:
661
+ # require "google/cloud"
662
+ #
663
+ # gcloud = Google::Cloud.new
664
+ # bigquery = gcloud.bigquery
665
+ # dataset = bigquery.dataset "my_dataset"
666
+ # table = dataset.table "my_table"
667
+ #
668
+ # file = File.open "my_data.csv"
669
+ # load_job = table.load file
670
+ #
671
+ # @!group Data
672
+ #
673
##
# Starts an asynchronous job loading data into the table from a Cloud
# Storage URL/file object or from a local file uploaded directly.
# Raises Google::Cloud::Error for any other kind of source.
#
# @return [Google::Cloud::Bigquery::LoadJob]
def load file, format: nil, create: nil, write: nil,
         projection_fields: nil, jagged_rows: nil, quoted_newlines: nil,
         encoding: nil, delimiter: nil, ignore_unknown: nil,
         max_bad_records: nil, quote: nil, skip_leading: nil,
         dryrun: nil
  ensure_service!
  opts = { format: format, create: create, write: write,
           projection_fields: projection_fields,
           jagged_rows: jagged_rows,
           quoted_newlines: quoted_newlines, encoding: encoding,
           delimiter: delimiter, ignore_unknown: ignore_unknown,
           max_bad_records: max_bad_records, quote: quote,
           skip_leading: skip_leading, dryrun: dryrun }
  if storage_url? file
    load_storage file, opts
  elsif local_file? file
    load_local file, opts
  else
    fail Google::Cloud::Error, "Don't know how to load #{file}"
  end
end
690
+
691
+ ##
692
+ # Inserts data into the table for near-immediate querying, without the
693
+ # need to complete a #load operation before the data can appear in query
694
+ # results.
695
+ #
696
+ # @see https://cloud.google.com/bigquery/streaming-data-into-bigquery
697
+ # Streaming Data Into BigQuery
698
+ #
699
+ # @param [Hash, Array<Hash>] rows A hash object or array of hash objects
700
+ # containing the data.
701
+ # @param [Boolean] skip_invalid Insert all valid rows of a request, even
702
+ # if invalid rows exist. The default value is `false`, which causes
703
+ # the entire request to fail if any invalid rows exist.
704
+ # @param [Boolean] ignore_unknown Accept rows that contain values that
705
+ # do not match the schema. The unknown values are ignored. Default is
706
+ # false, which treats unknown values as errors.
707
+ #
708
+ # @return [Google::Cloud::Bigquery::InsertResponse]
709
+ #
710
+ # @example
711
+ # require "google/cloud"
712
+ #
713
+ # gcloud = Google::Cloud.new
714
+ # bigquery = gcloud.bigquery
715
+ # dataset = bigquery.dataset "my_dataset"
716
+ # table = dataset.table "my_table"
717
+ #
718
+ # rows = [
719
+ # { "first_name" => "Alice", "age" => 21 },
720
+ # { "first_name" => "Bob", "age" => 22 }
721
+ # ]
722
+ # table.insert rows
723
+ #
724
+ # @!group Data
725
+ #
726
##
# Streams rows into the table for near-immediate availability in
# queries. A single Hash is accepted and wrapped in an Array.
#
# @param [Hash, Array<Hash>] rows the row data
# @param [Boolean] skip_invalid insert valid rows even when some fail
# @param [Boolean] ignore_unknown drop values not present in the schema
# @return [Google::Cloud::Bigquery::InsertResponse]
def insert rows, skip_invalid: nil, ignore_unknown: nil
  rows = [rows] if rows.is_a? Hash
  ensure_service!
  opts = { skip_invalid: skip_invalid, ignore_unknown: ignore_unknown }
  resp = service.insert_tabledata dataset_id, table_id, rows, opts
  InsertResponse.from_gapi rows, resp
end
734
+
735
+ ##
736
+ # Permanently deletes the table.
737
+ #
738
+ # @return [Boolean] Returns `true` if the table was deleted.
739
+ #
740
+ # @example
741
+ # require "google/cloud"
742
+ #
743
+ # gcloud = Google::Cloud.new
744
+ # bigquery = gcloud.bigquery
745
+ # dataset = bigquery.dataset "my_dataset"
746
+ # table = dataset.table "my_table"
747
+ #
748
+ # table.delete
749
+ #
750
+ # @!group Lifecycle
751
+ #
752
##
# Permanently deletes the table from the service.
#
# @return [Boolean] always `true` (the service call raises on failure)
def delete
  ensure_service!
  service.delete_table dataset_id, table_id
  true
end
757
+
758
+ ##
759
+ # Reloads the table with current data from the BigQuery service.
760
+ #
761
+ # @!group Lifecycle
762
+ #
763
+ def reload!
764
+ ensure_service!
765
+ gapi = service.get_table dataset_id, table_id
766
+ @gapi = gapi
767
+ end
768
+ alias_method :refresh!, :reload!
769
+
770
+ ##
771
+ # @private New Table from a Google API Client object.
772
##
# @private Wraps a raw API resource in a Table (or View, depending on
# the resource's type) bound to the given service connection.
def self.from_gapi gapi, conn
  table = class_for(gapi).new
  table.gapi = gapi
  table.service = conn
  table
end
779
+
780
+ protected
781
+
782
+ ##
783
+ # Raise an error unless an active service is available.
784
##
# Raises unless an active service connection is available.
def ensure_service!
  return if service
  fail "Must have active connection"
end
787
+
788
##
# Sends a patch request updating only the named attributes, reading
# their current values from the cached resource, and caches the
# service's response. A no-op when no attributes are given.
def patch_gapi! *attributes
  return if attributes.empty?
  ensure_service!
  patch_args = attributes.each_with_object({}) do |attribute, args|
    args[attribute] = @gapi.send attribute
  end
  patch_gapi = Google::Apis::BigqueryV2::Table.new patch_args
  @gapi = service.patch_table dataset_id, table_id, patch_gapi
end
797
+
798
##
# @private Chooses the wrapper class for a raw resource: View for
# `"VIEW"` resources, otherwise this class.
def self.class_for gapi
  gapi.type == "VIEW" ? View : self
end
802
+
803
##
# Starts a load job from a Cloud Storage location. A Storage file
# object is first resolved to its `gs://` URL.
def load_storage url, options = {}
  gs_url = url.respond_to?(:to_gs_url) ? url.to_gs_url : url
  job_gapi = service.load_table_gs_url dataset_id, table_id, gs_url,
                                       options
  Job.from_gapi job_gapi, service
end
810
+
811
##
# Starts a load job that uploads a local file. An object that exposes
# a `gs://` URL is loaded by URL instead of being uploaded.
def load_local file, options = {}
  source = file.respond_to?(:to_gs_url) ? file.to_gs_url : file
  job_gapi = service.load_table_file dataset_id, table_id, source,
                                     options
  Job.from_gapi job_gapi, service
end
818
+
819
##
# Whether +file+ designates a Cloud Storage location: either an object
# with a `gs://` URL or a string starting with `gs://`.
def storage_url? file
  return true if file.respond_to? :to_gs_url
  file.respond_to?(:to_str) &&
    file.to_str.downcase.start_with?("gs://")
end
824
+
825
##
# Whether +file+ names an existing regular file on the local
# filesystem. Returns `false` for any argument `File.file?` cannot
# handle (e.g. `nil` raises TypeError).
#
# Fix: made the original bare `rescue` explicit (`StandardError`), per
# style guidance; bare rescue already meant StandardError but hid the
# intent and invites accidental over-catching on edit.
def local_file? file
  ::File.file? file
rescue StandardError
  false
end
830
+
831
+ ##
832
+ # Load the complete representation of the table if it has been
833
+ # only partially loaded by a request to the API list method.
834
##
# Loads the complete table resource when only the partial list-method
# representation is cached.
def ensure_full_data!
  return if data_complete?
  reload_gapi!
end
837
+
838
##
# Fetches the full table resource from the service and caches it.
def reload_gapi!
  ensure_service!
  @gapi = service.get_table dataset_id, table_id
end
843
+
844
##
# @private Whether the cached resource is the full Table payload.
# NOTE(review): a partially-loaded list item may also deserialize to
# BigqueryV2::Table — confirm this check actually distinguishes the
# partial representation.
def data_complete?
  @gapi.is_a? Google::Apis::BigqueryV2::Table
end
847
+
848
+ private
849
+
850
##
# Resolves +table+ (a Table-like object or a string identifier) to a
# table reference, using this table's reference to fill in defaults
# when parsing a string.
def get_table_ref table
  return table.table_ref if table.respond_to? :table_ref
  Service.table_ref_from_s table, table_ref
end
857
+
858
+ ##
859
+ # Yielded to a block to accumulate changes for a patch request.
860
+ class Updater < Table
861
+ ##
862
+ # A list of attributes that were updated.
863
+ attr_reader :updates
864
+
865
+ ##
866
+ # Create an Updater object.
867
##
# Creates an Updater wrapping +gapi+ with no pending updates and no
# schema builder yet.
def initialize gapi
  @schema = nil
  @gapi = gapi
  @updates = []
end
872
+
873
+ ##
874
+ # Returns the table's schema. This method can also be used to set,
875
+ # replace, or add to the schema by passing a block. See {Schema} for
876
+ # available methods.
877
+ #
878
+ # @param [Boolean] replace Whether to replace the existing schema with
879
+ # the new schema. If `true`, the fields will replace the existing
880
+ # schema. If `false`, the fields will be added to the existing
881
+ # schema. When a table already contains data, schema changes must be
882
+ # additive. Thus, the default value is `false`.
883
+ # @yield [schema] a block for setting the schema
884
+ # @yieldparam [Schema] schema the object accepting the schema
885
+ #
886
+ # @return [Google::Cloud::Bigquery::Schema]
887
+ #
888
+ # @example
889
+ # require "google/cloud"
890
+ #
891
+ # gcloud = Google::Cloud.new
892
+ # bigquery = gcloud.bigquery
893
+ # dataset = bigquery.dataset "my_dataset"
894
+ # table = dataset.create_table "my_table" do |t|
895
+ # t.name = "My Table",
896
+ # t.description = "A description of my table."
897
+ # t.schema do |s|
898
+ # s.string "first_name", mode: :required
899
+ # s.record "cities_lived", mode: :repeated do |r|
900
+ # r.string "place", mode: :required
901
+ # r.integer "number_of_years", mode: :required
902
+ # end
903
+ # end
904
+ # end
905
+ #
906
+ # @!group Schema
907
+ #
908
##
# Like Table#schema, but returns a mutable builder memoized on the
# updater instead of freezing it, so changes accumulate for the
# pending patch request.
# TODO: make sure to call ensure_full_data! on Dataset#update
def schema replace: false
  @schema ||= Schema.from_gapi @gapi.schema
  if block_given?
    # Start over from an empty schema when replacing.
    if replace
      @schema = Schema.from_gapi(
        Google::Apis::BigqueryV2::TableSchema.new(fields: []))
    end
    yield @schema
    check_for_mutated_schema!
  end
  # Not frozen here: the updater keeps accepting modifications.
  @schema
end
923
+
924
+ ##
925
+ # Adds a string field to the schema.
926
+ #
927
+ # See {Schema#string}.
928
+ #
929
+ # @param [String] name The field name. The name must contain only
930
+ # letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
931
+ # start with a letter or underscore. The maximum length is 128
932
+ # characters.
933
+ # @param [String] description A description of the field.
934
+ # @param [Symbol] mode The field's mode. The possible values are
935
+ # `:nullable`, `:required`, and `:repeated`. The default value is
936
+ # `:nullable`.
937
+ #
938
+ # @example
939
+ # require "google/cloud"
940
+ #
941
+ # gcloud = Google::Cloud.new
942
+ # bigquery = gcloud.bigquery
943
+ # dataset = bigquery.dataset "my_dataset"
944
+ # table = dataset.create_table "my_table" do |schema|
945
+ # schema.string "first_name", mode: :required
946
+ # end
947
+ #
948
+ # @!group Schema
949
##
# Adds a string field named +name+ to the schema being built.
# Delegates to {Schema#string}.
def string name, description: nil, mode: :nullable
  schema.string(name, description: description, mode: mode)
end
952
+
953
+ ##
954
+ # Adds an integer field to the schema.
955
+ #
956
+ # See {Schema#integer}.
957
+ #
958
+ # @param [String] name The field name. The name must contain only
959
+ # letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
960
+ # start with a letter or underscore. The maximum length is 128
961
+ # characters.
962
+ # @param [String] description A description of the field.
963
+ # @param [Symbol] mode The field's mode. The possible values are
964
+ # `:nullable`, `:required`, and `:repeated`. The default value is
965
+ # `:nullable`.
966
+ #
967
+ # @example
968
+ # require "google/cloud"
969
+ #
970
+ # gcloud = Google::Cloud.new
971
+ # bigquery = gcloud.bigquery
972
+ # dataset = bigquery.dataset "my_dataset"
973
+ # table = dataset.create_table "my_table" do |schema|
974
+ # schema.integer "age", mode: :required
975
+ # end
976
+ #
977
+ # @!group Schema
978
##
# Adds an integer field named +name+ to the schema being built.
# Delegates to {Schema#integer}.
def integer name, description: nil, mode: :nullable
  schema.integer(name, description: description, mode: mode)
end
981
+
982
##
# Adds a floating-point number field to the schema. See {Schema#float}.
#
# @param [String] name The field name. Must contain only letters,
#   numbers, or underscores, start with a letter or underscore, and be
#   at most 128 characters long.
# @param [String] description A description of the field.
# @param [Symbol] mode The field's mode: `:nullable`, `:required`, or
#   `:repeated`. Defaults to `:nullable`.
#
# @!group Schema
def float(name, description: nil, mode: :nullable)
  schema.float(name, description: description, mode: mode)
end
1010
+
1011
##
# Adds a boolean field to the schema. See {Schema#boolean}.
#
# @param [String] name The field name. Must contain only letters,
#   numbers, or underscores, start with a letter or underscore, and be
#   at most 128 characters long.
# @param [String] description A description of the field.
# @param [Symbol] mode The field's mode: `:nullable`, `:required`, or
#   `:repeated`. Defaults to `:nullable`.
#
# @!group Schema
def boolean(name, description: nil, mode: :nullable)
  schema.boolean(name, description: description, mode: mode)
end
1039
+
1040
##
# Adds a timestamp field to the schema. See {Schema#timestamp}.
#
# @param [String] name The field name. Must contain only letters,
#   numbers, or underscores, start with a letter or underscore, and be
#   at most 128 characters long.
# @param [String] description A description of the field.
# @param [Symbol] mode The field's mode: `:nullable`, `:required`, or
#   `:repeated`. Defaults to `:nullable`.
#
# @!group Schema
def timestamp(name, description: nil, mode: :nullable)
  schema.timestamp(name, description: description, mode: mode)
end
1068
+
1069
##
# Adds a record field to the schema. A block must be passed describing
# the nested fields of the record. For details on nested and repeated
# records, see [Preparing Data for BigQuery
# ](https://cloud.google.com/bigquery/preparing-data-for-bigquery).
#
# See {Schema#record}.
#
# @param [String] name The field name. Must contain only letters,
#   numbers, or underscores, start with a letter or underscore, and be
#   at most 128 characters long.
# @param [String] description A description of the field.
# @param [Symbol] mode The field's mode: `:nullable`, `:required`, or
#   `:repeated`.
# @yield [nested_schema] a block for setting the nested schema
# @yieldparam [Schema] nested_schema the object accepting the nested
#   schema
#
# @!group Schema
#
def record(name, description: nil, mode: nil, &block)
  schema.record(name, description: description, mode: mode, &block)
end
1107
+
1108
##
# Make sure any pending schema changes are saved. If the schema wrapper
# has been mutated since it was loaded, regenerate its gapi
# representation and queue the `:schema` attribute for update.
def check_for_mutated_schema!
  return if @schema.nil?
  @schema.check_for_mutated_schema!
  if @schema.changed?
    @gapi.schema = @schema.to_gapi
    patch_gapi! :schema
  end
end
1117
+
1118
##
# Returns the underlying API representation, first flushing any pending
# schema mutations into it. (`check_for_mutated_schema!` mutates the
# gapi in place and never reassigns it, so `tap` returns the same
# object the original two-statement form did.)
def to_gapi
  @gapi.tap { check_for_mutated_schema! }
end
1122
+
1123
+ protected
1124
+
1125
##
# Deliberately a no-op override: the gapi is trusted to be fully
# loaded before this object is used, so there is nothing to fetch.
def ensure_full_data!
  # Nothing to do — the gapi is already full here.
end
1130
+
1131
##
# Queue up an update for +attribute+ instead of sending it to the
# service immediately. Each attribute appears at most once in the
# queue (`uniq!` mutates in place, preserving the original array
# object and return value).
#
# @param [Symbol] attribute The name of the changed attribute.
def patch_gapi!(attribute)
  @updates.push(attribute).uniq!
end
1137
+ end
1138
+ end
1139
+ end
1140
+ end
1141
+ end