google-cloud-bigquery 0.20.0

@@ -0,0 +1,1141 @@
+ # Copyright 2015 Google Inc. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+
+ require "google/cloud/errors"
+ require "google/cloud/bigquery/service"
+ require "google/cloud/bigquery/view"
+ require "google/cloud/bigquery/data"
+ require "google/cloud/bigquery/table/list"
+ require "google/cloud/bigquery/schema"
+ require "google/cloud/bigquery/insert_response"
+ require "google/apis/bigquery_v2"
+
+ module Google
+   module Cloud
+     module Bigquery
+       ##
+       # # Table
+       #
+       # A named resource representing a BigQuery table that holds zero or more
+       # records. Every table is defined by a schema that may contain nested and
+       # repeated fields.
+       #
+       # @see https://cloud.google.com/bigquery/preparing-data-for-bigquery
+       #   Preparing Data for BigQuery
+       #
+       # @example
+       #   require "google/cloud"
+       #
+       #   gcloud = Google::Cloud.new
+       #   bigquery = gcloud.bigquery
+       #   dataset = bigquery.dataset "my_dataset"
+       #
+       #   table = dataset.create_table "my_table" do |schema|
+       #     schema.string "first_name", mode: :required
+       #     schema.record "cities_lived", mode: :repeated do |nested_schema|
+       #       nested_schema.string "place", mode: :required
+       #       nested_schema.integer "number_of_years", mode: :required
+       #     end
+       #   end
+       #
+       #   row = {
+       #     "first_name" => "Alice",
+       #     "cities_lived" => [
+       #       {
+       #         "place" => "Seattle",
+       #         "number_of_years" => 5
+       #       },
+       #       {
+       #         "place" => "Stockholm",
+       #         "number_of_years" => 6
+       #       }
+       #     ]
+       #   }
+       #   table.insert row
+       #
+       class Table
+         ##
+         # @private The Service object.
+         attr_accessor :service
+
+         ##
+         # @private The Google API Client object.
+         attr_accessor :gapi
+
+         ##
+         # @private Create an empty Table object.
+         def initialize
+           @service = nil
+           @gapi = Google::Apis::BigqueryV2::Table.new
+         end
+
+         ##
+         # A unique ID for this table.
+         # The ID must contain only letters (a-z, A-Z), numbers (0-9),
+         # or underscores (_). The maximum length is 1,024 characters.
+         #
+         # @!group Attributes
+         #
+         def table_id
+           @gapi.table_reference.table_id
+         end
+
+         ##
+         # The ID of the `Dataset` containing this table.
+         #
+         # @!group Attributes
+         #
+         def dataset_id
+           @gapi.table_reference.dataset_id
+         end
+
+         ##
+         # The ID of the `Project` containing this table.
+         #
+         # @!group Attributes
+         #
+         def project_id
+           @gapi.table_reference.project_id
+         end
+
+         ##
+         # @private The gapi fragment containing the Project ID, Dataset ID, and
+         # Table ID as a camel-cased hash.
+         def table_ref
+           table_ref = @gapi.table_reference
+           table_ref = table_ref.to_hash if table_ref.respond_to? :to_hash
+           table_ref
+         end
+
+         ##
+         # The combined Project ID, Dataset ID, and Table ID for this table, in
+         # the format specified by the [Query
+         # Reference](https://cloud.google.com/bigquery/query-reference#from):
+         # `project_name:datasetId.tableId`. To use this value in queries see
+         # {#query_id}.
+         #
+         # @!group Attributes
+         #
+         def id
+           @gapi.id
+         end
+
+         ##
+         # The value returned by {#id}, wrapped in square brackets if the Project
+         # ID contains dashes, as specified by the [Query
+         # Reference](https://cloud.google.com/bigquery/query-reference#from).
+         # Useful in queries.
+         #
+         # @example
+         #   require "google/cloud"
+         #
+         #   gcloud = Google::Cloud.new
+         #   bigquery = gcloud.bigquery
+         #   dataset = bigquery.dataset "my_dataset"
+         #   table = dataset.table "my_table"
+         #
+         #   data = bigquery.query "SELECT name FROM #{table.query_id}"
+         #
+         # @!group Attributes
+         #
+         def query_id
+           project_id["-"] ? "[#{id}]" : id
+         end
+
+         ##
+         # The name of the table.
+         #
+         # @!group Attributes
+         #
+         def name
+           @gapi.friendly_name
+         end
+
+         ##
+         # Updates the name of the table.
+         #
+         # @!group Attributes
+         #
+         def name= new_name
+           @gapi.update! friendly_name: new_name
+           patch_gapi! :friendly_name
+         end
+
+         ##
+         # A string hash of the table.
+         #
+         # @!group Attributes
+         #
+         def etag
+           ensure_full_data!
+           @gapi.etag
+         end
+
+         ##
+         # A URL that can be used to access the table using the REST API.
+         #
+         # @!group Attributes
+         #
+         def api_url
+           ensure_full_data!
+           @gapi.self_link
+         end
+
+         ##
+         # The description of the table.
+         #
+         # @!group Attributes
+         #
+         def description
+           ensure_full_data!
+           @gapi.description
+         end
+
+         ##
+         # Updates the description of the table.
+         #
+         # @!group Attributes
+         #
+         def description= new_description
+           @gapi.update! description: new_description
+           patch_gapi! :description
+         end
+
+         ##
+         # The number of bytes in the table.
+         #
+         # @!group Data
+         #
+         def bytes_count
+           ensure_full_data!
+           begin
+             Integer @gapi.num_bytes
+           rescue
+             nil
+           end
+         end
+
+         ##
+         # The number of rows in the table.
+         #
+         # @!group Data
+         #
+         def rows_count
+           ensure_full_data!
+           begin
+             Integer @gapi.num_rows
+           rescue
+             nil
+           end
+         end
+
+         ##
+         # The time when this table was created.
+         #
+         # @!group Attributes
+         #
+         def created_at
+           ensure_full_data!
+           begin
+             Time.at(Integer(@gapi.creation_time) / 1000.0)
+           rescue
+             nil
+           end
+         end
+
+         ##
+         # The time when this table expires.
+         # If not present, the table will persist indefinitely.
+         # Expired tables will be deleted and their storage reclaimed.
+         #
+         # @!group Attributes
+         #
+         def expires_at
+           ensure_full_data!
+           begin
+             Time.at(Integer(@gapi.expiration_time) / 1000.0)
+           rescue
+             nil
+           end
+         end
+
+         ##
+         # The time when this table was last modified.
+         #
+         # @!group Attributes
+         #
+         def modified_at
+           ensure_full_data!
+           begin
+             Time.at(Integer(@gapi.last_modified_time) / 1000.0)
+           rescue
+             nil
+           end
+         end
+
+         ##
+         # Checks if the table's type is "TABLE".
+         #
+         # @!group Attributes
+         #
+         def table?
+           @gapi.type == "TABLE"
+         end
+
+         ##
+         # Checks if the table's type is "VIEW".
+         #
+         # @!group Attributes
+         #
+         def view?
+           @gapi.type == "VIEW"
+         end
+
+         ##
+         # The geographic location where the table should reside. Possible
+         # values include EU and US. The default value is US.
+         #
+         # @!group Attributes
+         #
+         def location
+           ensure_full_data!
+           @gapi.location
+         end
+
+         ##
+         # Returns the table's schema. This method can also be used to set,
+         # replace, or add to the schema by passing a block. See {Schema} for
+         # available methods.
+         #
+         # @param [Boolean] replace Whether to replace the existing schema with
+         #   the new schema. If `true`, the fields will replace the existing
+         #   schema. If `false`, the fields will be added to the existing schema.
+         #   When a table already contains data, schema changes must be additive.
+         #   Thus, the default value is `false`.
+         # @yield [schema] a block for setting the schema
+         # @yieldparam [Schema] schema the object accepting the schema
+         #
+         # @return [Google::Cloud::Bigquery::Schema]
+         #
+         # @example
+         #   require "google/cloud"
+         #
+         #   gcloud = Google::Cloud.new
+         #   bigquery = gcloud.bigquery
+         #   dataset = bigquery.dataset "my_dataset"
+         #   table = dataset.create_table "my_table"
+         #
+         #   table.schema do |schema|
+         #     schema.string "first_name", mode: :required
+         #     schema.record "cities_lived", mode: :repeated do |nested_schema|
+         #       nested_schema.string "place", mode: :required
+         #       nested_schema.integer "number_of_years", mode: :required
+         #     end
+         #   end
+         #
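+         # @example Replace the schema (a minimal, illustrative sketch)
+         #   # Replacing the schema of a table that already holds data may be
+         #   # rejected by the service, since such changes must be additive.
+         #   table.schema replace: true do |schema|
+         #     schema.string "email", mode: :required
+         #   end
+         #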
348
+ # @!group Attributes
349
+ #
350
+ def schema replace: false
351
+ ensure_full_data!
352
+ schema_builder = Schema.from_gapi @gapi.schema
353
+ if block_given?
354
+ if replace
355
+ empty_schema = Google::Apis::BigqueryV2::TableSchema.new(
356
+ fields: [])
357
+ schema_builder = Schema.from_gapi empty_schema
358
+ end
359
+ yield schema_builder
360
+ schema_builder.check_for_mutated_schema!
361
+ if schema_builder.changed?
362
+ @gapi.schema = schema_builder.to_gapi
363
+ patch_gapi! :schema
364
+ end
365
+ end
366
+ schema_builder.freeze
367
+ end
368
+
369
+ ##
370
+ # The fields of the table.
371
+ #
372
+ # @!group Attributes
373
+ #
374
+ def fields
375
+ schema.fields
376
+ end
377
+
378
+ ##
379
+ # The names of the columns in the table.
380
+ #
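+         # @example An illustrative sketch (the names depend on the schema)
+         #   require "google/cloud"
+         #
+         #   gcloud = Google::Cloud.new
+         #   bigquery = gcloud.bigquery
+         #   dataset = bigquery.dataset "my_dataset"
+         #   table = dataset.table "my_table"
+         #
+         #   table.headers #=> ["first_name", "cities_lived"]
+         #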
381
+ # @!group Attributes
382
+ #
383
+ def headers
384
+ fields.map(&:name)
385
+ end
386
+
387
+ ##
388
+ # Retrieves data from the table.
389
+ #
390
+ # @param [String] token Page token, returned by a previous call,
391
+ # identifying the result set.
392
+ #
393
+ # @param [Integer] max Maximum number of results to return.
394
+ # @param [Integer] start Zero-based index of the starting row to read.
395
+ #
396
+ # @return [Google::Cloud::Bigquery::Data]
397
+ #
398
+ # @example Paginate rows of data: (See {Data#next})
399
+ # require "google/cloud"
400
+ #
401
+ # gcloud = Google::Cloud.new
402
+ # bigquery = gcloud.bigquery
403
+ # dataset = bigquery.dataset "my_dataset"
404
+ # table = dataset.table "my_table"
405
+ #
406
+ # data = table.data
407
+ # data.each do |row|
408
+ # puts row["first_name"]
409
+ # end
410
+ # if data.next?
411
+ # more_data = data.next if data.next?
412
+ # end
413
+ #
414
+ # @example Retrieve all rows of data: (See {Data#all})
415
+ # require "google/cloud"
416
+ #
417
+ # gcloud = Google::Cloud.new
418
+ # bigquery = gcloud.bigquery
419
+ # dataset = bigquery.dataset "my_dataset"
420
+ # table = dataset.table "my_table"
421
+ #
422
+ # data = table.data
423
+ # data.all do |row|
424
+ # puts row["first_name"]
425
+ # end
426
+ #
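+         # @example Limit the page size (an illustrative sketch)
+         #   data = table.data max: 100
+         #   data.each do |row|
+         #     puts row["first_name"]
+         #   end
+         #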
427
+ # @!group Data
428
+ #
429
+ def data token: nil, max: nil, start: nil
430
+ ensure_service!
431
+ options = { token: token, max: max, start: start }
432
+ gapi = service.list_tabledata dataset_id, table_id, options
433
+ Data.from_gapi gapi, self
434
+ end
435
+
436
+ ##
437
+ # Copies the data from the table to another table. The destination table
438
+ # argument can also be a string identifier as specified by the [Query
439
+ # Reference](https://cloud.google.com/bigquery/query-reference#from):
440
+ # `project_name:datasetId.tableId`. This is useful for referencing
441
+ # tables in other projects and datasets.
442
+ #
443
+ # @param [Table, String] destination_table The destination for the
444
+ # copied data.
445
+ # @param [String] create Specifies whether the job is allowed to create
446
+ # new tables.
447
+ #
448
+ # The following values are supported:
449
+ #
450
+ # * `needed` - Create the table if it does not exist.
451
+ # * `never` - The table must already exist. A 'notFound' error is
452
+ # raised if the table does not exist.
453
+ # @param [String] write Specifies how to handle data already present in
454
+ # the destination table. The default value is `empty`.
455
+ #
456
+ # The following values are supported:
457
+ #
458
+ # * `truncate` - BigQuery overwrites the table data.
459
+ # * `append` - BigQuery appends the data to the table.
460
+ # * `empty` - An error will be returned if the destination table
461
+ # already contains data.
462
+ #
463
+ # @return [Google::Cloud::Bigquery::CopyJob]
464
+ #
465
+ # @example
466
+ # require "google/cloud"
467
+ #
468
+ # gcloud = Google::Cloud.new
469
+ # bigquery = gcloud.bigquery
470
+ # dataset = bigquery.dataset "my_dataset"
471
+ # table = dataset.table "my_table"
472
+ # destination_table = dataset.table "my_destination_table"
473
+ #
474
+ # copy_job = table.copy destination_table
475
+ #
476
+ # @example Passing a string identifier for the destination table:
477
+ # require "google/cloud"
478
+ #
479
+ # gcloud = Google::Cloud.new
480
+ # bigquery = gcloud.bigquery
481
+ # dataset = bigquery.dataset "my_dataset"
482
+ # table = dataset.table "my_table"
483
+ #
484
+ # copy_job = table.copy "other-project:other_dataset.other_table"
485
+ #
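+         # @example Overwrite the destination's data (an illustrative sketch)
+         #   copy_job = table.copy destination_table,
+         #     create: "needed", write: "truncate"
+         #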
+         # @!group Data
+         #
+         def copy destination_table, create: nil, write: nil, dryrun: nil
+           ensure_service!
+           options = { create: create, write: write, dryrun: dryrun }
+           gapi = service.copy_table table_ref,
+                                     get_table_ref(destination_table),
+                                     options
+           Job.from_gapi gapi, service
+         end
+
+         ##
+         # Extract the data from the table to a Google Cloud Storage file.
+         #
+         # @see https://cloud.google.com/bigquery/exporting-data-from-bigquery
+         #   Exporting Data From BigQuery
+         #
+         # @param [Google::Cloud::Storage::File, String, Array<String>]
+         #   extract_url The Google Storage file or file URI pattern(s) to which
+         #   BigQuery should extract the table data.
+         # @param [String] format The exported file format. The default value is
+         #   `csv`.
+         #
+         #   The following values are supported:
+         #
+         #   * `csv` - CSV
+         #   * `json` - [Newline-delimited JSON](http://jsonlines.org/)
+         #   * `avro` - [Avro](http://avro.apache.org/)
+         # @param [String] compression The compression type to use for exported
+         #   files. Possible values include `GZIP` and `NONE`. The default value
+         #   is `NONE`.
+         # @param [String] delimiter Delimiter to use between fields in the
+         #   exported data. Default is <code>,</code>.
+         # @param [Boolean] header Whether to print out a header row in the
+         #   results. Default is `true`.
+         #
+         # @return [Google::Cloud::Bigquery::ExtractJob]
+         #
+         # @example
+         #   require "google/cloud"
+         #
+         #   gcloud = Google::Cloud.new
+         #   bigquery = gcloud.bigquery
+         #   dataset = bigquery.dataset "my_dataset"
+         #   table = dataset.table "my_table"
+         #
+         #   extract_job = table.extract "gs://my-bucket/file-name.json",
+         #     format: "json"
+         #
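+         # @example Export compressed, pipe-delimited CSV (illustrative)
+         #   extract_job = table.extract "gs://my-bucket/file-name.csv",
+         #     compression: "GZIP", delimiter: "|", header: false
+         #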
+         # @!group Data
+         #
+         def extract extract_url, format: nil, compression: nil, delimiter: nil,
+                     header: nil, dryrun: nil
+           ensure_service!
+           options = { format: format, compression: compression,
+                       delimiter: delimiter, header: header, dryrun: dryrun }
+           gapi = service.extract_table table_ref, extract_url, options
+           Job.from_gapi gapi, service
+         end
+
+         ##
+         # Loads data into the table. You can pass a Google Cloud Storage file
+         # path or a `Google::Cloud::Storage::File` instance. Or, you can upload
+         # a file directly. See [Loading Data with a POST Request](
+         # https://cloud.google.com/bigquery/loading-data-post-request#multipart).
+         #
+         # @param [File, Google::Cloud::Storage::File, String] file A file or the
+         #   URI of a Google Cloud Storage file containing data to load into the
+         #   table.
+         # @param [String] format The format of the data to load. The default
+         #   value is `csv`.
+         #
+         #   The following values are supported:
+         #
+         #   * `csv` - CSV
+         #   * `json` - [Newline-delimited JSON](http://jsonlines.org/)
+         #   * `avro` - [Avro](http://avro.apache.org/)
+         #   * `datastore_backup` - Cloud Datastore backup
+         # @param [String] create Specifies whether the job is allowed to create
+         #   new tables.
+         #
+         #   The following values are supported:
+         #
+         #   * `needed` - Create the table if it does not exist.
+         #   * `never` - The table must already exist. A 'notFound' error is
+         #     raised if the table does not exist.
+         # @param [String] write Specifies how to handle data already present in
+         #   the table. The default value is `empty`.
+         #
+         #   The following values are supported:
+         #
+         #   * `truncate` - BigQuery overwrites the table data.
+         #   * `append` - BigQuery appends the data to the table.
+         #   * `empty` - An error will be returned if the table already contains
+         #     data.
+         # @param [Array<String>] projection_fields If the `format` option is set
+         #   to `datastore_backup`, indicates which entity properties to load
+         #   from a Cloud Datastore backup. Property names are case sensitive and
+         #   must be top-level properties. If not set, BigQuery loads all
+         #   properties. If any named property isn't found in the Cloud Datastore
+         #   backup, an invalid error is returned.
+         # @param [Boolean] jagged_rows Accept rows that are missing trailing
+         #   optional columns. The missing values are treated as nulls. If
+         #   `false`, records with missing trailing columns are treated as bad
+         #   records, and if there are too many bad records, an invalid error is
+         #   returned in the job result. The default value is `false`. Only
+         #   applicable to CSV, ignored for other formats.
+         # @param [Boolean] quoted_newlines Indicates if BigQuery should allow
+         #   quoted data sections that contain newline characters in a CSV file.
+         #   The default value is `false`.
+         # @param [String] encoding The character encoding of the data. The
+         #   supported values are `UTF-8` or `ISO-8859-1`. The default value is
+         #   `UTF-8`.
+         # @param [String] delimiter Specifies the separator for fields in a CSV
+         #   file. BigQuery converts the string to `ISO-8859-1` encoding, and
+         #   then uses the first byte of the encoded string to split the data in
+         #   its raw, binary state. Default is <code>,</code>.
+         # @param [Boolean] ignore_unknown Indicates if BigQuery should allow
+         #   extra values that are not represented in the table schema. If true,
+         #   the extra values are ignored. If false, records with extra columns
+         #   are treated as bad records, and if there are too many bad records,
+         #   an invalid error is returned in the job result. The default value is
+         #   `false`.
+         #
+         #   The `format` property determines what BigQuery treats as an extra
+         #   value:
+         #
+         #   * `CSV`: Trailing columns
+         #   * `JSON`: Named values that don't match any column names
+         # @param [Integer] max_bad_records The maximum number of bad records
+         #   that BigQuery can ignore when running the job. If the number of bad
+         #   records exceeds this value, an invalid error is returned in the job
+         #   result. The default value is `0`, which requires that all records
+         #   are valid.
+         # @param [String] quote The value that is used to quote data sections in
+         #   a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
+         #   then uses the first byte of the encoded string to split the data in
+         #   its raw, binary state. The default value is a double-quote
+         #   <code>"</code>. If your data does not contain quoted sections, set
+         #   the property value to an empty string. If your data contains quoted
+         #   newline characters, you must also set the `quoted_newlines` property
+         #   to `true`.
+         # @param [Integer] skip_leading The number of rows at the top of a CSV
+         #   file that BigQuery will skip when loading the data. The default
+         #   value is `0`. This property is useful if you have header rows in the
+         #   file that should be skipped.
+         #
+         # @return [Google::Cloud::Bigquery::LoadJob]
+         #
+         # @example
+         #   require "google/cloud"
+         #
+         #   gcloud = Google::Cloud.new
+         #   bigquery = gcloud.bigquery
+         #   dataset = bigquery.dataset "my_dataset"
+         #   table = dataset.table "my_table"
+         #
+         #   load_job = table.load "gs://my-bucket/file-name.csv"
+         #
+         # @example Pass a Google Cloud Storage file instance:
+         #   require "google/cloud"
+         #   require "google/cloud/storage"
+         #
+         #   gcloud = Google::Cloud.new
+         #   bigquery = gcloud.bigquery
+         #   dataset = bigquery.dataset "my_dataset"
+         #   table = dataset.table "my_table"
+         #
+         #   storage = gcloud.storage
+         #   bucket = storage.bucket "my-bucket"
+         #   file = bucket.file "file-name.csv"
+         #   load_job = table.load file
+         #
+         # @example Upload a file directly:
+         #   require "google/cloud"
+         #
+         #   gcloud = Google::Cloud.new
+         #   bigquery = gcloud.bigquery
+         #   dataset = bigquery.dataset "my_dataset"
+         #   table = dataset.table "my_table"
+         #
+         #   file = File.open "my_data.csv"
+         #   load_job = table.load file
+         #
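+         # @example Set CSV options (an illustrative sketch)
+         #   # Skip one header row and tolerate up to ten bad records.
+         #   load_job = table.load "gs://my-bucket/file-name.csv",
+         #     skip_leading: 1, max_bad_records: 10
+         #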
+         # @!group Data
+         #
+         def load file, format: nil, create: nil, write: nil,
+                  projection_fields: nil, jagged_rows: nil, quoted_newlines: nil,
+                  encoding: nil, delimiter: nil, ignore_unknown: nil,
+                  max_bad_records: nil, quote: nil, skip_leading: nil,
+                  dryrun: nil
+           ensure_service!
+           options = { format: format, create: create, write: write,
+                       projection_fields: projection_fields,
+                       jagged_rows: jagged_rows,
+                       quoted_newlines: quoted_newlines, encoding: encoding,
+                       delimiter: delimiter, ignore_unknown: ignore_unknown,
+                       max_bad_records: max_bad_records, quote: quote,
+                       skip_leading: skip_leading, dryrun: dryrun }
+           return load_storage(file, options) if storage_url? file
+           return load_local(file, options) if local_file? file
+           fail Google::Cloud::Error, "Don't know how to load #{file}"
+         end
+
+         ##
+         # Inserts data into the table for near-immediate querying, without the
+         # need to complete a #load operation before the data can appear in query
+         # results.
+         #
+         # @see https://cloud.google.com/bigquery/streaming-data-into-bigquery
+         #   Streaming Data Into BigQuery
+         #
+         # @param [Hash, Array<Hash>] rows A hash object or array of hash objects
+         #   containing the data.
+         # @param [Boolean] skip_invalid Insert all valid rows of a request, even
+         #   if invalid rows exist. The default value is `false`, which causes
+         #   the entire request to fail if any invalid rows exist.
+         # @param [Boolean] ignore_unknown Accept rows that contain values that
+         #   do not match the schema. The unknown values are ignored. Default is
+         #   false, which treats unknown values as errors.
+         #
+         # @return [Google::Cloud::Bigquery::InsertResponse]
+         #
+         # @example
+         #   require "google/cloud"
+         #
+         #   gcloud = Google::Cloud.new
+         #   bigquery = gcloud.bigquery
+         #   dataset = bigquery.dataset "my_dataset"
+         #   table = dataset.table "my_table"
+         #
+         #   rows = [
+         #     { "first_name" => "Alice", "age" => 21 },
+         #     { "first_name" => "Bob", "age" => 22 }
+         #   ]
+         #   table.insert rows
+         #
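+         # @example Insert the valid rows even if some are invalid (a sketch)
+         #   response = table.insert rows, skip_invalid: true
+         #   puts "some rows were not inserted" unless response.success?
+         #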
+         # @!group Data
+         #
+         def insert rows, skip_invalid: nil, ignore_unknown: nil
+           rows = [rows] if rows.is_a? Hash
+           ensure_service!
+           options = { skip_invalid: skip_invalid,
+                       ignore_unknown: ignore_unknown }
+           gapi = service.insert_tabledata dataset_id, table_id, rows, options
+           InsertResponse.from_gapi rows, gapi
+         end
+
+         ##
+         # Permanently deletes the table.
+         #
+         # @return [Boolean] Returns `true` if the table was deleted.
+         #
+         # @example
+         #   require "google/cloud"
+         #
+         #   gcloud = Google::Cloud.new
+         #   bigquery = gcloud.bigquery
+         #   dataset = bigquery.dataset "my_dataset"
+         #   table = dataset.table "my_table"
+         #
+         #   table.delete
+         #
+         # @!group Lifecycle
+         #
+         def delete
+           ensure_service!
+           service.delete_table dataset_id, table_id
+           true
+         end
+
+         ##
+         # Reloads the table with current data from the BigQuery service.
+         #
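+         # @example
+         #   require "google/cloud"
+         #
+         #   gcloud = Google::Cloud.new
+         #   bigquery = gcloud.bigquery
+         #   dataset = bigquery.dataset "my_dataset"
+         #   table = dataset.table "my_table"
+         #
+         #   table.reload!
+         #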
+         # @!group Lifecycle
+         #
+         def reload!
+           ensure_service!
+           gapi = service.get_table dataset_id, table_id
+           @gapi = gapi
+         end
+         alias_method :refresh!, :reload!
+
+         ##
+         # @private New Table from a Google API Client object.
+         def self.from_gapi gapi, conn
+           klass = class_for gapi
+           klass.new.tap do |f|
+             f.gapi = gapi
+             f.service = conn
+           end
+         end
+
+         protected
+
+         ##
+         # Raise an error unless an active service is available.
+         def ensure_service!
+           fail "Must have active connection" unless service
+         end
+
+         def patch_gapi! *attributes
+           return if attributes.empty?
+           ensure_service!
+           patch_args = Hash[attributes.map do |attr|
+             [attr, @gapi.send(attr)]
+           end]
+           patch_gapi = Google::Apis::BigqueryV2::Table.new patch_args
+           @gapi = service.patch_table dataset_id, table_id, patch_gapi
+         end
+
+         def self.class_for gapi
+           return View if gapi.type == "VIEW"
+           self
+         end
+
+         def load_storage url, options = {}
+           # Convert to storage URL
+           url = url.to_gs_url if url.respond_to? :to_gs_url
+
+           gapi = service.load_table_gs_url dataset_id, table_id, url, options
+           Job.from_gapi gapi, service
+         end
+
+         def load_local file, options = {}
+           # Convert to storage URL
+           file = file.to_gs_url if file.respond_to? :to_gs_url
+
+           gapi = service.load_table_file dataset_id, table_id, file, options
+           Job.from_gapi gapi, service
+         end
+
+         def storage_url? file
+           file.respond_to?(:to_gs_url) ||
+             (file.respond_to?(:to_str) &&
+               file.to_str.downcase.start_with?("gs://"))
+         end
+
+         def local_file? file
+           ::File.file? file
+         rescue
+           false
+         end
+
+         ##
+         # Load the complete representation of the table if it has been
+         # only partially loaded by a request to the API list method.
+         def ensure_full_data!
+           reload_gapi! unless data_complete?
+         end
+
+         def reload_gapi!
+           ensure_service!
+           gapi = service.get_table dataset_id, table_id
+           @gapi = gapi
+         end
+
+         def data_complete?
+           @gapi.is_a? Google::Apis::BigqueryV2::Table
+         end
+
+         private
+
+         def get_table_ref table
+           if table.respond_to? :table_ref
+             table.table_ref
+           else
+             Service.table_ref_from_s table, table_ref
+           end
+         end
+
+         ##
+         # Yielded to a block to accumulate changes for a patch request.
+         class Updater < Table
+           ##
+           # A list of attributes that were updated.
+           attr_reader :updates
+
+           ##
+           # Create an Updater object.
+           def initialize gapi
+             @updates = []
+             @gapi = gapi
+             @schema = nil
+           end
+
+           ##
+           # Returns the table's schema. This method can also be used to set,
+           # replace, or add to the schema by passing a block. See {Schema} for
+           # available methods.
+           #
+           # @param [Boolean] replace Whether to replace the existing schema with
+           #   the new schema. If `true`, the fields will replace the existing
+           #   schema. If `false`, the fields will be added to the existing
+           #   schema. When a table already contains data, schema changes must be
+           #   additive. Thus, the default value is `false`.
+           # @yield [schema] a block for setting the schema
+           # @yieldparam [Schema] schema the object accepting the schema
+           #
+           # @return [Google::Cloud::Bigquery::Schema]
+           #
+           # @example
+           #   require "google/cloud"
+           #
+           #   gcloud = Google::Cloud.new
+           #   bigquery = gcloud.bigquery
+           #   dataset = bigquery.dataset "my_dataset"
+           #   table = dataset.create_table "my_table" do |t|
+           #     t.name = "My Table"
+           #     t.description = "A description of my table."
+           #     t.schema do |s|
+           #       s.string "first_name", mode: :required
+           #       s.record "cities_lived", mode: :repeated do |r|
+           #         r.string "place", mode: :required
+           #         r.integer "number_of_years", mode: :required
+           #       end
+           #     end
+           #   end
+           #
+           # @!group Schema
+           #
+           def schema replace: false
+             # Same as Table#schema, but not frozen
+             # TODO: make sure to call ensure_full_data! on Dataset#update
+             @schema ||= Schema.from_gapi @gapi.schema
+             if block_given?
+               if replace
+                 @schema = Schema.from_gapi \
+                   Google::Apis::BigqueryV2::TableSchema.new(fields: [])
+               end
+               yield @schema
+               check_for_mutated_schema!
+             end
+             # Do not freeze on updater, allow modifications
+             @schema
+           end
+
+           ##
+           # Adds a string field to the schema.
+           #
+           # See {Schema#string}.
+           #
+           # @param [String] name The field name. The name must contain only
+           #   letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
+           #   start with a letter or underscore. The maximum length is 128
+           #   characters.
+           # @param [String] description A description of the field.
+           # @param [Symbol] mode The field's mode. The possible values are
+           #   `:nullable`, `:required`, and `:repeated`. The default value is
+           #   `:nullable`.
+           #
+           # @example
+           #   require "google/cloud"
+           #
+           #   gcloud = Google::Cloud.new
+           #   bigquery = gcloud.bigquery
+           #   dataset = bigquery.dataset "my_dataset"
+           #   table = dataset.create_table "my_table" do |schema|
+           #     schema.string "first_name", mode: :required
+           #   end
+           #
+           # @!group Schema
+           def string name, description: nil, mode: :nullable
+             schema.string name, description: description, mode: mode
+           end
+
+           ##
+           # Adds an integer field to the schema.
+           #
+           # See {Schema#integer}.
+           #
+           # @param [String] name The field name. The name must contain only
+           #   letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
+           #   start with a letter or underscore. The maximum length is 128
+           #   characters.
+           # @param [String] description A description of the field.
+           # @param [Symbol] mode The field's mode. The possible values are
+           #   `:nullable`, `:required`, and `:repeated`. The default value is
+           #   `:nullable`.
+           #
+           # @example
+           #   require "google/cloud"
+           #
+           #   gcloud = Google::Cloud.new
+           #   bigquery = gcloud.bigquery
+           #   dataset = bigquery.dataset "my_dataset"
+           #   table = dataset.create_table "my_table" do |schema|
+           #     schema.integer "age", mode: :required
+           #   end
+           #
+           # @!group Schema
+           def integer name, description: nil, mode: :nullable
+             schema.integer name, description: description, mode: mode
+           end
+
+           ##
+           # Adds a floating-point number field to the schema.
+           #
+           # See {Schema#float}.
+           #
+           # @param [String] name The field name. The name must contain only
+           #   letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
+           #   start with a letter or underscore. The maximum length is 128
+           #   characters.
+           # @param [String] description A description of the field.
+           # @param [Symbol] mode The field's mode. The possible values are
+           #   `:nullable`, `:required`, and `:repeated`. The default value is
+           #   `:nullable`.
+           #
+           # @example
+           #   require "google/cloud"
+           #
+           #   gcloud = Google::Cloud.new
+           #   bigquery = gcloud.bigquery
+           #   dataset = bigquery.dataset "my_dataset"
+           #   table = dataset.create_table "my_table" do |schema|
+           #     schema.float "price", mode: :required
+           #   end
+           #
+           # @!group Schema
+           def float name, description: nil, mode: :nullable
+             schema.float name, description: description, mode: mode
+           end
+
+           ##
+           # Adds a boolean field to the schema.
+           #
+           # See {Schema#boolean}.
+           #
+           # @param [String] name The field name. The name must contain only
+           #   letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
+           #   start with a letter or underscore. The maximum length is 128
+           #   characters.
+           # @param [String] description A description of the field.
+           # @param [Symbol] mode The field's mode. The possible values are
+           #   `:nullable`, `:required`, and `:repeated`. The default value is
+           #   `:nullable`.
+           #
+           # @example
+           #   require "google/cloud"
+           #
+           #   gcloud = Google::Cloud.new
+           #   bigquery = gcloud.bigquery
+           #   dataset = bigquery.dataset "my_dataset"
+           #   table = dataset.create_table "my_table" do |schema|
+           #     schema.boolean "active", mode: :required
+           #   end
+           #
+           # @!group Schema
+           def boolean name, description: nil, mode: :nullable
+             schema.boolean name, description: description, mode: mode
+           end
+
+           ##
+           # Adds a timestamp field to the schema.
+           #
+           # See {Schema#timestamp}.
+           #
+           # @param [String] name The field name. The name must contain only
+           #   letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
+           #   start with a letter or underscore. The maximum length is 128
+           #   characters.
+           # @param [String] description A description of the field.
+           # @param [Symbol] mode The field's mode. The possible values are
+           #   `:nullable`, `:required`, and `:repeated`. The default value is
+           #   `:nullable`.
+           #
+           # @example
+           #   require "google/cloud"
+           #
+           #   gcloud = Google::Cloud.new
+           #   bigquery = gcloud.bigquery
+           #   dataset = bigquery.dataset "my_dataset"
+           #   table = dataset.create_table "my_table" do |schema|
+           #     schema.timestamp "creation_date", mode: :required
+           #   end
+           #
+           # @!group Schema
+           def timestamp name, description: nil, mode: :nullable
+             schema.timestamp name, description: description, mode: mode
+           end
+
+           ##
+           # Adds a record field to the schema. A block must be passed describing
+           # the nested fields of the record. For more information about nested
+           # and repeated records, see [Preparing Data for BigQuery
+           # ](https://cloud.google.com/bigquery/preparing-data-for-bigquery).
+           #
+           # See {Schema#record}.
+           #
+           # @param [String] name The field name. The name must contain only
+           #   letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
+           #   start with a letter or underscore. The maximum length is 128
+           #   characters.
+           # @param [String] description A description of the field.
+           # @param [Symbol] mode The field's mode. The possible values are
+           #   `:nullable`, `:required`, and `:repeated`. The default value is
+           #   `:nullable`.
+           # @yield [nested_schema] a block for setting the nested schema
+           # @yieldparam [Schema] nested_schema the object accepting the
+           #   nested schema
+           #
+           # @example
+           #   require "google/cloud"
+           #
+           #   gcloud = Google::Cloud.new
+           #   bigquery = gcloud.bigquery
+           #   dataset = bigquery.dataset "my_dataset"
+           #   table = dataset.create_table "my_table" do |schema|
+           #     schema.record "cities_lived", mode: :repeated do |cities_lived|
+           #       cities_lived.string "place", mode: :required
+           #       cities_lived.integer "number_of_years", mode: :required
+           #     end
+           #   end
+           #
+           # @!group Schema
+           #
+           def record name, description: nil, mode: nil, &block
+             schema.record name, description: description, mode: mode, &block
+           end
+
+           ##
+           # Make sure any schema changes are saved
+           def check_for_mutated_schema!
+             return if @schema.nil?
+             @schema.check_for_mutated_schema!
+             return unless @schema.changed?
+             @gapi.schema = @schema.to_gapi
+             patch_gapi! :schema
+           end
+
+           def to_gapi
+             check_for_mutated_schema!
+             @gapi
+           end
+
+           protected
+
+           ##
+           # Change to a NOOP
+           def ensure_full_data!
+             # Do nothing because we trust the gapi is full before we get here.
+           end
+
+           ##
+           # Queue up all the updates instead of making them.
+           def patch_gapi! attribute
+             @updates << attribute
+             @updates.uniq!
+           end
+         end
+       end
+     end
+   end
+ end