google-cloud-bigquery 1.12.0 → 1.38.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/AUTHENTICATION.md +9 -28
- data/CHANGELOG.md +372 -1
- data/CONTRIBUTING.md +328 -116
- data/LOGGING.md +2 -2
- data/OVERVIEW.md +21 -20
- data/TROUBLESHOOTING.md +2 -8
- data/lib/google/cloud/bigquery/argument.rb +197 -0
- data/lib/google/cloud/bigquery/convert.rb +154 -170
- data/lib/google/cloud/bigquery/copy_job.rb +40 -23
- data/lib/google/cloud/bigquery/credentials.rb +5 -12
- data/lib/google/cloud/bigquery/data.rb +109 -18
- data/lib/google/cloud/bigquery/dataset/access.rb +322 -51
- data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
- data/lib/google/cloud/bigquery/dataset.rb +960 -279
- data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
- data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
- data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
- data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
- data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
- data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
- data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
- data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
- data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
- data/lib/google/cloud/bigquery/external.rb +50 -2256
- data/lib/google/cloud/bigquery/extract_job.rb +217 -58
- data/lib/google/cloud/bigquery/insert_response.rb +1 -3
- data/lib/google/cloud/bigquery/job/list.rb +13 -20
- data/lib/google/cloud/bigquery/job.rb +286 -11
- data/lib/google/cloud/bigquery/load_job.rb +801 -133
- data/lib/google/cloud/bigquery/model/list.rb +5 -9
- data/lib/google/cloud/bigquery/model.rb +247 -16
- data/lib/google/cloud/bigquery/policy.rb +432 -0
- data/lib/google/cloud/bigquery/project/list.rb +6 -11
- data/lib/google/cloud/bigquery/project.rb +526 -243
- data/lib/google/cloud/bigquery/query_job.rb +584 -125
- data/lib/google/cloud/bigquery/routine/list.rb +165 -0
- data/lib/google/cloud/bigquery/routine.rb +1227 -0
- data/lib/google/cloud/bigquery/schema/field.rb +413 -63
- data/lib/google/cloud/bigquery/schema.rb +221 -48
- data/lib/google/cloud/bigquery/service.rb +186 -109
- data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
- data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -42
- data/lib/google/cloud/bigquery/table/list.rb +6 -11
- data/lib/google/cloud/bigquery/table.rb +1188 -326
- data/lib/google/cloud/bigquery/time.rb +6 -0
- data/lib/google/cloud/bigquery/version.rb +1 -1
- data/lib/google/cloud/bigquery.rb +18 -8
- data/lib/google-cloud-bigquery.rb +15 -13
- metadata +67 -40
@@ -18,6 +18,7 @@ require "google/cloud/errors"
|
|
18
18
|
require "google/cloud/bigquery/service"
|
19
19
|
require "google/cloud/bigquery/table"
|
20
20
|
require "google/cloud/bigquery/model"
|
21
|
+
require "google/cloud/bigquery/routine"
|
21
22
|
require "google/cloud/bigquery/external"
|
22
23
|
require "google/cloud/bigquery/dataset/list"
|
23
24
|
require "google/cloud/bigquery/dataset/access"
|
@@ -68,8 +69,8 @@ module Google
|
|
68
69
|
##
|
69
70
|
# A unique ID for this dataset, without the project name.
|
70
71
|
#
|
71
|
-
# @return [String] The ID must contain only letters (a-z, A-Z), numbers
|
72
|
-
# (0-9), or underscores (_). The maximum length is 1,024 characters.
|
72
|
+
# @return [String] The ID must contain only letters (`[A-Za-z]`), numbers
|
73
|
+
# (`[0-9]`), or underscores (`_`). The maximum length is 1,024 characters.
|
73
74
|
#
|
74
75
|
# @!group Attributes
|
75
76
|
#
|
@@ -312,12 +313,19 @@ module Google
|
|
312
313
|
# @param [Hash<String, String>] labels A hash containing key/value
|
313
314
|
# pairs.
|
314
315
|
#
|
315
|
-
#
|
316
|
-
#
|
317
|
-
#
|
318
|
-
# *
|
319
|
-
# *
|
320
|
-
#
|
316
|
+
# The labels applied to a resource must meet the following requirements:
|
317
|
+
#
|
318
|
+
# * Each resource can have multiple labels, up to a maximum of 64.
|
319
|
+
# * Each label must be a key-value pair.
|
320
|
+
# * Keys have a minimum length of 1 character and a maximum length of
|
321
|
+
# 63 characters, and cannot be empty. Values can be empty, and have
|
322
|
+
# a maximum length of 63 characters.
|
323
|
+
# * Keys and values can contain only lowercase letters, numeric characters,
|
324
|
+
# underscores, and dashes. All characters must use UTF-8 encoding, and
|
325
|
+
# international characters are allowed.
|
326
|
+
# * The key portion of a label must be unique. However, you can use the
|
327
|
+
# same key with multiple resources.
|
328
|
+
# * Keys must start with a lowercase letter or international character.
|
321
329
|
#
|
322
330
|
# @example
|
323
331
|
# require "google/cloud/bigquery"
|
@@ -335,6 +343,75 @@ module Google
|
|
335
343
|
patch_gapi! :labels
|
336
344
|
end
|
337
345
|
|
346
|
+
##
|
347
|
+
# The {EncryptionConfiguration} object that represents the default
|
348
|
+
# encryption method for all tables and models in the dataset. Once this
|
349
|
+
# property is set, all newly-created partitioned tables and models in
|
350
|
+
# the dataset will have their encryption set to this value, unless the table
|
351
|
+
# creation request (or query) overrides it.
|
352
|
+
#
|
353
|
+
# Present only if this dataset is using custom default encryption.
|
354
|
+
#
|
355
|
+
# @see https://cloud.google.com/bigquery/docs/customer-managed-encryption
|
356
|
+
# Protecting Data with Cloud KMS Keys
|
357
|
+
#
|
358
|
+
# @return [EncryptionConfiguration, nil] The default encryption
|
359
|
+
# configuration.
|
360
|
+
#
|
361
|
+
# @!group Attributes
|
362
|
+
#
|
363
|
+
# @example
|
364
|
+
# require "google/cloud/bigquery"
|
365
|
+
#
|
366
|
+
# bigquery = Google::Cloud::Bigquery.new
|
367
|
+
# dataset = bigquery.dataset "my_dataset"
|
368
|
+
#
|
369
|
+
# encrypt_config = dataset.default_encryption
|
370
|
+
#
|
371
|
+
# @!group Attributes
|
372
|
+
#
|
373
|
+
def default_encryption
|
374
|
+
return nil if reference?
|
375
|
+
ensure_full_data!
|
376
|
+
return nil if @gapi.default_encryption_configuration.nil?
|
377
|
+
EncryptionConfiguration.from_gapi(@gapi.default_encryption_configuration).freeze
|
378
|
+
end
|
379
|
+
|
380
|
+
##
|
381
|
+
# Set the {EncryptionConfiguration} object that represents the default
|
382
|
+
# encryption method for all tables and models in the dataset. Once this
|
383
|
+
# property is set, all newly-created partitioned tables and models in
|
384
|
+
# the dataset will have their encryption set to this value, unless the table
|
385
|
+
# creation request (or query) overrides it.
|
386
|
+
#
|
387
|
+
# If the dataset is not a full resource representation (see
|
388
|
+
# {#resource_full?}), the full representation will be retrieved before
|
389
|
+
# the update to comply with ETag-based optimistic concurrency control.
|
390
|
+
#
|
391
|
+
# @see https://cloud.google.com/bigquery/docs/customer-managed-encryption
|
392
|
+
# Protecting Data with Cloud KMS Keys
|
393
|
+
#
|
394
|
+
# @param [EncryptionConfiguration] value The new encryption config.
|
395
|
+
#
|
396
|
+
# @example
|
397
|
+
# require "google/cloud/bigquery"
|
398
|
+
#
|
399
|
+
# bigquery = Google::Cloud::Bigquery.new
|
400
|
+
# dataset = bigquery.dataset "my_dataset"
|
401
|
+
#
|
402
|
+
# key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
|
403
|
+
# encrypt_config = bigquery.encryption kms_key: key_name
|
404
|
+
#
|
405
|
+
# dataset.default_encryption = encrypt_config
|
406
|
+
#
|
407
|
+
# @!group Attributes
|
408
|
+
#
|
409
|
+
def default_encryption= value
|
410
|
+
ensure_full_data!
|
411
|
+
@gapi.default_encryption_configuration = value.to_gapi
|
412
|
+
patch_gapi! :default_encryption_configuration
|
413
|
+
end
|
414
|
+
|
338
415
|
##
|
339
416
|
# Retrieves the access rules for a Dataset. The rules can be updated
|
340
417
|
# when passing a block, see {Dataset::Access} for all the methods
|
@@ -424,7 +501,7 @@ module Google
|
|
424
501
|
# you can pass the table's schema as a hash (see example.)
|
425
502
|
#
|
426
503
|
# @param [String] table_id The ID of the table. The ID must contain only
|
427
|
-
# letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum
|
504
|
+
# letters (`[A-Za-z]`), numbers (`[0-9]`), or underscores (`_`). The maximum
|
428
505
|
# length is 1,024 characters.
|
429
506
|
# @param [String] name A descriptive name for the table.
|
430
507
|
# @param [String] description A user-friendly description of the table.
|
@@ -485,6 +562,40 @@ module Google
|
|
485
562
|
# end
|
486
563
|
# end
|
487
564
|
#
|
565
|
+
# @example With time partitioning and clustering.
|
566
|
+
# require "google/cloud/bigquery"
|
567
|
+
#
|
568
|
+
# bigquery = Google::Cloud::Bigquery.new
|
569
|
+
# dataset = bigquery.dataset "my_dataset"
|
570
|
+
#
|
571
|
+
# table = dataset.create_table "my_table" do |t|
|
572
|
+
# t.schema do |schema|
|
573
|
+
# schema.timestamp "dob", mode: :required
|
574
|
+
# schema.string "first_name", mode: :required
|
575
|
+
# schema.string "last_name", mode: :required
|
576
|
+
# end
|
577
|
+
# t.time_partitioning_type = "DAY"
|
578
|
+
# t.time_partitioning_field = "dob"
|
579
|
+
# t.clustering_fields = ["last_name", "first_name"]
|
580
|
+
# end
|
581
|
+
#
|
582
|
+
# @example With range partitioning.
|
583
|
+
# require "google/cloud/bigquery"
|
584
|
+
#
|
585
|
+
# bigquery = Google::Cloud::Bigquery.new
|
586
|
+
# dataset = bigquery.dataset "my_dataset"
|
587
|
+
#
|
588
|
+
# table = dataset.create_table "my_table" do |t|
|
589
|
+
# t.schema do |schema|
|
590
|
+
# schema.integer "my_table_id", mode: :required
|
591
|
+
# schema.string "my_table_data", mode: :required
|
592
|
+
# end
|
593
|
+
# t.range_partitioning_field = "my_table_id"
|
594
|
+
# t.range_partitioning_start = 0
|
595
|
+
# t.range_partitioning_interval = 10
|
596
|
+
# t.range_partitioning_end = 100
|
597
|
+
# end
|
598
|
+
#
|
488
599
|
# @!group Table
|
489
600
|
#
|
490
601
|
def create_table table_id, name: nil, description: nil
|
@@ -507,17 +618,19 @@ module Google
|
|
507
618
|
end
|
508
619
|
|
509
620
|
##
|
510
|
-
# Creates a new
|
511
|
-
# table, which is a virtual table defined by the given SQL query.
|
621
|
+
# Creates a new view, which is a virtual table defined by the given SQL query.
|
512
622
|
#
|
513
|
-
# BigQuery's
|
514
|
-
#
|
515
|
-
# the view is queried. Queries are billed according to the total amount
|
623
|
+
# With BigQuery's logical views, the query that defines the view is re-executed
|
624
|
+
# every time the view is queried. Queries are billed according to the total amount
|
516
625
|
# of data in all table fields referenced directly or indirectly by the
|
517
626
|
# top-level query. (See {Table#view?} and {Table#query}.)
|
518
627
|
#
|
628
|
+
# For materialized views, see {#create_materialized_view}.
|
629
|
+
#
|
630
|
+
# @see https://cloud.google.com/bigquery/docs/views Creating views
|
631
|
+
#
|
519
632
|
# @param [String] table_id The ID of the view table. The ID must contain
|
520
|
-
# only letters (a-z, A-Z), numbers (0-9), or underscores (_). The
|
633
|
+
# only letters (`[A-Za-z]`), numbers (`[0-9]`), or underscores (`_`). The
|
521
634
|
# maximum length is 1,024 characters.
|
522
635
|
# @param [String] query The query that BigQuery executes when the view
|
523
636
|
# is referenced.
|
@@ -532,12 +645,20 @@ module Google
|
|
532
645
|
# SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
|
533
646
|
# dialect. Optional. The default value is false.
|
534
647
|
# @param [Array<String>, String] udfs User-defined function resources
|
535
|
-
# used in
|
536
|
-
# Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
|
648
|
+
# used in a legacy SQL query. May be either a code resource to load from
|
649
|
+
# a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
|
537
650
|
# that contains code for a user-defined function (UDF). Providing an
|
538
651
|
# inline code resource is equivalent to providing a URI for a file
|
539
|
-
# containing the same code.
|
540
|
-
#
|
652
|
+
# containing the same code.
|
653
|
+
#
|
654
|
+
# This parameter is used for defining User Defined Function (UDF)
|
655
|
+
# resources only when using legacy SQL. Users of standard SQL should
|
656
|
+
# leverage either DDL (e.g. `CREATE [TEMPORARY] FUNCTION ...`) or the
|
657
|
+
# Routines API to define UDF resources.
|
658
|
+
#
|
659
|
+
# For additional information on migrating, see: [Migrating to
|
660
|
+
# standard SQL - Differences in user-defined JavaScript
|
661
|
+
# functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/migrating-from-legacy-sql#differences_in_user-defined_javascript_functions)
|
541
662
|
#
|
542
663
|
# @return [Google::Cloud::Bigquery::Table] A new table object.
|
543
664
|
#
|
@@ -548,7 +669,7 @@ module Google
|
|
548
669
|
# dataset = bigquery.dataset "my_dataset"
|
549
670
|
#
|
550
671
|
# view = dataset.create_view "my_view",
|
551
|
-
#
|
672
|
+
# "SELECT name, age FROM proj.dataset.users"
|
552
673
|
#
|
553
674
|
# @example A name and description can be provided:
|
554
675
|
# require "google/cloud/bigquery"
|
@@ -557,13 +678,18 @@ module Google
|
|
557
678
|
# dataset = bigquery.dataset "my_dataset"
|
558
679
|
#
|
559
680
|
# view = dataset.create_view "my_view",
|
560
|
-
#
|
561
|
-
#
|
681
|
+
# "SELECT name, age FROM proj.dataset.users",
|
682
|
+
# name: "My View", description: "This is my view"
|
562
683
|
#
|
563
684
|
# @!group Table
|
564
685
|
#
|
565
|
-
def create_view table_id,
|
566
|
-
|
686
|
+
def create_view table_id,
|
687
|
+
query,
|
688
|
+
name: nil,
|
689
|
+
description: nil,
|
690
|
+
standard_sql: nil,
|
691
|
+
legacy_sql: nil,
|
692
|
+
udfs: nil
|
567
693
|
use_legacy_sql = Convert.resolve_legacy_sql standard_sql, legacy_sql
|
568
694
|
new_view_opts = {
|
569
695
|
table_reference: Google::Apis::BigqueryV2::TableReference.new(
|
@@ -579,7 +705,81 @@ module Google
|
|
579
705
|
user_defined_function_resources: udfs_gapi(udfs)
|
580
706
|
)
|
581
707
|
}.delete_if { |_, v| v.nil? }
|
582
|
-
new_view = Google::Apis::BigqueryV2::Table.new
|
708
|
+
new_view = Google::Apis::BigqueryV2::Table.new(**new_view_opts)
|
709
|
+
|
710
|
+
gapi = service.insert_table dataset_id, new_view
|
711
|
+
Table.from_gapi gapi, service
|
712
|
+
end
|
713
|
+
|
714
|
+
##
|
715
|
+
# Creates a new materialized view.
|
716
|
+
#
|
717
|
+
# Materialized views are precomputed views that periodically cache results of a query for increased performance
|
718
|
+
# and efficiency. BigQuery leverages precomputed results from materialized views and whenever possible reads
|
719
|
+
# only delta changes from the base table to compute up-to-date results.
|
720
|
+
#
|
721
|
+
# Queries that use materialized views are generally faster and consume fewer resources than queries that retrieve
|
722
|
+
# the same data only from the base table. Materialized views are helpful to significantly boost performance of
|
723
|
+
# workloads that have the characteristic of common and repeated queries.
|
724
|
+
#
|
725
|
+
# For logical views, see {#create_view}.
|
726
|
+
#
|
727
|
+
# @see https://cloud.google.com/bigquery/docs/materialized-views-intro Introduction to materialized views
|
728
|
+
#
|
729
|
+
# @param [String] table_id The ID of the materialized view table. The ID must contain only letters (`[A-Za-z]`),
|
730
|
+
# numbers (`[0-9]`), or underscores (`_`). The maximum length is 1,024 characters.
|
731
|
+
# @param [String] query The query that BigQuery executes when the materialized view is referenced.
|
732
|
+
# @param [String] name A descriptive name for the table.
|
733
|
+
# @param [String] description A user-friendly description of the table.
|
734
|
+
# @param [Boolean] enable_refresh Enable automatic refresh of the materialized view when the base table is
|
735
|
+
# updated. Optional. The default value is true.
|
736
|
+
# @param [Integer] refresh_interval_ms The maximum frequency in milliseconds at which this materialized view
|
737
|
+
# will be refreshed. Optional. The default value is `1_800_000` (30 minutes).
|
738
|
+
#
|
739
|
+
# @return [Google::Cloud::Bigquery::Table] A new table object.
|
740
|
+
#
|
741
|
+
# @example
|
742
|
+
# require "google/cloud/bigquery"
|
743
|
+
#
|
744
|
+
# bigquery = Google::Cloud::Bigquery.new
|
745
|
+
# dataset = bigquery.dataset "my_dataset"
|
746
|
+
#
|
747
|
+
# materialized_view = dataset.create_materialized_view "my_materialized_view",
|
748
|
+
# "SELECT name, age FROM proj.dataset.users"
|
749
|
+
#
|
750
|
+
# @example Automatic refresh can be disabled:
|
751
|
+
# require "google/cloud/bigquery"
|
752
|
+
#
|
753
|
+
# bigquery = Google::Cloud::Bigquery.new
|
754
|
+
# dataset = bigquery.dataset "my_dataset"
|
755
|
+
#
|
756
|
+
# materialized_view = dataset.create_materialized_view "my_materialized_view",
|
757
|
+
# "SELECT name, age FROM proj.dataset.users",
|
758
|
+
# enable_refresh: false
|
759
|
+
#
|
760
|
+
# @!group Table
|
761
|
+
#
|
762
|
+
def create_materialized_view table_id,
|
763
|
+
query,
|
764
|
+
name: nil,
|
765
|
+
description: nil,
|
766
|
+
enable_refresh: nil,
|
767
|
+
refresh_interval_ms: nil
|
768
|
+
new_view_opts = {
|
769
|
+
table_reference: Google::Apis::BigqueryV2::TableReference.new(
|
770
|
+
project_id: project_id,
|
771
|
+
dataset_id: dataset_id,
|
772
|
+
table_id: table_id
|
773
|
+
),
|
774
|
+
friendly_name: name,
|
775
|
+
description: description,
|
776
|
+
materialized_view: Google::Apis::BigqueryV2::MaterializedViewDefinition.new(
|
777
|
+
enable_refresh: enable_refresh,
|
778
|
+
query: query,
|
779
|
+
refresh_interval_ms: refresh_interval_ms
|
780
|
+
)
|
781
|
+
}.delete_if { |_, v| v.nil? }
|
782
|
+
new_view = Google::Apis::BigqueryV2::Table.new(**new_view_opts)
|
583
783
|
|
584
784
|
gapi = service.insert_table dataset_id, new_view
|
585
785
|
Table.from_gapi gapi, service
|
@@ -619,9 +819,7 @@ module Google
|
|
619
819
|
#
|
620
820
|
def table table_id, skip_lookup: nil
|
621
821
|
ensure_service!
|
622
|
-
if skip_lookup
|
623
|
-
return Table.new_reference project_id, dataset_id, table_id, service
|
624
|
-
end
|
822
|
+
return Table.new_reference project_id, dataset_id, table_id, service if skip_lookup
|
625
823
|
gapi = service.get_table dataset_id, table_id
|
626
824
|
Table.from_gapi gapi, service
|
627
825
|
rescue Google::Cloud::NotFoundError
|
@@ -664,8 +862,7 @@ module Google
|
|
664
862
|
#
|
665
863
|
def tables token: nil, max: nil
|
666
864
|
ensure_service!
|
667
|
-
|
668
|
-
gapi = service.list_tables dataset_id, options
|
865
|
+
gapi = service.list_tables dataset_id, token: token, max: max
|
669
866
|
Table::List.from_gapi gapi, service, dataset_id, max
|
670
867
|
end
|
671
868
|
|
@@ -703,9 +900,7 @@ module Google
|
|
703
900
|
#
|
704
901
|
def model model_id, skip_lookup: nil
|
705
902
|
ensure_service!
|
706
|
-
if skip_lookup
|
707
|
-
return Model.new_reference project_id, dataset_id, model_id, service
|
708
|
-
end
|
903
|
+
return Model.new_reference project_id, dataset_id, model_id, service if skip_lookup
|
709
904
|
gapi = service.get_model dataset_id, model_id
|
710
905
|
Model.from_gapi_json gapi, service
|
711
906
|
rescue Google::Cloud::NotFoundError
|
@@ -752,6 +947,174 @@ module Google
|
|
752
947
|
Model::List.from_gapi gapi, service, dataset_id, max
|
753
948
|
end
|
754
949
|
|
950
|
+
##
|
951
|
+
# Creates a new routine. The following attributes may be set in the yielded block:
|
952
|
+
# {Routine::Updater#routine_type=}, {Routine::Updater#language=}, {Routine::Updater#arguments=},
|
953
|
+
# {Routine::Updater#return_type=}, {Routine::Updater#imported_libraries=}, {Routine::Updater#body=}, and
|
954
|
+
# {Routine::Updater#description=}.
|
955
|
+
#
|
956
|
+
# @param [String] routine_id The ID of the routine. The ID must contain only
|
957
|
+
# letters (`[A-Za-z]`), numbers (`[0-9]`), or underscores (`_`). The maximum length
|
958
|
+
# is 256 characters.
|
959
|
+
# @yield [routine] A block for setting properties on the routine.
|
960
|
+
# @yieldparam [Google::Cloud::Bigquery::Routine::Updater] routine An updater to set additional properties on the
|
961
|
+
# routine.
|
962
|
+
#
|
963
|
+
# @return [Google::Cloud::Bigquery::Routine] A new routine object.
|
964
|
+
#
|
965
|
+
# @example
|
966
|
+
# require "google/cloud/bigquery"
|
967
|
+
#
|
968
|
+
# bigquery = Google::Cloud::Bigquery.new
|
969
|
+
# dataset = bigquery.dataset "my_dataset"
|
970
|
+
#
|
971
|
+
# routine = dataset.create_routine "my_routine" do |r|
|
972
|
+
# r.routine_type = "SCALAR_FUNCTION"
|
973
|
+
# r.language = "SQL"
|
974
|
+
# r.arguments = [
|
975
|
+
# Google::Cloud::Bigquery::Argument.new(name: "x", data_type: "INT64")
|
976
|
+
# ]
|
977
|
+
# r.body = "x * 3"
|
978
|
+
# r.description = "My routine description"
|
979
|
+
# end
|
980
|
+
#
|
981
|
+
# puts routine.routine_id
|
982
|
+
#
|
983
|
+
# @example Extended example:
|
984
|
+
# require "google/cloud/bigquery"
|
985
|
+
#
|
986
|
+
# bigquery = Google::Cloud::Bigquery.new
|
987
|
+
# dataset = bigquery.dataset "my_dataset"
|
988
|
+
# routine = dataset.create_routine "my_routine" do |r|
|
989
|
+
# r.routine_type = "SCALAR_FUNCTION"
|
990
|
+
# r.language = :SQL
|
991
|
+
# r.body = "(SELECT SUM(IF(elem.name = \"foo\",elem.val,null)) FROM UNNEST(arr) AS elem)"
|
992
|
+
# r.arguments = [
|
993
|
+
# Google::Cloud::Bigquery::Argument.new(
|
994
|
+
# name: "arr",
|
995
|
+
# argument_kind: "FIXED_TYPE",
|
996
|
+
# data_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
|
997
|
+
# type_kind: "ARRAY",
|
998
|
+
# array_element_type: Google::Cloud::Bigquery::StandardSql::DataType.new(
|
999
|
+
# type_kind: "STRUCT",
|
1000
|
+
# struct_type: Google::Cloud::Bigquery::StandardSql::StructType.new(
|
1001
|
+
# fields: [
|
1002
|
+
# Google::Cloud::Bigquery::StandardSql::Field.new(
|
1003
|
+
# name: "name",
|
1004
|
+
# type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "STRING")
|
1005
|
+
# ),
|
1006
|
+
# Google::Cloud::Bigquery::StandardSql::Field.new(
|
1007
|
+
# name: "val",
|
1008
|
+
# type: Google::Cloud::Bigquery::StandardSql::DataType.new(type_kind: "INT64")
|
1009
|
+
# )
|
1010
|
+
# ]
|
1011
|
+
# )
|
1012
|
+
# )
|
1013
|
+
# )
|
1014
|
+
# )
|
1015
|
+
# ]
|
1016
|
+
# end
|
1017
|
+
#
|
1018
|
+
# @!group Routine
|
1019
|
+
#
|
1020
|
+
def create_routine routine_id
|
1021
|
+
ensure_service!
|
1022
|
+
new_tb = Google::Apis::BigqueryV2::Routine.new(
|
1023
|
+
routine_reference: Google::Apis::BigqueryV2::RoutineReference.new(
|
1024
|
+
project_id: project_id, dataset_id: dataset_id, routine_id: routine_id
|
1025
|
+
)
|
1026
|
+
)
|
1027
|
+
updater = Routine::Updater.new new_tb
|
1028
|
+
|
1029
|
+
yield updater if block_given?
|
1030
|
+
|
1031
|
+
gapi = service.insert_routine dataset_id, updater.to_gapi
|
1032
|
+
Routine.from_gapi gapi, service
|
1033
|
+
end
|
1034
|
+
|
1035
|
+
##
|
1036
|
+
# Retrieves an existing routine by ID.
|
1037
|
+
#
|
1038
|
+
# @param [String] routine_id The ID of a routine.
|
1039
|
+
# @param [Boolean] skip_lookup Optionally create just a local reference
|
1040
|
+
# object without verifying that the resource exists on the BigQuery
|
1041
|
+
# service. Calls made on this object will raise errors if the resource
|
1042
|
+
# does not exist. Default is `false`. Optional.
|
1043
|
+
#
|
1044
|
+
# @return [Google::Cloud::Bigquery::Routine, nil] Returns `nil` if the
|
1045
|
+
# routine does not exist.
|
1046
|
+
#
|
1047
|
+
# @example
|
1048
|
+
# require "google/cloud/bigquery"
|
1049
|
+
#
|
1050
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1051
|
+
# dataset = bigquery.dataset "my_dataset"
|
1052
|
+
#
|
1053
|
+
# routine = dataset.routine "my_routine"
|
1054
|
+
# puts routine.routine_id
|
1055
|
+
#
|
1056
|
+
# @example Avoid retrieving the routine resource with `skip_lookup`:
|
1057
|
+
# require "google/cloud/bigquery"
|
1058
|
+
#
|
1059
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1060
|
+
#
|
1061
|
+
# dataset = bigquery.dataset "my_dataset"
|
1062
|
+
#
|
1063
|
+
# routine = dataset.routine "my_routine", skip_lookup: true
|
1064
|
+
#
|
1065
|
+
# @!group Routine
|
1066
|
+
#
|
1067
|
+
def routine routine_id, skip_lookup: nil
|
1068
|
+
ensure_service!
|
1069
|
+
return Routine.new_reference project_id, dataset_id, routine_id, service if skip_lookup
|
1070
|
+
gapi = service.get_routine dataset_id, routine_id
|
1071
|
+
Routine.from_gapi gapi, service
|
1072
|
+
rescue Google::Cloud::NotFoundError
|
1073
|
+
nil
|
1074
|
+
end
|
1075
|
+
|
1076
|
+
##
|
1077
|
+
# Retrieves the list of routines belonging to the dataset.
|
1078
|
+
#
|
1079
|
+
# @param [String] token A previously-returned page token representing
|
1080
|
+
# part of the larger set of results to view.
|
1081
|
+
# @param [Integer] max Maximum number of routines to return.
|
1082
|
+
# @param [String] filter If set, then only the routines matching this filter are returned. The current supported
|
1083
|
+
# form is `routineType:`, with a {Routine#routine_type} enum value. Example: `routineType:SCALAR_FUNCTION`.
|
1084
|
+
#
|
1085
|
+
# @return [Array<Google::Cloud::Bigquery::Routine>] An array of routines
|
1086
|
+
# (See {Google::Cloud::Bigquery::Routine::List})
|
1087
|
+
#
|
1088
|
+
# @example
|
1089
|
+
# require "google/cloud/bigquery"
|
1090
|
+
#
|
1091
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1092
|
+
# dataset = bigquery.dataset "my_dataset"
|
1093
|
+
#
|
1094
|
+
# routines = dataset.routines
|
1095
|
+
# routines.each do |routine|
|
1096
|
+
# puts routine.routine_id
|
1097
|
+
# end
|
1098
|
+
#
|
1099
|
+
# @example Retrieve all routines: (See {Routine::List#all})
|
1100
|
+
# require "google/cloud/bigquery"
|
1101
|
+
#
|
1102
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1103
|
+
# dataset = bigquery.dataset "my_dataset"
|
1104
|
+
#
|
1105
|
+
# routines = dataset.routines
|
1106
|
+
# routines.all do |routine|
|
1107
|
+
# puts routine.routine_id
|
1108
|
+
# end
|
1109
|
+
#
|
1110
|
+
# @!group Routine
|
1111
|
+
#
|
1112
|
+
def routines token: nil, max: nil, filter: nil
|
1113
|
+
ensure_service!
|
1114
|
+
gapi = service.list_routines dataset_id, token: token, max: max, filter: filter
|
1115
|
+
Routine::List.from_gapi gapi, service, dataset_id, max, filter: filter
|
1116
|
+
end
|
1117
|
+
|
755
1118
|
##
|
756
1119
|
# Queries data by creating a [query
|
757
1120
|
# job](https://cloud.google.com/bigquery/docs/query-overview#query_jobs).
|
@@ -759,27 +1122,6 @@ module Google
|
|
759
1122
|
# Sets the current dataset as the default dataset in the query. Useful
|
760
1123
|
# for using unqualified table names.
|
761
1124
|
#
|
762
|
-
# When using standard SQL and passing arguments using `params`, Ruby
|
763
|
-
# types are mapped to BigQuery types as follows:
|
764
|
-
#
|
765
|
-
# | BigQuery | Ruby | Notes |
|
766
|
-
# |-------------|----------------|---|
|
767
|
-
# | `BOOL` | `true`/`false` | |
|
768
|
-
# | `INT64` | `Integer` | |
|
769
|
-
# | `FLOAT64` | `Float` | |
|
770
|
-
# | `NUMERIC` | `BigDecimal` | Will be rounded to 9 decimal places |
|
771
|
-
# | `STRING` | `String` | |
|
772
|
-
# | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
|
773
|
-
# | `DATE` | `Date` | |
|
774
|
-
# | `TIMESTAMP` | `Time` | |
|
775
|
-
# | `TIME` | `Google::Cloud::BigQuery::Time` | |
|
776
|
-
# | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
|
777
|
-
# | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
|
778
|
-
# | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
|
779
|
-
#
|
780
|
-
# See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types)
|
781
|
-
# for an overview of each BigQuery data type, including allowed values.
|
782
|
-
#
|
783
1125
|
# The geographic location for the job ("US", "EU", etc.) can be set via
|
784
1126
|
# {QueryJob::Updater#location=} in a block passed to this method. If the
|
785
1127
|
# dataset is a full resource representation (see {#resource_full?}), the
|
@@ -790,13 +1132,60 @@ module Google
|
|
790
1132
|
# syntax](https://cloud.google.com/bigquery/query-reference), of the
|
791
1133
|
# query to execute. Example: "SELECT count(f1) FROM
|
792
1134
|
# [myProjectId:myDatasetId.myTableId]".
|
793
|
-
# @param [Array, Hash] params Standard SQL only. Used to pass query
|
794
|
-
#
|
795
|
-
#
|
796
|
-
#
|
797
|
-
#
|
798
|
-
#
|
799
|
-
#
|
1135
|
+
# @param [Array, Hash] params Standard SQL only. Used to pass query arguments when the `query` string contains
|
1136
|
+
# either positional (`?`) or named (`@myparam`) query parameters. If value passed is an array `["foo"]`, the
|
1137
|
+
# query must use positional query parameters. If value passed is a hash `{ myparam: "foo" }`, the query must
|
1138
|
+
# use named query parameters. When set, `legacy_sql` will automatically be set to false and `standard_sql` to
|
1139
|
+
# true.
|
1140
|
+
#
|
1141
|
+
# BigQuery types are converted from Ruby types as follows:
|
1142
|
+
#
|
1143
|
+
# | BigQuery | Ruby | Notes |
|
1144
|
+
# |--------------|--------------------------------------|----------------------------------------------------|
|
1145
|
+
# | `BOOL` | `true`/`false` | |
|
1146
|
+
# | `INT64` | `Integer` | |
|
1147
|
+
# | `FLOAT64` | `Float` | |
|
1148
|
+
# | `NUMERIC` | `BigDecimal` | `BigDecimal` values will be rounded to scale 9. |
|
1149
|
+
# | `BIGNUMERIC` | `BigDecimal` | NOT AUTOMATIC: Must be mapped using `types`, below.|
|
1150
|
+
# | `STRING` | `String` | |
|
1151
|
+
# | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
|
1152
|
+
# | `DATE` | `Date` | |
|
1153
|
+
# | `GEOGRAPHY` | `String` (WKT or GeoJSON) | NOT AUTOMATIC: Must be mapped using `types`, below.|
|
1154
|
+
# | `TIMESTAMP` | `Time` | |
|
1155
|
+
# | `TIME` | `Google::Cloud::Bigquery::Time` | |
|
1156
|
+
# | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
|
1157
|
+
# | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
|
1158
|
+
# | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
|
1159
|
+
#
|
1160
|
+
# See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
|
1161
|
+
# of each BigQuery data type, including allowed values. For the `GEOGRAPHY` type, see [Working with BigQuery
|
1162
|
+
# GIS data](https://cloud.google.com/bigquery/docs/gis-data).
|
1163
|
+
# @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
|
1164
|
+
# possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
|
1165
|
+
# specify the SQL type for these values.
|
1166
|
+
#
|
1167
|
+
# Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
|
1168
|
+
# positional query parameters. This must be a `Hash` when the query uses named query parameters. The values
|
1169
|
+
# should be BigQuery type codes from the following list:
|
1170
|
+
#
|
1171
|
+
# * `:BOOL`
|
1172
|
+
# * `:INT64`
|
1173
|
+
# * `:FLOAT64`
|
1174
|
+
# * `:NUMERIC`
|
1175
|
+
# * `:BIGNUMERIC`
|
1176
|
+
# * `:STRING`
|
1177
|
+
# * `:DATETIME`
|
1178
|
+
# * `:DATE`
|
1179
|
+
# * `:GEOGRAPHY`
|
1180
|
+
# * `:TIMESTAMP`
|
1181
|
+
# * `:TIME`
|
1182
|
+
# * `:BYTES`
|
1183
|
+
# * `Array` - Lists are specified by providing the type code in an array. For example, an array of integers
|
1184
|
+
# is specified as `[:INT64]`.
|
1185
|
+
# * `Hash` - Types for STRUCT values (`Hash` objects) are specified using a `Hash` object, where the keys
|
1186
|
+
# match the `params` hash, and the values are the type codes that match the data.
|
1187
|
+
#
|
1188
|
+
# Types are optional.
|
800
1189
|
# @param [Hash<String|Symbol, External::DataSource>] external A Hash
|
801
1190
|
# that represents the mapping of the external tables to the table
|
802
1191
|
# names used in the SQL query. The hash keys are the table names, and
|
@@ -855,13 +1244,19 @@ module Google
|
|
855
1244
|
# Flattens all nested and repeated fields in the query results. The
|
856
1245
|
# default value is `true`. `large_results` parameter must be `true` if
|
857
1246
|
# this is set to `false`.
|
1247
|
+
# @param [Integer] maximum_billing_tier Limits the billing tier for this
|
1248
|
+
# job. Queries that have resource usage beyond this tier will fail
|
1249
|
+
# (without incurring a charge). WARNING: The billed byte amount can be
|
1250
|
+
# multiplied by an amount up to this number! Most users should not need
|
1251
|
+
# to alter this setting, and we recommend that you avoid introducing new
|
1252
|
+
# uses of it. Deprecated.
|
858
1253
|
# @param [Integer] maximum_bytes_billed Limits the bytes billed for this
|
859
1254
|
# job. Queries that will have bytes billed beyond this limit will fail
|
860
1255
|
# (without incurring a charge). Optional. If unspecified, this will be
|
861
1256
|
# set to your project default.
|
862
1257
|
# @param [String] job_id A user-defined ID for the query job. The ID
|
863
|
-
# must contain only letters (
|
864
|
-
# (_), or dashes (
|
1258
|
+
# must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
|
1259
|
+
# (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
|
865
1260
|
# `job_id` is provided, then `prefix` will not be used.
|
866
1261
|
#
|
867
1262
|
# See [Generating a job
|
@@ -870,27 +1265,48 @@ module Google
|
|
870
1265
|
# prepended to a generated value to produce a unique job ID. For
|
871
1266
|
# example, the prefix `daily_import_job_` can be given to generate a
|
872
1267
|
# job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
|
873
|
-
# prefix must contain only letters (
|
874
|
-
# underscores (_), or dashes (
|
1268
|
+
# prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
|
1269
|
+
# underscores (`_`), or dashes (`-`). The maximum length of the entire ID
|
875
1270
|
# is 1,024 characters. If `job_id` is provided, then `prefix` will not
|
876
1271
|
# be used.
|
877
1272
|
# @param [Hash] labels A hash of user-provided labels associated with
|
878
|
-
# the job. You can use these to organize and group your jobs.
|
879
|
-
#
|
880
|
-
#
|
881
|
-
#
|
882
|
-
#
|
883
|
-
#
|
884
|
-
#
|
1273
|
+
# the job. You can use these to organize and group your jobs.
|
1274
|
+
#
|
1275
|
+
# The labels applied to a resource must meet the following requirements:
|
1276
|
+
#
|
1277
|
+
# * Each resource can have multiple labels, up to a maximum of 64.
|
1278
|
+
# * Each label must be a key-value pair.
|
1279
|
+
# * Keys have a minimum length of 1 character and a maximum length of
|
1280
|
+
# 63 characters, and cannot be empty. Values can be empty, and have
|
1281
|
+
# a maximum length of 63 characters.
|
1282
|
+
# * Keys and values can contain only lowercase letters, numeric characters,
|
1283
|
+
# underscores, and dashes. All characters must use UTF-8 encoding, and
|
1284
|
+
# international characters are allowed.
|
1285
|
+
# * The key portion of a label must be unique. However, you can use the
|
1286
|
+
# same key with multiple resources.
|
1287
|
+
# * Keys must start with a lowercase letter or international character.
|
885
1288
|
# @param [Array<String>, String] udfs User-defined function resources
|
886
|
-
# used in
|
887
|
-
# Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
|
1289
|
+
# used in a legacy SQL query. May be either a code resource to load from
|
1290
|
+
# a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
|
888
1291
|
# that contains code for a user-defined function (UDF). Providing an
|
889
1292
|
# inline code resource is equivalent to providing a URI for a file
|
890
|
-
# containing the same code.
|
891
|
-
#
|
892
|
-
#
|
893
|
-
#
|
1293
|
+
# containing the same code.
|
1294
|
+
#
|
1295
|
+
# This parameter is used for defining User Defined Function (UDF)
|
1296
|
+
# resources only when using legacy SQL. Users of standard SQL should
|
1297
|
+
# leverage either DDL (e.g. `CREATE [TEMPORARY] FUNCTION ...`) or the
|
1298
|
+
# Routines API to define UDF resources.
|
1299
|
+
#
|
1300
|
+
# For additional information on migrating, see: [Migrating to
|
1301
|
+
# standard SQL - Differences in user-defined JavaScript
|
1302
|
+
# functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/migrating-from-legacy-sql#differences_in_user-defined_javascript_functions)
|
1303
|
+
# @param [Boolean] create_session If true, creates a new session, where the
|
1304
|
+
# session ID will be a server-generated random ID. If false, runs query
|
1305
|
+
# with an existing session ID when one is provided in the `session_id`
|
1306
|
+
# param, otherwise runs query in non-session mode. See {Job#session_id}.
|
1307
|
+
# The default value is false.
|
1308
|
+
# @param [String] session_id The ID of an existing session. See also the
|
1309
|
+
# `create_session` param and {Job#session_id}.
|
894
1310
|
# @yield [job] a job configuration object
|
895
1311
|
# @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
|
896
1312
|
# configuration object for setting additional options for the query.
|
@@ -960,32 +1376,62 @@ module Google
|
|
960
1376
|
# end
|
961
1377
|
# end
|
962
1378
|
#
|
1379
|
+
# @example Query using named query parameters with types:
|
1380
|
+
# require "google/cloud/bigquery"
|
1381
|
+
#
|
1382
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1383
|
+
# dataset = bigquery.dataset "my_dataset"
|
1384
|
+
#
|
1385
|
+
# job = dataset.query_job "SELECT name FROM my_table WHERE id IN UNNEST(@ids)",
|
1386
|
+
# params: { ids: [] },
|
1387
|
+
# types: { ids: [:INT64] }
|
1388
|
+
#
|
1389
|
+
# job.wait_until_done!
|
1390
|
+
# if !job.failed?
|
1391
|
+
# job.data.each do |row|
|
1392
|
+
# puts row[:name]
|
1393
|
+
# end
|
1394
|
+
# end
|
1395
|
+
#
|
963
1396
|
# @example Execute a DDL statement:
|
964
1397
|
# require "google/cloud/bigquery"
|
965
1398
|
#
|
966
1399
|
# bigquery = Google::Cloud::Bigquery.new
|
1400
|
+
# dataset = bigquery.dataset "my_dataset"
|
967
1401
|
#
|
968
|
-
# job =
|
1402
|
+
# job = dataset.query_job "CREATE TABLE my_table (x INT64)"
|
969
1403
|
#
|
970
1404
|
# job.wait_until_done!
|
971
1405
|
# if !job.failed?
|
972
|
-
# table_ref = job.ddl_target_table
|
1406
|
+
# table_ref = job.ddl_target_table # Or ddl_target_routine for CREATE/DROP FUNCTION/PROCEDURE
|
973
1407
|
# end
|
974
1408
|
#
|
975
1409
|
# @example Execute a DML statement:
|
976
1410
|
# require "google/cloud/bigquery"
|
977
1411
|
#
|
978
1412
|
# bigquery = Google::Cloud::Bigquery.new
|
1413
|
+
# dataset = bigquery.dataset "my_dataset"
|
979
1414
|
#
|
980
|
-
# job =
|
981
|
-
# "SET x = x + 1 " \
|
982
|
-
# "WHERE x IS NOT NULL"
|
1415
|
+
# job = dataset.query_job "UPDATE my_table SET x = x + 1 WHERE x IS NOT NULL"
|
983
1416
|
#
|
984
1417
|
# job.wait_until_done!
|
985
1418
|
# if !job.failed?
|
986
1419
|
# puts job.num_dml_affected_rows
|
987
1420
|
# end
|
988
1421
|
#
|
1422
|
+
# @example Run query in a session:
|
1423
|
+
# require "google/cloud/bigquery"
|
1424
|
+
#
|
1425
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1426
|
+
# dataset = bigquery.dataset "my_dataset"
|
1427
|
+
#
|
1428
|
+
# job = dataset.query_job "CREATE TEMPORARY TABLE temptable AS SELECT 17 as foo", create_session: true
|
1429
|
+
#
|
1430
|
+
# job.wait_until_done!
|
1431
|
+
#
|
1432
|
+
# session_id = job.session_id
|
1433
|
+
# data = dataset.query "SELECT * FROM temptable", session_id: session_id
|
1434
|
+
#
|
989
1435
|
# @example Query using external data source, set destination:
|
990
1436
|
# require "google/cloud/bigquery"
|
991
1437
|
#
|
@@ -1012,21 +1458,52 @@ module Google
|
|
1012
1458
|
#
|
1013
1459
|
# @!group Data
|
1014
1460
|
#
|
1015
|
-
def query_job query,
|
1016
|
-
|
1017
|
-
|
1018
|
-
|
1019
|
-
|
1020
|
-
|
1461
|
+
def query_job query,
|
1462
|
+
params: nil,
|
1463
|
+
types: nil,
|
1464
|
+
external: nil,
|
1465
|
+
priority: "INTERACTIVE",
|
1466
|
+
cache: true,
|
1467
|
+
table: nil,
|
1468
|
+
create: nil,
|
1469
|
+
write: nil,
|
1470
|
+
dryrun: nil,
|
1471
|
+
standard_sql: nil,
|
1472
|
+
legacy_sql: nil,
|
1473
|
+
large_results: nil,
|
1474
|
+
flatten: nil,
|
1475
|
+
maximum_billing_tier: nil,
|
1476
|
+
maximum_bytes_billed: nil,
|
1477
|
+
job_id: nil,
|
1478
|
+
prefix: nil,
|
1479
|
+
labels: nil,
|
1480
|
+
udfs: nil,
|
1481
|
+
create_session: nil,
|
1482
|
+
session_id: nil
|
1021
1483
|
ensure_service!
|
1022
|
-
options = {
|
1023
|
-
|
1024
|
-
|
1025
|
-
|
1026
|
-
|
1027
|
-
|
1028
|
-
|
1029
|
-
|
1484
|
+
options = {
|
1485
|
+
params: params,
|
1486
|
+
types: types,
|
1487
|
+
external: external,
|
1488
|
+
priority: priority,
|
1489
|
+
cache: cache,
|
1490
|
+
table: table,
|
1491
|
+
create: create,
|
1492
|
+
write: write,
|
1493
|
+
dryrun: dryrun,
|
1494
|
+
standard_sql: standard_sql,
|
1495
|
+
legacy_sql: legacy_sql,
|
1496
|
+
large_results: large_results,
|
1497
|
+
flatten: flatten,
|
1498
|
+
maximum_billing_tier: maximum_billing_tier,
|
1499
|
+
maximum_bytes_billed: maximum_bytes_billed,
|
1500
|
+
job_id: job_id,
|
1501
|
+
prefix: prefix,
|
1502
|
+
labels: labels,
|
1503
|
+
udfs: udfs,
|
1504
|
+
create_session: create_session,
|
1505
|
+
session_id: session_id
|
1506
|
+
}
|
1030
1507
|
|
1031
1508
|
updater = QueryJob::Updater.from_options service, query, options
|
1032
1509
|
updater.dataset = self
|
@@ -1048,27 +1525,6 @@ module Google
|
|
1048
1525
|
# Sets the current dataset as the default dataset in the query. Useful
|
1049
1526
|
# for using unqualified table names.
|
1050
1527
|
#
|
1051
|
-
# When using standard SQL and passing arguments using `params`, Ruby
|
1052
|
-
# types are mapped to BigQuery types as follows:
|
1053
|
-
#
|
1054
|
-
# | BigQuery | Ruby | Notes |
|
1055
|
-
# |-------------|----------------|---|
|
1056
|
-
# | `BOOL` | `true`/`false` | |
|
1057
|
-
# | `INT64` | `Integer` | |
|
1058
|
-
# | `FLOAT64` | `Float` | |
|
1059
|
-
# | `NUMERIC` | `BigDecimal` | Will be rounded to 9 decimal places |
|
1060
|
-
# | `STRING` | `String` | |
|
1061
|
-
# | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
|
1062
|
-
# | `DATE` | `Date` | |
|
1063
|
-
# | `TIMESTAMP` | `Time` | |
|
1064
|
-
# | `TIME` | `Google::Cloud::BigQuery::Time` | |
|
1065
|
-
# | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
|
1066
|
-
# | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
|
1067
|
-
# | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
|
1068
|
-
#
|
1069
|
-
# See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types)
|
1070
|
-
# for an overview of each BigQuery data type, including allowed values.
|
1071
|
-
#
|
1072
1528
|
# The geographic location for the job ("US", "EU", etc.) can be set via
|
1073
1529
|
# {QueryJob::Updater#location=} in a block passed to this method. If the
|
1074
1530
|
# dataset is a full resource representation (see {#resource_full?}), the
|
@@ -1081,13 +1537,60 @@ module Google
|
|
1081
1537
|
# syntax](https://cloud.google.com/bigquery/query-reference), of the
|
1082
1538
|
# query to execute. Example: "SELECT count(f1) FROM
|
1083
1539
|
# [myProjectId:myDatasetId.myTableId]".
|
1084
|
-
# @param [Array, Hash] params Standard SQL only. Used to pass query
|
1085
|
-
#
|
1086
|
-
#
|
1087
|
-
#
|
1088
|
-
#
|
1089
|
-
#
|
1090
|
-
#
|
1540
|
+
# @param [Array, Hash] params Standard SQL only. Used to pass query arguments when the `query` string contains
|
1541
|
+
# either positional (`?`) or named (`@myparam`) query parameters. If value passed is an array `["foo"]`, the
|
1542
|
+
# query must use positional query parameters. If value passed is a hash `{ myparam: "foo" }`, the query must
|
1543
|
+
# use named query parameters. When set, `legacy_sql` will automatically be set to false and `standard_sql` to
|
1544
|
+
# true.
|
1545
|
+
#
|
1546
|
+
# BigQuery types are converted from Ruby types as follows:
|
1547
|
+
#
|
1548
|
+
# | BigQuery | Ruby | Notes |
|
1549
|
+
# |--------------|--------------------------------------|----------------------------------------------------|
|
1550
|
+
# | `BOOL` | `true`/`false` | |
|
1551
|
+
# | `INT64` | `Integer` | |
|
1552
|
+
# | `FLOAT64` | `Float` | |
|
1553
|
+
# | `NUMERIC` | `BigDecimal` | `BigDecimal` values will be rounded to scale 9. |
|
1554
|
+
# | `BIGNUMERIC` | `BigDecimal` | NOT AUTOMATIC: Must be mapped using `types`, below.|
|
1555
|
+
# | `STRING` | `String` | |
|
1556
|
+
# | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
|
1557
|
+
# | `DATE` | `Date` | |
|
1558
|
+
# | `GEOGRAPHY` | `String` (WKT or GeoJSON) | NOT AUTOMATIC: Must be mapped using `types`, below.|
|
1559
|
+
# | `TIMESTAMP` | `Time` | |
|
1560
|
+
# | `TIME` | `Google::Cloud::Bigquery::Time` | |
|
1561
|
+
# | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
|
1562
|
+
# | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
|
1563
|
+
# | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
|
1564
|
+
#
|
1565
|
+
# See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
|
1566
|
+
# of each BigQuery data type, including allowed values. For the `GEOGRAPHY` type, see [Working with BigQuery
|
1567
|
+
# GIS data](https://cloud.google.com/bigquery/docs/gis-data).
|
1568
|
+
# @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
|
1569
|
+
# possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
|
1570
|
+
# specify the SQL type for these values.
|
1571
|
+
#
|
1572
|
+
# Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
|
1573
|
+
# positional query parameters. This must be a `Hash` when the query uses named query parameters. The values
|
1574
|
+
# should be BigQuery type codes from the following list:
|
1575
|
+
#
|
1576
|
+
# * `:BOOL`
|
1577
|
+
# * `:INT64`
|
1578
|
+
# * `:FLOAT64`
|
1579
|
+
# * `:NUMERIC`
|
1580
|
+
# * `:BIGNUMERIC`
|
1581
|
+
# * `:STRING`
|
1582
|
+
# * `:DATETIME`
|
1583
|
+
# * `:DATE`
|
1584
|
+
# * `:GEOGRAPHY`
|
1585
|
+
# * `:TIMESTAMP`
|
1586
|
+
# * `:TIME`
|
1587
|
+
# * `:BYTES`
|
1588
|
+
# * `Array` - Lists are specified by providing the type code in an array. For example, an array of integers
|
1589
|
+
# is specified as `[:INT64]`.
|
1590
|
+
# * `Hash` - Types for STRUCT values (`Hash` objects) are specified using a `Hash` object, where the keys
|
1591
|
+
# match the `params` hash, and the values are the type codes that match the data.
|
1592
|
+
#
|
1593
|
+
# Types are optional.
|
1091
1594
|
# @param [Hash<String|Symbol, External::DataSource>] external A Hash
|
1092
1595
|
# that represents the mapping of the external tables to the table
|
1093
1596
|
# names used in the SQL query. The hash keys are the table names, and
|
@@ -1122,6 +1625,8 @@ module Google
|
|
1122
1625
|
# When set to false, the values of `large_results` and `flatten` are
|
1123
1626
|
# ignored; the query will be run as if `large_results` is true and
|
1124
1627
|
# `flatten` is false. Optional. The default value is false.
|
1628
|
+
# @param [String] session_id The ID of an existing session. See the
|
1629
|
+
# `create_session` param in {#query_job} and {Job#session_id}.
|
1125
1630
|
# @yield [job] a job configuration object
|
1126
1631
|
# @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
|
1127
1632
|
# configuration object for setting additional options for the query.
|
@@ -1136,9 +1641,12 @@ module Google
|
|
1136
1641
|
#
|
1137
1642
|
# data = dataset.query "SELECT name FROM my_table"
|
1138
1643
|
#
|
1644
|
+
# # Iterate over the first page of results
|
1139
1645
|
# data.each do |row|
|
1140
1646
|
# puts row[:name]
|
1141
1647
|
# end
|
1648
|
+
# # Retrieve the next page of results
|
1649
|
+
# data = data.next if data.next?
|
1142
1650
|
#
|
1143
1651
|
# @example Query using legacy SQL:
|
1144
1652
|
# require "google/cloud/bigquery"
|
@@ -1149,9 +1657,12 @@ module Google
|
|
1149
1657
|
# data = dataset.query "SELECT name FROM my_table",
|
1150
1658
|
# legacy_sql: true
|
1151
1659
|
#
|
1660
|
+
# # Iterate over the first page of results
|
1152
1661
|
# data.each do |row|
|
1153
1662
|
# puts row[:name]
|
1154
1663
|
# end
|
1664
|
+
# # Retrieve the next page of results
|
1665
|
+
# data = data.next if data.next?
|
1155
1666
|
#
|
1156
1667
|
# @example Query using positional query parameters:
|
1157
1668
|
# require "google/cloud/bigquery"
|
@@ -1162,9 +1673,12 @@ module Google
|
|
1162
1673
|
# data = dataset.query "SELECT name FROM my_table WHERE id = ?",
|
1163
1674
|
# params: [1]
|
1164
1675
|
#
|
1676
|
+
# # Iterate over the first page of results
|
1165
1677
|
# data.each do |row|
|
1166
1678
|
# puts row[:name]
|
1167
1679
|
# end
|
1680
|
+
# # Retrieve the next page of results
|
1681
|
+
# data = data.next if data.next?
|
1168
1682
|
#
|
1169
1683
|
# @example Query using named query parameters:
|
1170
1684
|
# require "google/cloud/bigquery"
|
@@ -1175,30 +1689,63 @@ module Google
|
|
1175
1689
|
# data = dataset.query "SELECT name FROM my_table WHERE id = @id",
|
1176
1690
|
# params: { id: 1 }
|
1177
1691
|
#
|
1692
|
+
# # Iterate over the first page of results
|
1693
|
+
# data.each do |row|
|
1694
|
+
# puts row[:name]
|
1695
|
+
# end
|
1696
|
+
# # Retrieve the next page of results
|
1697
|
+
# data = data.next if data.next?
|
1698
|
+
#
|
1699
|
+
# @example Query using named query parameters with types:
|
1700
|
+
# require "google/cloud/bigquery"
|
1701
|
+
#
|
1702
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1703
|
+
# dataset = bigquery.dataset "my_dataset"
|
1704
|
+
#
|
1705
|
+
# data = dataset.query "SELECT name FROM my_table WHERE id IN UNNEST(@ids)",
|
1706
|
+
# params: { ids: [] },
|
1707
|
+
# types: { ids: [:INT64] }
|
1708
|
+
#
|
1709
|
+
# # Iterate over the first page of results
|
1178
1710
|
# data.each do |row|
|
1179
1711
|
# puts row[:name]
|
1180
1712
|
# end
|
1713
|
+
# # Retrieve the next page of results
|
1714
|
+
# data = data.next if data.next?
|
1181
1715
|
#
|
1182
1716
|
# @example Execute a DDL statement:
|
1183
1717
|
# require "google/cloud/bigquery"
|
1184
1718
|
#
|
1185
1719
|
# bigquery = Google::Cloud::Bigquery.new
|
1720
|
+
# dataset = bigquery.dataset "my_dataset"
|
1186
1721
|
#
|
1187
|
-
# data =
|
1722
|
+
# data = dataset.query "CREATE TABLE my_table (x INT64)"
|
1188
1723
|
#
|
1189
|
-
# table_ref = data.ddl_target_table
|
1724
|
+
# table_ref = data.ddl_target_table # Or ddl_target_routine for CREATE/DROP FUNCTION/PROCEDURE
|
1190
1725
|
#
|
1191
1726
|
# @example Execute a DML statement:
|
1192
1727
|
# require "google/cloud/bigquery"
|
1193
1728
|
#
|
1194
1729
|
# bigquery = Google::Cloud::Bigquery.new
|
1730
|
+
# dataset = bigquery.dataset "my_dataset"
|
1195
1731
|
#
|
1196
|
-
# data =
|
1197
|
-
# "SET x = x + 1 " \
|
1198
|
-
# "WHERE x IS NOT NULL"
|
1732
|
+
# data = dataset.query "UPDATE my_table SET x = x + 1 WHERE x IS NOT NULL"
|
1199
1733
|
#
|
1200
1734
|
# puts data.num_dml_affected_rows
|
1201
1735
|
#
|
1736
|
+
# @example Run query in a session:
|
1737
|
+
# require "google/cloud/bigquery"
|
1738
|
+
#
|
1739
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1740
|
+
# dataset = bigquery.dataset "my_dataset"
|
1741
|
+
#
|
1742
|
+
# job = dataset.query_job "CREATE TEMPORARY TABLE temptable AS SELECT 17 as foo", create_session: true
|
1743
|
+
#
|
1744
|
+
# job.wait_until_done!
|
1745
|
+
#
|
1746
|
+
# session_id = job.session_id
|
1747
|
+
# data = dataset.query "SELECT * FROM temptable", session_id: session_id
|
1748
|
+
#
|
1202
1749
|
# @example Query using external data source, set destination:
|
1203
1750
|
# require "google/cloud/bigquery"
|
1204
1751
|
#
|
@@ -1216,17 +1763,34 @@ module Google
|
|
1216
1763
|
# query.table = dataset.table "my_table", skip_lookup: true
|
1217
1764
|
# end
|
1218
1765
|
#
|
1766
|
+
# # Iterate over the first page of results
|
1219
1767
|
# data.each do |row|
|
1220
1768
|
# puts row[:name]
|
1221
1769
|
# end
|
1770
|
+
# # Retrieve the next page of results
|
1771
|
+
# data = data.next if data.next?
|
1222
1772
|
#
|
1223
1773
|
# @!group Data
|
1224
1774
|
#
|
1225
|
-
def query query,
|
1226
|
-
|
1227
|
-
|
1228
|
-
|
1229
|
-
|
1775
|
+
def query query,
|
1776
|
+
params: nil,
|
1777
|
+
types: nil,
|
1778
|
+
external: nil,
|
1779
|
+
max: nil,
|
1780
|
+
cache: true,
|
1781
|
+
standard_sql: nil,
|
1782
|
+
legacy_sql: nil,
|
1783
|
+
session_id: nil,
|
1784
|
+
&block
|
1785
|
+
job = query_job query,
|
1786
|
+
params: params,
|
1787
|
+
types: types,
|
1788
|
+
external: external,
|
1789
|
+
cache: cache,
|
1790
|
+
standard_sql: standard_sql,
|
1791
|
+
legacy_sql: legacy_sql,
|
1792
|
+
session_id: session_id,
|
1793
|
+
&block
|
1230
1794
|
job.wait_until_done!
|
1231
1795
|
ensure_job_succeeded! job
|
1232
1796
|
|
@@ -1407,8 +1971,8 @@ module Google
|
|
1407
1971
|
# this option. Also note that for most use cases, the block yielded by
|
1408
1972
|
# this method is a more convenient way to configure the schema.
|
1409
1973
|
# @param [String] job_id A user-defined ID for the load job. The ID
|
1410
|
-
# must contain only letters (
|
1411
|
-
# (_), or dashes (
|
1974
|
+
# must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
|
1975
|
+
# (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
|
1412
1976
|
# `job_id` is provided, then `prefix` will not be used.
|
1413
1977
|
#
|
1414
1978
|
# See [Generating a job
|
@@ -1417,18 +1981,26 @@ module Google
|
|
1417
1981
|
# prepended to a generated value to produce a unique job ID. For
|
1418
1982
|
# example, the prefix `daily_import_job_` can be given to generate a
|
1419
1983
|
# job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
|
1420
|
-
# prefix must contain only letters (
|
1421
|
-
# underscores (_), or dashes (
|
1984
|
+
# prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
|
1985
|
+
# underscores (`_`), or dashes (`-`). The maximum length of the entire ID
|
1422
1986
|
# is 1,024 characters. If `job_id` is provided, then `prefix` will not
|
1423
1987
|
# be used.
|
1424
1988
|
# @param [Hash] labels A hash of user-provided labels associated with
|
1425
|
-
# the job. You can use these to organize and group your jobs.
|
1426
|
-
#
|
1427
|
-
#
|
1428
|
-
#
|
1429
|
-
#
|
1430
|
-
#
|
1431
|
-
#
|
1989
|
+
# the job. You can use these to organize and group your jobs.
|
1990
|
+
#
|
1991
|
+
# The labels applied to a resource must meet the following requirements:
|
1992
|
+
#
|
1993
|
+
# * Each resource can have multiple labels, up to a maximum of 64.
|
1994
|
+
# * Each label must be a key-value pair.
|
1995
|
+
# * Keys have a minimum length of 1 character and a maximum length of
|
1996
|
+
# 63 characters, and cannot be empty. Values can be empty, and have
|
1997
|
+
# a maximum length of 63 characters.
|
1998
|
+
# * Keys and values can contain only lowercase letters, numeric characters,
|
1999
|
+
# underscores, and dashes. All characters must use UTF-8 encoding, and
|
2000
|
+
# international characters are allowed.
|
2001
|
+
# * The key portion of a label must be unique. However, you can use the
|
2002
|
+
# same key with multiple resources.
|
2003
|
+
# * Keys must start with a lowercase letter or international character.
|
1432
2004
|
# @yield [updater] A block for setting the schema and other
|
1433
2005
|
# options for the destination table. The schema can be omitted if the
|
1434
2006
|
# destination table already exists, or if you're loading data from a
|
@@ -1522,29 +2094,19 @@ module Google
|
|
1522
2094
|
#
|
1523
2095
|
# @!group Data
|
1524
2096
|
#
|
1525
|
-
def load_job table_id, files, format: nil, create: nil, write: nil,
|
1526
|
-
|
1527
|
-
|
1528
|
-
|
1529
|
-
skip_leading: nil, schema: nil, job_id: nil, prefix: nil,
|
1530
|
-
labels: nil, autodetect: nil, null_marker: nil, dryrun: nil
|
2097
|
+
def load_job table_id, files, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
|
2098
|
+
quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil,
|
2099
|
+
quote: nil, skip_leading: nil, schema: nil, job_id: nil, prefix: nil, labels: nil, autodetect: nil,
|
2100
|
+
null_marker: nil, dryrun: nil
|
1531
2101
|
ensure_service!
|
1532
2102
|
|
1533
2103
|
updater = load_job_updater table_id,
|
1534
|
-
format: format, create: create,
|
1535
|
-
|
1536
|
-
|
1537
|
-
|
1538
|
-
|
1539
|
-
|
1540
|
-
delimiter: delimiter,
|
1541
|
-
ignore_unknown: ignore_unknown,
|
1542
|
-
max_bad_records: max_bad_records,
|
1543
|
-
quote: quote, skip_leading: skip_leading,
|
1544
|
-
dryrun: dryrun, schema: schema,
|
1545
|
-
job_id: job_id, prefix: prefix,
|
1546
|
-
labels: labels, autodetect: autodetect,
|
1547
|
-
null_marker: null_marker
|
2104
|
+
format: format, create: create, write: write, projection_fields: projection_fields,
|
2105
|
+
jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
|
2106
|
+
delimiter: delimiter, ignore_unknown: ignore_unknown,
|
2107
|
+
max_bad_records: max_bad_records, quote: quote, skip_leading: skip_leading,
|
2108
|
+
dryrun: dryrun, schema: schema, job_id: job_id, prefix: prefix, labels: labels,
|
2109
|
+
autodetect: autodetect, null_marker: null_marker
|
1548
2110
|
|
1549
2111
|
yield updater if block_given?
|
1550
2112
|
|
@@ -1760,21 +2322,14 @@ module Google
|
|
1760
2322
|
#
|
1761
2323
|
# @!group Data
|
1762
2324
|
#
|
1763
|
-
def load table_id, files, format: nil, create: nil, write: nil,
|
1764
|
-
|
1765
|
-
|
1766
|
-
max_bad_records: nil, quote: nil, skip_leading: nil,
|
1767
|
-
schema: nil, autodetect: nil, null_marker: nil, &block
|
2325
|
+
def load table_id, files, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
|
2326
|
+
quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil,
|
2327
|
+
quote: nil, skip_leading: nil, schema: nil, autodetect: nil, null_marker: nil, &block
|
1768
2328
|
job = load_job table_id, files,
|
1769
|
-
format: format, create: create, write: write,
|
1770
|
-
|
1771
|
-
|
1772
|
-
|
1773
|
-
encoding: encoding, delimiter: delimiter,
|
1774
|
-
ignore_unknown: ignore_unknown,
|
1775
|
-
max_bad_records: max_bad_records,
|
1776
|
-
quote: quote, skip_leading: skip_leading,
|
1777
|
-
schema: schema, autodetect: autodetect,
|
2329
|
+
format: format, create: create, write: write, projection_fields: projection_fields,
|
2330
|
+
jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
|
2331
|
+
delimiter: delimiter, ignore_unknown: ignore_unknown, max_bad_records: max_bad_records,
|
2332
|
+
quote: quote, skip_leading: skip_leading, schema: schema, autodetect: autodetect,
|
1778
2333
|
null_marker: null_marker, &block
|
1779
2334
|
|
1780
2335
|
job.wait_until_done!
|
@@ -1825,7 +2380,7 @@ module Google
|
|
1825
2380
|
# dataset = bigquery.dataset "my_dataset", skip_lookup: true
|
1826
2381
|
# dataset.exists? # true
|
1827
2382
|
#
|
1828
|
-
def exists? force:
|
2383
|
+
def exists? force: false
|
1829
2384
|
return gapi_exists? if force
|
1830
2385
|
# If we have a memoized value, return it
|
1831
2386
|
return @exists unless @exists.nil?
|
@@ -1935,14 +2490,12 @@ module Google
|
|
1935
2490
|
end
|
1936
2491
|
|
1937
2492
|
##
|
1938
|
-
# @private New lazy Dataset object without making an HTTP request.
|
2493
|
+
# @private New lazy Dataset object without making an HTTP request, for use with the skip_lookup option.
|
1939
2494
|
def self.new_reference project_id, dataset_id, service
|
1940
2495
|
raise ArgumentError, "dataset_id is required" unless dataset_id
|
1941
2496
|
new.tap do |b|
|
1942
|
-
reference_gapi = Google::Apis::BigqueryV2::DatasetReference.new
|
1943
|
-
project_id: project_id,
|
1944
|
-
dataset_id: dataset_id
|
1945
|
-
)
|
2497
|
+
reference_gapi = Google::Apis::BigqueryV2::DatasetReference.new \
|
2498
|
+
project_id: project_id, dataset_id: dataset_id
|
1946
2499
|
b.service = service
|
1947
2500
|
b.instance_variable_set :@reference, reference_gapi
|
1948
2501
|
end
|
@@ -1953,18 +2506,47 @@ module Google
|
|
1953
2506
|
# the need to complete a load operation before the data can appear in
|
1954
2507
|
# query results.
|
1955
2508
|
#
|
2509
|
+
# Simple Ruby types are generally accepted per JSON rules, along with the following support for BigQuery's more
|
2510
|
+
# complex types:
|
2511
|
+
#
|
2512
|
+
# | BigQuery | Ruby | Notes |
|
2513
|
+
# |--------------|--------------------------------------|----------------------------------------------------|
|
2514
|
+
# | `NUMERIC` | `BigDecimal` | `BigDecimal` values will be rounded to scale 9. |
|
2515
|
+
# | `BIGNUMERIC` | `String` | Pass as `String` to avoid rounding to scale 9. |
|
2516
|
+
# | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
|
2517
|
+
# | `DATE` | `Date` | |
|
2518
|
+
# | `GEOGRAPHY` | `String` | |
|
2519
|
+
# | `TIMESTAMP` | `Time` | |
|
2520
|
+
# | `TIME` | `Google::Cloud::Bigquery::Time` | |
|
2521
|
+
# | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
|
2522
|
+
# | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
|
2523
|
+
# | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
|
2524
|
+
#
|
2525
|
+
# Because BigQuery's streaming API is designed for high insertion rates,
|
2526
|
+
# modifications to the underlying table metadata are eventually
|
2527
|
+
# consistent when interacting with the streaming system. In most cases
|
2528
|
+
# metadata changes are propagated within minutes, but during this period
|
2529
|
+
# API responses may reflect the inconsistent state of the table.
|
2530
|
+
#
|
1956
2531
|
# @see https://cloud.google.com/bigquery/streaming-data-into-bigquery
|
1957
2532
|
# Streaming Data Into BigQuery
|
1958
2533
|
#
|
2534
|
+
# @see https://cloud.google.com/bigquery/troubleshooting-errors#metadata-errors-for-streaming-inserts
|
2535
|
+
# BigQuery Troubleshooting: Metadata errors for streaming inserts
|
2536
|
+
#
|
1959
2537
|
# @param [String] table_id The ID of the destination table.
|
1960
2538
|
# @param [Hash, Array<Hash>] rows A hash object or array of hash objects
|
1961
|
-
# containing the data. Required.
|
1962
|
-
#
|
1963
|
-
#
|
1964
|
-
#
|
1965
|
-
#
|
1966
|
-
#
|
1967
|
-
#
|
2539
|
+
# containing the data. Required. `BigDecimal` values will be rounded to
|
2540
|
+
# scale 9 to conform with the BigQuery `NUMERIC` data type. To avoid
|
2541
|
+
# rounding `BIGNUMERIC` type values with scale greater than 9, use `String`
|
2542
|
+
# instead of `BigDecimal`.
|
2543
|
+
# @param [Array<String|Symbol>, Symbol] insert_ids A unique ID for each row. BigQuery uses this property to
|
2544
|
+
# detect duplicate insertion requests on a best-effort basis. For more information, see [data
|
2545
|
+
# consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency). Optional. If
|
2546
|
+
# not provided, the client library will assign a UUID to each row before the request is sent.
|
2547
|
+
#
|
2548
|
+
# The value `:skip` can be provided to skip the generation of IDs for all rows, or to skip the generation of an
|
2549
|
+
# ID for a specific row in the array.
|
1968
2550
|
# @param [Boolean] skip_invalid Insert all valid rows of a request, even
|
1969
2551
|
# if invalid rows exist. The default value is `false`, which causes
|
1970
2552
|
# the entire request to fail if any invalid rows exist.
|
@@ -1975,6 +2557,12 @@ module Google
|
|
1975
2557
|
# a new table with the given `table_id`, if no table is found for
|
1976
2558
|
# `table_id`. The default value is false.
|
1977
2559
|
#
|
2560
|
+
# @yield [table] a block for setting the table
|
2561
|
+
# @yieldparam [Google::Cloud::Bigquery::Table::Updater] table An updater
|
2562
|
+
# to set additional properties on the table in the API request to
|
2563
|
+
# create it. Only used when `autocreate` is set and the table does not
|
2564
|
+
# already exist.
|
2565
|
+
#
|
1978
2566
|
# @return [Google::Cloud::Bigquery::InsertResponse] An insert response
|
1979
2567
|
# object.
|
1980
2568
|
#
|
@@ -2018,42 +2606,36 @@ module Google
|
|
2018
2606
|
# t.schema.integer "age", mode: :required
|
2019
2607
|
# end
|
2020
2608
|
#
|
2609
|
+
# @example Pass `BIGNUMERIC` value as a string to avoid rounding to scale 9 in the conversion from `BigDecimal`:
|
2610
|
+
# require "google/cloud/bigquery"
|
2611
|
+
#
|
2612
|
+
# bigquery = Google::Cloud::Bigquery.new
|
2613
|
+
# dataset = bigquery.dataset "my_dataset"
|
2614
|
+
#
|
2615
|
+
# row = {
|
2616
|
+
# "my_numeric" => BigDecimal("123456798.987654321"),
|
2617
|
+
# "my_bignumeric" => "123456798.98765432100001" # BigDecimal would be rounded, use String instead!
|
2618
|
+
# }
|
2619
|
+
# dataset.insert "my_table", row
|
2620
|
+
#
|
2021
2621
|
# @!group Data
|
2022
2622
|
#
|
2023
|
-
def insert table_id, rows, insert_ids: nil, skip_invalid: nil,
|
2024
|
-
ignore_unknown: nil, autocreate: nil
|
2623
|
+
def insert table_id, rows, insert_ids: nil, skip_invalid: nil, ignore_unknown: nil, autocreate: nil, &block
|
2025
2624
|
rows = [rows] if rows.is_a? Hash
|
2625
|
+
raise ArgumentError, "No rows provided" if rows.empty?
|
2626
|
+
|
2627
|
+
insert_ids = Array.new(rows.count) { :skip } if insert_ids == :skip
|
2026
2628
|
insert_ids = Array insert_ids
|
2027
|
-
if insert_ids.count
|
2629
|
+
if insert_ids.count.positive? && insert_ids.count != rows.count
|
2028
2630
|
raise ArgumentError, "insert_ids must be the same size as rows"
|
2029
2631
|
end
|
2030
2632
|
|
2031
2633
|
if autocreate
|
2032
|
-
|
2033
|
-
|
2034
|
-
ignore_unknown: ignore_unknown,
|
2035
|
-
insert_ids: insert_ids
|
2036
|
-
rescue Google::Cloud::NotFoundError
|
2037
|
-
sleep rand(1..60)
|
2038
|
-
begin
|
2039
|
-
create_table table_id do |tbl_updater|
|
2040
|
-
yield tbl_updater if block_given?
|
2041
|
-
end
|
2042
|
-
# rubocop:disable Lint/HandleExceptions
|
2043
|
-
rescue Google::Cloud::AlreadyExistsError
|
2044
|
-
end
|
2045
|
-
# rubocop:enable Lint/HandleExceptions
|
2046
|
-
|
2047
|
-
sleep 60
|
2048
|
-
insert table_id, rows, skip_invalid: skip_invalid,
|
2049
|
-
ignore_unknown: ignore_unknown,
|
2050
|
-
autocreate: true,
|
2051
|
-
insert_ids: insert_ids
|
2052
|
-
end
|
2634
|
+
insert_data_with_autocreate table_id, rows, skip_invalid: skip_invalid, ignore_unknown: ignore_unknown,
|
2635
|
+
insert_ids: insert_ids, &block
|
2053
2636
|
else
|
2054
|
-
insert_data table_id, rows, skip_invalid:
|
2055
|
-
|
2056
|
-
insert_ids: insert_ids
|
2637
|
+
insert_data table_id, rows, skip_invalid: skip_invalid, ignore_unknown: ignore_unknown,
|
2638
|
+
insert_ids: insert_ids
|
2057
2639
|
end
|
2058
2640
|
end
|
2059
2641
|
|
@@ -2104,9 +2686,8 @@ module Google
|
|
2104
2686
|
#
|
2105
2687
|
# inserter.stop.wait!
|
2106
2688
|
#
|
2107
|
-
def insert_async table_id, skip_invalid: nil, ignore_unknown: nil,
|
2108
|
-
|
2109
|
-
threads: 4, &block
|
2689
|
+
def insert_async table_id, skip_invalid: nil, ignore_unknown: nil, max_bytes: 10_000_000, max_rows: 500,
|
2690
|
+
interval: 10, threads: 4, &block
|
2110
2691
|
ensure_service!
|
2111
2692
|
|
2112
2693
|
# Get table, don't use Dataset#table which handles NotFoundError
|
@@ -2121,15 +2702,28 @@ module Google
|
|
2121
2702
|
|
2122
2703
|
protected
|
2123
2704
|
|
2124
|
-
def
|
2125
|
-
|
2705
|
+
def insert_data_with_autocreate table_id, rows, skip_invalid: nil, ignore_unknown: nil, insert_ids: nil
|
2706
|
+
insert_data table_id, rows, skip_invalid: skip_invalid, ignore_unknown: ignore_unknown, insert_ids: insert_ids
|
2707
|
+
rescue Google::Cloud::NotFoundError
|
2708
|
+
sleep rand(1..60)
|
2709
|
+
begin
|
2710
|
+
create_table table_id do |tbl_updater|
|
2711
|
+
yield tbl_updater if block_given?
|
2712
|
+
end
|
2713
|
+
rescue Google::Cloud::AlreadyExistsError
|
2714
|
+
# Do nothing if it already exists
|
2715
|
+
end
|
2716
|
+
sleep 60
|
2717
|
+
retry
|
2718
|
+
end
|
2719
|
+
|
2720
|
+
def insert_data table_id, rows, skip_invalid: nil, ignore_unknown: nil, insert_ids: nil
|
2126
2721
|
rows = [rows] if rows.is_a? Hash
|
2127
2722
|
raise ArgumentError, "No rows provided" if rows.empty?
|
2128
2723
|
ensure_service!
|
2129
|
-
|
2130
|
-
|
2131
|
-
|
2132
|
-
gapi = service.insert_tabledata dataset_id, table_id, rows, options
|
2724
|
+
gapi = service.insert_tabledata dataset_id, table_id, rows, skip_invalid: skip_invalid,
|
2725
|
+
ignore_unknown: ignore_unknown,
|
2726
|
+
insert_ids: insert_ids
|
2133
2727
|
InsertResponse.from_gapi rows, gapi
|
2134
2728
|
end
|
2135
2729
|
|
@@ -2160,10 +2754,8 @@ module Google
|
|
2160
2754
|
def patch_gapi! *attributes
|
2161
2755
|
return if attributes.empty?
|
2162
2756
|
ensure_service!
|
2163
|
-
patch_args = Hash[attributes.map
|
2164
|
-
|
2165
|
-
end]
|
2166
|
-
patch_gapi = Google::Apis::BigqueryV2::Dataset.new patch_args
|
2757
|
+
patch_args = Hash[attributes.map { |attr| [attr, @gapi.send(attr)] }]
|
2758
|
+
patch_gapi = Google::Apis::BigqueryV2::Dataset.new(**patch_args)
|
2167
2759
|
patch_gapi.etag = etag if etag
|
2168
2760
|
@gapi = service.patch_dataset dataset_id, patch_gapi
|
2169
2761
|
end
|
@@ -2172,7 +2764,7 @@ module Google
|
|
2172
2764
|
# Load the complete representation of the dataset if it has been
|
2173
2765
|
# only partially loaded by a request to the API list method.
|
2174
2766
|
def ensure_full_data!
|
2175
|
-
reload!
|
2767
|
+
reload! unless resource_full?
|
2176
2768
|
end
|
2177
2769
|
|
2178
2770
|
def ensure_job_succeeded! job
|
@@ -2203,11 +2795,8 @@ module Google
|
|
2203
2795
|
)
|
2204
2796
|
end
|
2205
2797
|
|
2206
|
-
def load_job_csv_options! job, jagged_rows: nil,
|
2207
|
-
|
2208
|
-
delimiter: nil,
|
2209
|
-
quote: nil, skip_leading: nil,
|
2210
|
-
null_marker: nil
|
2798
|
+
def load_job_csv_options! job, jagged_rows: nil, quoted_newlines: nil, delimiter: nil, quote: nil,
|
2799
|
+
skip_leading: nil, null_marker: nil
|
2211
2800
|
job.jagged_rows = jagged_rows unless jagged_rows.nil?
|
2212
2801
|
job.quoted_newlines = quoted_newlines unless quoted_newlines.nil?
|
2213
2802
|
job.delimiter = delimiter unless delimiter.nil?
|
@@ -2216,17 +2805,11 @@ module Google
|
|
2216
2805
|
job.skip_leading = skip_leading unless skip_leading.nil?
|
2217
2806
|
end
|
2218
2807
|
|
2219
|
-
def load_job_file_options! job, format: nil,
|
2220
|
-
|
2221
|
-
|
2222
|
-
encoding: nil, delimiter: nil,
|
2223
|
-
ignore_unknown: nil, max_bad_records: nil,
|
2224
|
-
quote: nil, skip_leading: nil,
|
2225
|
-
null_marker: nil
|
2808
|
+
def load_job_file_options! job, format: nil, projection_fields: nil, jagged_rows: nil, quoted_newlines: nil,
|
2809
|
+
encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil, quote: nil,
|
2810
|
+
skip_leading: nil, null_marker: nil
|
2226
2811
|
job.format = format unless format.nil?
|
2227
|
-
unless projection_fields.nil?
|
2228
|
-
job.projection_fields = projection_fields
|
2229
|
-
end
|
2812
|
+
job.projection_fields = projection_fields unless projection_fields.nil?
|
2230
2813
|
job.encoding = encoding unless encoding.nil?
|
2231
2814
|
job.ignore_unknown = ignore_unknown unless ignore_unknown.nil?
|
2232
2815
|
job.max_bad_records = max_bad_records unless max_bad_records.nil?
|
@@ -2238,16 +2821,11 @@ module Google
|
|
2238
2821
|
null_marker: null_marker
|
2239
2822
|
end
|
2240
2823
|
|
2241
|
-
def load_job_updater table_id, format: nil, create: nil,
|
2242
|
-
|
2243
|
-
|
2244
|
-
|
2245
|
-
|
2246
|
-
quote: nil, skip_leading: nil, dryrun: nil,
|
2247
|
-
schema: nil, job_id: nil, prefix: nil, labels: nil,
|
2248
|
-
autodetect: nil, null_marker: nil
|
2249
|
-
new_job = load_job_gapi table_id, dryrun, job_id: job_id,
|
2250
|
-
prefix: prefix
|
2824
|
+
def load_job_updater table_id, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
|
2825
|
+
quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil,
|
2826
|
+
max_bad_records: nil, quote: nil, skip_leading: nil, dryrun: nil, schema: nil, job_id: nil,
|
2827
|
+
prefix: nil, labels: nil, autodetect: nil, null_marker: nil
|
2828
|
+
new_job = load_job_gapi table_id, dryrun, job_id: job_id, prefix: prefix
|
2251
2829
|
LoadJob::Updater.new(new_job).tap do |job|
|
2252
2830
|
job.location = location if location # may be dataset reference
|
2253
2831
|
job.create = create unless create.nil?
|
@@ -2285,9 +2863,7 @@ module Google
|
|
2285
2863
|
job_gapi.configuration.load.update! source_uris: urls
|
2286
2864
|
if job_gapi.configuration.load.source_format.nil?
|
2287
2865
|
source_format = Convert.derive_source_format_from_list urls
|
2288
|
-
unless source_format.nil?
|
2289
|
-
job_gapi.configuration.load.source_format = source_format
|
2290
|
-
end
|
2866
|
+
job_gapi.configuration.load.source_format = source_format unless source_format.nil?
|
2291
2867
|
end
|
2292
2868
|
end
|
2293
2869
|
|
@@ -2299,9 +2875,7 @@ module Google
|
|
2299
2875
|
path = Pathname(file).to_path
|
2300
2876
|
if job_gapi.configuration.load.source_format.nil?
|
2301
2877
|
source_format = Convert.derive_source_format path
|
2302
|
-
unless source_format.nil?
|
2303
|
-
job_gapi.configuration.load.source_format = source_format
|
2304
|
-
end
|
2878
|
+
job_gapi.configuration.load.source_format = source_format unless source_format.nil?
|
2305
2879
|
end
|
2306
2880
|
|
2307
2881
|
gapi = service.load_table_file file, job_gapi
|
@@ -2310,21 +2884,18 @@ module Google
|
|
2310
2884
|
|
2311
2885
|
def load_local_or_uri file, updater
|
2312
2886
|
job_gapi = updater.to_gapi
|
2313
|
-
|
2314
|
-
|
2315
|
-
|
2316
|
-
|
2317
|
-
|
2318
|
-
job
|
2887
|
+
if local_file? file
|
2888
|
+
load_local file, job_gapi
|
2889
|
+
else
|
2890
|
+
load_storage file, job_gapi
|
2891
|
+
end
|
2319
2892
|
end
|
2320
2893
|
|
2321
2894
|
def storage_url? files
|
2322
2895
|
[files].flatten.all? do |file|
|
2323
2896
|
file.respond_to?(:to_gs_url) ||
|
2324
|
-
(file.respond_to?(:to_str) &&
|
2325
|
-
|
2326
|
-
(file.is_a?(URI) &&
|
2327
|
-
file.to_s.downcase.start_with?("gs://"))
|
2897
|
+
(file.respond_to?(:to_str) && file.to_str.downcase.start_with?("gs://")) ||
|
2898
|
+
(file.is_a?(URI) && file.to_s.downcase.start_with?("gs://"))
|
2328
2899
|
end
|
2329
2900
|
end
|
2330
2901
|
|
@@ -2348,15 +2919,16 @@ module Google
|
|
2348
2919
|
end
|
2349
2920
|
|
2350
2921
|
##
|
2351
|
-
# Yielded to a block to accumulate changes for a
|
2922
|
+
# Yielded to a block to accumulate changes for a create request. See {Project#create_dataset}.
|
2352
2923
|
class Updater < Dataset
|
2353
2924
|
##
|
2354
|
-
# A list of attributes that were updated.
|
2925
|
+
# @private A list of attributes that were updated.
|
2355
2926
|
attr_reader :updates
|
2356
2927
|
|
2357
2928
|
##
|
2358
|
-
# Create an Updater object.
|
2929
|
+
# @private Create an Updater object.
|
2359
2930
|
def initialize gapi
|
2931
|
+
super()
|
2360
2932
|
@updates = []
|
2361
2933
|
@gapi = gapi
|
2362
2934
|
end
|
@@ -2372,8 +2944,115 @@ module Google
|
|
2372
2944
|
@access
|
2373
2945
|
end
|
2374
2946
|
|
2947
|
+
# rubocop:disable Style/MethodDefParentheses
|
2948
|
+
|
2949
|
+
##
|
2950
|
+
# @raise [RuntimeError] not implemented
|
2951
|
+
def delete(*)
|
2952
|
+
raise "not implemented in #{self.class}"
|
2953
|
+
end
|
2954
|
+
|
2955
|
+
##
|
2956
|
+
# @raise [RuntimeError] not implemented
|
2957
|
+
def create_table(*)
|
2958
|
+
raise "not implemented in #{self.class}"
|
2959
|
+
end
|
2960
|
+
|
2961
|
+
##
|
2962
|
+
# @raise [RuntimeError] not implemented
|
2963
|
+
def create_view(*)
|
2964
|
+
raise "not implemented in #{self.class}"
|
2965
|
+
end
|
2966
|
+
|
2375
2967
|
##
|
2376
|
-
#
|
2968
|
+
# @raise [RuntimeError] not implemented
|
2969
|
+
def create_materialized_view(*)
|
2970
|
+
raise "not implemented in #{self.class}"
|
2971
|
+
end
|
2972
|
+
|
2973
|
+
##
|
2974
|
+
# @raise [RuntimeError] not implemented
|
2975
|
+
def table(*)
|
2976
|
+
raise "not implemented in #{self.class}"
|
2977
|
+
end
|
2978
|
+
|
2979
|
+
##
|
2980
|
+
# @raise [RuntimeError] not implemented
|
2981
|
+
def tables(*)
|
2982
|
+
raise "not implemented in #{self.class}"
|
2983
|
+
end
|
2984
|
+
|
2985
|
+
##
|
2986
|
+
# @raise [RuntimeError] not implemented
|
2987
|
+
def model(*)
|
2988
|
+
raise "not implemented in #{self.class}"
|
2989
|
+
end
|
2990
|
+
|
2991
|
+
##
|
2992
|
+
# @raise [RuntimeError] not implemented
|
2993
|
+
def models(*)
|
2994
|
+
raise "not implemented in #{self.class}"
|
2995
|
+
end
|
2996
|
+
|
2997
|
+
##
|
2998
|
+
# @raise [RuntimeError] not implemented
|
2999
|
+
def create_routine(*)
|
3000
|
+
raise "not implemented in #{self.class}"
|
3001
|
+
end
|
3002
|
+
|
3003
|
+
##
|
3004
|
+
# @raise [RuntimeError] not implemented
|
3005
|
+
def routine(*)
|
3006
|
+
raise "not implemented in #{self.class}"
|
3007
|
+
end
|
3008
|
+
|
3009
|
+
##
|
3010
|
+
# @raise [RuntimeError] not implemented
|
3011
|
+
def routines(*)
|
3012
|
+
raise "not implemented in #{self.class}"
|
3013
|
+
end
|
3014
|
+
|
3015
|
+
##
|
3016
|
+
# @raise [RuntimeError] not implemented
|
3017
|
+
def query_job(*)
|
3018
|
+
raise "not implemented in #{self.class}"
|
3019
|
+
end
|
3020
|
+
|
3021
|
+
##
|
3022
|
+
# @raise [RuntimeError] not implemented
|
3023
|
+
def query(*)
|
3024
|
+
raise "not implemented in #{self.class}"
|
3025
|
+
end
|
3026
|
+
|
3027
|
+
##
|
3028
|
+
# @raise [RuntimeError] not implemented
|
3029
|
+
def external(*)
|
3030
|
+
raise "not implemented in #{self.class}"
|
3031
|
+
end
|
3032
|
+
|
3033
|
+
##
|
3034
|
+
# @raise [RuntimeError] not implemented
|
3035
|
+
def load_job(*)
|
3036
|
+
raise "not implemented in #{self.class}"
|
3037
|
+
end
|
3038
|
+
|
3039
|
+
##
|
3040
|
+
# @raise [RuntimeError] not implemented
|
3041
|
+
def load(*)
|
3042
|
+
raise "not implemented in #{self.class}"
|
3043
|
+
end
|
3044
|
+
|
3045
|
+
##
|
3046
|
+
# @raise [RuntimeError] not implemented
|
3047
|
+
def reload!
|
3048
|
+
raise "not implemented in #{self.class}"
|
3049
|
+
end
|
3050
|
+
alias refresh! reload!
|
3051
|
+
|
3052
|
+
# rubocop:enable Style/MethodDefParentheses
|
3053
|
+
|
3054
|
+
##
|
3055
|
+
# @private Make sure any access changes are saved
|
2377
3056
|
def check_for_mutated_access!
|
2378
3057
|
return if @access.nil?
|
2379
3058
|
return unless @access.changed?
|
@@ -2381,6 +3060,8 @@ module Google
|
|
2381
3060
|
patch_gapi! :access
|
2382
3061
|
end
|
2383
3062
|
|
3063
|
+
##
|
3064
|
+
# @private
|
2384
3065
|
def to_gapi
|
2385
3066
|
check_for_mutated_access!
|
2386
3067
|
@gapi
|