google-cloud-bigquery 1.25.0 → 1.30.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +55 -0
- data/CONTRIBUTING.md +4 -5
- data/LOGGING.md +1 -1
- data/OVERVIEW.md +15 -14
- data/lib/google/cloud/bigquery/convert.rb +72 -76
- data/lib/google/cloud/bigquery/copy_job.rb +1 -0
- data/lib/google/cloud/bigquery/data.rb +2 -2
- data/lib/google/cloud/bigquery/dataset.rb +181 -62
- data/lib/google/cloud/bigquery/dataset/access.rb +3 -3
- data/lib/google/cloud/bigquery/dataset/list.rb +2 -2
- data/lib/google/cloud/bigquery/external.rb +328 -3
- data/lib/google/cloud/bigquery/extract_job.rb +8 -10
- data/lib/google/cloud/bigquery/job.rb +43 -3
- data/lib/google/cloud/bigquery/job/list.rb +4 -4
- data/lib/google/cloud/bigquery/load_job.rb +228 -27
- data/lib/google/cloud/bigquery/model/list.rb +2 -2
- data/lib/google/cloud/bigquery/policy.rb +2 -1
- data/lib/google/cloud/bigquery/project.rb +47 -43
- data/lib/google/cloud/bigquery/project/list.rb +2 -2
- data/lib/google/cloud/bigquery/query_job.rb +62 -48
- data/lib/google/cloud/bigquery/routine.rb +128 -9
- data/lib/google/cloud/bigquery/routine/list.rb +2 -2
- data/lib/google/cloud/bigquery/schema.rb +39 -3
- data/lib/google/cloud/bigquery/schema/field.rb +63 -13
- data/lib/google/cloud/bigquery/service.rb +11 -13
- data/lib/google/cloud/bigquery/standard_sql.rb +15 -3
- data/lib/google/cloud/bigquery/table.rb +246 -52
- data/lib/google/cloud/bigquery/table/async_inserter.rb +44 -17
- data/lib/google/cloud/bigquery/table/list.rb +2 -2
- data/lib/google/cloud/bigquery/version.rb +1 -1
- metadata +15 -15
data/lib/google/cloud/bigquery/data.rb:

```diff
@@ -482,14 +482,14 @@ module Google
 #     puts row[:word]
 #   end
 #
-def all request_limit: nil
+def all request_limit: nil, &block
   request_limit = request_limit.to_i if request_limit
 
   return enum_for :all, request_limit: request_limit unless block_given?
 
   results = self
   loop do
-    results.each { |r| yield r }
+    results.each(&block)
     if request_limit
       request_limit -= 1
       break if request_limit.negative?
```
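The `&block` change forwards the caller's block explicitly to each page's `#each`. A usage sketch of the paged iterator, assuming an existing `my_dataset.my_table` with a `word` column:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
data = bigquery.dataset("my_dataset").table("my_table").data

# With a block: rows from each fetched page are yielded in turn;
# request_limit caps how many additional API requests may be made.
data.all(request_limit: 10) do |row|
  puts row[:word]
end

# Without a block: the enum_for branch returns an Enumerator.
first_words = data.all.take(25).map { |row| row[:word] }
```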
data/lib/google/cloud/bigquery/dataset.rb:

```diff
@@ -618,15 +618,17 @@ module Google
 end
 
 ##
-# Creates a new [view](https://cloud.google.com/bigquery/docs/views)
-# table, which is a virtual table defined by the given SQL query.
+# Creates a new view, which is a virtual table defined by the given SQL query.
 #
-# BigQuery's views are logical views, not materialized views, which
-# means that the query that defines the view is re-executed every time
-# the view is queried. Queries are billed according to the total amount
+# With BigQuery's logical views, the query that defines the view is re-executed
+# every time the view is queried. Queries are billed according to the total amount
 # of data in all table fields referenced directly or indirectly by the
 # top-level query. (See {Table#view?} and {Table#query}.)
 #
+# For materialized views, see {#create_materialized_view}.
+#
+# @see https://cloud.google.com/bigquery/docs/views Creating views
+#
 # @param [String] table_id The ID of the view table. The ID must contain
 #   only letters (a-z, A-Z), numbers (0-9), or underscores (_). The
 #   maximum length is 1,024 characters.
@@ -667,7 +669,7 @@ module Google
 #   dataset = bigquery.dataset "my_dataset"
 #
 #   view = dataset.create_view "my_view",
-#                               "SELECT name, age FROM proj.dataset.users"
+#     "SELECT name, age FROM proj.dataset.users"
 #
 # @example A name and description can be provided:
 #   require "google/cloud/bigquery"
@@ -676,13 +678,18 @@ module Google
 #   dataset = bigquery.dataset "my_dataset"
 #
 #   view = dataset.create_view "my_view",
-#                               "SELECT name, age FROM proj.dataset.users",
-#                               name: "My View", description: "This is my view"
+#     "SELECT name, age FROM proj.dataset.users",
+#     name: "My View", description: "This is my view"
 #
 # @!group Table
 #
-def create_view table_id, query, name: nil, description: nil, standard_sql: nil, legacy_sql: nil,
-                udfs: nil
+def create_view table_id,
+                query,
+                name: nil,
+                description: nil,
+                standard_sql: nil,
+                legacy_sql: nil,
+                udfs: nil
   use_legacy_sql = Convert.resolve_legacy_sql standard_sql, legacy_sql
   new_view_opts = {
     table_reference: Google::Apis::BigqueryV2::TableReference.new(
```
```diff
@@ -698,7 +705,81 @@ module Google
       user_defined_function_resources: udfs_gapi(udfs)
     )
   }.delete_if { |_, v| v.nil? }
-  new_view = Google::Apis::BigqueryV2::Table.new new_view_opts
+  new_view = Google::Apis::BigqueryV2::Table.new(**new_view_opts)
+
+  gapi = service.insert_table dataset_id, new_view
+  Table.from_gapi gapi, service
+end
+
+##
+# Creates a new materialized view.
+#
+# Materialized views are precomputed views that periodically cache results of a query for increased performance
+# and efficiency. BigQuery leverages precomputed results from materialized views and whenever possible reads
+# only delta changes from the base table to compute up-to-date results.
+#
+# Queries that use materialized views are generally faster and consume less resources than queries that retrieve
+# the same data only from the base table. Materialized views are helpful to significantly boost performance of
+# workloads that have the characteristic of common and repeated queries.
+#
+# For logical views, see {#create_view}.
+#
+# @see https://cloud.google.com/bigquery/docs/materialized-views-intro Introduction to materialized views
+#
+# @param [String] table_id The ID of the materialized view table. The ID must contain only letters (a-z, A-Z),
+#   numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
+# @param [String] query The query that BigQuery executes when the materialized view is referenced.
+# @param [String] name A descriptive name for the table.
+# @param [String] description A user-friendly description of the table.
+# @param [Boolean] enable_refresh Enable automatic refresh of the materialized view when the base table is
+#   updated. Optional. The default value is true.
+# @param [Integer] refresh_interval_ms The maximum frequency in milliseconds at which this materialized view
+#   will be refreshed. Optional. The default value is `1_800_000` (30 minutes).
+#
+# @return [Google::Cloud::Bigquery::Table] A new table object.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#
+#   materialized_view = dataset.create_materialized_view "my_materialized_view",
+#                                                        "SELECT name, age FROM proj.dataset.users"
+#
+# @example Automatic refresh can be disabled:
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#
+#   materialized_view = dataset.create_materialized_view "my_materialized_view",
+#                                                        "SELECT name, age FROM proj.dataset.users",
+#                                                        enable_refresh: false
+#
+# @!group Table
+#
+def create_materialized_view table_id,
+                             query,
+                             name: nil,
+                             description: nil,
+                             enable_refresh: nil,
+                             refresh_interval_ms: nil
+  new_view_opts = {
+    table_reference: Google::Apis::BigqueryV2::TableReference.new(
+      project_id: project_id,
+      dataset_id: dataset_id,
+      table_id: table_id
+    ),
+    friendly_name: name,
+    description: description,
+    materialized_view: Google::Apis::BigqueryV2::MaterializedViewDefinition.new(
+      enable_refresh: enable_refresh,
+      query: query,
+      refresh_interval_ms: refresh_interval_ms
+    )
+  }.delete_if { |_, v| v.nil? }
+  new_view = Google::Apis::BigqueryV2::Table.new(**new_view_opts)
 
   gapi = service.insert_table dataset_id, new_view
   Table.from_gapi gapi, service
```
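A short sketch contrasting the two table flavors this file now creates; the dataset and source table names are illustrative:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset = bigquery.dataset "my_dataset"

# Logical view: the SQL is re-executed on every query against my_view.
view = dataset.create_view "my_view",
                           "SELECT name, age FROM proj.dataset.users"

# Materialized view: results are precomputed; refresh_interval_ms here
# allows refreshes as often as every 10 minutes instead of the default 30.
mat_view = dataset.create_materialized_view "my_mat_view",
                                            "SELECT name, COUNT(*) AS n FROM proj.dataset.users GROUP BY name",
                                            refresh_interval_ms: 600_000
```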
```diff
@@ -1059,35 +1140,37 @@ module Google
 #
 # Ruby types are mapped to BigQuery types as follows:
 #
-# | BigQuery    | Ruby                                 | Notes                                          |
-# |-------------|--------------------------------------|------------------------------------------------|
-# | `BOOL`      | `true`/`false`                       |                                                |
-# | `INT64`     | `Integer`                            |                                                |
-# | `FLOAT64`   | `Float`                              |                                                |
-# | `NUMERIC`   | `BigDecimal`                         | Will be rounded to 9 decimal places            |
-# | `STRING`    | `String`                             |                                                |
-# | `DATETIME`  | `DateTime`                           | `DATETIME` does not support time zone.         |
-# | `DATE`      | `Date`                               |                                                |
-# | `TIMESTAMP` | `Time`                               |                                                |
-# | `TIME`      | `Google::Cloud::BigQuery::Time`      |                                                |
-# | `BYTES`     | `File`, `IO`, `StringIO`, or similar |                                                |
-# | `ARRAY`     | `Array`                              | Nested arrays, `nil` values are not supported. |
-# | `STRUCT`    | `Hash`                               | Hash keys may be strings or symbols.           |
+# | BigQuery     | Ruby                                 | Notes                                              |
+# |--------------|--------------------------------------|----------------------------------------------------|
+# | `BOOL`       | `true`/`false`                       |                                                    |
+# | `INT64`      | `Integer`                            |                                                    |
+# | `FLOAT64`    | `Float`                              |                                                    |
+# | `NUMERIC`    | `BigDecimal`                         | `BigDecimal` values will be rounded to scale 9.    |
+# | `BIGNUMERIC` |                                      | Query param values must be mapped in `types`.      |
+# | `STRING`     | `String`                             |                                                    |
+# | `DATETIME`   | `DateTime`                           | `DATETIME` does not support time zone.             |
+# | `DATE`       | `Date`                               |                                                    |
+# | `TIMESTAMP`  | `Time`                               |                                                    |
+# | `TIME`       | `Google::Cloud::BigQuery::Time`      |                                                    |
+# | `BYTES`      | `File`, `IO`, `StringIO`, or similar |                                                    |
+# | `ARRAY`      | `Array`                              | Nested arrays, `nil` values are not supported.     |
+# | `STRUCT`     | `Hash`                               | Hash keys may be strings or symbols.               |
 #
 # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
 # of each BigQuery data type, including allowed values.
-# @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
-#   infer the right SQL type from a value in `params`. In these cases, `types` must be used to
-#   type for these values.
+# @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
+#   possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
+#   specify the SQL type for these values.
 #
-#
-#   parameters. This must be an `Hash` when the query uses named query parameters. The values
-#   type codes from the following list:
+#   Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
+#   positional query parameters. This must be an `Hash` when the query uses named query parameters. The values
+#   should be BigQuery type codes from the following list:
 #
 # * `:BOOL`
 # * `:INT64`
 # * `:FLOAT64`
 # * `:NUMERIC`
+# * `:BIGNUMERIC`
 # * `:STRING`
 # * `:DATETIME`
 # * `:DATE`
```
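A consequence of the `NUMERIC` row above: a `BigDecimal` parameter is rounded to scale 9 before it reaches BigQuery. A sketch against `Dataset#query_job` (the dataset and parameter names are illustrative):

```ruby
require "google/cloud/bigquery"
require "bigdecimal"

bigquery = Google::Cloud::Bigquery.new
dataset = bigquery.dataset "my_dataset"

# The BigDecimal binds as NUMERIC, so digits beyond scale 9 are rounded away.
job = dataset.query_job "SELECT @amount AS amount",
                        params: { amount: BigDecimal("1.0000000005") }
job.wait_until_done!
job.data.first[:amount] # the value BigQuery saw, rounded to scale 9
```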
```diff
@@ -1400,35 +1483,37 @@ module Google
 #
 # Ruby types are mapped to BigQuery types as follows:
 #
-# | BigQuery    | Ruby                                 | Notes                                          |
-# |-------------|--------------------------------------|------------------------------------------------|
-# | `BOOL`      | `true`/`false`                       |                                                |
-# | `INT64`     | `Integer`                            |                                                |
-# | `FLOAT64`   | `Float`                              |                                                |
-# | `NUMERIC`   | `BigDecimal`                         | Will be rounded to 9 decimal places            |
-# | `STRING`    | `String`                             |                                                |
-# | `DATETIME`  | `DateTime`                           | `DATETIME` does not support time zone.         |
-# | `DATE`      | `Date`                               |                                                |
-# | `TIMESTAMP` | `Time`                               |                                                |
-# | `TIME`      | `Google::Cloud::BigQuery::Time`      |                                                |
-# | `BYTES`     | `File`, `IO`, `StringIO`, or similar |                                                |
-# | `ARRAY`     | `Array`                              | Nested arrays, `nil` values are not supported. |
-# | `STRUCT`    | `Hash`                               | Hash keys may be strings or symbols.           |
+# | BigQuery     | Ruby                                 | Notes                                              |
+# |--------------|--------------------------------------|----------------------------------------------------|
+# | `BOOL`       | `true`/`false`                       |                                                    |
+# | `INT64`      | `Integer`                            |                                                    |
+# | `FLOAT64`    | `Float`                              |                                                    |
+# | `NUMERIC`    | `BigDecimal`                         | `BigDecimal` values will be rounded to scale 9.    |
+# | `BIGNUMERIC` |                                      | Query param values must be mapped in `types`.      |
+# | `STRING`     | `String`                             |                                                    |
+# | `DATETIME`   | `DateTime`                           | `DATETIME` does not support time zone.             |
+# | `DATE`       | `Date`                               |                                                    |
+# | `TIMESTAMP`  | `Time`                               |                                                    |
+# | `TIME`       | `Google::Cloud::BigQuery::Time`      |                                                    |
+# | `BYTES`      | `File`, `IO`, `StringIO`, or similar |                                                    |
+# | `ARRAY`      | `Array`                              | Nested arrays, `nil` values are not supported.     |
+# | `STRUCT`     | `Hash`                               | Hash keys may be strings or symbols.               |
 #
 # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
 # of each BigQuery data type, including allowed values.
-# @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
-#   infer the right SQL type from a value in `params`. In these cases, `types` must be used to
-#   type for these values.
+# @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
+#   possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
+#   specify the SQL type for these values.
 #
-#
-#   parameters. This must be an `Hash` when the query uses named query parameters. The values
-#   type codes from the following list:
+#   Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
+#   positional query parameters. This must be an `Hash` when the query uses named query parameters. The values
+#   should be BigQuery type codes from the following list:
 #
 # * `:BOOL`
 # * `:INT64`
 # * `:FLOAT64`
 # * `:NUMERIC`
+# * `:BIGNUMERIC`
 # * `:STRING`
 # * `:DATETIME`
 # * `:DATE`
```
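As the table notes, `BIGNUMERIC` has no natural Ruby type, so the value travels as a `String` and must be declared in `types`; otherwise it would be inferred as `STRING`. A sketch:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset = bigquery.dataset "my_dataset"

# Name the SQL type explicitly; the String value keeps all of its digits.
data = dataset.query "SELECT @big AS big",
                     params: { big: "123456789.1234567891011121314" },
                     types: { big: :BIGNUMERIC }
```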
```diff
@@ -2327,6 +2412,21 @@ module Google
 # the need to complete a load operation before the data can appear in
 # query results.
 #
+# Simple Ruby types are generally accepted per JSON rules, along with the following support for BigQuery's more
+# complex types:
+#
+# | BigQuery     | Ruby                                 | Notes                                              |
+# |--------------|--------------------------------------|----------------------------------------------------|
+# | `NUMERIC`    | `BigDecimal`                         | `BigDecimal` values will be rounded to scale 9.    |
+# | `BIGNUMERIC` | `String`                             | Pass as `String` to avoid rounding to scale 9.     |
+# | `DATETIME`   | `DateTime`                           | `DATETIME` does not support time zone.             |
+# | `DATE`       | `Date`                               |                                                    |
+# | `TIMESTAMP`  | `Time`                               |                                                    |
+# | `TIME`       | `Google::Cloud::BigQuery::Time`      |                                                    |
+# | `BYTES`      | `File`, `IO`, `StringIO`, or similar |                                                    |
+# | `ARRAY`      | `Array`                              | Nested arrays, `nil` values are not supported.     |
+# | `STRUCT`     | `Hash`                               | Hash keys may be strings or symbols.               |
+#
 # Because BigQuery's streaming API is designed for high insertion rates,
 # modifications to the underlying table metadata are eventually
 # consistent when interacting with the streaming system. In most cases
```
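A sketch of one streaming row exercising several of the mappings above; the field names are hypothetical and must match the destination table's schema:

```ruby
require "google/cloud/bigquery"
require "bigdecimal"
require "date"

bigquery = Google::Cloud::Bigquery.new
dataset = bigquery.dataset "my_dataset"

row = {
  "name"       => "Alice",                     # STRING
  "score"      => BigDecimal("98.7654321987"), # NUMERIC: rounded to scale 9
  "born_on"    => Date.new(1990, 1, 1),        # DATE
  "created_at" => Time.now,                    # TIMESTAMP
  "tags"       => ["a", "b"],                  # ARRAY: no nils, no nesting
  "address"    => { city: "Zurich" }           # STRUCT: symbol keys are fine
}
dataset.insert "my_table", row
```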
```diff
@@ -2341,7 +2441,10 @@ module Google
 #
 # @param [String] table_id The ID of the destination table.
 # @param [Hash, Array<Hash>] rows A hash object or array of hash objects
-#   containing the data. Required.
+#   containing the data. Required. `BigDecimal` values will be rounded to
+#   scale 9 to conform with the BigQuery `NUMERIC` data type. To avoid
+#   rounding `BIGNUMERIC` type values with scale greater than 9, use `String`
+#   instead of `BigDecimal`.
 # @param [Array<String|Symbol>, Symbol] insert_ids A unique ID for each row. BigQuery uses this property to
 #   detect duplicate insertion requests on a best-effort basis. For more information, see [data
 #   consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency). Optional. If
```
```diff
@@ -2408,6 +2511,18 @@ module Google
 #     t.schema.integer "age", mode: :required
 #   end
 #
+# @example Pass `BIGNUMERIC` value as a string to avoid rounding to scale 9 in the conversion from `BigDecimal`:
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#
+#   row = {
+#     "my_numeric" => BigDecimal("123456798.987654321"),
+#     "my_bignumeric" => "123456798.98765432100001" # BigDecimal would be rounded, use String instead!
+#   }
+#   dataset.insert "my_table", row
+#
 # @!group Data
 #
 def insert table_id, rows, insert_ids: nil, skip_invalid: nil, ignore_unknown: nil, autocreate: nil, &block
```
```diff
@@ -2500,11 +2615,9 @@ module Google
   create_table table_id do |tbl_updater|
     yield tbl_updater if block_given?
   end
-  # rubocop:disable Lint/HandleExceptions
 rescue Google::Cloud::AlreadyExistsError
+  # Do nothing if it already exists
 end
-  # rubocop:enable Lint/HandleExceptions
-
 sleep 60
 retry
 end
```
```diff
@@ -2547,7 +2660,7 @@ module Google
   return if attributes.empty?
   ensure_service!
   patch_args = Hash[attributes.map { |attr| [attr, @gapi.send(attr)] }]
-  patch_gapi = Google::Apis::BigqueryV2::Dataset.new patch_args
+  patch_gapi = Google::Apis::BigqueryV2::Dataset.new(**patch_args)
   patch_gapi.etag = etag if etag
   @gapi = service.patch_dataset dataset_id, patch_gapi
 end
```
```diff
@@ -2676,12 +2789,11 @@ module Google
 
 def load_local_or_uri file, updater
   job_gapi = updater.to_gapi
-  job = if local_file? file
-          load_local file, job_gapi
-        else
-          load_storage file, job_gapi
-        end
-  job
+  if local_file? file
+    load_local file, job_gapi
+  else
+    load_storage file, job_gapi
+  end
 end
 
 def storage_url? files
```
```diff
@@ -2721,6 +2833,7 @@ module Google
 ##
 # @private Create an Updater object.
 def initialize gapi
+  super()
   @updates = []
   @gapi = gapi
 end
```
```diff
@@ -2756,6 +2869,12 @@ module Google
   raise "not implemented in #{self.class}"
 end
 
+##
+# @raise [RuntimeError] not implemented
+def create_materialized_view(*)
+  raise "not implemented in #{self.class}"
+end
+
 ##
 # @raise [RuntimeError] not implemented
 def table(*)
```
data/lib/google/cloud/bigquery/dataset/access.rb:

```diff
@@ -1194,7 +1194,7 @@ module Google
   @rules.reject!(&find_by_scope_and_value(scope, value))
   # Add new rule for this role, scope, and value
   opts = { role: role, scope => value }
-  @rules << Google::Apis::BigqueryV2::Dataset::Access.new(opts)
+  @rules << Google::Apis::BigqueryV2::Dataset::Access.new(**opts)
 end
 
 # @private
@@ -1204,7 +1204,7 @@ module Google
   @rules.reject!(&find_by_scope_and_resource_ref(:routine, value))
   # Add new rule for this role, scope, and value
   opts = { routine: value }
-  @rules << Google::Apis::BigqueryV2::Dataset::Access.new(opts)
+  @rules << Google::Apis::BigqueryV2::Dataset::Access.new(**opts)
 end
 
 # @private
@@ -1215,7 +1215,7 @@ module Google
   @rules.reject!(&find_by_scope_and_resource_ref(:view, value))
   # Add new rule for this role, scope, and value
   opts = { view: value }
-  @rules << Google::Apis::BigqueryV2::Dataset::Access.new(opts)
+  @rules << Google::Apis::BigqueryV2::Dataset::Access.new(**opts)
 end
 
 # @private
```
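These `**opts` changes are the Ruby 3 keyword-argument fix: a bare `Hash` argument no longer splats implicitly into a method's keyword parameters, and the `Google::Apis` model initializers take keywords. A minimal illustration of the mechanism (not gem code):

```ruby
# Stand-in for a Google::Apis model initializer, which accepts keywords.
def build(**attrs)
  attrs
end

opts = { role: "READER", group_by_email: "example@example.com" }

build(**opts) # works on Ruby 2 and Ruby 3
# build(opts) # Ruby 3: ArgumentError — the Hash stays a positional argument
```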
data/lib/google/cloud/bigquery/dataset/list.rb:

```diff
@@ -120,12 +120,12 @@ module Google
 #     puts dataset.name
 #   end
 #
-def all request_limit: nil
+def all request_limit: nil, &block
   request_limit = request_limit.to_i if request_limit
   return enum_for :all, request_limit: request_limit unless block_given?
   results = self
   loop do
-    results.each { |r| yield r }
+    results.each(&block)
     if request_limit
       request_limit -= 1
       break if request_limit.negative?
```
data/lib/google/cloud/bigquery/external.rb:

```diff
@@ -52,6 +52,24 @@ module Google
 #   # Retrieve the next page of results
 #   data = data.next if data.next?
 #
+# @example Hive partitioning options:
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+#   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+#   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+#     ext.hive_partitioning_mode = :auto
+#     ext.hive_partitioning_require_partition_filter = true
+#     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+#   end
+#
+#   external_data.hive_partitioning? #=> true
+#   external_data.hive_partitioning_mode #=> "AUTO"
+#   external_data.hive_partitioning_require_partition_filter? #=> true
+#   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+#
 module External
   ##
   # @private New External from URLs and format
```
```diff
@@ -79,7 +97,8 @@ module Google
 # @private Determine source_format from inputs
 def self.source_format_for urls, format
   val = {
-    "csv" => "CSV",
+    "csv" => "CSV",
+    "avro" => "AVRO",
     "json" => "NEWLINE_DELIMITED_JSON",
     "newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
     "sheets" => "GOOGLE_SHEETS",
@@ -87,7 +106,9 @@ module Google
     "datastore" => "DATASTORE_BACKUP",
     "backup" => "DATASTORE_BACKUP",
     "datastore_backup" => "DATASTORE_BACKUP",
-    "bigtable" => "BIGTABLE"
+    "bigtable" => "BIGTABLE",
+    "orc" => "ORC",
+    "parquet" => "PARQUET"
   }[format.to_s.downcase]
   return val unless val.nil?
   Array(urls).each do |url|
```
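With the new hash entries, `:avro`, `:orc` and `:parquet` resolve directly instead of falling through to the URL check. A sketch against the private helper (illustrative; the final line assumes the URL fallback matches file extensions):

```ruby
require "google/cloud/bigquery"

# Symbols are stringified and downcased before the hash lookup.
Google::Cloud::Bigquery::External.source_format_for nil, :avro    #=> "AVRO"
Google::Cloud::Bigquery::External.source_format_for nil, :orc     #=> "ORC"
Google::Cloud::Bigquery::External.source_format_for nil, :parquet #=> "PARQUET"

# Unknown format: the method inspects the URLs instead.
Google::Cloud::Bigquery::External.source_format_for ["gs://b/data.csv"], nil #=> "CSV"
```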
```diff
@@ -110,7 +131,7 @@ module Google
 when "GOOGLE_SHEETS" then External::SheetsSource
 when "BIGTABLE" then External::BigtableSource
 else
-  # AVRO
+  # AVRO, DATASTORE_BACKUP, PARQUET
   External::DataSource
 end
 end
```
```diff
@@ -148,6 +169,24 @@ module Google
 #   # Retrieve the next page of results
 #   data = data.next if data.next?
 #
+# @example Hive partitioning options:
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+#   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+#   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+#     ext.hive_partitioning_mode = :auto
+#     ext.hive_partitioning_require_partition_filter = true
+#     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+#   end
+#
+#   external_data.hive_partitioning? #=> true
+#   external_data.hive_partitioning_mode #=> "AUTO"
+#   external_data.hive_partitioning_require_partition_filter? #=> true
+#   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+#
 class DataSource
   ##
   # @private The Google API Client object.
```
```diff
@@ -302,6 +341,52 @@ module Google
   @gapi.source_format == "BIGTABLE"
 end
 
+##
+# Whether the data format is "ORC".
+#
+# @return [Boolean]
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+#   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+#   external_data = bigquery.external gcs_uri, format: :orc do |ext|
+#     ext.hive_partitioning_mode = :auto
+#     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+#   end
+#   external_data.format #=> "ORC"
+#   external_data.orc? #=> true
+#
+def orc?
+  @gapi.source_format == "ORC"
+end
+
+##
+# Whether the data format is "PARQUET".
+#
+# @return [Boolean]
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+#   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+#   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+#     ext.hive_partitioning_mode = :auto
+#     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+#   end
+#   external_data.format #=> "PARQUET"
+#   external_data.parquet? #=> true
+#
+def parquet?
+  @gapi.source_format == "PARQUET"
+end
+
 ##
 # The fully-qualified URIs that point to your data in Google Cloud.
 # For Google Cloud Storage URIs: Each URI can contain one '*' wildcard
```
```diff
@@ -536,6 +621,246 @@ module Google
   @gapi.max_bad_records = new_max_bad_records
 end
 
+###
+# Checks if hive partitioning options are set.
+#
+# Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+# will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+# If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
+# Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
+#
+# @return [Boolean] `true` when hive partitioning options are set, or `false` otherwise.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+#   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+#   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+#     ext.hive_partitioning_mode = :auto
+#     ext.hive_partitioning_require_partition_filter = true
+#     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+#   end
+#
+#   external_data.hive_partitioning? #=> true
+#   external_data.hive_partitioning_mode #=> "AUTO"
+#   external_data.hive_partitioning_require_partition_filter? #=> true
+#   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+#
+def hive_partitioning?
+  !@gapi.hive_partitioning_options.nil?
+end
+
+###
+# The mode of hive partitioning to use when reading data. The following modes are supported:
+#
+# 1. `AUTO`: automatically infer partition key name(s) and type(s).
+# 2. `STRINGS`: automatically infer partition key name(s). All types are interpreted as strings.
+# 3. `CUSTOM`: partition key schema is encoded in the source URI prefix.
+#
+# @return [String, nil] The mode of hive partitioning, or `nil` if not set.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+#   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+#   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+#     ext.hive_partitioning_mode = :auto
+#     ext.hive_partitioning_require_partition_filter = true
+#     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+#   end
+#
+#   external_data.hive_partitioning? #=> true
+#   external_data.hive_partitioning_mode #=> "AUTO"
+#   external_data.hive_partitioning_require_partition_filter? #=> true
+#   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+#
+def hive_partitioning_mode
+  @gapi.hive_partitioning_options.mode if hive_partitioning?
+end
+
+##
+# Sets the mode of hive partitioning to use when reading data. The following modes are supported:
+#
+# 1. `auto`: automatically infer partition key name(s) and type(s).
+# 2. `strings`: automatically infer partition key name(s). All types are interpreted as strings.
+# 3. `custom`: partition key schema is encoded in the source URI prefix.
+#
+# Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+# will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+# If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
+# Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
+#
+# See {#format}, {#hive_partitioning_require_partition_filter=} and {#hive_partitioning_source_uri_prefix=}.
+#
+# @param [String, Symbol] mode The mode of hive partitioning to use when reading data.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+#   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+#   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+#     ext.hive_partitioning_mode = :auto
+#     ext.hive_partitioning_require_partition_filter = true
+#     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+#   end
+#
+#   external_data.hive_partitioning? #=> true
+#   external_data.hive_partitioning_mode #=> "AUTO"
+#   external_data.hive_partitioning_require_partition_filter? #=> true
+#   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+#
+def hive_partitioning_mode= mode
+  @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+  @gapi.hive_partitioning_options.mode = mode.to_s.upcase
+end
+
+###
+# Whether queries over the table using this external data source require a partition filter that can be used
+# for partition elimination to be specified. Note that this field should only be true when creating a
+# permanent external table or querying a temporary external table.
+#
+# @return [Boolean] `true` when queries over this table require a partition filter, or `false` otherwise.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+#   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+#   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+#     ext.hive_partitioning_mode = :auto
+#     ext.hive_partitioning_require_partition_filter = true
+#     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+#   end
+#
+#   external_data.hive_partitioning? #=> true
+#   external_data.hive_partitioning_mode #=> "AUTO"
+#   external_data.hive_partitioning_require_partition_filter? #=> true
+#   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+#
+def hive_partitioning_require_partition_filter?
+  return false unless hive_partitioning?
+  !@gapi.hive_partitioning_options.require_partition_filter.nil?
+end
+
+##
+# Sets whether queries over the table using this external data source require a partition filter
+# that can be used for partition elimination to be specified.
+#
+# See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_source_uri_prefix=}.
+#
+# @param [Boolean] require_partition_filter `true` if a partition filter must be specified, `false` otherwise.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+#   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+#   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+#     ext.hive_partitioning_mode = :auto
+#     ext.hive_partitioning_require_partition_filter = true
+#     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+#   end
+#
+#   external_data.hive_partitioning? #=> true
+#   external_data.hive_partitioning_mode #=> "AUTO"
+#   external_data.hive_partitioning_require_partition_filter? #=> true
+#   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+#
+def hive_partitioning_require_partition_filter= require_partition_filter
+  @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+  @gapi.hive_partitioning_options.require_partition_filter = require_partition_filter
+end
+
+###
+# The common prefix for all source uris when hive partition detection is requested. The prefix must end
+# immediately before the partition key encoding begins. For example, consider files following this data
+# layout:
+#
+# ```
+# gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+# gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+# ```
+#
+# When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+# `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+#
+# @return [String, nil] The common prefix for all source uris, or `nil` if not set.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+#   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+#   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+#     ext.hive_partitioning_mode = :auto
+#     ext.hive_partitioning_require_partition_filter = true
+#     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+#   end
+#
+#   external_data.hive_partitioning? #=> true
+#   external_data.hive_partitioning_mode #=> "AUTO"
+#   external_data.hive_partitioning_require_partition_filter? #=> true
+#   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+#
+def hive_partitioning_source_uri_prefix
+  @gapi.hive_partitioning_options.source_uri_prefix if hive_partitioning?
+end
+
+##
+# Sets the common prefix for all source uris when hive partition detection is requested. The prefix must end
+# immediately before the partition key encoding begins. For example, consider files following this data
+# layout:
+#
+# ```
+# gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+# gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+# ```
+#
+# When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+# `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+#
+# See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_require_partition_filter=}.
+#
+# @param [String] source_uri_prefix The common prefix for all source uris.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+#   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+#   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+#     ext.hive_partitioning_mode = :auto
+#     ext.hive_partitioning_require_partition_filter = true
+#     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+#   end
+#
+#   external_data.hive_partitioning? #=> true
+#   external_data.hive_partitioning_mode #=> "AUTO"
+#   external_data.hive_partitioning_require_partition_filter? #=> true
+#   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+#
+def hive_partitioning_source_uri_prefix= source_uri_prefix
+  @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+  @gapi.hive_partitioning_options.source_uri_prefix = source_uri_prefix
+end
+
 ##
 # @private Google API Client object.
 def to_gapi
```