google-cloud-bigquery 1.25.0 → 1.30.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +55 -0
- data/CONTRIBUTING.md +4 -5
- data/LOGGING.md +1 -1
- data/OVERVIEW.md +15 -14
- data/lib/google/cloud/bigquery/convert.rb +72 -76
- data/lib/google/cloud/bigquery/copy_job.rb +1 -0
- data/lib/google/cloud/bigquery/data.rb +2 -2
- data/lib/google/cloud/bigquery/dataset.rb +181 -62
- data/lib/google/cloud/bigquery/dataset/access.rb +3 -3
- data/lib/google/cloud/bigquery/dataset/list.rb +2 -2
- data/lib/google/cloud/bigquery/external.rb +328 -3
- data/lib/google/cloud/bigquery/extract_job.rb +8 -10
- data/lib/google/cloud/bigquery/job.rb +43 -3
- data/lib/google/cloud/bigquery/job/list.rb +4 -4
- data/lib/google/cloud/bigquery/load_job.rb +228 -27
- data/lib/google/cloud/bigquery/model/list.rb +2 -2
- data/lib/google/cloud/bigquery/policy.rb +2 -1
- data/lib/google/cloud/bigquery/project.rb +47 -43
- data/lib/google/cloud/bigquery/project/list.rb +2 -2
- data/lib/google/cloud/bigquery/query_job.rb +62 -48
- data/lib/google/cloud/bigquery/routine.rb +128 -9
- data/lib/google/cloud/bigquery/routine/list.rb +2 -2
- data/lib/google/cloud/bigquery/schema.rb +39 -3
- data/lib/google/cloud/bigquery/schema/field.rb +63 -13
- data/lib/google/cloud/bigquery/service.rb +11 -13
- data/lib/google/cloud/bigquery/standard_sql.rb +15 -3
- data/lib/google/cloud/bigquery/table.rb +246 -52
- data/lib/google/cloud/bigquery/table/async_inserter.rb +44 -17
- data/lib/google/cloud/bigquery/table/list.rb +2 -2
- data/lib/google/cloud/bigquery/version.rb +1 -1
- metadata +15 -15
@@ -482,14 +482,14 @@ module Google
 # puts row[:word]
 # end
 #
-def all request_limit: nil
+def all request_limit: nil, &block
 request_limit = request_limit.to_i if request_limit

 return enum_for :all, request_limit: request_limit unless block_given?

 results = self
 loop do
-results.each
+results.each(&block)
 if request_limit
 request_limit -= 1
 break if request_limit.negative?
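The change above replaces an implicit yield with an explicit block capture, so `each(&block)` forwards the caller's block through every page of results. A minimal usage sketch of the public behavior, assuming a dataset named as shown:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

# Block form: iterates items across pages, fetching at most one initial
# page plus `request_limit` additional pages.
dataset.tables.all(request_limit: 10) do |table|
  puts table.table_id
end

# Without a block, `enum_for :all` returns a lazy Enumerator instead.
first_five = dataset.tables.all.first 5
```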
@@ -618,15 +618,17 @@ module Google
 end

 ##
-# Creates a new
-# table, which is a virtual table defined by the given SQL query.
+# Creates a new view, which is a virtual table defined by the given SQL query.
 #
-# BigQuery's
-#
-# the view is queried. Queries are billed according to the total amount
+# With BigQuery's logical views, the query that defines the view is re-executed
+# every time the view is queried. Queries are billed according to the total amount
 # of data in all table fields referenced directly or indirectly by the
 # top-level query. (See {Table#view?} and {Table#query}.)
 #
+# For materialized views, see {#create_materialized_view}.
+#
+# @see https://cloud.google.com/bigquery/docs/views Creating views
+#
 # @param [String] table_id The ID of the view table. The ID must contain
 # only letters (a-z, A-Z), numbers (0-9), or underscores (_). The
 # maximum length is 1,024 characters.
@@ -667,7 +669,7 @@ module Google
 # dataset = bigquery.dataset "my_dataset"
 #
 # view = dataset.create_view "my_view",
-#
+# "SELECT name, age FROM proj.dataset.users"
 #
 # @example A name and description can be provided:
 # require "google/cloud/bigquery"
@@ -676,13 +678,18 @@ module Google
 # dataset = bigquery.dataset "my_dataset"
 #
 # view = dataset.create_view "my_view",
-#
-#
+# "SELECT name, age FROM proj.dataset.users",
+# name: "My View", description: "This is my view"
 #
 # @!group Table
 #
-def create_view table_id,
-
+def create_view table_id,
+                query,
+                name: nil,
+                description: nil,
+                standard_sql: nil,
+                legacy_sql: nil,
+                udfs: nil
 use_legacy_sql = Convert.resolve_legacy_sql standard_sql, legacy_sql
 new_view_opts = {
 table_reference: Google::Apis::BigqueryV2::TableReference.new(
@@ -698,7 +705,81 @@ module Google
 user_defined_function_resources: udfs_gapi(udfs)
 )
 }.delete_if { |_, v| v.nil? }
-new_view = Google::Apis::BigqueryV2::Table.new
+new_view = Google::Apis::BigqueryV2::Table.new(**new_view_opts)
+
+gapi = service.insert_table dataset_id, new_view
+Table.from_gapi gapi, service
+end
+
+##
+# Creates a new materialized view.
+#
+# Materialized views are precomputed views that periodically cache results of a query for increased performance
+# and efficiency. BigQuery leverages precomputed results from materialized views and whenever possible reads
+# only delta changes from the base table to compute up-to-date results.
+#
+# Queries that use materialized views are generally faster and consume less resources than queries that retrieve
+# the same data only from the base table. Materialized views are helpful to significantly boost performance of
+# workloads that have the characteristic of common and repeated queries.
+#
+# For logical views, see {#create_view}.
+#
+# @see https://cloud.google.com/bigquery/docs/materialized-views-intro Introduction to materialized views
+#
+# @param [String] table_id The ID of the materialized view table. The ID must contain only letters (a-z, A-Z),
+# numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
+# @param [String] query The query that BigQuery executes when the materialized view is referenced.
+# @param [String] name A descriptive name for the table.
+# @param [String] description A user-friendly description of the table.
+# @param [Boolean] enable_refresh Enable automatic refresh of the materialized view when the base table is
+# updated. Optional. The default value is true.
+# @param [Integer] refresh_interval_ms The maximum frequency in milliseconds at which this materialized view
+# will be refreshed. Optional. The default value is `1_800_000` (30 minutes).
+#
+# @return [Google::Cloud::Bigquery::Table] A new table object.
+#
+# @example
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+# dataset = bigquery.dataset "my_dataset"
+#
+# materialized_view = dataset.create_materialized_view "my_materialized_view",
+# "SELECT name, age FROM proj.dataset.users"
+#
+# @example Automatic refresh can be disabled:
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+# dataset = bigquery.dataset "my_dataset"
+#
+# materialized_view = dataset.create_materialized_view "my_materialized_view",
+# "SELECT name, age FROM proj.dataset.users",
+# enable_refresh: false
+#
+# @!group Table
+#
+def create_materialized_view table_id,
+                             query,
+                             name: nil,
+                             description: nil,
+                             enable_refresh: nil,
+                             refresh_interval_ms: nil
+new_view_opts = {
+table_reference: Google::Apis::BigqueryV2::TableReference.new(
+project_id: project_id,
+dataset_id: dataset_id,
+table_id: table_id
+),
+friendly_name: name,
+description: description,
+materialized_view: Google::Apis::BigqueryV2::MaterializedViewDefinition.new(
+enable_refresh: enable_refresh,
+query: query,
+refresh_interval_ms: refresh_interval_ms
+)
+}.delete_if { |_, v| v.nil? }
+new_view = Google::Apis::BigqueryV2::Table.new(**new_view_opts)

 gapi = service.insert_table dataset_id, new_view
 Table.from_gapi gapi, service
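Note that both `create_view` and the new `create_materialized_view` now build the `Table` with `Table.new(**new_view_opts)` rather than passing the hash positionally. This is Ruby 2.7+ keyword-argument separation at work; a short illustrative sketch (`make_reference` is a hypothetical stand-in for the generated API constructors, which take keyword arguments):

```ruby
# Hypothetical method with keyword arguments, like the gapi constructors.
def make_reference project_id:, dataset_id: nil
  { project_id: project_id, dataset_id: dataset_id }
end

opts = { project_id: "my-project", dataset_id: "my_dataset" }

make_reference(**opts)  # explicit splat: works on Ruby 2.7 and 3.x
# make_reference(opts)  # deprecation warning on 2.7, ArgumentError on 3.x
```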
@@ -1059,35 +1140,37 @@ module Google
 #
 # Ruby types are mapped to BigQuery types as follows:
 #
-# | BigQuery
-#
-# | `BOOL`
-# | `INT64`
-# | `FLOAT64`
-# | `NUMERIC`
-# | `
-# | `
-# | `
-# | `
-# | `
-# | `
-# | `
-# | `
+# | BigQuery     | Ruby                                 | Notes                                              |
+# |--------------|--------------------------------------|----------------------------------------------------|
+# | `BOOL`       | `true`/`false`                       |                                                    |
+# | `INT64`      | `Integer`                            |                                                    |
+# | `FLOAT64`    | `Float`                              |                                                    |
+# | `NUMERIC`    | `BigDecimal`                         | `BigDecimal` values will be rounded to scale 9.    |
+# | `BIGNUMERIC` |                                      | Query param values must be mapped in `types`.      |
+# | `STRING`     | `String`                             |                                                    |
+# | `DATETIME`   | `DateTime`                           | `DATETIME` does not support time zone.             |
+# | `DATE`       | `Date`                               |                                                    |
+# | `TIMESTAMP`  | `Time`                               |                                                    |
+# | `TIME`       | `Google::Cloud::BigQuery::Time`      |                                                    |
+# | `BYTES`      | `File`, `IO`, `StringIO`, or similar |                                                    |
+# | `ARRAY`      | `Array`                              | Nested arrays, `nil` values are not supported.     |
+# | `STRUCT`     | `Hash`                               | Hash keys may be strings or symbols.               |
 #
 # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
 # of each BigQuery data type, including allowed values.
-# @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
-# infer the right SQL type from a value in `params`. In these cases, `types` must be used to
-# type for these values.
+# @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
+# possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
+# specify the SQL type for these values.
 #
-#
-# parameters. This must be an `Hash` when the query uses named query parameters. The values
-# type codes from the following list:
+# Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
+# positional query parameters. This must be an `Hash` when the query uses named query parameters. The values
+# should be BigQuery type codes from the following list:
 #
 # * `:BOOL`
 # * `:INT64`
 # * `:FLOAT64`
 # * `:NUMERIC`
+# * `:BIGNUMERIC`
 # * `:STRING`
 # * `:DATETIME`
 # * `:DATE`
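Per the new table row, `BIGNUMERIC` has no Ruby type it can be inferred from, so it must be named explicitly in `types`. A hedged sketch of what that looks like with `Project#query` (the query and value are illustrative, and the default project is assumed):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# The value is passed as a String and the SQL type is forced via `types`,
# mirroring the "must be mapped in `types`" note above.
data = bigquery.query "SELECT @big AS big_value",
                      params: { big: "123456798.98765432100001" },
                      types: { big: :BIGNUMERIC }

data.each { |row| puts row[:big_value] }
```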
@@ -1400,35 +1483,37 @@ module Google
 #
 # Ruby types are mapped to BigQuery types as follows:
 #
-# | BigQuery
-#
-# | `BOOL`
-# | `INT64`
-# | `FLOAT64`
-# | `NUMERIC`
-# | `
-# | `
-# | `
-# | `
-# | `
-# | `
-# | `
-# | `
+# | BigQuery     | Ruby                                 | Notes                                              |
+# |--------------|--------------------------------------|----------------------------------------------------|
+# | `BOOL`       | `true`/`false`                       |                                                    |
+# | `INT64`      | `Integer`                            |                                                    |
+# | `FLOAT64`    | `Float`                              |                                                    |
+# | `NUMERIC`    | `BigDecimal`                         | `BigDecimal` values will be rounded to scale 9.    |
+# | `BIGNUMERIC` |                                      | Query param values must be mapped in `types`.      |
+# | `STRING`     | `String`                             |                                                    |
+# | `DATETIME`   | `DateTime`                           | `DATETIME` does not support time zone.             |
+# | `DATE`       | `Date`                               |                                                    |
+# | `TIMESTAMP`  | `Time`                               |                                                    |
+# | `TIME`       | `Google::Cloud::BigQuery::Time`      |                                                    |
+# | `BYTES`      | `File`, `IO`, `StringIO`, or similar |                                                    |
+# | `ARRAY`      | `Array`                              | Nested arrays, `nil` values are not supported.     |
+# | `STRUCT`     | `Hash`                               | Hash keys may be strings or symbols.               |
 #
 # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
 # of each BigQuery data type, including allowed values.
-# @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
-# infer the right SQL type from a value in `params`. In these cases, `types` must be used to
-# type for these values.
+# @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
+# possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
+# specify the SQL type for these values.
 #
-#
-# parameters. This must be an `Hash` when the query uses named query parameters. The values
-# type codes from the following list:
+# Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
+# positional query parameters. This must be an `Hash` when the query uses named query parameters. The values
+# should be BigQuery type codes from the following list:
 #
 # * `:BOOL`
 # * `:INT64`
 # * `:FLOAT64`
 # * `:NUMERIC`
+# * `:BIGNUMERIC`
 # * `:STRING`
 # * `:DATETIME`
 # * `:DATE`
@@ -2327,6 +2412,21 @@ module Google
 # the need to complete a load operation before the data can appear in
 # query results.
 #
+# Simple Ruby types are generally accepted per JSON rules, along with the following support for BigQuery's more
+# complex types:
+#
+# | BigQuery     | Ruby                                 | Notes                                              |
+# |--------------|--------------------------------------|----------------------------------------------------|
+# | `NUMERIC`    | `BigDecimal`                         | `BigDecimal` values will be rounded to scale 9.    |
+# | `BIGNUMERIC` | `String`                             | Pass as `String` to avoid rounding to scale 9.     |
+# | `DATETIME`   | `DateTime`                           | `DATETIME` does not support time zone.             |
+# | `DATE`       | `Date`                               |                                                    |
+# | `TIMESTAMP`  | `Time`                               |                                                    |
+# | `TIME`       | `Google::Cloud::BigQuery::Time`      |                                                    |
+# | `BYTES`      | `File`, `IO`, `StringIO`, or similar |                                                    |
+# | `ARRAY`      | `Array`                              | Nested arrays, `nil` values are not supported.     |
+# | `STRUCT`     | `Hash`                               | Hash keys may be strings or symbols.               |
+#
 # Because BigQuery's streaming API is designed for high insertion rates,
 # modifications to the underlying table metadata are eventually
 # consistent when interacting with the streaming system. In most cases
@@ -2341,7 +2441,10 @@ module Google
 #
 # @param [String] table_id The ID of the destination table.
 # @param [Hash, Array<Hash>] rows A hash object or array of hash objects
-# containing the data. Required.
+# containing the data. Required. `BigDecimal` values will be rounded to
+# scale 9 to conform with the BigQuery `NUMERIC` data type. To avoid
+# rounding `BIGNUMERIC` type values with scale greater than 9, use `String`
+# instead of `BigDecimal`.
 # @param [Array<String|Symbol>, Symbol] insert_ids A unique ID for each row. BigQuery uses this property to
 # detect duplicate insertion requests on a best-effort basis. For more information, see [data
 # consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency). Optional. If
@@ -2408,6 +2511,18 @@ module Google
 # t.schema.integer "age", mode: :required
 # end
 #
+# @example Pass `BIGNUMERIC` value as a string to avoid rounding to scale 9 in the conversion from `BigDecimal`:
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+# dataset = bigquery.dataset "my_dataset"
+#
+# row = {
+# "my_numeric" => BigDecimal("123456798.987654321"),
+# "my_bignumeric" => "123456798.98765432100001" # BigDecimal would be rounded, use String instead!
+# }
+# dataset.insert "my_table", row
+#
 # @!group Data
 #
 def insert table_id, rows, insert_ids: nil, skip_invalid: nil, ignore_unknown: nil, autocreate: nil, &block
@@ -2500,11 +2615,9 @@ module Google
 create_table table_id do |tbl_updater|
 yield tbl_updater if block_given?
 end
-# rubocop:disable Lint/HandleExceptions
 rescue Google::Cloud::AlreadyExistsError
+# Do nothing if it already exists
 end
-# rubocop:enable Lint/HandleExceptions
-
 sleep 60
 retry
 end
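The rescued `AlreadyExistsError` above backs the `autocreate` option of `Dataset#insert`: if two clients race to create a missing table, the loser tolerates the collision and the insert is retried. A usage sketch matching the schema-block example shown earlier in this diff (dataset and table names are placeholders):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

rows = [{ name: "Alice", age: 21 }]

# If "my_table" does not exist yet, it is created with this schema and the
# insert is retried; a concurrent creation is absorbed silently.
dataset.insert "my_table", rows, autocreate: true do |t|
  t.schema.string "name", mode: :required
  t.schema.integer "age", mode: :required
end
```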
@@ -2547,7 +2660,7 @@ module Google
 return if attributes.empty?
 ensure_service!
 patch_args = Hash[attributes.map { |attr| [attr, @gapi.send(attr)] }]
-patch_gapi = Google::Apis::BigqueryV2::Dataset.new
+patch_gapi = Google::Apis::BigqueryV2::Dataset.new(**patch_args)
 patch_gapi.etag = etag if etag
 @gapi = service.patch_dataset dataset_id, patch_gapi
 end
@@ -2676,12 +2789,11 @@ module Google

 def load_local_or_uri file, updater
 job_gapi = updater.to_gapi
-
-
-
-
-
-job
+if local_file? file
+load_local file, job_gapi
+else
+load_storage file, job_gapi
+end
 end

 def storage_url? files
@@ -2721,6 +2833,7 @@ module Google
 ##
 # @private Create an Updater object.
 def initialize gapi
+super()
 @updates = []
 @gapi = gapi
 end
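The new `super()` is deliberate: with parentheses it calls the parent's zero-argument initializer, while a bare `super` would forward `gapi` up the chain and raise. A tiny sketch with illustrative classes, not taken from the gem:

```ruby
class Base
  def initialize       # takes no arguments
    @ready = true
  end
end

class Updater < Base
  def initialize gapi
    super()            # explicit: pass *no* arguments to Base#initialize
    @gapi = gapi
  end
end

Updater.new :gapi      # works; a bare `super` here would raise ArgumentError
```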
@@ -2756,6 +2869,12 @@ module Google
 raise "not implemented in #{self.class}"
 end

+##
+# @raise [RuntimeError] not implemented
+def create_materialized_view(*)
+raise "not implemented in #{self.class}"
+end
+
 ##
 # @raise [RuntimeError] not implemented
 def table(*)
@@ -1194,7 +1194,7 @@ module Google
 @rules.reject!(&find_by_scope_and_value(scope, value))
 # Add new rule for this role, scope, and value
 opts = { role: role, scope => value }
-@rules << Google::Apis::BigqueryV2::Dataset::Access.new(opts)
+@rules << Google::Apis::BigqueryV2::Dataset::Access.new(**opts)
 end

 # @private
@@ -1204,7 +1204,7 @@ module Google
 @rules.reject!(&find_by_scope_and_resource_ref(:routine, value))
 # Add new rule for this role, scope, and value
 opts = { routine: value }
-@rules << Google::Apis::BigqueryV2::Dataset::Access.new(opts)
+@rules << Google::Apis::BigqueryV2::Dataset::Access.new(**opts)
 end

 # @private
@@ -1215,7 +1215,7 @@ module Google
 @rules.reject!(&find_by_scope_and_resource_ref(:view, value))
 # Add new rule for this role, scope, and value
 opts = { view: value }
-@rules << Google::Apis::BigqueryV2::Dataset::Access.new(opts)
+@rules << Google::Apis::BigqueryV2::Dataset::Access.new(**opts)
 end

 # @private
@@ -120,12 +120,12 @@ module Google
 # puts dataset.name
 # end
 #
-def all request_limit: nil
+def all request_limit: nil, &block
 request_limit = request_limit.to_i if request_limit
 return enum_for :all, request_limit: request_limit unless block_given?
 results = self
 loop do
-results.each
+results.each(&block)
 if request_limit
 request_limit -= 1
 break if request_limit.negative?
@@ -52,6 +52,24 @@ module Google
 # # Retrieve the next page of results
 # data = data.next if data.next?
 #
+# @example Hive partitioning options:
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+#
+# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+# ext.hive_partitioning_mode = :auto
+# ext.hive_partitioning_require_partition_filter = true
+# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+# end
+#
+# external_data.hive_partitioning? #=> true
+# external_data.hive_partitioning_mode #=> "AUTO"
+# external_data.hive_partitioning_require_partition_filter? #=> true
+# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+#
 module External
 ##
 # @private New External from URLs and format
@@ -79,7 +97,8 @@ module Google
 # @private Determine source_format from inputs
 def self.source_format_for urls, format
 val = {
-"csv"
+"csv" => "CSV",
+"avro" => "AVRO",
 "json" => "NEWLINE_DELIMITED_JSON",
 "newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
 "sheets" => "GOOGLE_SHEETS",
@@ -87,7 +106,9 @@ module Google
 "datastore" => "DATASTORE_BACKUP",
 "backup" => "DATASTORE_BACKUP",
 "datastore_backup" => "DATASTORE_BACKUP",
-"bigtable" => "BIGTABLE"
+"bigtable" => "BIGTABLE",
+"orc" => "ORC",
+"parquet" => "PARQUET"
 }[format.to_s.downcase]
 return val unless val.nil?
 Array(urls).each do |url|
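With ORC and Parquet added to the format map, both can now be requested by name when defining an external data source; each falls through to the generic `External::DataSource` class, as the next hunk notes. A short sketch (bucket paths are placeholders):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

parquet_data = bigquery.external "gs://my-bucket/data/*.parquet", format: :parquet
orc_data     = bigquery.external "gs://my-bucket/data/*.orc",     format: :orc

parquet_data.format  #=> "PARQUET"
orc_data.format      #=> "ORC"
```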
@@ -110,7 +131,7 @@ module Google
 when "GOOGLE_SHEETS" then External::SheetsSource
 when "BIGTABLE" then External::BigtableSource
 else
-# AVRO
+# AVRO, DATASTORE_BACKUP, PARQUET
 External::DataSource
 end
 end
@@ -148,6 +169,24 @@ module Google
 # # Retrieve the next page of results
 # data = data.next if data.next?
 #
+# @example Hive partitioning options:
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+#
+# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+# ext.hive_partitioning_mode = :auto
+# ext.hive_partitioning_require_partition_filter = true
+# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+# end
+#
+# external_data.hive_partitioning? #=> true
+# external_data.hive_partitioning_mode #=> "AUTO"
+# external_data.hive_partitioning_require_partition_filter? #=> true
+# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+#
 class DataSource
 ##
 # @private The Google API Client object.
@@ -302,6 +341,52 @@ module Google
 @gapi.source_format == "BIGTABLE"
 end

+##
+# Whether the data format is "ORC".
+#
+# @return [Boolean]
+#
+# @example
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+#
+# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+# external_data = bigquery.external gcs_uri, format: :orc do |ext|
+# ext.hive_partitioning_mode = :auto
+# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+# end
+# external_data.format #=> "ORC"
+# external_data.orc? #=> true
+#
+def orc?
+@gapi.source_format == "ORC"
+end
+
+##
+# Whether the data format is "PARQUET".
+#
+# @return [Boolean]
+#
+# @example
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+#
+# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+# ext.hive_partitioning_mode = :auto
+# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+# end
+# external_data.format #=> "PARQUET"
+# external_data.parquet? #=> true
+#
+def parquet?
+@gapi.source_format == "PARQUET"
+end
+
 ##
 # The fully-qualified URIs that point to your data in Google Cloud.
 # For Google Cloud Storage URIs: Each URI can contain one '*' wildcard
@@ -536,6 +621,246 @@ module Google
 @gapi.max_bad_records = new_max_bad_records
 end

+###
+# Checks if hive partitioning options are set.
+#
+# Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+# will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+# If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
+# Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
+#
+# @return [Boolean] `true` when hive partitioning options are set, or `false` otherwise.
+#
+# @example
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+#
+# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+# ext.hive_partitioning_mode = :auto
+# ext.hive_partitioning_require_partition_filter = true
+# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+# end
+#
+# external_data.hive_partitioning? #=> true
+# external_data.hive_partitioning_mode #=> "AUTO"
+# external_data.hive_partitioning_require_partition_filter? #=> true
+# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+#
+def hive_partitioning?
+!@gapi.hive_partitioning_options.nil?
+end
+
+###
+# The mode of hive partitioning to use when reading data. The following modes are supported:
+#
+# 1. `AUTO`: automatically infer partition key name(s) and type(s).
+# 2. `STRINGS`: automatically infer partition key name(s). All types are interpreted as strings.
+# 3. `CUSTOM`: partition key schema is encoded in the source URI prefix.
+#
+# @return [String, nil] The mode of hive partitioning, or `nil` if not set.
+#
+# @example
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+#
+# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+# ext.hive_partitioning_mode = :auto
+# ext.hive_partitioning_require_partition_filter = true
+# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+# end
+#
+# external_data.hive_partitioning? #=> true
+# external_data.hive_partitioning_mode #=> "AUTO"
+# external_data.hive_partitioning_require_partition_filter? #=> true
+# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+#
+def hive_partitioning_mode
+@gapi.hive_partitioning_options.mode if hive_partitioning?
+end
+
+##
+# Sets the mode of hive partitioning to use when reading data. The following modes are supported:
+#
+# 1. `auto`: automatically infer partition key name(s) and type(s).
+# 2. `strings`: automatically infer partition key name(s). All types are interpreted as strings.
+# 3. `custom`: partition key schema is encoded in the source URI prefix.
+#
+# Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+# will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+# If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
+# Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
+#
+# See {#format}, {#hive_partitioning_require_partition_filter=} and {#hive_partitioning_source_uri_prefix=}.
+#
+# @param [String, Symbol] mode The mode of hive partitioning to use when reading data.
+#
+# @example
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+#
+# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+# ext.hive_partitioning_mode = :auto
+# ext.hive_partitioning_require_partition_filter = true
+# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+# end
+#
+# external_data.hive_partitioning? #=> true
+# external_data.hive_partitioning_mode #=> "AUTO"
+# external_data.hive_partitioning_require_partition_filter? #=> true
+# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+#
+def hive_partitioning_mode= mode
+@gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+@gapi.hive_partitioning_options.mode = mode.to_s.upcase
+end
+
+###
+# Whether queries over the table using this external data source require a partition filter that can be used
+# for partition elimination to be specified. Note that this field should only be true when creating a
+# permanent external table or querying a temporary external table.
+#
+# @return [Boolean] `true` when queries over this table require a partition filter, or `false` otherwise.
+#
+# @example
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+#
+# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+# ext.hive_partitioning_mode = :auto
+# ext.hive_partitioning_require_partition_filter = true
+# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+# end
+#
+# external_data.hive_partitioning? #=> true
+# external_data.hive_partitioning_mode #=> "AUTO"
+# external_data.hive_partitioning_require_partition_filter? #=> true
+# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+#
+def hive_partitioning_require_partition_filter?
+return false unless hive_partitioning?
+!@gapi.hive_partitioning_options.require_partition_filter.nil?
+end
+
+##
+# Sets whether queries over the table using this external data source require a partition filter
+# that can be used for partition elimination to be specified.
+#
+# See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_source_uri_prefix=}.
+#
+# @param [Boolean] require_partition_filter `true` if a partition filter must be specified, `false` otherwise.
+#
+# @example
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+#
+# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+# ext.hive_partitioning_mode = :auto
+# ext.hive_partitioning_require_partition_filter = true
+# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+# end
+#
+# external_data.hive_partitioning? #=> true
+# external_data.hive_partitioning_mode #=> "AUTO"
+# external_data.hive_partitioning_require_partition_filter? #=> true
+# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+#
+def hive_partitioning_require_partition_filter= require_partition_filter
+@gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+@gapi.hive_partitioning_options.require_partition_filter = require_partition_filter
+end
+
+###
+# The common prefix for all source uris when hive partition detection is requested. The prefix must end
+# immediately before the partition key encoding begins. For example, consider files following this data
+# layout:
+#
+# ```
+# gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+# gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+# ```
+#
+# When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+# `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+#
+# @return [String, nil] The common prefix for all source uris, or `nil` if not set.
+#
+# @example
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+#
+# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+# ext.hive_partitioning_mode = :auto
+# ext.hive_partitioning_require_partition_filter = true
+# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+# end
+#
+# external_data.hive_partitioning? #=> true
+# external_data.hive_partitioning_mode #=> "AUTO"
+# external_data.hive_partitioning_require_partition_filter? #=> true
+# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+#
+def hive_partitioning_source_uri_prefix
+@gapi.hive_partitioning_options.source_uri_prefix if hive_partitioning?
+end
+
+##
+# Sets the common prefix for all source uris when hive partition detection is requested. The prefix must end
+# immediately before the partition key encoding begins. For example, consider files following this data
+# layout:
+#
+# ```
+# gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+# gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+# ```
+#
+# When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+# `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+#
+# See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_require_partition_filter=}.
+#
+# @param [String] source_uri_prefix The common prefix for all source uris.
+#
+# @example
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+#
+# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+# ext.hive_partitioning_mode = :auto
+# ext.hive_partitioning_require_partition_filter = true
+# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+# end
+#
+# external_data.hive_partitioning? #=> true
+# external_data.hive_partitioning_mode #=> "AUTO"
+# external_data.hive_partitioning_require_partition_filter? #=> true
+# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+#
+def hive_partitioning_source_uri_prefix= source_uri_prefix
+@gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+@gapi.hive_partitioning_options.source_uri_prefix = source_uri_prefix
+end
+
 ##
 # @private Google API Client object.
 def to_gapi