google-cloud-bigquery 1.27.0 → 1.32.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +58 -0
- data/CONTRIBUTING.md +3 -4
- data/LOGGING.md +1 -1
- data/OVERVIEW.md +15 -14
- data/lib/google/cloud/bigquery/convert.rb +72 -76
- data/lib/google/cloud/bigquery/copy_job.rb +1 -0
- data/lib/google/cloud/bigquery/data.rb +2 -2
- data/lib/google/cloud/bigquery/dataset.rb +181 -62
- data/lib/google/cloud/bigquery/dataset/access.rb +3 -3
- data/lib/google/cloud/bigquery/dataset/list.rb +2 -2
- data/lib/google/cloud/bigquery/external.rb +9 -2619
- data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
- data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
- data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
- data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
- data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
- data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
- data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
- data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
- data/lib/google/cloud/bigquery/extract_job.rb +4 -2
- data/lib/google/cloud/bigquery/job.rb +9 -3
- data/lib/google/cloud/bigquery/job/list.rb +4 -4
- data/lib/google/cloud/bigquery/load_job.rb +178 -19
- data/lib/google/cloud/bigquery/model/list.rb +2 -2
- data/lib/google/cloud/bigquery/policy.rb +2 -1
- data/lib/google/cloud/bigquery/project.rb +47 -43
- data/lib/google/cloud/bigquery/project/list.rb +2 -2
- data/lib/google/cloud/bigquery/query_job.rb +84 -62
- data/lib/google/cloud/bigquery/routine.rb +1 -4
- data/lib/google/cloud/bigquery/routine/list.rb +2 -2
- data/lib/google/cloud/bigquery/schema.rb +39 -3
- data/lib/google/cloud/bigquery/schema/field.rb +63 -13
- data/lib/google/cloud/bigquery/service.rb +11 -13
- data/lib/google/cloud/bigquery/standard_sql.rb +15 -3
- data/lib/google/cloud/bigquery/table.rb +312 -69
- data/lib/google/cloud/bigquery/table/async_inserter.rb +44 -17
- data/lib/google/cloud/bigquery/table/list.rb +2 -2
- data/lib/google/cloud/bigquery/version.rb +1 -1
- metadata +28 -14
|
@@ -482,14 +482,14 @@ module Google
|
|
|
482
482
|
# puts row[:word]
|
|
483
483
|
# end
|
|
484
484
|
#
|
|
485
|
-
def all request_limit: nil
|
|
485
|
+
def all request_limit: nil, &block
|
|
486
486
|
request_limit = request_limit.to_i if request_limit
|
|
487
487
|
|
|
488
488
|
return enum_for :all, request_limit: request_limit unless block_given?
|
|
489
489
|
|
|
490
490
|
results = self
|
|
491
491
|
loop do
|
|
492
|
-
results.each
|
|
492
|
+
results.each(&block)
|
|
493
493
|
if request_limit
|
|
494
494
|
request_limit -= 1
|
|
495
495
|
break if request_limit.negative?
|
|
@@ -618,15 +618,17 @@ module Google
|
|
|
618
618
|
end
|
|
619
619
|
|
|
620
620
|
##
|
|
621
|
-
# Creates a new
|
|
622
|
-
# table, which is a virtual table defined by the given SQL query.
|
|
621
|
+
# Creates a new view, which is a virtual table defined by the given SQL query.
|
|
623
622
|
#
|
|
624
|
-
# BigQuery's
|
|
625
|
-
#
|
|
626
|
-
# the view is queried. Queries are billed according to the total amount
|
|
623
|
+
# With BigQuery's logical views, the query that defines the view is re-executed
|
|
624
|
+
# every time the view is queried. Queries are billed according to the total amount
|
|
627
625
|
# of data in all table fields referenced directly or indirectly by the
|
|
628
626
|
# top-level query. (See {Table#view?} and {Table#query}.)
|
|
629
627
|
#
|
|
628
|
+
# For materialized views, see {#create_materialized_view}.
|
|
629
|
+
#
|
|
630
|
+
# @see https://cloud.google.com/bigquery/docs/views Creating views
|
|
631
|
+
#
|
|
630
632
|
# @param [String] table_id The ID of the view table. The ID must contain
|
|
631
633
|
# only letters (a-z, A-Z), numbers (0-9), or underscores (_). The
|
|
632
634
|
# maximum length is 1,024 characters.
|
|
@@ -667,7 +669,7 @@ module Google
|
|
|
667
669
|
# dataset = bigquery.dataset "my_dataset"
|
|
668
670
|
#
|
|
669
671
|
# view = dataset.create_view "my_view",
|
|
670
|
-
#
|
|
672
|
+
# "SELECT name, age FROM proj.dataset.users"
|
|
671
673
|
#
|
|
672
674
|
# @example A name and description can be provided:
|
|
673
675
|
# require "google/cloud/bigquery"
|
|
@@ -676,13 +678,18 @@ module Google
|
|
|
676
678
|
# dataset = bigquery.dataset "my_dataset"
|
|
677
679
|
#
|
|
678
680
|
# view = dataset.create_view "my_view",
|
|
679
|
-
#
|
|
680
|
-
#
|
|
681
|
+
# "SELECT name, age FROM proj.dataset.users",
|
|
682
|
+
# name: "My View", description: "This is my view"
|
|
681
683
|
#
|
|
682
684
|
# @!group Table
|
|
683
685
|
#
|
|
684
|
-
def create_view table_id,
|
|
685
|
-
|
|
686
|
+
def create_view table_id,
|
|
687
|
+
query,
|
|
688
|
+
name: nil,
|
|
689
|
+
description: nil,
|
|
690
|
+
standard_sql: nil,
|
|
691
|
+
legacy_sql: nil,
|
|
692
|
+
udfs: nil
|
|
686
693
|
use_legacy_sql = Convert.resolve_legacy_sql standard_sql, legacy_sql
|
|
687
694
|
new_view_opts = {
|
|
688
695
|
table_reference: Google::Apis::BigqueryV2::TableReference.new(
|
|
@@ -698,7 +705,81 @@ module Google
|
|
|
698
705
|
user_defined_function_resources: udfs_gapi(udfs)
|
|
699
706
|
)
|
|
700
707
|
}.delete_if { |_, v| v.nil? }
|
|
701
|
-
new_view = Google::Apis::BigqueryV2::Table.new
|
|
708
|
+
new_view = Google::Apis::BigqueryV2::Table.new(**new_view_opts)
|
|
709
|
+
|
|
710
|
+
gapi = service.insert_table dataset_id, new_view
|
|
711
|
+
Table.from_gapi gapi, service
|
|
712
|
+
end
|
|
713
|
+
|
|
714
|
+
##
|
|
715
|
+
# Creates a new materialized view.
|
|
716
|
+
#
|
|
717
|
+
# Materialized views are precomputed views that periodically cache results of a query for increased performance
|
|
718
|
+
# and efficiency. BigQuery leverages precomputed results from materialized views and whenever possible reads
|
|
719
|
+
# only delta changes from the base table to compute up-to-date results.
|
|
720
|
+
#
|
|
721
|
+
# Queries that use materialized views are generally faster and consume less resources than queries that retrieve
|
|
722
|
+
# the same data only from the base table. Materialized views are helpful to significantly boost performance of
|
|
723
|
+
# workloads that have the characteristic of common and repeated queries.
|
|
724
|
+
#
|
|
725
|
+
# For logical views, see {#create_view}.
|
|
726
|
+
#
|
|
727
|
+
# @see https://cloud.google.com/bigquery/docs/materialized-views-intro Introduction to materialized views
|
|
728
|
+
#
|
|
729
|
+
# @param [String] table_id The ID of the materialized view table. The ID must contain only letters (a-z, A-Z),
|
|
730
|
+
# numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
|
|
731
|
+
# @param [String] query The query that BigQuery executes when the materialized view is referenced.
|
|
732
|
+
# @param [String] name A descriptive name for the table.
|
|
733
|
+
# @param [String] description A user-friendly description of the table.
|
|
734
|
+
# @param [Boolean] enable_refresh Enable automatic refresh of the materialized view when the base table is
|
|
735
|
+
# updated. Optional. The default value is true.
|
|
736
|
+
# @param [Integer] refresh_interval_ms The maximum frequency in milliseconds at which this materialized view
|
|
737
|
+
# will be refreshed. Optional. The default value is `1_800_000` (30 minutes).
|
|
738
|
+
#
|
|
739
|
+
# @return [Google::Cloud::Bigquery::Table] A new table object.
|
|
740
|
+
#
|
|
741
|
+
# @example
|
|
742
|
+
# require "google/cloud/bigquery"
|
|
743
|
+
#
|
|
744
|
+
# bigquery = Google::Cloud::Bigquery.new
|
|
745
|
+
# dataset = bigquery.dataset "my_dataset"
|
|
746
|
+
#
|
|
747
|
+
# materialized_view = dataset.create_materialized_view "my_materialized_view",
|
|
748
|
+
# "SELECT name, age FROM proj.dataset.users"
|
|
749
|
+
#
|
|
750
|
+
# @example Automatic refresh can be disabled:
|
|
751
|
+
# require "google/cloud/bigquery"
|
|
752
|
+
#
|
|
753
|
+
# bigquery = Google::Cloud::Bigquery.new
|
|
754
|
+
# dataset = bigquery.dataset "my_dataset"
|
|
755
|
+
#
|
|
756
|
+
# materialized_view = dataset.create_materialized_view "my_materialized_view",
|
|
757
|
+
# "SELECT name, age FROM proj.dataset.users",
|
|
758
|
+
# enable_refresh: false
|
|
759
|
+
#
|
|
760
|
+
# @!group Table
|
|
761
|
+
#
|
|
762
|
+
def create_materialized_view table_id,
|
|
763
|
+
query,
|
|
764
|
+
name: nil,
|
|
765
|
+
description: nil,
|
|
766
|
+
enable_refresh: nil,
|
|
767
|
+
refresh_interval_ms: nil
|
|
768
|
+
new_view_opts = {
|
|
769
|
+
table_reference: Google::Apis::BigqueryV2::TableReference.new(
|
|
770
|
+
project_id: project_id,
|
|
771
|
+
dataset_id: dataset_id,
|
|
772
|
+
table_id: table_id
|
|
773
|
+
),
|
|
774
|
+
friendly_name: name,
|
|
775
|
+
description: description,
|
|
776
|
+
materialized_view: Google::Apis::BigqueryV2::MaterializedViewDefinition.new(
|
|
777
|
+
enable_refresh: enable_refresh,
|
|
778
|
+
query: query,
|
|
779
|
+
refresh_interval_ms: refresh_interval_ms
|
|
780
|
+
)
|
|
781
|
+
}.delete_if { |_, v| v.nil? }
|
|
782
|
+
new_view = Google::Apis::BigqueryV2::Table.new(**new_view_opts)
|
|
702
783
|
|
|
703
784
|
gapi = service.insert_table dataset_id, new_view
|
|
704
785
|
Table.from_gapi gapi, service
|
|
@@ -1059,35 +1140,37 @@ module Google
|
|
|
1059
1140
|
#
|
|
1060
1141
|
# Ruby types are mapped to BigQuery types as follows:
|
|
1061
1142
|
#
|
|
1062
|
-
# | BigQuery
|
|
1063
|
-
#
|
|
1064
|
-
# | `BOOL`
|
|
1065
|
-
# | `INT64`
|
|
1066
|
-
# | `FLOAT64`
|
|
1067
|
-
# | `NUMERIC`
|
|
1068
|
-
# | `
|
|
1069
|
-
# | `
|
|
1070
|
-
# | `
|
|
1071
|
-
# | `
|
|
1072
|
-
# | `
|
|
1073
|
-
# | `
|
|
1074
|
-
# | `
|
|
1075
|
-
# | `
|
|
1143
|
+
# | BigQuery | Ruby | Notes |
|
|
1144
|
+
# |--------------|--------------------------------------|----------------------------------------------------|
|
|
1145
|
+
# | `BOOL` | `true`/`false` | |
|
|
1146
|
+
# | `INT64` | `Integer` | |
|
|
1147
|
+
# | `FLOAT64` | `Float` | |
|
|
1148
|
+
# | `NUMERIC` | `BigDecimal` | `BigDecimal` values will be rounded to scale 9. |
|
|
1149
|
+
# | `BIGNUMERIC` | | Query param values must be mapped in `types`. |
|
|
1150
|
+
# | `STRING` | `String` | |
|
|
1151
|
+
# | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
|
|
1152
|
+
# | `DATE` | `Date` | |
|
|
1153
|
+
# | `TIMESTAMP` | `Time` | |
|
|
1154
|
+
# | `TIME` | `Google::Cloud::BigQuery::Time` | |
|
|
1155
|
+
# | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
|
|
1156
|
+
# | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
|
|
1157
|
+
# | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
|
|
1076
1158
|
#
|
|
1077
1159
|
# See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
|
|
1078
1160
|
# of each BigQuery data type, including allowed values.
|
|
1079
|
-
# @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
|
|
1080
|
-
# infer the right SQL type from a value in `params`. In these cases, `types` must be used to
|
|
1081
|
-
# type for these values.
|
|
1161
|
+
# @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
|
|
1162
|
+
# possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
|
|
1163
|
+
# specify the SQL type for these values.
|
|
1082
1164
|
#
|
|
1083
|
-
#
|
|
1084
|
-
# parameters. This must be an `Hash` when the query uses named query parameters. The values
|
|
1085
|
-
# type codes from the following list:
|
|
1165
|
+
# Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
|
|
1166
|
+
# positional query parameters. This must be an `Hash` when the query uses named query parameters. The values
|
|
1167
|
+
# should be BigQuery type codes from the following list:
|
|
1086
1168
|
#
|
|
1087
1169
|
# * `:BOOL`
|
|
1088
1170
|
# * `:INT64`
|
|
1089
1171
|
# * `:FLOAT64`
|
|
1090
1172
|
# * `:NUMERIC`
|
|
1173
|
+
# * `:BIGNUMERIC`
|
|
1091
1174
|
# * `:STRING`
|
|
1092
1175
|
# * `:DATETIME`
|
|
1093
1176
|
# * `:DATE`
|
|
@@ -1400,35 +1483,37 @@ module Google
|
|
|
1400
1483
|
#
|
|
1401
1484
|
# Ruby types are mapped to BigQuery types as follows:
|
|
1402
1485
|
#
|
|
1403
|
-
# | BigQuery
|
|
1404
|
-
#
|
|
1405
|
-
# | `BOOL`
|
|
1406
|
-
# | `INT64`
|
|
1407
|
-
# | `FLOAT64`
|
|
1408
|
-
# | `NUMERIC`
|
|
1409
|
-
# | `
|
|
1410
|
-
# | `
|
|
1411
|
-
# | `
|
|
1412
|
-
# | `
|
|
1413
|
-
# | `
|
|
1414
|
-
# | `
|
|
1415
|
-
# | `
|
|
1416
|
-
# | `
|
|
1486
|
+
# | BigQuery | Ruby | Notes |
|
|
1487
|
+
# |--------------|--------------------------------------|----------------------------------------------------|
|
|
1488
|
+
# | `BOOL` | `true`/`false` | |
|
|
1489
|
+
# | `INT64` | `Integer` | |
|
|
1490
|
+
# | `FLOAT64` | `Float` | |
|
|
1491
|
+
# | `NUMERIC` | `BigDecimal` | `BigDecimal` values will be rounded to scale 9. |
|
|
1492
|
+
# | `BIGNUMERIC` | | Query param values must be mapped in `types`. |
|
|
1493
|
+
# | `STRING` | `String` | |
|
|
1494
|
+
# | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
|
|
1495
|
+
# | `DATE` | `Date` | |
|
|
1496
|
+
# | `TIMESTAMP` | `Time` | |
|
|
1497
|
+
# | `TIME` | `Google::Cloud::BigQuery::Time` | |
|
|
1498
|
+
# | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
|
|
1499
|
+
# | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
|
|
1500
|
+
# | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
|
|
1417
1501
|
#
|
|
1418
1502
|
# See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
|
|
1419
1503
|
# of each BigQuery data type, including allowed values.
|
|
1420
|
-
# @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
|
|
1421
|
-
# infer the right SQL type from a value in `params`. In these cases, `types` must be used to
|
|
1422
|
-
# type for these values.
|
|
1504
|
+
# @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
|
|
1505
|
+
# possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
|
|
1506
|
+
# specify the SQL type for these values.
|
|
1423
1507
|
#
|
|
1424
|
-
#
|
|
1425
|
-
# parameters. This must be an `Hash` when the query uses named query parameters. The values
|
|
1426
|
-
# type codes from the following list:
|
|
1508
|
+
# Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
|
|
1509
|
+
# positional query parameters. This must be an `Hash` when the query uses named query parameters. The values
|
|
1510
|
+
# should be BigQuery type codes from the following list:
|
|
1427
1511
|
#
|
|
1428
1512
|
# * `:BOOL`
|
|
1429
1513
|
# * `:INT64`
|
|
1430
1514
|
# * `:FLOAT64`
|
|
1431
1515
|
# * `:NUMERIC`
|
|
1516
|
+
# * `:BIGNUMERIC`
|
|
1432
1517
|
# * `:STRING`
|
|
1433
1518
|
# * `:DATETIME`
|
|
1434
1519
|
# * `:DATE`
|
|
@@ -2327,6 +2412,21 @@ module Google
|
|
|
2327
2412
|
# the need to complete a load operation before the data can appear in
|
|
2328
2413
|
# query results.
|
|
2329
2414
|
#
|
|
2415
|
+
# Simple Ruby types are generally accepted per JSON rules, along with the following support for BigQuery's more
|
|
2416
|
+
# complex types:
|
|
2417
|
+
#
|
|
2418
|
+
# | BigQuery | Ruby | Notes |
|
|
2419
|
+
# |--------------|--------------------------------------|----------------------------------------------------|
|
|
2420
|
+
# | `NUMERIC` | `BigDecimal` | `BigDecimal` values will be rounded to scale 9. |
|
|
2421
|
+
# | `BIGNUMERIC` | `String` | Pass as `String` to avoid rounding to scale 9. |
|
|
2422
|
+
# | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
|
|
2423
|
+
# | `DATE` | `Date` | |
|
|
2424
|
+
# | `TIMESTAMP` | `Time` | |
|
|
2425
|
+
# | `TIME` | `Google::Cloud::BigQuery::Time` | |
|
|
2426
|
+
# | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
|
|
2427
|
+
# | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
|
|
2428
|
+
# | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
|
|
2429
|
+
#
|
|
2330
2430
|
# Because BigQuery's streaming API is designed for high insertion rates,
|
|
2331
2431
|
# modifications to the underlying table metadata are eventually
|
|
2332
2432
|
# consistent when interacting with the streaming system. In most cases
|
|
@@ -2341,7 +2441,10 @@ module Google
|
|
|
2341
2441
|
#
|
|
2342
2442
|
# @param [String] table_id The ID of the destination table.
|
|
2343
2443
|
# @param [Hash, Array<Hash>] rows A hash object or array of hash objects
|
|
2344
|
-
# containing the data. Required.
|
|
2444
|
+
# containing the data. Required. `BigDecimal` values will be rounded to
|
|
2445
|
+
# scale 9 to conform with the BigQuery `NUMERIC` data type. To avoid
|
|
2446
|
+
# rounding `BIGNUMERIC` type values with scale greater than 9, use `String`
|
|
2447
|
+
# instead of `BigDecimal`.
|
|
2345
2448
|
# @param [Array<String|Symbol>, Symbol] insert_ids A unique ID for each row. BigQuery uses this property to
|
|
2346
2449
|
# detect duplicate insertion requests on a best-effort basis. For more information, see [data
|
|
2347
2450
|
# consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency). Optional. If
|
|
@@ -2408,6 +2511,18 @@ module Google
|
|
|
2408
2511
|
# t.schema.integer "age", mode: :required
|
|
2409
2512
|
# end
|
|
2410
2513
|
#
|
|
2514
|
+
# @example Pass `BIGNUMERIC` value as a string to avoid rounding to scale 9 in the conversion from `BigDecimal`:
|
|
2515
|
+
# require "google/cloud/bigquery"
|
|
2516
|
+
#
|
|
2517
|
+
# bigquery = Google::Cloud::Bigquery.new
|
|
2518
|
+
# dataset = bigquery.dataset "my_dataset"
|
|
2519
|
+
#
|
|
2520
|
+
# row = {
|
|
2521
|
+
# "my_numeric" => BigDecimal("123456798.987654321"),
|
|
2522
|
+
# "my_bignumeric" => "123456798.98765432100001" # BigDecimal would be rounded, use String instead!
|
|
2523
|
+
# }
|
|
2524
|
+
# dataset.insert "my_table", row
|
|
2525
|
+
#
|
|
2411
2526
|
# @!group Data
|
|
2412
2527
|
#
|
|
2413
2528
|
def insert table_id, rows, insert_ids: nil, skip_invalid: nil, ignore_unknown: nil, autocreate: nil, &block
|
|
@@ -2500,11 +2615,9 @@ module Google
|
|
|
2500
2615
|
create_table table_id do |tbl_updater|
|
|
2501
2616
|
yield tbl_updater if block_given?
|
|
2502
2617
|
end
|
|
2503
|
-
# rubocop:disable Lint/HandleExceptions
|
|
2504
2618
|
rescue Google::Cloud::AlreadyExistsError
|
|
2619
|
+
# Do nothing if it already exists
|
|
2505
2620
|
end
|
|
2506
|
-
# rubocop:enable Lint/HandleExceptions
|
|
2507
|
-
|
|
2508
2621
|
sleep 60
|
|
2509
2622
|
retry
|
|
2510
2623
|
end
|
|
@@ -2547,7 +2660,7 @@ module Google
|
|
|
2547
2660
|
return if attributes.empty?
|
|
2548
2661
|
ensure_service!
|
|
2549
2662
|
patch_args = Hash[attributes.map { |attr| [attr, @gapi.send(attr)] }]
|
|
2550
|
-
patch_gapi = Google::Apis::BigqueryV2::Dataset.new
|
|
2663
|
+
patch_gapi = Google::Apis::BigqueryV2::Dataset.new(**patch_args)
|
|
2551
2664
|
patch_gapi.etag = etag if etag
|
|
2552
2665
|
@gapi = service.patch_dataset dataset_id, patch_gapi
|
|
2553
2666
|
end
|
|
@@ -2676,12 +2789,11 @@ module Google
|
|
|
2676
2789
|
|
|
2677
2790
|
def load_local_or_uri file, updater
|
|
2678
2791
|
job_gapi = updater.to_gapi
|
|
2679
|
-
|
|
2680
|
-
|
|
2681
|
-
|
|
2682
|
-
|
|
2683
|
-
|
|
2684
|
-
job
|
|
2792
|
+
if local_file? file
|
|
2793
|
+
load_local file, job_gapi
|
|
2794
|
+
else
|
|
2795
|
+
load_storage file, job_gapi
|
|
2796
|
+
end
|
|
2685
2797
|
end
|
|
2686
2798
|
|
|
2687
2799
|
def storage_url? files
|
|
@@ -2721,6 +2833,7 @@ module Google
|
|
|
2721
2833
|
##
|
|
2722
2834
|
# @private Create an Updater object.
|
|
2723
2835
|
def initialize gapi
|
|
2836
|
+
super()
|
|
2724
2837
|
@updates = []
|
|
2725
2838
|
@gapi = gapi
|
|
2726
2839
|
end
|
|
@@ -2756,6 +2869,12 @@ module Google
|
|
|
2756
2869
|
raise "not implemented in #{self.class}"
|
|
2757
2870
|
end
|
|
2758
2871
|
|
|
2872
|
+
##
|
|
2873
|
+
# @raise [RuntimeError] not implemented
|
|
2874
|
+
def create_materialized_view(*)
|
|
2875
|
+
raise "not implemented in #{self.class}"
|
|
2876
|
+
end
|
|
2877
|
+
|
|
2759
2878
|
##
|
|
2760
2879
|
# @raise [RuntimeError] not implemented
|
|
2761
2880
|
def table(*)
|
|
@@ -1194,7 +1194,7 @@ module Google
|
|
|
1194
1194
|
@rules.reject!(&find_by_scope_and_value(scope, value))
|
|
1195
1195
|
# Add new rule for this role, scope, and value
|
|
1196
1196
|
opts = { role: role, scope => value }
|
|
1197
|
-
@rules << Google::Apis::BigqueryV2::Dataset::Access.new(opts)
|
|
1197
|
+
@rules << Google::Apis::BigqueryV2::Dataset::Access.new(**opts)
|
|
1198
1198
|
end
|
|
1199
1199
|
|
|
1200
1200
|
# @private
|
|
@@ -1204,7 +1204,7 @@ module Google
|
|
|
1204
1204
|
@rules.reject!(&find_by_scope_and_resource_ref(:routine, value))
|
|
1205
1205
|
# Add new rule for this role, scope, and value
|
|
1206
1206
|
opts = { routine: value }
|
|
1207
|
-
@rules << Google::Apis::BigqueryV2::Dataset::Access.new(opts)
|
|
1207
|
+
@rules << Google::Apis::BigqueryV2::Dataset::Access.new(**opts)
|
|
1208
1208
|
end
|
|
1209
1209
|
|
|
1210
1210
|
# @private
|
|
@@ -1215,7 +1215,7 @@ module Google
|
|
|
1215
1215
|
@rules.reject!(&find_by_scope_and_resource_ref(:view, value))
|
|
1216
1216
|
# Add new rule for this role, scope, and value
|
|
1217
1217
|
opts = { view: value }
|
|
1218
|
-
@rules << Google::Apis::BigqueryV2::Dataset::Access.new(opts)
|
|
1218
|
+
@rules << Google::Apis::BigqueryV2::Dataset::Access.new(**opts)
|
|
1219
1219
|
end
|
|
1220
1220
|
|
|
1221
1221
|
# @private
|
|
@@ -120,12 +120,12 @@ module Google
|
|
|
120
120
|
# puts dataset.name
|
|
121
121
|
# end
|
|
122
122
|
#
|
|
123
|
-
def all request_limit: nil
|
|
123
|
+
def all request_limit: nil, &block
|
|
124
124
|
request_limit = request_limit.to_i if request_limit
|
|
125
125
|
return enum_for :all, request_limit: request_limit unless block_given?
|
|
126
126
|
results = self
|
|
127
127
|
loop do
|
|
128
|
-
results.each
|
|
128
|
+
results.each(&block)
|
|
129
129
|
if request_limit
|
|
130
130
|
request_limit -= 1
|
|
131
131
|
break if request_limit.negative?
|
|
@@ -13,8 +13,12 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
|
|
16
|
-
require "google/
|
|
17
|
-
require "
|
|
16
|
+
require "google/cloud/bigquery/external/data_source"
|
|
17
|
+
require "google/cloud/bigquery/external/bigtable_source"
|
|
18
|
+
require "google/cloud/bigquery/external/csv_source"
|
|
19
|
+
require "google/cloud/bigquery/external/json_source"
|
|
20
|
+
require "google/cloud/bigquery/external/parquet_source"
|
|
21
|
+
require "google/cloud/bigquery/external/sheets_source"
|
|
18
22
|
|
|
19
23
|
module Google
|
|
20
24
|
module Cloud
|
|
@@ -114,6 +118,7 @@ module Google
|
|
|
114
118
|
Array(urls).each do |url|
|
|
115
119
|
return "CSV" if url.end_with? ".csv"
|
|
116
120
|
return "NEWLINE_DELIMITED_JSON" if url.end_with? ".json"
|
|
121
|
+
return "PARQUET" if url.end_with? ".parquet"
|
|
117
122
|
return "AVRO" if url.end_with? ".avro"
|
|
118
123
|
return "DATASTORE_BACKUP" if url.end_with? ".backup_info"
|
|
119
124
|
return "GOOGLE_SHEETS" if url.start_with? "https://docs.google.com/spreadsheets/"
|
|
@@ -128,2629 +133,14 @@ module Google
|
|
|
128
133
|
case format
|
|
129
134
|
when "CSV" then External::CsvSource
|
|
130
135
|
when "NEWLINE_DELIMITED_JSON" then External::JsonSource
|
|
136
|
+
when "PARQUET" then External::ParquetSource
|
|
131
137
|
when "GOOGLE_SHEETS" then External::SheetsSource
|
|
132
138
|
when "BIGTABLE" then External::BigtableSource
|
|
133
139
|
else
|
|
134
|
-
# AVRO, DATASTORE_BACKUP
|
|
140
|
+
# AVRO, DATASTORE_BACKUP
|
|
135
141
|
External::DataSource
|
|
136
142
|
end
|
|
137
143
|
end
|
|
138
|
-
|
|
139
|
-
##
|
|
140
|
-
# # DataSource
|
|
141
|
-
#
|
|
142
|
-
# External::DataSource and its subclasses represents an external data
|
|
143
|
-
# source that can be queried from directly, even though the data is not
|
|
144
|
-
# stored in BigQuery. Instead of loading or streaming the data, this
|
|
145
|
-
# object references the external data source.
|
|
146
|
-
#
|
|
147
|
-
# The AVRO and Datastore Backup formats use {External::DataSource}. See
|
|
148
|
-
# {External::CsvSource}, {External::JsonSource},
|
|
149
|
-
# {External::SheetsSource}, {External::BigtableSource} for the other
|
|
150
|
-
# formats.
|
|
151
|
-
#
|
|
152
|
-
# @example
|
|
153
|
-
# require "google/cloud/bigquery"
|
|
154
|
-
#
|
|
155
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
156
|
-
#
|
|
157
|
-
# avro_url = "gs://bucket/path/to/data.avro"
|
|
158
|
-
# avro_table = bigquery.external avro_url do |avro|
|
|
159
|
-
# avro.autodetect = true
|
|
160
|
-
# end
|
|
161
|
-
#
|
|
162
|
-
# data = bigquery.query "SELECT * FROM my_ext_table",
|
|
163
|
-
# external: { my_ext_table: avro_table }
|
|
164
|
-
#
|
|
165
|
-
# # Iterate over the first page of results
|
|
166
|
-
# data.each do |row|
|
|
167
|
-
# puts row[:name]
|
|
168
|
-
# end
|
|
169
|
-
# # Retrieve the next page of results
|
|
170
|
-
# data = data.next if data.next?
|
|
171
|
-
#
|
|
172
|
-
# @example Hive partitioning options:
|
|
173
|
-
# require "google/cloud/bigquery"
|
|
174
|
-
#
|
|
175
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
176
|
-
#
|
|
177
|
-
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
|
178
|
-
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
|
179
|
-
# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
|
|
180
|
-
# ext.hive_partitioning_mode = :auto
|
|
181
|
-
# ext.hive_partitioning_require_partition_filter = true
|
|
182
|
-
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
|
183
|
-
# end
|
|
184
|
-
#
|
|
185
|
-
# external_data.hive_partitioning? #=> true
|
|
186
|
-
# external_data.hive_partitioning_mode #=> "AUTO"
|
|
187
|
-
# external_data.hive_partitioning_require_partition_filter? #=> true
|
|
188
|
-
# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
|
|
189
|
-
#
|
|
190
|
-
class DataSource
|
|
191
|
-
##
|
|
192
|
-
# @private The Google API Client object.
|
|
193
|
-
attr_accessor :gapi
|
|
194
|
-
|
|
195
|
-
##
|
|
196
|
-
# @private Create an empty Table object.
|
|
197
|
-
def initialize
|
|
198
|
-
@gapi = Google::Apis::BigqueryV2::ExternalDataConfiguration.new
|
|
199
|
-
end
|
|
200
|
-
|
|
201
|
-
##
|
|
202
|
-
# The data format. For CSV files, specify "CSV". For Google sheets,
|
|
203
|
-
# specify "GOOGLE_SHEETS". For newline-delimited JSON, specify
|
|
204
|
-
# "NEWLINE_DELIMITED_JSON". For Avro files, specify "AVRO". For Google
|
|
205
|
-
# Cloud Datastore backups, specify "DATASTORE_BACKUP". [Beta] For
|
|
206
|
-
# Google Cloud Bigtable, specify "BIGTABLE".
|
|
207
|
-
#
|
|
208
|
-
# @return [String]
|
|
209
|
-
#
|
|
210
|
-
# @example
|
|
211
|
-
# require "google/cloud/bigquery"
|
|
212
|
-
#
|
|
213
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
214
|
-
#
|
|
215
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
216
|
-
# csv_table = bigquery.external csv_url
|
|
217
|
-
#
|
|
218
|
-
# csv_table.format #=> "CSV"
|
|
219
|
-
#
|
|
220
|
-
def format
|
|
221
|
-
@gapi.source_format
|
|
222
|
-
end
|
|
223
|
-
|
|
224
|
-
##
|
|
225
|
-
# Whether the data format is "CSV".
|
|
226
|
-
#
|
|
227
|
-
# @return [Boolean]
|
|
228
|
-
#
|
|
229
|
-
# @example
|
|
230
|
-
# require "google/cloud/bigquery"
|
|
231
|
-
#
|
|
232
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
233
|
-
#
|
|
234
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
235
|
-
# csv_table = bigquery.external csv_url
|
|
236
|
-
#
|
|
237
|
-
# csv_table.format #=> "CSV"
|
|
238
|
-
# csv_table.csv? #=> true
|
|
239
|
-
#
|
|
240
|
-
def csv?
|
|
241
|
-
@gapi.source_format == "CSV"
|
|
242
|
-
end
|
|
243
|
-
|
|
244
|
-
##
|
|
245
|
-
# Whether the data format is "NEWLINE_DELIMITED_JSON".
|
|
246
|
-
#
|
|
247
|
-
# @return [Boolean]
|
|
248
|
-
#
|
|
249
|
-
# @example
|
|
250
|
-
# require "google/cloud/bigquery"
|
|
251
|
-
#
|
|
252
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
253
|
-
#
|
|
254
|
-
# json_url = "gs://bucket/path/to/data.json"
|
|
255
|
-
# json_table = bigquery.external json_url
|
|
256
|
-
#
|
|
257
|
-
# json_table.format #=> "NEWLINE_DELIMITED_JSON"
|
|
258
|
-
# json_table.json? #=> true
|
|
259
|
-
#
|
|
260
|
-
def json?
|
|
261
|
-
@gapi.source_format == "NEWLINE_DELIMITED_JSON"
|
|
262
|
-
end
|
|
263
|
-
|
|
264
|
-
##
|
|
265
|
-
# Whether the data format is "GOOGLE_SHEETS".
|
|
266
|
-
#
|
|
267
|
-
# @return [Boolean]
|
|
268
|
-
#
|
|
269
|
-
# @example
|
|
270
|
-
# require "google/cloud/bigquery"
|
|
271
|
-
#
|
|
272
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
273
|
-
#
|
|
274
|
-
# sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
|
|
275
|
-
# sheets_table = bigquery.external sheets_url
|
|
276
|
-
#
|
|
277
|
-
# sheets_table.format #=> "GOOGLE_SHEETS"
|
|
278
|
-
# sheets_table.sheets? #=> true
|
|
279
|
-
#
|
|
280
|
-
def sheets?
|
|
281
|
-
@gapi.source_format == "GOOGLE_SHEETS"
|
|
282
|
-
end
|
|
283
|
-
|
|
284
|
-
##
|
|
285
|
-
# Whether the data format is "AVRO".
|
|
286
|
-
#
|
|
287
|
-
# @return [Boolean]
|
|
288
|
-
#
|
|
289
|
-
# @example
|
|
290
|
-
# require "google/cloud/bigquery"
|
|
291
|
-
#
|
|
292
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
293
|
-
#
|
|
294
|
-
# avro_url = "gs://bucket/path/to/data.avro"
|
|
295
|
-
# avro_table = bigquery.external avro_url
|
|
296
|
-
#
|
|
297
|
-
# avro_table.format #=> "AVRO"
|
|
298
|
-
# avro_table.avro? #=> true
|
|
299
|
-
#
|
|
300
|
-
def avro?
|
|
301
|
-
@gapi.source_format == "AVRO"
|
|
302
|
-
end
|
|
303
|
-
|
|
304
|
-
##
|
|
305
|
-
# Whether the data format is "DATASTORE_BACKUP".
|
|
306
|
-
#
|
|
307
|
-
# @return [Boolean]
|
|
308
|
-
#
|
|
309
|
-
# @example
|
|
310
|
-
# require "google/cloud/bigquery"
|
|
311
|
-
#
|
|
312
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
313
|
-
#
|
|
314
|
-
# backup_url = "gs://bucket/path/to/data.backup_info"
|
|
315
|
-
# backup_table = bigquery.external backup_url
|
|
316
|
-
#
|
|
317
|
-
# backup_table.format #=> "DATASTORE_BACKUP"
|
|
318
|
-
# backup_table.backup? #=> true
|
|
319
|
-
#
|
|
320
|
-
def backup?
|
|
321
|
-
@gapi.source_format == "DATASTORE_BACKUP"
|
|
322
|
-
end
|
|
323
|
-
|
|
324
|
-
##
|
|
325
|
-
# Whether the data format is "BIGTABLE".
|
|
326
|
-
#
|
|
327
|
-
# @return [Boolean]
|
|
328
|
-
#
|
|
329
|
-
# @example
|
|
330
|
-
# require "google/cloud/bigquery"
|
|
331
|
-
#
|
|
332
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
333
|
-
#
|
|
334
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
335
|
-
# bigtable_table = bigquery.external bigtable_url
|
|
336
|
-
#
|
|
337
|
-
# bigtable_table.format #=> "BIGTABLE"
|
|
338
|
-
# bigtable_table.bigtable? #=> true
|
|
339
|
-
#
|
|
340
|
-
def bigtable?
|
|
341
|
-
@gapi.source_format == "BIGTABLE"
|
|
342
|
-
end
|
|
343
|
-
|
|
344
|
-
##
|
|
345
|
-
# Whether the data format is "ORC".
|
|
346
|
-
#
|
|
347
|
-
# @return [Boolean]
|
|
348
|
-
#
|
|
349
|
-
# @example
|
|
350
|
-
# require "google/cloud/bigquery"
|
|
351
|
-
#
|
|
352
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
353
|
-
#
|
|
354
|
-
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
|
355
|
-
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
|
356
|
-
# external_data = bigquery.external gcs_uri, format: :orc do |ext|
|
|
357
|
-
# ext.hive_partitioning_mode = :auto
|
|
358
|
-
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
|
359
|
-
# end
|
|
360
|
-
# external_data.format #=> "ORC"
|
|
361
|
-
# external_data.orc? #=> true
|
|
362
|
-
#
|
|
363
|
-
##
# Whether the data format is "ORC".
#
# @return [Boolean]
#
def orc?
  @gapi.source_format == "ORC"
end
|
|
366
|
-
|
|
367
|
-
##
|
|
368
|
-
# Whether the data format is "PARQUET".
|
|
369
|
-
#
|
|
370
|
-
# @return [Boolean]
|
|
371
|
-
#
|
|
372
|
-
# @example
|
|
373
|
-
# require "google/cloud/bigquery"
|
|
374
|
-
#
|
|
375
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
376
|
-
#
|
|
377
|
-
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
|
378
|
-
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
|
379
|
-
# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
|
|
380
|
-
# ext.hive_partitioning_mode = :auto
|
|
381
|
-
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
|
382
|
-
# end
|
|
383
|
-
# external_data.format #=> "PARQUET"
|
|
384
|
-
# external_data.parquet? #=> true
|
|
385
|
-
#
|
|
386
|
-
##
# Whether the data format is "PARQUET".
#
# @return [Boolean]
#
def parquet?
  @gapi.source_format == "PARQUET"
end
|
|
389
|
-
|
|
390
|
-
##
|
|
391
|
-
# The fully-qualified URIs that point to your data in Google Cloud.
|
|
392
|
-
# For Google Cloud Storage URIs: Each URI can contain one '*' wildcard
|
|
393
|
-
# character and it must come after the 'bucket' name. Size limits
|
|
394
|
-
# related to load jobs apply to external data sources. For Google
|
|
395
|
-
# Cloud Bigtable URIs: Exactly one URI can be specified and it has be
|
|
396
|
-
# a fully specified and valid HTTPS URL for a Google Cloud Bigtable
|
|
397
|
-
# table. For Google Cloud Datastore backups, exactly one URI can be
|
|
398
|
-
# specified, and it must end with '.backup_info'. Also, the '*'
|
|
399
|
-
# wildcard character is not allowed.
|
|
400
|
-
#
|
|
401
|
-
# @return [Array<String>]
|
|
402
|
-
#
|
|
403
|
-
# @example
|
|
404
|
-
# require "google/cloud/bigquery"
|
|
405
|
-
#
|
|
406
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
407
|
-
#
|
|
408
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
409
|
-
# csv_table = bigquery.external csv_url
|
|
410
|
-
#
|
|
411
|
-
# csv_table.urls #=> ["gs://bucket/path/to/data.csv"]
|
|
412
|
-
#
|
|
413
|
-
##
# The fully-qualified URIs that point to the data in Google Cloud. Each
# Cloud Storage URI may contain one `*` wildcard (after the bucket name);
# Bigtable and Datastore-backup sources take exactly one URI.
#
# @return [Array<String>]
#
def urls
  @gapi.source_uris
end
|
|
416
|
-
|
|
417
|
-
##
|
|
418
|
-
# Indicates if the schema and format options are detected
|
|
419
|
-
# automatically.
|
|
420
|
-
#
|
|
421
|
-
# @return [Boolean]
|
|
422
|
-
#
|
|
423
|
-
# @example
|
|
424
|
-
# require "google/cloud/bigquery"
|
|
425
|
-
#
|
|
426
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
427
|
-
#
|
|
428
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
429
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
430
|
-
# csv.autodetect = true
|
|
431
|
-
# end
|
|
432
|
-
#
|
|
433
|
-
# csv_table.autodetect #=> true
|
|
434
|
-
#
|
|
435
|
-
##
# Indicates if the schema and format options are detected automatically.
#
# @return [Boolean]
#
def autodetect
  @gapi.autodetect
end
|
|
438
|
-
|
|
439
|
-
##
|
|
440
|
-
# Set whether to detect schema and format options automatically. Any
|
|
441
|
-
# option specified explicitly will be honored.
|
|
442
|
-
#
|
|
443
|
-
# @param [Boolean] new_autodetect New autodetect value
|
|
444
|
-
#
|
|
445
|
-
# @example
|
|
446
|
-
# require "google/cloud/bigquery"
|
|
447
|
-
#
|
|
448
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
449
|
-
#
|
|
450
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
451
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
452
|
-
# csv.autodetect = true
|
|
453
|
-
# end
|
|
454
|
-
#
|
|
455
|
-
# csv_table.autodetect #=> true
|
|
456
|
-
#
|
|
457
|
-
##
# Set whether to detect schema and format options automatically. Any
# option specified explicitly will be honored.
#
# @param [Boolean] new_autodetect New autodetect value
#
# @raise [ArgumentError] if this object is frozen (via +frozen_check!+).
#
def autodetect= new_autodetect
  frozen_check!
  @gapi.autodetect = new_autodetect
end
|
|
461
|
-
|
|
462
|
-
##
|
|
463
|
-
# The compression type of the data source. Possible values include
|
|
464
|
-
# `"GZIP"` and `nil`. The default value is `nil`. This setting is
|
|
465
|
-
# ignored for Google Cloud Bigtable, Google Cloud Datastore backups
|
|
466
|
-
# and Avro formats. Optional.
|
|
467
|
-
#
|
|
468
|
-
# @return [String]
|
|
469
|
-
#
|
|
470
|
-
# @example
|
|
471
|
-
# require "google/cloud/bigquery"
|
|
472
|
-
#
|
|
473
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
474
|
-
#
|
|
475
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
476
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
477
|
-
# csv.compression = "GZIP"
|
|
478
|
-
# end
|
|
479
|
-
#
|
|
480
|
-
# csv_table.compression #=> "GZIP"
|
|
481
|
-
##
# The compression type of the data source. Possible values include
# `"GZIP"` and `nil` (the default). Ignored for Bigtable, Datastore
# backup and Avro formats.
#
# @return [String, nil]
#
def compression
  @gapi.compression
end
|
|
484
|
-
|
|
485
|
-
##
|
|
486
|
-
# Set the compression type of the data source. Possible values include
|
|
487
|
-
# `"GZIP"` and `nil`. The default value is `nil`. This setting is
|
|
488
|
-
# ignored for Google Cloud Bigtable, Google Cloud Datastore backups
|
|
489
|
-
# and Avro formats. Optional.
|
|
490
|
-
#
|
|
491
|
-
# @param [String] new_compression New compression value
|
|
492
|
-
#
|
|
493
|
-
# @example
|
|
494
|
-
# require "google/cloud/bigquery"
|
|
495
|
-
#
|
|
496
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
497
|
-
#
|
|
498
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
499
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
500
|
-
# csv.compression = "GZIP"
|
|
501
|
-
# end
|
|
502
|
-
#
|
|
503
|
-
# csv_table.compression #=> "GZIP"
|
|
504
|
-
#
|
|
505
|
-
##
# Set the compression type of the data source. Possible values include
# `"GZIP"` and `nil` (the default). Ignored for Bigtable, Datastore
# backup and Avro formats. Optional.
#
# @param [String] new_compression New compression value
#
# @raise [ArgumentError] if this object is frozen (via +frozen_check!+).
#
def compression= new_compression
  frozen_check!
  @gapi.compression = new_compression
end
|
|
509
|
-
|
|
510
|
-
##
|
|
511
|
-
# Indicates if BigQuery should allow extra values that are not
|
|
512
|
-
# represented in the table schema. If `true`, the extra values are
|
|
513
|
-
# ignored. If `false`, records with extra columns are treated as bad
|
|
514
|
-
# records, and if there are too many bad records, an invalid error is
|
|
515
|
-
# returned in the job result. The default value is `false`.
|
|
516
|
-
#
|
|
517
|
-
# BigQuery treats trailing columns as an extra in `CSV`, named values
|
|
518
|
-
# that don't match any column names in `JSON`. This setting is ignored
|
|
519
|
-
# for Google Cloud Bigtable, Google Cloud Datastore backups and Avro
|
|
520
|
-
# formats. Optional.
|
|
521
|
-
#
|
|
522
|
-
# @return [Boolean]
|
|
523
|
-
#
|
|
524
|
-
# @example
|
|
525
|
-
# require "google/cloud/bigquery"
|
|
526
|
-
#
|
|
527
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
528
|
-
#
|
|
529
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
530
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
531
|
-
# csv.ignore_unknown = true
|
|
532
|
-
# end
|
|
533
|
-
#
|
|
534
|
-
# csv_table.ignore_unknown #=> true
|
|
535
|
-
#
|
|
536
|
-
##
# Indicates if BigQuery should allow extra values that are not
# represented in the table schema. If `true`, extra values are ignored;
# if `false`, rows with extra columns count as bad records. Default is
# `false`. Ignored for Bigtable, Datastore backup and Avro formats.
#
# @return [Boolean]
#
def ignore_unknown
  @gapi.ignore_unknown_values
end
|
|
539
|
-
|
|
540
|
-
##
|
|
541
|
-
# Set whether BigQuery should allow extra values that are not
|
|
542
|
-
# represented in the table schema. If `true`, the extra values are
|
|
543
|
-
# ignored. If `false`, records with extra columns are treated as bad
|
|
544
|
-
# records, and if there are too many bad records, an invalid error is
|
|
545
|
-
# returned in the job result. The default value is `false`.
|
|
546
|
-
#
|
|
547
|
-
# BigQuery treats trailing columns as an extra in `CSV`, named values
|
|
548
|
-
# that don't match any column names in `JSON`. This setting is ignored
|
|
549
|
-
# for Google Cloud Bigtable, Google Cloud Datastore backups and Avro
|
|
550
|
-
# formats. Optional.
|
|
551
|
-
#
|
|
552
|
-
# @param [Boolean] new_ignore_unknown New ignore_unknown value
|
|
553
|
-
#
|
|
554
|
-
# @example
|
|
555
|
-
# require "google/cloud/bigquery"
|
|
556
|
-
#
|
|
557
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
558
|
-
#
|
|
559
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
560
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
561
|
-
# csv.ignore_unknown = true
|
|
562
|
-
# end
|
|
563
|
-
#
|
|
564
|
-
# csv_table.ignore_unknown #=> true
|
|
565
|
-
#
|
|
566
|
-
##
# Set whether BigQuery should allow extra values that are not
# represented in the table schema. Trailing CSV columns and unmatched
# JSON names count as "extra". Ignored for Bigtable, Datastore backup
# and Avro formats. Optional.
#
# @param [Boolean] new_ignore_unknown New ignore_unknown value
#
# @raise [ArgumentError] if this object is frozen (via +frozen_check!+).
#
def ignore_unknown= new_ignore_unknown
  frozen_check!
  @gapi.ignore_unknown_values = new_ignore_unknown
end
|
|
570
|
-
|
|
571
|
-
##
|
|
572
|
-
# The maximum number of bad records that BigQuery can ignore when
|
|
573
|
-
# reading data. If the number of bad records exceeds this value, an
|
|
574
|
-
# invalid error is returned in the job result. The default value is 0,
|
|
575
|
-
# which requires that all records are valid. This setting is ignored
|
|
576
|
-
# for Google Cloud Bigtable, Google Cloud Datastore backups and Avro
|
|
577
|
-
# formats.
|
|
578
|
-
#
|
|
579
|
-
# @return [Integer]
|
|
580
|
-
#
|
|
581
|
-
# @example
|
|
582
|
-
# require "google/cloud/bigquery"
|
|
583
|
-
#
|
|
584
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
585
|
-
#
|
|
586
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
587
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
588
|
-
# csv.max_bad_records = 10
|
|
589
|
-
# end
|
|
590
|
-
#
|
|
591
|
-
# csv_table.max_bad_records #=> 10
|
|
592
|
-
#
|
|
593
|
-
##
# The maximum number of bad records that BigQuery can ignore when
# reading data; exceeding it fails the job. Default is 0 (all records
# must be valid). Ignored for Bigtable, Datastore backup and Avro.
#
# @return [Integer]
#
def max_bad_records
  @gapi.max_bad_records
end
|
|
596
|
-
|
|
597
|
-
##
|
|
598
|
-
# Set the maximum number of bad records that BigQuery can ignore when
|
|
599
|
-
# reading data. If the number of bad records exceeds this value, an
|
|
600
|
-
# invalid error is returned in the job result. The default value is 0,
|
|
601
|
-
# which requires that all records are valid. This setting is ignored
|
|
602
|
-
# for Google Cloud Bigtable, Google Cloud Datastore backups and Avro
|
|
603
|
-
# formats.
|
|
604
|
-
#
|
|
605
|
-
# @param [Integer] new_max_bad_records New max_bad_records value
|
|
606
|
-
#
|
|
607
|
-
# @example
|
|
608
|
-
# require "google/cloud/bigquery"
|
|
609
|
-
#
|
|
610
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
611
|
-
#
|
|
612
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
613
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
614
|
-
# csv.max_bad_records = 10
|
|
615
|
-
# end
|
|
616
|
-
#
|
|
617
|
-
# csv_table.max_bad_records #=> 10
|
|
618
|
-
#
|
|
619
|
-
##
# Set the maximum number of bad records that BigQuery can ignore when
# reading data; exceeding it fails the job. Default is 0. Ignored for
# Bigtable, Datastore backup and Avro formats.
#
# @param [Integer] new_max_bad_records New max_bad_records value
#
# @raise [ArgumentError] if this object is frozen (via +frozen_check!+).
#
def max_bad_records= new_max_bad_records
  frozen_check!
  @gapi.max_bad_records = new_max_bad_records
end
|
|
623
|
-
|
|
624
|
-
###
|
|
625
|
-
# Checks if hive partitioning options are set.
|
|
626
|
-
#
|
|
627
|
-
# Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
|
|
628
|
-
# will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
|
|
629
|
-
# If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
|
|
630
|
-
# Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
|
|
631
|
-
#
|
|
632
|
-
# @return [Boolean] `true` when hive partitioning options are set, or `false` otherwise.
|
|
633
|
-
#
|
|
634
|
-
# @example
|
|
635
|
-
# require "google/cloud/bigquery"
|
|
636
|
-
#
|
|
637
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
638
|
-
#
|
|
639
|
-
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
|
640
|
-
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
|
641
|
-
# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
|
|
642
|
-
# ext.hive_partitioning_mode = :auto
|
|
643
|
-
# ext.hive_partitioning_require_partition_filter = true
|
|
644
|
-
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
|
645
|
-
# end
|
|
646
|
-
#
|
|
647
|
-
# external_data.hive_partitioning? #=> true
|
|
648
|
-
# external_data.hive_partitioning_mode #=> "AUTO"
|
|
649
|
-
# external_data.hive_partitioning_require_partition_filter? #=> true
|
|
650
|
-
# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
|
|
651
|
-
#
|
|
652
|
-
##
# Checks if hive partitioning options are set. Not all storage formats
# support hive partitioning; supported types include `avro`, `csv`,
# `json`, `orc` and `parquet`.
#
# @return [Boolean] `true` when hive partitioning options are set.
#
def hive_partitioning?
  !@gapi.hive_partitioning_options.nil?
end
|
|
655
|
-
|
|
656
|
-
###
|
|
657
|
-
# The mode of hive partitioning to use when reading data. The following modes are supported:
|
|
658
|
-
#
|
|
659
|
-
# 1. `AUTO`: automatically infer partition key name(s) and type(s).
|
|
660
|
-
# 2. `STRINGS`: automatically infer partition key name(s). All types are interpreted as strings.
|
|
661
|
-
# 3. `CUSTOM`: partition key schema is encoded in the source URI prefix.
|
|
662
|
-
#
|
|
663
|
-
# @return [String, nil] The mode of hive partitioning, or `nil` if not set.
|
|
664
|
-
#
|
|
665
|
-
# @example
|
|
666
|
-
# require "google/cloud/bigquery"
|
|
667
|
-
#
|
|
668
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
669
|
-
#
|
|
670
|
-
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
|
671
|
-
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
|
672
|
-
# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
|
|
673
|
-
# ext.hive_partitioning_mode = :auto
|
|
674
|
-
# ext.hive_partitioning_require_partition_filter = true
|
|
675
|
-
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
|
676
|
-
# end
|
|
677
|
-
#
|
|
678
|
-
# external_data.hive_partitioning? #=> true
|
|
679
|
-
# external_data.hive_partitioning_mode #=> "AUTO"
|
|
680
|
-
# external_data.hive_partitioning_require_partition_filter? #=> true
|
|
681
|
-
# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
|
|
682
|
-
#
|
|
683
|
-
##
# The mode of hive partitioning to use when reading data. Supported
# modes: `AUTO` (infer key names and types), `STRINGS` (infer key
# names; all types are strings), `CUSTOM` (key schema encoded in the
# source URI prefix).
#
# @return [String, nil] The mode of hive partitioning, or `nil` if not set.
#
def hive_partitioning_mode
  @gapi.hive_partitioning_options.mode if hive_partitioning?
end
|
|
686
|
-
|
|
687
|
-
##
|
|
688
|
-
# Sets the mode of hive partitioning to use when reading data. The following modes are supported:
|
|
689
|
-
#
|
|
690
|
-
# 1. `auto`: automatically infer partition key name(s) and type(s).
|
|
691
|
-
# 2. `strings`: automatically infer partition key name(s). All types are interpreted as strings.
|
|
692
|
-
# 3. `custom`: partition key schema is encoded in the source URI prefix.
|
|
693
|
-
#
|
|
694
|
-
# Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
|
|
695
|
-
# will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
|
|
696
|
-
# If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
|
|
697
|
-
# Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
|
|
698
|
-
#
|
|
699
|
-
# See {#format}, {#hive_partitioning_require_partition_filter=} and {#hive_partitioning_source_uri_prefix=}.
|
|
700
|
-
#
|
|
701
|
-
# @param [String, Symbol] mode The mode of hive partitioning to use when reading data.
|
|
702
|
-
#
|
|
703
|
-
# @example
|
|
704
|
-
# require "google/cloud/bigquery"
|
|
705
|
-
#
|
|
706
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
707
|
-
#
|
|
708
|
-
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
|
709
|
-
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
|
710
|
-
# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
|
|
711
|
-
# ext.hive_partitioning_mode = :auto
|
|
712
|
-
# ext.hive_partitioning_require_partition_filter = true
|
|
713
|
-
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
|
714
|
-
# end
|
|
715
|
-
#
|
|
716
|
-
# external_data.hive_partitioning? #=> true
|
|
717
|
-
# external_data.hive_partitioning_mode #=> "AUTO"
|
|
718
|
-
# external_data.hive_partitioning_require_partition_filter? #=> true
|
|
719
|
-
# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
|
|
720
|
-
#
|
|
721
|
-
##
# Sets the mode of hive partitioning to use when reading data.
# Supported modes: `auto`, `strings`, `custom` (upcased before being
# stored). Requesting hive partitioning on an unsupported format leads
# to an error; supported types include `avro`, `csv`, `json`, `orc`
# and `parquet`.
#
# See {#format}, {#hive_partitioning_require_partition_filter=} and
# {#hive_partitioning_source_uri_prefix=}.
#
# @param [String, Symbol] mode The mode of hive partitioning to use.
#
def hive_partitioning_mode= mode
  # Lazily create the options object so either setter may be called first.
  @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
  @gapi.hive_partitioning_options.mode = mode.to_s.upcase
end
|
|
725
|
-
|
|
726
|
-
###
|
|
727
|
-
# Whether queries over the table using this external data source require a partition filter that can be used
|
|
728
|
-
# for partition elimination to be specified. Note that this field should only be true when creating a
|
|
729
|
-
# permanent external table or querying a temporary external table.
|
|
730
|
-
#
|
|
731
|
-
# @return [Boolean] `true` when queries over this table require a partition filter, or `false` otherwise.
|
|
732
|
-
#
|
|
733
|
-
# @example
|
|
734
|
-
# require "google/cloud/bigquery"
|
|
735
|
-
#
|
|
736
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
737
|
-
#
|
|
738
|
-
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
|
739
|
-
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
|
740
|
-
# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
|
|
741
|
-
# ext.hive_partitioning_mode = :auto
|
|
742
|
-
# ext.hive_partitioning_require_partition_filter = true
|
|
743
|
-
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
|
744
|
-
# end
|
|
745
|
-
#
|
|
746
|
-
# external_data.hive_partitioning? #=> true
|
|
747
|
-
# external_data.hive_partitioning_mode #=> "AUTO"
|
|
748
|
-
# external_data.hive_partitioning_require_partition_filter? #=> true
|
|
749
|
-
# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
|
|
750
|
-
#
|
|
751
|
-
##
# Whether queries over the table using this external data source require
# a partition filter that can be used for partition elimination.
#
# NOTE(review): this predicate returns whether the option is *set*
# (non-nil), not its boolean value — a stored `false` still yields
# `true`. Matches upstream behavior; confirm before relying on it.
#
# @return [Boolean] `true` when the require-partition-filter option is set.
#
def hive_partitioning_require_partition_filter?
  return false unless hive_partitioning?
  !@gapi.hive_partitioning_options.require_partition_filter.nil?
end
|
|
755
|
-
|
|
756
|
-
##
|
|
757
|
-
# Sets whether queries over the table using this external data source require a partition filter
|
|
758
|
-
# that can be used for partition elimination to be specified.
|
|
759
|
-
#
|
|
760
|
-
# See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_source_uri_prefix=}.
|
|
761
|
-
#
|
|
762
|
-
# @param [Boolean] require_partition_filter `true` if a partition filter must be specified, `false` otherwise.
|
|
763
|
-
#
|
|
764
|
-
# @example
|
|
765
|
-
# require "google/cloud/bigquery"
|
|
766
|
-
#
|
|
767
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
768
|
-
#
|
|
769
|
-
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
|
770
|
-
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
|
771
|
-
# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
|
|
772
|
-
# ext.hive_partitioning_mode = :auto
|
|
773
|
-
# ext.hive_partitioning_require_partition_filter = true
|
|
774
|
-
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
|
775
|
-
# end
|
|
776
|
-
#
|
|
777
|
-
# external_data.hive_partitioning? #=> true
|
|
778
|
-
# external_data.hive_partitioning_mode #=> "AUTO"
|
|
779
|
-
# external_data.hive_partitioning_require_partition_filter? #=> true
|
|
780
|
-
# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
|
|
781
|
-
#
|
|
782
|
-
##
# Sets whether queries over the table using this external data source
# require a partition filter that can be used for partition elimination.
#
# See {#format}, {#hive_partitioning_mode=} and
# {#hive_partitioning_source_uri_prefix=}.
#
# @param [Boolean] require_partition_filter `true` if a partition filter
#   must be specified, `false` otherwise.
#
def hive_partitioning_require_partition_filter= require_partition_filter
  # Lazily create the options object so either setter may be called first.
  @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
  @gapi.hive_partitioning_options.require_partition_filter = require_partition_filter
end
|
|
786
|
-
|
|
787
|
-
###
|
|
788
|
-
# The common prefix for all source uris when hive partition detection is requested. The prefix must end
|
|
789
|
-
# immediately before the partition key encoding begins. For example, consider files following this data
|
|
790
|
-
# layout:
|
|
791
|
-
#
|
|
792
|
-
# ```
|
|
793
|
-
# gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
|
|
794
|
-
# gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
|
|
795
|
-
# ```
|
|
796
|
-
#
|
|
797
|
-
# When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
|
|
798
|
-
# `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
|
|
799
|
-
#
|
|
800
|
-
# @return [String, nil] The common prefix for all source uris, or `nil` if not set.
|
|
801
|
-
#
|
|
802
|
-
# @example
|
|
803
|
-
# require "google/cloud/bigquery"
|
|
804
|
-
#
|
|
805
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
806
|
-
#
|
|
807
|
-
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
|
808
|
-
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
|
809
|
-
# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
|
|
810
|
-
# ext.hive_partitioning_mode = :auto
|
|
811
|
-
# ext.hive_partitioning_require_partition_filter = true
|
|
812
|
-
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
|
813
|
-
# end
|
|
814
|
-
#
|
|
815
|
-
# external_data.hive_partitioning? #=> true
|
|
816
|
-
# external_data.hive_partitioning_mode #=> "AUTO"
|
|
817
|
-
# external_data.hive_partitioning_require_partition_filter? #=> true
|
|
818
|
-
# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
|
|
819
|
-
#
|
|
820
|
-
##
# The common prefix for all source uris when hive partition detection
# is requested. The prefix must end immediately before the partition
# key encoding begins (trailing slash does not matter for `AUTO` and
# `STRINGS` modes).
#
# @return [String, nil] The common prefix for all source uris, or `nil` if not set.
#
def hive_partitioning_source_uri_prefix
  @gapi.hive_partitioning_options.source_uri_prefix if hive_partitioning?
end
|
|
823
|
-
|
|
824
|
-
##
|
|
825
|
-
# Sets the common prefix for all source uris when hive partition detection is requested. The prefix must end
|
|
826
|
-
# immediately before the partition key encoding begins. For example, consider files following this data
|
|
827
|
-
# layout:
|
|
828
|
-
#
|
|
829
|
-
# ```
|
|
830
|
-
# gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
|
|
831
|
-
# gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
|
|
832
|
-
# ```
|
|
833
|
-
#
|
|
834
|
-
# When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
|
|
835
|
-
# `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
|
|
836
|
-
#
|
|
837
|
-
# See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_require_partition_filter=}.
|
|
838
|
-
#
|
|
839
|
-
# @param [String] source_uri_prefix The common prefix for all source uris.
|
|
840
|
-
#
|
|
841
|
-
# @example
|
|
842
|
-
# require "google/cloud/bigquery"
|
|
843
|
-
#
|
|
844
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
845
|
-
#
|
|
846
|
-
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
|
847
|
-
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
|
848
|
-
# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
|
|
849
|
-
# ext.hive_partitioning_mode = :auto
|
|
850
|
-
# ext.hive_partitioning_require_partition_filter = true
|
|
851
|
-
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
|
852
|
-
# end
|
|
853
|
-
#
|
|
854
|
-
# external_data.hive_partitioning? #=> true
|
|
855
|
-
# external_data.hive_partitioning_mode #=> "AUTO"
|
|
856
|
-
# external_data.hive_partitioning_require_partition_filter? #=> true
|
|
857
|
-
# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
|
|
858
|
-
#
|
|
859
|
-
##
# Sets the common prefix for all source uris when hive partition
# detection is requested. The prefix must end immediately before the
# partition key encoding begins.
#
# See {#format}, {#hive_partitioning_mode=} and
# {#hive_partitioning_require_partition_filter=}.
#
# @param [String] source_uri_prefix The common prefix for all source uris.
#
def hive_partitioning_source_uri_prefix= source_uri_prefix
  # Lazily create the options object so either setter may be called first.
  @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
  @gapi.hive_partitioning_options.source_uri_prefix = source_uri_prefix
end
|
|
863
|
-
|
|
864
|
-
##
|
|
865
|
-
# @private Google API Client object.
|
|
866
|
-
##
# @private The underlying Google API Client object.
def to_gapi
  @gapi
end
|
|
869
|
-
|
|
870
|
-
##
|
|
871
|
-
# @private Google API Client object.
|
|
872
|
-
##
# @private Wrap a Google API Client object in a new instance of this
# class without touching any other state.
def self.from_gapi gapi
  new_table = new
  new_table.instance_variable_set :@gapi, gapi
  new_table
end
|
|
877
|
-
|
|
878
|
-
protected
|
|
879
|
-
|
|
880
|
-
##
# Guard used by every mutator: raises when this data source has been
# frozen (e.g. after being attached to a query), otherwise a no-op.
#
# @raise [ArgumentError] when the object is frozen.
#
def frozen_check!
  return unless frozen?
  raise ArgumentError, "Cannot modify external data source when frozen"
end
|
|
884
|
-
end
|
|
885
|
-
|
|
886
|
-
##
|
|
887
|
-
# # CsvSource
|
|
888
|
-
#
|
|
889
|
-
# {External::CsvSource} is a subclass of {External::DataSource} and
|
|
890
|
-
# represents a CSV external data source that can be queried from
|
|
891
|
-
# directly, such as Google Cloud Storage or Google Drive, even though
|
|
892
|
-
# the data is not stored in BigQuery. Instead of loading or streaming
|
|
893
|
-
# the data, this object references the external data source.
|
|
894
|
-
#
|
|
895
|
-
# @example
|
|
896
|
-
# require "google/cloud/bigquery"
|
|
897
|
-
#
|
|
898
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
899
|
-
#
|
|
900
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
901
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
902
|
-
# csv.autodetect = true
|
|
903
|
-
# csv.skip_leading_rows = 1
|
|
904
|
-
# end
|
|
905
|
-
#
|
|
906
|
-
# data = bigquery.query "SELECT * FROM my_ext_table",
|
|
907
|
-
# external: { my_ext_table: csv_table }
|
|
908
|
-
#
|
|
909
|
-
# # Iterate over the first page of results
|
|
910
|
-
# data.each do |row|
|
|
911
|
-
# puts row[:name]
|
|
912
|
-
# end
|
|
913
|
-
# # Retrieve the next page of results
|
|
914
|
-
# data = data.next if data.next?
|
|
915
|
-
#
|
|
916
|
-
class CsvSource < External::DataSource
|
|
917
|
-
##
|
|
918
|
-
# @private Create an empty CsvSource object.
|
|
919
|
-
##
# @private Create an empty CsvSource object with a fresh CsvOptions
# container for the CSV-specific accessors below.
def initialize
  super
  @gapi.csv_options = Google::Apis::BigqueryV2::CsvOptions.new
end
|
|
923
|
-
|
|
924
|
-
##
|
|
925
|
-
# Indicates if BigQuery should accept rows that are missing trailing
|
|
926
|
-
# optional columns.
|
|
927
|
-
#
|
|
928
|
-
# @return [Boolean]
|
|
929
|
-
#
|
|
930
|
-
# @example
|
|
931
|
-
# require "google/cloud/bigquery"
|
|
932
|
-
#
|
|
933
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
934
|
-
#
|
|
935
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
936
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
937
|
-
# csv.jagged_rows = true
|
|
938
|
-
# end
|
|
939
|
-
#
|
|
940
|
-
# csv_table.jagged_rows #=> true
|
|
941
|
-
#
|
|
942
|
-
##
# Indicates if BigQuery should accept rows that are missing trailing
# optional columns.
#
# @return [Boolean]
#
def jagged_rows
  @gapi.csv_options.allow_jagged_rows
end
|
|
945
|
-
|
|
946
|
-
##
|
|
947
|
-
# Set whether BigQuery should accept rows that are missing trailing
|
|
948
|
-
# optional columns.
|
|
949
|
-
#
|
|
950
|
-
# @param [Boolean] new_jagged_rows New jagged_rows value
|
|
951
|
-
#
|
|
952
|
-
# @example
|
|
953
|
-
# require "google/cloud/bigquery"
|
|
954
|
-
#
|
|
955
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
956
|
-
#
|
|
957
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
958
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
959
|
-
# csv.jagged_rows = true
|
|
960
|
-
# end
|
|
961
|
-
#
|
|
962
|
-
# csv_table.jagged_rows #=> true
|
|
963
|
-
#
|
|
964
|
-
##
# Set whether BigQuery should accept rows that are missing trailing
# optional columns.
#
# @param [Boolean] new_jagged_rows New jagged_rows value
#
# @raise [ArgumentError] if this object is frozen (via +frozen_check!+).
#
def jagged_rows= new_jagged_rows
  frozen_check!
  @gapi.csv_options.allow_jagged_rows = new_jagged_rows
end
|
|
968
|
-
|
|
969
|
-
##
|
|
970
|
-
# Indicates if BigQuery should allow quoted data sections that contain
|
|
971
|
-
# newline characters in a CSV file.
|
|
972
|
-
#
|
|
973
|
-
# @return [Boolean]
|
|
974
|
-
#
|
|
975
|
-
# @example
|
|
976
|
-
# require "google/cloud/bigquery"
|
|
977
|
-
#
|
|
978
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
979
|
-
#
|
|
980
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
981
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
982
|
-
# csv.quoted_newlines = true
|
|
983
|
-
# end
|
|
984
|
-
#
|
|
985
|
-
# csv_table.quoted_newlines #=> true
|
|
986
|
-
#
|
|
987
|
-
##
# Indicates if BigQuery should allow quoted data sections that contain
# newline characters in a CSV file.
#
# @return [Boolean]
#
def quoted_newlines
  @gapi.csv_options.allow_quoted_newlines
end
|
|
990
|
-
|
|
991
|
-
##
|
|
992
|
-
# Set whether BigQuery should allow quoted data sections that contain
|
|
993
|
-
# newline characters in a CSV file.
|
|
994
|
-
#
|
|
995
|
-
# @param [Boolean] new_quoted_newlines New quoted_newlines value
|
|
996
|
-
#
|
|
997
|
-
# @example
|
|
998
|
-
# require "google/cloud/bigquery"
|
|
999
|
-
#
|
|
1000
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1001
|
-
#
|
|
1002
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
1003
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
1004
|
-
# csv.quoted_newlines = true
|
|
1005
|
-
# end
|
|
1006
|
-
#
|
|
1007
|
-
# csv_table.quoted_newlines #=> true
|
|
1008
|
-
#
|
|
1009
|
-
##
# Set whether BigQuery should allow quoted data sections that contain
# newline characters in a CSV file.
#
# @param [Boolean] new_quoted_newlines New quoted_newlines value
#
# @raise [ArgumentError] if this object is frozen (via +frozen_check!+).
#
def quoted_newlines= new_quoted_newlines
  frozen_check!
  @gapi.csv_options.allow_quoted_newlines = new_quoted_newlines
end
|
|
1013
|
-
|
|
1014
|
-
##
|
|
1015
|
-
# The character encoding of the data.
|
|
1016
|
-
#
|
|
1017
|
-
# @return [String]
|
|
1018
|
-
#
|
|
1019
|
-
# @example
|
|
1020
|
-
# require "google/cloud/bigquery"
|
|
1021
|
-
#
|
|
1022
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1023
|
-
#
|
|
1024
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
1025
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
1026
|
-
# csv.encoding = "UTF-8"
|
|
1027
|
-
# end
|
|
1028
|
-
#
|
|
1029
|
-
# csv_table.encoding #=> "UTF-8"
|
|
1030
|
-
#
|
|
1031
|
-
##
# The character encoding of the data.
#
# @return [String]
#
def encoding
  @gapi.csv_options.encoding
end
|
|
1034
|
-
|
|
1035
|
-
##
|
|
1036
|
-
# Set the character encoding of the data.
|
|
1037
|
-
#
|
|
1038
|
-
# @param [String] new_encoding New encoding value
|
|
1039
|
-
#
|
|
1040
|
-
# @example
|
|
1041
|
-
# require "google/cloud/bigquery"
|
|
1042
|
-
#
|
|
1043
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1044
|
-
#
|
|
1045
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
1046
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
1047
|
-
# csv.encoding = "UTF-8"
|
|
1048
|
-
# end
|
|
1049
|
-
#
|
|
1050
|
-
# csv_table.encoding #=> "UTF-8"
|
|
1051
|
-
#
|
|
1052
|
-
##
# Set the character encoding of the data.
#
# @param [String] new_encoding New encoding value
#
# @raise [ArgumentError] if this object is frozen (via +frozen_check!+).
#
def encoding= new_encoding
  frozen_check!
  @gapi.csv_options.encoding = new_encoding
end
|
|
1056
|
-
|
|
1057
|
-
##
|
|
1058
|
-
# Checks if the character encoding of the data is "UTF-8". This is the
|
|
1059
|
-
# default.
|
|
1060
|
-
#
|
|
1061
|
-
# @return [Boolean]
|
|
1062
|
-
#
|
|
1063
|
-
# @example
|
|
1064
|
-
# require "google/cloud/bigquery"
|
|
1065
|
-
#
|
|
1066
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1067
|
-
#
|
|
1068
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
1069
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
1070
|
-
# csv.encoding = "UTF-8"
|
|
1071
|
-
# end
|
|
1072
|
-
#
|
|
1073
|
-
# csv_table.encoding #=> "UTF-8"
|
|
1074
|
-
# csv_table.utf8? #=> true
|
|
1075
|
-
#
|
|
1076
|
-
##
# Checks if the character encoding of the data is "UTF-8". This is the
# default, so an unset (`nil`) encoding also counts as UTF-8.
#
# @return [Boolean]
#
def utf8?
  return true if encoding.nil?
  encoding == "UTF-8"
end
|
|
1080
|
-
|
|
1081
|
-
##
|
|
1082
|
-
# Checks if the character encoding of the data is "ISO-8859-1".
|
|
1083
|
-
#
|
|
1084
|
-
# @return [Boolean]
|
|
1085
|
-
#
|
|
1086
|
-
# @example
|
|
1087
|
-
# require "google/cloud/bigquery"
|
|
1088
|
-
#
|
|
1089
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1090
|
-
#
|
|
1091
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
1092
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
1093
|
-
# csv.encoding = "ISO-8859-1"
|
|
1094
|
-
# end
|
|
1095
|
-
#
|
|
1096
|
-
# csv_table.encoding #=> "ISO-8859-1"
|
|
1097
|
-
# csv_table.iso8859_1? #=> true
|
|
1098
|
-
#
|
|
1099
|
-
##
# Checks if the character encoding of the data is "ISO-8859-1".
#
# @return [Boolean]
#
def iso8859_1?
  encoding == "ISO-8859-1"
end
|
|
1102
|
-
|
|
1103
|
-
##
|
|
1104
|
-
# The separator for fields in a CSV file.
|
|
1105
|
-
#
|
|
1106
|
-
# @return [String]
|
|
1107
|
-
#
|
|
1108
|
-
# @example
|
|
1109
|
-
# require "google/cloud/bigquery"
|
|
1110
|
-
#
|
|
1111
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1112
|
-
#
|
|
1113
|
-
# csv_url = "gs://bucket/path/to/data.csv"
|
|
1114
|
-
# csv_table = bigquery.external csv_url do |csv|
|
|
1115
|
-
# csv.delimiter = "|"
|
|
1116
|
-
# end
|
|
1117
|
-
#
|
|
1118
|
-
# csv_table.delimiter #=> "|"
|
|
1119
|
-
#
|
|
1120
|
-
##
# The separator for fields in a CSV file.
#
# @return [String]
#
def delimiter
  @gapi.csv_options.field_delimiter
end
|
|
1123
|
-
|
|
1124
|
-
##
# Set the separator for fields in a CSV file.
#
# @param [String] new_delimiter New delimiter value
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   csv_url = "gs://bucket/path/to/data.csv"
#   csv_table = bigquery.external csv_url do |csv|
#     csv.delimiter = "|"
#   end
#
#   csv_table.delimiter #=> "|"
#
def delimiter= new_delimiter
  # Raises ArgumentError if this source has been frozen.
  frozen_check!
  csv_options = @gapi.csv_options
  csv_options.field_delimiter = new_delimiter
end
|
|
1145
|
-
|
|
1146
|
-
##
# The value that is used to quote data sections in a CSV file.
#
# @return [String] the quote character.
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   csv_url = "gs://bucket/path/to/data.csv"
#   csv_table = bigquery.external csv_url do |csv|
#     csv.quote = "'"
#   end
#
#   csv_table.quote #=> "'"
#
def quote
  csv_options = @gapi.csv_options
  csv_options.quote
end
|
|
1166
|
-
|
|
1167
|
-
##
# Set the value that is used to quote data sections in a CSV file.
#
# @param [String] new_quote New quote value
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   csv_url = "gs://bucket/path/to/data.csv"
#   csv_table = bigquery.external csv_url do |csv|
#     csv.quote = "'"
#   end
#
#   csv_table.quote #=> "'"
#
def quote= new_quote
  # Raises ArgumentError if this source has been frozen.
  frozen_check!
  csv_options = @gapi.csv_options
  csv_options.quote = new_quote
end
|
|
1188
|
-
|
|
1189
|
-
##
# The number of rows at the top of a CSV file that BigQuery will skip
# when reading the data.
#
# @return [Integer] the number of header rows to skip.
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   csv_url = "gs://bucket/path/to/data.csv"
#   csv_table = bigquery.external csv_url do |csv|
#     csv.skip_leading_rows = 1
#   end
#
#   csv_table.skip_leading_rows #=> 1
#
def skip_leading_rows
  csv_options = @gapi.csv_options
  csv_options.skip_leading_rows
end
|
|
1210
|
-
|
|
1211
|
-
##
# Set the number of rows at the top of a CSV file that BigQuery will
# skip when reading the data.
#
# @param [Integer] row_count New skip_leading_rows value
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   csv_url = "gs://bucket/path/to/data.csv"
#   csv_table = bigquery.external csv_url do |csv|
#     csv.skip_leading_rows = 1
#   end
#
#   csv_table.skip_leading_rows #=> 1
#
def skip_leading_rows= row_count
  # Raises ArgumentError if this source has been frozen.
  frozen_check!
  csv_options = @gapi.csv_options
  csv_options.skip_leading_rows = row_count
end
|
|
1233
|
-
|
|
1234
|
-
##
# The schema for the data.
#
# Lazily builds a mutable {Schema} wrapper from the underlying API
# object the first time it is called, then memoizes it in `@schema`.
#
# @param [Boolean] replace Whether to replace the existing schema with
#   the new schema. If `true`, the fields will replace the existing
#   schema. If `false`, the fields will be added to the existing
#   schema. The default value is `false`.
# @yield [schema] a block for setting the schema
# @yieldparam [Schema] schema the object accepting the schema
#
# @return [Google::Cloud::Bigquery::Schema]
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   csv_url = "gs://bucket/path/to/data.csv"
#   csv_table = bigquery.external csv_url do |csv|
#     csv.schema do |schema|
#       schema.string "name", mode: :required
#       schema.string "email", mode: :required
#       schema.integer "age", mode: :required
#       schema.boolean "active", mode: :required
#     end
#   end
#
def schema replace: false
  @schema ||= Schema.from_gapi @gapi.schema
  if replace
    # Replacing mutates this source, so enforce the frozen guard before
    # discarding the memoized schema for an empty one.
    frozen_check!
    @schema = Schema.from_gapi
  end
  # A frozen source must hand out a frozen (read-only) schema.
  @schema.freeze if frozen?
  yield @schema if block_given?
  @schema
end
|
|
1271
|
-
|
|
1272
|
-
##
# Set the schema for the data.
#
# The assigned object is stored as-is and serialized later by
# `#to_gapi`; it is not validated here.
#
# @param [Schema] new_schema The schema object.
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   csv_shema = bigquery.schema do |schema|
#     schema.string "name", mode: :required
#     schema.string "email", mode: :required
#     schema.integer "age", mode: :required
#     schema.boolean "active", mode: :required
#   end
#
#   csv_url = "gs://bucket/path/to/data.csv"
#   csv_table = bigquery.external csv_url
#   csv_table.schema = csv_shema
#
def schema= new_schema
  # Raises ArgumentError if this source has been frozen.
  frozen_check!
  @schema = new_schema
end
|
|
1297
|
-
|
|
1298
|
-
##
# The fields of the schema. Convenience delegator to `schema.fields`.
#
# @return [Array<Schema::Field>] An array of field objects.
#
def fields
  schema.fields
end
|
|
1306
|
-
|
|
1307
|
-
##
# The names of the columns in the schema. Convenience delegator to
# `schema.headers`.
#
# @return [Array<Symbol>] An array of column names.
#
def headers
  schema.headers
end
|
|
1315
|
-
|
|
1316
|
-
##
# The types of the fields in the data in the schema, using the same
# format as the optional query parameter types. Convenience delegator
# to `schema.param_types`.
#
# @return [Hash] A hash with field names as keys, and types as values.
#
def param_types
  schema.param_types
end
|
|
1325
|
-
|
|
1326
|
-
##
# @private Google API Client object. Serializes any locally modified
# schema back into the underlying API object before returning it.
def to_gapi
  @gapi.schema = @schema.to_gapi unless @schema.nil?
  @gapi
end
|
|
1332
|
-
|
|
1333
|
-
##
# @private Build an instance from a Google API Client object, wrapping
# its schema in a {Schema} object.
def self.from_gapi gapi
  source = super
  source.instance_variable_set :@schema, Schema.from_gapi(gapi.schema)
  source
end
|
|
1341
|
-
end
|
|
1342
|
-
|
|
1343
|
-
##
# # JsonSource
#
# {External::JsonSource} is a subclass of {External::DataSource} and
# represents a newline-delimited JSON external data source that can be
# queried from directly, such as Google Cloud Storage or Google Drive,
# even though the data is not stored in BigQuery. Instead of loading or
# streaming the data, this object references the external data source.
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   json_url = "gs://bucket/path/to/data.json"
#   json_table = bigquery.external json_url do |json|
#     json.schema do |schema|
#       schema.string "name", mode: :required
#       schema.string "email", mode: :required
#       schema.integer "age", mode: :required
#       schema.boolean "active", mode: :required
#     end
#   end
#
#   data = bigquery.query "SELECT * FROM my_ext_table",
#                         external: { my_ext_table: json_table }
#
#   # Iterate over the first page of results
#   data.each do |row|
#     puts row[:name]
#   end
#   # Retrieve the next page of results
#   data = data.next if data.next?
#
class JsonSource < External::DataSource
  ##
  # The schema for the data.
  #
  # Lazily builds a mutable {Schema} wrapper from the underlying API
  # object the first time it is called, then memoizes it in `@schema`.
  #
  # @param [Boolean] replace Whether to replace the existing schema with
  #   the new schema. If `true`, the fields will replace the existing
  #   schema. If `false`, the fields will be added to the existing
  #   schema. The default value is `false`.
  # @yield [schema] a block for setting the schema
  # @yieldparam [Schema] schema the object accepting the schema
  #
  # @return [Google::Cloud::Bigquery::Schema]
  #
  # @example
  #   require "google/cloud/bigquery"
  #
  #   bigquery = Google::Cloud::Bigquery.new
  #
  #   json_url = "gs://bucket/path/to/data.json"
  #   json_table = bigquery.external json_url do |json|
  #     json.schema do |schema|
  #       schema.string "name", mode: :required
  #     end
  #   end
  #
  def schema replace: false
    @schema ||= Schema.from_gapi @gapi.schema
    if replace
      frozen_check!
      @schema = Schema.from_gapi
    end
    # A frozen source must hand out a frozen (read-only) schema.
    @schema.freeze if frozen?
    yield @schema if block_given?
    @schema
  end

  ##
  # Set the schema for the data.
  #
  # @param [Schema] new_schema The schema object.
  #
  # @example
  #   require "google/cloud/bigquery"
  #
  #   bigquery = Google::Cloud::Bigquery.new
  #
  #   json_shema = bigquery.schema do |schema|
  #     schema.string "name", mode: :required
  #   end
  #
  #   json_url = "gs://bucket/path/to/data.json"
  #   json_table = bigquery.external json_url
  #   json_table.schema = json_shema
  #
  def schema= new_schema
    frozen_check!
    @schema = new_schema
  end

  ##
  # The fields of the schema. Convenience delegator to `schema.fields`.
  #
  # @return [Array<Schema::Field>] An array of field objects.
  #
  def fields
    schema.fields
  end

  ##
  # The names of the columns in the schema. Convenience delegator to
  # `schema.headers`.
  #
  # @return [Array<Symbol>] An array of column names.
  #
  def headers
    schema.headers
  end

  ##
  # The types of the fields in the data in the schema, using the same
  # format as the optional query parameter types.
  #
  # @return [Hash] A hash with field names as keys, and types as values.
  #
  def param_types
    schema.param_types
  end

  ##
  # @private Google API Client object. Serializes any locally modified
  # schema back into the underlying API object before returning it.
  def to_gapi
    @gapi.schema = @schema.to_gapi unless @schema.nil?
    @gapi
  end

  ##
  # @private Build an instance from a Google API Client object,
  # wrapping its schema in a {Schema} object.
  def self.from_gapi gapi
    source = super
    source.instance_variable_set :@schema, Schema.from_gapi(gapi.schema)
    source
  end
end
|
|
1490
|
-
|
|
1491
|
-
##
# # SheetsSource
#
# {External::SheetsSource} is a subclass of {External::DataSource} and
# represents a Google Sheets external data source that can be queried
# from directly, even though the data is not stored in BigQuery. Instead
# of loading or streaming the data, this object references the external
# data source.
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
#   sheets_table = bigquery.external sheets_url do |sheets|
#     sheets.skip_leading_rows = 1
#   end
#
#   data = bigquery.query "SELECT * FROM my_ext_table",
#                         external: { my_ext_table: sheets_table }
#
#   # Iterate over the first page of results
#   data.each do |row|
#     puts row[:name]
#   end
#   # Retrieve the next page of results
#   data = data.next if data.next?
#
class SheetsSource < External::DataSource
  ##
  # @private Create an empty SheetsSource object.
  def initialize
    super
    @gapi.google_sheets_options = Google::Apis::BigqueryV2::GoogleSheetsOptions.new
  end

  ##
  # The number of rows at the top of a sheet that BigQuery will skip
  # when reading the data. The default value is `0`.
  #
  # This property is useful if you have header rows that should be
  # skipped. When `autodetect` is on, behavior is the following:
  #
  # * `nil` - Autodetect tries to detect headers in the first row. If
  #   they are not detected, the row is read as data. Otherwise data is
  #   read starting from the second row.
  # * `0` - Instructs autodetect that there are no headers and data
  #   should be read starting from the first row.
  # * `N > 0` - Autodetect skips `N-1` rows and tries to detect headers
  #   in row `N`. If headers are not detected, row `N` is just skipped.
  #   Otherwise row `N` is used to extract column names for the detected
  #   schema.
  #
  # @return [Integer]
  #
  # @example
  #   require "google/cloud/bigquery"
  #
  #   bigquery = Google::Cloud::Bigquery.new
  #
  #   sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
  #   sheets_table = bigquery.external sheets_url do |sheets|
  #     sheets.skip_leading_rows = 1
  #   end
  #
  #   sheets_table.skip_leading_rows #=> 1
  #
  def skip_leading_rows
    sheets_options.skip_leading_rows
  end

  ##
  # Set the number of rows at the top of a sheet that BigQuery will skip
  # when reading the data.
  #
  # @param [Integer] row_count New skip_leading_rows value
  #
  # @example
  #   require "google/cloud/bigquery"
  #
  #   bigquery = Google::Cloud::Bigquery.new
  #
  #   sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
  #   sheets_table = bigquery.external sheets_url do |sheets|
  #     sheets.skip_leading_rows = 1
  #   end
  #
  #   sheets_table.skip_leading_rows #=> 1
  #
  def skip_leading_rows= row_count
    frozen_check!
    sheets_options.skip_leading_rows = row_count
  end

  ##
  # Range of a sheet to query from. Only used when non-empty. Typical
  # format: `{sheet_name}!{top_left_cell_id}:{bottom_right_cell_id}`.
  #
  # @return [String] Range of a sheet to query from.
  #
  # @example
  #   require "google/cloud/bigquery"
  #
  #   bigquery = Google::Cloud::Bigquery.new
  #
  #   sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
  #   sheets_table = bigquery.external sheets_url do |sheets|
  #     sheets.range = "sheet1!A1:B20"
  #   end
  #
  #   sheets_table.range #=> "sheet1!A1:B20"
  #
  def range
    sheets_options.range
  end

  ##
  # Set the range of a sheet to query from. Only used when non-empty.
  # Typical format:
  # `{sheet_name}!{top_left_cell_id}:{bottom_right_cell_id}`.
  #
  # @param [String] new_range New range of a sheet to query from.
  #
  # @example
  #   require "google/cloud/bigquery"
  #
  #   bigquery = Google::Cloud::Bigquery.new
  #
  #   sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
  #   sheets_table = bigquery.external sheets_url do |sheets|
  #     sheets.range = "sheet1!A1:B20"
  #   end
  #
  #   sheets_table.range #=> "sheet1!A1:B20"
  #
  def range= new_range
    frozen_check!
    sheets_options.range = new_range
  end

  private

  ##
  # The underlying GoogleSheetsOptions API object.
  def sheets_options
    @gapi.google_sheets_options
  end
end
|
|
1632
|
-
|
|
1633
|
-
##
|
|
1634
|
-
# # BigtableSource
|
|
1635
|
-
#
|
|
1636
|
-
# {External::BigtableSource} is a subclass of {External::DataSource} and
|
|
1637
|
-
# represents a Bigtable external data source that can be queried from
|
|
1638
|
-
# directly, even though the data is not stored in BigQuery. Instead of
|
|
1639
|
-
# loading or streaming the data, this object references the external
|
|
1640
|
-
# data source.
|
|
1641
|
-
#
|
|
1642
|
-
# @example
|
|
1643
|
-
# require "google/cloud/bigquery"
|
|
1644
|
-
#
|
|
1645
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1646
|
-
#
|
|
1647
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
1648
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
1649
|
-
# bt.rowkey_as_string = true
|
|
1650
|
-
# bt.add_family "user" do |u|
|
|
1651
|
-
# u.add_string "name"
|
|
1652
|
-
# u.add_string "email"
|
|
1653
|
-
# u.add_integer "age"
|
|
1654
|
-
# u.add_boolean "active"
|
|
1655
|
-
# end
|
|
1656
|
-
# end
|
|
1657
|
-
#
|
|
1658
|
-
# data = bigquery.query "SELECT * FROM my_ext_table",
|
|
1659
|
-
# external: { my_ext_table: bigtable_table }
|
|
1660
|
-
#
|
|
1661
|
-
# # Iterate over the first page of results
|
|
1662
|
-
# data.each do |row|
|
|
1663
|
-
# puts row[:name]
|
|
1664
|
-
# end
|
|
1665
|
-
# # Retrieve the next page of results
|
|
1666
|
-
# data = data.next if data.next?
|
|
1667
|
-
#
|
|
1668
|
-
class BigtableSource < External::DataSource
|
|
1669
|
-
##
# @private Create an empty BigtableSource object with a fresh
# BigtableOptions payload and no column families.
def initialize
  super
  @families = []
  @gapi.bigtable_options = Google::Apis::BigqueryV2::BigtableOptions.new
end
|
|
1676
|
-
|
|
1677
|
-
##
# List of column families to expose in the table schema along with
# their types. This list restricts the column families that can be
# referenced in queries and specifies their value types. You can use
# this list to do type conversions - see
# {BigtableSource::ColumnFamily#type} for more details. If you leave
# this list empty, all column families are present in the table schema
# and their values are read as `BYTES`. During a query only the column
# families referenced in that query are read from Bigtable.
#
# @return [Array<BigtableSource::ColumnFamily>]
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   bigtable_url = "https://googleapis.com/bigtable/projects/..."
#   bigtable_table = bigquery.external bigtable_url do |bt|
#     bt.add_family "user" do |u|
#       u.add_string "name"
#     end
#   end
#
#   bigtable_table.families.count #=> 1
#
def families
  @families
end
|
|
1710
|
-
|
|
1711
|
-
##
# Add a column family to expose in the table schema along with its
# types. Columns belonging to the column family may also be exposed.
#
# @param [String] family_id Identifier of the column family. See
#   {BigtableSource::ColumnFamily#family_id}.
# @param [String] encoding The encoding of the values when the type is
#   not `STRING`. See {BigtableSource::ColumnFamily#encoding}.
# @param [Boolean] latest Whether only the latest version of value are
#   exposed for all columns in this column family. See
#   {BigtableSource::ColumnFamily#latest}.
# @param [String] type The type to convert the value in cells of this
#   column. See {BigtableSource::ColumnFamily#type}.
#
# @yield [family] a block for setting the family
# @yieldparam [BigtableSource::ColumnFamily] family the family object
#
# @return [BigtableSource::ColumnFamily] the newly added family.
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   bigtable_url = "https://googleapis.com/bigtable/projects/..."
#   bigtable_table = bigquery.external bigtable_url do |bt|
#     bt.add_family "user" do |u|
#       u.add_string "name"
#       u.add_integer "age"
#     end
#   end
#
def add_family family_id, encoding: nil, latest: nil, type: nil
  frozen_check!
  family = BigtableSource::ColumnFamily.new
  family.family_id = family_id
  # Optional attributes are only assigned when given a truthy value,
  # leaving the service defaults in place otherwise.
  family.encoding = encoding if encoding
  family.latest = latest if latest
  family.type = type if type
  yield family if block_given?
  @families << family
  family
end
|
|
1757
|
-
|
|
1758
|
-
##
# Whether the rowkey column families will be read and converted to
# string. Otherwise they are read with `BYTES` type values and users
# need to manually cast them with `CAST` if necessary. The default
# value is `false`.
#
# @return [Boolean]
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   bigtable_url = "https://googleapis.com/bigtable/projects/..."
#   bigtable_table = bigquery.external bigtable_url do |bt|
#     bt.rowkey_as_string = true
#   end
#
#   bigtable_table.rowkey_as_string #=> true
#
def rowkey_as_string
  options = @gapi.bigtable_options
  options.read_rowkey_as_string
end
|
|
1781
|
-
|
|
1782
|
-
##
# Set whether the rowkey column families will be read and converted to
# string, rather than as raw `BYTES` values.
# (NOTE(review): the previous description, "Set the number of rows at
# the top of a sheet that BigQuery will skip", was copy-pasted from
# SheetsSource and did not describe this method; corrected here.)
#
# @param [Boolean] row_rowkey New rowkey_as_string value
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   bigtable_url = "https://googleapis.com/bigtable/projects/..."
#   bigtable_table = bigquery.external bigtable_url do |bt|
#     bt.rowkey_as_string = true
#   end
#
#   bigtable_table.rowkey_as_string #=> true
#
def rowkey_as_string= row_rowkey
  # Raises ArgumentError if this source has been frozen.
  frozen_check!
  @gapi.bigtable_options.read_rowkey_as_string = row_rowkey
end
|
|
1804
|
-
|
|
1805
|
-
##
# @private Google API Client object. Serializes the locally held
# column families into the underlying API object before returning it.
def to_gapi
  family_gapis = @families.map(&:to_gapi)
  @gapi.bigtable_options.column_families = family_gapis
  @gapi
end
|
|
1811
|
-
|
|
1812
|
-
##
# @private Build an instance from a Google API Client object, wrapping
# each column family payload in a {BigtableSource::ColumnFamily}.
def self.from_gapi gapi
  source = super
  wrapped_families = Array(gapi.bigtable_options.column_families).map do |fam_gapi|
    BigtableSource::ColumnFamily.from_gapi fam_gapi
  end
  source.instance_variable_set :@families, wrapped_families
  source
end
|
|
1821
|
-
|
|
1822
|
-
##
# @private Deep-freeze this source: freeze each column family, the
# collection holding them, and (via `super`) the source itself.
def freeze
  # ColumnFamily#freeze! (defined elsewhere in this file) deep-freezes
  # each family wrapper.
  @families.map(&:freeze!)
  # BUG FIX: the previous code called `@families.freeze!`, but Ruby's
  # Array has no #freeze! method, so freezing this source always raised
  # NoMethodError. Kernel#freeze is the correct call.
  @families.freeze
  super
end
|
|
1829
|
-
|
|
1830
|
-
protected
|
|
1831
|
-
|
|
1832
|
-
# Guard used by every mutator: raises once this source has been frozen
# (e.g. after being attached to a query), otherwise returns nil.
def frozen_check!
  raise ArgumentError, "Cannot modify external data source when frozen" if frozen?
end
|
|
1836
|
-
|
|
1837
|
-
##
|
|
1838
|
-
# # BigtableSource::ColumnFamily
|
|
1839
|
-
#
|
|
1840
|
-
# A Bigtable column family used to expose in the table schema along
|
|
1841
|
-
# with its types and columns.
|
|
1842
|
-
#
|
|
1843
|
-
# @example
|
|
1844
|
-
# require "google/cloud/bigquery"
|
|
1845
|
-
#
|
|
1846
|
-
# bigquery = Google::Cloud::Bigquery.new
|
|
1847
|
-
#
|
|
1848
|
-
# bigtable_url = "https://googleapis.com/bigtable/projects/..."
|
|
1849
|
-
# bigtable_table = bigquery.external bigtable_url do |bt|
|
|
1850
|
-
# bt.rowkey_as_string = true
|
|
1851
|
-
# bt.add_family "user" do |u|
|
|
1852
|
-
# u.add_string "name"
|
|
1853
|
-
# u.add_string "email"
|
|
1854
|
-
# u.add_integer "age"
|
|
1855
|
-
# u.add_boolean "active"
|
|
1856
|
-
# end
|
|
1857
|
-
# end
|
|
1858
|
-
#
|
|
1859
|
-
# data = bigquery.query "SELECT * FROM my_ext_table",
|
|
1860
|
-
# external: { my_ext_table: bigtable_table }
|
|
1861
|
-
#
|
|
1862
|
-
# # Iterate over the first page of results
|
|
1863
|
-
# data.each do |row|
|
|
1864
|
-
# puts row[:name]
|
|
1865
|
-
# end
|
|
1866
|
-
# # Retrieve the next page of results
|
|
1867
|
-
# data = data.next if data.next?
|
|
1868
|
-
#
|
|
1869
|
-
class ColumnFamily
|
|
1870
|
-
##
# @private Create an empty BigtableSource::ColumnFamily object with a
# fresh API payload and no columns.
def initialize
  @columns = []
  @gapi = Google::Apis::BigqueryV2::BigtableColumnFamily.new
end
|
|
1876
|
-
|
|
1877
|
-
##
# The encoding of the values when the type is not `STRING`.
#
# @return [String]
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   bigtable_url = "https://googleapis.com/bigtable/projects/..."
#   bigtable_table = bigquery.external bigtable_url do |bt|
#     bt.add_family "user" do |u|
#       u.encoding = "UTF-8"
#     end
#   end
#
#   bigtable_table.families[0].encoding #=> "UTF-8"
#
def encoding
  @gapi.encoding
end
|
|
1899
|
-
|
|
1900
|
-
##
# Set the encoding of the values when the type is not `STRING`.
# Acceptable encoding values are:
#
# * `TEXT` - indicates values are alphanumeric text strings.
# * `BINARY` - indicates values are encoded using HBase
#   `Bytes.toBytes` family of functions. This can be overridden on a
#   column.
#
# @param [String] new_encoding New encoding value
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   bigtable_url = "https://googleapis.com/bigtable/projects/..."
#   bigtable_table = bigquery.external bigtable_url do |bt|
#     bt.add_family "user" do |u|
#       u.encoding = "UTF-8"
#     end
#   end
#
#   bigtable_table.families[0].encoding #=> "UTF-8"
#
def encoding= new_encoding
  # Raises ArgumentError once the parent source has been frozen.
  frozen_check!
  @gapi.encoding = new_encoding
end
|
|
1929
|
-
|
|
1930
|
-
##
# Identifier of the column family.
#
# @return [String]
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   bigtable_url = "https://googleapis.com/bigtable/projects/..."
#   bigtable_table = bigquery.external bigtable_url do |bt|
#     bt.add_family "user"
#   end
#
#   bigtable_table.families[0].family_id #=> "user"
#
def family_id
  @gapi.family_id
end
|
|
1950
|
-
|
|
1951
|
-
##
# Set the identifier of the column family.
#
# @param [String] new_family_id New family_id value
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   bigtable_url = "https://googleapis.com/bigtable/projects/..."
#   bigtable_table = bigquery.external bigtable_url do |bt|
#     bt.add_family "user"
#   end
#
#   bigtable_table.families[0].family_id = "User"
#   bigtable_table.families[0].family_id #=> "User"
#
def family_id= new_family_id
  # Raises ArgumentError once the parent source has been frozen.
  frozen_check!
  @gapi.family_id = new_family_id
end
|
|
1974
|
-
|
|
1975
|
-
##
# Whether only the latest version of value are exposed for all
# columns in this column family.
#
# @return [Boolean]
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   bigtable_url = "https://googleapis.com/bigtable/projects/..."
#   bigtable_table = bigquery.external bigtable_url do |bt|
#     bt.add_family "user" do |u|
#       u.latest = true
#     end
#   end
#
#   bigtable_table.families[0].latest #=> true
#
def latest
  @gapi.only_read_latest
end
|
|
1998
|
-
|
|
1999
|
-
##
# Sets whether only the most recent cell version is exposed for every
# column in this column family. Raises ArgumentError when frozen.
#
# @param [Boolean] new_latest New latest value
#
def latest= new_latest
  frozen_check!
  @gapi.only_read_latest = new_latest
end
|
|
2023
|
-
|
|
2024
|
-
##
# The BigQuery type used to convert cell values in this column family.
# One of `BYTES` (the default), `STRING`, `INTEGER`, `FLOAT`, or
# `BOOLEAN`. Individual columns may override this value.
#
# @return [String]
#
def type
  @gapi.type
end
|
|
2057
|
-
|
|
2058
|
-
##
# Sets the BigQuery type used to convert cell values in this column
# family. Allowed values: `BYTES` (the default), `STRING`, `INTEGER`,
# `FLOAT`, `BOOLEAN`. Values are expected to be encoded with HBase
# `Bytes.toBytes` when the `BINARY` encoding is in effect. Individual
# columns may override this value. Raises ArgumentError when frozen.
#
# @param [String] new_type New type value
#
def type= new_type
  frozen_check!
  @gapi.type = new_type
end
|
|
2092
|
-
|
|
2093
|
-
##
# The columns of this family that are exposed as individual fields in
# the table schema.
#
# @return [Array<BigtableSource::Column>]
#
def columns
  @columns
end
|
|
2119
|
-
|
|
2120
|
-
##
# Adds a column to this column family so it is exposed in the table
# schema. Raises ArgumentError when frozen.
#
# @param [String] qualifier Qualifier of the column. See
#   {BigtableSource::Column#qualifier}.
# @param [String] as Identifier to use as the field name when the
#   qualifier is not a valid BigQuery identifier (does not match
#   `[a-zA-Z][a-zA-Z0-9_]*`). See {BigtableSource::Column#field_name}.
# @param [String] type BigQuery type for the cell values: `BYTES`,
#   `STRING`, `INTEGER`, `FLOAT`, or `BOOLEAN`. See
#   {BigtableSource::Column#type}.
#
# @yield [column] a block for configuring the column
# @yieldparam [BigtableSource::Column] column the new column object
#
# @return [BigtableSource::Column] the column that was added
#
def add_column qualifier, as: nil, type: nil
  frozen_check!
  column = BigtableSource::Column.new
  column.qualifier = qualifier
  column.field_name = as if as
  column.type = type if type
  yield column if block_given?
  @columns << column
  column
end
|
|
2168
|
-
|
|
2169
|
-
##
# Adds a `BYTES`-typed column to this column family.
#
# @param [String] qualifier Qualifier of the column. See
#   {BigtableSource::Column#qualifier}.
# @param [String] as Identifier to use as the field name when the
#   qualifier is not a valid BigQuery identifier. See
#   {BigtableSource::Column#field_name}.
#
# @yield [column] a block for configuring the column
# @yieldparam [BigtableSource::Column] column the new column object
#
# @return [BigtableSource::Column] the column that was added
#
def add_bytes qualifier, as: nil
  add_column(qualifier, as: as, type: "BYTES").tap do |column|
    yield column if block_given?
  end
end
|
|
2203
|
-
|
|
2204
|
-
##
# Adds a `STRING`-typed column to this column family.
#
# @param [String] qualifier Qualifier of the column. See
#   {BigtableSource::Column#qualifier}.
# @param [String] as Identifier to use as the field name when the
#   qualifier is not a valid BigQuery identifier. See
#   {BigtableSource::Column#field_name}.
#
# @yield [column] a block for configuring the column
# @yieldparam [BigtableSource::Column] column the new column object
#
# @return [BigtableSource::Column] the column that was added
#
def add_string qualifier, as: nil
  add_column(qualifier, as: as, type: "STRING").tap do |column|
    yield column if block_given?
  end
end
|
|
2238
|
-
|
|
2239
|
-
##
# Adds an `INTEGER`-typed column to this column family.
#
# @param [String] qualifier Qualifier of the column. See
#   {BigtableSource::Column#qualifier}.
# @param [String] as Identifier to use as the field name when the
#   qualifier is not a valid BigQuery identifier. See
#   {BigtableSource::Column#field_name}.
#
# @yield [column] a block for configuring the column
# @yieldparam [BigtableSource::Column] column the new column object
#
# @return [BigtableSource::Column] the column that was added
#
def add_integer qualifier, as: nil
  add_column(qualifier, as: as, type: "INTEGER").tap do |column|
    yield column if block_given?
  end
end
|
|
2273
|
-
|
|
2274
|
-
##
# Adds a `FLOAT`-typed column to this column family.
#
# @param [String] qualifier Qualifier of the column. See
#   {BigtableSource::Column#qualifier}.
# @param [String] as Identifier to use as the field name when the
#   qualifier is not a valid BigQuery identifier. See
#   {BigtableSource::Column#field_name}.
#
# @yield [column] a block for configuring the column
# @yieldparam [BigtableSource::Column] column the new column object
#
# @return [BigtableSource::Column] the column that was added
#
def add_float qualifier, as: nil
  add_column(qualifier, as: as, type: "FLOAT").tap do |column|
    yield column if block_given?
  end
end
|
|
2308
|
-
|
|
2309
|
-
##
# Adds a `BOOLEAN`-typed column to this column family.
#
# @param [String] qualifier Qualifier of the column. See
#   {BigtableSource::Column#qualifier}.
# @param [String] as Identifier to use as the field name when the
#   qualifier is not a valid BigQuery identifier. See
#   {BigtableSource::Column#field_name}.
#
# @yield [column] a block for configuring the column
# @yieldparam [BigtableSource::Column] column the new column object
#
# @return [BigtableSource::Column] the column that was added
#
def add_boolean qualifier, as: nil
  add_column(qualifier, as: as, type: "BOOLEAN").tap do |column|
    yield column if block_given?
  end
end
|
|
2343
|
-
|
|
2344
|
-
##
# @private Google API Client object. Syncs the column list into the
# underlying gapi representation before returning it.
def to_gapi
  @gapi.tap { |gapi| gapi.columns = @columns.map(&:to_gapi) }
end
|
|
2350
|
-
|
|
2351
|
-
##
# @private Builds a ColumnFamily wrapping an existing Google API
# Client object, including Column wrappers for its columns.
def self.from_gapi gapi
  wrapped_columns = Array(gapi.columns).map { |column_gapi| BigtableSource::Column.from_gapi column_gapi }
  fam = new
  fam.instance_variable_set :@gapi, gapi
  fam.instance_variable_set :@columns, wrapped_columns
  fam
end
|
|
2360
|
-
|
|
2361
|
-
##
# @private
# Freezes this column family and each of its columns so any later
# mutation attempt raises (see #frozen_check!).
#
# Bug fix: the previous implementation called `@columns.map(&:freeze!)`
# and `@columns.freeze!`, but `freeze!` is defined on neither Array nor
# Column (Ruby core provides only `freeze`), so invoking this method
# always raised NoMethodError. Use `freeze` instead.
def freeze
  @columns.each(&:freeze)
  @columns.freeze
  super
end
|
|
2368
|
-
|
|
2369
|
-
protected
|
|
2370
|
-
|
|
2371
|
-
# Guard used by every mutator: raises once this object has been frozen
# (after the external data source has been used in a query).
def frozen_check!
  raise ArgumentError, "Cannot modify external data source when frozen" if frozen?
end
|
|
2375
|
-
end
|
|
2376
|
-
|
|
2377
|
-
##
# # BigtableSource::Column
#
# A single Bigtable column exposed in the table schema, together with
# the BigQuery type and encoding used to interpret its cell values.
#
# @example
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#
#   bigtable_url = "https://googleapis.com/bigtable/projects/..."
#   bigtable_table = bigquery.external bigtable_url do |bt|
#     bt.rowkey_as_string = true
#     bt.add_family "user" do |u|
#       u.add_string "name"
#       u.add_string "email"
#       u.add_integer "age"
#       u.add_boolean "active"
#     end
#   end
#
#   data = bigquery.query "SELECT * FROM my_ext_table",
#                         external: { my_ext_table: bigtable_table }
#
#   # Iterate over the first page of results
#   data.each do |row|
#     puts row[:name]
#   end
#   # Retrieve the next page of results
#   data = data.next if data.next?
#
class Column
  ##
  # @private Creates a new, empty BigtableSource::Column.
  def initialize
    @gapi = Google::Apis::BigqueryV2::BigtableColumn.new
  end

  ##
  # Qualifier of the column. Cells in the parent column family with
  # exactly this qualifier are exposed as a field. Returned as a UTF-8
  # string when the stored qualifier is text; otherwise the stored
  # Base64 binary value is decoded and returned.
  #
  # If the qualifier is not a valid BigQuery field identifier (does
  # not match `[a-zA-Z][a-zA-Z0-9_]*`), a valid identifier must be
  # supplied via {#field_name=}.
  #
  # @return [String]
  #
  def qualifier
    @gapi.qualifier_string || Base64.strict_decode64(@gapi.qualifier_encoded.to_s)
  end

  ##
  # Sets the qualifier of the column. A value that is a valid UTF-8
  # string is stored as text; any other value (including one that
  # cannot be transcoded) is stored Base64-encoded as binary. Raises
  # ArgumentError when frozen or when given `nil`.
  #
  # @param [String] new_qualifier New qualifier value
  #
  def qualifier= new_qualifier
    frozen_check!
    raise ArgumentError if new_qualifier.nil?

    as_utf8 = new_qualifier.encode Encoding::UTF_8
    if as_utf8.valid_encoding?
      @gapi.qualifier_string = as_utf8
      drop_gapi_field :@qualifier_encoded
    else
      @gapi.qualifier_encoded = Base64.strict_encode64 new_qualifier
      drop_gapi_field :@qualifier_string
    end
  rescue EncodingError
    @gapi.qualifier_encoded = Base64.strict_encode64 new_qualifier
    drop_gapi_field :@qualifier_string
  end

  ##
  # The encoding of the cell values when the type is not `STRING`.
  #
  # @return [String]
  #
  def encoding
    @gapi.encoding
  end

  ##
  # Sets the encoding of the cell values when the type is not
  # `STRING`. Acceptable values are `TEXT` (alphanumeric text) and
  # `BINARY` (HBase `Bytes.toBytes` encoding). Raises ArgumentError
  # when frozen.
  #
  # @param [String] new_encoding New encoding value
  #
  def encoding= new_encoding
    frozen_check!
    @gapi.encoding = new_encoding
  end

  ##
  # The identifier used as the column field name in queries when the
  # qualifier is not a valid BigQuery field identifier (does not match
  # `[a-zA-Z][a-zA-Z0-9_]*`).
  #
  # @return [String]
  #
  def field_name
    @gapi.field_name
  end

  ##
  # Sets the identifier to use as the column field name in queries
  # when the qualifier is not a valid BigQuery field identifier.
  # Raises ArgumentError when frozen.
  #
  # @param [String] new_field_name New field_name value
  #
  def field_name= new_field_name
    frozen_check!
    @gapi.field_name = new_field_name
  end

  ##
  # Whether only the most recent cell version is exposed for this
  # column. May also be set at the column family level; this value
  # takes precedence when both are set.
  #
  # @return [Boolean]
  #
  def latest
    @gapi.only_read_latest
  end

  ##
  # Sets whether only the most recent cell version is exposed for this
  # column. Takes precedence over the column family setting. Raises
  # ArgumentError when frozen.
  #
  # @param [Boolean] new_latest New latest value
  #
  def latest= new_latest
    frozen_check!
    @gapi.only_read_latest = new_latest
  end

  ##
  # The BigQuery type used to convert this column's cell values. One
  # of `BYTES` (the default), `STRING`, `INTEGER`, `FLOAT`, or
  # `BOOLEAN`. May also be set at the column family level; this value
  # takes precedence when both are set.
  #
  # @return [String]
  #
  def type
    @gapi.type
  end

  ##
  # Sets the BigQuery type used to convert this column's cell values.
  # Allowed values: `BYTES` (the default), `STRING`, `INTEGER`,
  # `FLOAT`, `BOOLEAN`. Values are expected to be encoded with HBase
  # `Bytes.toBytes` when the `BINARY` encoding is in effect. Takes
  # precedence over the column family setting. Raises ArgumentError
  # when frozen.
  #
  # @param [String] new_type New type value
  #
  def type= new_type
    frozen_check!
    @gapi.type = new_type
  end

  ##
  # @private Google API Client object.
  def to_gapi
    @gapi
  end

  ##
  # @private Builds a Column wrapping an existing Google API Client
  # object.
  def self.from_gapi gapi
    column = new
    column.instance_variable_set :@gapi, gapi
    column
  end

  protected

  # Guard used by every mutator: raises once this object has been
  # frozen (after the external data source has been used in a query).
  def frozen_check!
    raise ArgumentError, "Cannot modify external data source when frozen" if frozen?
  end

  # Removes the given instance variable from the gapi object when
  # present, so the string and binary qualifier fields are mutually
  # exclusive.
  def drop_gapi_field ivar
    @gapi.remove_instance_variable ivar if @gapi.instance_variables.include? ivar
  end
end
|
|
2753
|
-
end
|
|
2754
144
|
end
|
|
2755
145
|
end
|
|
2756
146
|
end
|