google-cloud-bigquery 1.25.0 → 1.26.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/CONTRIBUTING.md +1 -1
- data/lib/google/cloud/bigquery/external.rb +328 -3
- data/lib/google/cloud/bigquery/extract_job.rb +4 -8
- data/lib/google/cloud/bigquery/load_job.rb +176 -24
- data/lib/google/cloud/bigquery/query_job.rb +1 -2
- data/lib/google/cloud/bigquery/version.rb +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8c2f8d8d7a564df2c916cced8adf4c0b621a1e11250a3a04decfce08995a4a24
+  data.tar.gz: 57bbda27d4c54e0522b564b8cdc62fb5e9dce8b26c91d3f0604c624ff113f057
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f445a86a5435cafc236d82faf91df46a06c6cee8612d8e6c4011450c93b73b61a883510909c6bb25e72516ae1a57c8a605736d82e0b63f0b10ad25cab90a1280
+  data.tar.gz: 43423f2bea5cea82c6c19bcf639bfb690f4718b0450140d299616ece59b147258ef877cafbc17b99c68274c310a0cd0123ae6b6db0b37bba2fb08be0395837f0
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,14 @@
 # Release History
 
+### 1.26.0 / 2021-01-13
+
+#### Features
+
+* Add support for Hive Partitioning
+  * Add hive partitioning options to External::DataSource
+  * Add hive partitioning options to LoadJob and LoadJob::Updater
+* Replace google-api-client with google-apis-bigquery_v2
+
 ### 1.25.0 / 2020-11-16
 
 #### Features
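Taken together, the release's headline feature looks like this in application code. This is a minimal sketch assembled from the doc examples added in this diff; it assumes default application credentials and uses Google's public sample bucket.

```ruby
require "google/cloud/bigquery"

# Sketch assembled from the doc examples added in this release.
bigquery = Google::Cloud::Bigquery.new

gcs_uri           = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"

external_data = bigquery.external gcs_uri, format: :parquet do |ext|
  ext.hive_partitioning_mode = :auto                    # infer partition key names and types
  ext.hive_partitioning_require_partition_filter = true # queries must prune on a partition key
  ext.hive_partitioning_source_uri_prefix = source_uri_prefix
end

external_data.hive_partitioning?     #=> true
external_data.hive_partitioning_mode #=> "AUTO"
```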
data/CONTRIBUTING.md
CHANGED
data/lib/google/cloud/bigquery/external.rb
CHANGED
@@ -52,6 +52,24 @@ module Google
 #   # Retrieve the next page of results
 #   data = data.next if data.next?
 #
+# @example Hive partitioning options:
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+#   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+#   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+#     ext.hive_partitioning_mode = :auto
+#     ext.hive_partitioning_require_partition_filter = true
+#     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+#   end
+#
+#   external_data.hive_partitioning? #=> true
+#   external_data.hive_partitioning_mode #=> "AUTO"
+#   external_data.hive_partitioning_require_partition_filter? #=> true
+#   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+#
 module External
   ##
   # @private New External from URLs and format
@@ -79,7 +97,8 @@ module Google
 # @private Determine source_format from inputs
 def self.source_format_for urls, format
   val = {
-    "csv"
+    "csv"                    => "CSV",
+    "avro"                   => "AVRO",
     "json"                   => "NEWLINE_DELIMITED_JSON",
     "newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
     "sheets"                 => "GOOGLE_SHEETS",
@@ -87,7 +106,9 @@ module Google
     "datastore"              => "DATASTORE_BACKUP",
     "backup"                 => "DATASTORE_BACKUP",
     "datastore_backup"       => "DATASTORE_BACKUP",
-    "bigtable" => "BIGTABLE"
+    "bigtable"               => "BIGTABLE",
+    "orc"                    => "ORC",
+    "parquet"                => "PARQUET"
   }[format.to_s.downcase]
   return val unless val.nil?
   Array(urls).each do |url|
@@ -110,7 +131,7 @@ module Google
   when "GOOGLE_SHEETS" then External::SheetsSource
   when "BIGTABLE" then External::BigtableSource
   else
-    # AVRO
+    # AVRO, DATASTORE_BACKUP, PARQUET
     External::DataSource
   end
 end
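With the expanded mapping, `:avro`, `:orc`, and `:parquet` now resolve explicitly instead of falling through to the generic branch. A small illustration of the resolution; note that `source_format_for` is marked `@private`, so this is for understanding the change only, not a supported public API:

```ruby
require "google/cloud/bigquery"

# Illustration only: source_format_for is a @private helper.
# When the format symbol is recognized, the URL list is never consulted,
# so nil is safe to pass here.
Google::Cloud::Bigquery::External.source_format_for nil, :orc     #=> "ORC"
Google::Cloud::Bigquery::External.source_format_for nil, :parquet #=> "PARQUET"
Google::Cloud::Bigquery::External.source_format_for nil, :sheets  #=> "GOOGLE_SHEETS"
```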
@@ -148,6 +169,24 @@ module Google
 #   # Retrieve the next page of results
 #   data = data.next if data.next?
 #
+# @example Hive partitioning options:
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+#   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+#   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+#     ext.hive_partitioning_mode = :auto
+#     ext.hive_partitioning_require_partition_filter = true
+#     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+#   end
+#
+#   external_data.hive_partitioning? #=> true
+#   external_data.hive_partitioning_mode #=> "AUTO"
+#   external_data.hive_partitioning_require_partition_filter? #=> true
+#   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+#
 class DataSource
   ##
   # @private The Google API Client object.
@@ -302,6 +341,52 @@ module Google
   @gapi.source_format == "BIGTABLE"
 end
 
+##
+# Whether the data format is "ORC".
+#
+# @return [Boolean]
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+#   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+#   external_data = bigquery.external gcs_uri, format: :orc do |ext|
+#     ext.hive_partitioning_mode = :auto
+#     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+#   end
+#   external_data.format #=> "ORC"
+#   external_data.orc? #=> true
+#
+def orc?
+  @gapi.source_format == "ORC"
+end
+
+##
+# Whether the data format is "PARQUET".
+#
+# @return [Boolean]
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+#   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+#   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+#     ext.hive_partitioning_mode = :auto
+#     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+#   end
+#   external_data.format #=> "PARQUET"
+#   external_data.parquet? #=> true
+#
+def parquet?
+  @gapi.source_format == "PARQUET"
+end
+
 ##
 # The fully-qualified URIs that point to your data in Google Cloud.
 # For Google Cloud Storage URIs: Each URI can contain one '*' wildcard
@@ -536,6 +621,246 @@ module Google
   @gapi.max_bad_records = new_max_bad_records
 end
 
+###
+# Checks if hive partitioning options are set.
+#
+# Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+# will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+# If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
+# Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
+#
+# @return [Boolean] `true` when hive partitioning options are set, or `false` otherwise.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+#   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+#   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+#     ext.hive_partitioning_mode = :auto
+#     ext.hive_partitioning_require_partition_filter = true
+#     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+#   end
+#
+#   external_data.hive_partitioning? #=> true
+#   external_data.hive_partitioning_mode #=> "AUTO"
+#   external_data.hive_partitioning_require_partition_filter? #=> true
+#   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+#
+def hive_partitioning?
+  !@gapi.hive_partitioning_options.nil?
+end
+
+###
+# The mode of hive partitioning to use when reading data. The following modes are supported:
+#
+# 1. `AUTO`: automatically infer partition key name(s) and type(s).
+# 2. `STRINGS`: automatically infer partition key name(s). All types are interpreted as strings.
+# 3. `CUSTOM`: partition key schema is encoded in the source URI prefix.
+#
+# @return [String, nil] The mode of hive partitioning, or `nil` if not set.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+#   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+#   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+#     ext.hive_partitioning_mode = :auto
+#     ext.hive_partitioning_require_partition_filter = true
+#     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+#   end
+#
+#   external_data.hive_partitioning? #=> true
+#   external_data.hive_partitioning_mode #=> "AUTO"
+#   external_data.hive_partitioning_require_partition_filter? #=> true
+#   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+#
+def hive_partitioning_mode
+  @gapi.hive_partitioning_options.mode if hive_partitioning?
+end
+
+##
+# Sets the mode of hive partitioning to use when reading data. The following modes are supported:
+#
+# 1. `auto`: automatically infer partition key name(s) and type(s).
+# 2. `strings`: automatically infer partition key name(s). All types are interpreted as strings.
+# 3. `custom`: partition key schema is encoded in the source URI prefix.
+#
+# Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+# will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+# If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
+# Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
+#
+# See {#format}, {#hive_partitioning_require_partition_filter=} and {#hive_partitioning_source_uri_prefix=}.
+#
+# @param [String, Symbol] mode The mode of hive partitioning to use when reading data.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+#   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+#   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+#     ext.hive_partitioning_mode = :auto
+#     ext.hive_partitioning_require_partition_filter = true
+#     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+#   end
+#
+#   external_data.hive_partitioning? #=> true
+#   external_data.hive_partitioning_mode #=> "AUTO"
+#   external_data.hive_partitioning_require_partition_filter? #=> true
+#   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+#
+def hive_partitioning_mode= mode
+  @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+  @gapi.hive_partitioning_options.mode = mode.to_s.upcase
+end
+
+###
+# Whether queries over the table using this external data source require a partition filter that can be used
+# for partition elimination to be specified. Note that this field should only be true when creating a
+# permanent external table or querying a temporary external table.
+#
+# @return [Boolean] `true` when queries over this table require a partition filter, or `false` otherwise.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+#   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+#   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+#     ext.hive_partitioning_mode = :auto
+#     ext.hive_partitioning_require_partition_filter = true
+#     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+#   end
+#
+#   external_data.hive_partitioning? #=> true
+#   external_data.hive_partitioning_mode #=> "AUTO"
+#   external_data.hive_partitioning_require_partition_filter? #=> true
+#   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+#
+def hive_partitioning_require_partition_filter?
+  return false unless hive_partitioning?
+  !@gapi.hive_partitioning_options.require_partition_filter.nil?
+end
+
+##
+# Sets whether queries over the table using this external data source require a partition filter
+# that can be used for partition elimination to be specified.
+#
+# See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_source_uri_prefix=}.
+#
+# @param [Boolean] require_partition_filter `true` if a partition filter must be specified, `false` otherwise.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+#   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+#   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+#     ext.hive_partitioning_mode = :auto
+#     ext.hive_partitioning_require_partition_filter = true
+#     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+#   end
+#
+#   external_data.hive_partitioning? #=> true
+#   external_data.hive_partitioning_mode #=> "AUTO"
+#   external_data.hive_partitioning_require_partition_filter? #=> true
+#   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+#
+def hive_partitioning_require_partition_filter= require_partition_filter
+  @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+  @gapi.hive_partitioning_options.require_partition_filter = require_partition_filter
+end
+
+###
+# The common prefix for all source uris when hive partition detection is requested. The prefix must end
+# immediately before the partition key encoding begins. For example, consider files following this data
+# layout:
+#
+# ```
+# gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+# gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+# ```
+#
+# When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+# `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+#
+# @return [String, nil] The common prefix for all source uris, or `nil` if not set.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+#   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+#   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+#     ext.hive_partitioning_mode = :auto
+#     ext.hive_partitioning_require_partition_filter = true
+#     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+#   end
+#
+#   external_data.hive_partitioning? #=> true
+#   external_data.hive_partitioning_mode #=> "AUTO"
+#   external_data.hive_partitioning_require_partition_filter? #=> true
+#   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+#
+def hive_partitioning_source_uri_prefix
+  @gapi.hive_partitioning_options.source_uri_prefix if hive_partitioning?
+end
+
+##
+# Sets the common prefix for all source uris when hive partition detection is requested. The prefix must end
+# immediately before the partition key encoding begins. For example, consider files following this data
+# layout:
+#
+# ```
+# gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+# gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+# ```
+#
+# When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+# `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+#
+# See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_require_partition_filter=}.
+#
+# @param [String] source_uri_prefix The common prefix for all source uris.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+#   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+#   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+#     ext.hive_partitioning_mode = :auto
+#     ext.hive_partitioning_require_partition_filter = true
+#     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+#   end
+#
+#   external_data.hive_partitioning? #=> true
+#   external_data.hive_partitioning_mode #=> "AUTO"
+#   external_data.hive_partitioning_require_partition_filter? #=> true
+#   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+#
+def hive_partitioning_source_uri_prefix= source_uri_prefix
+  @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+  @gapi.hive_partitioning_options.source_uri_prefix = source_uri_prefix
+end
+
 ##
 # @private Google API Client object.
 def to_gapi
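A detail worth noting in the accessors above: every reader guards on `hive_partitioning?`, so an external data source with no options set answers `nil`/`false` instead of raising. A sketch; the bucket URI here is a hypothetical placeholder:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# "gs://my-bucket/*.parquet" is a placeholder; any Parquet source works.
plain = bigquery.external "gs://my-bucket/*.parquet", format: :parquet

# No hive partitioning options assigned, so hive_partitioning_options is
# nil and the readers short-circuit:
plain.hive_partitioning?                          #=> false
plain.hive_partitioning_mode                      #=> nil
plain.hive_partitioning_require_partition_filter? #=> false
plain.hive_partitioning_source_uri_prefix         #=> nil
```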
data/lib/google/cloud/bigquery/extract_job.rb
CHANGED
@@ -103,8 +103,7 @@ module Google
 # table extraction.
 def compression?
   return false unless table?
-  val = @gapi.configuration.extract.compression
-  val == "GZIP"
+  @gapi.configuration.extract.compression == "GZIP"
 end
 
 ##
@@ -117,8 +116,7 @@ module Google
 #
 def json?
   return false unless table?
-  val = @gapi.configuration.extract.destination_format
-  val == "NEWLINE_DELIMITED_JSON"
+  @gapi.configuration.extract.destination_format == "NEWLINE_DELIMITED_JSON"
 end
 
 ##
@@ -146,8 +144,7 @@ module Google
 #
 def avro?
   return false unless table?
-  val = @gapi.configuration.extract.destination_format
-  val == "AVRO"
+  @gapi.configuration.extract.destination_format == "AVRO"
 end
 
 ##
@@ -173,8 +170,7 @@ module Google
 #
 def ml_xgboost_booster?
   return false unless model?
-  val = @gapi.configuration.extract.destination_format
-  val == "ML_XGBOOST_BOOSTER"
+  @gapi.configuration.extract.destination_format == "ML_XGBOOST_BOOSTER"
 end
 
 ##
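These hunks are pure refactors: each predicate now compares the configuration value inline instead of going through a local `val`. Behavior is unchanged, as in this sketch, where `my_dataset` and `my_table` are assumed to already exist:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset" # assumed to exist
table    = dataset.table "my_table"      # assumed to exist

# Export the table as newline-delimited JSON, without gzip.
extract_job = table.extract_job "gs://my-bucket/table-dump.json", format: :json

extract_job.json?        #=> true
extract_job.avro?        #=> false
extract_job.compression? #=> false (no gzip requested)
```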
data/lib/google/cloud/bigquery/load_job.rb
CHANGED
@@ -37,8 +37,8 @@ module Google
 #   bigquery = Google::Cloud::Bigquery.new
 #   dataset = bigquery.dataset "my_dataset"
 #
-#
-#   load_job = dataset.load_job "my_new_table",
+#   gcs_uri = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |schema|
 #     schema.string "first_name", mode: :required
 #     schema.record "cities_lived", mode: :repeated do |nested_schema|
 #       nested_schema.string "place", mode: :required
@@ -112,8 +112,7 @@ module Google
 #   `false` otherwise.
 #
 def iso8859_1?
-  val = @gapi.configuration.load.encoding
-  val == "ISO-8859-1"
+  @gapi.configuration.load.encoding == "ISO-8859-1"
 end
 
 ##
@@ -195,8 +194,7 @@ module Google
 #   `NEWLINE_DELIMITED_JSON`, `false` otherwise.
 #
 def json?
-  val = @gapi.configuration.load.source_format
-  val == "NEWLINE_DELIMITED_JSON"
+  @gapi.configuration.load.source_format == "NEWLINE_DELIMITED_JSON"
 end
 
 ##
@@ -218,8 +216,27 @@ module Google
 #   `false` otherwise.
 #
 def backup?
-  val = @gapi.configuration.load.source_format
-  val == "DATASTORE_BACKUP"
+  @gapi.configuration.load.source_format == "DATASTORE_BACKUP"
+end
+
+##
+# Checks if the source format is ORC.
+#
+# @return [Boolean] `true` when the source format is `ORC`,
+#   `false` otherwise.
+#
+def orc?
+  @gapi.configuration.load.source_format == "ORC"
+end
+
+##
+# Checks if the source format is Parquet.
+#
+# @return [Boolean] `true` when the source format is `PARQUET`,
+#   `false` otherwise.
+#
+def parquet?
+  @gapi.configuration.load.source_format == "PARQUET"
 end
 
 ##
@@ -347,6 +364,58 @@ module Google
   nil
 end
 
+###
+# Checks if hive partitioning options are set.
+#
+# @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+#
+# @return [Boolean] `true` when hive partitioning options are set, or `false` otherwise.
+#
+# @!group Attributes
+#
+def hive_partitioning?
+  !@gapi.configuration.load.hive_partitioning_options.nil?
+end
+
+###
+# The mode of hive partitioning to use when reading data. The following modes are supported:
+#
+# 1. `AUTO`: automatically infer partition key name(s) and type(s).
+# 2. `STRINGS`: automatically infer partition key name(s). All types are interpreted as strings.
+# 3. `CUSTOM`: partition key schema is encoded in the source URI prefix.
+#
+# @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+#
+# @return [String, nil] The mode of hive partitioning, or `nil` if not set.
+#
+# @!group Attributes
+#
+def hive_partitioning_mode
+  @gapi.configuration.load.hive_partitioning_options.mode if hive_partitioning?
+end
+
+###
+# The common prefix for all source uris when hive partition detection is requested. The prefix must end
+# immediately before the partition key encoding begins. For example, consider files following this data layout:
+#
+# ```
+# gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+# gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+# ```
+#
+# When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+# `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+#
+# @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+#
+# @return [String, nil] The common prefix for all source uris, or `nil` if not set.
+#
+# @!group Attributes
+#
+def hive_partitioning_source_uri_prefix
+  @gapi.configuration.load.hive_partitioning_options.source_uri_prefix if hive_partitioning?
+end
+
 ###
 # Checks if the destination table will be range partitioned. See [Creating and using integer range partitioned
 # tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
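On a job configured this way, the new readers report the partitioning layout back. A sketch that mirrors the doc example used throughout this file, assuming `my_dataset` exists and using Google's public sample bucket:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset" # assumed to exist

gcs_uri           = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"

load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  job.format = :parquet
  job.hive_partitioning_mode = :auto
  job.hive_partitioning_source_uri_prefix = source_uri_prefix
end
load_job.wait_until_done!

# The readers reflect the options set in the Updater block:
load_job.hive_partitioning?                  #=> true
load_job.hive_partitioning_mode              #=> "AUTO"
load_job.hive_partitioning_source_uri_prefix #=> source_uri_prefix
```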
@@ -1326,6 +1395,89 @@ module Google
   @gapi.configuration.update! labels: val
 end
 
+##
+# Sets the mode of hive partitioning to use when reading data. The following modes are supported:
+#
+# 1. `auto`: automatically infer partition key name(s) and type(s).
+# 2. `strings`: automatically infer partition key name(s). All types are interpreted as strings.
+# 3. `custom`: partition key schema is encoded in the source URI prefix.
+#
+# Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+# will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+#
+# See {#format=} and {#hive_partitioning_source_uri_prefix=}.
+#
+# @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+#
+# @param [String, Symbol] mode The mode of hive partitioning to use when reading data.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#
+#   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+#   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
+#     job.format = :parquet
+#     job.hive_partitioning_mode = :auto
+#     job.hive_partitioning_source_uri_prefix = source_uri_prefix
+#   end
+#
+#   load_job.wait_until_done!
+#   load_job.done? #=> true
+#
+# @!group Attributes
+#
+def hive_partitioning_mode= mode
+  @gapi.configuration.load.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+  @gapi.configuration.load.hive_partitioning_options.mode = mode.to_s.upcase
+end
+
+##
+# Sets the common prefix for all source uris when hive partition detection is requested. The prefix must end
+# immediately before the partition key encoding begins. For example, consider files following this data
+# layout:
+#
+# ```
+# gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+# gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+# ```
+#
+# When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+# `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+#
+# See {#hive_partitioning_mode=}.
+#
+# @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+#
+# @param [String] source_uri_prefix The common prefix for all source uris.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#
+#   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+#   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
+#     job.format = :parquet
+#     job.hive_partitioning_mode = :auto
+#     job.hive_partitioning_source_uri_prefix = source_uri_prefix
+#   end
+#
+#   load_job.wait_until_done!
+#   load_job.done? #=> true
+#
+# @!group Attributes
+#
+def hive_partitioning_source_uri_prefix= source_uri_prefix
+  @gapi.configuration.load.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+  @gapi.configuration.load.hive_partitioning_options.source_uri_prefix = source_uri_prefix
+end
+
 ##
 # Sets the field on which to range partition the table. See [Creating and using integer range partitioned
 # tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
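One behavior worth calling out: the mode setter normalizes its argument with `to_s.upcase`, so symbols and lowercase strings are interchangeable. A sketch inside an Updater block; the `{key:TYPE}` prefix syntax for `CUSTOM` mode follows BigQuery's documented convention and is shown here only as an assumption:

```ruby
dataset.load_job "my_new_table", gcs_uri do |job|
  job.format = :parquet
  # Equivalent spellings; all stored as "CUSTOM" via mode.to_s.upcase:
  job.hive_partitioning_mode = :custom
  job.hive_partitioning_mode = "custom"
  job.hive_partitioning_mode = "CUSTOM"
  # CUSTOM mode encodes the partition key schema in the prefix itself
  # (per BigQuery's hive-partitioned-loads docs; hypothetical bucket):
  job.hive_partitioning_source_uri_prefix =
    "gs://bucket/path_to_table/{dt:DATE}/{country:STRING}/{id:INTEGER}"
end
```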
@@ -1345,8 +1497,8 @@ module Google
 #   bigquery = Google::Cloud::Bigquery.new
 #   dataset = bigquery.dataset "my_dataset"
 #
-#
-#   load_job = dataset.load_job "my_new_table",
+#   gcs_uri = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
 #     job.schema do |schema|
 #       schema.integer "my_table_id", mode: :required
 #       schema.string "my_table_data", mode: :required
@@ -1386,8 +1538,8 @@ module Google
 #   bigquery = Google::Cloud::Bigquery.new
 #   dataset = bigquery.dataset "my_dataset"
 #
-#
-#   load_job = dataset.load_job "my_new_table",
+#   gcs_uri = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
 #     job.schema do |schema|
 #       schema.integer "my_table_id", mode: :required
 #       schema.string "my_table_data", mode: :required
@@ -1427,8 +1579,8 @@ module Google
 #   bigquery = Google::Cloud::Bigquery.new
 #   dataset = bigquery.dataset "my_dataset"
 #
-#
-#   load_job = dataset.load_job "my_new_table",
+#   gcs_uri = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
 #     job.schema do |schema|
 #       schema.integer "my_table_id", mode: :required
 #       schema.string "my_table_data", mode: :required
@@ -1468,8 +1620,8 @@ module Google
 #   bigquery = Google::Cloud::Bigquery.new
 #   dataset = bigquery.dataset "my_dataset"
 #
-#
-#   load_job = dataset.load_job "my_new_table",
+#   gcs_uri = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
 #     job.schema do |schema|
 #       schema.integer "my_table_id", mode: :required
 #       schema.string "my_table_data", mode: :required
@@ -1510,8 +1662,8 @@ module Google
 #   bigquery = Google::Cloud::Bigquery.new
 #   dataset = bigquery.dataset "my_dataset"
 #
-#
-#   load_job = dataset.load_job "my_new_table",
+#   gcs_uri = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
 #     job.time_partitioning_type = "DAY"
 #   end
 #
@@ -1549,8 +1701,8 @@ module Google
 #   bigquery = Google::Cloud::Bigquery.new
 #   dataset = bigquery.dataset "my_dataset"
 #
-#
-#   load_job = dataset.load_job "my_new_table",
+#   gcs_uri = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
 #     job.time_partitioning_type = "DAY"
 #     job.time_partitioning_field = "dob"
 #     job.schema do |schema|
@@ -1585,8 +1737,8 @@ module Google
 #   bigquery = Google::Cloud::Bigquery.new
 #   dataset = bigquery.dataset "my_dataset"
 #
-#
-#   load_job = dataset.load_job "my_new_table",
+#   gcs_uri = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
 #     job.time_partitioning_type = "DAY"
 #     job.time_partitioning_expiration = 86_400
 #   end
@@ -1645,8 +1797,8 @@ module Google
 #   bigquery = Google::Cloud::Bigquery.new
 #   dataset = bigquery.dataset "my_dataset"
 #
-#
-#   load_job = dataset.load_job "my_new_table",
+#   gcs_uri = "gs://my-bucket/file-name.csv"
+#   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
 #     job.time_partitioning_type = "DAY"
 #     job.time_partitioning_field = "dob"
 #     job.schema do |schema|
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: google-cloud-bigquery
 version: !ruby/object:Gem::Version
-  version: 1.25.0
+  version: 1.26.0
 platform: ruby
 authors:
 - Mike Moore
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2020-11-16 00:00:00.000000000 Z
+date: 2021-01-13 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: concurrent-ruby
@@ -26,19 +26,19 @@ dependencies:
   - !ruby/object:Gem::Version
     version: '1.0'
 - !ruby/object:Gem::Dependency
-  name: google-api-client
+  name: google-apis-bigquery_v2
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
     - !ruby/object:Gem::Version
-      version: '0.
+      version: '0.1'
   type: :runtime
   prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
    - !ruby/object:Gem::Version
-      version: '0.
+      version: '0.1'
 - !ruby/object:Gem::Dependency
   name: google-cloud-core
   requirement: !ruby/object:Gem::Requirement
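For consumers, the dependency swap is transparent at install time: bundling the new release pulls in the generated BigQuery client instead of the monolithic google-api-client. A sketch Gemfile entry; nothing here is specific to this diff beyond the version constraints shown above:

```ruby
# Gemfile
source "https://rubygems.org"

# Upgrading picks up google-apis-bigquery_v2 (~> 0.1) transitively,
# replacing the monolithic google-api-client dependency.
gem "google-cloud-bigquery", "~> 1.26"
```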