google-cloud-bigquery 1.25.0 → 1.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/CONTRIBUTING.md +1 -1
- data/lib/google/cloud/bigquery/external.rb +328 -3
- data/lib/google/cloud/bigquery/extract_job.rb +4 -8
- data/lib/google/cloud/bigquery/load_job.rb +176 -24
- data/lib/google/cloud/bigquery/query_job.rb +1 -2
- data/lib/google/cloud/bigquery/version.rb +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8c2f8d8d7a564df2c916cced8adf4c0b621a1e11250a3a04decfce08995a4a24
|
4
|
+
data.tar.gz: 57bbda27d4c54e0522b564b8cdc62fb5e9dce8b26c91d3f0604c624ff113f057
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f445a86a5435cafc236d82faf91df46a06c6cee8612d8e6c4011450c93b73b61a883510909c6bb25e72516ae1a57c8a605736d82e0b63f0b10ad25cab90a1280
|
7
|
+
data.tar.gz: 43423f2bea5cea82c6c19bcf639bfb690f4718b0450140d299616ece59b147258ef877cafbc17b99c68274c310a0cd0123ae6b6db0b37bba2fb08be0395837f0
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,14 @@
|
|
1
1
|
# Release History
|
2
2
|
|
3
|
+
### 1.26.0 / 2021-01-13
|
4
|
+
|
5
|
+
#### Features
|
6
|
+
|
7
|
+
* Add support for Hive Partitioning
|
8
|
+
* Add hive partitioning options to External::DataSource
|
9
|
+
* Add hive partitioning options to LoadJob and LoadJob::Updater
|
10
|
+
* Replace google-api-client with google-apis-bigquery_v2
|
11
|
+
|
3
12
|
### 1.25.0 / 2020-11-16
|
4
13
|
|
5
14
|
#### Features
|
data/CONTRIBUTING.md
CHANGED
@@ -52,6 +52,24 @@ module Google
|
|
52
52
|
# # Retrieve the next page of results
|
53
53
|
# data = data.next if data.next?
|
54
54
|
#
|
55
|
+
# @example Hive partitioning options:
|
56
|
+
# require "google/cloud/bigquery"
|
57
|
+
#
|
58
|
+
# bigquery = Google::Cloud::Bigquery.new
|
59
|
+
#
|
60
|
+
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
61
|
+
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
62
|
+
# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
|
63
|
+
# ext.hive_partitioning_mode = :auto
|
64
|
+
# ext.hive_partitioning_require_partition_filter = true
|
65
|
+
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
66
|
+
# end
|
67
|
+
#
|
68
|
+
# external_data.hive_partitioning? #=> true
|
69
|
+
# external_data.hive_partitioning_mode #=> "AUTO"
|
70
|
+
# external_data.hive_partitioning_require_partition_filter? #=> true
|
71
|
+
# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
|
72
|
+
#
|
55
73
|
module External
|
56
74
|
##
|
57
75
|
# @private New External from URLs and format
|
@@ -79,7 +97,8 @@ module Google
|
|
79
97
|
# @private Determine source_format from inputs
|
80
98
|
def self.source_format_for urls, format
|
81
99
|
val = {
|
82
|
-
"csv"
|
100
|
+
"csv" => "CSV",
|
101
|
+
"avro" => "AVRO",
|
83
102
|
"json" => "NEWLINE_DELIMITED_JSON",
|
84
103
|
"newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
|
85
104
|
"sheets" => "GOOGLE_SHEETS",
|
@@ -87,7 +106,9 @@ module Google
|
|
87
106
|
"datastore" => "DATASTORE_BACKUP",
|
88
107
|
"backup" => "DATASTORE_BACKUP",
|
89
108
|
"datastore_backup" => "DATASTORE_BACKUP",
|
90
|
-
"bigtable" => "BIGTABLE"
|
109
|
+
"bigtable" => "BIGTABLE",
|
110
|
+
"orc" => "ORC",
|
111
|
+
"parquet" => "PARQUET"
|
91
112
|
}[format.to_s.downcase]
|
92
113
|
return val unless val.nil?
|
93
114
|
Array(urls).each do |url|
|
@@ -110,7 +131,7 @@ module Google
|
|
110
131
|
when "GOOGLE_SHEETS" then External::SheetsSource
|
111
132
|
when "BIGTABLE" then External::BigtableSource
|
112
133
|
else
|
113
|
-
# AVRO
|
134
|
+
# AVRO, DATASTORE_BACKUP, PARQUET
|
114
135
|
External::DataSource
|
115
136
|
end
|
116
137
|
end
|
@@ -148,6 +169,24 @@ module Google
|
|
148
169
|
# # Retrieve the next page of results
|
149
170
|
# data = data.next if data.next?
|
150
171
|
#
|
172
|
+
# @example Hive partitioning options:
|
173
|
+
# require "google/cloud/bigquery"
|
174
|
+
#
|
175
|
+
# bigquery = Google::Cloud::Bigquery.new
|
176
|
+
#
|
177
|
+
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
178
|
+
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
179
|
+
# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
|
180
|
+
# ext.hive_partitioning_mode = :auto
|
181
|
+
# ext.hive_partitioning_require_partition_filter = true
|
182
|
+
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
183
|
+
# end
|
184
|
+
#
|
185
|
+
# external_data.hive_partitioning? #=> true
|
186
|
+
# external_data.hive_partitioning_mode #=> "AUTO"
|
187
|
+
# external_data.hive_partitioning_require_partition_filter? #=> true
|
188
|
+
# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
|
189
|
+
#
|
151
190
|
class DataSource
|
152
191
|
##
|
153
192
|
# @private The Google API Client object.
|
@@ -302,6 +341,52 @@ module Google
|
|
302
341
|
@gapi.source_format == "BIGTABLE"
|
303
342
|
end
|
304
343
|
|
344
|
+
##
|
345
|
+
# Whether the data format is "ORC".
|
346
|
+
#
|
347
|
+
# @return [Boolean]
|
348
|
+
#
|
349
|
+
# @example
|
350
|
+
# require "google/cloud/bigquery"
|
351
|
+
#
|
352
|
+
# bigquery = Google::Cloud::Bigquery.new
|
353
|
+
#
|
354
|
+
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
355
|
+
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
356
|
+
# external_data = bigquery.external gcs_uri, format: :orc do |ext|
|
357
|
+
# ext.hive_partitioning_mode = :auto
|
358
|
+
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
359
|
+
# end
|
360
|
+
# external_data.format #=> "ORC"
|
361
|
+
# external_data.orc? #=> true
|
362
|
+
#
|
363
|
+
def orc?
|
364
|
+
@gapi.source_format == "ORC"
|
365
|
+
end
|
366
|
+
|
367
|
+
##
|
368
|
+
# Whether the data format is "PARQUET".
|
369
|
+
#
|
370
|
+
# @return [Boolean]
|
371
|
+
#
|
372
|
+
# @example
|
373
|
+
# require "google/cloud/bigquery"
|
374
|
+
#
|
375
|
+
# bigquery = Google::Cloud::Bigquery.new
|
376
|
+
#
|
377
|
+
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
378
|
+
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
379
|
+
# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
|
380
|
+
# ext.hive_partitioning_mode = :auto
|
381
|
+
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
382
|
+
# end
|
383
|
+
# external_data.format #=> "PARQUET"
|
384
|
+
# external_data.parquet? #=> true
|
385
|
+
#
|
386
|
+
def parquet?
|
387
|
+
@gapi.source_format == "PARQUET"
|
388
|
+
end
|
389
|
+
|
305
390
|
##
|
306
391
|
# The fully-qualified URIs that point to your data in Google Cloud.
|
307
392
|
# For Google Cloud Storage URIs: Each URI can contain one '*' wildcard
|
@@ -536,6 +621,246 @@ module Google
|
|
536
621
|
@gapi.max_bad_records = new_max_bad_records
|
537
622
|
end
|
538
623
|
|
624
|
+
###
|
625
|
+
# Checks if hive partitioning options are set.
|
626
|
+
#
|
627
|
+
# Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
|
628
|
+
# will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
|
629
|
+
# If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
|
630
|
+
# Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
|
631
|
+
#
|
632
|
+
# @return [Boolean] `true` when hive partitioning options are set, or `false` otherwise.
|
633
|
+
#
|
634
|
+
# @example
|
635
|
+
# require "google/cloud/bigquery"
|
636
|
+
#
|
637
|
+
# bigquery = Google::Cloud::Bigquery.new
|
638
|
+
#
|
639
|
+
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
640
|
+
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
641
|
+
# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
|
642
|
+
# ext.hive_partitioning_mode = :auto
|
643
|
+
# ext.hive_partitioning_require_partition_filter = true
|
644
|
+
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
645
|
+
# end
|
646
|
+
#
|
647
|
+
# external_data.hive_partitioning? #=> true
|
648
|
+
# external_data.hive_partitioning_mode #=> "AUTO"
|
649
|
+
# external_data.hive_partitioning_require_partition_filter? #=> true
|
650
|
+
# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
|
651
|
+
#
|
652
|
+
def hive_partitioning?
|
653
|
+
!@gapi.hive_partitioning_options.nil?
|
654
|
+
end
|
655
|
+
|
656
|
+
###
|
657
|
+
# The mode of hive partitioning to use when reading data. The following modes are supported:
|
658
|
+
#
|
659
|
+
# 1. `AUTO`: automatically infer partition key name(s) and type(s).
|
660
|
+
# 2. `STRINGS`: automatically infer partition key name(s). All types are interpreted as strings.
|
661
|
+
# 3. `CUSTOM`: partition key schema is encoded in the source URI prefix.
|
662
|
+
#
|
663
|
+
# @return [String, nil] The mode of hive partitioning, or `nil` if not set.
|
664
|
+
#
|
665
|
+
# @example
|
666
|
+
# require "google/cloud/bigquery"
|
667
|
+
#
|
668
|
+
# bigquery = Google::Cloud::Bigquery.new
|
669
|
+
#
|
670
|
+
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
671
|
+
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
672
|
+
# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
|
673
|
+
# ext.hive_partitioning_mode = :auto
|
674
|
+
# ext.hive_partitioning_require_partition_filter = true
|
675
|
+
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
676
|
+
# end
|
677
|
+
#
|
678
|
+
# external_data.hive_partitioning? #=> true
|
679
|
+
# external_data.hive_partitioning_mode #=> "AUTO"
|
680
|
+
# external_data.hive_partitioning_require_partition_filter? #=> true
|
681
|
+
# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
|
682
|
+
#
|
683
|
+
def hive_partitioning_mode
|
684
|
+
@gapi.hive_partitioning_options.mode if hive_partitioning?
|
685
|
+
end
|
686
|
+
|
687
|
+
##
|
688
|
+
# Sets the mode of hive partitioning to use when reading data. The following modes are supported:
|
689
|
+
#
|
690
|
+
# 1. `auto`: automatically infer partition key name(s) and type(s).
|
691
|
+
# 2. `strings`: automatically infer partition key name(s). All types are interpreted as strings.
|
692
|
+
# 3. `custom`: partition key schema is encoded in the source URI prefix.
|
693
|
+
#
|
694
|
+
# Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
|
695
|
+
# will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
|
696
|
+
# If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
|
697
|
+
# Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
|
698
|
+
#
|
699
|
+
# See {#format}, {#hive_partitioning_require_partition_filter=} and {#hive_partitioning_source_uri_prefix=}.
|
700
|
+
#
|
701
|
+
# @param [String, Symbol] mode The mode of hive partitioning to use when reading data.
|
702
|
+
#
|
703
|
+
# @example
|
704
|
+
# require "google/cloud/bigquery"
|
705
|
+
#
|
706
|
+
# bigquery = Google::Cloud::Bigquery.new
|
707
|
+
#
|
708
|
+
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
709
|
+
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
710
|
+
# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
|
711
|
+
# ext.hive_partitioning_mode = :auto
|
712
|
+
# ext.hive_partitioning_require_partition_filter = true
|
713
|
+
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
714
|
+
# end
|
715
|
+
#
|
716
|
+
# external_data.hive_partitioning? #=> true
|
717
|
+
# external_data.hive_partitioning_mode #=> "AUTO"
|
718
|
+
# external_data.hive_partitioning_require_partition_filter? #=> true
|
719
|
+
# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
|
720
|
+
#
|
721
|
+
def hive_partitioning_mode= mode
|
722
|
+
@gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
|
723
|
+
@gapi.hive_partitioning_options.mode = mode.to_s.upcase
|
724
|
+
end
|
725
|
+
|
726
|
+
###
|
727
|
+
# Whether queries over the table using this external data source require a partition filter that can be used
|
728
|
+
# for partition elimination to be specified. Note that this field should only be true when creating a
|
729
|
+
# permanent external table or querying a temporary external table.
|
730
|
+
#
|
731
|
+
# @return [Boolean] `true` when queries over this table require a partition filter, or `false` otherwise.
|
732
|
+
#
|
733
|
+
# @example
|
734
|
+
# require "google/cloud/bigquery"
|
735
|
+
#
|
736
|
+
# bigquery = Google::Cloud::Bigquery.new
|
737
|
+
#
|
738
|
+
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
739
|
+
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
740
|
+
# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
|
741
|
+
# ext.hive_partitioning_mode = :auto
|
742
|
+
# ext.hive_partitioning_require_partition_filter = true
|
743
|
+
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
744
|
+
# end
|
745
|
+
#
|
746
|
+
# external_data.hive_partitioning? #=> true
|
747
|
+
# external_data.hive_partitioning_mode #=> "AUTO"
|
748
|
+
# external_data.hive_partitioning_require_partition_filter? #=> true
|
749
|
+
# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
|
750
|
+
#
|
751
|
+
def hive_partitioning_require_partition_filter?
|
752
|
+
return false unless hive_partitioning?
|
753
|
+
!@gapi.hive_partitioning_options.require_partition_filter.nil?
|
754
|
+
end
|
755
|
+
|
756
|
+
##
|
757
|
+
# Sets whether queries over the table using this external data source require a partition filter
|
758
|
+
# that can be used for partition elimination to be specified.
|
759
|
+
#
|
760
|
+
# See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_source_uri_prefix=}.
|
761
|
+
#
|
762
|
+
# @param [Boolean] require_partition_filter `true` if a partition filter must be specified, `false` otherwise.
|
763
|
+
#
|
764
|
+
# @example
|
765
|
+
# require "google/cloud/bigquery"
|
766
|
+
#
|
767
|
+
# bigquery = Google::Cloud::Bigquery.new
|
768
|
+
#
|
769
|
+
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
770
|
+
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
771
|
+
# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
|
772
|
+
# ext.hive_partitioning_mode = :auto
|
773
|
+
# ext.hive_partitioning_require_partition_filter = true
|
774
|
+
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
775
|
+
# end
|
776
|
+
#
|
777
|
+
# external_data.hive_partitioning? #=> true
|
778
|
+
# external_data.hive_partitioning_mode #=> "AUTO"
|
779
|
+
# external_data.hive_partitioning_require_partition_filter? #=> true
|
780
|
+
# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
|
781
|
+
#
|
782
|
+
def hive_partitioning_require_partition_filter= require_partition_filter
|
783
|
+
@gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
|
784
|
+
@gapi.hive_partitioning_options.require_partition_filter = require_partition_filter
|
785
|
+
end
|
786
|
+
|
787
|
+
###
|
788
|
+
# The common prefix for all source uris when hive partition detection is requested. The prefix must end
|
789
|
+
# immediately before the partition key encoding begins. For example, consider files following this data
|
790
|
+
# layout:
|
791
|
+
#
|
792
|
+
# ```
|
793
|
+
# gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
|
794
|
+
# gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
|
795
|
+
# ```
|
796
|
+
#
|
797
|
+
# When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
|
798
|
+
# `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
|
799
|
+
#
|
800
|
+
# @return [String, nil] The common prefix for all source uris, or `nil` if not set.
|
801
|
+
#
|
802
|
+
# @example
|
803
|
+
# require "google/cloud/bigquery"
|
804
|
+
#
|
805
|
+
# bigquery = Google::Cloud::Bigquery.new
|
806
|
+
#
|
807
|
+
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
808
|
+
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
809
|
+
# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
|
810
|
+
# ext.hive_partitioning_mode = :auto
|
811
|
+
# ext.hive_partitioning_require_partition_filter = true
|
812
|
+
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
813
|
+
# end
|
814
|
+
#
|
815
|
+
# external_data.hive_partitioning? #=> true
|
816
|
+
# external_data.hive_partitioning_mode #=> "AUTO"
|
817
|
+
# external_data.hive_partitioning_require_partition_filter? #=> true
|
818
|
+
# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
|
819
|
+
#
|
820
|
+
def hive_partitioning_source_uri_prefix
|
821
|
+
@gapi.hive_partitioning_options.source_uri_prefix if hive_partitioning?
|
822
|
+
end
|
823
|
+
|
824
|
+
##
|
825
|
+
# Sets the common prefix for all source uris when hive partition detection is requested. The prefix must end
|
826
|
+
# immediately before the partition key encoding begins. For example, consider files following this data
|
827
|
+
# layout:
|
828
|
+
#
|
829
|
+
# ```
|
830
|
+
# gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
|
831
|
+
# gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
|
832
|
+
# ```
|
833
|
+
#
|
834
|
+
# When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
|
835
|
+
# `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
|
836
|
+
#
|
837
|
+
# See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_require_partition_filter=}.
|
838
|
+
#
|
839
|
+
# @param [String] source_uri_prefix The common prefix for all source uris.
|
840
|
+
#
|
841
|
+
# @example
|
842
|
+
# require "google/cloud/bigquery"
|
843
|
+
#
|
844
|
+
# bigquery = Google::Cloud::Bigquery.new
|
845
|
+
#
|
846
|
+
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
847
|
+
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
848
|
+
# external_data = bigquery.external gcs_uri, format: :parquet do |ext|
|
849
|
+
# ext.hive_partitioning_mode = :auto
|
850
|
+
# ext.hive_partitioning_require_partition_filter = true
|
851
|
+
# ext.hive_partitioning_source_uri_prefix = source_uri_prefix
|
852
|
+
# end
|
853
|
+
#
|
854
|
+
# external_data.hive_partitioning? #=> true
|
855
|
+
# external_data.hive_partitioning_mode #=> "AUTO"
|
856
|
+
# external_data.hive_partitioning_require_partition_filter? #=> true
|
857
|
+
# external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
|
858
|
+
#
|
859
|
+
def hive_partitioning_source_uri_prefix= source_uri_prefix
|
860
|
+
@gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
|
861
|
+
@gapi.hive_partitioning_options.source_uri_prefix = source_uri_prefix
|
862
|
+
end
|
863
|
+
|
539
864
|
##
|
540
865
|
# @private Google API Client object.
|
541
866
|
def to_gapi
|
@@ -103,8 +103,7 @@ module Google
|
|
103
103
|
# table extraction.
|
104
104
|
def compression?
|
105
105
|
return false unless table?
|
106
|
-
|
107
|
-
val == "GZIP"
|
106
|
+
@gapi.configuration.extract.compression == "GZIP"
|
108
107
|
end
|
109
108
|
|
110
109
|
##
|
@@ -117,8 +116,7 @@ module Google
|
|
117
116
|
#
|
118
117
|
def json?
|
119
118
|
return false unless table?
|
120
|
-
|
121
|
-
val == "NEWLINE_DELIMITED_JSON"
|
119
|
+
@gapi.configuration.extract.destination_format == "NEWLINE_DELIMITED_JSON"
|
122
120
|
end
|
123
121
|
|
124
122
|
##
|
@@ -146,8 +144,7 @@ module Google
|
|
146
144
|
#
|
147
145
|
def avro?
|
148
146
|
return false unless table?
|
149
|
-
|
150
|
-
val == "AVRO"
|
147
|
+
@gapi.configuration.extract.destination_format == "AVRO"
|
151
148
|
end
|
152
149
|
|
153
150
|
##
|
@@ -173,8 +170,7 @@ module Google
|
|
173
170
|
#
|
174
171
|
def ml_xgboost_booster?
|
175
172
|
return false unless model?
|
176
|
-
|
177
|
-
val == "ML_XGBOOST_BOOSTER"
|
173
|
+
@gapi.configuration.extract.destination_format == "ML_XGBOOST_BOOSTER"
|
178
174
|
end
|
179
175
|
|
180
176
|
##
|
@@ -37,8 +37,8 @@ module Google
|
|
37
37
|
# bigquery = Google::Cloud::Bigquery.new
|
38
38
|
# dataset = bigquery.dataset "my_dataset"
|
39
39
|
#
|
40
|
-
#
|
41
|
-
# load_job = dataset.load_job "my_new_table",
|
40
|
+
# gcs_uri = "gs://my-bucket/file-name.csv"
|
41
|
+
# load_job = dataset.load_job "my_new_table", gcs_uri do |schema|
|
42
42
|
# schema.string "first_name", mode: :required
|
43
43
|
# schema.record "cities_lived", mode: :repeated do |nested_schema|
|
44
44
|
# nested_schema.string "place", mode: :required
|
@@ -112,8 +112,7 @@ module Google
|
|
112
112
|
# `false` otherwise.
|
113
113
|
#
|
114
114
|
def iso8859_1?
|
115
|
-
|
116
|
-
val == "ISO-8859-1"
|
115
|
+
@gapi.configuration.load.encoding == "ISO-8859-1"
|
117
116
|
end
|
118
117
|
|
119
118
|
##
|
@@ -195,8 +194,7 @@ module Google
|
|
195
194
|
# `NEWLINE_DELIMITED_JSON`, `false` otherwise.
|
196
195
|
#
|
197
196
|
def json?
|
198
|
-
|
199
|
-
val == "NEWLINE_DELIMITED_JSON"
|
197
|
+
@gapi.configuration.load.source_format == "NEWLINE_DELIMITED_JSON"
|
200
198
|
end
|
201
199
|
|
202
200
|
##
|
@@ -218,8 +216,27 @@ module Google
|
|
218
216
|
# `false` otherwise.
|
219
217
|
#
|
220
218
|
def backup?
|
221
|
-
|
222
|
-
|
219
|
+
@gapi.configuration.load.source_format == "DATASTORE_BACKUP"
|
220
|
+
end
|
221
|
+
|
222
|
+
##
|
223
|
+
# Checks if the source format is ORC.
|
224
|
+
#
|
225
|
+
# @return [Boolean] `true` when the source format is `ORC`,
|
226
|
+
# `false` otherwise.
|
227
|
+
#
|
228
|
+
def orc?
|
229
|
+
@gapi.configuration.load.source_format == "ORC"
|
230
|
+
end
|
231
|
+
|
232
|
+
##
|
233
|
+
# Checks if the source format is Parquet.
|
234
|
+
#
|
235
|
+
# @return [Boolean] `true` when the source format is `PARQUET`,
|
236
|
+
# `false` otherwise.
|
237
|
+
#
|
238
|
+
def parquet?
|
239
|
+
@gapi.configuration.load.source_format == "PARQUET"
|
223
240
|
end
|
224
241
|
|
225
242
|
##
|
@@ -347,6 +364,58 @@ module Google
|
|
347
364
|
nil
|
348
365
|
end
|
349
366
|
|
367
|
+
###
|
368
|
+
# Checks if hive partitioning options are set.
|
369
|
+
#
|
370
|
+
# @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
|
371
|
+
#
|
372
|
+
# @return [Boolean] `true` when hive partitioning options are set, or `false` otherwise.
|
373
|
+
#
|
374
|
+
# @!group Attributes
|
375
|
+
#
|
376
|
+
def hive_partitioning?
|
377
|
+
!@gapi.configuration.load.hive_partitioning_options.nil?
|
378
|
+
end
|
379
|
+
|
380
|
+
###
|
381
|
+
# The mode of hive partitioning to use when reading data. The following modes are supported:
|
382
|
+
#
|
383
|
+
# 1. `AUTO`: automatically infer partition key name(s) and type(s).
|
384
|
+
# 2. `STRINGS`: automatically infer partition key name(s). All types are interpreted as strings.
|
385
|
+
# 3. `CUSTOM`: partition key schema is encoded in the source URI prefix.
|
386
|
+
#
|
387
|
+
# @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
|
388
|
+
#
|
389
|
+
# @return [String, nil] The mode of hive partitioning, or `nil` if not set.
|
390
|
+
#
|
391
|
+
# @!group Attributes
|
392
|
+
#
|
393
|
+
def hive_partitioning_mode
|
394
|
+
@gapi.configuration.load.hive_partitioning_options.mode if hive_partitioning?
|
395
|
+
end
|
396
|
+
|
397
|
+
###
|
398
|
+
# The common prefix for all source uris when hive partition detection is requested. The prefix must end
|
399
|
+
# immediately before the partition key encoding begins. For example, consider files following this data layout:
|
400
|
+
#
|
401
|
+
# ```
|
402
|
+
# gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
|
403
|
+
# gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
|
404
|
+
# ```
|
405
|
+
#
|
406
|
+
# When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
|
407
|
+
# `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
|
408
|
+
#
|
409
|
+
# @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
|
410
|
+
#
|
411
|
+
# @return [String, nil] The common prefix for all source uris, or `nil` if not set.
|
412
|
+
#
|
413
|
+
# @!group Attributes
|
414
|
+
#
|
415
|
+
def hive_partitioning_source_uri_prefix
|
416
|
+
@gapi.configuration.load.hive_partitioning_options.source_uri_prefix if hive_partitioning?
|
417
|
+
end
|
418
|
+
|
350
419
|
###
|
351
420
|
# Checks if the destination table will be range partitioned. See [Creating and using integer range partitioned
|
352
421
|
# tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
|
@@ -1326,6 +1395,89 @@ module Google
|
|
1326
1395
|
@gapi.configuration.update! labels: val
|
1327
1396
|
end
|
1328
1397
|
|
1398
|
+
##
|
1399
|
+
# Sets the mode of hive partitioning to use when reading data. The following modes are supported:
|
1400
|
+
#
|
1401
|
+
# 1. `auto`: automatically infer partition key name(s) and type(s).
|
1402
|
+
# 2. `strings`: automatically infer partition key name(s). All types are interpreted as strings.
|
1403
|
+
# 3. `custom`: partition key schema is encoded in the source URI prefix.
|
1404
|
+
#
|
1405
|
+
# Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
|
1406
|
+
# will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
|
1407
|
+
#
|
1408
|
+
# See {#format=} and {#hive_partitioning_source_uri_prefix=}.
|
1409
|
+
#
|
1410
|
+
# @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
|
1411
|
+
#
|
1412
|
+
# @param [String, Symbol] mode The mode of hive partitioning to use when reading data.
|
1413
|
+
#
|
1414
|
+
# @example
|
1415
|
+
# require "google/cloud/bigquery"
|
1416
|
+
#
|
1417
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1418
|
+
# dataset = bigquery.dataset "my_dataset"
|
1419
|
+
#
|
1420
|
+
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
1421
|
+
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
1422
|
+
# load_job = dataset.load_job "my_new_table", gcs_uri do |job|
|
1423
|
+
# job.format = :parquet
|
1424
|
+
# job.hive_partitioning_mode = :auto
|
1425
|
+
# job.hive_partitioning_source_uri_prefix = source_uri_prefix
|
1426
|
+
# end
|
1427
|
+
#
|
1428
|
+
# load_job.wait_until_done!
|
1429
|
+
# load_job.done? #=> true
|
1430
|
+
#
|
1431
|
+
# @!group Attributes
|
1432
|
+
#
|
1433
|
+
def hive_partitioning_mode= mode
|
1434
|
+
@gapi.configuration.load.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
|
1435
|
+
@gapi.configuration.load.hive_partitioning_options.mode = mode.to_s.upcase
|
1436
|
+
end
|
1437
|
+
|
1438
|
+
##
|
1439
|
+
# Sets the common prefix for all source uris when hive partition detection is requested. The prefix must end
|
1440
|
+
# immediately before the partition key encoding begins. For example, consider files following this data
|
1441
|
+
# layout:
|
1442
|
+
#
|
1443
|
+
# ```
|
1444
|
+
# gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
|
1445
|
+
# gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
|
1446
|
+
# ```
|
1447
|
+
#
|
1448
|
+
# When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
|
1449
|
+
# `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
|
1450
|
+
#
|
1451
|
+
# See {#hive_partitioning_mode=}.
|
1452
|
+
#
|
1453
|
+
# @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
|
1454
|
+
#
|
1455
|
+
# @param [String] source_uri_prefix The common prefix for all source uris.
|
1456
|
+
#
|
1457
|
+
# @example
|
1458
|
+
# require "google/cloud/bigquery"
|
1459
|
+
#
|
1460
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1461
|
+
# dataset = bigquery.dataset "my_dataset"
|
1462
|
+
#
|
1463
|
+
# gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
|
1464
|
+
# source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
|
1465
|
+
# load_job = dataset.load_job "my_new_table", gcs_uri do |job|
|
1466
|
+
# job.format = :parquet
|
1467
|
+
# job.hive_partitioning_mode = :auto
|
1468
|
+
# job.hive_partitioning_source_uri_prefix = source_uri_prefix
|
1469
|
+
# end
|
1470
|
+
#
|
1471
|
+
# load_job.wait_until_done!
|
1472
|
+
# load_job.done? #=> true
|
1473
|
+
#
|
1474
|
+
# @!group Attributes
|
1475
|
+
#
|
1476
|
+
def hive_partitioning_source_uri_prefix= source_uri_prefix
|
1477
|
+
@gapi.configuration.load.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
|
1478
|
+
@gapi.configuration.load.hive_partitioning_options.source_uri_prefix = source_uri_prefix
|
1479
|
+
end
|
1480
|
+
|
1329
1481
|
##
|
1330
1482
|
# Sets the field on which to range partition the table. See [Creating and using integer range partitioned
|
1331
1483
|
# tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
|
@@ -1345,8 +1497,8 @@ module Google
|
|
1345
1497
|
# bigquery = Google::Cloud::Bigquery.new
|
1346
1498
|
# dataset = bigquery.dataset "my_dataset"
|
1347
1499
|
#
|
1348
|
-
#
|
1349
|
-
# load_job = dataset.load_job "my_new_table",
|
1500
|
+
# gcs_uri = "gs://my-bucket/file-name.csv"
|
1501
|
+
# load_job = dataset.load_job "my_new_table", gcs_uri do |job|
|
1350
1502
|
# job.schema do |schema|
|
1351
1503
|
# schema.integer "my_table_id", mode: :required
|
1352
1504
|
# schema.string "my_table_data", mode: :required
|
@@ -1386,8 +1538,8 @@ module Google
|
|
1386
1538
|
# bigquery = Google::Cloud::Bigquery.new
|
1387
1539
|
# dataset = bigquery.dataset "my_dataset"
|
1388
1540
|
#
|
1389
|
-
#
|
1390
|
-
# load_job = dataset.load_job "my_new_table",
|
1541
|
+
# gcs_uri = "gs://my-bucket/file-name.csv"
|
1542
|
+
# load_job = dataset.load_job "my_new_table", gcs_uri do |job|
|
1391
1543
|
# job.schema do |schema|
|
1392
1544
|
# schema.integer "my_table_id", mode: :required
|
1393
1545
|
# schema.string "my_table_data", mode: :required
|
@@ -1427,8 +1579,8 @@ module Google
|
|
1427
1579
|
# bigquery = Google::Cloud::Bigquery.new
|
1428
1580
|
# dataset = bigquery.dataset "my_dataset"
|
1429
1581
|
#
|
1430
|
-
#
|
1431
|
-
# load_job = dataset.load_job "my_new_table",
|
1582
|
+
# gcs_uri = "gs://my-bucket/file-name.csv"
|
1583
|
+
# load_job = dataset.load_job "my_new_table", gcs_uri do |job|
|
1432
1584
|
# job.schema do |schema|
|
1433
1585
|
# schema.integer "my_table_id", mode: :required
|
1434
1586
|
# schema.string "my_table_data", mode: :required
|
@@ -1468,8 +1620,8 @@ module Google
|
|
1468
1620
|
# bigquery = Google::Cloud::Bigquery.new
|
1469
1621
|
# dataset = bigquery.dataset "my_dataset"
|
1470
1622
|
#
|
1471
|
-
#
|
1472
|
-
# load_job = dataset.load_job "my_new_table",
|
1623
|
+
# gcs_uri = "gs://my-bucket/file-name.csv"
|
1624
|
+
# load_job = dataset.load_job "my_new_table", gcs_uri do |job|
|
1473
1625
|
# job.schema do |schema|
|
1474
1626
|
# schema.integer "my_table_id", mode: :required
|
1475
1627
|
# schema.string "my_table_data", mode: :required
|
@@ -1510,8 +1662,8 @@ module Google
|
|
1510
1662
|
# bigquery = Google::Cloud::Bigquery.new
|
1511
1663
|
# dataset = bigquery.dataset "my_dataset"
|
1512
1664
|
#
|
1513
|
-
#
|
1514
|
-
# load_job = dataset.load_job "my_new_table",
|
1665
|
+
# gcs_uri = "gs://my-bucket/file-name.csv"
|
1666
|
+
# load_job = dataset.load_job "my_new_table", gcs_uri do |job|
|
1515
1667
|
# job.time_partitioning_type = "DAY"
|
1516
1668
|
# end
|
1517
1669
|
#
|
@@ -1549,8 +1701,8 @@ module Google
|
|
1549
1701
|
# bigquery = Google::Cloud::Bigquery.new
|
1550
1702
|
# dataset = bigquery.dataset "my_dataset"
|
1551
1703
|
#
|
1552
|
-
#
|
1553
|
-
# load_job = dataset.load_job "my_new_table",
|
1704
|
+
# gcs_uri = "gs://my-bucket/file-name.csv"
|
1705
|
+
# load_job = dataset.load_job "my_new_table", gcs_uri do |job|
|
1554
1706
|
# job.time_partitioning_type = "DAY"
|
1555
1707
|
# job.time_partitioning_field = "dob"
|
1556
1708
|
# job.schema do |schema|
|
@@ -1585,8 +1737,8 @@ module Google
|
|
1585
1737
|
# bigquery = Google::Cloud::Bigquery.new
|
1586
1738
|
# dataset = bigquery.dataset "my_dataset"
|
1587
1739
|
#
|
1588
|
-
#
|
1589
|
-
# load_job = dataset.load_job "my_new_table",
|
1740
|
+
# gcs_uri = "gs://my-bucket/file-name.csv"
|
1741
|
+
# load_job = dataset.load_job "my_new_table", gcs_uri do |job|
|
1590
1742
|
# job.time_partitioning_type = "DAY"
|
1591
1743
|
# job.time_partitioning_expiration = 86_400
|
1592
1744
|
# end
|
@@ -1645,8 +1797,8 @@ module Google
|
|
1645
1797
|
# bigquery = Google::Cloud::Bigquery.new
|
1646
1798
|
# dataset = bigquery.dataset "my_dataset"
|
1647
1799
|
#
|
1648
|
-
#
|
1649
|
-
# load_job = dataset.load_job "my_new_table",
|
1800
|
+
# gcs_uri = "gs://my-bucket/file-name.csv"
|
1801
|
+
# load_job = dataset.load_job "my_new_table", gcs_uri do |job|
|
1650
1802
|
# job.time_partitioning_type = "DAY"
|
1651
1803
|
# job.time_partitioning_field = "dob"
|
1652
1804
|
# job.schema do |schema|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: google-cloud-bigquery
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.26.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mike Moore
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2021-01-13 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: concurrent-ruby
|
@@ -26,19 +26,19 @@ dependencies:
|
|
26
26
|
- !ruby/object:Gem::Version
|
27
27
|
version: '1.0'
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
|
-
name: google-
|
29
|
+
name: google-apis-bigquery_v2
|
30
30
|
requirement: !ruby/object:Gem::Requirement
|
31
31
|
requirements:
|
32
32
|
- - "~>"
|
33
33
|
- !ruby/object:Gem::Version
|
34
|
-
version: '0.
|
34
|
+
version: '0.1'
|
35
35
|
type: :runtime
|
36
36
|
prerelease: false
|
37
37
|
version_requirements: !ruby/object:Gem::Requirement
|
38
38
|
requirements:
|
39
39
|
- - "~>"
|
40
40
|
- !ruby/object:Gem::Version
|
41
|
-
version: '0.
|
41
|
+
version: '0.1'
|
42
42
|
- !ruby/object:Gem::Dependency
|
43
43
|
name: google-cloud-core
|
44
44
|
requirement: !ruby/object:Gem::Requirement
|