google-cloud-bigquery 1.25.0 → 1.26.0

This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: ae4b6e4d7c37a945f027fe56425698b19b430f5e75451ce82f2156f2dae96719
- data.tar.gz: d50284a47bf96d5221b574590a60b3af0f77b3a8943d201fd473d78f41a46670
+ metadata.gz: 8c2f8d8d7a564df2c916cced8adf4c0b621a1e11250a3a04decfce08995a4a24
+ data.tar.gz: 57bbda27d4c54e0522b564b8cdc62fb5e9dce8b26c91d3f0604c624ff113f057
  SHA512:
- metadata.gz: 56c0b2d3214b385efb3866e502faa0b0c28c2fffc0c3d2ca3c5e1ad05f4d69e0638e68d59d8310c68d01c26c3db55b42225944379bfb8f6ad7a57eec558ced3a
- data.tar.gz: 0562b98e65e880a40d634f541285241823354a966f4d05a559463a23eacca54addf8843f89b32731d313a56d91dfe2984f6098630c4126d7dbc97033d3dc6a81
+ metadata.gz: f445a86a5435cafc236d82faf91df46a06c6cee8612d8e6c4011450c93b73b61a883510909c6bb25e72516ae1a57c8a605736d82e0b63f0b10ad25cab90a1280
+ data.tar.gz: 43423f2bea5cea82c6c19bcf639bfb690f4718b0450140d299616ece59b147258ef877cafbc17b99c68274c310a0cd0123ae6b6db0b37bba2fb08be0395837f0
CHANGELOG.md CHANGED
@@ -1,5 +1,14 @@
  # Release History

+ ### 1.26.0 / 2021-01-13
+
+ #### Features
+
+ * Add support for Hive Partitioning
+   * Add hive partitioning options to External::DataSource
+   * Add hive partitioning options to LoadJob and LoadJob::Updater
+ * Replace google-api-client with google-apis-bigquery_v2
+
  ### 1.25.0 / 2020-11-16

  #### Features
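The headline change in 1.26.0 is hive partitioning support. As a quick orientation before the source diff below, here is a minimal sketch of the new API, assembled from the examples this release adds to the documentation (the bucket is Google's public sample data; the rest is illustrative):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# A wildcard URI covering the partitioned files, plus the prefix that ends
# immediately before the partition key encoding (e.g. dt=2020-01-01/) begins.
gcs_uri           = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"

external_data = bigquery.external gcs_uri, format: :parquet do |ext|
  ext.hive_partitioning_mode = :auto # infer partition key names and types
  ext.hive_partitioning_require_partition_filter = true
  ext.hive_partitioning_source_uri_prefix = source_uri_prefix
end

external_data.hive_partitioning?     #=> true
external_data.hive_partitioning_mode #=> "AUTO"
```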
CONTRIBUTING.md CHANGED
@@ -45,7 +45,7 @@ there is a small amount of setup:

  ```sh
  $ cd google-cloud-bigquery/
- $ bundle exec rake bundleupdate
+ $ bundle install
  ```

  ## Console
lib/google/cloud/bigquery/external.rb CHANGED
@@ -52,6 +52,24 @@ module Google
  # # Retrieve the next page of results
  # data = data.next if data.next?
  #
+ # @example Hive partitioning options:
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_require_partition_filter = true
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   external_data.hive_partitioning? #=> true
+ #   external_data.hive_partitioning_mode #=> "AUTO"
+ #   external_data.hive_partitioning_require_partition_filter? #=> true
+ #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
  module External
  ##
  # @private New External from URLs and format
@@ -79,7 +97,8 @@ module Google
  # @private Determine source_format from inputs
  def self.source_format_for urls, format
    val = {
-     "csv" => "CSV", "avro" => "AVRO",
+     "csv" => "CSV",
+     "avro" => "AVRO",
      "json" => "NEWLINE_DELIMITED_JSON",
      "newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
      "sheets" => "GOOGLE_SHEETS",
@@ -87,7 +106,9 @@ module Google
      "datastore" => "DATASTORE_BACKUP",
      "backup" => "DATASTORE_BACKUP",
      "datastore_backup" => "DATASTORE_BACKUP",
-     "bigtable" => "BIGTABLE"
+     "bigtable" => "BIGTABLE",
+     "orc" => "ORC",
+     "parquet" => "PARQUET"
    }[format.to_s.downcase]
    return val unless val.nil?
    Array(urls).each do |url|
@@ -110,7 +131,7 @@ module Google
    when "GOOGLE_SHEETS" then External::SheetsSource
    when "BIGTABLE" then External::BigtableSource
    else
-     # AVRO and DATASTORE_BACKUP
+     # AVRO, DATASTORE_BACKUP, PARQUET
      External::DataSource
    end
  end
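The two hunks above teach the private format lookup about `orc` and `parquet`, and route `PARQUET` (like `AVRO` and `DATASTORE_BACKUP`) to the generic `External::DataSource` class rather than a format-specific subclass. In public API terms, roughly this now works (a sketch; the bucket path is a placeholder):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# Before this release, :orc and :parquet fell through the lookup table.
orc_data     = bigquery.external "gs://my-bucket/path/*", format: :orc
parquet_data = bigquery.external "gs://my-bucket/path/*", format: :parquet

orc_data.format     #=> "ORC"
parquet_data.format #=> "PARQUET"
orc_data.class      #=> Google::Cloud::Bigquery::External::DataSource
```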
@@ -148,6 +169,24 @@ module Google
  # # Retrieve the next page of results
  # data = data.next if data.next?
  #
+ # @example Hive partitioning options:
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_require_partition_filter = true
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   external_data.hive_partitioning? #=> true
+ #   external_data.hive_partitioning_mode #=> "AUTO"
+ #   external_data.hive_partitioning_require_partition_filter? #=> true
+ #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
  class DataSource
  ##
  # @private The Google API Client object.
@@ -302,6 +341,52 @@ module Google
    @gapi.source_format == "BIGTABLE"
  end

+ ##
+ # Whether the data format is "ORC".
+ #
+ # @return [Boolean]
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :orc do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #   external_data.format #=> "ORC"
+ #   external_data.orc? #=> true
+ #
+ def orc?
+   @gapi.source_format == "ORC"
+ end
+
+ ##
+ # Whether the data format is "PARQUET".
+ #
+ # @return [Boolean]
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #   external_data.format #=> "PARQUET"
+ #   external_data.parquet? #=> true
+ #
+ def parquet?
+   @gapi.source_format == "PARQUET"
+ end
+
  ##
  # The fully-qualified URIs that point to your data in Google Cloud.
  # For Google Cloud Storage URIs: Each URI can contain one '*' wildcard
@@ -536,6 +621,246 @@ module Google
    @gapi.max_bad_records = new_max_bad_records
  end

+ ###
+ # Checks if hive partitioning options are set.
+ #
+ # Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+ # will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+ # If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
+ # Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
+ #
+ # @return [Boolean] `true` when hive partitioning options are set, or `false` otherwise.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_require_partition_filter = true
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   external_data.hive_partitioning? #=> true
+ #   external_data.hive_partitioning_mode #=> "AUTO"
+ #   external_data.hive_partitioning_require_partition_filter? #=> true
+ #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning?
+   !@gapi.hive_partitioning_options.nil?
+ end
+
+ ###
+ # The mode of hive partitioning to use when reading data. The following modes are supported:
+ #
+ # 1. `AUTO`: automatically infer partition key name(s) and type(s).
+ # 2. `STRINGS`: automatically infer partition key name(s). All types are interpreted as strings.
+ # 3. `CUSTOM`: partition key schema is encoded in the source URI prefix.
+ #
+ # @return [String, nil] The mode of hive partitioning, or `nil` if not set.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_require_partition_filter = true
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   external_data.hive_partitioning? #=> true
+ #   external_data.hive_partitioning_mode #=> "AUTO"
+ #   external_data.hive_partitioning_require_partition_filter? #=> true
+ #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_mode
+   @gapi.hive_partitioning_options.mode if hive_partitioning?
+ end
+
+ ##
+ # Sets the mode of hive partitioning to use when reading data. The following modes are supported:
+ #
+ # 1. `auto`: automatically infer partition key name(s) and type(s).
+ # 2. `strings`: automatically infer partition key name(s). All types are interpreted as strings.
+ # 3. `custom`: partition key schema is encoded in the source URI prefix.
+ #
+ # Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+ # will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+ # If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
+ # Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
+ #
+ # See {#format}, {#hive_partitioning_require_partition_filter=} and {#hive_partitioning_source_uri_prefix=}.
+ #
+ # @param [String, Symbol] mode The mode of hive partitioning to use when reading data.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_require_partition_filter = true
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   external_data.hive_partitioning? #=> true
+ #   external_data.hive_partitioning_mode #=> "AUTO"
+ #   external_data.hive_partitioning_require_partition_filter? #=> true
+ #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_mode= mode
+   @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+   @gapi.hive_partitioning_options.mode = mode.to_s.upcase
+ end
+
+ ###
+ # Whether queries over the table using this external data source require a partition filter that can be used
+ # for partition elimination to be specified. Note that this field should only be true when creating a
+ # permanent external table or querying a temporary external table.
+ #
+ # @return [Boolean] `true` when queries over this table require a partition filter, or `false` otherwise.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_require_partition_filter = true
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   external_data.hive_partitioning? #=> true
+ #   external_data.hive_partitioning_mode #=> "AUTO"
+ #   external_data.hive_partitioning_require_partition_filter? #=> true
+ #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_require_partition_filter?
+   return false unless hive_partitioning?
+   !@gapi.hive_partitioning_options.require_partition_filter.nil?
+ end
+
+ ##
+ # Sets whether queries over the table using this external data source require a partition filter
+ # that can be used for partition elimination to be specified.
+ #
+ # See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_source_uri_prefix=}.
+ #
+ # @param [Boolean] require_partition_filter `true` if a partition filter must be specified, `false` otherwise.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_require_partition_filter = true
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   external_data.hive_partitioning? #=> true
+ #   external_data.hive_partitioning_mode #=> "AUTO"
+ #   external_data.hive_partitioning_require_partition_filter? #=> true
+ #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_require_partition_filter= require_partition_filter
+   @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+   @gapi.hive_partitioning_options.require_partition_filter = require_partition_filter
+ end
+
+ ###
+ # The common prefix for all source uris when hive partition detection is requested. The prefix must end
+ # immediately before the partition key encoding begins. For example, consider files following this data
+ # layout:
+ #
+ # ```
+ # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+ # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+ # ```
+ #
+ # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+ # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+ #
+ # @return [String, nil] The common prefix for all source uris, or `nil` if not set.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_require_partition_filter = true
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   external_data.hive_partitioning? #=> true
+ #   external_data.hive_partitioning_mode #=> "AUTO"
+ #   external_data.hive_partitioning_require_partition_filter? #=> true
+ #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_source_uri_prefix
+   @gapi.hive_partitioning_options.source_uri_prefix if hive_partitioning?
+ end
+
+ ##
+ # Sets the common prefix for all source uris when hive partition detection is requested. The prefix must end
+ # immediately before the partition key encoding begins. For example, consider files following this data
+ # layout:
+ #
+ # ```
+ # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+ # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+ # ```
+ #
+ # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+ # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+ #
+ # See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_require_partition_filter=}.
+ #
+ # @param [String] source_uri_prefix The common prefix for all source uris.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_require_partition_filter = true
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   external_data.hive_partitioning? #=> true
+ #   external_data.hive_partitioning_mode #=> "AUTO"
+ #   external_data.hive_partitioning_require_partition_filter? #=> true
+ #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_source_uri_prefix= source_uri_prefix
+   @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+   @gapi.hive_partitioning_options.source_uri_prefix = source_uri_prefix
+ end
+
  ##
  # @private Google API Client object.
  def to_gapi
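Once configured, the external data source plugs into queries the same way as any other external source in this library, via the `external` option. A sketch under the same assumptions as the documentation examples above (`my_ext_table` is an arbitrary alias; the partition key name depends on the data layout and is assumed here to be `dt`):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

gcs_uri           = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
external_data = bigquery.external gcs_uri, format: :parquet do |ext|
  ext.hive_partitioning_mode = :auto
  ext.hive_partitioning_require_partition_filter = true
  ext.hive_partitioning_source_uri_prefix = source_uri_prefix
end

# Because require_partition_filter is set, the query must constrain the
# inferred partition key.
data = bigquery.query "SELECT * FROM my_ext_table WHERE dt = '2020-11-15'",
                      external: { my_ext_table: external_data }
data.each { |row| puts row }
```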
lib/google/cloud/bigquery/extract_job.rb CHANGED
@@ -103,8 +103,7 @@ module Google
  # table extraction.
  def compression?
    return false unless table?
-   val = @gapi.configuration.extract.compression
-   val == "GZIP"
+   @gapi.configuration.extract.compression == "GZIP"
  end

  ##
@@ -117,8 +116,7 @@ module Google
  #
  def json?
    return false unless table?
-   val = @gapi.configuration.extract.destination_format
-   val == "NEWLINE_DELIMITED_JSON"
+   @gapi.configuration.extract.destination_format == "NEWLINE_DELIMITED_JSON"
  end

  ##
@@ -146,8 +144,7 @@ module Google
  #
  def avro?
    return false unless table?
-   val = @gapi.configuration.extract.destination_format
-   val == "AVRO"
+   @gapi.configuration.extract.destination_format == "AVRO"
  end

  ##
@@ -173,8 +170,7 @@ module Google
  #
  def ml_xgboost_booster?
    return false unless model?
-   val = @gapi.configuration.extract.destination_format
-   val == "ML_XGBOOST_BOOSTER"
+   @gapi.configuration.extract.destination_format == "ML_XGBOOST_BOOSTER"
  end

  ##
lib/google/cloud/bigquery/load_job.rb CHANGED
@@ -37,8 +37,8 @@ module Google
  # bigquery = Google::Cloud::Bigquery.new
  # dataset = bigquery.dataset "my_dataset"
  #
- # gs_url = "gs://my-bucket/file-name.csv"
- # load_job = dataset.load_job "my_new_table", gs_url do |schema|
+ # gcs_uri = "gs://my-bucket/file-name.csv"
+ # load_job = dataset.load_job "my_new_table", gcs_uri do |schema|
  #   schema.string "first_name", mode: :required
  #   schema.record "cities_lived", mode: :repeated do |nested_schema|
  #     nested_schema.string "place", mode: :required
@@ -112,8 +112,7 @@ module Google
  # `false` otherwise.
  #
  def iso8859_1?
-   val = @gapi.configuration.load.encoding
-   val == "ISO-8859-1"
+   @gapi.configuration.load.encoding == "ISO-8859-1"
  end

  ##
@@ -195,8 +194,7 @@ module Google
  # `NEWLINE_DELIMITED_JSON`, `false` otherwise.
  #
  def json?
-   val = @gapi.configuration.load.source_format
-   val == "NEWLINE_DELIMITED_JSON"
+   @gapi.configuration.load.source_format == "NEWLINE_DELIMITED_JSON"
  end

  ##
@@ -218,8 +216,27 @@ module Google
  # `false` otherwise.
  #
  def backup?
-   val = @gapi.configuration.load.source_format
-   val == "DATASTORE_BACKUP"
+   @gapi.configuration.load.source_format == "DATASTORE_BACKUP"
+ end
+
+ ##
+ # Checks if the source format is ORC.
+ #
+ # @return [Boolean] `true` when the source format is `ORC`,
+ #   `false` otherwise.
+ #
+ def orc?
+   @gapi.configuration.load.source_format == "ORC"
+ end
+
+ ##
+ # Checks if the source format is Parquet.
+ #
+ # @return [Boolean] `true` when the source format is `PARQUET`,
+ #   `false` otherwise.
+ #
+ def parquet?
+   @gapi.configuration.load.source_format == "PARQUET"
  end

  ##
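The new `orc?` and `parquet?` predicates mirror the existing `json?` and `backup?` helpers: each is a plain string comparison against the job's configured source format. A usage sketch (the dataset, table, and file path are placeholders in the same style as the documentation examples):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset" # assumed to exist

load_job = dataset.load_job "my_new_table",
                            "gs://my-bucket/file-name.parquet",
                            format: :parquet

load_job.parquet? #=> true
load_job.orc?     #=> false
```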
@@ -347,6 +364,58 @@ module Google
    nil
  end

+ ###
+ # Checks if hive partitioning options are set.
+ #
+ # @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+ #
+ # @return [Boolean] `true` when hive partitioning options are set, or `false` otherwise.
+ #
+ # @!group Attributes
+ #
+ def hive_partitioning?
+   !@gapi.configuration.load.hive_partitioning_options.nil?
+ end
+
+ ###
+ # The mode of hive partitioning to use when reading data. The following modes are supported:
+ #
+ # 1. `AUTO`: automatically infer partition key name(s) and type(s).
+ # 2. `STRINGS`: automatically infer partition key name(s). All types are interpreted as strings.
+ # 3. `CUSTOM`: partition key schema is encoded in the source URI prefix.
+ #
+ # @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+ #
+ # @return [String, nil] The mode of hive partitioning, or `nil` if not set.
+ #
+ # @!group Attributes
+ #
+ def hive_partitioning_mode
+   @gapi.configuration.load.hive_partitioning_options.mode if hive_partitioning?
+ end
+
+ ###
+ # The common prefix for all source uris when hive partition detection is requested. The prefix must end
+ # immediately before the partition key encoding begins. For example, consider files following this data layout:
+ #
+ # ```
+ # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+ # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+ # ```
+ #
+ # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+ # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+ #
+ # @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+ #
+ # @return [String, nil] The common prefix for all source uris, or `nil` if not set.
+ #
+ # @!group Attributes
+ #
+ def hive_partitioning_source_uri_prefix
+   @gapi.configuration.load.hive_partitioning_options.source_uri_prefix if hive_partitioning?
+ end
+
  ###
  # Checks if the destination table will be range partitioned. See [Creating and using integer range partitioned
  # tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
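These readers make the hive partitioning configuration inspectable on the job itself; together with the `LoadJob::Updater` writers added further down in this file, a round trip looks like this (a sketch built from the documentation examples in this diff; the dataset is assumed to exist):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset" # assumed to exist

gcs_uri           = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  job.format = :parquet
  job.hive_partitioning_mode = :auto
  job.hive_partitioning_source_uri_prefix = source_uri_prefix
end

load_job.hive_partitioning?                  #=> true
load_job.hive_partitioning_mode              #=> "AUTO"
load_job.hive_partitioning_source_uri_prefix #=> source_uri_prefix
```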
@@ -1326,6 +1395,89 @@ module Google
    @gapi.configuration.update! labels: val
  end

+ ##
+ # Sets the mode of hive partitioning to use when reading data. The following modes are supported:
+ #
+ # 1. `auto`: automatically infer partition key name(s) and type(s).
+ # 2. `strings`: automatically infer partition key name(s). All types are interpreted as strings.
+ # 3. `custom`: partition key schema is encoded in the source URI prefix.
+ #
+ # Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+ # will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+ #
+ # See {#format=} and {#hive_partitioning_source_uri_prefix=}.
+ #
+ # @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+ #
+ # @param [String, Symbol] mode The mode of hive partitioning to use when reading data.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
+ #     job.format = :parquet
+ #     job.hive_partitioning_mode = :auto
+ #     job.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   load_job.wait_until_done!
+ #   load_job.done? #=> true
+ #
+ # @!group Attributes
+ #
+ def hive_partitioning_mode= mode
+   @gapi.configuration.load.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+   @gapi.configuration.load.hive_partitioning_options.mode = mode.to_s.upcase
+ end
+
+ ##
+ # Sets the common prefix for all source uris when hive partition detection is requested. The prefix must end
+ # immediately before the partition key encoding begins. For example, consider files following this data
+ # layout:
+ #
+ # ```
+ # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+ # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+ # ```
+ #
+ # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+ # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+ #
+ # See {#hive_partitioning_mode=}.
+ #
+ # @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+ #
+ # @param [String] source_uri_prefix The common prefix for all source uris.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
+ #     job.format = :parquet
+ #     job.hive_partitioning_mode = :auto
+ #     job.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   load_job.wait_until_done!
+ #   load_job.done? #=> true
+ #
+ # @!group Attributes
+ #
+ def hive_partitioning_source_uri_prefix= source_uri_prefix
+   @gapi.configuration.load.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+   @gapi.configuration.load.hive_partitioning_options.source_uri_prefix = source_uri_prefix
+ end
+
  ##
  # Sets the field on which to range partition the table. See [Creating and using integer range partitioned
  # tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
@@ -1345,8 +1497,8 @@ module Google
  # bigquery = Google::Cloud::Bigquery.new
  # dataset = bigquery.dataset "my_dataset"
  #
- # gs_url = "gs://my-bucket/file-name.csv"
- # load_job = dataset.load_job "my_new_table", gs_url do |job|
+ # gcs_uri = "gs://my-bucket/file-name.csv"
+ # load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  #   job.schema do |schema|
  #     schema.integer "my_table_id", mode: :required
  #     schema.string "my_table_data", mode: :required
@@ -1386,8 +1538,8 @@ module Google
  # bigquery = Google::Cloud::Bigquery.new
  # dataset = bigquery.dataset "my_dataset"
  #
- # gs_url = "gs://my-bucket/file-name.csv"
- # load_job = dataset.load_job "my_new_table", gs_url do |job|
+ # gcs_uri = "gs://my-bucket/file-name.csv"
+ # load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  #   job.schema do |schema|
  #     schema.integer "my_table_id", mode: :required
  #     schema.string "my_table_data", mode: :required
@@ -1427,8 +1579,8 @@ module Google
  # bigquery = Google::Cloud::Bigquery.new
  # dataset = bigquery.dataset "my_dataset"
  #
- # gs_url = "gs://my-bucket/file-name.csv"
- # load_job = dataset.load_job "my_new_table", gs_url do |job|
+ # gcs_uri = "gs://my-bucket/file-name.csv"
+ # load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  #   job.schema do |schema|
  #     schema.integer "my_table_id", mode: :required
  #     schema.string "my_table_data", mode: :required
@@ -1468,8 +1620,8 @@ module Google
  # bigquery = Google::Cloud::Bigquery.new
  # dataset = bigquery.dataset "my_dataset"
  #
- # gs_url = "gs://my-bucket/file-name.csv"
- # load_job = dataset.load_job "my_new_table", gs_url do |job|
+ # gcs_uri = "gs://my-bucket/file-name.csv"
+ # load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  #   job.schema do |schema|
  #     schema.integer "my_table_id", mode: :required
  #     schema.string "my_table_data", mode: :required
@@ -1510,8 +1662,8 @@ module Google
  # bigquery = Google::Cloud::Bigquery.new
  # dataset = bigquery.dataset "my_dataset"
  #
- # gs_url = "gs://my-bucket/file-name.csv"
- # load_job = dataset.load_job "my_new_table", gs_url do |job|
+ # gcs_uri = "gs://my-bucket/file-name.csv"
+ # load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  #   job.time_partitioning_type = "DAY"
  # end
  #
@@ -1549,8 +1701,8 @@ module Google
  # bigquery = Google::Cloud::Bigquery.new
  # dataset = bigquery.dataset "my_dataset"
  #
- # gs_url = "gs://my-bucket/file-name.csv"
- # load_job = dataset.load_job "my_new_table", gs_url do |job|
+ # gcs_uri = "gs://my-bucket/file-name.csv"
+ # load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  #   job.time_partitioning_type = "DAY"
  #   job.time_partitioning_field = "dob"
  #   job.schema do |schema|
@@ -1585,8 +1737,8 @@ module Google
  # bigquery = Google::Cloud::Bigquery.new
  # dataset = bigquery.dataset "my_dataset"
  #
- # gs_url = "gs://my-bucket/file-name.csv"
- # load_job = dataset.load_job "my_new_table", gs_url do |job|
+ # gcs_uri = "gs://my-bucket/file-name.csv"
+ # load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  #   job.time_partitioning_type = "DAY"
  #   job.time_partitioning_expiration = 86_400
  # end
@@ -1645,8 +1797,8 @@ module Google
  # bigquery = Google::Cloud::Bigquery.new
  # dataset = bigquery.dataset "my_dataset"
  #
- # gs_url = "gs://my-bucket/file-name.csv"
- # load_job = dataset.load_job "my_new_table", gs_url do |job|
+ # gcs_uri = "gs://my-bucket/file-name.csv"
+ # load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  #   job.time_partitioning_type = "DAY"
  #   job.time_partitioning_field = "dob"
  #   job.schema do |schema|
lib/google/cloud/bigquery/query_job.rb CHANGED
@@ -94,8 +94,7 @@ module Google
  # otherwise.
  #
  def batch?
-   val = @gapi.configuration.query.priority
-   val == "BATCH"
+   @gapi.configuration.query.priority == "BATCH"
  end

  ##
lib/google/cloud/bigquery/version.rb CHANGED
@@ -16,7 +16,7 @@
  module Google
    module Cloud
      module Bigquery
-       VERSION = "1.25.0".freeze
+       VERSION = "1.26.0".freeze
      end
    end
  end
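To pick up this release, bump the constraint in your Gemfile; the dependency swap from google-api-client to google-apis-bigquery_v2 (shown in the metadata diff below) happens transitively. A sketch; the constraint level is your choice:

```ruby
# Gemfile
source "https://rubygems.org"

# 1.26.0 replaces the monolithic google-api-client dependency with the
# slimmer google-apis-bigquery_v2 gem; `bundle update google-cloud-bigquery`
# pulls it in automatically.
gem "google-cloud-bigquery", "~> 1.26"
```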
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: google-cloud-bigquery
  version: !ruby/object:Gem::Version
-   version: 1.25.0
+   version: 1.26.0
  platform: ruby
  authors:
  - Mike Moore
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2020-11-16 00:00:00.000000000 Z
+ date: 2021-01-13 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: concurrent-ruby
@@ -26,19 +26,19 @@ dependencies:
      - !ruby/object:Gem::Version
        version: '1.0'
  - !ruby/object:Gem::Dependency
-   name: google-api-client
+   name: google-apis-bigquery_v2
    requirement: !ruby/object:Gem::Requirement
      requirements:
      - - "~>"
        - !ruby/object:Gem::Version
-         version: '0.47'
+         version: '0.1'
    type: :runtime
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
      - - "~>"
        - !ruby/object:Gem::Version
-         version: '0.47'
+         version: '0.1'
  - !ruby/object:Gem::Dependency
    name: google-cloud-core
    requirement: !ruby/object:Gem::Requirement