google-cloud-bigquery 1.25.0 → 1.26.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: ae4b6e4d7c37a945f027fe56425698b19b430f5e75451ce82f2156f2dae96719
- data.tar.gz: d50284a47bf96d5221b574590a60b3af0f77b3a8943d201fd473d78f41a46670
+ metadata.gz: 8c2f8d8d7a564df2c916cced8adf4c0b621a1e11250a3a04decfce08995a4a24
+ data.tar.gz: 57bbda27d4c54e0522b564b8cdc62fb5e9dce8b26c91d3f0604c624ff113f057
  SHA512:
- metadata.gz: 56c0b2d3214b385efb3866e502faa0b0c28c2fffc0c3d2ca3c5e1ad05f4d69e0638e68d59d8310c68d01c26c3db55b42225944379bfb8f6ad7a57eec558ced3a
- data.tar.gz: 0562b98e65e880a40d634f541285241823354a966f4d05a559463a23eacca54addf8843f89b32731d313a56d91dfe2984f6098630c4126d7dbc97033d3dc6a81
+ metadata.gz: f445a86a5435cafc236d82faf91df46a06c6cee8612d8e6c4011450c93b73b61a883510909c6bb25e72516ae1a57c8a605736d82e0b63f0b10ad25cab90a1280
+ data.tar.gz: 43423f2bea5cea82c6c19bcf639bfb690f4718b0450140d299616ece59b147258ef877cafbc17b99c68274c310a0cd0123ae6b6db0b37bba2fb08be0395837f0
CHANGELOG.md CHANGED
@@ -1,5 +1,14 @@
  # Release History

+ ### 1.26.0 / 2021-01-13
+
+ #### Features
+
+ * Add support for Hive Partitioning
+ * Add hive partitioning options to External::DataSource
+ * Add hive partitioning options to LoadJob and LoadJob::Updater
+ * Replace google-api-client with google-apis-bigquery_v2
+
  ### 1.25.0 / 2020-11-16

  #### Features
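
The headline change is hive partitioning support for external data sources and load jobs. As orientation before the diffs, a minimal sketch assembled from the @example blocks added in this release; the `gs://cloud-samples-data` bucket is Google's public sample data.

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"

# Treat hive-partitioned Parquet files on Cloud Storage as an external
# data source, letting BigQuery infer the partition keys from the layout.
external_data = bigquery.external gcs_uri, format: :parquet do |ext|
  ext.hive_partitioning_mode = :auto
  ext.hive_partitioning_source_uri_prefix = source_uri_prefix
end

external_data.hive_partitioning? #=> true
```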
CONTRIBUTING.md CHANGED
@@ -45,7 +45,7 @@ there is a small amount of setup:

  ```sh
  $ cd google-cloud-bigquery/
- $ bundle exec rake bundleupdate
+ $ bundle install
  ```

  ## Console
lib/google/cloud/bigquery/external.rb CHANGED
@@ -52,6 +52,24 @@ module Google
  #   # Retrieve the next page of results
  #   data = data.next if data.next?
  #
+ # @example Hive partitioning options:
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_require_partition_filter = true
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   external_data.hive_partitioning? #=> true
+ #   external_data.hive_partitioning_mode #=> "AUTO"
+ #   external_data.hive_partitioning_require_partition_filter? #=> true
+ #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
  module External
    ##
    # @private New External from URLs and format
@@ -79,7 +97,8 @@ module Google
  # @private Determine source_format from inputs
  def self.source_format_for urls, format
    val = {
-     "csv" => "CSV", "avro" => "AVRO",
+     "csv" => "CSV",
+     "avro" => "AVRO",
      "json" => "NEWLINE_DELIMITED_JSON",
      "newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
      "sheets" => "GOOGLE_SHEETS",
@@ -87,7 +106,9 @@ module Google
      "datastore" => "DATASTORE_BACKUP",
      "backup" => "DATASTORE_BACKUP",
      "datastore_backup" => "DATASTORE_BACKUP",
-     "bigtable" => "BIGTABLE"
+     "bigtable" => "BIGTABLE",
+     "orc" => "ORC",
+     "parquet" => "PARQUET"
    }[format.to_s.downcase]
    return val unless val.nil?
    Array(urls).each do |url|
@@ -110,7 +131,7 @@ module Google
    when "GOOGLE_SHEETS" then External::SheetsSource
    when "BIGTABLE" then External::BigtableSource
    else
-     # AVRO and DATASTORE_BACKUP
+     # AVRO, DATASTORE_BACKUP, PARQUET
      External::DataSource
    end
  end
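
The public entry point for both mappings above is `bigquery.external`. A hedged sketch of how the new formats surface there; the bucket path is a placeholder, and the returned class is inferred from the case statement above (Parquet has no format-specific subclass, so it falls through to the generic wrapper).

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# Per the case statement above, PARQUET resolves to the generic
# External::DataSource wrapper rather than a subclass like CsvSource.
external_data = bigquery.external "gs://my-bucket/path/*", format: :parquet
external_data.class    #=> Google::Cloud::Bigquery::External::DataSource
external_data.format   #=> "PARQUET"
external_data.parquet? #=> true
```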
@@ -148,6 +169,24 @@ module Google
  #   # Retrieve the next page of results
  #   data = data.next if data.next?
  #
+ # @example Hive partitioning options:
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_require_partition_filter = true
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   external_data.hive_partitioning? #=> true
+ #   external_data.hive_partitioning_mode #=> "AUTO"
+ #   external_data.hive_partitioning_require_partition_filter? #=> true
+ #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
  class DataSource
    ##
    # @private The Google API Client object.
@@ -302,6 +341,52 @@ module Google
    @gapi.source_format == "BIGTABLE"
  end

+ ##
+ # Whether the data format is "ORC".
+ #
+ # @return [Boolean]
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :orc do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #   external_data.format #=> "ORC"
+ #   external_data.orc? #=> true
+ #
+ def orc?
+   @gapi.source_format == "ORC"
+ end
+
+ ##
+ # Whether the data format is "PARQUET".
+ #
+ # @return [Boolean]
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #   external_data.format #=> "PARQUET"
+ #   external_data.parquet? #=> true
+ #
+ def parquet?
+   @gapi.source_format == "PARQUET"
+ end
+
  ##
  # The fully-qualified URIs that point to your data in Google Cloud.
  # For Google Cloud Storage URIs: Each URI can contain one '*' wildcard
@@ -536,6 +621,246 @@ module Google
    @gapi.max_bad_records = new_max_bad_records
  end

+ ###
+ # Checks if hive partitioning options are set.
+ #
+ # Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+ # will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+ # If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
+ # Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
+ #
+ # @return [Boolean] `true` when hive partitioning options are set, or `false` otherwise.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_require_partition_filter = true
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   external_data.hive_partitioning? #=> true
+ #   external_data.hive_partitioning_mode #=> "AUTO"
+ #   external_data.hive_partitioning_require_partition_filter? #=> true
+ #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning?
+   !@gapi.hive_partitioning_options.nil?
+ end
+
+ ###
+ # The mode of hive partitioning to use when reading data. The following modes are supported:
+ #
+ # 1. `AUTO`: automatically infer partition key name(s) and type(s).
+ # 2. `STRINGS`: automatically infer partition key name(s). All types are interpreted as strings.
+ # 3. `CUSTOM`: partition key schema is encoded in the source URI prefix.
+ #
+ # @return [String, nil] The mode of hive partitioning, or `nil` if not set.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_require_partition_filter = true
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   external_data.hive_partitioning? #=> true
+ #   external_data.hive_partitioning_mode #=> "AUTO"
+ #   external_data.hive_partitioning_require_partition_filter? #=> true
+ #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_mode
+   @gapi.hive_partitioning_options.mode if hive_partitioning?
+ end
+
+ ##
+ # Sets the mode of hive partitioning to use when reading data. The following modes are supported:
+ #
+ # 1. `auto`: automatically infer partition key name(s) and type(s).
+ # 2. `strings`: automatically infer partition key name(s). All types are interpreted as strings.
+ # 3. `custom`: partition key schema is encoded in the source URI prefix.
+ #
+ # Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+ # will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+ # If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
+ # Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
+ #
+ # See {#format}, {#hive_partitioning_require_partition_filter=} and {#hive_partitioning_source_uri_prefix=}.
+ #
+ # @param [String, Symbol] mode The mode of hive partitioning to use when reading data.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_require_partition_filter = true
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   external_data.hive_partitioning? #=> true
+ #   external_data.hive_partitioning_mode #=> "AUTO"
+ #   external_data.hive_partitioning_require_partition_filter? #=> true
+ #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_mode= mode
+   @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+   @gapi.hive_partitioning_options.mode = mode.to_s.upcase
+ end
+
+ ###
+ # Whether queries over the table using this external data source require a partition filter that can be used
+ # for partition elimination to be specified. Note that this field should only be true when creating a
+ # permanent external table or querying a temporary external table.
+ #
+ # @return [Boolean] `true` when queries over this table require a partition filter, or `false` otherwise.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_require_partition_filter = true
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   external_data.hive_partitioning? #=> true
+ #   external_data.hive_partitioning_mode #=> "AUTO"
+ #   external_data.hive_partitioning_require_partition_filter? #=> true
+ #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_require_partition_filter?
+   return false unless hive_partitioning?
+   !@gapi.hive_partitioning_options.require_partition_filter.nil?
+ end
+
+ ##
+ # Sets whether queries over the table using this external data source require a partition filter
+ # that can be used for partition elimination to be specified.
+ #
+ # See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_source_uri_prefix=}.
+ #
+ # @param [Boolean] require_partition_filter `true` if a partition filter must be specified, `false` otherwise.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_require_partition_filter = true
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   external_data.hive_partitioning? #=> true
+ #   external_data.hive_partitioning_mode #=> "AUTO"
+ #   external_data.hive_partitioning_require_partition_filter? #=> true
+ #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_require_partition_filter= require_partition_filter
+   @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+   @gapi.hive_partitioning_options.require_partition_filter = require_partition_filter
+ end
+
+ ###
+ # The common prefix for all source uris when hive partition detection is requested. The prefix must end
+ # immediately before the partition key encoding begins. For example, consider files following this data
+ # layout:
+ #
+ # ```
+ # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+ # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+ # ```
+ #
+ # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+ # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+ #
+ # @return [String, nil] The common prefix for all source uris, or `nil` if not set.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_require_partition_filter = true
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   external_data.hive_partitioning? #=> true
+ #   external_data.hive_partitioning_mode #=> "AUTO"
+ #   external_data.hive_partitioning_require_partition_filter? #=> true
+ #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_source_uri_prefix
+   @gapi.hive_partitioning_options.source_uri_prefix if hive_partitioning?
+ end
+
+ ##
+ # Sets the common prefix for all source uris when hive partition detection is requested. The prefix must end
+ # immediately before the partition key encoding begins. For example, consider files following this data
+ # layout:
+ #
+ # ```
+ # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+ # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+ # ```
+ #
+ # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+ # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+ #
+ # See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_require_partition_filter=}.
+ #
+ # @param [String] source_uri_prefix The common prefix for all source uris.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_require_partition_filter = true
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   external_data.hive_partitioning? #=> true
+ #   external_data.hive_partitioning_mode #=> "AUTO"
+ #   external_data.hive_partitioning_require_partition_filter? #=> true
+ #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_source_uri_prefix= source_uri_prefix
+   @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+   @gapi.hive_partitioning_options.source_uri_prefix = source_uri_prefix
+ end
+
  ##
  # @private Google API Client object.
  def to_gapi
lib/google/cloud/bigquery/extract_job.rb CHANGED
@@ -103,8 +103,7 @@ module Google
  # table extraction.
  def compression?
    return false unless table?
-   val = @gapi.configuration.extract.compression
-   val == "GZIP"
+   @gapi.configuration.extract.compression == "GZIP"
  end

  ##
@@ -117,8 +116,7 @@ module Google
  #
  def json?
    return false unless table?
-   val = @gapi.configuration.extract.destination_format
-   val == "NEWLINE_DELIMITED_JSON"
+   @gapi.configuration.extract.destination_format == "NEWLINE_DELIMITED_JSON"
  end

  ##
@@ -146,8 +144,7 @@ module Google
  #
  def avro?
    return false unless table?
-   val = @gapi.configuration.extract.destination_format
-   val == "AVRO"
+   @gapi.configuration.extract.destination_format == "AVRO"
  end

  ##
@@ -173,8 +170,7 @@ module Google
  #
  def ml_xgboost_booster?
    return false unless model?
-   val = @gapi.configuration.extract.destination_format
-   val == "ML_XGBOOST_BOOSTER"
+   @gapi.configuration.extract.destination_format == "ML_XGBOOST_BOOSTER"
  end

  ##
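
The four extract-job hunks above are a pure refactor, inlining a temporary variable; behavior is unchanged. A brief hedged sketch of the unchanged public surface (dataset, table, and destination URI are placeholders):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset = bigquery.dataset "my_dataset"
table = dataset.table "my_table"

# Export the table as newline-delimited JSON, then check the predicates.
extract_job = table.extract_job "gs://my-bucket/my-table.json", format: :json

extract_job.json?        #=> true
extract_job.compression? #=> false, since no gzip compression was requested
```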
lib/google/cloud/bigquery/load_job.rb CHANGED
@@ -37,8 +37,8 @@ module Google
  #   bigquery = Google::Cloud::Bigquery.new
  #   dataset = bigquery.dataset "my_dataset"
  #
- #   gs_url = "gs://my-bucket/file-name.csv"
- #   load_job = dataset.load_job "my_new_table", gs_url do |schema|
+ #   gcs_uri = "gs://my-bucket/file-name.csv"
+ #   load_job = dataset.load_job "my_new_table", gcs_uri do |schema|
  #     schema.string "first_name", mode: :required
  #     schema.record "cities_lived", mode: :repeated do |nested_schema|
  #       nested_schema.string "place", mode: :required
@@ -112,8 +112,7 @@ module Google
  #   `false` otherwise.
  #
  def iso8859_1?
-   val = @gapi.configuration.load.encoding
-   val == "ISO-8859-1"
+   @gapi.configuration.load.encoding == "ISO-8859-1"
  end

  ##
@@ -195,8 +194,7 @@ module Google
  #   `NEWLINE_DELIMITED_JSON`, `false` otherwise.
  #
  def json?
-   val = @gapi.configuration.load.source_format
-   val == "NEWLINE_DELIMITED_JSON"
+   @gapi.configuration.load.source_format == "NEWLINE_DELIMITED_JSON"
  end

  ##
@@ -218,8 +216,27 @@ module Google
  #   `false` otherwise.
  #
  def backup?
-   val = @gapi.configuration.load.source_format
-   val == "DATASTORE_BACKUP"
+   @gapi.configuration.load.source_format == "DATASTORE_BACKUP"
+ end
+
+ ##
+ # Checks if the source format is ORC.
+ #
+ # @return [Boolean] `true` when the source format is `ORC`,
+ #   `false` otherwise.
+ #
+ def orc?
+   @gapi.configuration.load.source_format == "ORC"
+ end
+
+ ##
+ # Checks if the source format is Parquet.
+ #
+ # @return [Boolean] `true` when the source format is `PARQUET`,
+ #   `false` otherwise.
+ #
+ def parquet?
+   @gapi.configuration.load.source_format == "PARQUET"
  end

  ##
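
The new `orc?` and `parquet?` predicates mirror the existing format checks. A hedged usage sketch (dataset, table, and bucket names are placeholders):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset = bigquery.dataset "my_dataset"

# Load a Parquet file; the job's source-format predicates reflect it.
load_job = dataset.load_job "my_new_table", "gs://my-bucket/file-name.parquet",
                            format: :parquet

load_job.parquet? #=> true
load_job.orc?     #=> false
```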
@@ -347,6 +364,58 @@ module Google
    nil
  end

+ ###
+ # Checks if hive partitioning options are set.
+ #
+ # @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+ #
+ # @return [Boolean] `true` when hive partitioning options are set, or `false` otherwise.
+ #
+ # @!group Attributes
+ #
+ def hive_partitioning?
+   !@gapi.configuration.load.hive_partitioning_options.nil?
+ end
+
+ ###
+ # The mode of hive partitioning to use when reading data. The following modes are supported:
+ #
+ # 1. `AUTO`: automatically infer partition key name(s) and type(s).
+ # 2. `STRINGS`: automatically infer partition key name(s). All types are interpreted as strings.
+ # 3. `CUSTOM`: partition key schema is encoded in the source URI prefix.
+ #
+ # @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+ #
+ # @return [String, nil] The mode of hive partitioning, or `nil` if not set.
+ #
+ # @!group Attributes
+ #
+ def hive_partitioning_mode
+   @gapi.configuration.load.hive_partitioning_options.mode if hive_partitioning?
+ end
+
+ ###
+ # The common prefix for all source uris when hive partition detection is requested. The prefix must end
+ # immediately before the partition key encoding begins. For example, consider files following this data layout:
+ #
+ # ```
+ # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+ # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+ # ```
+ #
+ # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+ # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+ #
+ # @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+ #
+ # @return [String, nil] The common prefix for all source uris, or `nil` if not set.
+ #
+ # @!group Attributes
+ #
+ def hive_partitioning_source_uri_prefix
+   @gapi.configuration.load.hive_partitioning_options.source_uri_prefix if hive_partitioning?
+ end
+
  ###
  # Checks if the destination table will be range partitioned. See [Creating and using integer range partitioned
  # tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
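
These readers pair with the `LoadJob::Updater` setters added later in this diff. A hedged sketch combining the two, mirroring the setter @example below (dataset and table names are placeholders):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset = bigquery.dataset "my_dataset"

gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"

# Configure hive partitioning via the Updater block, then read it back.
load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  job.format = :parquet
  job.hive_partitioning_mode = :auto
  job.hive_partitioning_source_uri_prefix = source_uri_prefix
end

load_job.hive_partitioning?                  #=> true
load_job.hive_partitioning_mode              #=> "AUTO"
load_job.hive_partitioning_source_uri_prefix #=> source_uri_prefix
```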
@@ -1326,6 +1395,89 @@ module Google
    @gapi.configuration.update! labels: val
  end

+ ##
+ # Sets the mode of hive partitioning to use when reading data. The following modes are supported:
+ #
+ # 1. `auto`: automatically infer partition key name(s) and type(s).
+ # 2. `strings`: automatically infer partition key name(s). All types are interpreted as strings.
+ # 3. `custom`: partition key schema is encoded in the source URI prefix.
+ #
+ # Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+ # will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+ #
+ # See {#format=} and {#hive_partitioning_source_uri_prefix=}.
+ #
+ # @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+ #
+ # @param [String, Symbol] mode The mode of hive partitioning to use when reading data.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
+ #     job.format = :parquet
+ #     job.hive_partitioning_mode = :auto
+ #     job.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   load_job.wait_until_done!
+ #   load_job.done? #=> true
+ #
+ # @!group Attributes
+ #
+ def hive_partitioning_mode= mode
+   @gapi.configuration.load.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+   @gapi.configuration.load.hive_partitioning_options.mode = mode.to_s.upcase
+ end
+
+ ##
+ # Sets the common prefix for all source uris when hive partition detection is requested. The prefix must end
+ # immediately before the partition key encoding begins. For example, consider files following this data
+ # layout:
+ #
+ # ```
+ # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+ # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+ # ```
+ #
+ # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+ # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+ #
+ # See {#hive_partitioning_mode=}.
+ #
+ # @see https://cloud.google.com/bigquery/docs/hive-partitioned-loads-gcs Loading externally partitioned data
+ #
+ # @param [String] source_uri_prefix The common prefix for all source uris.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
+ #     job.format = :parquet
+ #     job.hive_partitioning_mode = :auto
+ #     job.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   load_job.wait_until_done!
+ #   load_job.done? #=> true
+ #
+ # @!group Attributes
+ #
+ def hive_partitioning_source_uri_prefix= source_uri_prefix
+   @gapi.configuration.load.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+   @gapi.configuration.load.hive_partitioning_options.source_uri_prefix = source_uri_prefix
+ end
+
  ##
  # Sets the field on which to range partition the table. See [Creating and using integer range partitioned
  # tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
@@ -1345,8 +1497,8 @@ module Google
  #   bigquery = Google::Cloud::Bigquery.new
  #   dataset = bigquery.dataset "my_dataset"
  #
- #   gs_url = "gs://my-bucket/file-name.csv"
- #   load_job = dataset.load_job "my_new_table", gs_url do |job|
+ #   gcs_uri = "gs://my-bucket/file-name.csv"
+ #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  #     job.schema do |schema|
  #       schema.integer "my_table_id", mode: :required
  #       schema.string "my_table_data", mode: :required
@@ -1386,8 +1538,8 @@ module Google
  #   bigquery = Google::Cloud::Bigquery.new
  #   dataset = bigquery.dataset "my_dataset"
  #
- #   gs_url = "gs://my-bucket/file-name.csv"
- #   load_job = dataset.load_job "my_new_table", gs_url do |job|
+ #   gcs_uri = "gs://my-bucket/file-name.csv"
+ #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  #     job.schema do |schema|
  #       schema.integer "my_table_id", mode: :required
  #       schema.string "my_table_data", mode: :required
@@ -1427,8 +1579,8 @@ module Google
  #   bigquery = Google::Cloud::Bigquery.new
  #   dataset = bigquery.dataset "my_dataset"
  #
- #   gs_url = "gs://my-bucket/file-name.csv"
- #   load_job = dataset.load_job "my_new_table", gs_url do |job|
+ #   gcs_uri = "gs://my-bucket/file-name.csv"
+ #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  #     job.schema do |schema|
  #       schema.integer "my_table_id", mode: :required
  #       schema.string "my_table_data", mode: :required
@@ -1468,8 +1620,8 @@ module Google
  #   bigquery = Google::Cloud::Bigquery.new
  #   dataset = bigquery.dataset "my_dataset"
  #
- #   gs_url = "gs://my-bucket/file-name.csv"
- #   load_job = dataset.load_job "my_new_table", gs_url do |job|
+ #   gcs_uri = "gs://my-bucket/file-name.csv"
+ #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  #     job.schema do |schema|
  #       schema.integer "my_table_id", mode: :required
  #       schema.string "my_table_data", mode: :required
@@ -1510,8 +1662,8 @@ module Google
  #   bigquery = Google::Cloud::Bigquery.new
  #   dataset = bigquery.dataset "my_dataset"
  #
- #   gs_url = "gs://my-bucket/file-name.csv"
- #   load_job = dataset.load_job "my_new_table", gs_url do |job|
+ #   gcs_uri = "gs://my-bucket/file-name.csv"
+ #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  #     job.time_partitioning_type = "DAY"
  #   end
  #
@@ -1549,8 +1701,8 @@ module Google
  #   bigquery = Google::Cloud::Bigquery.new
  #   dataset = bigquery.dataset "my_dataset"
  #
- #   gs_url = "gs://my-bucket/file-name.csv"
- #   load_job = dataset.load_job "my_new_table", gs_url do |job|
+ #   gcs_uri = "gs://my-bucket/file-name.csv"
+ #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  #     job.time_partitioning_type = "DAY"
  #     job.time_partitioning_field = "dob"
  #     job.schema do |schema|
@@ -1585,8 +1737,8 @@ module Google
  #   bigquery = Google::Cloud::Bigquery.new
  #   dataset = bigquery.dataset "my_dataset"
  #
- #   gs_url = "gs://my-bucket/file-name.csv"
- #   load_job = dataset.load_job "my_new_table", gs_url do |job|
+ #   gcs_uri = "gs://my-bucket/file-name.csv"
+ #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  #     job.time_partitioning_type = "DAY"
  #     job.time_partitioning_expiration = 86_400
  #   end
@@ -1645,8 +1797,8 @@ module Google
  #   bigquery = Google::Cloud::Bigquery.new
  #   dataset = bigquery.dataset "my_dataset"
  #
- #   gs_url = "gs://my-bucket/file-name.csv"
- #   load_job = dataset.load_job "my_new_table", gs_url do |job|
+ #   gcs_uri = "gs://my-bucket/file-name.csv"
+ #   load_job = dataset.load_job "my_new_table", gcs_uri do |job|
  #     job.time_partitioning_type = "DAY"
  #     job.time_partitioning_field = "dob"
  #     job.schema do |schema|
lib/google/cloud/bigquery/query_job.rb CHANGED
@@ -94,8 +94,7 @@ module Google
  # otherwise.
  #
  def batch?
-   val = @gapi.configuration.query.priority
-   val == "BATCH"
+   @gapi.configuration.query.priority == "BATCH"
  end

  ##
lib/google/cloud/bigquery/version.rb CHANGED
@@ -16,7 +16,7 @@
  module Google
    module Cloud
      module Bigquery
-       VERSION = "1.25.0".freeze
+       VERSION = "1.26.0".freeze
      end
    end
  end
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: google-cloud-bigquery
  version: !ruby/object:Gem::Version
-   version: 1.25.0
+   version: 1.26.0
  platform: ruby
  authors:
  - Mike Moore
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2020-11-16 00:00:00.000000000 Z
+ date: 2021-01-13 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: concurrent-ruby
@@ -26,19 +26,19 @@ dependencies:
      - !ruby/object:Gem::Version
        version: '1.0'
  - !ruby/object:Gem::Dependency
-   name: google-api-client
+   name: google-apis-bigquery_v2
    requirement: !ruby/object:Gem::Requirement
      requirements:
      - - "~>"
        - !ruby/object:Gem::Version
-         version: '0.47'
+         version: '0.1'
    type: :runtime
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
      - - "~>"
        - !ruby/object:Gem::Version
-         version: '0.47'
+         version: '0.1'
  - !ruby/object:Gem::Dependency
    name: google-cloud-core
    requirement: !ruby/object:Gem::Requirement