google-cloud-bigquery 1.21.2 → 1.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -52,6 +52,24 @@ module Google
  # # Retrieve the next page of results
  # data = data.next if data.next?
  #
+ # @example Hive partitioning options:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
  module External
  ##
  # @private New External from URLs and format
@@ -79,7 +97,8 @@ module Google
  # @private Determine source_format from inputs
  def self.source_format_for urls, format
  val = {
- "csv" => "CSV", "avro" => "AVRO",
+ "csv" => "CSV",
+ "avro" => "AVRO",
  "json" => "NEWLINE_DELIMITED_JSON",
  "newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
  "sheets" => "GOOGLE_SHEETS",
@@ -87,7 +106,9 @@ module Google
  "datastore" => "DATASTORE_BACKUP",
  "backup" => "DATASTORE_BACKUP",
  "datastore_backup" => "DATASTORE_BACKUP",
- "bigtable" => "BIGTABLE"
+ "bigtable" => "BIGTABLE",
+ "orc" => "ORC",
+ "parquet" => "PARQUET"
  }[format.to_s.downcase]
  return val unless val.nil?
  Array(urls).each do |url|
@@ -110,7 +131,7 @@ module Google
  when "GOOGLE_SHEETS" then External::SheetsSource
  when "BIGTABLE" then External::BigtableSource
  else
- # AVRO and DATASTORE_BACKUP
+ # AVRO, DATASTORE_BACKUP, PARQUET
  External::DataSource
  end
  end
@@ -148,6 +169,24 @@ module Google
  # # Retrieve the next page of results
  # data = data.next if data.next?
  #
+ # @example Hive partitioning options:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
  class DataSource
  ##
  # @private The Google API Client object.
@@ -302,6 +341,52 @@ module Google
  @gapi.source_format == "BIGTABLE"
  end
 
+ ##
+ # Whether the data format is "ORC".
+ #
+ # @return [Boolean]
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :orc do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ # external_data.format #=> "ORC"
+ # external_data.orc? #=> true
+ #
+ def orc?
+ @gapi.source_format == "ORC"
+ end
+
+ ##
+ # Whether the data format is "PARQUET".
+ #
+ # @return [Boolean]
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ # external_data.format #=> "PARQUET"
+ # external_data.parquet? #=> true
+ #
+ def parquet?
+ @gapi.source_format == "PARQUET"
+ end
+
  ##
  # The fully-qualified URIs that point to your data in Google Cloud.
  # For Google Cloud Storage URIs: Each URI can contain one '*' wildcard
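For orientation (this note is not part of the packaged source): the hunks above add `orc` and `parquet` to the `source_format_for` lookup and give `External::DataSource` matching `orc?` and `parquet?` predicates, with both formats handled by the generic data source class per the updated comment in the format-dispatch `case`. A minimal sketch, reusing the public sample-bucket URI from the gem's own examples:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# Sample path taken from the @example blocks above; any Parquet files on GCS would do.
gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"

# format: :parquet (or :orc) now resolves to the PARQUET (or ORC) source format.
external_data = bigquery.external gcs_uri, format: :parquet

external_data.format   #=> "PARQUET"
external_data.parquet? #=> true
external_data.orc?     #=> false
```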
@@ -536,6 +621,246 @@ module Google
  @gapi.max_bad_records = new_max_bad_records
  end
 
+ ###
+ # Checks if hive partitioning options are set.
+ #
+ # Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+ # will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+ # If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
+ # Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
+ #
+ # @return [Boolean] `true` when hive partitioning options are set, or `false` otherwise.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning?
+ !@gapi.hive_partitioning_options.nil?
+ end
+
+ ###
+ # The mode of hive partitioning to use when reading data. The following modes are supported:
+ #
+ # 1. `AUTO`: automatically infer partition key name(s) and type(s).
+ # 2. `STRINGS`: automatically infer partition key name(s). All types are interpreted as strings.
+ # 3. `CUSTOM`: partition key schema is encoded in the source URI prefix.
+ #
+ # @return [String, nil] The mode of hive partitioning, or `nil` if not set.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_mode
+ @gapi.hive_partitioning_options.mode if hive_partitioning?
+ end
+
+ ##
+ # Sets the mode of hive partitioning to use when reading data. The following modes are supported:
+ #
+ # 1. `auto`: automatically infer partition key name(s) and type(s).
+ # 2. `strings`: automatically infer partition key name(s). All types are interpreted as strings.
+ # 3. `custom`: partition key schema is encoded in the source URI prefix.
+ #
+ # Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+ # will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+ # If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
+ # Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
+ #
+ # See {#format}, {#hive_partitioning_require_partition_filter=} and {#hive_partitioning_source_uri_prefix=}.
+ #
+ # @param [String, Symbol] mode The mode of hive partitioning to use when reading data.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_mode= mode
+ @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+ @gapi.hive_partitioning_options.mode = mode.to_s.upcase
+ end
+
+ ###
+ # Whether queries over the table using this external data source require a partition filter that can be used
+ # for partition elimination to be specified. Note that this field should only be true when creating a
+ # permanent external table or querying a temporary external table.
+ #
+ # @return [Boolean] `true` when queries over this table require a partition filter, or `false` otherwise.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_require_partition_filter?
+ return false unless hive_partitioning?
+ !@gapi.hive_partitioning_options.require_partition_filter.nil?
+ end
+
+ ##
+ # Sets whether queries over the table using this external data source require a partition filter
+ # that can be used for partition elimination to be specified.
+ #
+ # See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_source_uri_prefix=}.
+ #
+ # @param [Boolean] require_partition_filter `true` if a partition filter must be specified, `false` otherwise.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_require_partition_filter= require_partition_filter
+ @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+ @gapi.hive_partitioning_options.require_partition_filter = require_partition_filter
+ end
+
+ ###
+ # The common prefix for all source uris when hive partition detection is requested. The prefix must end
+ # immediately before the partition key encoding begins. For example, consider files following this data
+ # layout:
+ #
+ # ```
+ # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+ # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+ # ```
+ #
+ # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+ # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+ #
+ # @return [String, nil] The common prefix for all source uris, or `nil` if not set.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_source_uri_prefix
+ @gapi.hive_partitioning_options.source_uri_prefix if hive_partitioning?
+ end
+
+ ##
+ # Sets the common prefix for all source uris when hive partition detection is requested. The prefix must end
+ # immediately before the partition key encoding begins. For example, consider files following this data
+ # layout:
+ #
+ # ```
+ # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+ # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+ # ```
+ #
+ # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+ # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+ #
+ # See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_require_partition_filter=}.
+ #
+ # @param [String] source_uri_prefix The common prefix for all source uris.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ # source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ # external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ # ext.hive_partitioning_mode = :auto
+ # ext.hive_partitioning_require_partition_filter = true
+ # ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ # end
+ #
+ # external_data.hive_partitioning? #=> true
+ # external_data.hive_partitioning_mode #=> "AUTO"
+ # external_data.hive_partitioning_require_partition_filter? #=> true
+ # external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_source_uri_prefix= source_uri_prefix
+ @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+ @gapi.hive_partitioning_options.source_uri_prefix = source_uri_prefix
+ end
+
  ##
  # @private Google API Client object.
  def to_gapi
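Also for orientation (not part of the packaged source): the hive partitioning writers above lazily build a `Google::Apis::BigqueryV2::HivePartitioningOptions` object and upcase the mode, so `:auto`, `"auto"`, and `"AUTO"` are equivalent. Note too that, as implemented, `hive_partitioning_require_partition_filter?` reports whether the field has been set at all (`!...nil?`), not its boolean value. A condensed version of the example repeated in the YARD docs above:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# Sample URIs from the @example blocks above.
gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"

external_data = bigquery.external gcs_uri, format: :parquet do |ext|
  ext.hive_partitioning_mode = :auto   # stored as "AUTO"
  ext.hive_partitioning_require_partition_filter = true
  ext.hive_partitioning_source_uri_prefix = source_uri_prefix
end

external_data.hive_partitioning?                          #=> true
external_data.hive_partitioning_mode                      #=> "AUTO"
external_data.hive_partitioning_require_partition_filter? #=> true
external_data.hive_partitioning_source_uri_prefix         #=> source_uri_prefix
```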
@@ -20,15 +20,17 @@ module Google
  # # ExtractJob
  #
  # A {Job} subclass representing an export operation that may be performed
- # on a {Table}. A ExtractJob instance is created when you call
- # {Table#extract_job}.
+ # on a {Table} or {Model}. A ExtractJob instance is returned when you call
+ # {Project#extract_job}, {Table#extract_job} or {Model#extract_job}.
  #
  # @see https://cloud.google.com/bigquery/docs/exporting-data
- # Exporting Data From BigQuery
+ # Exporting table data
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+ # Exporting models
  # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
  # reference
  #
- # @example
+ # @example Export table data
  # require "google/cloud/bigquery"
  #
  # bigquery = Google::Cloud::Bigquery.new
@@ -40,6 +42,18 @@ module Google
  # extract_job.wait_until_done!
  # extract_job.done? #=> true
  #
+ # @example Export a model
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ # model = dataset.model "my_model"
+ #
+ # extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
+ #
+ # extract_job.wait_until_done!
+ # extract_job.done? #=> true
+ #
  class ExtractJob < Job
  ##
  # The URI or URIs representing the Google Cloud Storage files to which
@@ -49,71 +63,126 @@ module Google
  end
 
  ##
- # The table from which the data is exported. This is the table upon
- # which {Table#extract_job} was called.
+ # The table or model which is exported.
  #
- # @return [Table] A table instance.
+ # @return [Table, Model, nil] A table or model instance, or `nil`.
  #
  def source
- table = @gapi.configuration.extract.source_table
- return nil unless table
- retrieve_table table.project_id, table.dataset_id, table.table_id
+ if (table = @gapi.configuration.extract.source_table)
+ retrieve_table table.project_id, table.dataset_id, table.table_id
+ elsif (model = @gapi.configuration.extract.source_model)
+ retrieve_model model.project_id, model.dataset_id, model.model_id
+ end
  end
 
  ##
- # Checks if the export operation compresses the data using gzip. The
- # default is `false`.
+ # Whether the source of the export job is a table. See {#source}.
  #
- # @return [Boolean] `true` when `GZIP`, `false` otherwise.
+ # @return [Boolean] `true` when the source is a table, `false`
+ # otherwise.
  #
- def compression?
- val = @gapi.configuration.extract.compression
- val == "GZIP"
+ def table?
+ !@gapi.configuration.extract.source_table.nil?
  end
 
  ##
- # Checks if the destination format for the data is [newline-delimited
- # JSON](http://jsonlines.org/). The default is `false`.
+ # Whether the source of the export job is a model. See {#source}.
  #
- # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false`
+ # @return [Boolean] `true` when the source is a model, `false`
  # otherwise.
  #
+ def model?
+ !@gapi.configuration.extract.source_model.nil?
+ end
+
+ ##
+ # Checks if the export operation compresses the data using gzip. The
+ # default is `false`. Not applicable when extracting models.
+ #
+ # @return [Boolean] `true` when `GZIP`, `false` if not `GZIP` or not a
+ # table extraction.
+ def compression?
+ return false unless table?
+ @gapi.configuration.extract.compression == "GZIP"
+ end
+
+ ##
+ # Checks if the destination format for the table data is [newline-delimited
+ # JSON](http://jsonlines.org/). The default is `false`. Not applicable when
+ # extracting models.
+ #
+ # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false` if not
+ # `NEWLINE_DELIMITED_JSON` or not a table extraction.
+ #
  def json?
- val = @gapi.configuration.extract.destination_format
- val == "NEWLINE_DELIMITED_JSON"
+ return false unless table?
+ @gapi.configuration.extract.destination_format == "NEWLINE_DELIMITED_JSON"
  end
 
  ##
- # Checks if the destination format for the data is CSV. Tables with
+ # Checks if the destination format for the table data is CSV. Tables with
  # nested or repeated fields cannot be exported as CSV. The default is
- # `true`.
+ # `true` for tables. Not applicable when extracting models.
  #
- # @return [Boolean] `true` when `CSV`, `false` otherwise.
+ # @return [Boolean] `true` when `CSV`, or `false` if not `CSV` or not a
+ # table extraction.
  #
  def csv?
+ return false unless table?
  val = @gapi.configuration.extract.destination_format
  return true if val.nil?
  val == "CSV"
  end
 
  ##
- # Checks if the destination format for the data is
- # [Avro](http://avro.apache.org/). The default is `false`.
+ # Checks if the destination format for the table data is
+ # [Avro](http://avro.apache.org/). The default is `false`. Not applicable
+ # when extracting models.
  #
- # @return [Boolean] `true` when `AVRO`, `false` otherwise.
+ # @return [Boolean] `true` when `AVRO`, `false` if not `AVRO` or not a
+ # table extraction.
  #
  def avro?
+ return false unless table?
+ @gapi.configuration.extract.destination_format == "AVRO"
+ end
+
+ ##
+ # Checks if the destination format for the model is TensorFlow SavedModel.
+ # The default is `true` for models. Not applicable when extracting tables.
+ #
+ # @return [Boolean] `true` when `ML_TF_SAVED_MODEL`, `false` if not
+ # `ML_TF_SAVED_MODEL` or not a model extraction.
+ #
+ def ml_tf_saved_model?
+ return false unless model?
  val = @gapi.configuration.extract.destination_format
- val == "AVRO"
+ return true if val.nil?
+ val == "ML_TF_SAVED_MODEL"
+ end
+
+ ##
+ # Checks if the destination format for the model is XGBoost. The default
+ # is `false`. Not applicable when extracting tables.
+ #
+ # @return [Boolean] `true` when `ML_XGBOOST_BOOSTER`, `false` if not
+ # `ML_XGBOOST_BOOSTER` or not a model extraction.
+ #
+ def ml_xgboost_booster?
+ return false unless model?
+ @gapi.configuration.extract.destination_format == "ML_XGBOOST_BOOSTER"
  end
 
  ##
  # The character or symbol the operation uses to delimit fields in the
- # exported data. The default is a comma (,).
+ # exported data. The default is a comma (,) for tables. Not applicable
+ # when extracting models.
  #
- # @return [String] A string containing the character, such as `","`.
+ # @return [String, nil] A string containing the character, such as `","`,
+ # `nil` if not a table extraction.
  #
  def delimiter
+ return unless table?
  val = @gapi.configuration.extract.field_delimiter
  val = "," if val.nil?
  val
@@ -121,12 +190,13 @@ module Google
 
  ##
  # Checks if the exported data contains a header row. The default is
- # `true`.
+ # `true` for tables. Not applicable when extracting models.
  #
  # @return [Boolean] `true` when the print header configuration is
- # present or `nil`, `false` otherwise.
+ # present or `nil`, `false` if disabled or not a table extraction.
  #
  def print_header?
+ return false unless table?
  val = @gapi.configuration.extract.print_header
  val = true if val.nil?
  val
@@ -159,12 +229,14 @@ module Google
  # whether to enable extracting applicable column types (such as
  # `TIMESTAMP`) to their corresponding AVRO logical types
  # (`timestamp-micros`), instead of only using their raw types
- # (`avro-long`).
+ # (`avro-long`). Not applicable when extracting models.
  #
  # @return [Boolean] `true` when applicable column types will use their
- # corresponding AVRO logical types, `false` otherwise.
+ # corresponding AVRO logical types, `false` if not enabled or not a
+ # table extraction.
  #
  def use_avro_logical_types?
+ return false unless table?
  @gapi.configuration.extract.use_avro_logical_types
  end
 
@@ -182,19 +254,24 @@ module Google
  #
  # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
  # configuration object for setting query options.
- def self.from_options service, table, storage_files, options
+ def self.from_options service, source, storage_files, options
  job_ref = service.job_ref_from options[:job_id], options[:prefix]
  storage_urls = Array(storage_files).map do |url|
  url.respond_to?(:to_gs_url) ? url.to_gs_url : url
  end
  options[:format] ||= Convert.derive_source_format storage_urls.first
+ extract_config = Google::Apis::BigqueryV2::JobConfigurationExtract.new(
+ destination_uris: Array(storage_urls)
+ )
+ if source.is_a? Google::Apis::BigqueryV2::TableReference
+ extract_config.source_table = source
+ elsif source.is_a? Google::Apis::BigqueryV2::ModelReference
+ extract_config.source_model = source
+ end
  job = Google::Apis::BigqueryV2::Job.new(
  job_reference: job_ref,
  configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
- extract: Google::Apis::BigqueryV2::JobConfigurationExtract.new(
- destination_uris: Array(storage_urls),
- source_table: table
- ),
+ extract: extract_config,
  dry_run: options[:dryrun]
  )
  )
@@ -253,7 +330,7 @@ module Google
  end
 
  ##
- # Sets the compression type.
+ # Sets the compression type. Not applicable when extracting models.
  #
  # @param [String] value The compression type to use for exported
  # files. Possible values include `GZIP` and `NONE`. The default
@@ -265,7 +342,7 @@ module Google
  end
 
  ##
- # Sets the field delimiter.
+ # Sets the field delimiter. Not applicable when extracting models.
  #
  # @param [String] value Delimiter to use between fields in the
  # exported data. Default is <code>,</code>.
@@ -276,14 +353,21 @@ module Google
  end
 
  ##
- # Sets the destination file format. The default value is `csv`.
+ # Sets the destination file format. The default value for
+ # tables is `csv`. Tables with nested or repeated fields cannot be
+ # exported as CSV. The default value for models is `ml_tf_saved_model`.
  #
- # The following values are supported:
+ # Supported values for tables:
  #
  # * `csv` - CSV
  # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
  # * `avro` - [Avro](http://avro.apache.org/)
  #
+ # Supported values for models:
+ #
+ # * `ml_tf_saved_model` - TensorFlow SavedModel
+ # * `ml_xgboost_booster` - XGBoost Booster
+ #
  # @param [String] new_format The new source format.
  #
  # @!group Attributes
@@ -293,7 +377,8 @@ module Google
  end
 
  ##
- # Print a header row in the exported file.
+ # Print a header row in the exported file. Not applicable when
+ # extracting models.
  #
  # @param [Boolean] value Whether to print out a header row in the
  # results. Default is `true`.
@@ -307,12 +392,21 @@ module Google
  # Sets the labels to use for the job.
  #
  # @param [Hash] value A hash of user-provided labels associated with
- # the job. You can use these to organize and group your jobs. Label
- # keys and values can be no longer than 63 characters, can only
- # contain lowercase letters, numeric characters, underscores and
- # dashes. International characters are allowed. Label values are
- # optional. Label keys must start with a letter and each label in
- # the list must have a different key.
+ # the job. You can use these to organize and group your jobs.
+ #
+ # The labels applied to a resource must meet the following requirements:
+ #
+ # * Each resource can have multiple labels, up to a maximum of 64.
+ # * Each label must be a key-value pair.
+ # * Keys have a minimum length of 1 character and a maximum length of
+ # 63 characters, and cannot be empty. Values can be empty, and have
+ # a maximum length of 63 characters.
+ # * Keys and values can contain only lowercase letters, numeric characters,
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
+ # international characters are allowed.
+ # * The key portion of a label must be unique. However, you can use the
+ # same key with multiple resources.
+ # * Keys must start with a lowercase letter or international character.
  #
  # @!group Attributes
  #
@@ -362,6 +456,16 @@ module Google
  @gapi
  end
  end
+
+ protected
+
+ def retrieve_model project_id, dataset_id, model_id
+ ensure_service!
+ gapi = service.get_project_model project_id, dataset_id, model_id
+ Model.from_gapi_json gapi, service
+ rescue Google::Cloud::NotFoundError
+ nil
+ end
  end
  end
  end
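For orientation (not part of the packaged source): with the ExtractJob changes above, a job's source can now be a model as well as a table, and the new predicates distinguish the two. A sketch based on the "Export a model" example in this diff; `my_dataset`, `my_model`, and the bucket are placeholders, not resources this diff provisions:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"  # placeholder dataset
model    = dataset.model "my_model"       # placeholder BigQuery ML model

# Model#extract_job now produces an ExtractJob whose source is a model.
extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
extract_job.wait_until_done!

extract_job.model?             #=> true
extract_job.table?             #=> false
extract_job.ml_tf_saved_model? #=> true, the default format for model extracts
extract_job.source             # re-fetches and returns the Model, or nil
```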