google-cloud-bigquery 1.21.2 → 1.26.0

Diff of `external.rb` and `extract_job.rb`: ORC and Parquet external data sources, hive partitioning options, and model extract jobs.

@@ -52,6 +52,24 @@ module Google
  #   # Retrieve the next page of results
  #   data = data.next if data.next?
  #
+ # @example Hive partitioning options:
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_require_partition_filter = true
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   external_data.hive_partitioning? #=> true
+ #   external_data.hive_partitioning_mode #=> "AUTO"
+ #   external_data.hive_partitioning_require_partition_filter? #=> true
+ #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
  module External
    ##
    # @private New External from URLs and format
@@ -79,7 +97,8 @@ module Google
  # @private Determine source_format from inputs
  def self.source_format_for urls, format
    val = {
-     "csv" => "CSV", "avro" => "AVRO",
+     "csv" => "CSV",
+     "avro" => "AVRO",
      "json" => "NEWLINE_DELIMITED_JSON",
      "newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
      "sheets" => "GOOGLE_SHEETS",
@@ -87,7 +106,9 @@ module Google
      "datastore" => "DATASTORE_BACKUP",
      "backup" => "DATASTORE_BACKUP",
      "datastore_backup" => "DATASTORE_BACKUP",
-     "bigtable" => "BIGTABLE"
+     "bigtable" => "BIGTABLE",
+     "orc" => "ORC",
+     "parquet" => "PARQUET"
    }[format.to_s.downcase]
    return val unless val.nil?
    Array(urls).each do |url|
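With the two new mappings above, `:orc` and `:parquet` are accepted wherever a format symbol or string is taken. A minimal sketch of the public entry point (the bucket paths are placeholders):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# The format is downcased and looked up in the mapping above, so
# :parquet, "parquet", and "PARQUET" all resolve to "PARQUET".
parquet_data = bigquery.external "gs://my-bucket/parquet/*", format: :parquet
orc_data     = bigquery.external "gs://my-bucket/orc/*",     format: :orc
```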
@@ -110,7 +131,7 @@ module Google
    when "GOOGLE_SHEETS" then External::SheetsSource
    when "BIGTABLE" then External::BigtableSource
    else
-     # AVRO and DATASTORE_BACKUP
+     # AVRO, DATASTORE_BACKUP, PARQUET
      External::DataSource
    end
  end
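ORC and Parquet get no format-specific subclass: like Avro and Datastore backups they fall through to the generic `External::DataSource`, which gains the `orc?` and `parquet?` predicates below (the updated comment names PARQUET; ORC takes the same branch). A sketch, with a placeholder bucket path:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# Parquet sources are plain DataSource objects, not a CsvSource-style subclass.
ext = bigquery.external "gs://my-bucket/data/*", format: :parquet
ext.class    #=> Google::Cloud::Bigquery::External::DataSource
ext.parquet? #=> true
ext.csv?     #=> false
```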
@@ -148,6 +169,24 @@ module Google
  #   # Retrieve the next page of results
  #   data = data.next if data.next?
  #
+ # @example Hive partitioning options:
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_require_partition_filter = true
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   external_data.hive_partitioning? #=> true
+ #   external_data.hive_partitioning_mode #=> "AUTO"
+ #   external_data.hive_partitioning_require_partition_filter? #=> true
+ #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
  class DataSource
    ##
    # @private The Google API Client object.
@@ -302,6 +341,52 @@ module Google
    @gapi.source_format == "BIGTABLE"
  end

+ ##
+ # Whether the data format is "ORC".
+ #
+ # @return [Boolean]
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :orc do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #   external_data.format #=> "ORC"
+ #   external_data.orc? #=> true
+ #
+ def orc?
+   @gapi.source_format == "ORC"
+ end
+
+ ##
+ # Whether the data format is "PARQUET".
+ #
+ # @return [Boolean]
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #   external_data.format #=> "PARQUET"
+ #   external_data.parquet? #=> true
+ #
+ def parquet?
+   @gapi.source_format == "PARQUET"
+ end
+
  ##
  # The fully-qualified URIs that point to your data in Google Cloud.
  # For Google Cloud Storage URIs: Each URI can contain one '*' wildcard
@@ -536,6 +621,246 @@ module Google
    @gapi.max_bad_records = new_max_bad_records
  end

+ ###
+ # Checks if hive partitioning options are set.
+ #
+ # Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+ # will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+ # If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
+ # Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
+ #
+ # @return [Boolean] `true` when hive partitioning options are set, or `false` otherwise.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_require_partition_filter = true
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   external_data.hive_partitioning? #=> true
+ #   external_data.hive_partitioning_mode #=> "AUTO"
+ #   external_data.hive_partitioning_require_partition_filter? #=> true
+ #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning?
+   !@gapi.hive_partitioning_options.nil?
+ end
+
+ ###
+ # The mode of hive partitioning to use when reading data. The following modes are supported:
+ #
+ # 1. `AUTO`: automatically infer partition key name(s) and type(s).
+ # 2. `STRINGS`: automatically infer partition key name(s). All types are interpreted as strings.
+ # 3. `CUSTOM`: partition key schema is encoded in the source URI prefix.
+ #
+ # @return [String, nil] The mode of hive partitioning, or `nil` if not set.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_require_partition_filter = true
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   external_data.hive_partitioning? #=> true
+ #   external_data.hive_partitioning_mode #=> "AUTO"
+ #   external_data.hive_partitioning_require_partition_filter? #=> true
+ #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_mode
+   @gapi.hive_partitioning_options.mode if hive_partitioning?
+ end
+
+ ##
+ # Sets the mode of hive partitioning to use when reading data. The following modes are supported:
+ #
+ # 1. `auto`: automatically infer partition key name(s) and type(s).
+ # 2. `strings`: automatically infer partition key name(s). All types are interpreted as strings.
+ # 3. `custom`: partition key schema is encoded in the source URI prefix.
+ #
+ # Not all storage formats support hive partitioning. Requesting hive partitioning on an unsupported format
+ # will lead to an error. Currently supported types include: `avro`, `csv`, `json`, `orc` and `parquet`.
+ # If your data is stored in ORC or Parquet on Cloud Storage, see [Querying columnar formats on Cloud
+ # Storage](https://cloud.google.com/bigquery/pricing#columnar_formats_pricing).
+ #
+ # See {#format}, {#hive_partitioning_require_partition_filter=} and {#hive_partitioning_source_uri_prefix=}.
+ #
+ # @param [String, Symbol] mode The mode of hive partitioning to use when reading data.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_require_partition_filter = true
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   external_data.hive_partitioning? #=> true
+ #   external_data.hive_partitioning_mode #=> "AUTO"
+ #   external_data.hive_partitioning_require_partition_filter? #=> true
+ #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_mode= mode
+   @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+   @gapi.hive_partitioning_options.mode = mode.to_s.upcase
+ end
+
+ ###
+ # Whether queries over the table using this external data source require a partition filter that can be used
+ # for partition elimination to be specified. Note that this field should only be true when creating a
+ # permanent external table or querying a temporary external table.
+ #
+ # @return [Boolean] `true` when queries over this table require a partition filter, or `false` otherwise.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_require_partition_filter = true
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   external_data.hive_partitioning? #=> true
+ #   external_data.hive_partitioning_mode #=> "AUTO"
+ #   external_data.hive_partitioning_require_partition_filter? #=> true
+ #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_require_partition_filter?
+   return false unless hive_partitioning?
+   !@gapi.hive_partitioning_options.require_partition_filter.nil?
+ end
+
+ ##
+ # Sets whether queries over the table using this external data source require a partition filter
+ # that can be used for partition elimination to be specified.
+ #
+ # See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_source_uri_prefix=}.
+ #
+ # @param [Boolean] require_partition_filter `true` if a partition filter must be specified, `false` otherwise.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_require_partition_filter = true
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   external_data.hive_partitioning? #=> true
+ #   external_data.hive_partitioning_mode #=> "AUTO"
+ #   external_data.hive_partitioning_require_partition_filter? #=> true
+ #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_require_partition_filter= require_partition_filter
+   @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+   @gapi.hive_partitioning_options.require_partition_filter = require_partition_filter
+ end
+
+ ###
+ # The common prefix for all source uris when hive partition detection is requested. The prefix must end
+ # immediately before the partition key encoding begins. For example, consider files following this data
+ # layout:
+ #
+ # ```
+ # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+ # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+ # ```
+ #
+ # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+ # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+ #
+ # @return [String, nil] The common prefix for all source uris, or `nil` if not set.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_require_partition_filter = true
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   external_data.hive_partitioning? #=> true
+ #   external_data.hive_partitioning_mode #=> "AUTO"
+ #   external_data.hive_partitioning_require_partition_filter? #=> true
+ #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_source_uri_prefix
+   @gapi.hive_partitioning_options.source_uri_prefix if hive_partitioning?
+ end
+
+ ##
+ # Sets the common prefix for all source uris when hive partition detection is requested. The prefix must end
+ # immediately before the partition key encoding begins. For example, consider files following this data
+ # layout:
+ #
+ # ```
+ # gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+ # gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
+ # ```
+ #
+ # When hive partitioning is requested with either `AUTO` or `STRINGS` mode, the common prefix can be either of
+ # `gs://bucket/path_to_table` or `gs://bucket/path_to_table/` (trailing slash does not matter).
+ #
+ # See {#format}, {#hive_partitioning_mode=} and {#hive_partitioning_require_partition_filter=}.
+ #
+ # @param [String] source_uri_prefix The common prefix for all source uris.
+ #
+ # @example
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #
+ #   gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
+ #   source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
+ #   external_data = bigquery.external gcs_uri, format: :parquet do |ext|
+ #     ext.hive_partitioning_mode = :auto
+ #     ext.hive_partitioning_require_partition_filter = true
+ #     ext.hive_partitioning_source_uri_prefix = source_uri_prefix
+ #   end
+ #
+ #   external_data.hive_partitioning? #=> true
+ #   external_data.hive_partitioning_mode #=> "AUTO"
+ #   external_data.hive_partitioning_require_partition_filter? #=> true
+ #   external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix
+ #
+ def hive_partitioning_source_uri_prefix= source_uri_prefix
+   @gapi.hive_partitioning_options ||= Google::Apis::BigqueryV2::HivePartitioningOptions.new
+   @gapi.hive_partitioning_options.source_uri_prefix = source_uri_prefix
+ end
+
  ##
  # @private Google API Client object.
  def to_gapi
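Putting the new accessors together: once hive partitioning options are set, the external data source can be queried through the `external` option of `Project#query`, as in the gem's existing external-data examples. A sketch; the partition key name `dt` is an assumption about the sample data's layout:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
external_data = bigquery.external gcs_uri, format: :parquet do |ext|
  ext.hive_partitioning_mode = :auto
  ext.hive_partitioning_require_partition_filter = true
  ext.hive_partitioning_source_uri_prefix = source_uri_prefix
end

# require_partition_filter means the query must filter on a partition key,
# assumed here to be the auto-detected `dt` column.
data = bigquery.query "SELECT * FROM ext_table WHERE dt = '2020-11-15'",
                      external: { ext_table: external_data }
data.each { |row| puts row }
```

The remaining hunks are from the `ExtractJob` class, which gains model export support.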
@@ -20,15 +20,17 @@ module Google
  # # ExtractJob
  #
  # A {Job} subclass representing an export operation that may be performed
- # on a {Table}. A ExtractJob instance is created when you call
- # {Table#extract_job}.
+ # on a {Table} or {Model}. An ExtractJob instance is returned when you call
+ # {Project#extract_job}, {Table#extract_job} or {Model#extract_job}.
  #
  # @see https://cloud.google.com/bigquery/docs/exporting-data
-​ #   Exporting Data From BigQuery
+ #   Exporting table data
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+ #   Exporting models
  # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
  #   reference
  #
- # @example
+ # @example Export table data
  #   require "google/cloud/bigquery"
  #
  #   bigquery = Google::Cloud::Bigquery.new
@@ -40,6 +42,18 @@ module Google
  #   extract_job.wait_until_done!
  #   extract_job.done? #=> true
  #
+ # @example Export a model
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
+ #   model = dataset.model "my_model"
+ #
+ #   extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
+ #
+ #   extract_job.wait_until_done!
+ #   extract_job.done? #=> true
+ #
  class ExtractJob < Job
    ##
    # The URI or URIs representing the Google Cloud Storage files to which
@@ -49,71 +63,126 @@ module Google
  end

  ##
- # The table from which the data is exported. This is the table upon
- # which {Table#extract_job} was called.
+ # The table or model which is exported.
  #
- # @return [Table] A table instance.
+ # @return [Table, Model, nil] A table or model instance, or `nil`.
  #
  def source
-   table = @gapi.configuration.extract.source_table
-   return nil unless table
-   retrieve_table table.project_id, table.dataset_id, table.table_id
+   if (table = @gapi.configuration.extract.source_table)
+     retrieve_table table.project_id, table.dataset_id, table.table_id
+   elsif (model = @gapi.configuration.extract.source_model)
+     retrieve_model model.project_id, model.dataset_id, model.model_id
+   end
  end

  ##
- # Checks if the export operation compresses the data using gzip. The
- # default is `false`.
+ # Whether the source of the export job is a table. See {#source}.
  #
- # @return [Boolean] `true` when `GZIP`, `false` otherwise.
+ # @return [Boolean] `true` when the source is a table, `false`
+ #   otherwise.
  #
- def compression?
-   val = @gapi.configuration.extract.compression
-   val == "GZIP"
+ def table?
+   !@gapi.configuration.extract.source_table.nil?
  end

  ##
- # Checks if the destination format for the data is [newline-delimited
- # JSON](http://jsonlines.org/). The default is `false`.
+ # Whether the source of the export job is a model. See {#source}.
  #
- # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false`
+ # @return [Boolean] `true` when the source is a model, `false`
  #   otherwise.
  #
+ def model?
+   !@gapi.configuration.extract.source_model.nil?
+ end
+
+ ##
+ # Checks if the export operation compresses the data using gzip. The
+ # default is `false`. Not applicable when extracting models.
+ #
+ # @return [Boolean] `true` when `GZIP`, `false` if not `GZIP` or not a
+ #   table extraction.
+ def compression?
+   return false unless table?
+   @gapi.configuration.extract.compression == "GZIP"
+ end
+
+ ##
+ # Checks if the destination format for the table data is [newline-delimited
+ # JSON](http://jsonlines.org/). The default is `false`. Not applicable when
+ # extracting models.
+ #
+ # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false` if not
+ #   `NEWLINE_DELIMITED_JSON` or not a table extraction.
+ #
  def json?
-   val = @gapi.configuration.extract.destination_format
-   val == "NEWLINE_DELIMITED_JSON"
+   return false unless table?
+   @gapi.configuration.extract.destination_format == "NEWLINE_DELIMITED_JSON"
  end

  ##
- # Checks if the destination format for the data is CSV. Tables with
+ # Checks if the destination format for the table data is CSV. Tables with
  # nested or repeated fields cannot be exported as CSV. The default is
- # `true`.
+ # `true` for tables. Not applicable when extracting models.
  #
- # @return [Boolean] `true` when `CSV`, `false` otherwise.
+ # @return [Boolean] `true` when `CSV`, or `false` if not `CSV` or not a
+ #   table extraction.
  #
  def csv?
+   return false unless table?
    val = @gapi.configuration.extract.destination_format
    return true if val.nil?
    val == "CSV"
  end

  ##
- # Checks if the destination format for the data is
- # [Avro](http://avro.apache.org/). The default is `false`.
+ # Checks if the destination format for the table data is
+ # [Avro](http://avro.apache.org/). The default is `false`. Not applicable
+ # when extracting models.
  #
- # @return [Boolean] `true` when `AVRO`, `false` otherwise.
+ # @return [Boolean] `true` when `AVRO`, `false` if not `AVRO` or not a
+ #   table extraction.
  #
  def avro?
+   return false unless table?
+   @gapi.configuration.extract.destination_format == "AVRO"
+ end
+
+ ##
+ # Checks if the destination format for the model is TensorFlow SavedModel.
+ # The default is `true` for models. Not applicable when extracting tables.
+ #
+ # @return [Boolean] `true` when `ML_TF_SAVED_MODEL`, `false` if not
+ #   `ML_TF_SAVED_MODEL` or not a model extraction.
+ #
+ def ml_tf_saved_model?
+   return false unless model?
    val = @gapi.configuration.extract.destination_format
-   val == "AVRO"
+   return true if val.nil?
+   val == "ML_TF_SAVED_MODEL"
+ end
+
+ ##
+ # Checks if the destination format for the model is XGBoost. The default
+ # is `false`. Not applicable when extracting tables.
+ #
+ # @return [Boolean] `true` when `ML_XGBOOST_BOOSTER`, `false` if not
+ #   `ML_XGBOOST_BOOSTER` or not a model extraction.
+ #
+ def ml_xgboost_booster?
+   return false unless model?
+   @gapi.configuration.extract.destination_format == "ML_XGBOOST_BOOSTER"
  end

  ##
  # The character or symbol the operation uses to delimit fields in the
- # exported data. The default is a comma (,).
+ # exported data. The default is a comma (,) for tables. Not applicable
+ # when extracting models.
  #
- # @return [String] A string containing the character, such as `","`.
+ # @return [String, nil] A string containing the character, such as `","`,
+ #   or `nil` if not a table extraction.
  #
  def delimiter
+   return unless table?
    val = @gapi.configuration.extract.field_delimiter
    val = "," if val.nil?
    val
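The new `table?` and `model?` predicates make it easy to tell which kind of source a job has. A sketch reusing the model example from the class docs above:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset = bigquery.dataset "my_dataset"
model = dataset.model "my_model"

extract_job = model.extract_job "gs://my-bucket/#{model.model_id}"
extract_job.wait_until_done!

# Distinguish the two kinds of extract sources.
extract_job.model?             #=> true
extract_job.table?             #=> false
extract_job.ml_tf_saved_model? #=> true  (the default model format)
extract_job.compression?       #=> false (table-only option)
```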
@@ -121,12 +190,13 @@ module Google

  ##
  # Checks if the exported data contains a header row. The default is
- # `true`.
+ # `true` for tables. Not applicable when extracting models.
  #
  # @return [Boolean] `true` when the print header configuration is
- #   present or `nil`, `false` otherwise.
+ #   present or `nil`, `false` if disabled or not a table extraction.
  #
  def print_header?
+   return false unless table?
    val = @gapi.configuration.extract.print_header
    val = true if val.nil?
    val
@@ -159,12 +229,14 @@ module Google
  # whether to enable extracting applicable column types (such as
  # `TIMESTAMP`) to their corresponding AVRO logical types
  # (`timestamp-micros`), instead of only using their raw types
- # (`avro-long`).
+ # (`avro-long`). Not applicable when extracting models.
  #
  # @return [Boolean] `true` when applicable column types will use their
- #   corresponding AVRO logical types, `false` otherwise.
+ #   corresponding AVRO logical types, `false` if not enabled or not a
+ #   table extraction.
  #
  def use_avro_logical_types?
+   return false unless table?
    @gapi.configuration.extract.use_avro_logical_types
  end

@@ -182,19 +254,24 @@ module Google
  #
  # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
  #   configuration object for setting query options.
- def self.from_options service, table, storage_files, options
+ def self.from_options service, source, storage_files, options
    job_ref = service.job_ref_from options[:job_id], options[:prefix]
    storage_urls = Array(storage_files).map do |url|
      url.respond_to?(:to_gs_url) ? url.to_gs_url : url
    end
    options[:format] ||= Convert.derive_source_format storage_urls.first
+   extract_config = Google::Apis::BigqueryV2::JobConfigurationExtract.new(
+     destination_uris: Array(storage_urls)
+   )
+   if source.is_a? Google::Apis::BigqueryV2::TableReference
+     extract_config.source_table = source
+   elsif source.is_a? Google::Apis::BigqueryV2::ModelReference
+     extract_config.source_model = source
+   end
    job = Google::Apis::BigqueryV2::Job.new(
      job_reference: job_ref,
      configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
-       extract: Google::Apis::BigqueryV2::JobConfigurationExtract.new(
-         destination_uris: Array(storage_urls),
-         source_table: table
-       ),
+       extract: extract_config,
        dry_run: options[:dryrun]
      )
    )
253
330
  end
254
331
 
255
332
  ##
256
- # Sets the compression type.
333
+ # Sets the compression type. Not applicable when extracting models.
257
334
  #
258
335
  # @param [String] value The compression type to use for exported
259
336
  # files. Possible values include `GZIP` and `NONE`. The default
@@ -265,7 +342,7 @@ module Google
265
342
  end
266
343
 
267
344
  ##
268
- # Sets the field delimiter.
345
+ # Sets the field delimiter. Not applicable when extracting models.
269
346
  #
270
347
  # @param [String] value Delimiter to use between fields in the
271
348
  # exported data. Default is <code>,</code>.
@@ -276,14 +353,21 @@ module Google
276
353
  end
277
354
 
278
355
  ##
279
- # Sets the destination file format. The default value is `csv`.
356
+ # Sets the destination file format. The default value for
357
+ # tables is `csv`. Tables with nested or repeated fields cannot be
358
+ # exported as CSV. The default value for models is `ml_tf_saved_model`.
280
359
  #
281
- # The following values are supported:
360
+ # Supported values for tables:
282
361
  #
283
362
  # * `csv` - CSV
284
363
  # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
285
364
  # * `avro` - [Avro](http://avro.apache.org/)
286
365
  #
366
+ # Supported values for models:
367
+ #
368
+ # * `ml_tf_saved_model` - TensorFlow SavedModel
369
+ # * `ml_xgboost_booster` - XGBoost Booster
370
+ #
287
371
  # @param [String] new_format The new source format.
288
372
  #
289
373
  # @!group Attributes
@@ -293,7 +377,8 @@ module Google
293
377
  end
294
378
 
295
379
  ##
296
- # Print a header row in the exported file.
380
+ # Print a header row in the exported file. Not applicable when
381
+ # extracting models.
297
382
  #
298
383
  # @param [Boolean] value Whether to print out a header row in the
299
384
  # results. Default is `true`.
@@ -307,12 +392,21 @@ module Google
307
392
  # Sets the labels to use for the job.
308
393
  #
309
394
  # @param [Hash] value A hash of user-provided labels associated with
310
- # the job. You can use these to organize and group your jobs. Label
311
- # keys and values can be no longer than 63 characters, can only
312
- # contain lowercase letters, numeric characters, underscores and
313
- # dashes. International characters are allowed. Label values are
314
- # optional. Label keys must start with a letter and each label in
315
- # the list must have a different key.
395
+ # the job. You can use these to organize and group your jobs.
396
+ #
397
+ # The labels applied to a resource must meet the following requirements:
398
+ #
399
+ # * Each resource can have multiple labels, up to a maximum of 64.
400
+ # * Each label must be a key-value pair.
401
+ # * Keys have a minimum length of 1 character and a maximum length of
402
+ # 63 characters, and cannot be empty. Values can be empty, and have
403
+ # a maximum length of 63 characters.
404
+ # * Keys and values can contain only lowercase letters, numeric characters,
405
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
406
+ # international characters are allowed.
407
+ # * The key portion of a label must be unique. However, you can use the
408
+ # same key with multiple resources.
409
+ # * Keys must start with a lowercase letter or international character.
316
410
  #
317
411
  # @!group Attributes
318
412
  #
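A sketch applying labels that satisfy the expanded requirements above, via the same Updater block (names are placeholders):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
table = bigquery.dataset("my_dataset").table "my_table"

extract_job = table.extract_job "gs://my-bucket/my_table.csv" do |job|
  # Keys: lowercase first character, <= 63 chars, unique within the job.
  job.labels = { "env" => "production", "team" => "analytics" }
end
```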
@@ -362,6 +456,16 @@ module Google
      @gapi
    end
  end
+
+ protected
+
+ def retrieve_model project_id, dataset_id, model_id
+   ensure_service!
+   gapi = service.get_project_model project_id, dataset_id, model_id
+   Model.from_gapi_json gapi, service
+ rescue Google::Cloud::NotFoundError
+   nil
+ end
  end
end
end