RubyGems - google-cloud-bigquery - Versions diffs - 0.28.0 → 0.29.0 - Mend

google-cloud-bigquery 0.28.0 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

checksums.yaml +4 -4
data/README.md +1 -1
data/lib/google-cloud-bigquery.rb +2 -2
data/lib/google/cloud/bigquery.rb +10 -12
data/lib/google/cloud/bigquery/copy_job.rb +42 -6
data/lib/google/cloud/bigquery/data.rb +129 -23
data/lib/google/cloud/bigquery/dataset.rb +708 -66
data/lib/google/cloud/bigquery/dataset/access.rb +533 -27
data/lib/google/cloud/bigquery/dataset/list.rb +5 -3
data/lib/google/cloud/bigquery/external.rb +2353 -0
data/lib/google/cloud/bigquery/extract_job.rb +52 -11
data/lib/google/cloud/bigquery/insert_response.rb +90 -2
data/lib/google/cloud/bigquery/job.rb +160 -21
data/lib/google/cloud/bigquery/load_job.rb +128 -11
data/lib/google/cloud/bigquery/project.rb +187 -44
data/lib/google/cloud/bigquery/query_job.rb +323 -13
data/lib/google/cloud/bigquery/schema.rb +57 -1
data/lib/google/cloud/bigquery/schema/field.rb +118 -17
data/lib/google/cloud/bigquery/service.rb +196 -43
data/lib/google/cloud/bigquery/table.rb +739 -49
data/lib/google/cloud/bigquery/table/async_inserter.rb +280 -0
data/lib/google/cloud/bigquery/version.rb +1 -1
data/lib/google/cloud/bigquery/view.rb +306 -69
metadata +18 -3
data/lib/google/cloud/bigquery/query_data.rb +0 -234

data/lib/google/cloud/bigquery/schema/field.rb CHANGED

@@ -34,6 +34,7 @@ module Google
         #
         #   field = table.schema.field "name"
         #   field.required? #=> true
+        #
         class Field
           # @private
           MODES = %w( NULLABLE REQUIRED REPEATED )
@@ -45,6 +46,11 @@ module Google
           ##
           # The name of the field.
           #
+          # @return [String] The field name. The name must contain only
+          #   letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
+          #   start with a letter or underscore. The maximum length is 128
+          #   characters.
+          #
           def name
             @gapi.name
           end
@@ -52,19 +58,38 @@ module Google
           ##
           # Updates the name of the field.
           #
+          # @param [String] new_name The field name. The name must contain only
+          #   letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
+          #   start with a letter or underscore. The maximum length is 128
+          #   characters.
+          #
           def name= new_name
             @gapi.update! name: String(new_name)
           end
           ##
-          # The type of the field.
+          # The data type of the field.
+          #
+          # @return [String] The field data type. Possible values include
+          #   `STRING`, `BYTES`, `INTEGER`, `INT64` (same as `INTEGER`),
+          #   `FLOAT`, `FLOAT64` (same as `FLOAT`), `BOOLEAN`, `BOOL` (same as
+          #   `BOOLEAN`), `TIMESTAMP`, `DATE`, `TIME`, `DATETIME`, `RECORD`
+          #   (where `RECORD` indicates that the field contains a nested schema)
+          #   or `STRUCT` (same as `RECORD`).
           #
           def type
             @gapi.type
           end
           ##
-          # Updates the type of the field.
+          # Updates the data type of the field.
+          #
+          # @param [String] new_type The data type. Possible values include
+          #   `STRING`, `BYTES`, `INTEGER`, `INT64` (same as `INTEGER`),
+          #   `FLOAT`, `FLOAT64` (same as `FLOAT`), `BOOLEAN`, `BOOL` (same as
+          #   `BOOLEAN`), `TIMESTAMP`, `DATE`, `TIME`, `DATETIME`, `RECORD`
+          #   (where `RECORD` indicates that the field contains a nested schema)
+          #   or `STRUCT` (same as `RECORD`).
           #
           def type= new_type
             @gapi.update! type: verify_type(new_type)
@@ -72,18 +97,27 @@ module Google
           ##
           # Checks if the mode of the field is `NULLABLE`.
+          #
+          # @return [Boolean] `true` when `NULLABLE`, `false` otherwise.
+          #
           def nullable?
             mode == "NULLABLE"
           end
           ##
           # Checks if the mode of the field is `REQUIRED`.
+          #
+          # @return [Boolean] `true` when `REQUIRED`, `false` otherwise.
+          #
           def required?
             mode == "REQUIRED"
           end
           ##
           # Checks if the mode of the field is `REPEATED`.
+          #
+          # @return [Boolean] `true` when `REPEATED`, `false` otherwise.
+          #
           def repeated?
             mode == "REPEATED"
           end
@@ -91,6 +125,9 @@ module Google
           ##
           # The description of the field.
           #
+          # @return [String] The field description. The maximum length is 1,024
+          #   characters.
+          #
           def description
             @gapi.description
           end
@@ -98,6 +135,9 @@ module Google
           ##
           # Updates the description of the field.
           #
+          # @param [String] new_description The field description. The maximum
+          #   length is 1,024 characters.
+          #
           def description= new_description
             @gapi.update! description: new_description
           end
@@ -105,6 +145,9 @@ module Google
           ##
           # The mode of the field.
           #
+          # @return [String] The field mode. Possible values include `NULLABLE`,
+          #   `REQUIRED` and `REPEATED`. The default value is `NULLABLE`.
+          #
           def mode
             @gapi.mode
           end
@@ -112,66 +155,100 @@ module Google
           ##
           # Updates the mode of the field.
           #
+          # @param [String] new_mode The field mode. Possible values include
+          #   `NULLABLE`, `REQUIRED` and `REPEATED`. The default value is
+          #   `NULLABLE`.
+          #
           def mode= new_mode
             @gapi.update! mode: verify_mode(new_mode)
           end
           ##
           # Checks if the type of the field is `STRING`.
+          #
+          # @return [Boolean] `true` when `STRING`, `false` otherwise.
+          #
           def string?
             # Compare the data type, not the mode: `mode` only ever holds
             # NULLABLE, REQUIRED or REPEATED, so it can never equal "STRING".
             type == "STRING"
           end
           ##
           # Checks if the type of the field is `INTEGER`.
+          #
+          # @return [Boolean] `true` when `INTEGER`, `false` otherwise.
+          #
           def integer?
             # Compare the data type, not the mode: `mode` only ever holds
             # NULLABLE, REQUIRED or REPEATED. `INT64` is the standard SQL
             # alias for `INTEGER`, so both spellings are accepted.
             type == "INTEGER" || type == "INT64"
           end
           ##
           # Checks if the type of the field is `FLOAT`.
+          #
+          # @return [Boolean] `true` when `FLOAT`, `false` otherwise.
+          #
           def float?
             # Compare the data type, not the mode: `mode` only ever holds
             # NULLABLE, REQUIRED or REPEATED. `FLOAT64` is the standard SQL
             # alias for `FLOAT`, so both spellings are accepted.
             type == "FLOAT" || type == "FLOAT64"
           end
           ##
           # Checks if the type of the field is `BOOLEAN`.
+          #
+          # @return [Boolean] `true` when `BOOLEAN`, `false` otherwise.
+          #
           def boolean?
             # Compare the data type, not the mode: `mode` only ever holds
             # NULLABLE, REQUIRED or REPEATED. `BOOL` is the standard SQL
             # alias for `BOOLEAN`, so both spellings are accepted.
             type == "BOOLEAN" || type == "BOOL"
           end
           ##
           # Checks if the type of the field is `BYTES`.
+          #
+          # @return [Boolean] `true` when `BYTES`, `false` otherwise.
+          #
           def bytes?
             # Compare the data type, not the mode: `mode` only ever holds
             # NULLABLE, REQUIRED or REPEATED, so it can never equal "BYTES".
             type == "BYTES"
           end
           ##
           # Checks if the type of the field is `TIMESTAMP`.
+          #
+          # @return [Boolean] `true` when `TIMESTAMP`, `false` otherwise.
+          #
           def timestamp?
             # Compare the data type, not the mode: `mode` only ever holds
             # NULLABLE, REQUIRED or REPEATED, so it can never equal
             # "TIMESTAMP".
             type == "TIMESTAMP"
           end
           ##
           # Checks if the type of the field is `TIME`.
+          #
+          # @return [Boolean] `true` when `TIME`, `false` otherwise.
+          #
           def time?
             # Compare the data type, not the mode: `mode` only ever holds
             # NULLABLE, REQUIRED or REPEATED, so it can never equal "TIME".
             type == "TIME"
           end
           ##
           # Checks if the type of the field is `DATETIME`.
+          #
+          # @return [Boolean] `true` when `DATETIME`, `false` otherwise.
+          #
           def datetime?
             # Compare the data type, not the mode: `mode` only ever holds
             # NULLABLE, REQUIRED or REPEATED, so it can never equal
             # "DATETIME".
             type == "DATETIME"
           end
           ##
           # Checks if the type of the field is `DATE`.
+          #
+          # @return [Boolean] `true` when `DATE`, `false` otherwise.
+          #
           def date?
             # Compare the data type, not the mode: `mode` only ever holds
             # NULLABLE, REQUIRED or REPEATED, so it can never equal "DATE".
             type == "DATE"
           end
           ##
           # Checks if the type of the field is `RECORD`.
+          #
+          # @return [Boolean] `true` when `RECORD`, `false` otherwise.
+          #
           def record?
             # Compare the data type, not the mode: `mode` only ever holds
             # NULLABLE, REQUIRED or REPEATED. `STRUCT` is the standard SQL
             # alias for `RECORD`, so both spellings are accepted.
             type == "RECORD" || type == "STRUCT"
           end
@@ -179,6 +256,10 @@ module Google
           ##
           # The nested fields if the type property is set to `RECORD`. Will be
           # empty otherwise.
+          #
+          # @return [Array<Field>, nil] The nested schema fields if the type
+          #   is set to `RECORD`.
+          #
           def fields
             if frozen?
               Array(@gapi.fields).map { |f| Field.from_gapi(f).freeze }.freeze
@@ -190,13 +271,20 @@ module Google
           ##
           # The names of the nested fields as symbols if the type property is
           # set to `RECORD`. Will be empty otherwise.
+          #
+          # @return [Array<Symbol>, nil] The names of the nested schema fields
+          #   if the type is set to `RECORD`.
+          #
           def headers
             fields.map(&:name).map(&:to_sym)
           end
           ##
-          # Retreive a nested fields by name, if the type property is
+          # Retrieve a nested field by name, if the type property is
           # set to `RECORD`. Will return `nil` otherwise.
+          #
+          # @return [Field, nil] The nested schema field object, or `nil`.
+          #
           def field name
             f = fields.find { |fld| fld.name == name.to_s }
             return nil if f.nil?
@@ -205,7 +293,7 @@ module Google
           end
           ##
-          # Adds a string field to the schema.
+          # Adds a string field to the nested schema of a record field.
           #
           # This can only be called on fields that are of type `RECORD`.
           #
@@ -217,6 +305,7 @@ module Google
           # @param [Symbol] mode The field's mode. The possible values are
           #   `:nullable`, `:required`, and `:repeated`. The default value is
           #   `:nullable`.
+          #
           def string name, description: nil, mode: :nullable
             record_check!
@@ -224,7 +313,7 @@ module Google
           end
           ##
-          # Adds an integer field to the schema.
+          # Adds an integer field to the nested schema of a record field.
           #
           # This can only be called on fields that are of type `RECORD`.
           #
@@ -236,6 +325,7 @@ module Google
           # @param [Symbol] mode The field's mode. The possible values are
           #   `:nullable`, `:required`, and `:repeated`. The default value is
           #   `:nullable`.
+          #
           def integer name, description: nil, mode: :nullable
             record_check!
@@ -243,7 +333,8 @@ module Google
           end
           ##
-          # Adds a floating-point number field to the schema.
+          # Adds a floating-point number field to the nested schema of a record
+          # field.
           #
           # This can only be called on fields that are of type `RECORD`.
           #
@@ -255,6 +346,7 @@ module Google
           # @param [Symbol] mode The field's mode. The possible values are
           #   `:nullable`, `:required`, and `:repeated`. The default value is
           #   `:nullable`.
+          #
           def float name, description: nil, mode: :nullable
             record_check!
@@ -262,7 +354,7 @@ module Google
           end
           ##
-          # Adds a boolean field to the schema.
+          # Adds a boolean field to the nested schema of a record field.
           #
           # This can only be called on fields that are of type `RECORD`.
           #
@@ -274,6 +366,7 @@ module Google
           # @param [Symbol] mode The field's mode. The possible values are
           #   `:nullable`, `:required`, and `:repeated`. The default value is
           #   `:nullable`.
+          #
           def boolean name, description: nil, mode: :nullable
             record_check!
@@ -281,7 +374,7 @@ module Google
           end
           ##
-          # Adds a bytes field to the schema.
+          # Adds a bytes field to the nested schema of a record field.
           #
           # This can only be called on fields that are of type `RECORD`.
           #
@@ -293,6 +386,7 @@ module Google
           # @param [Symbol] mode The field's mode. The possible values are
           #   `:nullable`, `:required`, and `:repeated`. The default value is
           #   `:nullable`.
+          #
           def bytes name, description: nil, mode: :nullable
             record_check!
@@ -300,7 +394,7 @@ module Google
           end
           ##
-          # Adds a timestamp field to the schema.
+          # Adds a timestamp field to the nested schema of a record field.
           #
           # This can only be called on fields that are of type `RECORD`.
           #
@@ -312,6 +406,7 @@ module Google
           # @param [Symbol] mode The field's mode. The possible values are
           #   `:nullable`, `:required`, and `:repeated`. The default value is
           #   `:nullable`.
+          #
           def timestamp name, description: nil, mode: :nullable
             record_check!
@@ -319,7 +414,7 @@ module Google
           end
           ##
-          # Adds a time field to the schema.
+          # Adds a time field to the nested schema of a record field.
           #
           # This can only be called on fields that are of type `RECORD`.
           #
@@ -331,6 +426,7 @@ module Google
           # @param [Symbol] mode The field's mode. The possible values are
           #   `:nullable`, `:required`, and `:repeated`. The default value is
           #   `:nullable`.
+          #
           def time name, description: nil, mode: :nullable
             record_check!
@@ -338,7 +434,7 @@ module Google
           end
           ##
-          # Adds a datetime field to the schema.
+          # Adds a datetime field to the nested schema of a record field.
           #
           # This can only be called on fields that are of type `RECORD`.
           #
@@ -350,6 +446,7 @@ module Google
           # @param [Symbol] mode The field's mode. The possible values are
           #   `:nullable`, `:required`, and `:repeated`. The default value is
           #   `:nullable`.
+          #
           def datetime name, description: nil, mode: :nullable
             record_check!
@@ -357,7 +454,7 @@ module Google
           end
           ##
-          # Adds a date field to the schema.
+          # Adds a date field to the nested schema of a record field.
           #
           # This can only be called on fields that are of type `RECORD`.
           #
@@ -369,6 +466,7 @@ module Google
           # @param [Symbol] mode The field's mode. The possible values are
           #   `:nullable`, `:required`, and `:repeated`. The default value is
           #   `:nullable`.
+          #
           def date name, description: nil, mode: :nullable
             record_check!
@@ -376,10 +474,10 @@ module Google
           end
           ##
-          # Adds a record field to the schema. A block must be passed describing
-          # the nested fields of the record. For more information about nested
-          # and repeated records, see [Preparing Data for BigQuery
-          # ](https://cloud.google.com/bigquery/preparing-data-for-bigquery).
+          # Adds a record field to the nested schema of a record field. A block
+          # must be passed describing the nested fields of the record. For more
+          # information about nested and repeated records, see [Preparing Data
+          # for BigQuery](https://cloud.google.com/bigquery/preparing-data-for-bigquery).
           #
           # This can only be called on fields that are of type `RECORD`.
           #
@@ -405,7 +503,10 @@ module Google
           #   table.schema do |schema|
           #     schema.string "first_name", mode: :required
           #     schema.record "cities_lived", mode: :repeated do |cities_lived|
-          #       cities_lived.string "place", mode: :required
+          #       cities_lived.record "city", mode: :required do |city|
+          #         city.string "name", mode: :required
+          #         city.string "country", mode: :required
+          #       end
           #       cities_lived.integer "number_of_years", mode: :required
           #     end
           #   end

data/lib/google/cloud/bigquery/service.rb CHANGED

@@ -18,7 +18,7 @@ require "google/cloud/bigquery/convert"
 require "google/cloud/errors"
 require "google/apis/bigquery_v2"
 require "pathname"
-require "digest/md5"
+require "securerandom"
 require "mime/types"
 require "date"
@@ -61,7 +61,7 @@ module Google
             service.client_options.open_timeout_sec = timeout
             service.client_options.read_timeout_sec = timeout
             service.client_options.send_timeout_sec = timeout
-            service.request_options.retries = @retries || 3
+            service.request_options.retries = 0 # handle retries in #execute
             service.request_options.header ||= {}
             service.request_options.header["x-goog-api-client"] = \
               "gl-ruby/#{RUBY_VERSION} gccl/#{Google::Cloud::Bigquery::VERSION}"
@@ -75,17 +75,19 @@ module Google
         # Lists all datasets in the specified project to which you have
         # been granted the READER dataset role.
         def list_datasets options = {}
-          execute do
+          # The list operation is considered idempotent
+          execute backoff: true do
             service.list_datasets \
-              @project, all: options[:all], max_results: options[:max],
-                        page_token: options[:token]
+              @project, all: options[:all], filter: options[:filter],
+                        max_results: options[:max], page_token: options[:token]
           end
         end
         ##
         # Returns the dataset specified by datasetID.
         def get_dataset dataset_id
-          execute { service.get_dataset @project, dataset_id }
+          # The get operation is considered idempotent
+          execute(backoff: true) { service.get_dataset @project, dataset_id }
         end
         ##
@@ -98,8 +100,16 @@ module Google
         # Updates information in an existing dataset, only replacing
         # fields that are provided in the submitted dataset resource.
         def patch_dataset dataset_id, patched_dataset_gapi
-          execute do
-            service.patch_dataset @project, dataset_id, patched_dataset_gapi
+          patch_with_backoff = false
+          options = {}
+          if patched_dataset_gapi.etag
+            options[:header] = { "If-Match" => patched_dataset_gapi.etag }
+            # The patch with etag operation is considered idempotent
+            patch_with_backoff = true
+          end
+          execute backoff: patch_with_backoff do
+            service.patch_dataset @project, dataset_id, patched_dataset_gapi,
+                                  options: options
           end
         end
@@ -119,7 +129,8 @@ module Google
         # Lists all tables in the specified dataset.
         # Requires the READER dataset role.
         def list_tables dataset_id, options = {}
-          execute do
+          # The list operation is considered idempotent
+          execute backoff: true do
             service.list_tables @project, dataset_id,
                                 max_results: options[:max],
                                 page_token: options[:token]
@@ -127,7 +138,10 @@ module Google
         end
         def get_project_table project_id, dataset_id, table_id
-          execute { service.get_table project_id, dataset_id, table_id }
+          # The get operation is considered idempotent
+          execute backoff: true do
+            service.get_table project_id, dataset_id, table_id
+          end
         end
         ##
@@ -136,7 +150,10 @@ module Google
         # it only returns the table resource,
         # which describes the structure of this table.
         def get_table dataset_id, table_id
-          execute { get_project_table @project, dataset_id, table_id }
+          # The get operation is considered idempotent
+          execute backoff: true do
+            get_project_table @project, dataset_id, table_id
+          end
         end
         ##
@@ -149,9 +166,16 @@ module Google
         # Updates information in an existing table, replacing fields that
         # are provided in the submitted table resource.
         def patch_table dataset_id, table_id, patched_table_gapi
-          execute do
+          patch_with_backoff = false
+          options = {}
+          if patched_table_gapi.etag
+            options[:header] = { "If-Match" => patched_table_gapi.etag }
+            # The patch with etag operation is considered idempotent
+            patch_with_backoff = true
+          end
+          execute backoff: patch_with_backoff do
             service.patch_table @project, dataset_id, table_id,
-                                patched_table_gapi
+                                patched_table_gapi, options: options
           end
         end
@@ -165,7 +189,8 @@ module Google
         ##
         # Retrieves data from the table.
         def list_tabledata dataset_id, table_id, options = {}
-          execute do
+          # The list operation is considered idempotent
+          execute backoff: true do
             service.list_table_data @project, dataset_id, table_id,
                                     max_results: options.delete(:max),
                                     page_token: options.delete(:token),
@@ -176,8 +201,8 @@ module Google
         def insert_tabledata dataset_id, table_id, rows, options = {}
           insert_rows = Array(rows).map do |row|
             Google::Apis::BigqueryV2::InsertAllTableDataRequest::Row.new(
-              insert_id: Digest::MD5.base64digest(row.to_json),
-              json: row
+              insert_id: SecureRandom.uuid,
+              json: Convert.to_json_row(row)
             )
           end
           insert_req = Google::Apis::BigqueryV2::InsertAllTableDataRequest.new(
@@ -186,7 +211,8 @@ module Google
             skip_invalid_rows: options[:skip_invalid]
           )
-          execute do
+          # The insertAll with insertId operation is considered idempotent
+          execute backoff: true do
             service.insert_all_table_data(
               @project, dataset_id, table_id, insert_req)
           end
@@ -196,7 +222,8 @@ module Google
         # Lists all jobs in the specified project to which you have
         # been granted the READER job role.
         def list_jobs options = {}
-          execute do
+          # The list operation is considered idempotent
+          execute backoff: true do
             service.list_jobs \
               @project, all_users: options[:all], max_results: options[:max],
                         page_token: options[:token], projection: "full",
@@ -207,35 +234,37 @@ module Google
         ##
         # Cancel the job specified by jobId.
         def cancel_job job_id
-          execute { service.cancel_job @project, job_id }
+          # The BigQuery team has told us cancelling is considered idempotent
+          execute(backoff: true) { service.cancel_job @project, job_id }
         end
         ##
         # Returns the job specified by jobID.
         def get_job job_id
-          execute { service.get_job @project, job_id }
+          # The get operation is considered idempotent
+          execute(backoff: true) { service.get_job @project, job_id }
         end
         def insert_job config
           job_object = API::Job.new(
+            job_reference: job_ref_from(nil, nil),
             configuration: config
           )
-          execute { service.insert_job @project, job_object }
+          # Jobs have generated id, so this operation is considered idempotent
+          execute(backoff: true) { service.insert_job @project, job_object }
         end
         def query_job query, options = {}
           config = query_table_config(query, options)
-          execute { service.insert_job @project, config }
-        end
-        def query query, options = {}
-          execute { service.query_job @project, query_config(query, options) }
+          # Jobs have generated id, so this operation is considered idempotent
+          execute(backoff: true) { service.insert_job @project, config }
         end
         ##
         # Returns the query data for the job
         def job_query_results job_id, options = {}
-          execute do
+          # The get operation is considered idempotent
+          execute backoff: true do
             service.get_job_query_results @project,
                                           job_id,
                                           max_results: options.delete(:max),
@@ -246,21 +275,24 @@ module Google
         end
         def copy_table source, target, options = {}
-          execute do
+          # Jobs have generated id, so this operation is considered idempotent
+          execute backoff: true do
             service.insert_job @project, copy_table_config(
               source, target, options)
           end
         end
         def extract_table table, storage_files, options = {}
-          execute do
+          # Jobs have generated id, so this operation is considered idempotent
+          execute backoff: true do
             service.insert_job \
               @project, extract_table_config(table, storage_files, options)
           end
         end
         def load_table_gs_url dataset_id, table_id, url, options = {}
-          execute do
+          # Jobs have generated id, so this operation is considered idempotent
+          execute backoff: true do
             service.insert_job \
               @project, load_table_url_config(dataset_id, table_id,
                                               url, options)
@@ -268,7 +300,8 @@ module Google
         end
         def load_table_file dataset_id, table_id, file, options = {}
-          execute do
+          # Jobs have generated id, so this operation is considered idempotent
+          execute backoff: true do
             service.insert_job \
               @project, load_table_file_config(
                 dataset_id, table_id, file, options),
@@ -299,7 +332,7 @@ module Google
         ##
         # Lists all projects to which you have been granted any project role.
         def list_projects options = {}
-          execute do
+          execute backoff: true do
             service.list_projects max_results: options[:max],
                                   page_token: options[:token]
           end
@@ -335,6 +368,23 @@ module Google
           end
         end
+        # Generate a random string similar to the BigQuery service job IDs.
+        def generate_id
+          SecureRandom.urlsafe_base64(21)
+        end
+        # If no job_id or prefix is given, always generate a client-side job ID
+        # anyway, for idempotent retry in the google-api-client layer.
+        # See https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid
+        def job_ref_from job_id, prefix
+          prefix ||= "job_"
+          job_id ||= "#{prefix}#{generate_id}"
+          API::JobReference.new(
+            project_id: @project,
+            job_id: job_id
+          )
+        end
         def load_table_file_opts dataset_id, table_id, file, options = {}
           path = Pathname(file).to_path
           {
@@ -346,21 +396,26 @@ module Google
             projection_fields: projection_fields(options[:projection_fields]),
             allow_jagged_rows: options[:jagged_rows],
             allow_quoted_newlines: options[:quoted_newlines],
+            autodetect: options[:autodetect],
             encoding: options[:encoding], field_delimiter: options[:delimiter],
             ignore_unknown_values: options[:ignore_unknown],
-            max_bad_records: options[:max_bad_records], quote: options[:quote],
+            max_bad_records: options[:max_bad_records],
+            null_marker: options[:null_marker], quote: options[:quote],
             schema: options[:schema], skip_leading_rows: options[:skip_leading]
           }.delete_if { |_, v| v.nil? }
         end
         def load_table_file_config dataset_id, table_id, file, options = {}
           load_opts = load_table_file_opts dataset_id, table_id, file, options
-          API::Job.new(
+          req = API::Job.new(
+            job_reference: job_ref_from(options[:job_id], options[:prefix]),
             configuration: API::JobConfiguration.new(
               load: API::JobConfigurationLoad.new(load_opts),
               dry_run: options[:dryrun]
             )
           )
+          req.configuration.labels = options[:labels] if options[:labels]
+          req
         end
         def load_table_url_opts dataset_id, table_id, url, options = {}
@@ -374,21 +429,26 @@ module Google
             projection_fields: projection_fields(options[:projection_fields]),
             allow_jagged_rows: options[:jagged_rows],
             allow_quoted_newlines: options[:quoted_newlines],
+            autodetect: options[:autodetect],
             encoding: options[:encoding], field_delimiter: options[:delimiter],
             ignore_unknown_values: options[:ignore_unknown],
-            max_bad_records: options[:max_bad_records], quote: options[:quote],
+            max_bad_records: options[:max_bad_records],
+            null_marker: options[:null_marker], quote: options[:quote],
             schema: options[:schema], skip_leading_rows: options[:skip_leading]
           }.delete_if { |_, v| v.nil? }
         end
         def load_table_url_config dataset_id, table_id, url, options = {}
           load_opts = load_table_url_opts dataset_id, table_id, url, options
-          API::Job.new(
+          req = API::Job.new(
+            job_reference: job_ref_from(options[:job_id], options[:prefix]),
             configuration: API::JobConfiguration.new(
               load: API::JobConfigurationLoad.new(load_opts),
               dry_run: options[:dryrun]
             )
           )
+          req.configuration.labels = options[:labels] if options[:labels]
+          req
         end
         # rubocop:disable all
@@ -397,8 +457,9 @@ module Google
         # Job description for query job
         def query_table_config query, options
           dest_table = table_ref_from options[:table]
-          default_dataset = dataset_ref_from options[:dataset]
+          dataset_config = dataset_ref_from options[:dataset], options[:project]
           req = API::Job.new(
+            job_reference: job_ref_from(options[:job_id], options[:prefix]),
             configuration: API::JobConfiguration.new(
               query: API::JobConfigurationQuery.new(
                 query: query,
@@ -410,14 +471,16 @@ module Google
                 write_disposition: write_disposition(options[:write]),
                 allow_large_results: options[:large_results],
                 flatten_results: options[:flatten],
-                default_dataset: default_dataset,
+                default_dataset: dataset_config,
                 use_legacy_sql: Convert.resolve_legacy_sql(
                   options[:standard_sql], options[:legacy_sql]),
                 maximum_billing_tier: options[:maximum_billing_tier],
-                maximum_bytes_billed: options[:maximum_bytes_billed]
+                maximum_bytes_billed: options[:maximum_bytes_billed],
+                user_defined_function_resources: udfs(options[:udfs])
               )
             )
           )
+          req.configuration.labels = options[:labels] if options[:labels]
           if options[:params]
             if Array === options[:params]
@@ -439,6 +502,14 @@ module Google
             end
           end
+          if options[:external]
+            external_table_pairs = options[:external].map do |name, obj|
+              [String(name), obj.to_gapi]
+            end
+            external_table_hash = Hash[external_table_pairs]
+            req.configuration.query.table_definitions = external_table_hash
+          end
           req
         end
@@ -484,7 +555,8 @@ module Google
         ##
         # Job description for copy job
         def copy_table_config source, target, options = {}
-          API::Job.new(
+          req = API::Job.new(
+            job_reference: job_ref_from(options[:job_id], options[:prefix]),
             configuration: API::JobConfiguration.new(
               copy: API::JobConfigurationTableCopy.new(
                 source_table: source,
@@ -495,6 +567,8 @@ module Google
               dry_run: options[:dryrun]
             )
           )
+          req.configuration.labels = options[:labels] if options[:labels]
+          req
         end
         def extract_table_config table, storage_files, options = {}
@@ -502,7 +576,8 @@ module Google
             url.respond_to?(:to_gs_url) ? url.to_gs_url : url
           end
           dest_format = source_format storage_urls.first, options[:format]
-          API::Job.new(
+          req = API::Job.new(
+            job_reference: job_ref_from(options[:job_id], options[:prefix]),
             configuration: API::JobConfiguration.new(
               extract: API::JobConfigurationExtract.new(
                 destination_uris: Array(storage_urls),
@@ -515,6 +590,8 @@ module Google
               dry_run: options[:dryrun]
             )
           )
+          req.configuration.labels = options[:labels] if options[:labels]
+          req
         end
         def create_disposition str
@@ -550,6 +627,7 @@ module Google
                   "newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
                   "avro" => "AVRO",
                   "datastore" => "DATASTORE_BACKUP",
+                  "backup" => "DATASTORE_BACKUP",
                   "datastore_backup" => "DATASTORE_BACKUP"
                 }[format.to_s.downcase]
           return val unless val.nil?
@@ -573,11 +651,86 @@ module Google
           nil
         end
-        def execute
-          yield
+        def udfs array_or_str
+          Array(array_or_str).map do |uri_or_code|
+            resource = API::UserDefinedFunctionResource.new
+            if uri_or_code.start_with?("gs://")
+              resource.resource_uri = uri_or_code
+            else
+              resource.inline_code = uri_or_code
+            end
+            resource
+          end
+        end
+        ##
+        # Yields to the given block and re-raises any Google::Apis::Error it
+        # produces as the result of Google::Cloud::Error.from_error.
+        #
+        # When +backoff+ is truthy the block is run through Backoff#execute,
+        # built with the +retries+ value in scope here (defined outside this
+        # excerpt); otherwise the block is yielded to directly.
+        def execute backoff: nil
+          if backoff
+            Backoff.new(retries: retries).execute { yield }
+          else
+            yield
+          end
         rescue Google::Apis::Error => e
           raise Google::Cloud::Error.from_error(e)
         end
+        class Backoff
+          class << self
+            attr_accessor :retries
+            attr_accessor :reasons
+            attr_accessor :backoff
+          end
+          self.retries = 5
+          self.reasons = %w(rateLimitExceeded backendError)
+          self.backoff = lambda do |retries|
+            # Max delay is 32 seconds
+            # See "Back-off Requirements" here:
+            # https://cloud.google.com/bigquery/sla
+            retries = 5 if retries > 5
+            delay = 2 ** retries
+            sleep delay
+          end
+          def initialize options = {}
+            @retries = (options[:retries] || Backoff.retries).to_i
+            @reasons = (options[:reasons] || Backoff.reasons).to_a
+            @backoff =  options[:backoff] || Backoff.backoff
+          end
+          def execute
+            current_retries = 0
+            loop do
+              begin
+                return yield
+              rescue Google::Apis::Error => e
+                raise e unless retry? e.body, current_retries
+                @backoff.call current_retries
+                current_retries += 1
+              end
+            end
+          end
+          protected
+          def retry? result, current_retries #:nodoc:
+            if current_retries < @retries
+              return true if retry_error_reason? result
+            end
+            false
+          end
+          def retry_error_reason? err_body
+            err_hash = JSON.parse err_body
+            json_errors = Array err_hash["error"]["errors"]
+            return false if json_errors.empty?
+            json_errors.each do |json_error|
+              return false unless @reasons.include? json_error["reason"]
+            end
+            true
+          rescue
+            false
+          end
+        end
       end
     end
   end