google-cloud-bigquery 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in those public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: cb7f8896ef2f04b07d335a2d619a60557246633ef343255c7fd6f2c31c1afbcf
-   data.tar.gz: 8445e2df96afbd0be615ee17d891d3b192a2a6a32f21f010113809c020e6aa76
+   metadata.gz: 41d3da96cf5cfe992d89be7d76eae719bee105acab2b2621ae9d96637963c4fa
+   data.tar.gz: 2da003b89f6ab554f97941a56aa54bc35b0a9bb239b1bf19638a1ed148df8592
  SHA512:
-   metadata.gz: ead4df8de6a2db97edf826bfdf82c49b48e7eebd999d3ee1448616a5b1d53d219bcc66dfe13fbe6e005282d7a59582ff4ead46adbe10a25cb365b19ebb0a5ba2
-   data.tar.gz: f1de83104a82b84673b071395aa1527bc4ed3d992c2ed0b7b04b9cbb13f3f303277da04ee93e944411ae0cab10cdea573783da6ef9ece20782f429c34da76ac2
+   metadata.gz: a049fc85d22b3ea866a5beff61c46e987411acf3d946837adf465ebfabfade7d4fc633e487b29eb3b3b0be36c0f08e3b8740cc636a8c5c362b40b13fc47ad127
+   data.tar.gz: 7f8c25afa22dfb9259e73ac5b3ae6917629b84bfcde9cf52055d0a88bb680450ef1ed173d8c64edfae06afbb535b3f02e75f024a7032b14907908525d6c4d582
data/README.md CHANGED
@@ -23,26 +23,27 @@ Instructions and configuration options are covered in the [Authentication Guide]
  ```ruby
  require "google/cloud/bigquery"
 
- bigquery = Google::Cloud::Bigquery.new(
-   project_id: "my-todo-project",
-   credentials: "/path/to/keyfile.json"
- )
-
- # Create a new table to archive todos
- dataset = bigquery.dataset "my-todo-archive"
- table = dataset.create_table "todos",
-   name: "Todos Archive",
-   description: "Archive for completed TODO records"
-
- # Load data into the table
- file = File.open "/archive/todos/completed-todos.csv"
- table.load file
-
- # Run a query for the number of completed todos by owner
- count_sql = "SELECT owner, COUNT(*) AS complete_count FROM todos GROUP BY owner"
- data = bigquery.query count_sql
+ bigquery = Google::Cloud::Bigquery.new
+ dataset = bigquery.create_dataset "my_dataset"
+
+ table = dataset.create_table "my_table" do |t|
+   t.name = "My Table"
+   t.description = "A description of my table."
+   t.schema do |s|
+     s.string "first_name", mode: :required
+     s.string "last_name", mode: :required
+     s.integer "age", mode: :required
+   end
+ end
+
+ # Load data into the table from Google Cloud Storage
+ table.load "gs://my-bucket/file-name.csv"
+
+ # Run a query
+ data = dataset.query "SELECT first_name FROM my_table"
+
  data.each do |row|
-   puts row[:name]
+   puts row[:first_name]
  end
  ```
 
@@ -50,6 +51,13 @@ end
 
  This library is supported on Ruby 2.0+.
 
+ However, Ruby 2.3 or later is strongly recommended, as earlier releases have
+ reached or are nearing end-of-life. After June 1, 2018, Google will provide
+ official support only for Ruby versions that are considered current and
+ supported by Ruby Core (that is, Ruby versions that are either in normal
+ maintenance or in security maintenance).
+ See https://www.ruby-lang.org/en/downloads/branches/ for further details.
+
  ## Versioning
 
  This library follows [Semantic Versioning](http://semver.org/).
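
Note that the new quick-start relies on implicit authentication (environment variables or application default credentials, per the Authentication Guide) instead of the removed `project_id`/`credentials` arguments. For readers migrating from the old snippet, here is a minimal sketch of explicit configuration; the project ID and keyfile path are placeholders, not values taken from this package:

```ruby
require "google/cloud/bigquery"

# Explicit configuration still works alongside the new implicit style.
# Replace the placeholders with your own project and service-account keyfile.
bigquery = Google::Cloud::Bigquery.new(
  project_id: "my-project",
  credentials: "/path/to/keyfile.json"
)

dataset = bigquery.dataset "my_dataset"
```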
@@ -22,7 +22,7 @@ require "date"
22
22
  module Google
23
23
  module Cloud
24
24
  module Bigquery
25
- # rubocop:disable all
25
+ # rubocop:disable Metrics/ModuleLength
26
26
 
27
27
  ##
28
28
  # @private
@@ -42,7 +42,6 @@ module Google
42
42
  # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
43
43
  # | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
44
44
  # | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
45
-
46
45
  module Convert
47
46
  ##
48
47
  # @private
@@ -62,6 +61,8 @@ module Google
62
61
  Hash[row_pairs]
63
62
  end
64
63
 
64
+ # rubocop:disable all
65
+
65
66
  def self.format_value value, field
66
67
  if value.nil?
67
68
  nil
@@ -212,16 +213,6 @@ module Google
212
213
  end
213
214
  end
214
215
 
215
- ##
216
- # @private
217
- def self.to_json_rows rows
218
- rows.map { |row| to_json_row row }
219
- end
220
- ##
221
- # @private
222
- def self.to_json_row row
223
- Hash[row.map { |k, v| [k.to_s, to_json_value(v)] }]
224
- end
225
216
  ##
226
217
  # @private
227
218
  def self.to_json_value value
@@ -245,14 +236,116 @@ module Google
245
236
  end
246
237
  end
247
238
 
239
+ # rubocop:enable all
240
+
241
+ ##
242
+ # @private
243
+ def self.to_json_rows rows
244
+ rows.map { |row| to_json_row row }
245
+ end
246
+
247
+ ##
248
+ # @private
249
+ def self.to_json_row row
250
+ Hash[row.map { |k, v| [k.to_s, to_json_value(v)] }]
251
+ end
252
+
248
253
  def self.resolve_legacy_sql standard_sql, legacy_sql
249
254
  return !standard_sql unless standard_sql.nil?
250
255
  return legacy_sql unless legacy_sql.nil?
251
256
  false
252
257
  end
253
258
 
254
- # rubocop:enable all
259
+ ##
260
+ # @private
261
+ #
262
+ # Converts create disposition strings to API values.
263
+ #
264
+ # @return [String] API representation of create disposition.
265
+ def self.create_disposition str
266
+ val = {
267
+ "create_if_needed" => "CREATE_IF_NEEDED",
268
+ "createifneeded" => "CREATE_IF_NEEDED",
269
+ "if_needed" => "CREATE_IF_NEEDED",
270
+ "needed" => "CREATE_IF_NEEDED",
271
+ "create_never" => "CREATE_NEVER",
272
+ "createnever" => "CREATE_NEVER",
273
+ "never" => "CREATE_NEVER"
274
+ }[str.to_s.downcase]
275
+ return val unless val.nil?
276
+ str
277
+ end
278
+
279
+ ##
280
+ # @private
281
+ #
282
+ # Converts write disposition strings to API values.
283
+ #
284
+ # @return [String] API representation of write disposition.
285
+ def self.write_disposition str
286
+ val = {
287
+ "write_truncate" => "WRITE_TRUNCATE",
288
+ "writetruncate" => "WRITE_TRUNCATE",
289
+ "truncate" => "WRITE_TRUNCATE",
290
+ "write_append" => "WRITE_APPEND",
291
+ "writeappend" => "WRITE_APPEND",
292
+ "append" => "WRITE_APPEND",
293
+ "write_empty" => "WRITE_EMPTY",
294
+ "writeempty" => "WRITE_EMPTY",
295
+ "empty" => "WRITE_EMPTY"
296
+ }[str.to_s.downcase]
297
+ return val unless val.nil?
298
+ str
299
+ end
300
+
301
+ ##
302
+ # @private
303
+ #
304
+ # Converts source format strings to API values.
305
+ #
306
+ # @return [String] API representation of source format.
307
+ def self.source_format format
308
+ val = {
309
+ "csv" => "CSV",
310
+ "json" => "NEWLINE_DELIMITED_JSON",
311
+ "newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
312
+ "avro" => "AVRO",
313
+ "datastore" => "DATASTORE_BACKUP",
314
+ "backup" => "DATASTORE_BACKUP",
315
+ "datastore_backup" => "DATASTORE_BACKUP"
316
+ }[format.to_s.downcase]
317
+ return val unless val.nil?
318
+ format
319
+ end
320
+
321
+ ##
322
+ # @private
323
+ #
324
+ # Converts file paths into source format by extension.
325
+ #
326
+ # @return [String] API representation of source format.
327
+ def self.derive_source_format_from_list paths
328
+ paths.map do |path|
329
+ derive_source_format path
330
+ end.compact.uniq.first
331
+ end
332
+
333
+ ##
334
+ # @private
335
+ #
336
+ # Converts file path into source format by extension.
337
+ #
338
+ # @return [String] API representation of source format.
339
+ def self.derive_source_format path
340
+ return "CSV" if path.end_with? ".csv"
341
+ return "NEWLINE_DELIMITED_JSON" if path.end_with? ".json"
342
+ return "AVRO" if path.end_with? ".avro"
343
+ return "DATASTORE_BACKUP" if path.end_with? ".backup_info"
344
+ nil
345
+ end
255
346
  end
347
+
348
+ # rubocop:enable Metrics/ModuleLength
256
349
  end
257
350
  end
258
351
  end
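
The new `Convert` helpers above normalize user-friendly option strings and file extensions into the enum values the BigQuery API expects, and pass unrecognized values through unchanged. The module is documented as `@private`, so the sketch below illustrates the mapping rather than public API:

```ruby
require "google/cloud/bigquery"

convert = Google::Cloud::Bigquery::Convert

convert.create_disposition "needed"                      #=> "CREATE_IF_NEEDED"
convert.write_disposition :truncate                      #=> "WRITE_TRUNCATE"
convert.source_format "json"                             #=> "NEWLINE_DELIMITED_JSON"
convert.derive_source_format "gs://my-bucket/data.avro"  #=> "AVRO"

# Unknown values fall through so the service can report the error itself.
convert.source_format "parquet"                          #=> "parquet"
```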
@@ -12,6 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ require "google/cloud/bigquery/encryption_configuration"
15
16
 
16
17
  module Google
17
18
  module Cloud
@@ -129,6 +130,173 @@ module Google
129
130
  disp = @gapi.configuration.copy.write_disposition
130
131
  disp == "WRITE_EMPTY"
131
132
  end
133
+
134
+ ##
135
+ # The encryption configuration of the destination table.
136
+ #
137
+ # @return [Google::Cloud::BigQuery::EncryptionConfiguration] Custom
138
+ # encryption configuration (e.g., Cloud KMS keys).
139
+ #
140
+ # @!group Attributes
141
+ def encryption
142
+ EncryptionConfiguration.from_gapi(
143
+ @gapi.configuration.copy.destination_encryption_configuration
144
+ )
145
+ end
146
+
147
+ ##
148
+ # Yielded to a block to accumulate changes for an API request.
149
+ class Updater < CopyJob
150
+ ##
151
+ # @private Create an Updater object.
152
+ def initialize gapi
153
+ @gapi = gapi
154
+ end
155
+
156
+ ##
157
+ # @private Create an Updater from an options hash.
158
+ #
159
+ # @return [Google::Cloud::Bigquery::CopyJob::Updater] A job
160
+ # configuration object for setting copy options.
161
+ def self.from_options service, source, target, options = {}
162
+ job_ref = service.job_ref_from options[:job_id], options[:prefix]
163
+ req = Google::Apis::BigqueryV2::Job.new(
164
+ job_reference: job_ref,
165
+ configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
166
+ copy: Google::Apis::BigqueryV2::JobConfigurationTableCopy.new(
167
+ source_table: source,
168
+ destination_table: target
169
+ ),
170
+ dry_run: options[:dryrun]
171
+ )
172
+ )
173
+
174
+ updater = CopyJob::Updater.new req
175
+ updater.create = options[:create]
176
+ updater.write = options[:write]
177
+ updater.labels = options[:labels] if options[:labels]
178
+ updater
179
+ end
180
+
181
+ ##
182
+ # Sets the geographic location where the job should run. Required
183
+ # except for US and EU.
184
+ #
185
+ # @param [String] value A geographic location, such as "US", "EU" or
186
+ # "asia-northeast1". Required except for US and EU.
187
+ #
188
+ # @example
189
+ # require "google/cloud/bigquery"
190
+ #
191
+ # bigquery = Google::Cloud::Bigquery.new
192
+ # dataset = bigquery.dataset "my_dataset"
193
+ # table = dataset.table "my_table"
194
+ # destination_table = dataset.table "my_destination_table"
195
+ #
196
+ # copy_job = table.copy_job destination_table do |j|
197
+ # j.location = "EU"
198
+ # end
199
+ #
200
+ # copy_job.wait_until_done!
201
+ # copy_job.done? #=> true
202
+ #
203
+ # @!group Attributes
204
+ def location= value
205
+ @gapi.job_reference.location = value
206
+ end
207
+
208
+ ##
209
+ # Sets the create disposition.
210
+ #
211
+ # This specifies whether the job is allowed to create new tables. The
212
+ # default value is `needed`.
213
+ #
214
+ # The following values are supported:
215
+ #
216
+ # * `needed` - Create the table if it does not exist.
217
+ # * `never` - The table must already exist. A 'notFound' error is
218
+ # raised if the table does not exist.
219
+ #
220
+ # @param [String] new_create The new create disposition.
221
+ #
222
+ # @!group Attributes
223
+ def create= new_create
224
+ @gapi.configuration.copy.update! create_disposition:
225
+ Convert.create_disposition(new_create)
226
+ end
227
+
228
+ ##
229
+ # Sets the write disposition.
230
+ #
231
+ # This specifies how to handle data already present in the table. The
232
+ # default value is `append`.
233
+ #
234
+ # The following values are supported:
235
+ #
236
+ # * `truncate` - BigQuery overwrites the table data.
237
+ # * `append` - BigQuery appends the data to the table.
238
+ # * `empty` - An error will be returned if the table already contains
239
+ # data.
240
+ #
241
+ # @param [String] new_write The new write disposition.
242
+ #
243
+ # @!group Attributes
244
+ def write= new_write
245
+ @gapi.configuration.copy.update! write_disposition:
246
+ Convert.write_disposition(new_write)
247
+ end
248
+
249
+ ##
250
+ # Sets the encryption configuration of the destination table.
251
+ #
252
+ # @param [Google::Cloud::BigQuery::EncryptionConfiguration] val
253
+ # Custom encryption configuration (e.g., Cloud KMS keys).
254
+ #
255
+ # @example
256
+ # require "google/cloud/bigquery"
257
+ #
258
+ # bigquery = Google::Cloud::Bigquery.new
259
+ # dataset = bigquery.dataset "my_dataset"
260
+ # table = dataset.table "my_table"
261
+ #
262
+ # key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
263
+ # encrypt_config = bigquery.encryption kms_key: key_name
264
+ # job = table.copy_job "my_dataset.new_table" do |job|
265
+ # job.encryption = encrypt_config
266
+ # end
267
+ #
268
+ # @!group Attributes
269
+ def encryption= val
270
+ @gapi.configuration.copy.update!(
271
+ destination_encryption_configuration: val.to_gapi
272
+ )
273
+ end
274
+
275
+ ##
276
+ # Sets the labels to use for the job.
277
+ #
278
+ # @param [Hash] value A hash of user-provided labels associated with
279
+ # the job. You can use these to organize and group your jobs. Label
280
+ # keys and values can be no longer than 63 characters, can only
281
+ # contain lowercase letters, numeric characters, underscores and
282
+ # dashes. International characters are allowed. Label values are
283
+ # optional. Label keys must start with a letter and each label in
284
+ # the list must have a different key.
285
+ #
286
+ # @!group Attributes
287
+ def labels= value
288
+ @gapi.configuration.update! labels: value
289
+ end
290
+
291
+ ##
292
+ # @private Returns the Google API client library version of this job.
293
+ #
294
+ # @return [<Google::Apis::BigqueryV2::Job>] (See
295
+ # {Google::Apis::BigqueryV2::Job})
296
+ def to_gapi
297
+ @gapi
298
+ end
299
+ end
132
300
  end
133
301
  end
134
302
  end
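
Putting the new `CopyJob::Updater` setters together, a copy job can now be configured entirely in a block. A minimal sketch, assuming the dataset, tables, and KMS key name below are placeholders:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"
table    = dataset.table "my_table"

key_name       = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
encrypt_config = bigquery.encryption kms_key: key_name

copy_job = table.copy_job "my_dataset.new_table" do |j|
  j.create = "needed"    # mapped to CREATE_IF_NEEDED
  j.write  = "truncate"  # mapped to WRITE_TRUNCATE
  j.labels = { "team" => "analytics" }
  j.encryption = encrypt_config
end

copy_job.wait_until_done!
copy_job.done? #=> true
```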
@@ -266,8 +266,8 @@ module Google
266
266
  # The geographic location where the dataset should reside. Possible
267
267
  # values include `EU` and `US`. The default value is `US`.
268
268
  #
269
- # @return [String, nil] The location code, or `nil` if the object is a
270
- # reference (see {#reference?}).
269
+ # @return [String, nil] The geographic location, or `nil` if the object
270
+ # is a reference (see {#reference?}).
271
271
  #
272
272
  # @!group Attributes
273
273
  #
@@ -696,6 +696,12 @@ module Google
696
696
  # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types)
697
697
  # for an overview of each BigQuery data type, including allowed values.
698
698
  #
699
+ # The geographic location for the job ("US", "EU", etc.) can be set via
700
+ # {QueryJob::Updater#location=} in a block passed to this method. If the
701
+ # dataset is a full resource representation (see {#resource_full?}), the
702
+ # location of the job will be automatically set to the location of the
703
+ # dataset.
704
+ #
699
705
  # @param [String] query A query string, following the BigQuery [query
700
706
  # syntax](https://cloud.google.com/bigquery/query-reference), of the
701
707
  # query to execute. Example: "SELECT count(f1) FROM
@@ -761,11 +767,6 @@ module Google
761
767
  # Flattens all nested and repeated fields in the query results. The
762
768
  # default value is `true`. `large_results` parameter must be `true` if
763
769
  # this is set to `false`.
764
- # @param [Integer] maximum_billing_tier Limits the billing tier for this
765
- # job. Queries that have resource usage beyond this tier will fail
766
- # (without incurring a charge). Optional. If unspecified, this will be
767
- # set to your project default. For more information, see [High-Compute
768
- # queries](https://cloud.google.com/bigquery/pricing#high-compute).
769
770
  # @param [Integer] maximum_bytes_billed Limits the bytes billed for this
770
771
  # job. Queries that will have bytes billed beyond this limit will fail
771
772
  # (without incurring a charge). Optional. If unspecified, this will be
@@ -799,6 +800,11 @@ module Google
799
800
  # inline code resource is equivalent to providing a URI for a file
800
801
  # containing the same code. See [User-Defined
801
802
  # Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
803
+ # @param [Integer] maximum_billing_tier Deprecated: Change the billing
804
+ # tier to allow high-compute queries.
805
+ # @yield [job] a job configuration object
806
+ # @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
807
+ # configuration object for setting additional options for the query.
802
808
  #
803
809
  # @return [Google::Cloud::Bigquery::QueryJob] A new query job object.
804
810
  #
@@ -865,7 +871,7 @@ module Google
865
871
  # end
866
872
  # end
867
873
  #
868
- # @example Query using external data source:
874
+ # @example Query using external data source, set destination:
869
875
  # require "google/cloud/bigquery"
870
876
  #
871
877
  # bigquery = Google::Cloud::Bigquery.new
@@ -877,8 +883,10 @@ module Google
877
883
  # csv.skip_leading_rows = 1
878
884
  # end
879
885
  #
880
- # job = dataset.query_job "SELECT * FROM my_ext_table",
881
- # external: { my_ext_table: csv_table }
886
+ # job = dataset.query_job "SELECT * FROM my_ext_table" do |query|
887
+ # query.external = { my_ext_table: csv_table }
888
+ # query.table = dataset.table "my_table", skip_lookup: true
889
+ # end
882
890
  #
883
891
  # job.wait_until_done!
884
892
  # if !job.failed?
@@ -895,17 +903,23 @@ module Google
895
903
  legacy_sql: nil, large_results: nil, flatten: nil,
896
904
  maximum_billing_tier: nil, maximum_bytes_billed: nil,
897
905
  job_id: nil, prefix: nil, labels: nil, udfs: nil
906
+ ensure_service!
898
907
  options = { priority: priority, cache: cache, table: table,
899
908
  create: create, write: write,
900
909
  large_results: large_results, flatten: flatten,
901
910
  legacy_sql: legacy_sql, standard_sql: standard_sql,
902
911
  maximum_billing_tier: maximum_billing_tier,
903
912
  maximum_bytes_billed: maximum_bytes_billed,
904
- params: params, external: external, labels: labels,
905
- job_id: job_id, prefix: prefix, udfs: udfs }
906
- options[:dataset] ||= self
907
- ensure_service!
908
- gapi = service.query_job query, options
913
+ job_id: job_id, prefix: prefix, params: params,
914
+ external: external, labels: labels, udfs: udfs }
915
+
916
+ updater = QueryJob::Updater.from_options service, query, options
917
+ updater.dataset = self
918
+ updater.location = location if location # may be dataset reference
919
+
920
+ yield updater if block_given?
921
+
922
+ gapi = service.query_job updater.to_gapi
909
923
  Job.from_gapi gapi, service
910
924
  end
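
With `#query_job` now yielding a `QueryJob::Updater`, job options can be set in the block instead of (or alongside) keyword arguments. A sketch, assuming `my_table` and `my_results` are placeholder tables in the dataset:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

job = dataset.query_job "SELECT first_name FROM my_table" do |query|
  query.location = "EU"   # see the location note in the method docs above
  query.table    = dataset.table "my_results", skip_lookup: true
end

job.wait_until_done!
job.data.each { |row| puts row[:first_name] } unless job.failed?
```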
911
925
 
@@ -938,6 +952,12 @@ module Google
938
952
  # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types)
939
953
  # for an overview of each BigQuery data type, including allowed values.
940
954
  #
955
+ # The geographic location for the job ("US", "EU", etc.) can be set via
956
+ # {QueryJob::Updater#location=} in a block passed to this method. If the
957
+ # dataset is a full resource representation (see {#resource_full?}), the
958
+ # location of the job will be automatically set to the location of the
959
+ # dataset.
960
+ #
941
961
  # @see https://cloud.google.com/bigquery/querying-data Querying Data
942
962
  #
943
963
  # @param [String] query A query string, following the BigQuery [query
@@ -985,6 +1005,9 @@ module Google
985
1005
  # When set to false, the values of `large_results` and `flatten` are
986
1006
  # ignored; the query will be run as if `large_results` is true and
987
1007
  # `flatten` is false. Optional. The default value is false.
1008
+ # @yield [job] a job configuration object
1009
+ # @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
1010
+ # configuration object for setting additional options for the query.
988
1011
  #
989
1012
  # @return [Google::Cloud::Bigquery::Data] A new data object.
990
1013
  #
@@ -1039,7 +1062,7 @@ module Google
1039
1062
  # puts row[:name]
1040
1063
  # end
1041
1064
  #
1042
- # @example Query using external data source:
1065
+ # @example Query using external data source, set destination:
1043
1066
  # require "google/cloud/bigquery"
1044
1067
  #
1045
1068
  # bigquery = Google::Cloud::Bigquery.new
@@ -1051,8 +1074,10 @@ module Google
1051
1074
  # csv.skip_leading_rows = 1
1052
1075
  # end
1053
1076
  #
1054
- # data = dataset.query "SELECT * FROM my_ext_table",
1055
- # external: { my_ext_table: csv_table }
1077
+ # data = dataset.query "SELECT * FROM my_ext_table" do |query|
1078
+ # query.external = { my_ext_table: csv_table }
1079
+ # query.table = dataset.table "my_table", skip_lookup: true
1080
+ # end
1056
1081
  #
1057
1082
  # data.each do |row|
1058
1083
  # puts row[:name]
@@ -1063,21 +1088,19 @@ module Google
1063
1088
  def query query, params: nil, external: nil, max: nil, cache: true,
1064
1089
  standard_sql: nil, legacy_sql: nil
1065
1090
  ensure_service!
1066
- options = { params: params, external: external, cache: cache,
1067
- legacy_sql: legacy_sql, standard_sql: standard_sql }
1091
+ options = { priority: "INTERACTIVE", external: external, cache: cache,
1092
+ legacy_sql: legacy_sql, standard_sql: standard_sql,
1093
+ params: params }
1094
+ options[:dataset] ||= self
1095
+ updater = QueryJob::Updater.from_options service, query, options
1096
+ updater.location = location if location # may be dataset reference
1068
1097
 
1069
- job = query_job query, options
1070
- job.wait_until_done!
1098
+ yield updater if block_given?
1071
1099
 
1072
- if job.failed?
1073
- begin
1074
- # raise to activate ruby exception cause handling
1075
- raise job.gapi_error
1076
- rescue StandardError => e
1077
- # wrap Google::Apis::Error with Google::Cloud::Error
1078
- raise Google::Cloud::Error.from_error(e)
1079
- end
1080
- end
1100
+ gapi = service.query_job updater.to_gapi
1101
+ job = Job.from_gapi gapi, service
1102
+ job.wait_until_done!
1103
+ ensure_job_succeeded! job
1081
1104
 
1082
1105
  job.data max: max
1083
1106
  end
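
`#query` now builds the same `QueryJob::Updater` internally, waits for the job, and raises through `ensure_job_succeeded!` on failure, so the synchronous form accepts the same block. A sketch with placeholder table and column names:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

data = dataset.query "SELECT first_name FROM my_table WHERE age >= ?",
                     params: [21] do |query|
  query.location = "EU"
end

data.each { |row| puts row[:first_name] }
```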
@@ -1147,10 +1170,17 @@ module Google
1147
1170
  # file directly. See [Loading Data with a POST
1148
1171
  # Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
1149
1172
  #
1173
+ # The geographic location for the job ("US", "EU", etc.) can be set via
1174
+ # {LoadJob::Updater#location=} in a block passed to this method. If the
1175
+ # dataset is a full resource representation (see {#resource_full?}), the
1176
+ # location of the job will be automatically set to the location of the
1177
+ # dataset.
1178
+ #
1150
1179
  # @param [String] table_id The destination table to load the data into.
1151
- # @param [File, Google::Cloud::Storage::File, String, URI] file A file
1152
- # or the URI of a Google Cloud Storage file containing data to load
1153
- # into the table.
1180
+ # @param [File, Google::Cloud::Storage::File, String, URI,
1181
+ # Array<Google::Cloud::Storage::File, String, URI>] files
1182
+ # A file or the URI of a Google Cloud Storage file, or an Array of
1183
+ # those, containing data to load into the table.
1154
1184
  # @param [String] format The exported file format. The default value is
1155
1185
  # `csv`.
1156
1186
  #
@@ -1269,13 +1299,12 @@ module Google
1269
1299
  # optional. Label keys must start with a letter and each label in the
1270
1300
  # list must have a different key.
1271
1301
  #
1272
- # @yield [schema] A block for setting the schema for the destination
1273
- # table. The schema can be omitted if the destination table already
1274
- # exists, or if you're loading data from a Google Cloud Datastore
1275
- # backup.
1276
- # @yieldparam [Google::Cloud::Bigquery::Schema] schema The schema
1277
- # instance provided using the `schema` option, or a new, empty schema
1278
- # instance
1302
+ # @yield [updater] A block for setting the schema and other
1303
+ # options for the destination table. The schema can be omitted if the
1304
+ # destination table already exists, or if you're loading data from a
1305
+ # Google Cloud Datastore backup.
1306
+ # @yieldparam [Google::Cloud::Bigquery::LoadJob::Updater] updater An
1307
+ # updater to modify the load job and its schema.
1279
1308
  #
1280
1309
  # @return [Google::Cloud::Bigquery::LoadJob] A new load job object.
1281
1310
  #
@@ -1312,6 +1341,25 @@ module Google
1312
1341
  # end
1313
1342
  # end
1314
1343
  #
1344
+ # @example Pass a list of google-cloud-storage files:
1345
+ # require "google/cloud/bigquery"
1346
+ # require "google/cloud/storage"
1347
+ #
1348
+ # bigquery = Google::Cloud::Bigquery.new
1349
+ # dataset = bigquery.dataset "my_dataset"
1350
+ #
1351
+ # storage = Google::Cloud::Storage.new
1352
+ # bucket = storage.bucket "my-bucket"
1353
+ # file = bucket.file "file-name.csv"
1354
+ # list = [file, "gs://my-bucket/file-name2.csv"]
1355
+ # load_job = dataset.load_job "my_new_table", list do |schema|
1356
+ # schema.string "first_name", mode: :required
1357
+ # schema.record "cities_lived", mode: :repeated do |nested_schema|
1358
+ # nested_schema.string "place", mode: :required
1359
+ # nested_schema.integer "number_of_years", mode: :required
1360
+ # end
1361
+ # end
1362
+ #
1315
1363
  # @example Upload a file directly:
1316
1364
  # require "google/cloud/bigquery"
1317
1365
  #
@@ -1333,13 +1381,15 @@ module Google
1333
1381
  # bigquery = Google::Cloud::Bigquery.new
1334
1382
  # dataset = bigquery.dataset "my_dataset"
1335
1383
  #
1336
- # load_job = dataset.load_job "my_new_table",
1337
- # "gs://my-bucket/xxxx.kind_name.backup_info",
1338
- # format: "datastore_backup"
1384
+ # load_job = dataset.load_job(
1385
+ # "my_new_table",
1386
+ # "gs://my-bucket/xxxx.kind_name.backup_info") do |j|
1387
+ # j.format = "datastore_backup"
1388
+ # end
1339
1389
  #
1340
1390
  # @!group Data
1341
1391
  #
1342
- def load_job table_id, file, format: nil, create: nil, write: nil,
1392
+ def load_job table_id, files, format: nil, create: nil, write: nil,
1343
1393
  projection_fields: nil, jagged_rows: nil,
1344
1394
  quoted_newlines: nil, encoding: nil, delimiter: nil,
1345
1395
  ignore_unknown: nil, max_bad_records: nil, quote: nil,
@@ -1347,25 +1397,25 @@ module Google
1347
1397
  prefix: nil, labels: nil, autodetect: nil, null_marker: nil
1348
1398
  ensure_service!
1349
1399
 
1350
- if block_given?
1351
- schema ||= Schema.from_gapi
1352
- yield schema
1353
- end
1354
- schema_gapi = schema.to_gapi if schema
1355
-
1356
- options = { format: format, create: create, write: write,
1357
- projection_fields: projection_fields,
1358
- jagged_rows: jagged_rows,
1359
- quoted_newlines: quoted_newlines, encoding: encoding,
1360
- delimiter: delimiter, ignore_unknown: ignore_unknown,
1361
- max_bad_records: max_bad_records, quote: quote,
1362
- skip_leading: skip_leading, dryrun: dryrun,
1363
- schema: schema_gapi, job_id: job_id, prefix: prefix,
1364
- labels: labels, autodetect: autodetect,
1365
- null_marker: null_marker }
1366
- return load_storage(table_id, file, options) if storage_url? file
1367
- return load_local(table_id, file, options) if local_file? file
1368
- raise Google::Cloud::Error, "Don't know how to load #{file}"
1400
+ updater = load_job_updater table_id,
1401
+ format: format, create: create,
1402
+ write: write,
1403
+ projection_fields: projection_fields,
1404
+ jagged_rows: jagged_rows,
1405
+ quoted_newlines: quoted_newlines,
1406
+ encoding: encoding,
1407
+ delimiter: delimiter,
1408
+ ignore_unknown: ignore_unknown,
1409
+ max_bad_records: max_bad_records,
1410
+ quote: quote, skip_leading: skip_leading,
1411
+ dryrun: dryrun, schema: schema,
1412
+ job_id: job_id, prefix: prefix,
1413
+ labels: labels, autodetect: autodetect,
1414
+ null_marker: null_marker
1415
+
1416
+ yield updater if block_given?
1417
+
1418
+ load_local_or_uri files, updater
1369
1419
  end
1370
1420
 
1371
1421
  ##
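
Because `#load_job` now yields a `LoadJob::Updater` rather than just a `Schema`, file options, the job location, and the schema can all be set in one block. A minimal sketch; the bucket, file, and table names are placeholders:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

load_job = dataset.load_job "my_new_table",
                            "gs://my-bucket/file-name.csv" do |job|
  job.location     = "EU"
  job.skip_leading = 1                       # skip the CSV header row
  job.string  "first_name", mode: :required
  job.integer "age",        mode: :required
end

load_job.wait_until_done!
load_job.failed? #=> false
```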
@@ -1379,10 +1429,17 @@ module Google
1379
1429
  # file directly. See [Loading Data with a POST
1380
1430
  # Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
1381
1431
  #
1432
+ # The geographic location for the job ("US", "EU", etc.) can be set via
1433
+ # {LoadJob::Updater#location=} in a block passed to this method. If the
1434
+ # dataset is a full resource representation (see {#resource_full?}), the
1435
+ # location of the job will be automatically set to the location of the
1436
+ # dataset.
1437
+ #
1382
1438
  # @param [String] table_id The destination table to load the data into.
1383
- # @param [File, Google::Cloud::Storage::File, String, URI] file A file
1384
- # or the URI of a Google Cloud Storage file containing data to load
1385
- # into the table.
1439
+ # @param [File, Google::Cloud::Storage::File, String, URI,
1440
+ # Array<Google::Cloud::Storage::File, String, URI>] files
1441
+ # A file or the URI of a Google Cloud Storage file, or an Array of
1442
+ # those, containing data to load into the table.
1386
1443
  # @param [String] format The exported file format. The default value is
1387
1444
  # `csv`.
1388
1445
  #
@@ -1479,13 +1536,12 @@ module Google
1479
1536
  # this option. Also note that for most use cases, the block yielded by
1480
1537
  # this method is a more convenient way to configure the schema.
1481
1538
  #
1482
- # @yield [schema] A block for setting the schema for the destination
1483
- # table. The schema can be omitted if the destination table already
1484
- # exists, or if you're loading data from a Google Cloud Datastore
1485
- # backup.
1486
- # @yieldparam [Google::Cloud::Bigquery::Schema] schema The schema
1487
- # instance provided using the `schema` option, or a new, empty schema
1488
- # instance
1539
+ # @yield [updater] A block for setting the schema of the destination
1540
+ # table and other options for the load job. The schema can be omitted
1541
+ # if the destination table already exists, or if you're loading data
1542
+ # from a Google Cloud Datastore backup.
1543
+ # @yieldparam [Google::Cloud::Bigquery::LoadJob::Updater] updater An
1544
+ # updater to modify the load job and its schema.
1489
1545
  #
1490
1546
  # @return [Boolean] Returns `true` if the load job was successful.
1491
1547
  #
@@ -1522,6 +1578,25 @@ module Google
1522
1578
  # end
1523
1579
  # end
1524
1580
  #
1581
+ # @example Pass a list of google-cloud-storage files:
1582
+ # require "google/cloud/bigquery"
1583
+ # require "google/cloud/storage"
1584
+ #
1585
+ # bigquery = Google::Cloud::Bigquery.new
1586
+ # dataset = bigquery.dataset "my_dataset"
1587
+ #
1588
+ # storage = Google::Cloud::Storage.new
1589
+ # bucket = storage.bucket "my-bucket"
1590
+ # file = bucket.file "file-name.csv"
1591
+ # list = [file, "gs://my-bucket/file-name2.csv"]
1592
+ # dataset.load "my_new_table", list do |schema|
1593
+ # schema.string "first_name", mode: :required
1594
+ # schema.record "cities_lived", mode: :repeated do |nested_schema|
1595
+ # nested_schema.string "place", mode: :required
1596
+ # nested_schema.integer "number_of_years", mode: :required
1597
+ # end
1598
+ # end
1599
+ #
1525
1600
  # @example Upload a file directly:
1526
1601
  # require "google/cloud/bigquery"
1527
1602
  #
@@ -1544,41 +1619,39 @@ module Google
1544
1619
  # dataset = bigquery.dataset "my_dataset"
1545
1620
  #
1546
1621
  # dataset.load "my_new_table",
1547
- # "gs://my-bucket/xxxx.kind_name.backup_info",
1548
- # format: "datastore_backup"
1622
+ # "gs://my-bucket/xxxx.kind_name.backup_info" do |j|
1623
+ # j.format = "datastore_backup"
1624
+ # end
1549
1625
  #
1550
1626
  # @!group Data
1551
1627
  #
1552
- def load table_id, file, format: nil, create: nil, write: nil,
1628
+ def load table_id, files, format: nil, create: nil, write: nil,
1553
1629
  projection_fields: nil, jagged_rows: nil, quoted_newlines: nil,
1554
1630
  encoding: nil, delimiter: nil, ignore_unknown: nil,
1555
1631
  max_bad_records: nil, quote: nil, skip_leading: nil,
1556
1632
  schema: nil, autodetect: nil, null_marker: nil
1633
+ ensure_service!
1557
1634
 
1558
- yield (schema ||= Schema.from_gapi) if block_given?
1635
+ updater = load_job_updater table_id,
1636
+ format: format, create: create,
1637
+ write: write,
1638
+ projection_fields: projection_fields,
1639
+ jagged_rows: jagged_rows,
1640
+ quoted_newlines: quoted_newlines,
1641
+ encoding: encoding,
1642
+ delimiter: delimiter,
1643
+ ignore_unknown: ignore_unknown,
1644
+ max_bad_records: max_bad_records,
1645
+ quote: quote, skip_leading: skip_leading,
1646
+ schema: schema,
1647
+ autodetect: autodetect,
1648
+ null_marker: null_marker
1559
1649
 
1560
- options = { format: format, create: create, write: write,
1561
- projection_fields: projection_fields,
1562
- jagged_rows: jagged_rows,
1563
- quoted_newlines: quoted_newlines, encoding: encoding,
1564
- delimiter: delimiter, ignore_unknown: ignore_unknown,
1565
- max_bad_records: max_bad_records, quote: quote,
1566
- skip_leading: skip_leading, schema: schema,
1567
- autodetect: autodetect, null_marker: null_marker }
1568
- job = load_job table_id, file, options
1650
+ yield updater if block_given?
1569
1651
 
1652
+ job = load_local_or_uri files, updater
1570
1653
  job.wait_until_done!
1571
-
1572
- if job.failed?
1573
- begin
1574
- # raise to activate ruby exception cause handling
1575
- raise job.gapi_error
1576
- rescue StandardError => e
1577
- # wrap Google::Apis::Error with Google::Cloud::Error
1578
- raise Google::Cloud::Error.from_error(e)
1579
- end
1580
- end
1581
-
1654
+ ensure_job_succeeded! job
1582
1655
  true
1583
1656
  end
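
`#load` follows the same pattern synchronously: it builds the updater, yields it, waits on the job, raises via `ensure_job_succeeded!` on failure, and returns `true` on success. A sketch, again with placeholder bucket and table names, showing the new ability to pass a list of source URIs:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

dataset.load "my_new_table", ["gs://my-bucket/part-1.csv",
                              "gs://my-bucket/part-2.csv"] do |job|
  job.autodetect = true  # let BigQuery infer the schema from the CSV files
end
#=> true
```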
1584
1657
 
@@ -1946,29 +2019,157 @@ module Google
1946
2019
  reload! if resource_partial?
1947
2020
  end
1948
2021
 
1949
- def load_storage table_id, url, options = {}
2022
+ def ensure_job_succeeded! job
2023
+ return unless job.failed?
2024
+ begin
2025
+ # raise to activate ruby exception cause handling
2026
+ raise job.gapi_error
2027
+ rescue StandardError => e
2028
+ # wrap Google::Apis::Error with Google::Cloud::Error
2029
+ raise Google::Cloud::Error.from_error(e)
2030
+ end
2031
+ end
2032
+
2033
+ def load_job_gapi table_id, dryrun, job_id: nil, prefix: nil
2034
+ job_ref = service.job_ref_from job_id, prefix
2035
+ Google::Apis::BigqueryV2::Job.new(
2036
+ job_reference: job_ref,
2037
+ configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
2038
+ load: Google::Apis::BigqueryV2::JobConfigurationLoad.new(
2039
+ destination_table: Google::Apis::BigqueryV2::TableReference.new(
2040
+ project_id: @service.project,
2041
+ dataset_id: dataset_id,
2042
+ table_id: table_id
2043
+ )
2044
+ ),
2045
+ dry_run: dryrun
2046
+ )
2047
+ )
2048
+ end
2049
+
2050
+ def load_job_csv_options! job, jagged_rows: nil,
2051
+ quoted_newlines: nil,
2052
+ delimiter: nil,
2053
+ quote: nil, skip_leading: nil,
2054
+ null_marker: nil
2055
+ job.jagged_rows = jagged_rows unless jagged_rows.nil?
2056
+ job.quoted_newlines = quoted_newlines unless quoted_newlines.nil?
2057
+ job.delimiter = delimiter unless delimiter.nil?
2058
+ job.null_marker = null_marker unless null_marker.nil?
2059
+ job.quote = quote unless quote.nil?
2060
+ job.skip_leading = skip_leading unless skip_leading.nil?
2061
+ end
2062
+
2063
+ def load_job_file_options! job, format: nil,
2064
+ projection_fields: nil,
2065
+ jagged_rows: nil, quoted_newlines: nil,
2066
+ encoding: nil, delimiter: nil,
2067
+ ignore_unknown: nil, max_bad_records: nil,
2068
+ quote: nil, skip_leading: nil,
2069
+ null_marker: nil
2070
+ job.format = format unless format.nil?
2071
+ unless projection_fields.nil?
2072
+ job.projection_fields = projection_fields
2073
+ end
2074
+ job.encoding = encoding unless encoding.nil?
2075
+ job.ignore_unknown = ignore_unknown unless ignore_unknown.nil?
2076
+ job.max_bad_records = max_bad_records unless max_bad_records.nil?
2077
+ load_job_csv_options! job, jagged_rows: jagged_rows,
2078
+ quoted_newlines: quoted_newlines,
2079
+ delimiter: delimiter,
2080
+ quote: quote,
2081
+ skip_leading: skip_leading,
2082
+ null_marker: null_marker
2083
+ end
2084
+
2085
+ def load_job_updater table_id, format: nil, create: nil,
2086
+ write: nil, projection_fields: nil,
2087
+ jagged_rows: nil, quoted_newlines: nil,
2088
+ encoding: nil, delimiter: nil,
2089
+ ignore_unknown: nil, max_bad_records: nil,
2090
+ quote: nil, skip_leading: nil, dryrun: nil,
2091
+ schema: nil, job_id: nil, prefix: nil, labels: nil,
2092
+ autodetect: nil, null_marker: nil
2093
+ new_job = load_job_gapi table_id, dryrun, job_id: job_id,
2094
+ prefix: prefix
2095
+ LoadJob::Updater.new(new_job).tap do |job|
2096
+ job.location = location if location # may be dataset reference
2097
+ job.create = create unless create.nil?
2098
+ job.write = write unless write.nil?
2099
+ job.schema = schema unless schema.nil?
2100
+ job.autodetect = autodetect unless autodetect.nil?
2101
+ job.labels = labels unless labels.nil?
2102
+ load_job_file_options! job, format: format,
2103
+ projection_fields: projection_fields,
2104
+ jagged_rows: jagged_rows,
2105
+ quoted_newlines: quoted_newlines,
2106
+ encoding: encoding,
2107
+ delimiter: delimiter,
2108
+ ignore_unknown: ignore_unknown,
2109
+ max_bad_records: max_bad_records,
2110
+ quote: quote,
2111
+ skip_leading: skip_leading,
2112
+ null_marker: null_marker
2113
+ end
2114
+ end
2115
+
2116
+ def load_storage urls, job_gapi
1950
2117
  # Convert to storage URL
1951
- url = url.to_gs_url if url.respond_to? :to_gs_url
1952
- url = url.to_s if url.is_a? URI
2118
+ urls = [urls].flatten.map do |url|
2119
+ if url.respond_to? :to_gs_url
2120
+ url.to_gs_url
2121
+ elsif url.is_a? URI
2122
+ url.to_s
2123
+ else
2124
+ url
2125
+ end
2126
+ end
1953
2127
 
1954
- gapi = service.load_table_gs_url dataset_id, table_id, url, options
2128
+ unless urls.nil?
2129
+ job_gapi.configuration.load.update! source_uris: urls
2130
+ if job_gapi.configuration.load.source_format.nil?
2131
+ source_format = Convert.derive_source_format_from_list urls
2132
+ unless source_format.nil?
2133
+ job_gapi.configuration.load.source_format = source_format
2134
+ end
2135
+ end
2136
+ end
2137
+
2138
+ gapi = service.load_table_gs_url job_gapi
1955
2139
  Job.from_gapi gapi, service
1956
2140
  end
1957
2141
 
1958
- def load_local table_id, file, options = {}
1959
- # Convert to storage URL
1960
- file = file.to_gs_url if file.respond_to? :to_gs_url
2142
+ def load_local file, job_gapi
2143
+ path = Pathname(file).to_path
2144
+ if job_gapi.configuration.load.source_format.nil?
2145
+ source_format = Convert.derive_source_format path
2146
+ unless source_format.nil?
2147
+ job_gapi.configuration.load.source_format = source_format
2148
+ end
2149
+ end
1961
2150
 
1962
- gapi = service.load_table_file dataset_id, table_id, file, options
2151
+ gapi = service.load_table_file file, job_gapi
1963
2152
  Job.from_gapi gapi, service
1964
2153
  end
1965
2154
 
1966
- def storage_url? file
1967
- file.respond_to?(:to_gs_url) ||
1968
- (file.respond_to?(:to_str) &&
1969
- file.to_str.downcase.start_with?("gs://")) ||
1970
- (file.is_a?(URI) &&
1971
- file.to_s.downcase.start_with?("gs://"))
2155
+ def load_local_or_uri file, updater
2156
+ job_gapi = updater.to_gapi
2157
+ job = if local_file? file
2158
+ load_local file, job_gapi
2159
+ else
2160
+ load_storage file, job_gapi
2161
+ end
2162
+ job
2163
+ end
2164
+
2165
+ def storage_url? files
2166
+ [files].flatten.all? do |file|
2167
+ file.respond_to?(:to_gs_url) ||
2168
+ (file.respond_to?(:to_str) &&
2169
+ file.to_str.downcase.start_with?("gs://")) ||
2170
+ (file.is_a?(URI) &&
2171
+ file.to_s.downcase.start_with?("gs://"))
2172
+ end
1972
2173
  end
1973
2174
 
1974
2175
  def local_file? file