google-cloud-bigquery 1.18.0 → 1.21.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -449,7 +449,8 @@ module Google
449
449
  def feature_columns
450
450
  ensure_full_data!
451
451
  Array(@gapi_json[:featureColumns]).map do |field_gapi_json|
452
- StandardSql::Field.from_gapi_json field_gapi_json
452
+ field_gapi = Google::Apis::BigqueryV2::StandardSqlField.from_json field_gapi_json.to_json
453
+ StandardSql::Field.from_gapi field_gapi
453
454
  end
454
455
  end
455
456
 
@@ -464,7 +465,8 @@ module Google
464
465
  def label_columns
465
466
  ensure_full_data!
466
467
  Array(@gapi_json[:labelColumns]).map do |field_gapi_json|
467
- StandardSql::Field.from_gapi_json field_gapi_json
468
+ field_gapi = Google::Apis::BigqueryV2::StandardSqlField.from_json field_gapi_json.to_json
469
+ StandardSql::Field.from_gapi field_gapi
468
470
  end
469
471
  end
470
472
 
@@ -554,7 +556,7 @@ module Google
554
556
  # model = dataset.model "my_model", skip_lookup: true
555
557
  # model.exists? #=> true
556
558
  #
557
- def exists? force: nil
559
+ def exists? force: false
558
560
  return resource_exists? if force
559
561
  # If we have a value, return it
560
562
  return @exists unless @exists.nil?
@@ -668,7 +670,7 @@ module Google
668
670
  end
669
671
 
670
672
  ##
671
- # @private New lazy Model object without making an HTTP request.
673
+ # @private New lazy Model object without making an HTTP request, for use with the skip_lookup option.
672
674
  def self.new_reference project_id, dataset_id, model_id, service
673
675
  raise ArgumentError, "project_id is required" unless project_id
674
676
  raise ArgumentError, "dataset_id is required" unless dataset_id
@@ -419,12 +419,20 @@ module Google
419
419
  # list must have a different key. See [Requirements for
420
420
  # labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
421
421
  # @param [Array<String>, String] udfs User-defined function resources
422
- # used in the query. May be either a code resource to load from a
423
- # Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
422
+ # used in a legacy SQL query. May be either a code resource to load from
423
+ # a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
424
424
  # that contains code for a user-defined function (UDF). Providing an
425
425
  # inline code resource is equivalent to providing a URI for a file
426
- # containing the same code. See [User-Defined
427
- # Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
426
+ # containing the same code.
427
+ #
428
+ # This parameter is used for defining User Defined Function (UDF)
429
+ # resources only when using legacy SQL. Users of standard SQL should
430
+ # leverage either DDL (e.g. `CREATE [TEMPORARY] FUNCTION ...`) or the
431
+ # Routines API to define UDF resources.
432
+ #
433
+ # For additional information on migrating, see: [Migrating to
434
+ # standard SQL - Differences in user-defined JavaScript
435
+ # functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/migrating-from-legacy-sql#differences_in_user-defined_javascript_functions)
428
436
  # @param [Integer] maximum_billing_tier Deprecated: Change the billing
429
437
  # tier to allow high-compute queries.
430
438
  # @yield [job] a job configuration object
@@ -527,7 +535,7 @@ module Google
527
535
  #
528
536
  # job.wait_until_done!
529
537
  # if !job.failed?
530
- # table_ref = job.ddl_target_table
538
+ # table_ref = job.ddl_target_table # Or ddl_target_routine for CREATE/DROP FUNCTION/PROCEDURE
531
539
  # end
532
540
  #
533
541
  # @example Execute a DML statement:
@@ -786,7 +794,7 @@ module Google
786
794
  #
787
795
  # data = bigquery.query "CREATE TABLE `my_dataset.my_table` (x INT64)"
788
796
  #
789
- # table_ref = data.ddl_target_table
797
+ # table_ref = data.ddl_target_table # Or ddl_target_routine for CREATE/DROP FUNCTION/PROCEDURE
790
798
  #
791
799
  # @example Execute a DML statement:
792
800
  # require "google/cloud/bigquery"
@@ -1046,8 +1054,7 @@ module Google
1046
1054
  #
1047
1055
  def datasets all: nil, filter: nil, token: nil, max: nil
1048
1056
  ensure_service!
1049
- options = { all: all, filter: filter, token: token, max: max }
1050
- gapi = service.list_datasets options
1057
+ gapi = service.list_datasets all: all, filter: filter, token: token, max: max
1051
1058
  Dataset::List.from_gapi gapi, service, all, filter, max
1052
1059
  end
1053
1060
 
@@ -1085,18 +1092,22 @@ module Google
1085
1092
  # part of the larger set of results to view. Optional.
1086
1093
  # @param [Integer] max Maximum number of jobs to return. Optional.
1087
1094
  # @param [String] filter A filter for job state. Optional.
1088
- # @param [Time] min_created_at Min value for {Job#created_at}. When
1089
- # provided, only jobs created after or at this time are returned.
1090
- # Optional.
1091
- # @param [Time] max_created_at Max value for {Job#created_at}. When
1092
- # provided, only jobs created before or at this time are returned.
1093
- # Optional.
1094
1095
  #
1095
1096
  # Acceptable values are:
1096
1097
  #
1097
1098
  # * `done` - Finished jobs
1098
1099
  # * `pending` - Pending jobs
1099
1100
  # * `running` - Running jobs
1101
+ # @param [Time] min_created_at Min value for {Job#created_at}. When
1102
+ # provided, only jobs created after or at this time are returned.
1103
+ # Optional.
1104
+ # @param [Time] max_created_at Max value for {Job#created_at}. When
1105
+ # provided, only jobs created before or at this time are returned.
1106
+ # Optional.
1107
+ # @param [Google::Cloud::Bigquery::Job, String] parent_job A job
1108
+ # object or a job ID. If set, retrieve only child jobs of the
1109
+ # specified parent. Optional. See {Job#job_id}, {Job#num_child_jobs},
1110
+ # and {Job#parent_job_id}.
1100
1111
  #
1101
1112
  # @return [Array<Google::Cloud::Bigquery::Job>] (See
1102
1113
  # {Google::Cloud::Bigquery::Job::List})
@@ -1145,13 +1156,63 @@ module Google
1145
1156
  # # process job
1146
1157
  # end
1147
1158
  #
1148
- def jobs all: nil, token: nil, max: nil, filter: nil,
1149
- min_created_at: nil, max_created_at: nil
1159
+ # @example Retrieve child jobs by setting `parent_job`:
1160
+ # require "google/cloud/bigquery"
1161
+ #
1162
+ # bigquery = Google::Cloud::Bigquery.new
1163
+ #
1164
+ # multi_statement_sql = <<~SQL
1165
+ # -- Declare a variable to hold names as an array.
1166
+ # DECLARE top_names ARRAY<STRING>;
1167
+ # -- Build an array of the top 100 names from the year 2017.
1168
+ # SET top_names = (
1169
+ # SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
1170
+ # FROM `bigquery-public-data.usa_names.usa_1910_current`
1171
+ # WHERE year = 2017
1172
+ # );
1173
+ # -- Which names appear as words in Shakespeare's plays?
1174
+ # SELECT
1175
+ # name AS shakespeare_name
1176
+ # FROM UNNEST(top_names) AS name
1177
+ # WHERE name IN (
1178
+ # SELECT word
1179
+ # FROM `bigquery-public-data.samples.shakespeare`
1180
+ # );
1181
+ # SQL
1182
+ #
1183
+ # job = bigquery.query_job multi_statement_sql
1184
+ #
1185
+ # job.wait_until_done!
1186
+ #
1187
+ # child_jobs = bigquery.jobs parent_job: job
1188
+ #
1189
+ # child_jobs.each do |child_job|
1190
+ # script_statistics = child_job.script_statistics
1191
+ # puts script_statistics.evaluation_kind
1192
+ # script_statistics.stack_frames.each do |stack_frame|
1193
+ # puts stack_frame.text
1194
+ # end
1195
+ # end
1196
+ #
1197
+ def jobs all: nil,
1198
+ token: nil,
1199
+ max: nil,
1200
+ filter: nil,
1201
+ min_created_at: nil,
1202
+ max_created_at: nil,
1203
+ parent_job: nil
1150
1204
  ensure_service!
1151
- options = { all: all, token: token, max: max, filter: filter, min_created_at: min_created_at,
1152
- max_created_at: max_created_at }
1153
- gapi = service.list_jobs options
1154
- Job::List.from_gapi gapi, service, options
1205
+ parent_job = parent_job.job_id if parent_job.is_a? Job
1206
+ options = {
1207
+ parent_job_id: parent_job,
1208
+ all: all,
1209
+ token: token,
1210
+ max: max, filter: filter,
1211
+ min_created_at: min_created_at,
1212
+ max_created_at: max_created_at
1213
+ }
1214
+ gapi = service.list_jobs(**options)
1215
+ Job::List.from_gapi gapi, service, **options
1155
1216
  end
1156
1217
 
1157
1218
  ##
@@ -1197,8 +1258,7 @@ module Google
1197
1258
  #
1198
1259
  def projects token: nil, max: nil
1199
1260
  ensure_service!
1200
- options = { token: token, max: max }
1201
- gapi = service.list_projects options
1261
+ gapi = service.list_projects token: token, max: max
1202
1262
  Project::List.from_gapi gapi, service, max
1203
1263
  end
1204
1264
 
@@ -72,8 +72,7 @@ module Google
72
72
  def next
73
73
  return nil unless next?
74
74
  ensure_service!
75
- options = { all: @hidden, token: token, max: @max }
76
- gapi = @service.list_projects options
75
+ gapi = @service.list_projects token: token, max: @max
77
76
  self.class.from_gapi gapi, @service, @max
78
77
  end
79
78
 
@@ -48,6 +48,44 @@ module Google
48
48
  # puts job.data.first
49
49
  # end
50
50
  #
51
+ # @example With multiple statements and child jobs:
52
+ # require "google/cloud/bigquery"
53
+ #
54
+ # bigquery = Google::Cloud::Bigquery.new
55
+ #
56
+ # multi_statement_sql = <<~SQL
57
+ # -- Declare a variable to hold names as an array.
58
+ # DECLARE top_names ARRAY<STRING>;
59
+ # -- Build an array of the top 100 names from the year 2017.
60
+ # SET top_names = (
61
+ # SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
62
+ # FROM `bigquery-public-data.usa_names.usa_1910_current`
63
+ # WHERE year = 2017
64
+ # );
65
+ # -- Which names appear as words in Shakespeare's plays?
66
+ # SELECT
67
+ # name AS shakespeare_name
68
+ # FROM UNNEST(top_names) AS name
69
+ # WHERE name IN (
70
+ # SELECT word
71
+ # FROM `bigquery-public-data.samples.shakespeare`
72
+ # );
73
+ # SQL
74
+ #
75
+ # job = bigquery.query_job multi_statement_sql
76
+ #
77
+ # job.wait_until_done!
78
+ #
79
+ # child_jobs = bigquery.jobs parent_job: job
80
+ #
81
+ # child_jobs.each do |child_job|
82
+ # script_statistics = child_job.script_statistics
83
+ # puts script_statistics.evaluation_kind
84
+ # script_statistics.stack_frames.each do |stack_frame|
85
+ # puts stack_frame.text
86
+ # end
87
+ # end
88
+ #
51
89
  class QueryJob < Job
52
90
  ##
53
91
  # Checks if the priority for the query is `BATCH`.
@@ -305,6 +343,22 @@ module Google
305
343
  @gapi.statistics.query.ddl_operation_performed
306
344
  end
307
345
 
346
+ ##
347
+ # The DDL target routine, in reference state. (See {Routine#reference?}.)
348
+ # Present only for `CREATE/DROP FUNCTION/PROCEDURE` queries. (See
349
+ # {#statement_type}.)
350
+ #
351
+ # @return [Google::Cloud::Bigquery::Routine, nil] The DDL target routine, in
352
+ # reference state.
353
+ #
354
+ def ddl_target_routine
355
+ return nil unless @gapi.statistics.query
356
+ ensure_service!
357
+ routine = @gapi.statistics.query.ddl_target_routine
358
+ return nil unless routine
359
+ Google::Cloud::Bigquery::Routine.new_reference_from_gapi routine, service
360
+ end
361
+
308
362
  ##
309
363
  # The DDL target table, in reference state. (See {Table#reference?}.)
310
364
  # Present only for `CREATE/DROP TABLE/VIEW` queries. (See
@@ -394,6 +448,69 @@ module Google
394
448
  EncryptionConfiguration.from_gapi @gapi.configuration.query.destination_encryption_configuration
395
449
  end
396
450
 
451
+ ###
452
+ # Checks if the destination table will be range partitioned. See [Creating and using integer range partitioned
453
+ # tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
454
+ #
455
+ # @return [Boolean] `true` when the table is range partitioned, or `false` otherwise.
456
+ #
457
+ # @!group Attributes
458
+ #
459
+ def range_partitioning?
460
+ !@gapi.configuration.query.range_partitioning.nil?
461
+ end
462
+
463
+ ###
464
+ # The field on which the destination table will be range partitioned, if any. The field must be a
465
+ # top-level `NULLABLE/REQUIRED` field. The only supported type is `INTEGER/INT64`. See
466
+ # [Creating and using integer range partitioned
467
+ # tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
468
+ #
469
+ # @return [String, nil] The partition field, if a field was configured, or `nil` if not range partitioned.
470
+ #
471
+ # @!group Attributes
472
+ #
473
+ def range_partitioning_field
474
+ @gapi.configuration.query.range_partitioning.field if range_partitioning?
475
+ end
476
+
477
+ ###
478
+ # The start of range partitioning, inclusive. See [Creating and using integer range partitioned
479
+ # tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
480
+ #
481
+ # @return [Integer, nil] The start of range partitioning, inclusive, or `nil` if not range partitioned.
482
+ #
483
+ # @!group Attributes
484
+ #
485
+ def range_partitioning_start
486
+ @gapi.configuration.query.range_partitioning.range.start if range_partitioning?
487
+ end
488
+
489
+ ###
490
+ # The width of each interval. See [Creating and using integer range partitioned
491
+ # tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
492
+ #
493
+ # @return [Integer, nil] The width of each interval, for data in range partitions, or `nil` if not range
494
+ # partitioned.
495
+ #
496
+ # @!group Attributes
497
+ #
498
+ def range_partitioning_interval
499
+ @gapi.configuration.query.range_partitioning.range.interval if range_partitioning?
500
+ end
501
+
502
+ ###
503
+ # The end of range partitioning, exclusive. See [Creating and using integer range partitioned
504
+ # tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
505
+ #
506
+ # @return [Integer, nil] The end of range partitioning, exclusive, or `nil` if not range partitioned.
507
+ #
508
+ # @!group Attributes
509
+ #
510
+ def range_partitioning_end
511
+ @gapi.configuration.query.range_partitioning.range.end if range_partitioning?
512
+ end
513
+
397
514
  ###
398
515
  # Checks if the destination table will be time-partitioned. See
399
516
  # [Partitioned Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
@@ -1012,6 +1129,164 @@ module Google
1012
1129
  @gapi.configuration.query.update! destination_encryption_configuration: val.to_gapi
1013
1130
  end
1014
1131
 
1132
+ ##
1133
+ # Sets the field on which to range partition the table. See [Creating and using integer range partitioned
1134
+ # tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
1135
+ #
1136
+ # See {#range_partitioning_start=}, {#range_partitioning_interval=} and {#range_partitioning_end=}.
1137
+ #
1138
+ # You can only set range partitioning when creating a table. BigQuery does not allow you to change
1139
+ # partitioning on an existing table.
1140
+ #
1141
+ # @param [String] field The range partition field. The destination table is partitioned by this
1142
+ # field. The field must be a top-level `NULLABLE/REQUIRED` field. The only supported
1143
+ # type is `INTEGER/INT64`.
1144
+ #
1145
+ # @example
1146
+ # require "google/cloud/bigquery"
1147
+ #
1148
+ # bigquery = Google::Cloud::Bigquery.new
1149
+ # dataset = bigquery.dataset "my_dataset"
1150
+ # destination_table = dataset.table "my_destination_table",
1151
+ # skip_lookup: true
1152
+ #
1153
+ # job = bigquery.query_job "SELECT num FROM UNNEST(GENERATE_ARRAY(0, 99)) AS num" do |job|
1154
+ # job.table = destination_table
1155
+ # job.range_partitioning_field = "num"
1156
+ # job.range_partitioning_start = 0
1157
+ # job.range_partitioning_interval = 10
1158
+ # job.range_partitioning_end = 100
1159
+ # end
1160
+ #
1161
+ # job.wait_until_done!
1162
+ # job.done? #=> true
1163
+ #
1164
+ # @!group Attributes
1165
+ #
1166
+ def range_partitioning_field= field
1167
+ @gapi.configuration.query.range_partitioning ||= Google::Apis::BigqueryV2::RangePartitioning.new(
1168
+ range: Google::Apis::BigqueryV2::RangePartitioning::Range.new
1169
+ )
1170
+ @gapi.configuration.query.range_partitioning.field = field
1171
+ end
1172
+
1173
+ ##
1174
+ # Sets the start of range partitioning, inclusive, for the destination table. See [Creating and using integer
1175
+ # range partitioned tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
1176
+ #
1177
+ # You can only set range partitioning when creating a table. BigQuery does not allow you to change
1178
+ # partitioning on an existing table.
1179
+ #
1180
+ # See {#range_partitioning_field=}, {#range_partitioning_interval=} and {#range_partitioning_end=}.
1181
+ #
1182
+ # @param [Integer] range_start The start of range partitioning, inclusive.
1183
+ #
1184
+ # @example
1185
+ # require "google/cloud/bigquery"
1186
+ #
1187
+ # bigquery = Google::Cloud::Bigquery.new
1188
+ # dataset = bigquery.dataset "my_dataset"
1189
+ # destination_table = dataset.table "my_destination_table",
1190
+ # skip_lookup: true
1191
+ #
1192
+ # job = bigquery.query_job "SELECT num FROM UNNEST(GENERATE_ARRAY(0, 99)) AS num" do |job|
1193
+ # job.table = destination_table
1194
+ # job.range_partitioning_field = "num"
1195
+ # job.range_partitioning_start = 0
1196
+ # job.range_partitioning_interval = 10
1197
+ # job.range_partitioning_end = 100
1198
+ # end
1199
+ #
1200
+ # job.wait_until_done!
1201
+ # job.done? #=> true
1202
+ #
1203
+ # @!group Attributes
1204
+ #
1205
+ def range_partitioning_start= range_start
1206
+ @gapi.configuration.query.range_partitioning ||= Google::Apis::BigqueryV2::RangePartitioning.new(
1207
+ range: Google::Apis::BigqueryV2::RangePartitioning::Range.new
1208
+ )
1209
+ @gapi.configuration.query.range_partitioning.range.start = range_start
1210
+ end
1211
+
1212
+ ##
1213
+ # Sets the width of each interval for data in range partitions. See [Creating and using integer range partitioned
1214
+ # tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
1215
+ #
1216
+ # You can only set range partitioning when creating a table. BigQuery does not allow you to change
1217
+ # partitioning on an existing table.
1218
+ #
1219
+ # See {#range_partitioning_field=}, {#range_partitioning_start=} and {#range_partitioning_end=}.
1220
+ #
1221
+ # @param [Integer] range_interval The width of each interval, for data in partitions.
1222
+ #
1223
+ # @example
1224
+ # require "google/cloud/bigquery"
1225
+ #
1226
+ # bigquery = Google::Cloud::Bigquery.new
1227
+ # dataset = bigquery.dataset "my_dataset"
1228
+ # destination_table = dataset.table "my_destination_table",
1229
+ # skip_lookup: true
1230
+ #
1231
+ # job = bigquery.query_job "SELECT num FROM UNNEST(GENERATE_ARRAY(0, 99)) AS num" do |job|
1232
+ # job.table = destination_table
1233
+ # job.range_partitioning_field = "num"
1234
+ # job.range_partitioning_start = 0
1235
+ # job.range_partitioning_interval = 10
1236
+ # job.range_partitioning_end = 100
1237
+ # end
1238
+ #
1239
+ # job.wait_until_done!
1240
+ # job.done? #=> true
1241
+ #
1242
+ # @!group Attributes
1243
+ #
1244
+ def range_partitioning_interval= range_interval
1245
+ @gapi.configuration.query.range_partitioning ||= Google::Apis::BigqueryV2::RangePartitioning.new(
1246
+ range: Google::Apis::BigqueryV2::RangePartitioning::Range.new
1247
+ )
1248
+ @gapi.configuration.query.range_partitioning.range.interval = range_interval
1249
+ end
1250
+
1251
+ ##
1252
+ # Sets the end of range partitioning, exclusive, for the destination table. See [Creating and using integer
1253
+ # range partitioned tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
1254
+ #
1255
+ # You can only set range partitioning when creating a table. BigQuery does not allow you to change
1256
+ # partitioning on an existing table.
1257
+ #
1258
+ # See {#range_partitioning_start=}, {#range_partitioning_interval=} and {#range_partitioning_field=}.
1259
+ #
1260
+ # @param [Integer] range_end The end of range partitioning, exclusive.
1261
+ #
1262
+ # @example
1263
+ # require "google/cloud/bigquery"
1264
+ #
1265
+ # bigquery = Google::Cloud::Bigquery.new
1266
+ # dataset = bigquery.dataset "my_dataset"
1267
+ # destination_table = dataset.table "my_destination_table",
1268
+ # skip_lookup: true
1269
+ #
1270
+ # job = bigquery.query_job "SELECT num FROM UNNEST(GENERATE_ARRAY(0, 99)) AS num" do |job|
1271
+ # job.table = destination_table
1272
+ # job.range_partitioning_field = "num"
1273
+ # job.range_partitioning_start = 0
1274
+ # job.range_partitioning_interval = 10
1275
+ # job.range_partitioning_end = 100
1276
+ # end
1277
+ #
1278
+ # job.wait_until_done!
1279
+ # job.done? #=> true
1280
+ #
1281
+ # @!group Attributes
1282
+ #
1283
+ def range_partitioning_end= range_end
1284
+ @gapi.configuration.query.range_partitioning ||= Google::Apis::BigqueryV2::RangePartitioning.new(
1285
+ range: Google::Apis::BigqueryV2::RangePartitioning::Range.new
1286
+ )
1287
+ @gapi.configuration.query.range_partitioning.range.end = range_end
1288
+ end
1289
+
1015
1290
  ##
1016
1291
  # Sets the partitioning for the destination table. See [Partitioned
1017
1292
  # Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
@@ -1198,6 +1473,23 @@ module Google
1198
1473
  @gapi.configuration.query.clustering.fields = fields
1199
1474
  end
1200
1475
 
1476
+ def cancel
1477
+ raise "not implemented in #{self.class}"
1478
+ end
1479
+
1480
+ def rerun!
1481
+ raise "not implemented in #{self.class}"
1482
+ end
1483
+
1484
+ def reload!
1485
+ raise "not implemented in #{self.class}"
1486
+ end
1487
+ alias refresh! reload!
1488
+
1489
+ def wait_until_done!
1490
+ raise "not implemented in #{self.class}"
1491
+ end
1492
+
1201
1493
  ##
1202
1494
  # @private Returns the Google API client library version of this job.
1203
1495
  #