google-cloud-bigquery 0.28.0 → 0.29.0

@@ -17,6 +17,7 @@ require "json"
17
17
  require "google/cloud/errors"
18
18
  require "google/cloud/bigquery/service"
19
19
  require "google/cloud/bigquery/table"
20
+ require "google/cloud/bigquery/external"
20
21
  require "google/cloud/bigquery/dataset/list"
21
22
  require "google/cloud/bigquery/dataset/access"
22
23
  require "google/apis/bigquery_v2"
@@ -59,8 +60,9 @@ module Google
59
60
 
60
61
  ##
61
62
  # A unique ID for this dataset, without the project name.
62
- # The ID must contain only letters (a-z, A-Z), numbers (0-9),
63
- # or underscores (_). The maximum length is 1,024 characters.
63
+ #
64
+ # @return [String] The ID must contain only letters (a-z, A-Z), numbers
65
+ # (0-9), or underscores (_). The maximum length is 1,024 characters.
64
66
  #
65
67
  # @!group Attributes
66
68
  #
@@ -71,6 +73,8 @@ module Google
71
73
  ##
72
74
  # The ID of the project containing this dataset.
73
75
  #
76
+ # @return [String] The project ID.
77
+ #
74
78
  # @!group Attributes
75
79
  #
76
80
  def project_id
@@ -90,6 +94,8 @@ module Google
90
94
  ##
91
95
  # A descriptive name for the dataset.
92
96
  #
97
+ # @return [String] The friendly name.
98
+ #
93
99
  # @!group Attributes
94
100
  #
95
101
  def name
@@ -99,6 +105,8 @@ module Google
99
105
  ##
100
106
  # Updates the descriptive name for the dataset.
101
107
  #
108
+ # @param [String] new_name The new friendly name.
109
+ #
102
110
  # @!group Attributes
103
111
  #
104
112
  def name= new_name
@@ -107,7 +115,9 @@ module Google
107
115
  end
108
116
 
109
117
  ##
110
- # A string hash of the dataset.
118
+ # The ETag hash of the dataset.
119
+ #
120
+ # @return [String] The ETag hash.
111
121
  #
112
122
  # @!group Attributes
113
123
  #
@@ -119,6 +129,8 @@ module Google
119
129
  ##
120
130
  # A URL that can be used to access the dataset using the REST API.
121
131
  #
132
+ # @return [String] A REST URL for the resource.
133
+ #
122
134
  # @!group Attributes
123
135
  #
124
136
  def api_url
@@ -129,6 +141,8 @@ module Google
129
141
  ##
130
142
  # A user-friendly description of the dataset.
131
143
  #
144
+ # @return [String] The description.
145
+ #
132
146
  # @!group Attributes
133
147
  #
134
148
  def description
@@ -139,6 +153,8 @@ module Google
139
153
  ##
140
154
  # Updates the user-friendly description of the dataset.
141
155
  #
156
+ # @param [String] new_description The new description for the dataset.
157
+ #
142
158
  # @!group Attributes
143
159
  #
144
160
  def description= new_description
@@ -149,6 +165,8 @@ module Google
149
165
  ##
150
166
  # The default lifetime of all tables in the dataset, in milliseconds.
151
167
  #
168
+ # @return [Integer] The default table expiration in milliseconds.
169
+ #
152
170
  # @!group Attributes
153
171
  #
154
172
  def default_expiration
@@ -164,6 +182,9 @@ module Google
164
182
  # Updates the default lifetime of all tables in the dataset, in
165
183
  # milliseconds.
166
184
  #
185
+ # @param [Integer] new_default_expiration The new default table
186
+ # expiration in milliseconds.
187
+ #
167
188
  # @!group Attributes
168
189
  #
169
190
  def default_expiration= new_default_expiration
@@ -174,6 +195,8 @@ module Google
174
195
  ##
175
196
  # The time when this dataset was created.
176
197
  #
198
+ # @return [Time, nil] The creation time.
199
+ #
177
200
  # @!group Attributes
178
201
  #
179
202
  def created_at
@@ -188,6 +211,8 @@ module Google
188
211
  ##
189
212
  # The date when this dataset or any of its tables was last modified.
190
213
  #
214
+ # @return [Time, nil] The last modified time.
215
+ #
191
216
  # @!group Attributes
192
217
  #
193
218
  def modified_at
@@ -201,7 +226,9 @@ module Google
201
226
 
202
227
  ##
203
228
  # The geographic location where the dataset should reside. Possible
204
- # values include EU and US. The default value is US.
229
+ # values include `EU` and `US`. The default value is `US`.
230
+ #
231
+ # @return [String] The location code.
205
232
  #
206
233
  # @!group Attributes
207
234
  #
@@ -210,6 +237,63 @@ module Google
210
237
  @gapi.location
211
238
  end
212
239
 
240
+ ##
241
+ # A hash of user-provided labels associated with this dataset. Labels
242
+ # are used to organize and group datasets. See [Using
243
+ # Labels](https://cloud.google.com/bigquery/docs/labels).
244
+ #
245
+ # The returned hash is frozen and changes are not allowed. Use
246
+ # {#labels=} to replace the entire hash.
247
+ #
248
+ # @return [Hash<String, String>] A hash containing key/value pairs.
249
+ #
250
+ # @example
251
+ # require "google/cloud/bigquery"
252
+ #
253
+ # bigquery = Google::Cloud::Bigquery.new
254
+ # dataset = bigquery.dataset "my_dataset"
255
+ #
256
+ # labels = dataset.labels
257
+ # labels["department"] #=> "shipping"
258
+ #
259
+ # @!group Attributes
260
+ #
261
+ def labels
262
+ m = @gapi.labels
263
+ m = m.to_h if m.respond_to? :to_h
264
+ m.dup.freeze
265
+ end
266
+
267
+ ##
268
+ # Updates the hash of user-provided labels associated with this dataset.
269
+ # Labels are used to organize and group datasets. See [Using
270
+ # Labels](https://cloud.google.com/bigquery/docs/labels).
271
+ #
272
+ # @param [Hash<String, String>] labels A hash containing key/value
273
+ # pairs.
274
+ #
275
+ # * Label keys and values can be no longer than 63 characters.
276
+ # * Label keys and values can contain only lowercase letters, numbers,
277
+ # underscores, hyphens, and international characters.
278
+ # * Label keys and values cannot exceed 128 bytes in size.
279
+ # * Label keys must begin with a letter.
280
+ # * Label keys must be unique within a dataset.
281
+ #
282
+ # @example
283
+ # require "google/cloud/bigquery"
284
+ #
285
+ # bigquery = Google::Cloud::Bigquery.new
286
+ # dataset = bigquery.dataset "my_dataset"
287
+ #
288
+ # dataset.labels = { "department" => "shipping" }
289
+ #
290
+ # @!group Attributes
291
+ #
292
+ def labels= labels
293
+ @gapi.labels = labels
294
+ patch_gapi! :labels
295
+ end
296
+
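A minimal sketch of how the new labels accessors above work together (the dataset name is a placeholder): the getter returns a frozen hash, so build a fresh copy and assign it back through the setter, which replaces the whole hash in one patch.

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset = bigquery.dataset "my_dataset"

    # The getter returns a frozen copy; mutate a duplicate instead.
    new_labels = dataset.labels.dup
    new_labels["department"] = "shipping"
    dataset.labels = new_labels  # replaces the entire label hash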
213
297
  ##
214
298
  # Retrieves the access rules for a Dataset. The rules can be updated
215
299
  # when passing a block, see {Dataset::Access} for all the methods
@@ -221,7 +305,7 @@ module Google
221
305
  # @yield [access] a block for setting rules
222
306
  # @yieldparam [Dataset::Access] access the object accepting rules
223
307
  #
224
- # @return [Google::Cloud::Bigquery::Dataset::Access]
308
+ # @return [Google::Cloud::Bigquery::Dataset::Access] The access object.
225
309
  #
226
310
  # @example
227
311
  # require "google/cloud/bigquery"
@@ -229,14 +313,8 @@ module Google
229
313
  # bigquery = Google::Cloud::Bigquery.new
230
314
  # dataset = bigquery.dataset "my_dataset"
231
315
  #
232
- # dataset.access # [{"role"=>"OWNER",
233
- # # "specialGroup"=>"projectOwners"},
234
- # # {"role"=>"WRITER",
235
- # # "specialGroup"=>"projectWriters"},
236
- # # {"role"=>"READER",
237
- # # "specialGroup"=>"projectReaders"},
238
- # # {"role"=>"OWNER",
239
- # # "userByEmail"=>"123456789-...com"}]
316
+ # access = dataset.access
317
+ # access.writer_user? "reader@example.com" #=> false
240
318
  #
241
319
  # @example Manage the access rules by passing a block:
242
320
  # require "google/cloud/bigquery"
@@ -305,7 +383,7 @@ module Google
305
383
  # @yield [table] a block for setting the table
306
384
  # @yieldparam [Table] table the table object to be updated
307
385
  #
308
- # @return [Google::Cloud::Bigquery::Table]
386
+ # @return [Google::Cloud::Bigquery::Table] A new table object.
309
387
  #
310
388
  # @example
311
389
  # require "google/cloud/bigquery"
@@ -394,8 +472,15 @@ module Google
394
472
  # [legacy
395
473
  # SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
396
474
  # dialect. Optional. The default value is false.
475
+ # @param [Array<String>, String] udfs User-defined function resources
476
+ # used in the query. May be either a code resource to load from a
477
+ # Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
478
+ # that contains code for a user-defined function (UDF). Providing an
479
+ # inline code resource is equivalent to providing a URI for a file
480
+ # containing the same code. See [User-Defined
481
+ # Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
397
482
  #
398
- # @return [Google::Cloud::Bigquery::View]
483
+ # @return [Google::Cloud::Bigquery::View] A new view object.
399
484
  #
400
485
  # @example
401
486
  # require "google/cloud/bigquery"
@@ -419,7 +504,7 @@ module Google
419
504
  # @!group Table
420
505
  #
421
506
  def create_view table_id, query, name: nil, description: nil,
422
- standard_sql: nil, legacy_sql: nil
507
+ standard_sql: nil, legacy_sql: nil, udfs: nil
423
508
  new_view_opts = {
424
509
  table_reference: Google::Apis::BigqueryV2::TableReference.new(
425
510
  project_id: project_id, dataset_id: dataset_id, table_id: table_id
@@ -429,7 +514,8 @@ module Google
429
514
  view: Google::Apis::BigqueryV2::ViewDefinition.new(
430
515
  query: query,
431
516
  use_legacy_sql: Convert.resolve_legacy_sql(standard_sql,
432
- legacy_sql)
517
+ legacy_sql),
518
+ user_defined_function_resources: udfs_gapi(udfs)
433
519
  )
434
520
  }.delete_if { |_, v| v.nil? }
435
521
  new_view = Google::Apis::BigqueryV2::Table.new new_view_opts
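A sketch of the new udfs option on create_view (the bucket path, inline snippet, and table names are placeholders); JavaScript UDF resources of this kind apply to legacy SQL queries, so legacy_sql is enabled here. A Cloud Storage URI and an inline code string may be mixed in one array.

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset = bigquery.dataset "my_dataset"

    js_udf = "var someCode = 'here';"  # placeholder inline UDF code
    view = dataset.create_view "my_view",
                               "SELECT name FROM [my_dataset.my_table]",
                               legacy_sql: true,
                               udfs: ["gs://my-bucket/my_udf.js", js_udf]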
@@ -474,8 +560,8 @@ module Google
474
560
  # @param [Integer] max Maximum number of tables to return.
475
561
  #
476
562
  # @return [Array<Google::Cloud::Bigquery::Table>,
477
- # Array<Google::Cloud::Bigquery::View>] (See
478
- # {Google::Cloud::Bigquery::Table::List})
563
+ # Array<Google::Cloud::Bigquery::View>] An array of tables and/or
564
+ #   views (See {Google::Cloud::Bigquery::Table::List})
479
565
  #
480
566
  # @example
481
567
  # require "google/cloud/bigquery"
@@ -546,6 +632,10 @@ module Google
546
632
  # passed is a hash `{ myparam: "foo" }`, the query must use named
547
633
  # query parameters. When set, `legacy_sql` will automatically be set
548
634
  # to false and `standard_sql` to true.
635
+ # @param [Hash<String|Symbol, External::DataSource>] external A Hash
636
+ # that represents the mapping of the external tables to the table
637
+ # names used in the SQL query. The hash keys are the table names, and
638
+ # the hash values are the external table objects. See {Dataset#query}.
549
639
  # @param [String] priority Specifies a priority for the query. Possible
550
640
  # values include `INTERACTIVE` and `BATCH`. The default value is
551
641
  # `INTERACTIVE`.
@@ -605,8 +695,37 @@ module Google
605
695
  # job. Queries that will have bytes billed beyond this limit will fail
606
696
  # (without incurring a charge). Optional. If unspecified, this will be
607
697
  # set to your project default.
608
- #
609
- # @return [Google::Cloud::Bigquery::QueryJob]
698
+ # @param [String] job_id A user-defined ID for the query job. The ID
699
+ # must contain only letters (a-z, A-Z), numbers (0-9), underscores
700
+ # (_), or dashes (-). The maximum length is 1,024 characters. If
701
+ # `job_id` is provided, then `prefix` will not be used.
702
+ #
703
+ # See [Generating a job
704
+ # ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
705
+ # @param [String] prefix A string, usually human-readable, that will be
706
+ # prepended to a generated value to produce a unique job ID. For
707
+ # example, the prefix `daily_import_job_` can be given to generate a
708
+ # job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
709
+ # prefix must contain only letters (a-z, A-Z), numbers (0-9),
710
+ # underscores (_), or dashes (-). The maximum length of the entire ID
711
+ # is 1,024 characters. If `job_id` is provided, then `prefix` will not
712
+ # be used.
713
+ # @param [Hash] labels A hash of user-provided labels associated with
714
+ # the job. You can use these to organize and group your jobs. Label
715
+ # keys and values can be no longer than 63 characters, can only
716
+ # contain lowercase letters, numeric characters, underscores and
717
+ # dashes. International characters are allowed. Label values are
718
+ # optional. Label keys must start with a letter and each label in the
719
+ # list must have a different key.
720
+ # @param [Array<String>, String] udfs User-defined function resources
721
+ # used in the query. May be either a code resource to load from a
722
+ # Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
723
+ # that contains code for a user-defined function (UDF). Providing an
724
+ # inline code resource is equivalent to providing a URI for a file
725
+ # containing the same code. See [User-Defined
726
+ # Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
727
+ #
728
+ # @return [Google::Cloud::Bigquery::QueryJob] A new query job object.
610
729
  #
611
730
  # @example Query using standard SQL:
612
731
  # require "google/cloud/bigquery"
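A short sketch, reusing the dataset and table names from the surrounding examples, of the new prefix and labels options documented above; job_id would be passed instead of prefix when the caller wants full control of the ID.

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset = bigquery.dataset "my_dataset"

    job = dataset.query_job "SELECT name FROM my_table",
                            prefix: "daily_import_job_",
                            labels: { "team" => "shipping" }

    job.wait_until_done!
    puts job.job_id  # the prefix followed by a generated suffix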
@@ -618,7 +737,7 @@ module Google
618
737
  #
619
738
  # job.wait_until_done!
620
739
  # if !job.failed?
621
- # job.query_results.each do |row|
740
+ # job.data.each do |row|
622
741
  # puts row[:name]
623
742
  # end
624
743
  # end
@@ -634,7 +753,7 @@ module Google
634
753
  #
635
754
  # job.wait_until_done!
636
755
  # if !job.failed?
637
- # job.query_results.each do |row|
756
+ # job.data.each do |row|
638
757
  # puts row[:name]
639
758
  # end
640
759
  # end
@@ -650,7 +769,7 @@ module Google
650
769
  #
651
770
  # job.wait_until_done!
652
771
  # if !job.failed?
653
- # job.query_results.each do |row|
772
+ # job.data.each do |row|
654
773
  # puts row[:name]
655
774
  # end
656
775
  # end
@@ -666,24 +785,49 @@ module Google
666
785
  #
667
786
  # job.wait_until_done!
668
787
  # if !job.failed?
669
- # job.query_results.each do |row|
788
+ # job.data.each do |row|
789
+ # puts row[:name]
790
+ # end
791
+ # end
792
+ #
793
+ # @example Query using external data source:
794
+ # require "google/cloud/bigquery"
795
+ #
796
+ # bigquery = Google::Cloud::Bigquery.new
797
+ # dataset = bigquery.dataset "my_dataset"
798
+ #
799
+ # csv_url = "gs://bucket/path/to/data.csv"
800
+ # csv_table = dataset.external csv_url do |csv|
801
+ # csv.autodetect = true
802
+ # csv.skip_leading_rows = 1
803
+ # end
804
+ #
805
+ # job = dataset.query_job "SELECT * FROM my_ext_table",
806
+ # external: { my_ext_table: csv_table }
807
+ #
808
+ # job.wait_until_done!
809
+ # if !job.failed?
810
+ # job.data.each do |row|
670
811
  # puts row[:name]
671
812
  # end
672
813
  # end
673
814
  #
674
815
  # @!group Data
675
816
  #
676
- def query_job query, params: nil, priority: "INTERACTIVE", cache: true,
677
- table: nil, create: nil, write: nil, standard_sql: nil,
817
+ def query_job query, params: nil, external: nil,
818
+ priority: "INTERACTIVE", cache: true, table: nil,
819
+ create: nil, write: nil, standard_sql: nil,
678
820
  legacy_sql: nil, large_results: nil, flatten: nil,
679
- maximum_billing_tier: nil, maximum_bytes_billed: nil
821
+ maximum_billing_tier: nil, maximum_bytes_billed: nil,
822
+ job_id: nil, prefix: nil, labels: nil, udfs: nil
680
823
  options = { priority: priority, cache: cache, table: table,
681
824
  create: create, write: write,
682
825
  large_results: large_results, flatten: flatten,
683
826
  legacy_sql: legacy_sql, standard_sql: standard_sql,
684
827
  maximum_billing_tier: maximum_billing_tier,
685
828
  maximum_bytes_billed: maximum_bytes_billed,
686
- params: params }
829
+ params: params, external: external, labels: labels,
830
+ job_id: job_id, prefix: prefix, udfs: udfs }
687
831
  options[:dataset] ||= self
688
832
  ensure_service!
689
833
  gapi = service.query_job query, options
@@ -691,8 +835,10 @@ module Google
691
835
  end
692
836
 
693
837
  ##
694
- # Queries data using the [synchronous
695
- # method](https://cloud.google.com/bigquery/querying-data).
838
+ # Queries data using a synchronous method that blocks for a response. In
839
+ # this method, a {QueryJob} is created and its results are saved
840
+ # to a temporary table, then read from the table. Timeouts and transient
841
+ # errors are generally handled as needed to complete the query.
696
842
  #
697
843
  # Sets the current dataset as the default dataset in the query. Useful
698
844
  # for using unqualified table names.
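A small sketch of the blocking behavior described above (the table name is a placeholder): query waits on the underlying job and raises a wrapped Google::Cloud::Error when the job fails, so no explicit polling is needed.

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset = bigquery.dataset "my_dataset"

    begin
      data = dataset.query "SELECT name FROM my_table"
      data.each { |row| puts row[:name] }
    rescue Google::Cloud::Error => e
      # Failed jobs surface here as wrapped service errors.
      puts "Query failed: #{e.message}"
    end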
@@ -717,6 +863,8 @@ module Google
717
863
  # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types)
718
864
  # for an overview of each BigQuery data type, including allowed values.
719
865
  #
866
+ # @see https://cloud.google.com/bigquery/querying-data Querying Data
867
+ #
720
868
  # @param [String] query A query string, following the BigQuery [query
721
869
  # syntax](https://cloud.google.com/bigquery/query-reference), of the
722
870
  # query to execute. Example: "SELECT count(f1) FROM
@@ -728,22 +876,16 @@ module Google
728
876
  # passed is a hash `{ myparam: "foo" }`, the query must use named
729
877
  # query parameters. When set, `legacy_sql` will automatically be set
730
878
  # to false and `standard_sql` to true.
879
+ # @param [Hash<String|Symbol, External::DataSource>] external A Hash
880
+ # that represents the mapping of the external tables to the table
881
+ # names used in the SQL query. The hash keys are the table names, and
882
+ # the hash values are the external table objects. See {Dataset#query}.
731
883
  # @param [Integer] max The maximum number of rows of data to return per
732
884
  # page of results. Setting this flag to a small value such as 1000 and
733
885
  # then paging through results might improve reliability when the query
734
886
  # result set is large. In addition to this limit, responses are also
735
887
  # limited to 10 MB. By default, there is no maximum row count, and
736
888
  # only the byte limit applies.
737
- # @param [Integer] timeout How long to wait for the query to complete,
738
- # in milliseconds, before the request times out and returns. Note that
739
- # this is only a timeout for the request, not the query. If the query
740
- # takes longer to run than the timeout value, the call returns without
741
- # any results and with QueryData#complete? set to false. The default
742
- # value is 10000 milliseconds (10 seconds).
743
- # @param [Boolean] dryrun If set to `true`, BigQuery doesn't run the
744
- # job. Instead, if the query is valid, BigQuery returns statistics
745
- # about the job such as how many bytes would be processed. If the
746
- # query is invalid, an error returns. The default value is `false`.
747
889
  # @param [Boolean] cache Whether to look for the result in the query
748
890
  # cache. The query cache is a best-effort cache that will be flushed
749
891
  # whenever tables in the query are modified. The default value is
@@ -769,7 +911,7 @@ module Google
769
911
  # ignored; the query will be run as if `large_results` is true and
770
912
  # `flatten` is false. Optional. The default value is false.
771
913
  #
772
- # @return [Google::Cloud::Bigquery::QueryData]
914
+ # @return [Google::Cloud::Bigquery::Data] A new data object.
773
915
  #
774
916
  # @example Query using standard SQL:
775
917
  # require "google/cloud/bigquery"
@@ -822,25 +964,112 @@ module Google
822
964
  # puts row[:name]
823
965
  # end
824
966
  #
967
+ # @example Query using external data source:
968
+ # require "google/cloud/bigquery"
969
+ #
970
+ # bigquery = Google::Cloud::Bigquery.new
971
+ # dataset = bigquery.dataset "my_dataset"
972
+ #
973
+ # csv_url = "gs://bucket/path/to/data.csv"
974
+ # csv_table = dataset.external csv_url do |csv|
975
+ # csv.autodetect = true
976
+ # csv.skip_leading_rows = 1
977
+ # end
978
+ #
979
+ # data = dataset.query "SELECT * FROM my_ext_table",
980
+ # external: { my_ext_table: csv_table }
981
+ #
982
+ # data.each do |row|
983
+ # puts row[:name]
984
+ # end
985
+ #
825
986
  # @!group Data
826
987
  #
827
- def query query, params: nil, max: nil, timeout: 10000, dryrun: nil,
828
- cache: true, standard_sql: nil, legacy_sql: nil
829
- options = { max: max, timeout: timeout, dryrun: dryrun, cache: cache,
830
- legacy_sql: legacy_sql, standard_sql: standard_sql,
831
- params: params }
832
- options[:dataset] ||= dataset_id
833
- options[:project] ||= project_id
988
+ def query query, params: nil, external: nil, max: nil, cache: true,
989
+ standard_sql: nil, legacy_sql: nil
834
990
  ensure_service!
835
- gapi = service.query query, options
836
- QueryData.from_gapi gapi, service
991
+ options = { params: params, external: external, cache: cache,
992
+ legacy_sql: legacy_sql, standard_sql: standard_sql }
993
+
994
+ job = query_job query, options
995
+ job.wait_until_done!
996
+
997
+ if job.failed?
998
+ begin
999
+ # raise to activate ruby exception cause handling
1000
+ fail job.gapi_error
1001
+ rescue => e
1002
+ # wrap Google::Apis::Error with Google::Cloud::Error
1003
+ raise Google::Cloud::Error.from_error(e)
1004
+ end
1005
+ end
1006
+
1007
+ job.data max: max
1008
+ end
1009
+
1010
+ ##
1011
+ # Creates a new External::DataSource (or subclass) object that
1012
+ # represents the external data source that can be queried from directly,
1013
+ # even though the data is not stored in BigQuery. Instead of loading or
1014
+ # streaming the data, this object references the external data source.
1015
+ #
1016
+ # @see https://cloud.google.com/bigquery/external-data-sources Querying
1017
+ # External Data Sources
1018
+ #
1019
+ # @param [String, Array<String>] url The fully-qualified URL(s) that
1020
+ # point to your data in Google Cloud. An attempt will be made to
1021
+ # derive the format from the URLs provided.
1022
+ # @param [String|Symbol] format The data format. This value will be used
1023
+ # even if the provided URLs are recognized as a different format.
1024
+ # Optional.
1025
+ #
1026
+ # The following values are supported:
1027
+ #
1028
+ # * `csv` - CSV
1029
+ # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
1030
+ # * `avro` - [Avro](http://avro.apache.org/)
1031
+ # * `sheets` - Google Sheets
1032
+ # * `datastore_backup` - Cloud Datastore backup
1033
+ # * `bigtable` - Bigtable
1034
+ #
1035
+ # @return [External::DataSource] External data source.
1036
+ #
1037
+ # @example
1038
+ # require "google/cloud/bigquery"
1039
+ #
1040
+ # bigquery = Google::Cloud::Bigquery.new
1041
+ #
1042
+ # dataset = bigquery.dataset "my_dataset"
1043
+ #
1044
+ # csv_url = "gs://bucket/path/to/data.csv"
1045
+ # csv_table = dataset.external csv_url do |csv|
1046
+ # csv.autodetect = true
1047
+ # csv.skip_leading_rows = 1
1048
+ # end
1049
+ #
1050
+ # data = dataset.query "SELECT * FROM my_ext_table",
1051
+ # external: { my_ext_table: csv_table }
1052
+ #
1053
+ # data.each do |row|
1054
+ # puts row[:name]
1055
+ # end
1056
+ #
1057
+ def external url, format: nil
1058
+ ext = External.from_urls url, format
1059
+ yield ext if block_given?
1060
+ ext
837
1061
  end
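A brief sketch of the format override described above, for a case where the object's extension does not reveal its format (the bucket path is a placeholder):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset = bigquery.dataset "my_dataset"

    # A .txt extension would not be detected as CSV, so force the format.
    ext_table = dataset.external "gs://my-bucket/data.txt", format: :csv do |csv|
      csv.skip_leading_rows = 1
    end

    data = dataset.query "SELECT * FROM my_ext_table",
                         external: { my_ext_table: ext_table }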
838
1062
 
839
1063
  ##
840
- # Loads data into the provided destination table. For the source of the
841
- # data, you can pass a google-cloud storage file path or a
842
- # google-cloud-storage `File` instance. Or, you can upload a file
843
- # directly. See [Loading Data with a POST
1064
+ # Loads data into the provided destination table using an asynchronous
1065
+ # method. In this method, a {LoadJob} is immediately returned. The
1066
+ # caller may poll the service by repeatedly calling {Job#reload!} and
1067
+ # {Job#done?} to detect when the job is done, or simply block until the
1068
+ #   job is done by calling {Job#wait_until_done!}. See also {#load}.
1069
+ #
1070
+ # For the source of the data, you can pass a google-cloud storage file
1071
+ # path or a google-cloud-storage `File` instance. Or, you can upload a
1072
+ # file directly. See [Loading Data with a POST
844
1073
  # Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
845
1074
  #
846
1075
  # @param [String] table_id The destination table to load the data into.
@@ -888,6 +1117,9 @@ module Google
888
1117
  # @param [Boolean] quoted_newlines Indicates if BigQuery should allow
889
1118
  # quoted data sections that contain newline characters in a CSV file.
890
1119
  # The default value is `false`.
1120
+ # @param [Boolean] autodetect Indicates if BigQuery should
1121
+ # automatically infer the options and schema for CSV and JSON sources.
1122
+ # The default value is `false`.
891
1123
  # @param [String] encoding The character encoding of the data. The
892
1124
  # supported values are `UTF-8` or `ISO-8859-1`. The default value is
893
1125
  # `UTF-8`.
@@ -912,6 +1144,13 @@ module Google
912
1144
  # records exceeds this value, an invalid error is returned in the job
913
1145
  # result. The default value is `0`, which requires that all records
914
1146
  # are valid.
1147
+ # @param [String] null_marker Specifies a string that represents a null
1148
+ # value in a CSV file. For example, if you specify `\N`, BigQuery
1149
+ # interprets `\N` as a null value when loading a CSV file. The default
1150
+ # value is the empty string. If you set this property to a custom
1151
+ # value, BigQuery throws an error if an empty string is present for
1152
+ # all data types except for STRING and BYTE. For STRING and BYTE
1153
+ # columns, BigQuery interprets the empty string as an empty value.
915
1154
  # @param [String] quote The value that is used to quote data sections in
916
1155
  # a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
917
1156
  # then uses the first byte of the encoded string to split the data in
@@ -932,6 +1171,28 @@ module Google
932
1171
  # See {Project#schema} for the creation of the schema for use with
933
1172
  # this option. Also note that for most use cases, the block yielded by
934
1173
  # this method is a more convenient way to configure the schema.
1174
+ # @param [String] job_id A user-defined ID for the load job. The ID
1175
+ # must contain only letters (a-z, A-Z), numbers (0-9), underscores
1176
+ # (_), or dashes (-). The maximum length is 1,024 characters. If
1177
+ # `job_id` is provided, then `prefix` will not be used.
1178
+ #
1179
+ # See [Generating a job
1180
+ # ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
1181
+ # @param [String] prefix A string, usually human-readable, that will be
1182
+ # prepended to a generated value to produce a unique job ID. For
1183
+ # example, the prefix `daily_import_job_` can be given to generate a
1184
+ # job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
1185
+ # prefix must contain only letters (a-z, A-Z), numbers (0-9),
1186
+ # underscores (_), or dashes (-). The maximum length of the entire ID
1187
+ # is 1,024 characters. If `job_id` is provided, then `prefix` will not
1188
+ # be used.
1189
+ # @param [Hash] labels A hash of user-provided labels associated with
1190
+ # the job. You can use these to organize and group your jobs. Label
1191
+ # keys and values can be no longer than 63 characters, can only
1192
+ # contain lowercase letters, numeric characters, underscores and
1193
+ # dashes. International characters are allowed. Label values are
1194
+ # optional. Label keys must start with a letter and each label in the
1195
+ # list must have a different key.
935
1196
  #
936
1197
  # @yield [schema] A block for setting the schema for the destination
937
1198
  # table. The schema can be omitted if the destination table already
@@ -941,7 +1202,7 @@ module Google
941
1202
  # instance provided using the `schema` option, or a new, empty schema
942
1203
  # instance
943
1204
  #
944
- # @return [Google::Cloud::Bigquery::LoadJob]
1205
+ # @return [Google::Cloud::Bigquery::LoadJob] A new load job object.
945
1206
  #
946
1207
  # @example
947
1208
  # require "google/cloud/bigquery"
@@ -950,7 +1211,7 @@ module Google
950
1211
  # dataset = bigquery.dataset "my_dataset"
951
1212
  #
952
1213
  # gs_url = "gs://my-bucket/file-name.csv"
953
- # load_job = dataset.load "my_new_table", gs_url do |schema|
1214
+ # load_job = dataset.load_job "my_new_table", gs_url do |schema|
954
1215
  # schema.string "first_name", mode: :required
955
1216
  # schema.record "cities_lived", mode: :repeated do |nested_schema|
956
1217
  # nested_schema.string "place", mode: :required
@@ -968,7 +1229,7 @@ module Google
968
1229
  # storage = Google::Cloud::Storage.new
969
1230
  # bucket = storage.bucket "my-bucket"
970
1231
  # file = bucket.file "file-name.csv"
971
- # load_job = dataset.load "my_new_table", file do |schema|
1232
+ # load_job = dataset.load_job "my_new_table", file do |schema|
972
1233
  # schema.string "first_name", mode: :required
973
1234
  # schema.record "cities_lived", mode: :repeated do |nested_schema|
974
1235
  # nested_schema.string "place", mode: :required
@@ -983,7 +1244,7 @@ module Google
983
1244
  # dataset = bigquery.dataset "my_dataset"
984
1245
  #
985
1246
  # file = File.open "my_data.csv"
986
- # load_job = dataset.load "my_new_table", file do |schema|
1247
+ # load_job = dataset.load_job "my_new_table", file do |schema|
987
1248
  # schema.string "first_name", mode: :required
988
1249
  # schema.record "cities_lived", mode: :repeated do |nested_schema|
989
1250
  # nested_schema.string "place", mode: :required
@@ -997,17 +1258,18 @@ module Google
997
1258
  # bigquery = Google::Cloud::Bigquery.new
998
1259
  # dataset = bigquery.dataset "my_dataset"
999
1260
  #
1000
- # load_job = dataset.load "my_new_table",
1261
+ # load_job = dataset.load_job "my_new_table",
1001
1262
  # "gs://my-bucket/xxxx.kind_name.backup_info",
1002
1263
  # format: "datastore_backup"
1003
1264
  #
1004
1265
  # @!group Data
1005
1266
  #
1006
- def load table_id, file, format: nil, create: nil, write: nil,
1007
- projection_fields: nil, jagged_rows: nil, quoted_newlines: nil,
1008
- encoding: nil, delimiter: nil, ignore_unknown: nil,
1009
- max_bad_records: nil, quote: nil, skip_leading: nil,
1010
- dryrun: nil, schema: nil
1267
+ def load_job table_id, file, format: nil, create: nil, write: nil,
1268
+ projection_fields: nil, jagged_rows: nil,
1269
+ quoted_newlines: nil, encoding: nil, delimiter: nil,
1270
+ ignore_unknown: nil, max_bad_records: nil, quote: nil,
1271
+ skip_leading: nil, dryrun: nil, schema: nil, job_id: nil,
1272
+ prefix: nil, labels: nil, autodetect: nil, null_marker: nil
1011
1273
  ensure_service!
1012
1274
 
1013
1275
  if block_given?
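A sketch combining several of the options added to load_job above (bucket and table names are placeholders):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset = bigquery.dataset "my_dataset"

    load_job = dataset.load_job "my_new_table",
                                "gs://my-bucket/file-name.csv",
                                autodetect: true,    # infer the schema from the CSV
                                null_marker: "\\N",  # treat \N as NULL
                                prefix: "nightly_load_"

    load_job.wait_until_done!
    puts(load_job.failed? ? "load failed" : "loaded")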
@@ -1023,12 +1285,228 @@ module Google
1023
1285
  delimiter: delimiter, ignore_unknown: ignore_unknown,
1024
1286
  max_bad_records: max_bad_records, quote: quote,
1025
1287
  skip_leading: skip_leading, dryrun: dryrun,
1026
- schema: schema_gapi }
1288
+ schema: schema_gapi, job_id: job_id, prefix: prefix,
1289
+ labels: labels, autodetect: autodetect,
1290
+ null_marker: null_marker }
1027
1291
  return load_storage(table_id, file, options) if storage_url? file
1028
1292
  return load_local(table_id, file, options) if local_file? file
1029
1293
  fail Google::Cloud::Error, "Don't know how to load #{file}"
1030
1294
  end
1031
1295
 
1296
+ ##
1297
+ # Loads data into the provided destination table using a synchronous
1298
+ # method that blocks for a response. Timeouts and transient errors are
1299
+ # generally handled as needed to complete the job. See also
1300
+ # {#load_job}.
1301
+ #
1302
+ # For the source of the data, you can pass a google-cloud storage file
1303
+ # path or a google-cloud-storage `File` instance. Or, you can upload a
1304
+ # file directly. See [Loading Data with a POST
1305
+ # Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
1306
+ #
1307
+ # @param [String] table_id The destination table to load the data into.
1308
+ # @param [File, Google::Cloud::Storage::File, String] file A file or the
1309
+ # URI of a Google Cloud Storage file containing data to load into the
1310
+ # table.
1311
+ # @param [String] format The exported file format. The default value is
1312
+ # `csv`.
1313
+ #
1314
+ # The following values are supported:
1315
+ #
1316
+ # * `csv` - CSV
1317
+ # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
1318
+ # * `avro` - [Avro](http://avro.apache.org/)
1319
+ # * `datastore_backup` - Cloud Datastore backup
1320
+ # @param [String] create Specifies whether the job is allowed to create
1321
+ # new tables. The default value is `needed`.
1322
+ #
1323
+ # The following values are supported:
1324
+ #
1325
+ # * `needed` - Create the table if it does not exist.
1326
+ # * `never` - The table must already exist. A 'notFound' error is
1327
+ # raised if the table does not exist.
1328
+ # @param [String] write Specifies how to handle data already present in
1329
+ # the table. The default value is `append`.
1330
+ #
1331
+ # The following values are supported:
1332
+ #
1333
+ # * `truncate` - BigQuery overwrites the table data.
1334
+ # * `append` - BigQuery appends the data to the table.
1335
+ # * `empty` - An error will be returned if the table already contains
1336
+ # data.
1337
+ # @param [Array<String>] projection_fields If the `format` option is set
1338
+ # to `datastore_backup`, indicates which entity properties to load
1339
+ # from a Cloud Datastore backup. Property names are case sensitive and
1340
+ # must be top-level properties. If not set, BigQuery loads all
1341
+ # properties. If any named property isn't found in the Cloud Datastore
1342
+ # backup, an invalid error is returned.
1343
+ # @param [Boolean] jagged_rows Accept rows that are missing trailing
1344
+ # optional columns. The missing values are treated as nulls. If
1345
+ # `false`, records with missing trailing columns are treated as bad
1346
+ # records, and if there are too many bad records, an invalid error is
1347
+ # returned in the job result. The default value is `false`. Only
1348
+ # applicable to CSV, ignored for other formats.
1349
+ # @param [Boolean] quoted_newlines Indicates if BigQuery should allow
1350
+ # quoted data sections that contain newline characters in a CSV file.
1351
+ # The default value is `false`.
1352
+ # @param [Boolean] autodetect Indicates if BigQuery should
1353
+ # automatically infer the options and schema for CSV and JSON sources.
1354
+ # The default value is `false`.
1355
+ # @param [String] encoding The character encoding of the data. The
1356
+ # supported values are `UTF-8` or `ISO-8859-1`. The default value is
1357
+ # `UTF-8`.
1358
+ # @param [String] delimiter Specifies the separator for fields in a CSV
1359
+ # file. BigQuery converts the string to `ISO-8859-1` encoding, and
1360
+ # then uses the first byte of the encoded string to split the data in
1361
+ # its raw, binary state. Default is <code>,</code>.
1362
+ # @param [Boolean] ignore_unknown Indicates if BigQuery should allow
1363
+ # extra values that are not represented in the table schema. If true,
1364
+ # the extra values are ignored. If false, records with extra columns
1365
+ # are treated as bad records, and if there are too many bad records,
1366
+ # an invalid error is returned in the job result. The default value is
1367
+ # `false`.
1368
+ #
1369
+ # The `format` property determines what BigQuery treats as an extra
1370
+ # value:
1371
+ #
1372
+ # * `CSV`: Trailing columns
1373
+ # * `JSON`: Named values that don't match any column names
1374
+ # @param [Integer] max_bad_records The maximum number of bad records
1375
+ # that BigQuery can ignore when running the job. If the number of bad
1376
+ # records exceeds this value, an invalid error is returned in the job
1377
+ # result. The default value is `0`, which requires that all records
1378
+ # are valid.
1379
+ # @param [String] null_marker Specifies a string that represents a null
1380
+ # value in a CSV file. For example, if you specify `\N`, BigQuery
1381
+ # interprets `\N` as a null value when loading a CSV file. The default
1382
+ # value is the empty string. If you set this property to a custom
1383
+ # value, BigQuery throws an error if an empty string is present for
1384
+ # all data types except for STRING and BYTE. For STRING and BYTE
1385
+ # columns, BigQuery interprets the empty string as an empty value.
1386
+ # @param [String] quote The value that is used to quote data sections in
1387
+ # a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
1388
+ # then uses the first byte of the encoded string to split the data in
1389
+ # its raw, binary state. The default value is a double-quote
1390
+ # <code>"</code>. If your data does not contain quoted sections, set
1391
+ # the property value to an empty string. If your data contains quoted
1392
+ # newline characters, you must also set the allowQuotedNewlines
1393
+ # property to true.
1394
+ # @param [Integer] skip_leading The number of rows at the top of a CSV
1395
+ # file that BigQuery will skip when loading the data. The default
1396
+ # value is `0`. This property is useful if you have header rows in the
1397
+ # file that should be skipped.
1398
+ # @param [Google::Cloud::Bigquery::Schema] schema The schema for the
1399
+ # destination table. Optional. The schema can be omitted if the
1400
+ # destination table already exists, or if you're loading data from a
1401
+ # Google Cloud Datastore backup.
1402
+ #
1403
+ # See {Project#schema} for the creation of the schema for use with
1404
+ # this option. Also note that for most use cases, the block yielded by
1405
+ # this method is a more convenient way to configure the schema.
1406
+ #
1407
+ # @yield [schema] A block for setting the schema for the destination
1408
+ # table. The schema can be omitted if the destination table already
1409
+ # exists, or if you're loading data from a Google Cloud Datastore
1410
+ # backup.
1411
+ # @yieldparam [Google::Cloud::Bigquery::Schema] schema The schema
1412
+ # instance provided using the `schema` option, or a new, empty schema
1413
+ # instance
1414
+ #
1415
+ # @return [Boolean] Returns `true` if the load job was successful.
1416
+ #
1417
+ # @example
1418
+ # require "google/cloud/bigquery"
1419
+ #
1420
+ # bigquery = Google::Cloud::Bigquery.new
1421
+ # dataset = bigquery.dataset "my_dataset"
1422
+ #
1423
+ # gs_url = "gs://my-bucket/file-name.csv"
1424
+ # dataset.load "my_new_table", gs_url do |schema|
1425
+ # schema.string "first_name", mode: :required
1426
+ # schema.record "cities_lived", mode: :repeated do |nested_schema|
1427
+ # nested_schema.string "place", mode: :required
1428
+ # nested_schema.integer "number_of_years", mode: :required
1429
+ # end
1430
+ # end
1431
+ #
1432
+ # @example Pass a google-cloud-storage `File` instance:
1433
+ # require "google/cloud/bigquery"
1434
+ # require "google/cloud/storage"
1435
+ #
1436
+ # bigquery = Google::Cloud::Bigquery.new
1437
+ # dataset = bigquery.dataset "my_dataset"
1438
+ #
1439
+ # storage = Google::Cloud::Storage.new
1440
+ # bucket = storage.bucket "my-bucket"
1441
+ # file = bucket.file "file-name.csv"
1442
+ # dataset.load "my_new_table", file do |schema|
1443
+ # schema.string "first_name", mode: :required
1444
+ # schema.record "cities_lived", mode: :repeated do |nested_schema|
1445
+ # nested_schema.string "place", mode: :required
1446
+ # nested_schema.integer "number_of_years", mode: :required
1447
+ # end
1448
+ # end
1449
+ #
1450
+ # @example Upload a file directly:
1451
+ # require "google/cloud/bigquery"
1452
+ #
1453
+ # bigquery = Google::Cloud::Bigquery.new
1454
+ # dataset = bigquery.dataset "my_dataset"
1455
+ #
1456
+ # file = File.open "my_data.csv"
1457
+ # dataset.load "my_new_table", file do |schema|
1458
+ # schema.string "first_name", mode: :required
1459
+ # schema.record "cities_lived", mode: :repeated do |nested_schema|
1460
+ # nested_schema.string "place", mode: :required
1461
+ # nested_schema.integer "number_of_years", mode: :required
1462
+ # end
1463
+ # end
1464
+ #
1465
+ # @example Schema is not required with a Cloud Datastore backup:
1466
+ # require "google/cloud/bigquery"
1467
+ #
1468
+ # bigquery = Google::Cloud::Bigquery.new
1469
+ # dataset = bigquery.dataset "my_dataset"
1470
+ #
1471
+ # dataset.load "my_new_table",
1472
+ # "gs://my-bucket/xxxx.kind_name.backup_info",
1473
+ # format: "datastore_backup"
1474
+ #
1475
+ # @!group Data
1476
+ #
1477
+ def load table_id, file, format: nil, create: nil, write: nil,
1478
+ projection_fields: nil, jagged_rows: nil, quoted_newlines: nil,
1479
+ encoding: nil, delimiter: nil, ignore_unknown: nil,
1480
+ max_bad_records: nil, quote: nil, skip_leading: nil,
1481
+ schema: nil, autodetect: nil, null_marker: nil
1482
+
1483
+ yield (schema ||= Schema.from_gapi) if block_given?
1484
+
1485
+ options = { format: format, create: create, write: write,
1486
+ projection_fields: projection_fields,
1487
+ jagged_rows: jagged_rows,
1488
+ quoted_newlines: quoted_newlines, encoding: encoding,
1489
+ delimiter: delimiter, ignore_unknown: ignore_unknown,
1490
+ max_bad_records: max_bad_records, quote: quote,
1491
+ skip_leading: skip_leading, schema: schema,
1492
+ autodetect: autodetect, null_marker: null_marker }
1493
+ job = load_job table_id, file, options
1494
+
1495
+ job.wait_until_done!
1496
+
1497
+ if job.failed?
1498
+ begin
1499
+ # raise to activate ruby exception cause handling
1500
+ fail job.gapi_error
1501
+ rescue => e
1502
+ # wrap Google::Apis::Error with Google::Cloud::Error
1503
+ raise Google::Cloud::Error.from_error(e)
1504
+ end
1505
+ end
1506
+
1507
+ true
1508
+ end
1509
+
1032
1510
  ##
1033
1511
  # @private New Dataset from a Google API Client object.
1034
1512
  def self.from_gapi gapi, conn
@@ -1038,8 +1516,158 @@ module Google
1038
1516
  end
1039
1517
  end
1040
1518
 
1519
+ ##
1520
+ # Inserts data into the given table for near-immediate querying, without
1521
+ # the need to complete a load operation before the data can appear in
1522
+ # query results.
1523
+ #
1524
+ # @see https://cloud.google.com/bigquery/streaming-data-into-bigquery
1525
+ # Streaming Data Into BigQuery
1526
+ #
1527
+ # @param [String] table_id The ID of the destination table.
1528
+ # @param [Hash, Array<Hash>] rows A hash object or array of hash objects
1529
+ # containing the data. Required.
1530
+ # @param [Boolean] skip_invalid Insert all valid rows of a request, even
1531
+ # if invalid rows exist. The default value is `false`, which causes
1532
+ # the entire request to fail if any invalid rows exist.
1533
+ # @param [Boolean] ignore_unknown Accept rows that contain values that
1534
+ # do not match the schema. The unknown values are ignored. Default is
1535
+ # false, which treats unknown values as errors.
1536
+ # @param [Boolean] autocreate Specifies whether the method should create
1537
+ # a new table with the given `table_id`, if no table is found for
1538
+ # `table_id`. The default value is false.
1539
+ #
1540
+ # @return [Google::Cloud::Bigquery::InsertResponse] An insert response
1541
+ # object.
1542
+ #
1543
+ # @example
1544
+ # require "google/cloud/bigquery"
1545
+ #
1546
+ # bigquery = Google::Cloud::Bigquery.new
1547
+ # dataset = bigquery.dataset "my_dataset"
1548
+ #
1549
+ # rows = [
1550
+ # { "first_name" => "Alice", "age" => 21 },
1551
+ # { "first_name" => "Bob", "age" => 22 }
1552
+ # ]
1553
+ # dataset.insert "my_table", rows
1554
+ #
1555
+ # @example Using `autocreate` to create a new table if none exists.
1556
+ # require "google/cloud/bigquery"
1557
+ #
1558
+ # bigquery = Google::Cloud::Bigquery.new
1559
+ # dataset = bigquery.dataset "my_dataset"
1560
+ #
1561
+ # rows = [
1562
+ # { "first_name" => "Alice", "age" => 21 },
1563
+ # { "first_name" => "Bob", "age" => 22 }
1564
+ # ]
1565
+ # dataset.insert "my_table", rows, autocreate: true do |t|
1566
+ # t.schema.string "first_name", mode: :required
1567
+ # t.schema.integer "age", mode: :required
1568
+ # end
1569
+ #
1570
+ # @!group Data
1571
+ #
1572
+ def insert table_id, rows, skip_invalid: nil, ignore_unknown: nil,
1573
+ autocreate: nil
1574
+ if autocreate
1575
+ begin
1576
+ insert_data table_id, rows, skip_invalid: skip_invalid,
1577
+ ignore_unknown: ignore_unknown
1578
+ rescue Google::Cloud::NotFoundError
1579
+ sleep rand(1..60)
1580
+ begin
1581
+ create_table table_id do |tbl_updater|
1582
+ yield tbl_updater if block_given?
1583
+ end
1584
+ # rubocop:disable Lint/HandleExceptions
1585
+ rescue Google::Cloud::AlreadyExistsError
1586
+ end
1587
+ # rubocop:enable Lint/HandleExceptions
1588
+
1589
+ sleep 60
1590
+ insert table_id, rows, skip_invalid: skip_invalid,
1591
+ ignore_unknown: ignore_unknown,
1592
+ autocreate: true
1593
+ end
1594
+ else
1595
+ insert_data table_id, rows, skip_invalid: skip_invalid,
1596
+ ignore_unknown: ignore_unknown
1597
+ end
1598
+ end
1599
+
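A small sketch of checking the InsertResponse returned by insert, using the insert_count and error_count readers that also appear in the insert_async example below (row contents are illustrative):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset = bigquery.dataset "my_dataset"

    rows = [
      { "first_name" => "Alice", "age" => 21 },
      { "first_name" => "Bob",   "age" => 22 }
    ]

    response = dataset.insert "my_table", rows, skip_invalid: true
    puts "inserted #{response.insert_count} rows, #{response.error_count} errors"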
1600
+ ##
1601
+ # Create an asynchronous inserter object used to insert rows in batches.
1602
+ #
1603
+ # @param [String] table_id The ID of the table to insert rows into.
1604
+ # @param [Boolean] skip_invalid Insert all valid rows of a request, even
1605
+ # if invalid rows exist. The default value is `false`, which causes
1606
+ # the entire request to fail if any invalid rows exist.
1607
+ # @param [Boolean] ignore_unknown Accept rows that contain values that
1608
+ # do not match the schema. The unknown values are ignored. Default is
1609
+ # false, which treats unknown values as errors.
1610
+ # @param [Integer] max_bytes The maximum size of rows to be
1611
+ # collected before the batch is published. Default is 10,000,000
1612
+ # (10MB).
1613
+ # @param [Integer] max_rows The maximum number of rows to be collected
1614
+ # before the batch is published. Default is 500.
1615
+ # @param [Numeric] interval The number of seconds to collect
1616
+ #   rows before the batch is published. Default is 10.
1617
+ # @param [Numeric] threads The number of threads used to insert
1618
+ # batches of rows. Default is 4.
1619
+ # @yield [response] the callback for when a batch of rows is inserted
1620
+ # @yieldparam [InsertResponse] response the result of the asynchronous
1621
+ # insert
1622
+ #
1623
+ # @return [Table::AsyncInserter] Returns an inserter object.
1624
+ #
1625
+ # @example
1626
+ # require "google/cloud/bigquery"
1627
+ #
1628
+ # bigquery = Google::Cloud::Bigquery.new
1629
+ # dataset = bigquery.dataset "my_dataset"
1630
+ # table = dataset.table "my_table"
1631
+ # inserter = table.insert_async do |response|
1632
+ # log_insert "inserted #{response.insert_count} rows " \
1633
+ # "with #{response.error_count} errors"
1634
+ # end
1635
+ #
1636
+ # rows = [
1637
+ # { "first_name" => "Alice", "age" => 21 },
1638
+ # { "first_name" => "Bob", "age" => 22 }
1639
+ # ]
1640
+ # inserter.insert rows
1641
+ #
1642
+ # inserter.stop.wait!
1643
+ #
1644
+ def insert_async table_id, skip_invalid: nil, ignore_unknown: nil,
1645
+ max_bytes: 10000000, max_rows: 500, interval: 10,
1646
+ threads: 4, &block
1647
+ ensure_service!
1648
+
1649
+ # Get table, don't use Dataset#table which handles NotFoundError
1650
+ gapi = service.get_table dataset_id, table_id
1651
+ table = Table.from_gapi gapi, service
1652
+ # Get the AsyncInserter from the table
1653
+ table.insert_async skip_invalid: skip_invalid,
1654
+ ignore_unknown: ignore_unknown,
1655
+ max_bytes: max_bytes, max_rows: max_rows,
1656
+ interval: interval, threads: threads, &block
1657
+ end
1658
+
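A short sketch of tuning the batching thresholds documented above; smaller batches publish sooner at the cost of more requests (the threshold values are illustrative):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset = bigquery.dataset "my_dataset"

    inserter = dataset.insert_async "my_table", max_rows: 100, interval: 5 do |response|
      # Called once per published batch.
      puts "batch: #{response.insert_count} rows, #{response.error_count} errors"
    end

    rows = [{ "first_name" => "Alice", "age" => 21 }]
    inserter.insert rows
    inserter.stop.wait!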
1041
1659
  protected
1042
1660
 
1661
+ def insert_data table_id, rows, skip_invalid: nil, ignore_unknown: nil
1662
+ rows = [rows] if rows.is_a? Hash
1663
+ fail ArgumentError, "No rows provided" if rows.empty?
1664
+ ensure_service!
1665
+ options = { skip_invalid: skip_invalid,
1666
+ ignore_unknown: ignore_unknown }
1667
+ gapi = service.insert_tabledata dataset_id, table_id, rows, options
1668
+ InsertResponse.from_gapi rows, gapi
1669
+ end
1670
+
1043
1671
  ##
1044
1672
  # Raise an error unless an active service is available.
1045
1673
  def ensure_service!
@@ -1053,6 +1681,7 @@ module Google
1053
1681
  [attr, @gapi.send(attr)]
1054
1682
  end]
1055
1683
  patch_gapi = Google::Apis::BigqueryV2::Dataset.new patch_args
1684
+ patch_gapi.etag = etag if etag
1056
1685
  @gapi = service.patch_dataset dataset_id, patch_gapi
1057
1686
  end
1058
1687
 
@@ -1101,6 +1730,19 @@ module Google
1101
1730
  false
1102
1731
  end
1103
1732
 
1733
+ def udfs_gapi array_or_str
1734
+ return [] if array_or_str.nil?
1735
+ Array(array_or_str).map do |uri_or_code|
1736
+ resource = Google::Apis::BigqueryV2::UserDefinedFunctionResource.new
1737
+ if uri_or_code.start_with?("gs://")
1738
+ resource.resource_uri = uri_or_code
1739
+ else
1740
+ resource.inline_code = uri_or_code
1741
+ end
1742
+ resource
1743
+ end
1744
+ end
1745
+
1104
1746
  ##
1105
1747
  # Yielded to a block to accumulate changes for a patch request.
1106
1748
  class Updater < Dataset