google-cloud-bigquery 0.28.0 → 0.29.0

This diff shows the changes between two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
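As an orientation aid before the diff itself, the following is a minimal, hypothetical sketch of two changes visible in this release: Dataset#query is now synchronous (it runs a query job, blocks until it completes, and returns a Data object), and the documentation examples switch from QueryJob#query_results to QueryJob#data. The dataset, table, and query names are placeholders.

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset  = bigquery.dataset "my_dataset"   # placeholder dataset

    # Synchronous: blocks until the query job is done, returns a Data object.
    data = dataset.query "SELECT name FROM my_table"
    data.each { |row| puts row[:name] }

    # Asynchronous: query_job returns a QueryJob; read results via #data.
    job = dataset.query_job "SELECT name FROM my_table"
    job.wait_until_done!
    job.data.each { |row| puts row[:name] } unless job.failed?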
@@ -17,6 +17,7 @@ require "json"
  require "google/cloud/errors"
  require "google/cloud/bigquery/service"
  require "google/cloud/bigquery/table"
+ require "google/cloud/bigquery/external"
  require "google/cloud/bigquery/dataset/list"
  require "google/cloud/bigquery/dataset/access"
  require "google/apis/bigquery_v2"
@@ -59,8 +60,9 @@ module Google

  ##
  # A unique ID for this dataset, without the project name.
- # The ID must contain only letters (a-z, A-Z), numbers (0-9),
- # or underscores (_). The maximum length is 1,024 characters.
+ #
+ # @return [String] The ID must contain only letters (a-z, A-Z), numbers
+ # (0-9), or underscores (_). The maximum length is 1,024 characters.
  #
  # @!group Attributes
  #
@@ -71,6 +73,8 @@ module Google
  ##
  # The ID of the project containing this dataset.
  #
+ # @return [String] The project ID.
+ #
  # @!group Attributes
  #
  def project_id
@@ -90,6 +94,8 @@ module Google
  ##
  # A descriptive name for the dataset.
  #
+ # @return [String] The friendly name.
+ #
  # @!group Attributes
  #
  def name
@@ -99,6 +105,8 @@ module Google
  ##
  # Updates the descriptive name for the dataset.
  #
+ # @param [String] new_name The new friendly name.
+ #
  # @!group Attributes
  #
  def name= new_name
@@ -107,7 +115,9 @@ module Google
  end

  ##
- # A string hash of the dataset.
+ # The ETag hash of the dataset.
+ #
+ # @return [String] The ETag hash.
  #
  # @!group Attributes
  #
@@ -119,6 +129,8 @@ module Google
  ##
  # A URL that can be used to access the dataset using the REST API.
  #
+ # @return [String] A REST URL for the resource.
+ #
  # @!group Attributes
  #
  def api_url
@@ -129,6 +141,8 @@ module Google
  ##
  # A user-friendly description of the dataset.
  #
+ # @return [String] The description.
+ #
  # @!group Attributes
  #
  def description
@@ -139,6 +153,8 @@ module Google
  ##
  # Updates the user-friendly description of the dataset.
  #
+ # @param [String] new_description The new description for the dataset.
+ #
  # @!group Attributes
  #
  def description= new_description
@@ -149,6 +165,8 @@ module Google
  ##
  # The default lifetime of all tables in the dataset, in milliseconds.
  #
+ # @return [Integer] The default table expiration in milliseconds.
+ #
  # @!group Attributes
  #
  def default_expiration
@@ -164,6 +182,9 @@ module Google
  # Updates the default lifetime of all tables in the dataset, in
  # milliseconds.
  #
+ # @param [Integer] new_default_expiration The new default table
+ # expiration in milliseconds.
+ #
  # @!group Attributes
  #
  def default_expiration= new_default_expiration
@@ -174,6 +195,8 @@ module Google
  ##
  # The time when this dataset was created.
  #
+ # @return [Time, nil] The creation time.
+ #
  # @!group Attributes
  #
  def created_at
@@ -188,6 +211,8 @@ module Google
  ##
  # The date when this dataset or any of its tables was last modified.
  #
+ # @return [Time, nil] The last modified time.
+ #
  # @!group Attributes
  #
  def modified_at
@@ -201,7 +226,9 @@ module Google

  ##
  # The geographic location where the dataset should reside. Possible
- # values include EU and US. The default value is US.
+ # values include `EU` and `US`. The default value is `US`.
+ #
+ # @return [String] The location code.
  #
  # @!group Attributes
  #
@@ -210,6 +237,63 @@ module Google
  @gapi.location
  end

+ ##
+ # A hash of user-provided labels associated with this dataset. Labels
+ # are used to organize and group datasets. See [Using
+ # Labels](https://cloud.google.com/bigquery/docs/labels).
+ #
+ # The returned hash is frozen and changes are not allowed. Use
+ # {#labels=} to replace the entire hash.
+ #
+ # @return [Hash<String, String>] A hash containing key/value pairs.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # labels = dataset.labels
+ # labels["department"] #=> "shipping"
+ #
+ # @!group Attributes
+ #
+ def labels
+ m = @gapi.labels
+ m = m.to_h if m.respond_to? :to_h
+ m.dup.freeze
+ end
+
+ ##
+ # Updates the hash of user-provided labels associated with this dataset.
+ # Labels are used to organize and group datasets. See [Using
+ # Labels](https://cloud.google.com/bigquery/docs/labels).
+ #
+ # @param [Hash<String, String>] labels A hash containing key/value
+ # pairs.
+ #
+ # * Label keys and values can be no longer than 63 characters.
+ # * Label keys and values can contain only lowercase letters, numbers,
+ # underscores, hyphens, and international characters.
+ # * Label keys and values cannot exceed 128 bytes in size.
+ # * Label keys must begin with a letter.
+ # * Label keys must be unique within a dataset.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # dataset.labels = { "department" => "shipping" }
+ #
+ # @!group Attributes
+ #
+ def labels= labels
+ @gapi.labels = labels
+ patch_gapi! :labels
+ end
+
  ##
  # Retrieves the access rules for a Dataset. The rules can be updated
  # when passing a block, see {Dataset::Access} for all the methods
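The hunk above adds Dataset#labels and Dataset#labels=. A minimal sketch of reading and replacing the label hash, based on the documentation added here; the label key and value are placeholders. Because the returned hash is frozen, the whole hash is replaced through labels=.

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset  = bigquery.dataset "my_dataset"

    # dataset.labels returns a frozen copy; build a new hash and assign it.
    dataset.labels = dataset.labels.merge("department" => "shipping")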
@@ -221,7 +305,7 @@ module Google
  # @yield [access] a block for setting rules
  # @yieldparam [Dataset::Access] access the object accepting rules
  #
- # @return [Google::Cloud::Bigquery::Dataset::Access]
+ # @return [Google::Cloud::Bigquery::Dataset::Access] The access object.
  #
  # @example
  # require "google/cloud/bigquery"
@@ -229,14 +313,8 @@ module Google
  # bigquery = Google::Cloud::Bigquery.new
  # dataset = bigquery.dataset "my_dataset"
  #
- # dataset.access # [{"role"=>"OWNER",
- # # "specialGroup"=>"projectOwners"},
- # # {"role"=>"WRITER",
- # # "specialGroup"=>"projectWriters"},
- # # {"role"=>"READER",
- # # "specialGroup"=>"projectReaders"},
- # # {"role"=>"OWNER",
- # # "userByEmail"=>"123456789-...com"}]
+ # access = dataset.access
+ # access.writer_user? "reader@example.com" #=> false
  #
  # @example Manage the access rules by passing a block:
  # require "google/cloud/bigquery"
@@ -305,7 +383,7 @@ module Google
  # @yield [table] a block for setting the table
  # @yieldparam [Table] table the table object to be updated
  #
- # @return [Google::Cloud::Bigquery::Table]
+ # @return [Google::Cloud::Bigquery::Table] A new table object.
  #
  # @example
  # require "google/cloud/bigquery"
@@ -394,8 +472,15 @@ module Google
  # [legacy
  # SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
  # dialect. Optional. The default value is false.
+ # @param [Array<String>, String] udfs User-defined function resources
+ # used in the query. May be either a code resource to load from a
+ # Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
+ # that contains code for a user-defined function (UDF). Providing an
+ # inline code resource is equivalent to providing a URI for a file
+ # containing the same code. See [User-Defined
+ # Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
  #
- # @return [Google::Cloud::Bigquery::View]
+ # @return [Google::Cloud::Bigquery::View] A new view object.
  #
  # @example
  # require "google/cloud/bigquery"
@@ -419,7 +504,7 @@ module Google
  # @!group Table
  #
  def create_view table_id, query, name: nil, description: nil,
- standard_sql: nil, legacy_sql: nil
+ standard_sql: nil, legacy_sql: nil, udfs: nil
  new_view_opts = {
  table_reference: Google::Apis::BigqueryV2::TableReference.new(
  project_id: project_id, dataset_id: dataset_id, table_id: table_id
@@ -429,7 +514,8 @@ module Google
  view: Google::Apis::BigqueryV2::ViewDefinition.new(
  query: query,
  use_legacy_sql: Convert.resolve_legacy_sql(standard_sql,
- legacy_sql)
+ legacy_sql),
+ user_defined_function_resources: udfs_gapi(udfs)
  )
  }.delete_if { |_, v| v.nil? }
  new_view = Google::Apis::BigqueryV2::Table.new new_view_opts
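The two hunks above thread a new udfs option from Dataset#create_view into the view definition (see the udfs_gapi helper at the end of this diff, which maps each entry to either resource_uri or inline_code). A hypothetical sketch with a placeholder Cloud Storage URI:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset  = bigquery.dataset "my_dataset"

    # udfs accepts a gs:// URI, inline code, or an array mixing both.
    view = dataset.create_view "my_view",
                               "SELECT name, age FROM my_table",
                               udfs: "gs://my-bucket/my-udf.js"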
@@ -474,8 +560,8 @@ module Google
  # @param [Integer] max Maximum number of tables to return.
  #
  # @return [Array<Google::Cloud::Bigquery::Table>,
- # Array<Google::Cloud::Bigquery::View>] (See
- # {Google::Cloud::Bigquery::Table::List})
+ # Array<Google::Cloud::Bigquery::View>] An array of tables and/or
+ # views (see {Google::Cloud::Bigquery::Table::List})
  #
  # @example
  # require "google/cloud/bigquery"
@@ -546,6 +632,10 @@ module Google
  # passed is a hash `{ myparam: "foo" }`, the query must use named
  # query parameters. When set, `legacy_sql` will automatically be set
  # to false and `standard_sql` to true.
+ # @param [Hash<String|Symbol, External::DataSource>] external A Hash
+ # that represents the mapping of the external tables to the table
+ # names used in the SQL query. The hash keys are the table names, and
+ # the hash values are the external table objects. See {Dataset#query}.
  # @param [String] priority Specifies a priority for the query. Possible
  # values include `INTERACTIVE` and `BATCH`. The default value is
  # `INTERACTIVE`.
@@ -605,8 +695,37 @@ module Google
  # job. Queries that will have bytes billed beyond this limit will fail
  # (without incurring a charge). Optional. If unspecified, this will be
  # set to your project default.
- #
- # @return [Google::Cloud::Bigquery::QueryJob]
+ # @param [String] job_id A user-defined ID for the query job. The ID
+ # must contain only letters (a-z, A-Z), numbers (0-9), underscores
+ # (_), or dashes (-). The maximum length is 1,024 characters. If
+ # `job_id` is provided, then `prefix` will not be used.
+ #
+ # See [Generating a job
+ # ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+ # @param [String] prefix A string, usually human-readable, that will be
+ # prepended to a generated value to produce a unique job ID. For
+ # example, the prefix `daily_import_job_` can be given to generate a
+ # job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
+ # prefix must contain only letters (a-z, A-Z), numbers (0-9),
+ # underscores (_), or dashes (-). The maximum length of the entire ID
+ # is 1,024 characters. If `job_id` is provided, then `prefix` will not
+ # be used.
+ # @param [Hash] labels A hash of user-provided labels associated with
+ # the job. You can use these to organize and group your jobs. Label
+ # keys and values can be no longer than 63 characters, can only
+ # contain lowercase letters, numeric characters, underscores and
+ # dashes. International characters are allowed. Label values are
+ # optional. Label keys must start with a letter and each label in the
+ # list must have a different key.
+ # @param [Array<String>, String] udfs User-defined function resources
+ # used in the query. May be either a code resource to load from a
+ # Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
+ # that contains code for a user-defined function (UDF). Providing an
+ # inline code resource is equivalent to providing a URI for a file
+ # containing the same code. See [User-Defined
+ # Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
+ #
+ # @return [Google::Cloud::Bigquery::QueryJob] A new query job object.
  #
  # @example Query using standard SQL:
  # require "google/cloud/bigquery"
@@ -618,7 +737,7 @@ module Google
  #
  # job.wait_until_done!
  # if !job.failed?
- # job.query_results.each do |row|
+ # job.data.each do |row|
  # puts row[:name]
  # end
  # end
@@ -634,7 +753,7 @@ module Google
  #
  # job.wait_until_done!
  # if !job.failed?
- # job.query_results.each do |row|
+ # job.data.each do |row|
  # puts row[:name]
  # end
  # end
@@ -650,7 +769,7 @@ module Google
  #
  # job.wait_until_done!
  # if !job.failed?
- # job.query_results.each do |row|
+ # job.data.each do |row|
  # puts row[:name]
  # end
  # end
@@ -666,24 +785,49 @@ module Google
  #
  # job.wait_until_done!
  # if !job.failed?
- # job.query_results.each do |row|
+ # job.data.each do |row|
+ # puts row[:name]
+ # end
+ # end
+ #
+ # @example Query using external data source:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # csv_url = "gs://bucket/path/to/data.csv"
+ # csv_table = dataset.external csv_url do |csv|
+ # csv.autodetect = true
+ # csv.skip_leading_rows = 1
+ # end
+ #
+ # job = dataset.query_job "SELECT * FROM my_ext_table",
+ # external: { my_ext_table: csv_table }
+ #
+ # job.wait_until_done!
+ # if !job.failed?
+ # job.data.each do |row|
  # puts row[:name]
  # end
  # end
  #
  # @!group Data
  #
- def query_job query, params: nil, priority: "INTERACTIVE", cache: true,
- table: nil, create: nil, write: nil, standard_sql: nil,
+ def query_job query, params: nil, external: nil,
+ priority: "INTERACTIVE", cache: true, table: nil,
+ create: nil, write: nil, standard_sql: nil,
  legacy_sql: nil, large_results: nil, flatten: nil,
- maximum_billing_tier: nil, maximum_bytes_billed: nil
+ maximum_billing_tier: nil, maximum_bytes_billed: nil,
+ job_id: nil, prefix: nil, labels: nil, udfs: nil
  options = { priority: priority, cache: cache, table: table,
  create: create, write: write,
  large_results: large_results, flatten: flatten,
  legacy_sql: legacy_sql, standard_sql: standard_sql,
  maximum_billing_tier: maximum_billing_tier,
  maximum_bytes_billed: maximum_bytes_billed,
- params: params }
+ params: params, external: external, labels: labels,
+ job_id: job_id, prefix: prefix, udfs: udfs }
  options[:dataset] ||= self
  ensure_service!
  gapi = service.query_job query, options
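The hunks above add external, job_id, prefix, labels, and udfs options to Dataset#query_job. A sketch of the new job-identification and labeling options, with placeholder values; prefix is ignored when job_id is given.

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset  = bigquery.dataset "my_dataset"

    job = dataset.query_job "SELECT name FROM my_table",
                            prefix: "daily_import_job_",
                            labels: { "department" => "shipping" }
    job.wait_until_done!
    job.data.each { |row| puts row[:name] } unless job.failed?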
@@ -691,8 +835,10 @@ module Google
  end

  ##
- # Queries data using the [synchronous
- # method](https://cloud.google.com/bigquery/querying-data).
+ # Queries data using a synchronous method that blocks for a response. In
+ # this method, a {QueryJob} is created and its results are saved
+ # to a temporary table, then read from the table. Timeouts and transient
+ # errors are generally handled as needed to complete the query.
  #
  # Sets the current dataset as the default dataset in the query. Useful
  # for using unqualified table names.
@@ -717,6 +863,8 @@ module Google
  # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types)
  # for an overview of each BigQuery data type, including allowed values.
  #
+ # @see https://cloud.google.com/bigquery/querying-data Querying Data
+ #
  # @param [String] query A query string, following the BigQuery [query
  # syntax](https://cloud.google.com/bigquery/query-reference), of the
  # query to execute. Example: "SELECT count(f1) FROM
@@ -728,22 +876,16 @@ module Google
  # passed is a hash `{ myparam: "foo" }`, the query must use named
  # query parameters. When set, `legacy_sql` will automatically be set
  # to false and `standard_sql` to true.
+ # @param [Hash<String|Symbol, External::DataSource>] external A Hash
+ # that represents the mapping of the external tables to the table
+ # names used in the SQL query. The hash keys are the table names, and
+ # the hash values are the external table objects. See {Dataset#query}.
  # @param [Integer] max The maximum number of rows of data to return per
  # page of results. Setting this flag to a small value such as 1000 and
  # then paging through results might improve reliability when the query
  # result set is large. In addition to this limit, responses are also
  # limited to 10 MB. By default, there is no maximum row count, and
  # only the byte limit applies.
- # @param [Integer] timeout How long to wait for the query to complete,
- # in milliseconds, before the request times out and returns. Note that
- # this is only a timeout for the request, not the query. If the query
- # takes longer to run than the timeout value, the call returns without
- # any results and with QueryData#complete? set to false. The default
- # value is 10000 milliseconds (10 seconds).
- # @param [Boolean] dryrun If set to `true`, BigQuery doesn't run the
- # job. Instead, if the query is valid, BigQuery returns statistics
- # about the job such as how many bytes would be processed. If the
- # query is invalid, an error returns. The default value is `false`.
  # @param [Boolean] cache Whether to look for the result in the query
  # cache. The query cache is a best-effort cache that will be flushed
  # whenever tables in the query are modified. The default value is
@@ -769,7 +911,7 @@ module Google
  # ignored; the query will be run as if `large_results` is true and
  # `flatten` is false. Optional. The default value is false.
  #
- # @return [Google::Cloud::Bigquery::QueryData]
+ # @return [Google::Cloud::Bigquery::Data] A new data object.
  #
  # @example Query using standard SQL:
  # require "google/cloud/bigquery"
@@ -822,25 +964,112 @@ module Google
  # puts row[:name]
  # end
  #
+ # @example Query using external data source:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # csv_url = "gs://bucket/path/to/data.csv"
+ # csv_table = dataset.external csv_url do |csv|
+ # csv.autodetect = true
+ # csv.skip_leading_rows = 1
+ # end
+ #
+ # data = dataset.query "SELECT * FROM my_ext_table",
+ # external: { my_ext_table: csv_table }
+ #
+ # data.each do |row|
+ # puts row[:name]
+ # end
+ #
  # @!group Data
  #
- def query query, params: nil, max: nil, timeout: 10000, dryrun: nil,
- cache: true, standard_sql: nil, legacy_sql: nil
- options = { max: max, timeout: timeout, dryrun: dryrun, cache: cache,
- legacy_sql: legacy_sql, standard_sql: standard_sql,
- params: params }
- options[:dataset] ||= dataset_id
- options[:project] ||= project_id
+ def query query, params: nil, external: nil, max: nil, cache: true,
+ standard_sql: nil, legacy_sql: nil
  ensure_service!
- gapi = service.query query, options
- QueryData.from_gapi gapi, service
+ options = { params: params, external: external, cache: cache,
+ legacy_sql: legacy_sql, standard_sql: standard_sql }
+
+ job = query_job query, options
+ job.wait_until_done!
+
+ if job.failed?
+ begin
+ # raise to activate ruby exception cause handling
+ fail job.gapi_error
+ rescue => e
+ # wrap Google::Apis::Error with Google::Cloud::Error
+ raise Google::Cloud::Error.from_error(e)
+ end
+ end
+
+ job.data max: max
+ end
+
+ ##
+ # Creates a new External::DataSource (or subclass) object that
+ # represents the external data source that can be queried from directly,
+ # even though the data is not stored in BigQuery. Instead of loading or
+ # streaming the data, this object references the external data source.
+ #
+ # @see https://cloud.google.com/bigquery/external-data-sources Querying
+ # External Data Sources
+ #
+ # @param [String, Array<String>] url The fully-qualified URL(s) that
+ # point to your data in Google Cloud. An attempt will be made to
+ # derive the format from the URLs provided.
+ # @param [String|Symbol] format The data format. This value will be used
+ # even if the provided URLs are recognized as a different format.
+ # Optional.
+ #
+ # The following values are supported:
+ #
+ # * `csv` - CSV
+ # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
+ # * `avro` - [Avro](http://avro.apache.org/)
+ # * `sheets` - Google Sheets
+ # * `datastore_backup` - Cloud Datastore backup
+ # * `bigtable` - Bigtable
+ #
+ # @return [External::DataSource] External data source.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # csv_url = "gs://bucket/path/to/data.csv"
+ # csv_table = dataset.external csv_url do |csv|
+ # csv.autodetect = true
+ # csv.skip_leading_rows = 1
+ # end
+ #
+ # data = dataset.query "SELECT * FROM my_ext_table",
+ # external: { my_ext_table: csv_table }
+ #
+ # data.each do |row|
+ # puts row[:name]
+ # end
+ #
+ def external url, format: nil
+ ext = External.from_urls url, format
+ yield ext if block_given?
+ ext
  end

  ##
- # Loads data into the provided destination table. For the source of the
- # data, you can pass a google-cloud storage file path or a
- # google-cloud-storage `File` instance. Or, you can upload a file
- # directly. See [Loading Data with a POST
+ # Loads data into the provided destination table using an asynchronous
+ # method. In this method, a {LoadJob} is immediately returned. The
+ # caller may poll the service by repeatedly calling {Job#reload!} and
+ # {Job#done?} to detect when the job is done, or simply block until the
+ # job is done by calling {Job#wait_until_done!}. See also {#load}.
+ #
+ # For the source of the data, you can pass a google-cloud storage file
+ # path or a google-cloud-storage `File` instance. Or, you can upload a
+ # file directly. See [Loading Data with a POST
  # Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
  #
  # @param [String] table_id The destination table to load the data into.
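The hunk above introduces Dataset#external and rewrites Dataset#query on top of QueryJob. A sketch of overriding the inferred format via the documented format option when the object name carries no recognizable extension; the URL and table name are placeholders.

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset  = bigquery.dataset "my_dataset"

    # Force the CSV reader even though the URL does not end in .csv.
    csv_table = dataset.external "gs://bucket/path/to/data-export", format: :csv do |csv|
      csv.skip_leading_rows = 1
    end

    data = dataset.query "SELECT * FROM my_ext_table",
                         external: { my_ext_table: csv_table }
    data.each { |row| puts row[:name] }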
@@ -888,6 +1117,9 @@ module Google
  # @param [Boolean] quoted_newlines Indicates if BigQuery should allow
  # quoted data sections that contain newline characters in a CSV file.
  # The default value is `false`.
+ # @param [Boolean] autodetect Indicates if BigQuery should
+ # automatically infer the options and schema for CSV and JSON sources.
+ # The default value is `false`.
  # @param [String] encoding The character encoding of the data. The
  # supported values are `UTF-8` or `ISO-8859-1`. The default value is
  # `UTF-8`.
@@ -912,6 +1144,13 @@ module Google
  # records exceeds this value, an invalid error is returned in the job
  # result. The default value is `0`, which requires that all records
  # are valid.
+ # @param [String] null_marker Specifies a string that represents a null
+ # value in a CSV file. For example, if you specify `\N`, BigQuery
+ # interprets `\N` as a null value when loading a CSV file. The default
+ # value is the empty string. If you set this property to a custom
+ # value, BigQuery throws an error if an empty string is present for
+ # all data types except for STRING and BYTE. For STRING and BYTE
+ # columns, BigQuery interprets the empty string as an empty value.
  # @param [String] quote The value that is used to quote data sections in
  # a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
  # then uses the first byte of the encoded string to split the data in
@@ -932,6 +1171,28 @@ module Google
  # See {Project#schema} for the creation of the schema for use with
  # this option. Also note that for most use cases, the block yielded by
  # this method is a more convenient way to configure the schema.
+ # @param [String] job_id A user-defined ID for the load job. The ID
+ # must contain only letters (a-z, A-Z), numbers (0-9), underscores
+ # (_), or dashes (-). The maximum length is 1,024 characters. If
+ # `job_id` is provided, then `prefix` will not be used.
+ #
+ # See [Generating a job
+ # ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+ # @param [String] prefix A string, usually human-readable, that will be
+ # prepended to a generated value to produce a unique job ID. For
+ # example, the prefix `daily_import_job_` can be given to generate a
+ # job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
+ # prefix must contain only letters (a-z, A-Z), numbers (0-9),
+ # underscores (_), or dashes (-). The maximum length of the entire ID
+ # is 1,024 characters. If `job_id` is provided, then `prefix` will not
+ # be used.
+ # @param [Hash] labels A hash of user-provided labels associated with
+ # the job. You can use these to organize and group your jobs. Label
+ # keys and values can be no longer than 63 characters, can only
+ # contain lowercase letters, numeric characters, underscores and
+ # dashes. International characters are allowed. Label values are
+ # optional. Label keys must start with a letter and each label in the
+ # list must have a different key.
  #
  # @yield [schema] A block for setting the schema for the destination
  # table. The schema can be omitted if the destination table already
941
1202
  # instance provided using the `schema` option, or a new, empty schema
942
1203
  # instance
943
1204
  #
944
- # @return [Google::Cloud::Bigquery::LoadJob]
1205
+ # @return [Google::Cloud::Bigquery::LoadJob] A new load job object.
945
1206
  #
946
1207
  # @example
947
1208
  # require "google/cloud/bigquery"
@@ -950,7 +1211,7 @@ module Google
950
1211
  # dataset = bigquery.dataset "my_dataset"
951
1212
  #
952
1213
  # gs_url = "gs://my-bucket/file-name.csv"
953
- # load_job = dataset.load "my_new_table", gs_url do |schema|
1214
+ # load_job = dataset.load_job "my_new_table", gs_url do |schema|
954
1215
  # schema.string "first_name", mode: :required
955
1216
  # schema.record "cities_lived", mode: :repeated do |nested_schema|
956
1217
  # nested_schema.string "place", mode: :required
@@ -968,7 +1229,7 @@ module Google
968
1229
  # storage = Google::Cloud::Storage.new
969
1230
  # bucket = storage.bucket "my-bucket"
970
1231
  # file = bucket.file "file-name.csv"
971
- # load_job = dataset.load "my_new_table", file do |schema|
1232
+ # load_job = dataset.load_job "my_new_table", file do |schema|
972
1233
  # schema.string "first_name", mode: :required
973
1234
  # schema.record "cities_lived", mode: :repeated do |nested_schema|
974
1235
  # nested_schema.string "place", mode: :required
@@ -983,7 +1244,7 @@ module Google
983
1244
  # dataset = bigquery.dataset "my_dataset"
984
1245
  #
985
1246
  # file = File.open "my_data.csv"
986
- # load_job = dataset.load "my_new_table", file do |schema|
1247
+ # load_job = dataset.load_job "my_new_table", file do |schema|
987
1248
  # schema.string "first_name", mode: :required
988
1249
  # schema.record "cities_lived", mode: :repeated do |nested_schema|
989
1250
  # nested_schema.string "place", mode: :required
@@ -997,17 +1258,18 @@ module Google
997
1258
  # bigquery = Google::Cloud::Bigquery.new
998
1259
  # dataset = bigquery.dataset "my_dataset"
999
1260
  #
1000
- # load_job = dataset.load "my_new_table",
1261
+ # load_job = dataset.load_job "my_new_table",
1001
1262
  # "gs://my-bucket/xxxx.kind_name.backup_info",
1002
1263
  # format: "datastore_backup"
1003
1264
  #
1004
1265
  # @!group Data
1005
1266
  #
1006
- def load table_id, file, format: nil, create: nil, write: nil,
1007
- projection_fields: nil, jagged_rows: nil, quoted_newlines: nil,
1008
- encoding: nil, delimiter: nil, ignore_unknown: nil,
1009
- max_bad_records: nil, quote: nil, skip_leading: nil,
1010
- dryrun: nil, schema: nil
1267
+ def load_job table_id, file, format: nil, create: nil, write: nil,
1268
+ projection_fields: nil, jagged_rows: nil,
1269
+ quoted_newlines: nil, encoding: nil, delimiter: nil,
1270
+ ignore_unknown: nil, max_bad_records: nil, quote: nil,
1271
+ skip_leading: nil, dryrun: nil, schema: nil, job_id: nil,
1272
+ prefix: nil, labels: nil, autodetect: nil, null_marker: nil
1011
1273
  ensure_service!
1012
1274
 
1013
1275
  if block_given?
@@ -1023,12 +1285,228 @@ module Google
  delimiter: delimiter, ignore_unknown: ignore_unknown,
  max_bad_records: max_bad_records, quote: quote,
  skip_leading: skip_leading, dryrun: dryrun,
- schema: schema_gapi }
+ schema: schema_gapi, job_id: job_id, prefix: prefix,
+ labels: labels, autodetect: autodetect,
+ null_marker: null_marker }
  return load_storage(table_id, file, options) if storage_url? file
  return load_local(table_id, file, options) if local_file? file
  fail Google::Cloud::Error, "Don't know how to load #{file}"
  end

+ ##
+ # Loads data into the provided destination table using a synchronous
+ # method that blocks for a response. Timeouts and transient errors are
+ # generally handled as needed to complete the job. See also
+ # {#load_job}.
+ #
+ # For the source of the data, you can pass a google-cloud storage file
+ # path or a google-cloud-storage `File` instance. Or, you can upload a
+ # file directly. See [Loading Data with a POST
+ # Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
+ #
+ # @param [String] table_id The destination table to load the data into.
+ # @param [File, Google::Cloud::Storage::File, String] file A file or the
+ # URI of a Google Cloud Storage file containing data to load into the
+ # table.
+ # @param [String] format The exported file format. The default value is
+ # `csv`.
+ #
+ # The following values are supported:
+ #
+ # * `csv` - CSV
+ # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
+ # * `avro` - [Avro](http://avro.apache.org/)
+ # * `datastore_backup` - Cloud Datastore backup
+ # @param [String] create Specifies whether the job is allowed to create
+ # new tables. The default value is `needed`.
+ #
+ # The following values are supported:
+ #
+ # * `needed` - Create the table if it does not exist.
+ # * `never` - The table must already exist. A 'notFound' error is
+ # raised if the table does not exist.
+ # @param [String] write Specifies how to handle data already present in
+ # the table. The default value is `append`.
+ #
+ # The following values are supported:
+ #
+ # * `truncate` - BigQuery overwrites the table data.
+ # * `append` - BigQuery appends the data to the table.
+ # * `empty` - An error will be returned if the table already contains
+ # data.
+ # @param [Array<String>] projection_fields If the `format` option is set
+ # to `datastore_backup`, indicates which entity properties to load
+ # from a Cloud Datastore backup. Property names are case sensitive and
+ # must be top-level properties. If not set, BigQuery loads all
+ # properties. If any named property isn't found in the Cloud Datastore
+ # backup, an invalid error is returned.
+ # @param [Boolean] jagged_rows Accept rows that are missing trailing
+ # optional columns. The missing values are treated as nulls. If
+ # `false`, records with missing trailing columns are treated as bad
+ # records, and if there are too many bad records, an invalid error is
+ # returned in the job result. The default value is `false`. Only
+ # applicable to CSV, ignored for other formats.
+ # @param [Boolean] quoted_newlines Indicates if BigQuery should allow
+ # quoted data sections that contain newline characters in a CSV file.
+ # The default value is `false`.
+ # @param [Boolean] autodetect Indicates if BigQuery should
+ # automatically infer the options and schema for CSV and JSON sources.
+ # The default value is `false`.
+ # @param [String] encoding The character encoding of the data. The
+ # supported values are `UTF-8` or `ISO-8859-1`. The default value is
+ # `UTF-8`.
+ # @param [String] delimiter Specifies the separator for fields in a CSV
+ # file. BigQuery converts the string to `ISO-8859-1` encoding, and
+ # then uses the first byte of the encoded string to split the data in
+ # its raw, binary state. Default is <code>,</code>.
+ # @param [Boolean] ignore_unknown Indicates if BigQuery should allow
+ # extra values that are not represented in the table schema. If true,
+ # the extra values are ignored. If false, records with extra columns
+ # are treated as bad records, and if there are too many bad records,
+ # an invalid error is returned in the job result. The default value is
+ # `false`.
+ #
+ # The `format` property determines what BigQuery treats as an extra
+ # value:
+ #
+ # * `CSV`: Trailing columns
+ # * `JSON`: Named values that don't match any column names
+ # @param [Integer] max_bad_records The maximum number of bad records
+ # that BigQuery can ignore when running the job. If the number of bad
+ # records exceeds this value, an invalid error is returned in the job
+ # result. The default value is `0`, which requires that all records
+ # are valid.
+ # @param [String] null_marker Specifies a string that represents a null
+ # value in a CSV file. For example, if you specify `\N`, BigQuery
+ # interprets `\N` as a null value when loading a CSV file. The default
+ # value is the empty string. If you set this property to a custom
+ # value, BigQuery throws an error if an empty string is present for
+ # all data types except for STRING and BYTE. For STRING and BYTE
+ # columns, BigQuery interprets the empty string as an empty value.
+ # @param [String] quote The value that is used to quote data sections in
+ # a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
+ # then uses the first byte of the encoded string to split the data in
+ # its raw, binary state. The default value is a double-quote
+ # <code>"</code>. If your data does not contain quoted sections, set
+ # the property value to an empty string. If your data contains quoted
+ # newline characters, you must also set the allowQuotedNewlines
+ # property to true.
+ # @param [Integer] skip_leading The number of rows at the top of a CSV
+ # file that BigQuery will skip when loading the data. The default
+ # value is `0`. This property is useful if you have header rows in the
+ # file that should be skipped.
+ # @param [Google::Cloud::Bigquery::Schema] schema The schema for the
+ # destination table. Optional. The schema can be omitted if the
+ # destination table already exists, or if you're loading data from a
+ # Google Cloud Datastore backup.
+ #
+ # See {Project#schema} for the creation of the schema for use with
+ # this option. Also note that for most use cases, the block yielded by
+ # this method is a more convenient way to configure the schema.
+ #
+ # @yield [schema] A block for setting the schema for the destination
+ # table. The schema can be omitted if the destination table already
+ # exists, or if you're loading data from a Google Cloud Datastore
+ # backup.
+ # @yieldparam [Google::Cloud::Bigquery::Schema] schema The schema
+ # instance provided using the `schema` option, or a new, empty schema
+ # instance
+ #
+ # @return [Boolean] Returns `true` if the load job was successful.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # gs_url = "gs://my-bucket/file-name.csv"
+ # dataset.load "my_new_table", gs_url do |schema|
+ # schema.string "first_name", mode: :required
+ # schema.record "cities_lived", mode: :repeated do |nested_schema|
+ # nested_schema.string "place", mode: :required
+ # nested_schema.integer "number_of_years", mode: :required
+ # end
+ # end
+ #
+ # @example Pass a google-cloud-storage `File` instance:
+ # require "google/cloud/bigquery"
+ # require "google/cloud/storage"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # storage = Google::Cloud::Storage.new
+ # bucket = storage.bucket "my-bucket"
+ # file = bucket.file "file-name.csv"
+ # dataset.load "my_new_table", file do |schema|
+ # schema.string "first_name", mode: :required
+ # schema.record "cities_lived", mode: :repeated do |nested_schema|
+ # nested_schema.string "place", mode: :required
+ # nested_schema.integer "number_of_years", mode: :required
+ # end
+ # end
+ #
+ # @example Upload a file directly:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # file = File.open "my_data.csv"
+ # dataset.load "my_new_table", file do |schema|
+ # schema.string "first_name", mode: :required
+ # schema.record "cities_lived", mode: :repeated do |nested_schema|
+ # nested_schema.string "place", mode: :required
+ # nested_schema.integer "number_of_years", mode: :required
+ # end
+ # end
+ #
+ # @example Schema is not required with a Cloud Datastore backup:
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # dataset.load "my_new_table",
+ # "gs://my-bucket/xxxx.kind_name.backup_info",
+ # format: "datastore_backup"
+ #
+ # @!group Data
+ #
+ def load table_id, file, format: nil, create: nil, write: nil,
+ projection_fields: nil, jagged_rows: nil, quoted_newlines: nil,
+ encoding: nil, delimiter: nil, ignore_unknown: nil,
+ max_bad_records: nil, quote: nil, skip_leading: nil,
+ schema: nil, autodetect: nil, null_marker: nil
+
+ yield (schema ||= Schema.from_gapi) if block_given?
+
+ options = { format: format, create: create, write: write,
+ projection_fields: projection_fields,
+ jagged_rows: jagged_rows,
+ quoted_newlines: quoted_newlines, encoding: encoding,
+ delimiter: delimiter, ignore_unknown: ignore_unknown,
+ max_bad_records: max_bad_records, quote: quote,
+ skip_leading: skip_leading, schema: schema,
+ autodetect: autodetect, null_marker: null_marker }
+ job = load_job table_id, file, options
+
+ job.wait_until_done!
+
+ if job.failed?
+ begin
+ # raise to activate ruby exception cause handling
+ fail job.gapi_error
+ rescue => e
+ # wrap Google::Apis::Error with Google::Cloud::Error
+ raise Google::Cloud::Error.from_error(e)
+ end
+ end
+
+ true
+ end
+
  ##
  # @private New Dataset from a Google API Client object.
  def self.from_gapi gapi, conn
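The hunk above renames the job-returning loader to Dataset#load_job and adds a blocking Dataset#load that returns true on success; both gain autodetect and null_marker options. A sketch combining them, with placeholder bucket and table names:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset  = bigquery.dataset "my_dataset"

    # Blocking form: infer the schema and treat "\N" as NULL in the CSV.
    dataset.load "my_new_table", "gs://my-bucket/file-name.csv",
                 autodetect: true, null_marker: "\\N"

    # Job form: returns a LoadJob immediately; poll or block as needed.
    job = dataset.load_job "my_new_table", "gs://my-bucket/file-name.csv",
                           autodetect: true
    job.wait_until_done!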
@@ -1038,8 +1516,158 @@ module Google
  end
  end

+ ##
+ # Inserts data into the given table for near-immediate querying, without
+ # the need to complete a load operation before the data can appear in
+ # query results.
+ #
+ # @see https://cloud.google.com/bigquery/streaming-data-into-bigquery
+ # Streaming Data Into BigQuery
+ #
+ # @param [String] table_id The ID of the destination table.
+ # @param [Hash, Array<Hash>] rows A hash object or array of hash objects
+ # containing the data. Required.
+ # @param [Boolean] skip_invalid Insert all valid rows of a request, even
+ # if invalid rows exist. The default value is `false`, which causes
+ # the entire request to fail if any invalid rows exist.
+ # @param [Boolean] ignore_unknown Accept rows that contain values that
+ # do not match the schema. The unknown values are ignored. Default is
+ # false, which treats unknown values as errors.
+ # @param [Boolean] autocreate Specifies whether the method should create
+ # a new table with the given `table_id`, if no table is found for
+ # `table_id`. The default value is false.
+ #
+ # @return [Google::Cloud::Bigquery::InsertResponse] An insert response
+ # object.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # rows = [
+ # { "first_name" => "Alice", "age" => 21 },
+ # { "first_name" => "Bob", "age" => 22 }
+ # ]
+ # dataset.insert "my_table", rows
+ #
+ # @example Using `autocreate` to create a new table if none exists.
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ #
+ # rows = [
+ # { "first_name" => "Alice", "age" => 21 },
+ # { "first_name" => "Bob", "age" => 22 }
+ # ]
+ # dataset.insert "my_table", rows, autocreate: true do |t|
+ # t.schema.string "first_name", mode: :required
+ # t.schema.integer "age", mode: :required
+ # end
+ #
+ # @!group Data
+ #
+ def insert table_id, rows, skip_invalid: nil, ignore_unknown: nil,
+ autocreate: nil
+ if autocreate
+ begin
+ insert_data table_id, rows, skip_invalid: skip_invalid,
+ ignore_unknown: ignore_unknown
+ rescue Google::Cloud::NotFoundError
+ sleep rand(1..60)
+ begin
+ create_table table_id do |tbl_updater|
+ yield tbl_updater if block_given?
+ end
+ # rubocop:disable Lint/HandleExceptions
+ rescue Google::Cloud::AlreadyExistsError
+ end
+ # rubocop:enable Lint/HandleExceptions
+
+ sleep 60
+ insert table_id, rows, skip_invalid: skip_invalid,
+ ignore_unknown: ignore_unknown,
+ autocreate: true
+ end
+ else
+ insert_data table_id, rows, skip_invalid: skip_invalid,
+ ignore_unknown: ignore_unknown
+ end
+ end
+
+ ##
+ # Create an asynchronous inserter object used to insert rows in batches.
+ #
+ # @param [String] table_id The ID of the table to insert rows into.
+ # @param [Boolean] skip_invalid Insert all valid rows of a request, even
+ # if invalid rows exist. The default value is `false`, which causes
+ # the entire request to fail if any invalid rows exist.
+ # @param [Boolean] ignore_unknown Accept rows that contain values that
+ # do not match the schema. The unknown values are ignored. Default is
+ # false, which treats unknown values as errors.
+ # @attr_reader [Integer] max_bytes The maximum size of rows to be
+ # collected before the batch is published. Default is 10,000,000
+ # (10MB).
+ # @param [Integer] max_rows The maximum number of rows to be collected
+ # before the batch is published. Default is 500.
+ # @attr_reader [Numeric] interval The number of seconds to collect
+ # messages before the batch is published. Default is 10.
+ # @attr_reader [Numeric] threads The number of threads used to insert
+ # batches of rows. Default is 4.
+ # @yield [response] the callback for when a batch of rows is inserted
+ # @yieldparam [InsertResponse] response the result of the asynchronous
+ # insert
+ #
+ # @return [Table::AsyncInserter] Returns an inserter object.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ # table = dataset.table "my_table"
+ # inserter = table.insert_async do |response|
+ # log_insert "inserted #{response.insert_count} rows " \
+ # "with #{response.error_count} errors"
+ # end
+ #
+ # rows = [
+ # { "first_name" => "Alice", "age" => 21 },
+ # { "first_name" => "Bob", "age" => 22 }
+ # ]
+ # inserter.insert rows
+ #
+ # inserter.stop.wait!
+ #
+ def insert_async table_id, skip_invalid: nil, ignore_unknown: nil,
+ max_bytes: 10000000, max_rows: 500, interval: 10,
+ threads: 4, &block
+ ensure_service!
+
+ # Get table, don't use Dataset#table which handles NotFoundError
+ gapi = service.get_table dataset_id, table_id
+ table = Table.from_gapi gapi, service
+ # Get the AsyncInserter from the table
+ table.insert_async skip_invalid: skip_invalid,
+ ignore_unknown: ignore_unknown,
+ max_bytes: max_bytes, max_rows: max_rows,
+ interval: interval, threads: threads, &block
+ end
+
  protected

+ def insert_data table_id, rows, skip_invalid: nil, ignore_unknown: nil
+ rows = [rows] if rows.is_a? Hash
+ fail ArgumentError, "No rows provided" if rows.empty?
+ ensure_service!
+ options = { skip_invalid: skip_invalid,
+ ignore_unknown: ignore_unknown }
+ gapi = service.insert_tabledata dataset_id, table_id, rows, options
+ InsertResponse.from_gapi rows, gapi
+ end
+
  ##
  # Raise an error unless an active service is available.
  def ensure_service!
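The hunk above adds streaming inserts at the dataset level. A sketch of Dataset#insert and the batching Dataset#insert_async, based on the signatures and examples added here; the row contents are placeholders.

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset  = bigquery.dataset "my_dataset"

    rows = [
      { "first_name" => "Alice", "age" => 21 },
      { "first_name" => "Bob",   "age" => 22 }
    ]

    # One-shot streaming insert (pass autocreate: true to create the table on demand).
    dataset.insert "my_table", rows, skip_invalid: true

    # Batched background inserts via the table's AsyncInserter.
    inserter = dataset.insert_async "my_table" do |response|
      puts "inserted #{response.insert_count} rows, #{response.error_count} errors"
    end
    inserter.insert rows
    inserter.stop.wait!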
@@ -1053,6 +1681,7 @@ module Google
  [attr, @gapi.send(attr)]
  end]
  patch_gapi = Google::Apis::BigqueryV2::Dataset.new patch_args
+ patch_gapi.etag = etag if etag
  @gapi = service.patch_dataset dataset_id, patch_gapi
  end

@@ -1101,6 +1730,19 @@ module Google
  false
  end

+ def udfs_gapi array_or_str
+ return [] if array_or_str.nil?
+ Array(array_or_str).map do |uri_or_code|
+ resource = Google::Apis::BigqueryV2::UserDefinedFunctionResource.new
+ if uri_or_code.start_with?("gs://")
+ resource.resource_uri = uri_or_code
+ else
+ resource.inline_code = uri_or_code
+ end
+ resource
+ end
+ end
+
  ##
  # Yielded to a block to accumulate changes for a patch request.
  class Updater < Dataset