google-cloud-bigquery 0.28.0 → 0.29.0
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/google-cloud-bigquery.rb +2 -2
- data/lib/google/cloud/bigquery.rb +10 -12
- data/lib/google/cloud/bigquery/copy_job.rb +42 -6
- data/lib/google/cloud/bigquery/data.rb +129 -23
- data/lib/google/cloud/bigquery/dataset.rb +708 -66
- data/lib/google/cloud/bigquery/dataset/access.rb +533 -27
- data/lib/google/cloud/bigquery/dataset/list.rb +5 -3
- data/lib/google/cloud/bigquery/external.rb +2353 -0
- data/lib/google/cloud/bigquery/extract_job.rb +52 -11
- data/lib/google/cloud/bigquery/insert_response.rb +90 -2
- data/lib/google/cloud/bigquery/job.rb +160 -21
- data/lib/google/cloud/bigquery/load_job.rb +128 -11
- data/lib/google/cloud/bigquery/project.rb +187 -44
- data/lib/google/cloud/bigquery/query_job.rb +323 -13
- data/lib/google/cloud/bigquery/schema.rb +57 -1
- data/lib/google/cloud/bigquery/schema/field.rb +118 -17
- data/lib/google/cloud/bigquery/service.rb +196 -43
- data/lib/google/cloud/bigquery/table.rb +739 -49
- data/lib/google/cloud/bigquery/table/async_inserter.rb +280 -0
- data/lib/google/cloud/bigquery/version.rb +1 -1
- data/lib/google/cloud/bigquery/view.rb +306 -69
- metadata +18 -3
- data/lib/google/cloud/bigquery/query_data.rb +0 -234
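The headline addition in 0.29.0 is querying external data sources directly (the new external.rb, +2353 lines), together with the split of query and load into synchronous and asynchronous variants. As a quick orientation, the sketch below is assembled from the examples added in this diff; the bucket path and table names are placeholders.

  require "google/cloud/bigquery"

  bigquery = Google::Cloud::Bigquery.new
  dataset  = bigquery.dataset "my_dataset"

  # Describe a CSV file in Cloud Storage without loading it into BigQuery.
  csv_url   = "gs://bucket/path/to/data.csv"
  csv_table = dataset.external csv_url do |csv|
    csv.autodetect = true
    csv.skip_leading_rows = 1
  end

  # The reworked synchronous Dataset#query blocks until the job is done;
  # `my_ext_table` is only a name that maps to the external definition.
  data = dataset.query "SELECT * FROM my_ext_table",
                       external: { my_ext_table: csv_table }
  data.each { |row| puts row[:name] }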
@@ -17,6 +17,7 @@ require "json"
 require "google/cloud/errors"
 require "google/cloud/bigquery/service"
 require "google/cloud/bigquery/table"
+require "google/cloud/bigquery/external"
 require "google/cloud/bigquery/dataset/list"
 require "google/cloud/bigquery/dataset/access"
 require "google/apis/bigquery_v2"
@@ -59,8 +60,9 @@ module Google
 
 ##
 # A unique ID for this dataset, without the project name.
-#
-#
+#
+# @return [String] The ID must contain only letters (a-z, A-Z), numbers
+#   (0-9), or underscores (_). The maximum length is 1,024 characters.
 #
 # @!group Attributes
 #
@@ -71,6 +73,8 @@ module Google
 ##
 # The ID of the project containing this dataset.
 #
+# @return [String] The project ID.
+#
 # @!group Attributes
 #
 def project_id
@@ -90,6 +94,8 @@ module Google
 ##
 # A descriptive name for the dataset.
 #
+# @return [String] The friendly name.
+#
 # @!group Attributes
 #
 def name
@@ -99,6 +105,8 @@ module Google
 ##
 # Updates the descriptive name for the dataset.
 #
+# @param [String] new_name The new friendly name.
+#
 # @!group Attributes
 #
 def name= new_name
@@ -107,7 +115,9 @@ module Google
 end
 
 ##
-#
+# The ETag hash of the dataset.
+#
+# @return [String] The ETag hash.
 #
 # @!group Attributes
 #
@@ -119,6 +129,8 @@ module Google
 ##
 # A URL that can be used to access the dataset using the REST API.
 #
+# @return [String] A REST URL for the resource.
+#
 # @!group Attributes
 #
 def api_url
@@ -129,6 +141,8 @@ module Google
 ##
 # A user-friendly description of the dataset.
 #
+# @return [String] The description.
+#
 # @!group Attributes
 #
 def description
@@ -139,6 +153,8 @@ module Google
 ##
 # Updates the user-friendly description of the dataset.
 #
+# @param [String] new_description The new description for the dataset.
+#
 # @!group Attributes
 #
 def description= new_description
@@ -149,6 +165,8 @@ module Google
 ##
 # The default lifetime of all tables in the dataset, in milliseconds.
 #
+# @return [Integer] The default table expiration in milliseconds.
+#
 # @!group Attributes
 #
 def default_expiration
@@ -164,6 +182,9 @@ module Google
 # Updates the default lifetime of all tables in the dataset, in
 # milliseconds.
 #
+# @param [Integer] new_default_expiration The new default table
+#   expiration in milliseconds.
+#
 # @!group Attributes
 #
 def default_expiration= new_default_expiration
@@ -174,6 +195,8 @@ module Google
 ##
 # The time when this dataset was created.
 #
+# @return [Time, nil] The creation time.
+#
 # @!group Attributes
 #
 def created_at
@@ -188,6 +211,8 @@ module Google
 ##
 # The date when this dataset or any of its tables was last modified.
 #
+# @return [Time, nil] The last modified time.
+#
 # @!group Attributes
 #
 def modified_at
@@ -201,7 +226,9 @@ module Google
 
 ##
 # The geographic location where the dataset should reside. Possible
-# values include EU and US
+# values include `EU` and `US`. The default value is `US`.
+#
+# @return [String] The location code.
 #
 # @!group Attributes
 #
@@ -210,6 +237,63 @@ module Google
   @gapi.location
 end
 
+##
+# A hash of user-provided labels associated with this dataset. Labels
+# are used to organize and group datasets. See [Using
+# Labels](https://cloud.google.com/bigquery/docs/labels).
+#
+# The returned hash is frozen and changes are not allowed. Use
+# {#labels=} to replace the entire hash.
+#
+# @return [Hash<String, String>] A hash containing key/value pairs.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#
+#   labels = dataset.labels
+#   labels["department"] #=> "shipping"
+#
+# @!group Attributes
+#
+def labels
+  m = @gapi.labels
+  m = m.to_h if m.respond_to? :to_h
+  m.dup.freeze
+end
+
+##
+# Updates the hash of user-provided labels associated with this dataset.
+# Labels are used to organize and group datasets. See [Using
+# Labels](https://cloud.google.com/bigquery/docs/labels).
+#
+# @param [Hash<String, String>] labels A hash containing key/value
+#   pairs.
+#
+#   * Label keys and values can be no longer than 63 characters.
+#   * Label keys and values can contain only lowercase letters, numbers,
+#     underscores, hyphens, and international characters.
+#   * Label keys and values cannot exceed 128 bytes in size.
+#   * Label keys must begin with a letter.
+#   * Label keys must be unique within a dataset.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#   dataset = bigquery.dataset "my_dataset"
+#
+#   dataset.labels = { "department" => "shipping" }
+#
+# @!group Attributes
+#
+def labels= labels
+  @gapi.labels = labels
+  patch_gapi! :labels
+end
+
 ##
 # Retrieves the access rules for a Dataset. The rules can be updated
 # when passing a block, see {Dataset::Access} for all the methods
@@ -221,7 +305,7 @@ module Google
 # @yield [access] a block for setting rules
 # @yieldparam [Dataset::Access] access the object accepting rules
 #
-# @return [Google::Cloud::Bigquery::Dataset::Access]
+# @return [Google::Cloud::Bigquery::Dataset::Access] The access object.
 #
 # @example
 #   require "google/cloud/bigquery"
@@ -229,14 +313,8 @@ module Google
 #   bigquery = Google::Cloud::Bigquery.new
 #   dataset = bigquery.dataset "my_dataset"
 #
-#   dataset.access
-#
-#   # {"role"=>"WRITER",
-#   #  "specialGroup"=>"projectWriters"},
-#   # {"role"=>"READER",
-#   #  "specialGroup"=>"projectReaders"},
-#   # {"role"=>"OWNER",
-#   #  "userByEmail"=>"123456789-...com"}]
+#   access = dataset.access
+#   access.writer_user? "reader@example.com" #=> false
 #
 # @example Manage the access rules by passing a block:
 #   require "google/cloud/bigquery"
@@ -305,7 +383,7 @@ module Google
 # @yield [table] a block for setting the table
 # @yieldparam [Table] table the table object to be updated
 #
-# @return [Google::Cloud::Bigquery::Table]
+# @return [Google::Cloud::Bigquery::Table] A new table object.
 #
 # @example
 #   require "google/cloud/bigquery"
@@ -394,8 +472,15 @@ module Google
 #   [legacy
 #   SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
 #   dialect. Optional. The default value is false.
+# @param [Array<String>, String] udfs User-defined function resources
+#   used in the query. May be either a code resource to load from a
+#   Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
+#   that contains code for a user-defined function (UDF). Providing an
+#   inline code resource is equivalent to providing a URI for a file
+#   containing the same code. See [User-Defined
+#   Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
 #
-# @return [Google::Cloud::Bigquery::View]
+# @return [Google::Cloud::Bigquery::View] A new view object.
 #
 # @example
 #   require "google/cloud/bigquery"
@@ -419,7 +504,7 @@ module Google
 # @!group Table
 #
 def create_view table_id, query, name: nil, description: nil,
-                standard_sql: nil, legacy_sql: nil
+                standard_sql: nil, legacy_sql: nil, udfs: nil
   new_view_opts = {
     table_reference: Google::Apis::BigqueryV2::TableReference.new(
       project_id: project_id, dataset_id: dataset_id, table_id: table_id
@@ -429,7 +514,8 @@ module Google
     view: Google::Apis::BigqueryV2::ViewDefinition.new(
       query: query,
       use_legacy_sql: Convert.resolve_legacy_sql(standard_sql,
-                                                 legacy_sql)
+                                                 legacy_sql),
+      user_defined_function_resources: udfs_gapi(udfs)
     )
   }.delete_if { |_, v| v.nil? }
   new_view = Google::Apis::BigqueryV2::Table.new new_view_opts
@@ -474,8 +560,8 @@ module Google
 # @param [Integer] max Maximum number of tables to return.
 #
 # @return [Array<Google::Cloud::Bigquery::Table>,
-#   Array<Google::Cloud::Bigquery::View>]
-#   {Google::Cloud::Bigquery::Table::List})
+#   Array<Google::Cloud::Bigquery::View>] An array of tables and/or
+#   views(See {Google::Cloud::Bigquery::Table::List})
 #
 # @example
 #   require "google/cloud/bigquery"
@@ -546,6 +632,10 @@ module Google
 #   passed is a hash `{ myparam: "foo" }`, the query must use named
 #   query parameters. When set, `legacy_sql` will automatically be set
 #   to false and `standard_sql` to true.
+# @param [Hash<String|Symbol, External::DataSource>] external A Hash
+#   that represents the mapping of the external tables to the table
+#   names used in the SQL query. The hash keys are the table names, and
+#   the hash values are the external table objects. See {Dataset#query}.
 # @param [String] priority Specifies a priority for the query. Possible
 #   values include `INTERACTIVE` and `BATCH`. The default value is
 #   `INTERACTIVE`.
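As a reference for the new `udfs` option documented above, here is a minimal, hypothetical sketch; `udfs_gapi` (added near the end of this diff) turns each entry into a `UserDefinedFunctionResource`, treating `gs://` strings as code resources and anything else as inline code. The view name, query, and bucket are made up, and JavaScript UDF resources of this kind apply to legacy SQL queries.

  require "google/cloud/bigquery"

  bigquery = Google::Cloud::Bigquery.new
  dataset  = bigquery.dataset "my_dataset"

  # A single string or an array is accepted; each entry becomes one
  # user-defined function resource on the view definition.
  view = dataset.create_view "my_view",
                             "SELECT upperCase(name) AS name FROM my_table",
                             legacy_sql: true,
                             udfs: ["gs://my-bucket/my-udfs.js"]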
@@ -605,8 +695,37 @@ module Google
|
|
605
695
|
# job. Queries that will have bytes billed beyond this limit will fail
|
606
696
|
# (without incurring a charge). Optional. If unspecified, this will be
|
607
697
|
# set to your project default.
|
608
|
-
#
|
609
|
-
#
|
698
|
+
# @param [String] job_id A user-defined ID for the query job. The ID
|
699
|
+
# must contain only letters (a-z, A-Z), numbers (0-9), underscores
|
700
|
+
# (_), or dashes (-). The maximum length is 1,024 characters. If
|
701
|
+
# `job_id` is provided, then `prefix` will not be used.
|
702
|
+
#
|
703
|
+
# See [Generating a job
|
704
|
+
# ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
|
705
|
+
# @param [String] prefix A string, usually human-readable, that will be
|
706
|
+
# prepended to a generated value to produce a unique job ID. For
|
707
|
+
# example, the prefix `daily_import_job_` can be given to generate a
|
708
|
+
# job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
|
709
|
+
# prefix must contain only letters (a-z, A-Z), numbers (0-9),
|
710
|
+
# underscores (_), or dashes (-). The maximum length of the entire ID
|
711
|
+
# is 1,024 characters. If `job_id` is provided, then `prefix` will not
|
712
|
+
# be used.
|
713
|
+
# @param [Hash] labels A hash of user-provided labels associated with
|
714
|
+
# the job. You can use these to organize and group your jobs. Label
|
715
|
+
# keys and values can be no longer than 63 characters, can only
|
716
|
+
# contain lowercase letters, numeric characters, underscores and
|
717
|
+
# dashes. International characters are allowed. Label values are
|
718
|
+
# optional. Label keys must start with a letter and each label in the
|
719
|
+
# list must have a different key.
|
720
|
+
# @param [Array<String>, String] udfs User-defined function resources
|
721
|
+
# used in the query. May be either a code resource to load from a
|
722
|
+
# Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
|
723
|
+
# that contains code for a user-defined function (UDF). Providing an
|
724
|
+
# inline code resource is equivalent to providing a URI for a file
|
725
|
+
# containing the same code. See [User-Defined
|
726
|
+
# Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
|
727
|
+
#
|
728
|
+
# @return [Google::Cloud::Bigquery::QueryJob] A new query job object.
|
610
729
|
#
|
611
730
|
# @example Query using standard SQL:
|
612
731
|
# require "google/cloud/bigquery"
|
@@ -618,7 +737,7 @@ module Google
|
|
618
737
|
#
|
619
738
|
# job.wait_until_done!
|
620
739
|
# if !job.failed?
|
621
|
-
# job.
|
740
|
+
# job.data.each do |row|
|
622
741
|
# puts row[:name]
|
623
742
|
# end
|
624
743
|
# end
|
@@ -634,7 +753,7 @@ module Google
|
|
634
753
|
#
|
635
754
|
# job.wait_until_done!
|
636
755
|
# if !job.failed?
|
637
|
-
# job.
|
756
|
+
# job.data.each do |row|
|
638
757
|
# puts row[:name]
|
639
758
|
# end
|
640
759
|
# end
|
@@ -650,7 +769,7 @@ module Google
|
|
650
769
|
#
|
651
770
|
# job.wait_until_done!
|
652
771
|
# if !job.failed?
|
653
|
-
# job.
|
772
|
+
# job.data.each do |row|
|
654
773
|
# puts row[:name]
|
655
774
|
# end
|
656
775
|
# end
|
@@ -666,24 +785,49 @@ module Google
|
|
666
785
|
#
|
667
786
|
# job.wait_until_done!
|
668
787
|
# if !job.failed?
|
669
|
-
# job.
|
788
|
+
# job.data.each do |row|
|
789
|
+
# puts row[:name]
|
790
|
+
# end
|
791
|
+
# end
|
792
|
+
#
|
793
|
+
# @example Query using external data source:
|
794
|
+
# require "google/cloud/bigquery"
|
795
|
+
#
|
796
|
+
# bigquery = Google::Cloud::Bigquery.new
|
797
|
+
# dataset = bigquery.dataset "my_dataset"
|
798
|
+
#
|
799
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
800
|
+
# csv_table = dataset.external csv_url do |csv|
|
801
|
+
# csv.autodetect = true
|
802
|
+
# csv.skip_leading_rows = 1
|
803
|
+
# end
|
804
|
+
#
|
805
|
+
# job = dataset.query_job "SELECT * FROM my_ext_table",
|
806
|
+
# external: { my_ext_table: csv_table }
|
807
|
+
#
|
808
|
+
# job.wait_until_done!
|
809
|
+
# if !job.failed?
|
810
|
+
# job.data.each do |row|
|
670
811
|
# puts row[:name]
|
671
812
|
# end
|
672
813
|
# end
|
673
814
|
#
|
674
815
|
# @!group Data
|
675
816
|
#
|
676
|
-
def query_job query, params: nil,
|
677
|
-
|
817
|
+
def query_job query, params: nil, external: nil,
|
818
|
+
priority: "INTERACTIVE", cache: true, table: nil,
|
819
|
+
create: nil, write: nil, standard_sql: nil,
|
678
820
|
legacy_sql: nil, large_results: nil, flatten: nil,
|
679
|
-
maximum_billing_tier: nil, maximum_bytes_billed: nil
|
821
|
+
maximum_billing_tier: nil, maximum_bytes_billed: nil,
|
822
|
+
job_id: nil, prefix: nil, labels: nil, udfs: nil
|
680
823
|
options = { priority: priority, cache: cache, table: table,
|
681
824
|
create: create, write: write,
|
682
825
|
large_results: large_results, flatten: flatten,
|
683
826
|
legacy_sql: legacy_sql, standard_sql: standard_sql,
|
684
827
|
maximum_billing_tier: maximum_billing_tier,
|
685
828
|
maximum_bytes_billed: maximum_bytes_billed,
|
686
|
-
params: params
|
829
|
+
params: params, external: external, labels: labels,
|
830
|
+
job_id: job_id, prefix: prefix, udfs: udfs }
|
687
831
|
options[:dataset] ||= self
|
688
832
|
ensure_service!
|
689
833
|
gapi = service.query_job query, options
|
@@ -691,8 +835,10 @@ module Google
|
|
691
835
|
end
|
692
836
|
|
693
837
|
##
|
694
|
-
# Queries data using
|
695
|
-
# method
|
838
|
+
# Queries data using a synchronous method that blocks for a response. In
|
839
|
+
# this method, a {QueryJob} is created and its results are saved
|
840
|
+
# to a temporary table, then read from the table. Timeouts and transient
|
841
|
+
# errors are generally handled as needed to complete the query.
|
696
842
|
#
|
697
843
|
# Sets the current dataset as the default dataset in the query. Useful
|
698
844
|
# for using unqualified table names.
|
@@ -717,6 +863,8 @@ module Google
|
|
717
863
|
# See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types)
|
718
864
|
# for an overview of each BigQuery data type, including allowed values.
|
719
865
|
#
|
866
|
+
# @see https://cloud.google.com/bigquery/querying-data Querying Data
|
867
|
+
#
|
720
868
|
# @param [String] query A query string, following the BigQuery [query
|
721
869
|
# syntax](https://cloud.google.com/bigquery/query-reference), of the
|
722
870
|
# query to execute. Example: "SELECT count(f1) FROM
|
@@ -728,22 +876,16 @@ module Google
|
|
728
876
|
# passed is a hash `{ myparam: "foo" }`, the query must use named
|
729
877
|
# query parameters. When set, `legacy_sql` will automatically be set
|
730
878
|
# to false and `standard_sql` to true.
|
879
|
+
# @param [Hash<String|Symbol, External::DataSource>] external A Hash
|
880
|
+
# that represents the mapping of the external tables to the table
|
881
|
+
# names used in the SQL query. The hash keys are the table names, and
|
882
|
+
# the hash values are the external table objects. See {Dataset#query}.
|
731
883
|
# @param [Integer] max The maximum number of rows of data to return per
|
732
884
|
# page of results. Setting this flag to a small value such as 1000 and
|
733
885
|
# then paging through results might improve reliability when the query
|
734
886
|
# result set is large. In addition to this limit, responses are also
|
735
887
|
# limited to 10 MB. By default, there is no maximum row count, and
|
736
888
|
# only the byte limit applies.
|
737
|
-
# @param [Integer] timeout How long to wait for the query to complete,
|
738
|
-
# in milliseconds, before the request times out and returns. Note that
|
739
|
-
# this is only a timeout for the request, not the query. If the query
|
740
|
-
# takes longer to run than the timeout value, the call returns without
|
741
|
-
# any results and with QueryData#complete? set to false. The default
|
742
|
-
# value is 10000 milliseconds (10 seconds).
|
743
|
-
# @param [Boolean] dryrun If set to `true`, BigQuery doesn't run the
|
744
|
-
# job. Instead, if the query is valid, BigQuery returns statistics
|
745
|
-
# about the job such as how many bytes would be processed. If the
|
746
|
-
# query is invalid, an error returns. The default value is `false`.
|
747
889
|
# @param [Boolean] cache Whether to look for the result in the query
|
748
890
|
# cache. The query cache is a best-effort cache that will be flushed
|
749
891
|
# whenever tables in the query are modified. The default value is
|
@@ -769,7 +911,7 @@ module Google
|
|
769
911
|
# ignored; the query will be run as if `large_results` is true and
|
770
912
|
# `flatten` is false. Optional. The default value is false.
|
771
913
|
#
|
772
|
-
# @return [Google::Cloud::Bigquery::
|
914
|
+
# @return [Google::Cloud::Bigquery::Data] A new data object.
|
773
915
|
#
|
774
916
|
# @example Query using standard SQL:
|
775
917
|
# require "google/cloud/bigquery"
|
@@ -822,25 +964,112 @@ module Google
|
|
822
964
|
# puts row[:name]
|
823
965
|
# end
|
824
966
|
#
|
967
|
+
# @example Query using external data source:
|
968
|
+
# require "google/cloud/bigquery"
|
969
|
+
#
|
970
|
+
# bigquery = Google::Cloud::Bigquery.new
|
971
|
+
# dataset = bigquery.dataset "my_dataset"
|
972
|
+
#
|
973
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
974
|
+
# csv_table = dataset.external csv_url do |csv|
|
975
|
+
# csv.autodetect = true
|
976
|
+
# csv.skip_leading_rows = 1
|
977
|
+
# end
|
978
|
+
#
|
979
|
+
# data = dataset.query "SELECT * FROM my_ext_table",
|
980
|
+
# external: { my_ext_table: csv_table }
|
981
|
+
#
|
982
|
+
# data.each do |row|
|
983
|
+
# puts row[:name]
|
984
|
+
# end
|
985
|
+
#
|
825
986
|
# @!group Data
|
826
987
|
#
|
827
|
-
def query query, params: nil,
|
828
|
-
|
829
|
-
options = { max: max, timeout: timeout, dryrun: dryrun, cache: cache,
|
830
|
-
legacy_sql: legacy_sql, standard_sql: standard_sql,
|
831
|
-
params: params }
|
832
|
-
options[:dataset] ||= dataset_id
|
833
|
-
options[:project] ||= project_id
|
988
|
+
def query query, params: nil, external: nil, max: nil, cache: true,
|
989
|
+
standard_sql: nil, legacy_sql: nil
|
834
990
|
ensure_service!
|
835
|
-
|
836
|
-
|
991
|
+
options = { params: params, external: external, cache: cache,
|
992
|
+
legacy_sql: legacy_sql, standard_sql: standard_sql }
|
993
|
+
|
994
|
+
job = query_job query, options
|
995
|
+
job.wait_until_done!
|
996
|
+
|
997
|
+
if job.failed?
|
998
|
+
begin
|
999
|
+
# raise to activate ruby exception cause handling
|
1000
|
+
fail job.gapi_error
|
1001
|
+
rescue => e
|
1002
|
+
# wrap Google::Apis::Error with Google::Cloud::Error
|
1003
|
+
raise Google::Cloud::Error.from_error(e)
|
1004
|
+
end
|
1005
|
+
end
|
1006
|
+
|
1007
|
+
job.data max: max
|
1008
|
+
end
|
1009
|
+
|
1010
|
+
##
|
1011
|
+
# Creates a new External::DataSource (or subclass) object that
|
1012
|
+
# represents the external data source that can be queried from directly,
|
1013
|
+
# even though the data is not stored in BigQuery. Instead of loading or
|
1014
|
+
# streaming the data, this object references the external data source.
|
1015
|
+
#
|
1016
|
+
# @see https://cloud.google.com/bigquery/external-data-sources Querying
|
1017
|
+
# External Data Sources
|
1018
|
+
#
|
1019
|
+
# @param [String, Array<String>] url The fully-qualified URL(s) that
|
1020
|
+
# point to your data in Google Cloud. An attempt will be made to
|
1021
|
+
# derive the format from the URLs provided.
|
1022
|
+
# @param [String|Symbol] format The data format. This value will be used
|
1023
|
+
# even if the provided URLs are recognized as a different format.
|
1024
|
+
# Optional.
|
1025
|
+
#
|
1026
|
+
# The following values are supported:
|
1027
|
+
#
|
1028
|
+
# * `csv` - CSV
|
1029
|
+
# * `json` - [Newline-delimited JSON](http://jsonlines.org/)
|
1030
|
+
# * `avro` - [Avro](http://avro.apache.org/)
|
1031
|
+
# * `sheets` - Google Sheets
|
1032
|
+
# * `datastore_backup` - Cloud Datastore backup
|
1033
|
+
# * `bigtable` - Bigtable
|
1034
|
+
#
|
1035
|
+
# @return [External::DataSource] External data source.
|
1036
|
+
#
|
1037
|
+
# @example
|
1038
|
+
# require "google/cloud/bigquery"
|
1039
|
+
#
|
1040
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1041
|
+
#
|
1042
|
+
# dataset = bigquery.dataset "my_dataset"
|
1043
|
+
#
|
1044
|
+
# csv_url = "gs://bucket/path/to/data.csv"
|
1045
|
+
# csv_table = dataset.external csv_url do |csv|
|
1046
|
+
# csv.autodetect = true
|
1047
|
+
# csv.skip_leading_rows = 1
|
1048
|
+
# end
|
1049
|
+
#
|
1050
|
+
# data = dataset.query "SELECT * FROM my_ext_table",
|
1051
|
+
# external: { my_ext_table: csv_table }
|
1052
|
+
#
|
1053
|
+
# data.each do |row|
|
1054
|
+
# puts row[:name]
|
1055
|
+
# end
|
1056
|
+
#
|
1057
|
+
def external url, format: nil
|
1058
|
+
ext = External.from_urls url, format
|
1059
|
+
yield ext if block_given?
|
1060
|
+
ext
|
837
1061
|
end
|
838
1062
|
|
839
1063
|
##
|
840
|
-
# Loads data into the provided destination table
|
841
|
-
#
|
842
|
-
#
|
843
|
-
#
|
1064
|
+
# Loads data into the provided destination table using an asynchronous
|
1065
|
+
# method. In this method, a {LoadJob} is immediately returned. The
|
1066
|
+
# caller may poll the service by repeatedly calling {Job#reload!} and
|
1067
|
+
# {Job#done?} to detect when the job is done, or simply block until the
|
1068
|
+
# job is done by calling #{Job#wait_until_done!}. See also {#load}.
|
1069
|
+
#
|
1070
|
+
# For the source of the data, you can pass a google-cloud storage file
|
1071
|
+
# path or a google-cloud-storage `File` instance. Or, you can upload a
|
1072
|
+
# file directly. See [Loading Data with a POST
|
844
1073
|
# Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
|
845
1074
|
#
|
846
1075
|
# @param [String] table_id The destination table to load the data into.
|
@@ -888,6 +1117,9 @@ module Google
|
|
888
1117
|
# @param [Boolean] quoted_newlines Indicates if BigQuery should allow
|
889
1118
|
# quoted data sections that contain newline characters in a CSV file.
|
890
1119
|
# The default value is `false`.
|
1120
|
+
# @param [Boolean] autodetect Indicates if BigQuery should
|
1121
|
+
# automatically infer the options and schema for CSV and JSON sources.
|
1122
|
+
# The default value is `false`.
|
891
1123
|
# @param [String] encoding The character encoding of the data. The
|
892
1124
|
# supported values are `UTF-8` or `ISO-8859-1`. The default value is
|
893
1125
|
# `UTF-8`.
|
@@ -912,6 +1144,13 @@ module Google
|
|
912
1144
|
# records exceeds this value, an invalid error is returned in the job
|
913
1145
|
# result. The default value is `0`, which requires that all records
|
914
1146
|
# are valid.
|
1147
|
+
# @param [String] null_marker Specifies a string that represents a null
|
1148
|
+
# value in a CSV file. For example, if you specify `\N`, BigQuery
|
1149
|
+
# interprets `\N` as a null value when loading a CSV file. The default
|
1150
|
+
# value is the empty string. If you set this property to a custom
|
1151
|
+
# value, BigQuery throws an error if an empty string is present for
|
1152
|
+
# all data types except for STRING and BYTE. For STRING and BYTE
|
1153
|
+
# columns, BigQuery interprets the empty string as an empty value.
|
915
1154
|
# @param [String] quote The value that is used to quote data sections in
|
916
1155
|
# a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
|
917
1156
|
# then uses the first byte of the encoded string to split the data in
|
@@ -932,6 +1171,28 @@ module Google
|
|
932
1171
|
# See {Project#schema} for the creation of the schema for use with
|
933
1172
|
# this option. Also note that for most use cases, the block yielded by
|
934
1173
|
# this method is a more convenient way to configure the schema.
|
1174
|
+
# @param [String] job_id A user-defined ID for the load job. The ID
|
1175
|
+
# must contain only letters (a-z, A-Z), numbers (0-9), underscores
|
1176
|
+
# (_), or dashes (-). The maximum length is 1,024 characters. If
|
1177
|
+
# `job_id` is provided, then `prefix` will not be used.
|
1178
|
+
#
|
1179
|
+
# See [Generating a job
|
1180
|
+
# ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
|
1181
|
+
# @param [String] prefix A string, usually human-readable, that will be
|
1182
|
+
# prepended to a generated value to produce a unique job ID. For
|
1183
|
+
# example, the prefix `daily_import_job_` can be given to generate a
|
1184
|
+
# job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
|
1185
|
+
# prefix must contain only letters (a-z, A-Z), numbers (0-9),
|
1186
|
+
# underscores (_), or dashes (-). The maximum length of the entire ID
|
1187
|
+
# is 1,024 characters. If `job_id` is provided, then `prefix` will not
|
1188
|
+
# be used.
|
1189
|
+
# @param [Hash] labels A hash of user-provided labels associated with
|
1190
|
+
# the job. You can use these to organize and group your jobs. Label
|
1191
|
+
# keys and values can be no longer than 63 characters, can only
|
1192
|
+
# contain lowercase letters, numeric characters, underscores and
|
1193
|
+
# dashes. International characters are allowed. Label values are
|
1194
|
+
# optional. Label keys must start with a letter and each label in the
|
1195
|
+
# list must have a different key.
|
935
1196
|
#
|
936
1197
|
# @yield [schema] A block for setting the schema for the destination
|
937
1198
|
# table. The schema can be omitted if the destination table already
|
@@ -941,7 +1202,7 @@ module Google
|
|
941
1202
|
# instance provided using the `schema` option, or a new, empty schema
|
942
1203
|
# instance
|
943
1204
|
#
|
944
|
-
# @return [Google::Cloud::Bigquery::LoadJob]
|
1205
|
+
# @return [Google::Cloud::Bigquery::LoadJob] A new load job object.
|
945
1206
|
#
|
946
1207
|
# @example
|
947
1208
|
# require "google/cloud/bigquery"
|
@@ -950,7 +1211,7 @@ module Google
|
|
950
1211
|
# dataset = bigquery.dataset "my_dataset"
|
951
1212
|
#
|
952
1213
|
# gs_url = "gs://my-bucket/file-name.csv"
|
953
|
-
# load_job = dataset.
|
1214
|
+
# load_job = dataset.load_job "my_new_table", gs_url do |schema|
|
954
1215
|
# schema.string "first_name", mode: :required
|
955
1216
|
# schema.record "cities_lived", mode: :repeated do |nested_schema|
|
956
1217
|
# nested_schema.string "place", mode: :required
|
@@ -968,7 +1229,7 @@ module Google
|
|
968
1229
|
# storage = Google::Cloud::Storage.new
|
969
1230
|
# bucket = storage.bucket "my-bucket"
|
970
1231
|
# file = bucket.file "file-name.csv"
|
971
|
-
# load_job = dataset.
|
1232
|
+
# load_job = dataset.load_job "my_new_table", file do |schema|
|
972
1233
|
# schema.string "first_name", mode: :required
|
973
1234
|
# schema.record "cities_lived", mode: :repeated do |nested_schema|
|
974
1235
|
# nested_schema.string "place", mode: :required
|
@@ -983,7 +1244,7 @@ module Google
|
|
983
1244
|
# dataset = bigquery.dataset "my_dataset"
|
984
1245
|
#
|
985
1246
|
# file = File.open "my_data.csv"
|
986
|
-
# load_job = dataset.
|
1247
|
+
# load_job = dataset.load_job "my_new_table", file do |schema|
|
987
1248
|
# schema.string "first_name", mode: :required
|
988
1249
|
# schema.record "cities_lived", mode: :repeated do |nested_schema|
|
989
1250
|
# nested_schema.string "place", mode: :required
|
@@ -997,17 +1258,18 @@ module Google
|
|
997
1258
|
# bigquery = Google::Cloud::Bigquery.new
|
998
1259
|
# dataset = bigquery.dataset "my_dataset"
|
999
1260
|
#
|
1000
|
-
# load_job = dataset.
|
1261
|
+
# load_job = dataset.load_job "my_new_table",
|
1001
1262
|
# "gs://my-bucket/xxxx.kind_name.backup_info",
|
1002
1263
|
# format: "datastore_backup"
|
1003
1264
|
#
|
1004
1265
|
# @!group Data
|
1005
1266
|
#
|
1006
|
-
def
|
1007
|
-
|
1008
|
-
|
1009
|
-
|
1010
|
-
|
1267
|
+
def load_job table_id, file, format: nil, create: nil, write: nil,
|
1268
|
+
projection_fields: nil, jagged_rows: nil,
|
1269
|
+
quoted_newlines: nil, encoding: nil, delimiter: nil,
|
1270
|
+
ignore_unknown: nil, max_bad_records: nil, quote: nil,
|
1271
|
+
skip_leading: nil, dryrun: nil, schema: nil, job_id: nil,
|
1272
|
+
prefix: nil, labels: nil, autodetect: nil, null_marker: nil
|
1011
1273
|
ensure_service!
|
1012
1274
|
|
1013
1275
|
if block_given?
|
@@ -1023,12 +1285,228 @@ module Google
|
|
1023
1285
|
delimiter: delimiter, ignore_unknown: ignore_unknown,
|
1024
1286
|
max_bad_records: max_bad_records, quote: quote,
|
1025
1287
|
skip_leading: skip_leading, dryrun: dryrun,
|
1026
|
-
schema: schema_gapi
|
1288
|
+
schema: schema_gapi, job_id: job_id, prefix: prefix,
|
1289
|
+
labels: labels, autodetect: autodetect,
|
1290
|
+
null_marker: null_marker }
|
1027
1291
|
return load_storage(table_id, file, options) if storage_url? file
|
1028
1292
|
return load_local(table_id, file, options) if local_file? file
|
1029
1293
|
fail Google::Cloud::Error, "Don't know how to load #{file}"
|
1030
1294
|
end
|
1031
1295
|
|
1296
|
+
##
|
1297
|
+
# Loads data into the provided destination table using a synchronous
|
1298
|
+
# method that blocks for a response. Timeouts and transient errors are
|
1299
|
+
# generally handled as needed to complete the job. See also
|
1300
|
+
# {#load_job}.
|
1301
|
+
#
|
1302
|
+
# For the source of the data, you can pass a google-cloud storage file
|
1303
|
+
# path or a google-cloud-storage `File` instance. Or, you can upload a
|
1304
|
+
# file directly. See [Loading Data with a POST
|
1305
|
+
# Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
|
1306
|
+
#
|
1307
|
+
# @param [String] table_id The destination table to load the data into.
|
1308
|
+
# @param [File, Google::Cloud::Storage::File, String] file A file or the
|
1309
|
+
# URI of a Google Cloud Storage file containing data to load into the
|
1310
|
+
# table.
|
1311
|
+
# @param [String] format The exported file format. The default value is
|
1312
|
+
# `csv`.
|
1313
|
+
#
|
1314
|
+
# The following values are supported:
|
1315
|
+
#
|
1316
|
+
# * `csv` - CSV
|
1317
|
+
# * `json` - [Newline-delimited JSON](http://jsonlines.org/)
|
1318
|
+
# * `avro` - [Avro](http://avro.apache.org/)
|
1319
|
+
# * `datastore_backup` - Cloud Datastore backup
|
1320
|
+
# @param [String] create Specifies whether the job is allowed to create
|
1321
|
+
# new tables. The default value is `needed`.
|
1322
|
+
#
|
1323
|
+
# The following values are supported:
|
1324
|
+
#
|
1325
|
+
# * `needed` - Create the table if it does not exist.
|
1326
|
+
# * `never` - The table must already exist. A 'notFound' error is
|
1327
|
+
# raised if the table does not exist.
|
1328
|
+
# @param [String] write Specifies how to handle data already present in
|
1329
|
+
# the table. The default value is `append`.
|
1330
|
+
#
|
1331
|
+
# The following values are supported:
|
1332
|
+
#
|
1333
|
+
# * `truncate` - BigQuery overwrites the table data.
|
1334
|
+
# * `append` - BigQuery appends the data to the table.
|
1335
|
+
# * `empty` - An error will be returned if the table already contains
|
1336
|
+
# data.
|
1337
|
+
# @param [Array<String>] projection_fields If the `format` option is set
|
1338
|
+
# to `datastore_backup`, indicates which entity properties to load
|
1339
|
+
# from a Cloud Datastore backup. Property names are case sensitive and
|
1340
|
+
# must be top-level properties. If not set, BigQuery loads all
|
1341
|
+
# properties. If any named property isn't found in the Cloud Datastore
|
1342
|
+
# backup, an invalid error is returned.
|
1343
|
+
# @param [Boolean] jagged_rows Accept rows that are missing trailing
|
1344
|
+
# optional columns. The missing values are treated as nulls. If
|
1345
|
+
# `false`, records with missing trailing columns are treated as bad
|
1346
|
+
# records, and if there are too many bad records, an invalid error is
|
1347
|
+
# returned in the job result. The default value is `false`. Only
|
1348
|
+
# applicable to CSV, ignored for other formats.
|
1349
|
+
# @param [Boolean] quoted_newlines Indicates if BigQuery should allow
|
1350
|
+
# quoted data sections that contain newline characters in a CSV file.
|
1351
|
+
# The default value is `false`.
|
1352
|
+
# @param [Boolean] autodetect Indicates if BigQuery should
|
1353
|
+
# automatically infer the options and schema for CSV and JSON sources.
|
1354
|
+
# The default value is `false`.
|
1355
|
+
# @param [String] encoding The character encoding of the data. The
|
1356
|
+
# supported values are `UTF-8` or `ISO-8859-1`. The default value is
|
1357
|
+
# `UTF-8`.
|
1358
|
+
# @param [String] delimiter Specifices the separator for fields in a CSV
|
1359
|
+
# file. BigQuery converts the string to `ISO-8859-1` encoding, and
|
1360
|
+
# then uses the first byte of the encoded string to split the data in
|
1361
|
+
# its raw, binary state. Default is <code>,</code>.
|
1362
|
+
# @param [Boolean] ignore_unknown Indicates if BigQuery should allow
|
1363
|
+
# extra values that are not represented in the table schema. If true,
|
1364
|
+
# the extra values are ignored. If false, records with extra columns
|
1365
|
+
# are treated as bad records, and if there are too many bad records,
|
1366
|
+
# an invalid error is returned in the job result. The default value is
|
1367
|
+
# `false`.
|
1368
|
+
#
|
1369
|
+
# The `format` property determines what BigQuery treats as an extra
|
1370
|
+
# value:
|
1371
|
+
#
|
1372
|
+
# * `CSV`: Trailing columns
|
1373
|
+
# * `JSON`: Named values that don't match any column names
|
1374
|
+
# @param [Integer] max_bad_records The maximum number of bad records
|
1375
|
+
# that BigQuery can ignore when running the job. If the number of bad
|
1376
|
+
# records exceeds this value, an invalid error is returned in the job
|
1377
|
+
# result. The default value is `0`, which requires that all records
|
1378
|
+
# are valid.
|
1379
|
+
# @param [String] null_marker Specifies a string that represents a null
|
1380
|
+
# value in a CSV file. For example, if you specify `\N`, BigQuery
|
1381
|
+
# interprets `\N` as a null value when loading a CSV file. The default
|
1382
|
+
# value is the empty string. If you set this property to a custom
|
1383
|
+
# value, BigQuery throws an error if an empty string is present for
|
1384
|
+
# all data types except for STRING and BYTE. For STRING and BYTE
|
1385
|
+
# columns, BigQuery interprets the empty string as an empty value.
|
1386
|
+
# @param [String] quote The value that is used to quote data sections in
|
1387
|
+
# a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
|
1388
|
+
# then uses the first byte of the encoded string to split the data in
|
1389
|
+
# its raw, binary state. The default value is a double-quote
|
1390
|
+
# <code>"</code>. If your data does not contain quoted sections, set
|
1391
|
+
# the property value to an empty string. If your data contains quoted
|
1392
|
+
# newline characters, you must also set the allowQuotedNewlines
|
1393
|
+
# property to true.
|
1394
|
+
# @param [Integer] skip_leading The number of rows at the top of a CSV
|
1395
|
+
# file that BigQuery will skip when loading the data. The default
|
1396
|
+
# value is `0`. This property is useful if you have header rows in the
|
1397
|
+
# file that should be skipped.
|
1398
|
+
# @param [Google::Cloud::Bigquery::Schema] schema The schema for the
|
1399
|
+
# destination table. Optional. The schema can be omitted if the
|
1400
|
+
# destination table already exists, or if you're loading data from a
|
1401
|
+
# Google Cloud Datastore backup.
|
1402
|
+
#
|
1403
|
+
# See {Project#schema} for the creation of the schema for use with
|
1404
|
+
# this option. Also note that for most use cases, the block yielded by
|
1405
|
+
# this method is a more convenient way to configure the schema.
|
1406
|
+
#
|
1407
|
+
# @yield [schema] A block for setting the schema for the destination
|
1408
|
+
# table. The schema can be omitted if the destination table already
|
1409
|
+
# exists, or if you're loading data from a Google Cloud Datastore
|
1410
|
+
# backup.
|
1411
|
+
# @yieldparam [Google::Cloud::Bigquery::Schema] schema The schema
|
1412
|
+
# instance provided using the `schema` option, or a new, empty schema
|
1413
|
+
# instance
|
1414
|
+
#
|
1415
|
+
# @return [Boolean] Returns `true` if the load job was successful.
|
1416
|
+
#
|
1417
|
+
# @example
|
1418
|
+
# require "google/cloud/bigquery"
|
1419
|
+
#
|
1420
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1421
|
+
# dataset = bigquery.dataset "my_dataset"
|
1422
|
+
#
|
1423
|
+
# gs_url = "gs://my-bucket/file-name.csv"
|
1424
|
+
# dataset.load "my_new_table", gs_url do |schema|
|
1425
|
+
# schema.string "first_name", mode: :required
|
1426
|
+
# schema.record "cities_lived", mode: :repeated do |nested_schema|
|
1427
|
+
# nested_schema.string "place", mode: :required
|
1428
|
+
# nested_schema.integer "number_of_years", mode: :required
|
1429
|
+
# end
|
1430
|
+
# end
|
1431
|
+
#
|
1432
|
+
# @example Pass a google-cloud-storage `File` instance:
|
1433
|
+
# require "google/cloud/bigquery"
|
1434
|
+
# require "google/cloud/storage"
|
1435
|
+
#
|
1436
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1437
|
+
# dataset = bigquery.dataset "my_dataset"
|
1438
|
+
#
|
1439
|
+
# storage = Google::Cloud::Storage.new
|
1440
|
+
# bucket = storage.bucket "my-bucket"
|
1441
|
+
# file = bucket.file "file-name.csv"
|
1442
|
+
# dataset.load "my_new_table", file do |schema|
|
1443
|
+
# schema.string "first_name", mode: :required
|
1444
|
+
# schema.record "cities_lived", mode: :repeated do |nested_schema|
|
1445
|
+
# nested_schema.string "place", mode: :required
|
1446
|
+
# nested_schema.integer "number_of_years", mode: :required
|
1447
|
+
# end
|
1448
|
+
# end
|
1449
|
+
#
|
1450
|
+
# @example Upload a file directly:
|
1451
|
+
# require "google/cloud/bigquery"
|
1452
|
+
#
|
1453
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1454
|
+
# dataset = bigquery.dataset "my_dataset"
|
1455
|
+
#
|
1456
|
+
# file = File.open "my_data.csv"
|
1457
|
+
# dataset.load "my_new_table", file do |schema|
|
1458
|
+
# schema.string "first_name", mode: :required
|
1459
|
+
# schema.record "cities_lived", mode: :repeated do |nested_schema|
|
1460
|
+
# nested_schema.string "place", mode: :required
|
1461
|
+
# nested_schema.integer "number_of_years", mode: :required
|
1462
|
+
# end
|
1463
|
+
# end
|
1464
|
+
#
|
1465
|
+
# @example Schema is not required with a Cloud Datastore backup:
|
1466
|
+
# require "google/cloud/bigquery"
|
1467
|
+
#
|
1468
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1469
|
+
# dataset = bigquery.dataset "my_dataset"
|
1470
|
+
#
|
1471
|
+
# dataset.load "my_new_table",
|
1472
|
+
# "gs://my-bucket/xxxx.kind_name.backup_info",
|
1473
|
+
# format: "datastore_backup"
|
1474
|
+
#
|
1475
|
+
# @!group Data
|
1476
|
+
#
|
1477
|
+
def load table_id, file, format: nil, create: nil, write: nil,
|
1478
|
+
projection_fields: nil, jagged_rows: nil, quoted_newlines: nil,
|
1479
|
+
encoding: nil, delimiter: nil, ignore_unknown: nil,
|
1480
|
+
max_bad_records: nil, quote: nil, skip_leading: nil,
|
1481
|
+
schema: nil, autodetect: nil, null_marker: nil
|
1482
|
+
|
1483
|
+
yield (schema ||= Schema.from_gapi) if block_given?
|
1484
|
+
|
1485
|
+
options = { format: format, create: create, write: write,
|
1486
|
+
projection_fields: projection_fields,
|
1487
|
+
jagged_rows: jagged_rows,
|
1488
|
+
quoted_newlines: quoted_newlines, encoding: encoding,
|
1489
|
+
delimiter: delimiter, ignore_unknown: ignore_unknown,
|
1490
|
+
max_bad_records: max_bad_records, quote: quote,
|
1491
|
+
skip_leading: skip_leading, schema: schema,
|
1492
|
+
autodetect: autodetect, null_marker: null_marker }
|
1493
|
+
job = load_job table_id, file, options
|
1494
|
+
|
1495
|
+
job.wait_until_done!
|
1496
|
+
|
1497
|
+
if job.failed?
|
1498
|
+
begin
|
1499
|
+
# raise to activate ruby exception cause handling
|
1500
|
+
fail job.gapi_error
|
1501
|
+
rescue => e
|
1502
|
+
# wrap Google::Apis::Error with Google::Cloud::Error
|
1503
|
+
raise Google::Cloud::Error.from_error(e)
|
1504
|
+
end
|
1505
|
+
end
|
1506
|
+
|
1507
|
+
true
|
1508
|
+
end
|
1509
|
+
|
1032
1510
|
##
|
1033
1511
|
# @private New Dataset from a Google API Client object.
|
1034
1512
|
def self.from_gapi gapi, conn
|
@@ -1038,8 +1516,158 @@ module Google
|
|
1038
1516
|
end
|
1039
1517
|
end
|
1040
1518
|
|
1519
|
+
##
|
1520
|
+
# Inserts data into the given table for near-immediate querying, without
|
1521
|
+
# the need to complete a load operation before the data can appear in
|
1522
|
+
# query results.
|
1523
|
+
#
|
1524
|
+
# @see https://cloud.google.com/bigquery/streaming-data-into-bigquery
|
1525
|
+
# Streaming Data Into BigQuery
|
1526
|
+
#
|
1527
|
+
# @param [String] table_id The ID of the destination table.
|
1528
|
+
# @param [Hash, Array<Hash>] rows A hash object or array of hash objects
|
1529
|
+
# containing the data. Required.
|
1530
|
+
# @param [Boolean] skip_invalid Insert all valid rows of a request, even
|
1531
|
+
# if invalid rows exist. The default value is `false`, which causes
|
1532
|
+
# the entire request to fail if any invalid rows exist.
|
1533
|
+
# @param [Boolean] ignore_unknown Accept rows that contain values that
|
1534
|
+
# do not match the schema. The unknown values are ignored. Default is
|
1535
|
+
# false, which treats unknown values as errors.
|
1536
|
+
# @param [Boolean] autocreate Specifies whether the method should create
|
1537
|
+
# a new table with the given `table_id`, if no table is found for
|
1538
|
+
# `table_id`. The default value is false.
|
1539
|
+
#
|
1540
|
+
# @return [Google::Cloud::Bigquery::InsertResponse] An insert response
|
1541
|
+
# object.
|
1542
|
+
#
|
1543
|
+
# @example
|
1544
|
+
# require "google/cloud/bigquery"
|
1545
|
+
#
|
1546
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1547
|
+
# dataset = bigquery.dataset "my_dataset"
|
1548
|
+
#
|
1549
|
+
# rows = [
|
1550
|
+
# { "first_name" => "Alice", "age" => 21 },
|
1551
|
+
# { "first_name" => "Bob", "age" => 22 }
|
1552
|
+
# ]
|
1553
|
+
# dataset.insert "my_table", rows
|
1554
|
+
#
|
1555
|
+
# @example Using `autocreate` to create a new table if none exists.
|
1556
|
+
# require "google/cloud/bigquery"
|
1557
|
+
#
|
1558
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1559
|
+
# dataset = bigquery.dataset "my_dataset"
|
1560
|
+
#
|
1561
|
+
# rows = [
|
1562
|
+
# { "first_name" => "Alice", "age" => 21 },
|
1563
|
+
# { "first_name" => "Bob", "age" => 22 }
|
1564
|
+
# ]
|
1565
|
+
# dataset.insert "my_table", rows, autocreate: true do |t|
|
1566
|
+
# t.schema.string "first_name", mode: :required
|
1567
|
+
# t.schema.integer "age", mode: :required
|
1568
|
+
# end
|
1569
|
+
#
|
1570
|
+
# @!group Data
|
1571
|
+
#
|
1572
|
+
def insert table_id, rows, skip_invalid: nil, ignore_unknown: nil,
|
1573
|
+
autocreate: nil
|
1574
|
+
if autocreate
|
1575
|
+
begin
|
1576
|
+
insert_data table_id, rows, skip_invalid: skip_invalid,
|
1577
|
+
ignore_unknown: ignore_unknown
|
1578
|
+
rescue Google::Cloud::NotFoundError
|
1579
|
+
sleep rand(1..60)
|
1580
|
+
begin
|
1581
|
+
create_table table_id do |tbl_updater|
|
1582
|
+
yield tbl_updater if block_given?
|
1583
|
+
end
|
1584
|
+
# rubocop:disable Lint/HandleExceptions
|
1585
|
+
rescue Google::Cloud::AlreadyExistsError
|
1586
|
+
end
|
1587
|
+
# rubocop:enable Lint/HandleExceptions
|
1588
|
+
|
1589
|
+
sleep 60
|
1590
|
+
insert table_id, rows, skip_invalid: skip_invalid,
|
1591
|
+
ignore_unknown: ignore_unknown,
|
1592
|
+
autocreate: true
|
1593
|
+
end
|
1594
|
+
else
|
1595
|
+
insert_data table_id, rows, skip_invalid: skip_invalid,
|
1596
|
+
ignore_unknown: ignore_unknown
|
1597
|
+
end
|
1598
|
+
end
|
1599
|
+
|
1600
|
+
##
|
1601
|
+
# Create an asynchonous inserter object used to insert rows in batches.
|
1602
|
+
#
|
1603
|
+
# @param [String] table_id The ID of the table to insert rows into.
|
1604
|
+
# @param [Boolean] skip_invalid Insert all valid rows of a request, even
|
1605
|
+
# if invalid rows exist. The default value is `false`, which causes
|
1606
|
+
# the entire request to fail if any invalid rows exist.
|
1607
|
+
# @param [Boolean] ignore_unknown Accept rows that contain values that
|
1608
|
+
# do not match the schema. The unknown values are ignored. Default is
|
1609
|
+
# false, which treats unknown values as errors.
|
1610
|
+
# @attr_reader [Integer] max_bytes The maximum size of rows to be
|
1611
|
+
# collected before the batch is published. Default is 10,000,000
|
1612
|
+
# (10MB).
|
1613
|
+
# @param [Integer] max_rows The maximum number of rows to be collected
|
1614
|
+
# before the batch is published. Default is 500.
|
1615
|
+
# @attr_reader [Numeric] interval The number of seconds to collect
|
1616
|
+
# messages before the batch is published. Default is 10.
|
1617
|
+
# @attr_reader [Numeric] threads The number of threads used to insert
|
1618
|
+
# batches of rows. Default is 4.
|
1619
|
+
# @yield [response] the callback for when a batch of rows is inserted
|
1620
|
+
# @yieldparam [InsertResponse] response the result of the asynchonous
|
1621
|
+
# insert
|
1622
|
+
#
|
1623
|
+
# @return [Table::AsyncInserter] Returns an inserter object.
|
1624
|
+
#
|
1625
|
+
# @example
|
1626
|
+
# require "google/cloud/bigquery"
|
1627
|
+
#
|
1628
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1629
|
+
# dataset = bigquery.dataset "my_dataset"
|
1630
|
+
# table = dataset.table "my_table"
|
1631
|
+
# inserter = table.insert_async do |response|
|
1632
|
+
# log_insert "inserted #{response.insert_count} rows " \
|
1633
|
+
# "with #{response.error_count} errors"
|
1634
|
+
# end
|
1635
|
+
#
|
1636
|
+
# rows = [
|
1637
|
+
# { "first_name" => "Alice", "age" => 21 },
|
1638
|
+
# { "first_name" => "Bob", "age" => 22 }
|
1639
|
+
# ]
|
1640
|
+
# inserter.insert rows
|
1641
|
+
#
|
1642
|
+
# inserter.stop.wait!
|
1643
|
+
#
|
1644
|
+
def insert_async table_id, skip_invalid: nil, ignore_unknown: nil,
|
1645
|
+
max_bytes: 10000000, max_rows: 500, interval: 10,
|
1646
|
+
threads: 4, &block
|
1647
|
+
ensure_service!
|
1648
|
+
|
1649
|
+
# Get table, don't use Dataset#table which handles NotFoundError
|
1650
|
+
gapi = service.get_table dataset_id, table_id
|
1651
|
+
table = Table.from_gapi gapi, service
|
1652
|
+
# Get the AsyncInserter from the table
|
1653
|
+
table.insert_async skip_invalid: skip_invalid,
|
1654
|
+
ignore_unknown: ignore_unknown,
|
1655
|
+
max_bytes: max_bytes, max_rows: max_rows,
|
1656
|
+
interval: interval, threads: threads, &block
|
1657
|
+
end
|
1658
|
+
|
1041
1659
|
protected
|
1042
1660
|
|
1661
|
+
def insert_data table_id, rows, skip_invalid: nil, ignore_unknown: nil
|
1662
|
+
rows = [rows] if rows.is_a? Hash
|
1663
|
+
fail ArgumentError, "No rows provided" if rows.empty?
|
1664
|
+
ensure_service!
|
1665
|
+
options = { skip_invalid: skip_invalid,
|
1666
|
+
ignore_unknown: ignore_unknown }
|
1667
|
+
gapi = service.insert_tabledata dataset_id, table_id, rows, options
|
1668
|
+
InsertResponse.from_gapi rows, gapi
|
1669
|
+
end
|
1670
|
+
|
1043
1671
|
##
|
1044
1672
|
# Raise an error unless an active service is available.
|
1045
1673
|
def ensure_service!
|
@@ -1053,6 +1681,7 @@ module Google
     [attr, @gapi.send(attr)]
   end]
   patch_gapi = Google::Apis::BigqueryV2::Dataset.new patch_args
+  patch_gapi.etag = etag if etag
   @gapi = service.patch_dataset dataset_id, patch_gapi
 end
 
@@ -1101,6 +1730,19 @@ module Google
   false
 end
 
+def udfs_gapi array_or_str
+  return [] if array_or_str.nil?
+  Array(array_or_str).map do |uri_or_code|
+    resource = Google::Apis::BigqueryV2::UserDefinedFunctionResource.new
+    if uri_or_code.start_with?("gs://")
+      resource.resource_uri = uri_or_code
+    else
+      resource.inline_code = uri_or_code
+    end
+    resource
+  end
+end
+
 ##
 # Yielded to a block to accumulate changes for a patch request.
 class Updater < Dataset
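The streaming-insert additions, `Dataset#insert` with `autocreate` and `Dataset#insert_async` (backed by the new `table/async_inserter.rb`), condense to roughly the following usage, again taken from the examples in this diff.

  require "google/cloud/bigquery"

  bigquery = Google::Cloud::Bigquery.new
  dataset  = bigquery.dataset "my_dataset"

  rows = [
    { "first_name" => "Alice", "age" => 21 },
    { "first_name" => "Bob",   "age" => 22 }
  ]

  # One-shot insert; autocreate builds the table from the block if missing.
  dataset.insert "my_table", rows, autocreate: true do |t|
    t.schema.string  "first_name", mode: :required
    t.schema.integer "age",        mode: :required
  end

  # Batched, background inserts; stop and wait for pending batches at exit.
  inserter = dataset.insert_async "my_table" do |response|
    puts "inserted #{response.insert_count} rows, #{response.error_count} errors"
  end
  inserter.insert rows
  inserter.stop.wait!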
|