google-cloud-bigquery 0.28.0 → 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/google-cloud-bigquery.rb +2 -2
- data/lib/google/cloud/bigquery.rb +10 -12
- data/lib/google/cloud/bigquery/copy_job.rb +42 -6
- data/lib/google/cloud/bigquery/data.rb +129 -23
- data/lib/google/cloud/bigquery/dataset.rb +708 -66
- data/lib/google/cloud/bigquery/dataset/access.rb +533 -27
- data/lib/google/cloud/bigquery/dataset/list.rb +5 -3
- data/lib/google/cloud/bigquery/external.rb +2353 -0
- data/lib/google/cloud/bigquery/extract_job.rb +52 -11
- data/lib/google/cloud/bigquery/insert_response.rb +90 -2
- data/lib/google/cloud/bigquery/job.rb +160 -21
- data/lib/google/cloud/bigquery/load_job.rb +128 -11
- data/lib/google/cloud/bigquery/project.rb +187 -44
- data/lib/google/cloud/bigquery/query_job.rb +323 -13
- data/lib/google/cloud/bigquery/schema.rb +57 -1
- data/lib/google/cloud/bigquery/schema/field.rb +118 -17
- data/lib/google/cloud/bigquery/service.rb +196 -43
- data/lib/google/cloud/bigquery/table.rb +739 -49
- data/lib/google/cloud/bigquery/table/async_inserter.rb +280 -0
- data/lib/google/cloud/bigquery/version.rb +1 -1
- data/lib/google/cloud/bigquery/view.rb +306 -69
- metadata +18 -3
- data/lib/google/cloud/bigquery/query_data.rb +0 -234
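The bulk of the release lands in data/lib/google/cloud/bigquery/dataset.rb, whose diff follows. As a quick orientation, here is a minimal, hedged sketch of the headline Dataset additions in 0.29.0 (labels, external data sources, and a blocking query helper); the dataset, table, and bucket names are placeholders, not part of the diff:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset  = bigquery.dataset "my_dataset"   # placeholder dataset

    # New in 0.29.0: dataset labels (replace the whole hash with #labels=)
    dataset.labels = { "department" => "shipping" }

    # New in 0.29.0: query a CSV in Cloud Storage without loading it
    csv_table = dataset.external "gs://my-bucket/data.csv" do |csv|
      csv.autodetect = true
      csv.skip_leading_rows = 1
    end

    data = dataset.query "SELECT * FROM my_ext_table",
                         external: { my_ext_table: csv_table }
    data.each { |row| puts row[:name] }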
@@ -17,6 +17,7 @@ require "json"
 require "google/cloud/errors"
 require "google/cloud/bigquery/service"
 require "google/cloud/bigquery/table"
+require "google/cloud/bigquery/external"
 require "google/cloud/bigquery/dataset/list"
 require "google/cloud/bigquery/dataset/access"
 require "google/apis/bigquery_v2"
@@ -59,8 +60,9 @@ module Google
 
 ##
 # A unique ID for this dataset, without the project name.
-#
-#
+#
+# @return [String] The ID must contain only letters (a-z, A-Z), numbers
+# (0-9), or underscores (_). The maximum length is 1,024 characters.
 #
 # @!group Attributes
 #
@@ -71,6 +73,8 @@ module Google
 ##
 # The ID of the project containing this dataset.
 #
+# @return [String] The project ID.
+#
 # @!group Attributes
 #
 def project_id
@@ -90,6 +94,8 @@ module Google
 ##
 # A descriptive name for the dataset.
 #
+# @return [String] The friendly name.
+#
 # @!group Attributes
 #
 def name
@@ -99,6 +105,8 @@ module Google
 ##
 # Updates the descriptive name for the dataset.
 #
+# @param [String] new_name The new friendly name.
+#
 # @!group Attributes
 #
 def name= new_name
@@ -107,7 +115,9 @@ module Google
 end
 
 ##
-#
+# The ETag hash of the dataset.
+#
+# @return [String] The ETag hash.
 #
 # @!group Attributes
 #
@@ -119,6 +129,8 @@ module Google
 ##
 # A URL that can be used to access the dataset using the REST API.
 #
+# @return [String] A REST URL for the resource.
+#
 # @!group Attributes
 #
 def api_url
@@ -129,6 +141,8 @@ module Google
 ##
 # A user-friendly description of the dataset.
 #
+# @return [String] The description.
+#
 # @!group Attributes
 #
 def description
@@ -139,6 +153,8 @@ module Google
 ##
 # Updates the user-friendly description of the dataset.
 #
+# @param [String] new_description The new description for the dataset.
+#
 # @!group Attributes
 #
 def description= new_description
@@ -149,6 +165,8 @@ module Google
 ##
 # The default lifetime of all tables in the dataset, in milliseconds.
 #
+# @return [Integer] The default table expiration in milliseconds.
+#
 # @!group Attributes
 #
 def default_expiration
@@ -164,6 +182,9 @@ module Google
 # Updates the default lifetime of all tables in the dataset, in
 # milliseconds.
 #
+# @param [Integer] new_default_expiration The new default table
+# expiration in milliseconds.
+#
 # @!group Attributes
 #
 def default_expiration= new_default_expiration
@@ -174,6 +195,8 @@ module Google
 ##
 # The time when this dataset was created.
 #
+# @return [Time, nil] The creation time.
+#
 # @!group Attributes
 #
 def created_at
@@ -188,6 +211,8 @@ module Google
 ##
 # The date when this dataset or any of its tables was last modified.
 #
+# @return [Time, nil] The last modified time.
+#
 # @!group Attributes
 #
 def modified_at
@@ -201,7 +226,9 @@ module Google
 
 ##
 # The geographic location where the dataset should reside. Possible
-# values include EU and US
+# values include `EU` and `US`. The default value is `US`.
+#
+# @return [String] The location code.
 #
 # @!group Attributes
 #
@@ -210,6 +237,63 @@ module Google
 @gapi.location
 end
 
+##
+# A hash of user-provided labels associated with this dataset. Labels
+# are used to organize and group datasets. See [Using
+# Labels](https://cloud.google.com/bigquery/docs/labels).
+#
+# The returned hash is frozen and changes are not allowed. Use
+# {#labels=} to replace the entire hash.
+#
+# @return [Hash<String, String>] A hash containing key/value pairs.
+#
+# @example
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+# dataset = bigquery.dataset "my_dataset"
+#
+# labels = dataset.labels
+# labels["department"] #=> "shipping"
+#
+# @!group Attributes
+#
+def labels
+m = @gapi.labels
+m = m.to_h if m.respond_to? :to_h
+m.dup.freeze
+end
+
+##
+# Updates the hash of user-provided labels associated with this dataset.
+# Labels are used to organize and group datasets. See [Using
+# Labels](https://cloud.google.com/bigquery/docs/labels).
+#
+# @param [Hash<String, String>] labels A hash containing key/value
+# pairs.
+#
+# * Label keys and values can be no longer than 63 characters.
+# * Label keys and values can contain only lowercase letters, numbers,
+# underscores, hyphens, and international characters.
+# * Label keys and values cannot exceed 128 bytes in size.
+# * Label keys must begin with a letter.
+# * Label keys must be unique within a dataset.
+#
+# @example
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+# dataset = bigquery.dataset "my_dataset"
+#
+# dataset.labels = { "department" => "shipping" }
+#
+# @!group Attributes
+#
+def labels= labels
+@gapi.labels = labels
+patch_gapi! :labels
+end
+
 ##
 # Retrieves the access rules for a Dataset. The rules can be updated
 # when passing a block, see {Dataset::Access} for all the methods
@@ -221,7 +305,7 @@ module Google
 # @yield [access] a block for setting rules
 # @yieldparam [Dataset::Access] access the object accepting rules
 #
-# @return [Google::Cloud::Bigquery::Dataset::Access]
+# @return [Google::Cloud::Bigquery::Dataset::Access] The access object.
 #
 # @example
 # require "google/cloud/bigquery"
@@ -229,14 +313,8 @@ module Google
 # bigquery = Google::Cloud::Bigquery.new
 # dataset = bigquery.dataset "my_dataset"
 #
-# dataset.access
-#
-# # {"role"=>"WRITER",
-# # "specialGroup"=>"projectWriters"},
-# # {"role"=>"READER",
-# # "specialGroup"=>"projectReaders"},
-# # {"role"=>"OWNER",
-# # "userByEmail"=>"123456789-...com"}]
+# access = dataset.access
+# access.writer_user? "reader@example.com" #=> false
 #
 # @example Manage the access rules by passing a block:
 # require "google/cloud/bigquery"
@@ -305,7 +383,7 @@ module Google
 # @yield [table] a block for setting the table
 # @yieldparam [Table] table the table object to be updated
 #
-# @return [Google::Cloud::Bigquery::Table]
+# @return [Google::Cloud::Bigquery::Table] A new table object.
 #
 # @example
 # require "google/cloud/bigquery"
@@ -394,8 +472,15 @@ module Google
 # [legacy
 # SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
 # dialect. Optional. The default value is false.
+# @param [Array<String>, String] udfs User-defined function resources
+# used in the query. May be either a code resource to load from a
+# Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
+# that contains code for a user-defined function (UDF). Providing an
+# inline code resource is equivalent to providing a URI for a file
+# containing the same code. See [User-Defined
+# Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
 #
-# @return [Google::Cloud::Bigquery::View]
+# @return [Google::Cloud::Bigquery::View] A new view object.
 #
 # @example
 # require "google/cloud/bigquery"
@@ -419,7 +504,7 @@ module Google
 # @!group Table
 #
 def create_view table_id, query, name: nil, description: nil,
-standard_sql: nil, legacy_sql: nil
+standard_sql: nil, legacy_sql: nil, udfs: nil
 new_view_opts = {
 table_reference: Google::Apis::BigqueryV2::TableReference.new(
 project_id: project_id, dataset_id: dataset_id, table_id: table_id
@@ -429,7 +514,8 @@ module Google
 view: Google::Apis::BigqueryV2::ViewDefinition.new(
 query: query,
 use_legacy_sql: Convert.resolve_legacy_sql(standard_sql,
-legacy_sql)
+legacy_sql),
+user_defined_function_resources: udfs_gapi(udfs)
 )
 }.delete_if { |_, v| v.nil? }
 new_view = Google::Apis::BigqueryV2::Table.new new_view_opts
@@ -474,8 +560,8 @@ module Google
 # @param [Integer] max Maximum number of tables to return.
 #
 # @return [Array<Google::Cloud::Bigquery::Table>,
-# Array<Google::Cloud::Bigquery::View>]
-# {Google::Cloud::Bigquery::Table::List})
+# Array<Google::Cloud::Bigquery::View>] An array of tables and/or
+# views(See {Google::Cloud::Bigquery::Table::List})
 #
 # @example
 # require "google/cloud/bigquery"
@@ -546,6 +632,10 @@ module Google
 # passed is a hash `{ myparam: "foo" }`, the query must use named
 # query parameters. When set, `legacy_sql` will automatically be set
 # to false and `standard_sql` to true.
+# @param [Hash<String|Symbol, External::DataSource>] external A Hash
+# that represents the mapping of the external tables to the table
+# names used in the SQL query. The hash keys are the table names, and
+# the hash values are the external table objects. See {Dataset#query}.
 # @param [String] priority Specifies a priority for the query. Possible
 # values include `INTERACTIVE` and `BATCH`. The default value is
 # `INTERACTIVE`.
@@ -605,8 +695,37 @@ module Google
 # job. Queries that will have bytes billed beyond this limit will fail
 # (without incurring a charge). Optional. If unspecified, this will be
 # set to your project default.
-#
-#
+# @param [String] job_id A user-defined ID for the query job. The ID
+# must contain only letters (a-z, A-Z), numbers (0-9), underscores
+# (_), or dashes (-). The maximum length is 1,024 characters. If
+# `job_id` is provided, then `prefix` will not be used.
+#
+# See [Generating a job
+# ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+# @param [String] prefix A string, usually human-readable, that will be
+# prepended to a generated value to produce a unique job ID. For
+# example, the prefix `daily_import_job_` can be given to generate a
+# job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
+# prefix must contain only letters (a-z, A-Z), numbers (0-9),
+# underscores (_), or dashes (-). The maximum length of the entire ID
+# is 1,024 characters. If `job_id` is provided, then `prefix` will not
+# be used.
+# @param [Hash] labels A hash of user-provided labels associated with
+# the job. You can use these to organize and group your jobs. Label
+# keys and values can be no longer than 63 characters, can only
+# contain lowercase letters, numeric characters, underscores and
+# dashes. International characters are allowed. Label values are
+# optional. Label keys must start with a letter and each label in the
+# list must have a different key.
+# @param [Array<String>, String] udfs User-defined function resources
+# used in the query. May be either a code resource to load from a
+# Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
+# that contains code for a user-defined function (UDF). Providing an
+# inline code resource is equivalent to providing a URI for a file
+# containing the same code. See [User-Defined
+# Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
+#
+# @return [Google::Cloud::Bigquery::QueryJob] A new query job object.
 #
 # @example Query using standard SQL:
 # require "google/cloud/bigquery"
@@ -618,7 +737,7 @@ module Google
 #
 # job.wait_until_done!
 # if !job.failed?
-# job.
+# job.data.each do |row|
 # puts row[:name]
 # end
 # end
@@ -634,7 +753,7 @@ module Google
 #
 # job.wait_until_done!
 # if !job.failed?
-# job.
+# job.data.each do |row|
 # puts row[:name]
 # end
 # end
@@ -650,7 +769,7 @@ module Google
 #
 # job.wait_until_done!
 # if !job.failed?
-# job.
+# job.data.each do |row|
 # puts row[:name]
 # end
 # end
@@ -666,24 +785,49 @@ module Google
 #
 # job.wait_until_done!
 # if !job.failed?
-# job.
+# job.data.each do |row|
+# puts row[:name]
+# end
+# end
+#
+# @example Query using external data source:
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+# dataset = bigquery.dataset "my_dataset"
+#
+# csv_url = "gs://bucket/path/to/data.csv"
+# csv_table = dataset.external csv_url do |csv|
+# csv.autodetect = true
+# csv.skip_leading_rows = 1
+# end
+#
+# job = dataset.query_job "SELECT * FROM my_ext_table",
+# external: { my_ext_table: csv_table }
+#
+# job.wait_until_done!
+# if !job.failed?
+# job.data.each do |row|
 # puts row[:name]
 # end
 # end
 #
 # @!group Data
 #
-def query_job query, params: nil,
-
+def query_job query, params: nil, external: nil,
+priority: "INTERACTIVE", cache: true, table: nil,
+create: nil, write: nil, standard_sql: nil,
 legacy_sql: nil, large_results: nil, flatten: nil,
-maximum_billing_tier: nil, maximum_bytes_billed: nil
+maximum_billing_tier: nil, maximum_bytes_billed: nil,
+job_id: nil, prefix: nil, labels: nil, udfs: nil
 options = { priority: priority, cache: cache, table: table,
 create: create, write: write,
 large_results: large_results, flatten: flatten,
 legacy_sql: legacy_sql, standard_sql: standard_sql,
 maximum_billing_tier: maximum_billing_tier,
 maximum_bytes_billed: maximum_bytes_billed,
-params: params
+params: params, external: external, labels: labels,
+job_id: job_id, prefix: prefix, udfs: udfs }
 options[:dataset] ||= self
 ensure_service!
 gapi = service.query_job query, options
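The new query_job options above (job_id/prefix, job labels, and udfs) are not exercised by the @example blocks in the diff, so here is a hedged sketch; the prefix, label values, and the gs:// UDF path are placeholders:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset  = bigquery.dataset "my_dataset"

    job = dataset.query_job "SELECT name FROM my_table",
                            prefix: "nightly_report_",            # generated job ID prefix
                            labels: { "team" => "analytics" },    # job labels
                            udfs:   "gs://my-bucket/my_udfs.js"   # or inline UDF code

    job.wait_until_done!
    job.data.each { |row| puts row[:name] } unless job.failed?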
@@ -691,8 +835,10 @@ module Google
 end
 
 ##
-# Queries data using
-# method
+# Queries data using a synchronous method that blocks for a response. In
+# this method, a {QueryJob} is created and its results are saved
+# to a temporary table, then read from the table. Timeouts and transient
+# errors are generally handled as needed to complete the query.
 #
 # Sets the current dataset as the default dataset in the query. Useful
 # for using unqualified table names.
@@ -717,6 +863,8 @@ module Google
 # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types)
 # for an overview of each BigQuery data type, including allowed values.
 #
+# @see https://cloud.google.com/bigquery/querying-data Querying Data
+#
 # @param [String] query A query string, following the BigQuery [query
 # syntax](https://cloud.google.com/bigquery/query-reference), of the
 # query to execute. Example: "SELECT count(f1) FROM
@@ -728,22 +876,16 @@ module Google
 # passed is a hash `{ myparam: "foo" }`, the query must use named
 # query parameters. When set, `legacy_sql` will automatically be set
 # to false and `standard_sql` to true.
+# @param [Hash<String|Symbol, External::DataSource>] external A Hash
+# that represents the mapping of the external tables to the table
+# names used in the SQL query. The hash keys are the table names, and
+# the hash values are the external table objects. See {Dataset#query}.
 # @param [Integer] max The maximum number of rows of data to return per
 # page of results. Setting this flag to a small value such as 1000 and
 # then paging through results might improve reliability when the query
 # result set is large. In addition to this limit, responses are also
 # limited to 10 MB. By default, there is no maximum row count, and
 # only the byte limit applies.
-# @param [Integer] timeout How long to wait for the query to complete,
-# in milliseconds, before the request times out and returns. Note that
-# this is only a timeout for the request, not the query. If the query
-# takes longer to run than the timeout value, the call returns without
-# any results and with QueryData#complete? set to false. The default
-# value is 10000 milliseconds (10 seconds).
-# @param [Boolean] dryrun If set to `true`, BigQuery doesn't run the
-# job. Instead, if the query is valid, BigQuery returns statistics
-# about the job such as how many bytes would be processed. If the
-# query is invalid, an error returns. The default value is `false`.
 # @param [Boolean] cache Whether to look for the result in the query
 # cache. The query cache is a best-effort cache that will be flushed
 # whenever tables in the query are modified. The default value is
@@ -769,7 +911,7 @@ module Google
 # ignored; the query will be run as if `large_results` is true and
 # `flatten` is false. Optional. The default value is false.
 #
-# @return [Google::Cloud::Bigquery::
+# @return [Google::Cloud::Bigquery::Data] A new data object.
 #
 # @example Query using standard SQL:
 # require "google/cloud/bigquery"
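Dataset#query now returns a Google::Cloud::Bigquery::Data object (the old QueryData class is removed in this release). A hedged sketch of paging through it, assuming the Data#next?/#next paging API used elsewhere in this gem:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset  = bigquery.dataset "my_dataset"

    data = dataset.query "SELECT name FROM my_table", max: 1000

    # Pages are fetched on demand; `max` caps the rows per page.
    while data
      data.each { |row| puts row[:name] }
      data = data.next? ? data.next : nil
    end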
@@ -822,25 +964,112 @@ module Google
 # puts row[:name]
 # end
 #
+# @example Query using external data source:
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+# dataset = bigquery.dataset "my_dataset"
+#
+# csv_url = "gs://bucket/path/to/data.csv"
+# csv_table = dataset.external csv_url do |csv|
+# csv.autodetect = true
+# csv.skip_leading_rows = 1
+# end
+#
+# data = dataset.query "SELECT * FROM my_ext_table",
+# external: { my_ext_table: csv_table }
+#
+# data.each do |row|
+# puts row[:name]
+# end
+#
 # @!group Data
 #
-def query query, params: nil,
-
-options = { max: max, timeout: timeout, dryrun: dryrun, cache: cache,
-legacy_sql: legacy_sql, standard_sql: standard_sql,
-params: params }
-options[:dataset] ||= dataset_id
-options[:project] ||= project_id
+def query query, params: nil, external: nil, max: nil, cache: true,
+standard_sql: nil, legacy_sql: nil
 ensure_service!
-
-
+options = { params: params, external: external, cache: cache,
+legacy_sql: legacy_sql, standard_sql: standard_sql }
+
+job = query_job query, options
+job.wait_until_done!
+
+if job.failed?
+begin
+# raise to activate ruby exception cause handling
+fail job.gapi_error
+rescue => e
+# wrap Google::Apis::Error with Google::Cloud::Error
+raise Google::Cloud::Error.from_error(e)
+end
+end
+
+job.data max: max
+end
+
+##
+# Creates a new External::DataSource (or subclass) object that
+# represents the external data source that can be queried from directly,
+# even though the data is not stored in BigQuery. Instead of loading or
+# streaming the data, this object references the external data source.
+#
+# @see https://cloud.google.com/bigquery/external-data-sources Querying
+# External Data Sources
+#
+# @param [String, Array<String>] url The fully-qualified URL(s) that
+# point to your data in Google Cloud. An attempt will be made to
+# derive the format from the URLs provided.
+# @param [String|Symbol] format The data format. This value will be used
+# even if the provided URLs are recognized as a different format.
+# Optional.
+#
+# The following values are supported:
+#
+# * `csv` - CSV
+# * `json` - [Newline-delimited JSON](http://jsonlines.org/)
+# * `avro` - [Avro](http://avro.apache.org/)
+# * `sheets` - Google Sheets
+# * `datastore_backup` - Cloud Datastore backup
+# * `bigtable` - Bigtable
+#
+# @return [External::DataSource] External data source.
+#
+# @example
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+#
+# dataset = bigquery.dataset "my_dataset"
+#
+# csv_url = "gs://bucket/path/to/data.csv"
+# csv_table = dataset.external csv_url do |csv|
+# csv.autodetect = true
+# csv.skip_leading_rows = 1
+# end
+#
+# data = dataset.query "SELECT * FROM my_ext_table",
+# external: { my_ext_table: csv_table }
+#
+# data.each do |row|
+# puts row[:name]
+# end
+#
+def external url, format: nil
+ext = External.from_urls url, format
+yield ext if block_given?
+ext
 end
 
 ##
-# Loads data into the provided destination table
-#
-#
-#
+# Loads data into the provided destination table using an asynchronous
+# method. In this method, a {LoadJob} is immediately returned. The
+# caller may poll the service by repeatedly calling {Job#reload!} and
+# {Job#done?} to detect when the job is done, or simply block until the
+# job is done by calling #{Job#wait_until_done!}. See also {#load}.
+#
+# For the source of the data, you can pass a google-cloud storage file
+# path or a google-cloud-storage `File` instance. Or, you can upload a
+# file directly. See [Loading Data with a POST
 # Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
 #
 # @param [String] table_id The destination table to load the data into.
@@ -888,6 +1117,9 @@ module Google
 # @param [Boolean] quoted_newlines Indicates if BigQuery should allow
 # quoted data sections that contain newline characters in a CSV file.
 # The default value is `false`.
+# @param [Boolean] autodetect Indicates if BigQuery should
+# automatically infer the options and schema for CSV and JSON sources.
+# The default value is `false`.
 # @param [String] encoding The character encoding of the data. The
 # supported values are `UTF-8` or `ISO-8859-1`. The default value is
 # `UTF-8`.
@@ -912,6 +1144,13 @@ module Google
 # records exceeds this value, an invalid error is returned in the job
 # result. The default value is `0`, which requires that all records
 # are valid.
+# @param [String] null_marker Specifies a string that represents a null
+# value in a CSV file. For example, if you specify `\N`, BigQuery
+# interprets `\N` as a null value when loading a CSV file. The default
+# value is the empty string. If you set this property to a custom
+# value, BigQuery throws an error if an empty string is present for
+# all data types except for STRING and BYTE. For STRING and BYTE
+# columns, BigQuery interprets the empty string as an empty value.
 # @param [String] quote The value that is used to quote data sections in
 # a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
 # then uses the first byte of the encoded string to split the data in
@@ -932,6 +1171,28 @@ module Google
 # See {Project#schema} for the creation of the schema for use with
 # this option. Also note that for most use cases, the block yielded by
 # this method is a more convenient way to configure the schema.
+# @param [String] job_id A user-defined ID for the load job. The ID
+# must contain only letters (a-z, A-Z), numbers (0-9), underscores
+# (_), or dashes (-). The maximum length is 1,024 characters. If
+# `job_id` is provided, then `prefix` will not be used.
+#
+# See [Generating a job
+# ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+# @param [String] prefix A string, usually human-readable, that will be
+# prepended to a generated value to produce a unique job ID. For
+# example, the prefix `daily_import_job_` can be given to generate a
+# job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
+# prefix must contain only letters (a-z, A-Z), numbers (0-9),
+# underscores (_), or dashes (-). The maximum length of the entire ID
+# is 1,024 characters. If `job_id` is provided, then `prefix` will not
+# be used.
+# @param [Hash] labels A hash of user-provided labels associated with
+# the job. You can use these to organize and group your jobs. Label
+# keys and values can be no longer than 63 characters, can only
+# contain lowercase letters, numeric characters, underscores and
+# dashes. International characters are allowed. Label values are
+# optional. Label keys must start with a letter and each label in the
+# list must have a different key.
 #
 # @yield [schema] A block for setting the schema for the destination
 # table. The schema can be omitted if the destination table already
@@ -941,7 +1202,7 @@ module Google
 # instance provided using the `schema` option, or a new, empty schema
 # instance
 #
-# @return [Google::Cloud::Bigquery::LoadJob]
+# @return [Google::Cloud::Bigquery::LoadJob] A new load job object.
 #
 # @example
 # require "google/cloud/bigquery"
@@ -950,7 +1211,7 @@ module Google
 # dataset = bigquery.dataset "my_dataset"
 #
 # gs_url = "gs://my-bucket/file-name.csv"
-# load_job = dataset.
+# load_job = dataset.load_job "my_new_table", gs_url do |schema|
 # schema.string "first_name", mode: :required
 # schema.record "cities_lived", mode: :repeated do |nested_schema|
 # nested_schema.string "place", mode: :required
@@ -968,7 +1229,7 @@ module Google
 # storage = Google::Cloud::Storage.new
 # bucket = storage.bucket "my-bucket"
 # file = bucket.file "file-name.csv"
-# load_job = dataset.
+# load_job = dataset.load_job "my_new_table", file do |schema|
 # schema.string "first_name", mode: :required
 # schema.record "cities_lived", mode: :repeated do |nested_schema|
 # nested_schema.string "place", mode: :required
@@ -983,7 +1244,7 @@ module Google
 # dataset = bigquery.dataset "my_dataset"
 #
 # file = File.open "my_data.csv"
-# load_job = dataset.
+# load_job = dataset.load_job "my_new_table", file do |schema|
 # schema.string "first_name", mode: :required
 # schema.record "cities_lived", mode: :repeated do |nested_schema|
 # nested_schema.string "place", mode: :required
@@ -997,17 +1258,18 @@ module Google
 # bigquery = Google::Cloud::Bigquery.new
 # dataset = bigquery.dataset "my_dataset"
 #
-# load_job = dataset.
+# load_job = dataset.load_job "my_new_table",
 # "gs://my-bucket/xxxx.kind_name.backup_info",
 # format: "datastore_backup"
 #
 # @!group Data
 #
-def
-
-
-
-
+def load_job table_id, file, format: nil, create: nil, write: nil,
+projection_fields: nil, jagged_rows: nil,
+quoted_newlines: nil, encoding: nil, delimiter: nil,
+ignore_unknown: nil, max_bad_records: nil, quote: nil,
+skip_leading: nil, dryrun: nil, schema: nil, job_id: nil,
+prefix: nil, labels: nil, autodetect: nil, null_marker: nil
 ensure_service!
 
 if block_given?
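The new load options documented above (autodetect, null_marker, job labels) get no @example of their own in this diff; a hedged sketch follows, with placeholder table and bucket names:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset  = bigquery.dataset "my_dataset"

    # Let BigQuery infer the schema and treat "\N" as NULL while loading a CSV.
    load_job = dataset.load_job "my_new_table",
                                "gs://my-bucket/file-name.csv",
                                autodetect:  true,
                                null_marker: "\\N",
                                labels: { "origin" => "nightly_export" }

    load_job.wait_until_done!
    puts "load failed" if load_job.failed?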
@@ -1023,12 +1285,228 @@ module Google
 delimiter: delimiter, ignore_unknown: ignore_unknown,
 max_bad_records: max_bad_records, quote: quote,
 skip_leading: skip_leading, dryrun: dryrun,
-schema: schema_gapi
+schema: schema_gapi, job_id: job_id, prefix: prefix,
+labels: labels, autodetect: autodetect,
+null_marker: null_marker }
 return load_storage(table_id, file, options) if storage_url? file
 return load_local(table_id, file, options) if local_file? file
 fail Google::Cloud::Error, "Don't know how to load #{file}"
 end
 
+##
+# Loads data into the provided destination table using a synchronous
+# method that blocks for a response. Timeouts and transient errors are
+# generally handled as needed to complete the job. See also
+# {#load_job}.
+#
+# For the source of the data, you can pass a google-cloud storage file
+# path or a google-cloud-storage `File` instance. Or, you can upload a
+# file directly. See [Loading Data with a POST
+# Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
+#
+# @param [String] table_id The destination table to load the data into.
+# @param [File, Google::Cloud::Storage::File, String] file A file or the
+# URI of a Google Cloud Storage file containing data to load into the
+# table.
+# @param [String] format The exported file format. The default value is
+# `csv`.
+#
+# The following values are supported:
+#
+# * `csv` - CSV
+# * `json` - [Newline-delimited JSON](http://jsonlines.org/)
+# * `avro` - [Avro](http://avro.apache.org/)
+# * `datastore_backup` - Cloud Datastore backup
+# @param [String] create Specifies whether the job is allowed to create
+# new tables. The default value is `needed`.
+#
+# The following values are supported:
+#
+# * `needed` - Create the table if it does not exist.
+# * `never` - The table must already exist. A 'notFound' error is
+# raised if the table does not exist.
+# @param [String] write Specifies how to handle data already present in
+# the table. The default value is `append`.
+#
+# The following values are supported:
+#
+# * `truncate` - BigQuery overwrites the table data.
+# * `append` - BigQuery appends the data to the table.
+# * `empty` - An error will be returned if the table already contains
+# data.
+# @param [Array<String>] projection_fields If the `format` option is set
+# to `datastore_backup`, indicates which entity properties to load
+# from a Cloud Datastore backup. Property names are case sensitive and
+# must be top-level properties. If not set, BigQuery loads all
+# properties. If any named property isn't found in the Cloud Datastore
+# backup, an invalid error is returned.
+# @param [Boolean] jagged_rows Accept rows that are missing trailing
+# optional columns. The missing values are treated as nulls. If
+# `false`, records with missing trailing columns are treated as bad
+# records, and if there are too many bad records, an invalid error is
+# returned in the job result. The default value is `false`. Only
+# applicable to CSV, ignored for other formats.
+# @param [Boolean] quoted_newlines Indicates if BigQuery should allow
+# quoted data sections that contain newline characters in a CSV file.
+# The default value is `false`.
+# @param [Boolean] autodetect Indicates if BigQuery should
+# automatically infer the options and schema for CSV and JSON sources.
+# The default value is `false`.
+# @param [String] encoding The character encoding of the data. The
+# supported values are `UTF-8` or `ISO-8859-1`. The default value is
+# `UTF-8`.
+# @param [String] delimiter Specifices the separator for fields in a CSV
+# file. BigQuery converts the string to `ISO-8859-1` encoding, and
+# then uses the first byte of the encoded string to split the data in
+# its raw, binary state. Default is <code>,</code>.
+# @param [Boolean] ignore_unknown Indicates if BigQuery should allow
+# extra values that are not represented in the table schema. If true,
+# the extra values are ignored. If false, records with extra columns
+# are treated as bad records, and if there are too many bad records,
+# an invalid error is returned in the job result. The default value is
+# `false`.
+#
+# The `format` property determines what BigQuery treats as an extra
+# value:
+#
+# * `CSV`: Trailing columns
+# * `JSON`: Named values that don't match any column names
+# @param [Integer] max_bad_records The maximum number of bad records
+# that BigQuery can ignore when running the job. If the number of bad
+# records exceeds this value, an invalid error is returned in the job
+# result. The default value is `0`, which requires that all records
+# are valid.
+# @param [String] null_marker Specifies a string that represents a null
+# value in a CSV file. For example, if you specify `\N`, BigQuery
+# interprets `\N` as a null value when loading a CSV file. The default
+# value is the empty string. If you set this property to a custom
+# value, BigQuery throws an error if an empty string is present for
+# all data types except for STRING and BYTE. For STRING and BYTE
+# columns, BigQuery interprets the empty string as an empty value.
+# @param [String] quote The value that is used to quote data sections in
+# a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
+# then uses the first byte of the encoded string to split the data in
+# its raw, binary state. The default value is a double-quote
+# <code>"</code>. If your data does not contain quoted sections, set
+# the property value to an empty string. If your data contains quoted
+# newline characters, you must also set the allowQuotedNewlines
+# property to true.
+# @param [Integer] skip_leading The number of rows at the top of a CSV
+# file that BigQuery will skip when loading the data. The default
+# value is `0`. This property is useful if you have header rows in the
+# file that should be skipped.
+# @param [Google::Cloud::Bigquery::Schema] schema The schema for the
+# destination table. Optional. The schema can be omitted if the
+# destination table already exists, or if you're loading data from a
+# Google Cloud Datastore backup.
+#
+# See {Project#schema} for the creation of the schema for use with
+# this option. Also note that for most use cases, the block yielded by
+# this method is a more convenient way to configure the schema.
+#
+# @yield [schema] A block for setting the schema for the destination
+# table. The schema can be omitted if the destination table already
+# exists, or if you're loading data from a Google Cloud Datastore
+# backup.
+# @yieldparam [Google::Cloud::Bigquery::Schema] schema The schema
+# instance provided using the `schema` option, or a new, empty schema
+# instance
+#
+# @return [Boolean] Returns `true` if the load job was successful.
+#
+# @example
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+# dataset = bigquery.dataset "my_dataset"
+#
+# gs_url = "gs://my-bucket/file-name.csv"
+# dataset.load "my_new_table", gs_url do |schema|
+# schema.string "first_name", mode: :required
+# schema.record "cities_lived", mode: :repeated do |nested_schema|
+# nested_schema.string "place", mode: :required
+# nested_schema.integer "number_of_years", mode: :required
+# end
+# end
+#
+# @example Pass a google-cloud-storage `File` instance:
+# require "google/cloud/bigquery"
+# require "google/cloud/storage"
+#
+# bigquery = Google::Cloud::Bigquery.new
+# dataset = bigquery.dataset "my_dataset"
+#
+# storage = Google::Cloud::Storage.new
+# bucket = storage.bucket "my-bucket"
+# file = bucket.file "file-name.csv"
+# dataset.load "my_new_table", file do |schema|
+# schema.string "first_name", mode: :required
+# schema.record "cities_lived", mode: :repeated do |nested_schema|
+# nested_schema.string "place", mode: :required
+# nested_schema.integer "number_of_years", mode: :required
+# end
+# end
+#
+# @example Upload a file directly:
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+# dataset = bigquery.dataset "my_dataset"
+#
+# file = File.open "my_data.csv"
+# dataset.load "my_new_table", file do |schema|
+# schema.string "first_name", mode: :required
+# schema.record "cities_lived", mode: :repeated do |nested_schema|
+# nested_schema.string "place", mode: :required
+# nested_schema.integer "number_of_years", mode: :required
+# end
+# end
+#
+# @example Schema is not required with a Cloud Datastore backup:
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+# dataset = bigquery.dataset "my_dataset"
+#
+# dataset.load "my_new_table",
+# "gs://my-bucket/xxxx.kind_name.backup_info",
+# format: "datastore_backup"
+#
+# @!group Data
+#
+def load table_id, file, format: nil, create: nil, write: nil,
+projection_fields: nil, jagged_rows: nil, quoted_newlines: nil,
+encoding: nil, delimiter: nil, ignore_unknown: nil,
+max_bad_records: nil, quote: nil, skip_leading: nil,
+schema: nil, autodetect: nil, null_marker: nil
+
+yield (schema ||= Schema.from_gapi) if block_given?
+
+options = { format: format, create: create, write: write,
+projection_fields: projection_fields,
+jagged_rows: jagged_rows,
+quoted_newlines: quoted_newlines, encoding: encoding,
+delimiter: delimiter, ignore_unknown: ignore_unknown,
+max_bad_records: max_bad_records, quote: quote,
+skip_leading: skip_leading, schema: schema,
+autodetect: autodetect, null_marker: null_marker }
+job = load_job table_id, file, options
+
+job.wait_until_done!
+
+if job.failed?
+begin
+# raise to activate ruby exception cause handling
+fail job.gapi_error
+rescue => e
+# wrap Google::Apis::Error with Google::Cloud::Error
+raise Google::Cloud::Error.from_error(e)
+end
+end
+
+true
+end
+
 ##
 # @private New Dataset from a Google API Client object.
 def self.from_gapi gapi, conn
@@ -1038,8 +1516,158 @@ module Google
 end
 end
 
+##
+# Inserts data into the given table for near-immediate querying, without
+# the need to complete a load operation before the data can appear in
+# query results.
+#
+# @see https://cloud.google.com/bigquery/streaming-data-into-bigquery
+# Streaming Data Into BigQuery
+#
+# @param [String] table_id The ID of the destination table.
+# @param [Hash, Array<Hash>] rows A hash object or array of hash objects
+# containing the data. Required.
+# @param [Boolean] skip_invalid Insert all valid rows of a request, even
+# if invalid rows exist. The default value is `false`, which causes
+# the entire request to fail if any invalid rows exist.
+# @param [Boolean] ignore_unknown Accept rows that contain values that
+# do not match the schema. The unknown values are ignored. Default is
+# false, which treats unknown values as errors.
+# @param [Boolean] autocreate Specifies whether the method should create
+# a new table with the given `table_id`, if no table is found for
+# `table_id`. The default value is false.
+#
+# @return [Google::Cloud::Bigquery::InsertResponse] An insert response
+# object.
+#
+# @example
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+# dataset = bigquery.dataset "my_dataset"
+#
+# rows = [
+# { "first_name" => "Alice", "age" => 21 },
+# { "first_name" => "Bob", "age" => 22 }
+# ]
+# dataset.insert "my_table", rows
+#
+# @example Using `autocreate` to create a new table if none exists.
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+# dataset = bigquery.dataset "my_dataset"
+#
+# rows = [
+# { "first_name" => "Alice", "age" => 21 },
+# { "first_name" => "Bob", "age" => 22 }
+# ]
+# dataset.insert "my_table", rows, autocreate: true do |t|
+# t.schema.string "first_name", mode: :required
+# t.schema.integer "age", mode: :required
+# end
+#
+# @!group Data
+#
+def insert table_id, rows, skip_invalid: nil, ignore_unknown: nil,
+autocreate: nil
+if autocreate
+begin
+insert_data table_id, rows, skip_invalid: skip_invalid,
+ignore_unknown: ignore_unknown
+rescue Google::Cloud::NotFoundError
+sleep rand(1..60)
+begin
+create_table table_id do |tbl_updater|
+yield tbl_updater if block_given?
+end
+# rubocop:disable Lint/HandleExceptions
+rescue Google::Cloud::AlreadyExistsError
+end
+# rubocop:enable Lint/HandleExceptions
+
+sleep 60
+insert table_id, rows, skip_invalid: skip_invalid,
+ignore_unknown: ignore_unknown,
+autocreate: true
+end
+else
+insert_data table_id, rows, skip_invalid: skip_invalid,
+ignore_unknown: ignore_unknown
+end
+end
+
+##
+# Create an asynchonous inserter object used to insert rows in batches.
+#
+# @param [String] table_id The ID of the table to insert rows into.
+# @param [Boolean] skip_invalid Insert all valid rows of a request, even
+# if invalid rows exist. The default value is `false`, which causes
+# the entire request to fail if any invalid rows exist.
+# @param [Boolean] ignore_unknown Accept rows that contain values that
+# do not match the schema. The unknown values are ignored. Default is
+# false, which treats unknown values as errors.
+# @attr_reader [Integer] max_bytes The maximum size of rows to be
+# collected before the batch is published. Default is 10,000,000
+# (10MB).
+# @param [Integer] max_rows The maximum number of rows to be collected
+# before the batch is published. Default is 500.
+# @attr_reader [Numeric] interval The number of seconds to collect
+# messages before the batch is published. Default is 10.
+# @attr_reader [Numeric] threads The number of threads used to insert
+# batches of rows. Default is 4.
+# @yield [response] the callback for when a batch of rows is inserted
+# @yieldparam [InsertResponse] response the result of the asynchonous
+# insert
+#
+# @return [Table::AsyncInserter] Returns an inserter object.
+#
+# @example
+# require "google/cloud/bigquery"
+#
+# bigquery = Google::Cloud::Bigquery.new
+# dataset = bigquery.dataset "my_dataset"
+# table = dataset.table "my_table"
+# inserter = table.insert_async do |response|
+# log_insert "inserted #{response.insert_count} rows " \
+# "with #{response.error_count} errors"
+# end
+#
+# rows = [
+# { "first_name" => "Alice", "age" => 21 },
+# { "first_name" => "Bob", "age" => 22 }
+# ]
+# inserter.insert rows
+#
+# inserter.stop.wait!
+#
+def insert_async table_id, skip_invalid: nil, ignore_unknown: nil,
+max_bytes: 10000000, max_rows: 500, interval: 10,
+threads: 4, &block
+ensure_service!
+
+# Get table, don't use Dataset#table which handles NotFoundError
+gapi = service.get_table dataset_id, table_id
+table = Table.from_gapi gapi, service
+# Get the AsyncInserter from the table
+table.insert_async skip_invalid: skip_invalid,
+ignore_unknown: ignore_unknown,
+max_bytes: max_bytes, max_rows: max_rows,
+interval: interval, threads: threads, &block
+end
+
 protected
 
+def insert_data table_id, rows, skip_invalid: nil, ignore_unknown: nil
+rows = [rows] if rows.is_a? Hash
+fail ArgumentError, "No rows provided" if rows.empty?
+ensure_service!
+options = { skip_invalid: skip_invalid,
+ignore_unknown: ignore_unknown }
+gapi = service.insert_tabledata dataset_id, table_id, rows, options
+InsertResponse.from_gapi rows, gapi
+end
+
 ##
 # Raise an error unless an active service is available.
 def ensure_service!
@@ -1053,6 +1681,7 @@ module Google
 [attr, @gapi.send(attr)]
 end]
 patch_gapi = Google::Apis::BigqueryV2::Dataset.new patch_args
+patch_gapi.etag = etag if etag
 @gapi = service.patch_dataset dataset_id, patch_gapi
 end
 
@@ -1101,6 +1730,19 @@ module Google
 false
 end
 
+def udfs_gapi array_or_str
+return [] if array_or_str.nil?
+Array(array_or_str).map do |uri_or_code|
+resource = Google::Apis::BigqueryV2::UserDefinedFunctionResource.new
+if uri_or_code.start_with?("gs://")
+resource.resource_uri = uri_or_code
+else
+resource.inline_code = uri_or_code
+end
+resource
+end
+end
+
 ##
 # Yielded to a block to accumulate changes for a patch request.
 class Updater < Dataset