google-cloud-bigquery 1.21.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +16 -0
- data/AUTHENTICATION.md +158 -0
- data/CHANGELOG.md +397 -0
- data/CODE_OF_CONDUCT.md +40 -0
- data/CONTRIBUTING.md +188 -0
- data/LICENSE +201 -0
- data/LOGGING.md +27 -0
- data/OVERVIEW.md +463 -0
- data/TROUBLESHOOTING.md +31 -0
- data/lib/google-cloud-bigquery.rb +139 -0
- data/lib/google/cloud/bigquery.rb +145 -0
- data/lib/google/cloud/bigquery/argument.rb +197 -0
- data/lib/google/cloud/bigquery/convert.rb +383 -0
- data/lib/google/cloud/bigquery/copy_job.rb +316 -0
- data/lib/google/cloud/bigquery/credentials.rb +50 -0
- data/lib/google/cloud/bigquery/data.rb +526 -0
- data/lib/google/cloud/bigquery/dataset.rb +2845 -0
- data/lib/google/cloud/bigquery/dataset/access.rb +1021 -0
- data/lib/google/cloud/bigquery/dataset/list.rb +162 -0
- data/lib/google/cloud/bigquery/encryption_configuration.rb +123 -0
- data/lib/google/cloud/bigquery/external.rb +2432 -0
- data/lib/google/cloud/bigquery/extract_job.rb +368 -0
- data/lib/google/cloud/bigquery/insert_response.rb +180 -0
- data/lib/google/cloud/bigquery/job.rb +657 -0
- data/lib/google/cloud/bigquery/job/list.rb +162 -0
- data/lib/google/cloud/bigquery/load_job.rb +1704 -0
- data/lib/google/cloud/bigquery/model.rb +740 -0
- data/lib/google/cloud/bigquery/model/list.rb +164 -0
- data/lib/google/cloud/bigquery/project.rb +1655 -0
- data/lib/google/cloud/bigquery/project/list.rb +161 -0
- data/lib/google/cloud/bigquery/query_job.rb +1695 -0
- data/lib/google/cloud/bigquery/routine.rb +1108 -0
- data/lib/google/cloud/bigquery/routine/list.rb +165 -0
- data/lib/google/cloud/bigquery/schema.rb +564 -0
- data/lib/google/cloud/bigquery/schema/field.rb +668 -0
- data/lib/google/cloud/bigquery/service.rb +589 -0
- data/lib/google/cloud/bigquery/standard_sql.rb +495 -0
- data/lib/google/cloud/bigquery/table.rb +3340 -0
- data/lib/google/cloud/bigquery/table/async_inserter.rb +520 -0
- data/lib/google/cloud/bigquery/table/list.rb +172 -0
- data/lib/google/cloud/bigquery/time.rb +65 -0
- data/lib/google/cloud/bigquery/version.rb +22 -0
- metadata +297 -0
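
Most of this release's surface is exercised through `Dataset#load_job` and the job classes diffed below. As a quick orientation before the raw diff, here is a minimal usage sketch assembled from the `@example` blocks in the sources that follow; the dataset, table, and bucket names are placeholders, and default application credentials are assumed.

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset" # placeholder dataset name

# Start a load job from Cloud Storage, defining the schema inline.
load_job = dataset.load_job "my_new_table", "gs://my-bucket/file-name.csv" do |schema|
  schema.string "first_name", mode: :required
  schema.integer "age" # mode defaults to :nullable
end

load_job.wait_until_done! # polls until the job finishes
load_job.done? #=> true
```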
--- /dev/null
+++ b/data/lib/google/cloud/bigquery/job/list.rb
@@ -0,0 +1,162 @@
+# Copyright 2015 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+require "delegate"
+
+module Google
+  module Cloud
+    module Bigquery
+      class Job
+        ##
+        # Job::List is a special case Array with additional values.
+        class List < DelegateClass(::Array)
+          ##
+          # If not empty, indicates that there are more records that match
+          # the request and this value should be passed to continue.
+          attr_accessor :token
+
+          # A hash of this page of results.
+          attr_accessor :etag
+
+          ##
+          # @private Create a new Job::List with an array of jobs.
+          def initialize arr = []
+            super arr
+          end
+
+          ##
+          # Whether there is a next page of jobs.
+          #
+          # @return [Boolean]
+          #
+          # @example
+          #   require "google/cloud/bigquery"
+          #
+          #   bigquery = Google::Cloud::Bigquery.new
+          #
+          #   jobs = bigquery.jobs
+          #   if jobs.next?
+          #     next_jobs = jobs.next
+          #   end
+          def next?
+            !token.nil?
+          end
+
+          ##
+          # Retrieve the next page of jobs.
+          #
+          # @return [Job::List]
+          #
+          # @example
+          #   require "google/cloud/bigquery"
+          #
+          #   bigquery = Google::Cloud::Bigquery.new
+          #
+          #   jobs = bigquery.jobs
+          #   if jobs.next?
+          #     next_jobs = jobs.next
+          #   end
+          def next
+            return nil unless next?
+            ensure_service!
+            next_kwargs = @kwargs.merge token: token
+            next_gapi = @service.list_jobs next_kwargs
+            self.class.from_gapi next_gapi, @service, next_kwargs
+          end
+
+          ##
+          # Retrieves remaining results by repeatedly invoking {#next} until
+          # {#next?} returns `false`. Calls the given block once for each
+          # result, which is passed as the argument to the block.
+          #
+          # An Enumerator is returned if no block is given.
+          #
+          # This method will make repeated API calls until all remaining results
+          # are retrieved. (Unlike `#each`, for example, which merely iterates
+          # over the results returned by a single API call.) Use with caution.
+          #
+          # @param [Integer] request_limit The upper limit of API requests to
+          #   make to load all jobs. Default is no limit.
+          # @yield [job] The block for accessing each job.
+          # @yieldparam [Job] job The job object.
+          #
+          # @return [Enumerator]
+          #
+          # @example Iterating each job by passing a block:
+          #   require "google/cloud/bigquery"
+          #
+          #   bigquery = Google::Cloud::Bigquery.new
+          #
+          #   bigquery.jobs.all do |job|
+          #     puts job.state
+          #   end
+          #
+          # @example Using the enumerator by not passing a block:
+          #   require "google/cloud/bigquery"
+          #
+          #   bigquery = Google::Cloud::Bigquery.new
+          #
+          #   all_states = bigquery.jobs.all.map do |job|
+          #     job.state
+          #   end
+          #
+          # @example Limit the number of API calls made:
+          #   require "google/cloud/bigquery"
+          #
+          #   bigquery = Google::Cloud::Bigquery.new
+          #
+          #   bigquery.jobs.all(request_limit: 10) do |job|
+          #     puts job.state
+          #   end
+          #
+          def all request_limit: nil
+            request_limit = request_limit.to_i if request_limit
+            return enum_for :all, request_limit: request_limit unless block_given?
+            results = self
+            loop do
+              results.each { |r| yield r }
+              if request_limit
+                request_limit -= 1
+                break if request_limit.negative?
+              end
+              break unless results.next?
+              results = results.next
+            end
+          end
+
+          ##
+          # @private New Job::List from a Google API Client
+          # Google::Apis::BigqueryV2::JobList object.
+          def self.from_gapi gapi_list, service, **kwargs
+            jobs = List.new(Array(gapi_list.jobs).map { |gapi_object| Job.from_gapi gapi_object, service })
+            jobs.instance_variable_set :@token, gapi_list.next_page_token
+            jobs.instance_variable_set :@etag, gapi_list.etag
+            jobs.instance_variable_set :@service, service
+            jobs.instance_variable_set :@kwargs, kwargs
+            jobs
+          end
+
+          protected
+
+          ##
+          # Raise an error unless an active service is available.
+          def ensure_service!
+            raise "Must have active connection" unless @service
+          end
+        end
+      end
+    end
+  end
+end
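
`Job::List#all` above wraps the token-based pagination exposed by `#next?` and `#next`; a caller can also walk the pages by hand. A short sketch, under the same placeholder-credentials assumption as the example above:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

jobs = bigquery.jobs
loop do
  jobs.each { |job| puts job.state }
  break unless jobs.next? # false once the page token is nil
  jobs = jobs.next        # fetches the next page using the stored token
end
```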
@@ -0,0 +1,1704 @@
|
|
1
|
+
# Copyright 2015 Google LLC
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
require "google/cloud/bigquery/service"
|
17
|
+
require "google/cloud/bigquery/encryption_configuration"
|
18
|
+
|
19
|
+
module Google
|
20
|
+
module Cloud
|
21
|
+
module Bigquery
|
22
|
+
##
|
23
|
+
# # LoadJob
|
24
|
+
#
|
25
|
+
# A {Job} subclass representing a load operation that may be performed
|
26
|
+
# on a {Table}. A LoadJob instance is created when you call
|
27
|
+
# {Table#load_job}.
|
28
|
+
#
|
29
|
+
# @see https://cloud.google.com/bigquery/loading-data
|
30
|
+
# Loading Data Into BigQuery
|
31
|
+
# @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
|
32
|
+
# reference
|
33
|
+
#
|
34
|
+
# @example
|
35
|
+
# require "google/cloud/bigquery"
|
36
|
+
#
|
37
|
+
# bigquery = Google::Cloud::Bigquery.new
|
38
|
+
# dataset = bigquery.dataset "my_dataset"
|
39
|
+
#
|
40
|
+
# gs_url = "gs://my-bucket/file-name.csv"
|
41
|
+
# load_job = dataset.load_job "my_new_table", gs_url do |schema|
|
42
|
+
# schema.string "first_name", mode: :required
|
43
|
+
# schema.record "cities_lived", mode: :repeated do |nested_schema|
|
44
|
+
# nested_schema.string "place", mode: :required
|
45
|
+
# nested_schema.integer "number_of_years", mode: :required
|
46
|
+
# end
|
47
|
+
# end
|
48
|
+
#
|
49
|
+
# load_job.wait_until_done!
|
50
|
+
# load_job.done? #=> true
|
51
|
+
#
|
52
|
+
#
|
53
|
+
class LoadJob < Job
|
54
|
+
##
|
55
|
+
# The URI or URIs representing the Google Cloud Storage files from which
|
56
|
+
# the operation loads data.
|
57
|
+
def sources
|
58
|
+
Array @gapi.configuration.load.source_uris
|
59
|
+
end
|
60
|
+
|
61
|
+
##
|
62
|
+
# The table into which the operation loads data. This is the table on
|
63
|
+
# which {Table#load_job} was invoked.
|
64
|
+
#
|
65
|
+
# @return [Table] A table instance.
|
66
|
+
#
|
67
|
+
def destination
|
68
|
+
table = @gapi.configuration.load.destination_table
|
69
|
+
return nil unless table
|
70
|
+
retrieve_table table.project_id, table.dataset_id, table.table_id
|
71
|
+
end
|
72
|
+
|
73
|
+
##
|
74
|
+
# The delimiter used between fields in the source data. The default is a
|
75
|
+
# comma (,).
|
76
|
+
#
|
77
|
+
# @return [String] A string containing the character, such as `","`.
|
78
|
+
#
|
79
|
+
def delimiter
|
80
|
+
@gapi.configuration.load.field_delimiter || ","
|
81
|
+
end
|
82
|
+
|
83
|
+
##
|
84
|
+
# The number of rows at the top of a CSV file that BigQuery will skip
|
85
|
+
# when loading the data. The default value is 0. This property is useful
|
86
|
+
# if you have header rows in the file that should be skipped.
|
87
|
+
#
|
88
|
+
# @return [Integer] The number of header rows at the top of a CSV file
|
89
|
+
# to skip.
|
90
|
+
#
|
91
|
+
def skip_leading_rows
|
92
|
+
@gapi.configuration.load.skip_leading_rows || 0
|
93
|
+
end
|
94
|
+
|
95
|
+
##
|
96
|
+
# Checks if the character encoding of the data is UTF-8. This is the
|
97
|
+
# default.
|
98
|
+
#
|
99
|
+
# @return [Boolean] `true` when the character encoding is UTF-8,
|
100
|
+
# `false` otherwise.
|
101
|
+
#
|
102
|
+
def utf8?
|
103
|
+
val = @gapi.configuration.load.encoding
|
104
|
+
return true if val.nil?
|
105
|
+
val == "UTF-8"
|
106
|
+
end
|
107
|
+
|
108
|
+
##
|
109
|
+
# Checks if the character encoding of the data is ISO-8859-1.
|
110
|
+
#
|
111
|
+
# @return [Boolean] `true` when the character encoding is ISO-8859-1,
|
112
|
+
# `false` otherwise.
|
113
|
+
#
|
114
|
+
def iso8859_1?
|
115
|
+
val = @gapi.configuration.load.encoding
|
116
|
+
val == "ISO-8859-1"
|
117
|
+
end
|
118
|
+
|
119
|
+
##
|
120
|
+
# The value that is used to quote data sections in a CSV file. The
|
121
|
+
# default value is a double-quote (`"`). If your data does not contain
|
122
|
+
# quoted sections, the value should be an empty string. If your data
|
123
|
+
# contains quoted newline characters, {#quoted_newlines?} should return
|
124
|
+
# `true`.
|
125
|
+
#
|
126
|
+
# @return [String] A string containing the character, such as `"\""`.
|
127
|
+
#
|
128
|
+
def quote
|
129
|
+
val = @gapi.configuration.load.quote
|
130
|
+
val = "\"" if val.nil?
|
131
|
+
val
|
132
|
+
end
|
133
|
+
|
134
|
+
##
|
135
|
+
# The maximum number of bad records that the load operation can ignore.
|
136
|
+
# If the number of bad records exceeds this value, an error is returned.
|
137
|
+
# The default value is `0`, which requires that all records be valid.
|
138
|
+
#
|
139
|
+
# @return [Integer] The maximum number of bad records.
|
140
|
+
#
|
141
|
+
def max_bad_records
|
142
|
+
val = @gapi.configuration.load.max_bad_records
|
143
|
+
val = 0 if val.nil?
|
144
|
+
val
|
145
|
+
end
|
146
|
+
|
147
|
+
##
|
148
|
+
# Specifies a string that represents a null value in a CSV file. For
|
149
|
+
# example, if you specify `\N`, BigQuery interprets `\N` as a null value
|
150
|
+
# when loading a CSV file. The default value is the empty string. If you
|
151
|
+
# set this property to a custom value, BigQuery throws an error if an
|
152
|
+
# empty string is present for all data types except for STRING and BYTE.
|
153
|
+
# For STRING and BYTE columns, BigQuery interprets the empty string as
|
154
|
+
# an empty value.
|
155
|
+
#
|
156
|
+
# @return [String] A string representing null value in a CSV file.
|
157
|
+
#
|
158
|
+
def null_marker
|
159
|
+
val = @gapi.configuration.load.null_marker
|
160
|
+
val = "" if val.nil?
|
161
|
+
val
|
162
|
+
end
|
163
|
+
|
164
|
+
##
|
165
|
+
# Checks if quoted data sections may contain newline characters in a CSV
|
166
|
+
# file. The default is `false`.
|
167
|
+
#
|
168
|
+
# @return [Boolean] `true` when quoted newlines are allowed, `false`
|
169
|
+
# otherwise.
|
170
|
+
#
|
171
|
+
def quoted_newlines?
|
172
|
+
val = @gapi.configuration.load.allow_quoted_newlines
|
173
|
+
val = false if val.nil?
|
174
|
+
val
|
175
|
+
end
|
176
|
+
|
177
|
+
##
|
178
|
+
# Checks if BigQuery should automatically infer the options and schema
|
179
|
+
# for CSV and JSON sources. The default is `false`.
|
180
|
+
#
|
181
|
+
# @return [Boolean] `true` when autodetect is enabled, `false`
|
182
|
+
# otherwise.
|
183
|
+
#
|
184
|
+
def autodetect?
|
185
|
+
val = @gapi.configuration.load.autodetect
|
186
|
+
val = false if val.nil?
|
187
|
+
val
|
188
|
+
end
|
189
|
+
|
190
|
+
##
|
191
|
+
# Checks if the format of the source data is [newline-delimited
|
192
|
+
# JSON](http://jsonlines.org/). The default is `false`.
|
193
|
+
#
|
194
|
+
# @return [Boolean] `true` when the source format is
|
195
|
+
# `NEWLINE_DELIMITED_JSON`, `false` otherwise.
|
196
|
+
#
|
197
|
+
def json?
|
198
|
+
val = @gapi.configuration.load.source_format
|
199
|
+
val == "NEWLINE_DELIMITED_JSON"
|
200
|
+
end
|
201
|
+
|
202
|
+
##
|
203
|
+
# Checks if the format of the source data is CSV. The default is `true`.
|
204
|
+
#
|
205
|
+
# @return [Boolean] `true` when the source format is `CSV`, `false`
|
206
|
+
# otherwise.
|
207
|
+
#
|
208
|
+
def csv?
|
209
|
+
val = @gapi.configuration.load.source_format
|
210
|
+
return true if val.nil?
|
211
|
+
val == "CSV"
|
212
|
+
end
|
213
|
+
|
214
|
+
##
|
215
|
+
# Checks if the source data is a Google Cloud Datastore backup.
|
216
|
+
#
|
217
|
+
# @return [Boolean] `true` when the source format is `DATASTORE_BACKUP`,
|
218
|
+
# `false` otherwise.
|
219
|
+
#
|
220
|
+
def backup?
|
221
|
+
val = @gapi.configuration.load.source_format
|
222
|
+
val == "DATASTORE_BACKUP"
|
223
|
+
end
|
224
|
+
|
225
|
+
##
|
226
|
+
# Checks if the load operation accepts rows that are missing trailing
|
227
|
+
# optional columns. The missing values are treated as nulls. If `false`,
|
228
|
+
# records with missing trailing columns are treated as bad records, and
|
229
|
+
# if there are too many bad records, an error is returned. The default
|
230
|
+
# value is `false`. Only applicable to CSV, ignored for other formats.
|
231
|
+
#
|
232
|
+
# @return [Boolean] `true` when jagged rows are allowed, `false`
|
233
|
+
# otherwise.
|
234
|
+
#
|
235
|
+
def allow_jagged_rows?
|
236
|
+
val = @gapi.configuration.load.allow_jagged_rows
|
237
|
+
val = false if val.nil?
|
238
|
+
val
|
239
|
+
end
|
240
|
+
|
241
|
+
##
|
242
|
+
# Checks if the load operation allows extra values that are not
|
243
|
+
# represented in the table schema. If `true`, the extra values are
|
244
|
+
# ignored. If `false`, records with extra columns are treated as bad
|
245
|
+
# records, and if there are too many bad records, an invalid error is
|
246
|
+
# returned. The default is `false`.
|
247
|
+
#
|
248
|
+
# @return [Boolean] `true` when unknown values are ignored, `false`
|
249
|
+
# otherwise.
|
250
|
+
#
|
251
|
+
def ignore_unknown_values?
|
252
|
+
val = @gapi.configuration.load.ignore_unknown_values
|
253
|
+
val = false if val.nil?
|
254
|
+
val
|
255
|
+
end
|
256
|
+
|
257
|
+
##
|
258
|
+
# The schema for the destination table. The schema can be omitted if the
|
259
|
+
# destination table already exists, or if you're loading data from
|
260
|
+
# Google Cloud Datastore.
|
261
|
+
#
|
262
|
+
# The returned object is frozen and changes are not allowed. Use
|
263
|
+
# {Table#schema} to update the schema.
|
264
|
+
#
|
265
|
+
# @return [Schema, nil] A schema object, or `nil`.
|
266
|
+
#
|
267
|
+
def schema
|
268
|
+
Schema.from_gapi(@gapi.configuration.load.schema).freeze
|
269
|
+
end
|
270
|
+
|
271
|
+
##
|
272
|
+
# Allows the schema of the destination table to be updated as a side
|
273
|
+
# effect of the load job if a schema is autodetected or supplied in the
|
274
|
+
# job configuration. Schema update options are supported in two cases:
|
275
|
+
# when write disposition is `WRITE_APPEND`; when write disposition is
|
276
|
+
# `WRITE_TRUNCATE` and the destination table is a partition of a table,
|
277
|
+
# specified by partition decorators. For normal tables, `WRITE_TRUNCATE`
|
278
|
+
# will always overwrite the schema. One or more of the following values
|
279
|
+
# are specified:
|
280
|
+
#
|
281
|
+
# * `ALLOW_FIELD_ADDITION`: allow adding a nullable field to the schema.
|
282
|
+
# * `ALLOW_FIELD_RELAXATION`: allow relaxing a required field in the
|
283
|
+
# original schema to nullable.
|
284
|
+
#
|
285
|
+
# @return [Array<String>] An array of strings.
|
286
|
+
#
|
287
|
+
def schema_update_options
|
288
|
+
Array @gapi.configuration.load.schema_update_options
|
289
|
+
end
|
290
|
+
|
291
|
+
##
|
292
|
+
# The number of source data files in the load job.
|
293
|
+
#
|
294
|
+
# @return [Integer] The number of source files.
|
295
|
+
#
|
296
|
+
def input_files
|
297
|
+
Integer @gapi.statistics.load.input_files
|
298
|
+
rescue StandardError
|
299
|
+
nil
|
300
|
+
end
|
301
|
+
|
302
|
+
##
|
303
|
+
# The number of bytes of source data in the load job.
|
304
|
+
#
|
305
|
+
# @return [Integer] The number of bytes.
|
306
|
+
#
|
307
|
+
def input_file_bytes
|
308
|
+
Integer @gapi.statistics.load.input_file_bytes
|
309
|
+
rescue StandardError
|
310
|
+
nil
|
311
|
+
end
|
312
|
+
|
313
|
+
##
|
314
|
+
# The number of rows that have been loaded into the table. While an
|
315
|
+
# import job is in the running state, this value may change.
|
316
|
+
#
|
317
|
+
# @return [Integer] The number of rows that have been loaded.
|
318
|
+
#
|
319
|
+
def output_rows
|
320
|
+
Integer @gapi.statistics.load.output_rows
|
321
|
+
rescue StandardError
|
322
|
+
nil
|
323
|
+
end
|
324
|
+
|
325
|
+
##
|
326
|
+
# The encryption configuration of the destination table.
|
327
|
+
#
|
328
|
+
# @return [Google::Cloud::BigQuery::EncryptionConfiguration] Custom
|
329
|
+
# encryption configuration (e.g., Cloud KMS keys).
|
330
|
+
#
|
331
|
+
# @!group Attributes
|
332
|
+
def encryption
|
333
|
+
EncryptionConfiguration.from_gapi(
|
334
|
+
@gapi.configuration.load.destination_encryption_configuration
|
335
|
+
)
|
336
|
+
end
|
337
|
+
|
338
|
+
##
|
339
|
+
# The number of bytes that have been loaded into the table. While an
|
340
|
+
# import job is in the running state, this value may change.
|
341
|
+
#
|
342
|
+
# @return [Integer] The number of bytes that have been loaded.
|
343
|
+
#
|
344
|
+
def output_bytes
|
345
|
+
Integer @gapi.statistics.load.output_bytes
|
346
|
+
rescue StandardError
|
347
|
+
nil
|
348
|
+
end
|
349
|
+
|
350
|
+
###
|
351
|
+
# Checks if the destination table will be range partitioned. See [Creating and using integer range partitioned
|
352
|
+
# tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
|
353
|
+
#
|
354
|
+
# @return [Boolean] `true` when the table is range partitioned, or `false` otherwise.
|
355
|
+
#
|
356
|
+
# @!group Attributes
|
357
|
+
#
|
358
|
+
def range_partitioning?
|
359
|
+
!@gapi.configuration.load.range_partitioning.nil?
|
360
|
+
end
|
361
|
+
|
362
|
+
###
|
363
|
+
# The field on which the destination table will be range partitioned, if any. The field must be a
|
364
|
+
# top-level `NULLABLE/REQUIRED` field. The only supported type is `INTEGER/INT64`. See
|
365
|
+
# [Creating and using integer range partitioned
|
366
|
+
# tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
|
367
|
+
#
|
368
|
+
# @return [String, nil] The partition field, if a field was configured, or `nil` if not range partitioned.
|
369
|
+
#
|
370
|
+
# @!group Attributes
|
371
|
+
#
|
372
|
+
def range_partitioning_field
|
373
|
+
@gapi.configuration.load.range_partitioning.field if range_partitioning?
|
374
|
+
end
|
375
|
+
|
376
|
+
###
|
377
|
+
# The start of range partitioning, inclusive. See [Creating and using integer range partitioned
|
378
|
+
# tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
|
379
|
+
#
|
380
|
+
# @return [Integer, nil] The start of range partitioning, inclusive, or `nil` if not range partitioned.
|
381
|
+
#
|
382
|
+
# @!group Attributes
|
383
|
+
#
|
384
|
+
def range_partitioning_start
|
385
|
+
@gapi.configuration.load.range_partitioning.range.start if range_partitioning?
|
386
|
+
end
|
387
|
+
|
388
|
+
###
|
389
|
+
# The width of each interval. See [Creating and using integer range partitioned
|
390
|
+
# tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
|
391
|
+
#
|
392
|
+
# @return [Integer, nil] The width of each interval, for data in range partitions, or `nil` if not range
|
393
|
+
# partitioned.
|
394
|
+
#
|
395
|
+
# @!group Attributes
|
396
|
+
#
|
397
|
+
def range_partitioning_interval
|
398
|
+
return nil unless range_partitioning?
|
399
|
+
@gapi.configuration.load.range_partitioning.range.interval
|
400
|
+
end
|
401
|
+
|
402
|
+
###
|
403
|
+
# The end of range partitioning, exclusive. See [Creating and using integer range partitioned
|
404
|
+
# tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
|
405
|
+
#
|
406
|
+
# @return [Integer, nil] The end of range partitioning, exclusive, or `nil` if not range partitioned.
|
407
|
+
#
|
408
|
+
# @!group Attributes
|
409
|
+
#
|
410
|
+
def range_partitioning_end
|
411
|
+
@gapi.configuration.load.range_partitioning.range.end if range_partitioning?
|
412
|
+
end
|
413
|
+
|
414
|
+
###
|
415
|
+
# Checks if the destination table will be time partitioned. See
|
416
|
+
# [Partitioned Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
|
417
|
+
#
|
418
|
+
# @return [Boolean, nil] `true` when the table will be time-partitioned,
|
419
|
+
# or `false` otherwise.
|
420
|
+
#
|
421
|
+
# @!group Attributes
|
422
|
+
#
|
423
|
+
def time_partitioning?
|
424
|
+
!@gapi.configuration.load.time_partitioning.nil?
|
425
|
+
end
|
426
|
+
|
427
|
+
###
|
428
|
+
# The period for which the destination table will be time partitioned, if
|
429
|
+
# any. See [Partitioned Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
|
430
|
+
#
|
431
|
+
# @return [String, nil] The time partition type. Currently the only supported
|
432
|
+
# value is "DAY", or `nil` if not present.
|
433
|
+
#
|
434
|
+
# @!group Attributes
|
435
|
+
#
|
436
|
+
def time_partitioning_type
|
437
|
+
@gapi.configuration.load.time_partitioning.type if time_partitioning?
|
438
|
+
end
|
439
|
+
|
440
|
+
###
|
441
|
+
# The field on which the destination table will be time partitioned, if any.
|
442
|
+
# If not set, the destination table will be time partitioned by pseudo column
|
443
|
+
# `_PARTITIONTIME`; if set, the table will be time partitioned by this field.
|
444
|
+
# See [Partitioned Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
|
445
|
+
#
|
446
|
+
# @return [String, nil] The time partition field, if a field was configured.
|
447
|
+
# `nil` if not time partitioned or not set (partitioned by pseudo column
|
448
|
+
# '_PARTITIONTIME').
|
449
|
+
#
|
450
|
+
# @!group Attributes
|
451
|
+
#
|
452
|
+
def time_partitioning_field
|
453
|
+
@gapi.configuration.load.time_partitioning.field if time_partitioning?
|
454
|
+
end
|
455
|
+
|
456
|
+
###
|
457
|
+
# The expiration for the destination table time partitions, if any, in
|
458
|
+
# seconds. See [Partitioned
|
459
|
+
# Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
|
460
|
+
#
|
461
|
+
# @return [Integer, nil] The expiration time, in seconds, for data in
|
462
|
+
# time partitions, or `nil` if not present.
|
463
|
+
#
|
464
|
+
# @!group Attributes
|
465
|
+
#
|
466
|
+
def time_partitioning_expiration
|
467
|
+
return nil unless time_partitioning?
|
468
|
+
return nil if @gapi.configuration.load.time_partitioning.expiration_ms.nil?
|
469
|
+
|
470
|
+
@gapi.configuration.load.time_partitioning.expiration_ms / 1_000
|
471
|
+
end
|
472
|
+
|
473
|
+
###
|
474
|
+
# If set to true, queries over the destination table will require a
|
475
|
+
# time partition filter that can be used for partition elimination to be
|
476
|
+
# specified. See [Partitioned
|
477
|
+
# Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
|
478
|
+
#
|
479
|
+
# @return [Boolean] `true` when a time partition filter will be required,
|
480
|
+
# or `false` otherwise.
|
481
|
+
#
|
482
|
+
# @!group Attributes
|
483
|
+
#
|
484
|
+
def time_partitioning_require_filter?
|
485
|
+
tp = @gapi.configuration.load.time_partitioning
|
486
|
+
return false if tp.nil? || tp.require_partition_filter.nil?
|
487
|
+
tp.require_partition_filter
|
488
|
+
end
|
489
|
+
|
490
|
+
###
|
491
|
+
# Checks if the destination table will be clustered.
|
492
|
+
#
|
493
|
+
# @see https://cloud.google.com/bigquery/docs/clustered-tables
|
494
|
+
# Introduction to Clustered Tables
|
495
|
+
#
|
496
|
+
# @return [Boolean, nil] `true` when the table will be clustered,
|
497
|
+
# or `false` otherwise.
|
498
|
+
#
|
499
|
+
# @!group Attributes
|
500
|
+
#
|
501
|
+
def clustering?
|
502
|
+
!@gapi.configuration.load.clustering.nil?
|
503
|
+
end
|
504
|
+
|
505
|
+
###
|
506
|
+
# One or more fields on which the destination table should be clustered.
|
507
|
+
# Must be specified with time-based partitioning, data in the table will
|
508
|
+
# be first partitioned and subsequently clustered. The order of the
|
509
|
+
# returned fields determines the sort order of the data.
|
510
|
+
#
|
511
|
+
# See {LoadJob::Updater#clustering_fields=}.
|
512
|
+
#
|
513
|
+
# @see https://cloud.google.com/bigquery/docs/partitioned-tables
|
514
|
+
# Partitioned Tables
|
515
|
+
# @see https://cloud.google.com/bigquery/docs/clustered-tables
|
516
|
+
# Introduction to Clustered Tables
|
517
|
+
# @see https://cloud.google.com/bigquery/docs/creating-clustered-tables
|
518
|
+
# Creating and Using Clustered Tables
|
519
|
+
#
|
520
|
+
# @return [Array<String>, nil] The clustering fields, or `nil` if the
|
521
|
+
# destination table will not be clustered.
|
522
|
+
#
|
523
|
+
# @!group Attributes
|
524
|
+
#
|
525
|
+
def clustering_fields
|
526
|
+
@gapi.configuration.load.clustering.fields if clustering?
|
527
|
+
end
|
528
|
+
|
529
|
+
##
|
530
|
+
# Yielded to a block to accumulate changes for a patch request.
|
531
|
+
class Updater < LoadJob
|
532
|
+
##
|
533
|
+
# A list of attributes that were updated.
|
534
|
+
attr_reader :updates
|
535
|
+
|
536
|
+
##
|
537
|
+
# @private Create an Updater object.
|
538
|
+
def initialize gapi
|
539
|
+
@updates = []
|
540
|
+
@gapi = gapi
|
541
|
+
@schema = nil
|
542
|
+
end
|
543
|
+
|
544
|
+
##
|
545
|
+
# Returns the table's schema. This method can also be used to set,
|
546
|
+
# replace, or add to the schema by passing a block. See {Schema} for
|
547
|
+
# available methods.
|
548
|
+
#
|
549
|
+
# @param [Boolean] replace Whether to replace the existing schema with
|
550
|
+
# the new schema. If `true`, the fields will replace the existing
|
551
|
+
# schema. If `false`, the fields will be added to the existing
|
552
|
+
# schema. When a table already contains data, schema changes must be
|
553
|
+
# additive. Thus, the default value is `false`.
|
554
|
+
# @yield [schema] a block for setting the schema
|
555
|
+
# @yieldparam [Schema] schema the object accepting the schema
|
556
|
+
#
|
557
|
+
# @return [Google::Cloud::Bigquery::Schema]
|
558
|
+
#
|
559
|
+
# @example
|
560
|
+
# require "google/cloud/bigquery"
|
561
|
+
#
|
562
|
+
# bigquery = Google::Cloud::Bigquery.new
|
563
|
+
# dataset = bigquery.dataset "my_dataset"
|
564
|
+
# job = dataset.load_job "my_table", "gs://abc/file" do |j|
|
565
|
+
# j.schema do |s|
|
566
|
+
# s.string "first_name", mode: :required
|
567
|
+
# s.record "cities_lived", mode: :repeated do |r|
|
568
|
+
# r.string "place", mode: :required
|
569
|
+
# r.integer "number_of_years", mode: :required
|
570
|
+
# end
|
571
|
+
# end
|
572
|
+
# end
|
573
|
+
#
|
574
|
+
# @!group Schema
|
575
|
+
#
|
576
|
+
def schema replace: false
|
577
|
+
# Same as Table#schema, but not frozen
|
578
|
+
# TODO: make sure to call ensure_full_data! on Dataset#update
|
579
|
+
@schema ||= Schema.from_gapi @gapi.configuration.load.schema
|
580
|
+
if block_given?
|
581
|
+
@schema = Schema.from_gapi if replace
|
582
|
+
yield @schema
|
583
|
+
check_for_mutated_schema!
|
584
|
+
end
|
585
|
+
# Do not freeze on updater, allow modifications
|
586
|
+
@schema
|
587
|
+
end
|
588
|
+
|
589
|
+
##
|
590
|
+
# Sets the schema of the destination table.
|
591
|
+
#
|
592
|
+
# @param [Google::Cloud::Bigquery::Schema] new_schema The schema for
|
593
|
+
# the destination table. Optional. The schema can be omitted if the
|
594
|
+
# destination table already exists, or if you're loading data from a
|
595
|
+
# source that includes a schema, such as Avro or a Google Cloud
|
596
|
+
# Datastore backup.
|
597
|
+
#
|
598
|
+
# @example
|
599
|
+
# require "google/cloud/bigquery"
|
600
|
+
#
|
601
|
+
# bigquery = Google::Cloud::Bigquery.new
|
602
|
+
# schema = bigquery.schema do |s|
|
603
|
+
# s.string "first_name", mode: :required
|
604
|
+
# s.record "cities_lived", mode: :repeated do |nested_schema|
|
605
|
+
# nested_schema.string "place", mode: :required
|
606
|
+
# nested_schema.integer "number_of_years", mode: :required
|
607
|
+
# end
|
608
|
+
# end
|
609
|
+
# dataset = bigquery.dataset "my_dataset"
|
610
|
+
# job = dataset.load_job "my_table", "gs://abc/file" do |j|
|
611
|
+
# j.schema = schema
|
612
|
+
# end
|
613
|
+
#
|
614
|
+
# @!group Schema
|
615
|
+
#
|
616
|
+
def schema= new_schema
|
617
|
+
@schema = new_schema
|
618
|
+
end
|
619
|
+
|
620
|
+
##
|
621
|
+
# Adds a string field to the schema.
|
622
|
+
#
|
623
|
+
# See {Schema#string}.
|
624
|
+
#
|
625
|
+
# @param [String] name The field name. The name must contain only
|
626
|
+
# letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
|
627
|
+
# start with a letter or underscore. The maximum length is 128
|
628
|
+
# characters.
|
629
|
+
# @param [String] description A description of the field.
|
630
|
+
# @param [Symbol] mode The field's mode. The possible values are
|
631
|
+
# `:nullable`, `:required`, and `:repeated`. The default value is
|
632
|
+
# `:nullable`.
|
633
|
+
#
|
634
|
+
# @example
|
635
|
+
# require "google/cloud/bigquery"
|
636
|
+
#
|
637
|
+
# bigquery = Google::Cloud::Bigquery.new
|
638
|
+
# dataset = bigquery.dataset "my_dataset"
|
639
|
+
# job = dataset.load_job "my_table", "gs://abc/file" do |schema|
|
640
|
+
# schema.string "first_name", mode: :required
|
641
|
+
# end
|
642
|
+
#
|
643
|
+
# @!group Schema
|
644
|
+
def string name, description: nil, mode: :nullable
|
645
|
+
schema.string name, description: description, mode: mode
|
646
|
+
end
|
647
|
+
|
648
|
+
##
|
649
|
+
# Adds an integer field to the schema.
|
650
|
+
#
|
651
|
+
# See {Schema#integer}.
|
652
|
+
#
|
653
|
+
# @param [String] name The field name. The name must contain only
|
654
|
+
# letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
|
655
|
+
# start with a letter or underscore. The maximum length is 128
|
656
|
+
# characters.
|
657
|
+
# @param [String] description A description of the field.
|
658
|
+
# @param [Symbol] mode The field's mode. The possible values are
|
659
|
+
# `:nullable`, `:required`, and `:repeated`. The default value is
|
660
|
+
# `:nullable`.
|
661
|
+
#
|
662
|
+
# @example
|
663
|
+
# require "google/cloud/bigquery"
|
664
|
+
#
|
665
|
+
# bigquery = Google::Cloud::Bigquery.new
|
666
|
+
# dataset = bigquery.dataset "my_dataset"
|
667
|
+
# job = dataset.load_job "my_table", "gs://abc/file" do |schema|
|
668
|
+
# schema.integer "age", mode: :required
|
669
|
+
# end
|
670
|
+
#
|
671
|
+
# @!group Schema
|
672
|
+
def integer name, description: nil, mode: :nullable
|
673
|
+
schema.integer name, description: description, mode: mode
|
674
|
+
end
|
675
|
+
|
676
|
+
##
|
677
|
+
# Adds a floating-point number field to the schema.
|
678
|
+
#
|
679
|
+
# See {Schema#float}.
|
680
|
+
#
|
681
|
+
# @param [String] name The field name. The name must contain only
|
682
|
+
# letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
|
683
|
+
# start with a letter or underscore. The maximum length is 128
|
684
|
+
# characters.
|
685
|
+
# @param [String] description A description of the field.
|
686
|
+
# @param [Symbol] mode The field's mode. The possible values are
|
687
|
+
# `:nullable`, `:required`, and `:repeated`. The default value is
|
688
|
+
# `:nullable`.
|
689
|
+
#
|
690
|
+
# @example
|
691
|
+
# require "google/cloud/bigquery"
|
692
|
+
#
|
693
|
+
# bigquery = Google::Cloud::Bigquery.new
|
694
|
+
# dataset = bigquery.dataset "my_dataset"
|
695
|
+
# job = dataset.load_job "my_table", "gs://abc/file" do |schema|
|
696
|
+
# schema.float "price", mode: :required
|
697
|
+
# end
|
698
|
+
#
|
699
|
+
# @!group Schema
|
700
|
+
def float name, description: nil, mode: :nullable
|
701
|
+
schema.float name, description: description, mode: mode
|
702
|
+
end
|
703
|
+
|
704
|
+
##
|
705
|
+
# Adds a numeric number field to the schema. Numeric is a
|
706
|
+
# fixed-precision numeric type with 38 decimal digits, 9 that follow
|
707
|
+
# the decimal point.
|
708
|
+
#
|
709
|
+
# See {Schema#numeric}
|
710
|
+
#
|
711
|
+
# @param [String] name The field name. The name must contain only
|
712
|
+
# letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
|
713
|
+
# start with a letter or underscore. The maximum length is 128
|
714
|
+
# characters.
|
715
|
+
# @param [String] description A description of the field.
|
716
|
+
# @param [Symbol] mode The field's mode. The possible values are
|
717
|
+
# `:nullable`, `:required`, and `:repeated`. The default value is
|
718
|
+
# `:nullable`.
|
719
|
+
#
|
720
|
+
# @example
|
721
|
+
# require "google/cloud/bigquery"
|
722
|
+
#
|
723
|
+
# bigquery = Google::Cloud::Bigquery.new
|
724
|
+
# dataset = bigquery.dataset "my_dataset"
|
725
|
+
# job = dataset.load_job "my_table", "gs://abc/file" do |schema|
|
726
|
+
# schema.numeric "total_cost", mode: :required
|
727
|
+
# end
|
728
|
+
#
|
729
|
+
# @!group Schema
|
730
|
+
def numeric name, description: nil, mode: :nullable
|
731
|
+
schema.numeric name, description: description, mode: mode
|
732
|
+
end
|
733
|
+
|
734
|
+
##
|
735
|
+
# Adds a boolean field to the schema.
|
736
|
+
#
|
737
|
+
# See {Schema#boolean}.
|
738
|
+
#
|
739
|
+
# @param [String] name The field name. The name must contain only
|
740
|
+
# letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
|
741
|
+
# start with a letter or underscore. The maximum length is 128
|
742
|
+
# characters.
|
743
|
+
# @param [String] description A description of the field.
|
744
|
+
# @param [Symbol] mode The field's mode. The possible values are
|
745
|
+
# `:nullable`, `:required`, and `:repeated`. The default value is
|
746
|
+
# `:nullable`.
|
747
|
+
#
|
748
|
+
# @example
|
749
|
+
# require "google/cloud/bigquery"
|
750
|
+
#
|
751
|
+
# bigquery = Google::Cloud::Bigquery.new
|
752
|
+
# dataset = bigquery.dataset "my_dataset"
|
753
|
+
# job = dataset.load_job "my_table", "gs://abc/file" do |schema|
|
754
|
+
# schema.boolean "active", mode: :required
|
755
|
+
# end
|
756
|
+
#
|
757
|
+
# @!group Schema
|
758
|
+
def boolean name, description: nil, mode: :nullable
|
759
|
+
schema.boolean name, description: description, mode: mode
|
760
|
+
end
|
761
|
+
|
762
|
+
##
|
763
|
+
# Adds a bytes field to the schema.
|
764
|
+
#
|
765
|
+
# See {Schema#bytes}.
|
766
|
+
#
|
767
|
+
# @param [String] name The field name. The name must contain only
|
768
|
+
# letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
|
769
|
+
# start with a letter or underscore. The maximum length is 128
|
770
|
+
# characters.
|
771
|
+
# @param [String] description A description of the field.
|
772
|
+
# @param [Symbol] mode The field's mode. The possible values are
|
773
|
+
# `:nullable`, `:required`, and `:repeated`. The default value is
|
774
|
+
# `:nullable`.
|
775
|
+
#
|
776
|
+
# @example
|
777
|
+
# require "google/cloud/bigquery"
|
778
|
+
#
|
779
|
+
# bigquery = Google::Cloud::Bigquery.new
|
780
|
+
# dataset = bigquery.dataset "my_dataset"
|
781
|
+
# job = dataset.load_job "my_table", "gs://abc/file" do |schema|
|
782
|
+
# schema.bytes "avatar", mode: :required
|
783
|
+
# end
|
784
|
+
#
|
785
|
+
# @!group Schema
|
786
|
+
def bytes name, description: nil, mode: :nullable
|
787
|
+
schema.bytes name, description: description, mode: mode
|
788
|
+
end
|
789
|
+
|
790
|
+
##
|
791
|
+
# Adds a timestamp field to the schema.
|
792
|
+
#
|
793
|
+
# See {Schema#timestamp}.
|
794
|
+
#
|
795
|
+
# @param [String] name The field name. The name must contain only
|
796
|
+
# letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
|
797
|
+
# start with a letter or underscore. The maximum length is 128
|
798
|
+
# characters.
|
799
|
+
# @param [String] description A description of the field.
|
800
|
+
# @param [Symbol] mode The field's mode. The possible values are
|
801
|
+
# `:nullable`, `:required`, and `:repeated`. The default value is
|
802
|
+
# `:nullable`.
|
803
|
+
#
|
804
|
+
# @example
|
805
|
+
# require "google/cloud/bigquery"
|
806
|
+
#
|
807
|
+
# bigquery = Google::Cloud::Bigquery.new
|
808
|
+
# dataset = bigquery.dataset "my_dataset"
|
809
|
+
# job = dataset.load_job "my_table", "gs://abc/file" do |schema|
|
810
|
+
# schema.timestamp "creation_date", mode: :required
|
811
|
+
# end
|
812
|
+
#
|
813
|
+
# @!group Schema
|
814
|
+
def timestamp name, description: nil, mode: :nullable
|
815
|
+
schema.timestamp name, description: description, mode: mode
|
816
|
+
end
|
817
|
+
|
818
|
+
##
|
819
|
+
# Adds a time field to the schema.
|
820
|
+
#
|
821
|
+
# See {Schema#time}.
|
822
|
+
#
|
823
|
+
# @param [String] name The field name. The name must contain only
|
824
|
+
# letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
|
825
|
+
# start with a letter or underscore. The maximum length is 128
|
826
|
+
# characters.
|
827
|
+
# @param [String] description A description of the field.
|
828
|
+
# @param [Symbol] mode The field's mode. The possible values are
|
829
|
+
# `:nullable`, `:required`, and `:repeated`. The default value is
|
830
|
+
# `:nullable`.
|
831
|
+
#
|
832
|
+
# @example
|
833
|
+
# require "google/cloud/bigquery"
|
834
|
+
#
|
835
|
+
# bigquery = Google::Cloud::Bigquery.new
|
836
|
+
# dataset = bigquery.dataset "my_dataset"
|
837
|
+
# job = dataset.load_job "my_table", "gs://abc/file" do |schema|
|
838
|
+
# schema.time "duration", mode: :required
|
839
|
+
# end
|
840
|
+
#
|
841
|
+
# @!group Schema
|
842
|
+
def time name, description: nil, mode: :nullable
|
843
|
+
schema.time name, description: description, mode: mode
|
844
|
+
end
|
845
|
+
|
846
|
+
##
|
847
|
+
# Adds a datetime field to the schema.
|
848
|
+
#
|
849
|
+
# See {Schema#datetime}.
|
850
|
+
#
|
851
|
+
# @param [String] name The field name. The name must contain only
|
852
|
+
# letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
|
853
|
+
# start with a letter or underscore. The maximum length is 128
|
854
|
+
# characters.
|
855
|
+
# @param [String] description A description of the field.
|
856
|
+
# @param [Symbol] mode The field's mode. The possible values are
|
857
|
+
# `:nullable`, `:required`, and `:repeated`. The default value is
|
858
|
+
# `:nullable`.
|
859
|
+
#
|
860
|
+
# @example
|
861
|
+
# require "google/cloud/bigquery"
|
862
|
+
#
|
863
|
+
# bigquery = Google::Cloud::Bigquery.new
|
864
|
+
# dataset = bigquery.dataset "my_dataset"
|
865
|
+
# job = dataset.load_job "my_table", "gs://abc/file" do |schema|
|
866
|
+
# schema.datetime "target_end", mode: :required
|
867
|
+
# end
|
868
|
+
#
|
869
|
+
# @!group Schema
|
870
|
+
def datetime name, description: nil, mode: :nullable
|
871
|
+
schema.datetime name, description: description, mode: mode
|
872
|
+
end
|
873
|
+
|
874
|
+
##
|
875
|
+
# Adds a date field to the schema.
|
876
|
+
#
|
877
|
+
# See {Schema#date}.
|
878
|
+
#
|
879
|
+
# @param [String] name The field name. The name must contain only
|
880
|
+
# letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
|
881
|
+
# start with a letter or underscore. The maximum length is 128
|
882
|
+
# characters.
|
883
|
+
# @param [String] description A description of the field.
|
884
|
+
# @param [Symbol] mode The field's mode. The possible values are
|
885
|
+
# `:nullable`, `:required`, and `:repeated`. The default value is
|
886
|
+
# `:nullable`.
|
887
|
+
#
|
888
|
+
# @example
|
889
|
+
# require "google/cloud/bigquery"
|
890
|
+
#
|
891
|
+
# bigquery = Google::Cloud::Bigquery.new
|
892
|
+
# dataset = bigquery.dataset "my_dataset"
|
893
|
+
# job = dataset.load_job "my_table", "gs://abc/file" do |schema|
|
894
|
+
# schema.date "birthday", mode: :required
|
895
|
+
# end
|
896
|
+
#
|
897
|
+
# @!group Schema
|
898
|
+
def date name, description: nil, mode: :nullable
|
899
|
+
schema.date name, description: description, mode: mode
|
900
|
+
end
|
901
|
+
|
902
|
+
##
|
903
|
+
# Adds a record field to the schema. A block must be passed describing
|
904
|
+
# the nested fields of the record. For more information about nested
|
905
|
+
# and repeated records, see [Loading denormalized, nested, and
|
906
|
+
# repeated data
|
907
|
+
# ](https://cloud.google.com/bigquery/docs/loading-data#loading_denormalized_nested_and_repeated_data).
|
908
|
+
#
|
909
|
+
# See {Schema#record}.
|
910
|
+
#
|
911
|
+
# @param [String] name The field name. The name must contain only
|
912
|
+
# letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
|
913
|
+
# start with a letter or underscore. The maximum length is 128
|
914
|
+
# characters.
|
915
|
+
# @param [String] description A description of the field.
|
916
|
+
# @param [Symbol] mode The field's mode. The possible values are
|
917
|
+
# `:nullable`, `:required`, and `:repeated`. The default value is
|
918
|
+
# `:nullable`.
|
919
|
+
# @yield [nested_schema] a block for setting the nested schema
|
920
|
+
# @yieldparam [Schema] nested_schema the object accepting the
|
921
|
+
# nested schema
|
922
|
+
#
|
923
|
+
# @example
|
924
|
+
# require "google/cloud/bigquery"
|
925
|
+
#
|
926
|
+
# bigquery = Google::Cloud::Bigquery.new
|
927
|
+
# dataset = bigquery.dataset "my_dataset"
|
928
|
+
# job = dataset.load_job "my_table", "gs://abc/file" do |schema|
|
929
|
+
# schema.record "cities_lived", mode: :repeated do |cities_lived|
|
930
|
+
# cities_lived.string "place", mode: :required
|
931
|
+
# cities_lived.integer "number_of_years", mode: :required
|
932
|
+
# end
|
933
|
+
# end
|
934
|
+
#
|
935
|
+
# @!group Schema
|
936
|
+
#
|
937
|
+
def record name, description: nil, mode: nil, &block
|
938
|
+
schema.record name, description: description, mode: mode, &block
|
939
|
+
end
|
940
|
+
|
941
|
+
##
|
942
|
+
# Make sure any access changes are saved
|
943
|
+
def check_for_mutated_schema!
|
944
|
+
return if @schema.nil?
|
945
|
+
return unless @schema.changed?
|
946
|
+
@gapi.configuration.load.schema = @schema.to_gapi
|
947
|
+
patch_gapi! :schema
|
948
|
+
end
|
949
|
+
|
950
|
+
##
|
951
|
+
# Sets the geographic location where the job should run. Required
|
952
|
+
# except for US and EU.
|
953
|
+
#
|
954
|
+
# @param [String] value A geographic location, such as "US", "EU" or
|
955
|
+
# "asia-northeast1". Required except for US and EU.
|
956
|
+
#
|
957
|
+
# @example
|
958
|
+
# require "google/cloud/bigquery"
|
959
|
+
#
|
960
|
+
# bigquery = Google::Cloud::Bigquery.new
|
961
|
+
# dataset = bigquery.dataset "my_dataset"
|
962
|
+
# job = dataset.load_job "my_table", "gs://abc/file" do |j|
|
963
|
+
# j.schema do |s|
|
964
|
+
# s.string "first_name", mode: :required
|
965
|
+
# s.record "cities_lived", mode: :repeated do |r|
|
966
|
+
# r.string "place", mode: :required
|
967
|
+
# r.integer "number_of_years", mode: :required
|
968
|
+
# end
|
969
|
+
# end
|
970
|
+
# j.location = "EU"
|
971
|
+
# end
|
972
|
+
#
|
973
|
+
# @!group Attributes
|
974
|
+
def location= value
|
975
|
+
@gapi.job_reference.location = value
|
976
|
+
return unless value.nil?
|
977
|
+
|
978
|
+
# Treat assigning value of nil the same as unsetting the value.
|
979
|
+
unset = @gapi.job_reference.instance_variables.include? :@location
|
980
|
+
@gapi.job_reference.remove_instance_variable :@location if unset
|
981
|
+
end
|
982
|
+
|
983
|
+
##
|
984
|
+
# Sets the source file format. The default value is `csv`.
|
985
|
+
#
|
986
|
+
# The following values are supported:
|
987
|
+
#
|
988
|
+
# * `csv` - CSV
|
989
|
+
# * `json` - [Newline-delimited JSON](http://jsonlines.org/)
|
990
|
+
# * `avro` - [Avro](http://avro.apache.org/)
|
991
|
+
# * `orc` - [ORC](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-orc)
|
992
|
+
# * `parquet` - [Parquet](https://parquet.apache.org/)
|
993
|
+
# * `datastore_backup` - Cloud Datastore backup
|
994
|
+
#
|
995
|
+
# @param [String] new_format The new source format.
|
996
|
+
#
|
997
|
+
# @!group Attributes
|
998
|
+
#
|
999
|
+
def format= new_format
|
1000
|
+
@gapi.configuration.load.update! source_format: Convert.source_format(new_format)
|
1001
|
+
end
|
1002
|
+
|
1003
|
+
##
|
1004
|
+
# Sets the create disposition.
|
1005
|
+
#
|
1006
|
+
# This specifies whether the job is allowed to create new tables. The
|
1007
|
+
# default value is `needed`.
|
1008
|
+
#
|
1009
|
+
# The following values are supported:
|
1010
|
+
#
|
1011
|
+
# * `needed` - Create the table if it does not exist.
|
1012
|
+
# * `never` - The table must already exist. A 'notFound' error is
|
1013
|
+
# raised if the table does not exist.
|
1014
|
+
#
|
1015
|
+
# @param [String] new_create The new create disposition.
|
1016
|
+
#
|
1017
|
+
# @!group Attributes
|
1018
|
+
#
|
1019
|
+
def create= new_create
|
1020
|
+
@gapi.configuration.load.update! create_disposition: Convert.create_disposition(new_create)
|
1021
|
+
end
|
1022
|
+
|
1023
|
+
##
|
1024
|
+
# Sets the write disposition.
|
1025
|
+
#
|
1026
|
+
# This specifies how to handle data already present in the table. The
|
1027
|
+
# default value is `append`.
|
1028
|
+
#
|
1029
|
+
# The following values are supported:
|
1030
|
+
#
|
1031
|
+
# * `truncate` - BigQuery overwrites the table data.
|
1032
|
+
# * `append` - BigQuery appends the data to the table.
|
1033
|
+
# * `empty` - An error will be returned if the table already contains
|
1034
|
+
# data.
|
1035
|
+
#
|
1036
|
+
# @param [String] new_write The new write disposition.
|
1037
|
+
#
|
1038
|
+
# @!group Attributes
|
1039
|
+
#
|
1040
|
+
def write= new_write
|
1041
|
+
@gapi.configuration.load.update! write_disposition: Convert.write_disposition(new_write)
|
1042
|
+
end
|
1043
|
+
|
1044
|
+
##
|
1045
|
+
# Sets the projection fields.
|
1046
|
+
#
|
1047
|
+
# If the `format` option is set to `datastore_backup`, indicates
|
1048
|
+
# which entity properties to load from a Cloud Datastore backup.
|
1049
|
+
# Property names are case sensitive and must be top-level properties.
|
1050
|
+
# If not set, BigQuery loads all properties. If any named property
|
1051
|
+
# isn't found in the Cloud Datastore backup, an invalid error is
|
1052
|
+
# returned.
|
1053
|
+
#
|
1054
|
+
# @param [Array<String>] new_fields The new projection fields.
|
1055
|
+
#
|
1056
|
+
# @!group Attributes
|
1057
|
+
#
|
1058
|
+
def projection_fields= new_fields
|
1059
|
+
if new_fields.nil?
|
1060
|
+
@gapi.configuration.load.update! projection_fields: nil
|
1061
|
+
else
|
1062
|
+
@gapi.configuration.load.update! projection_fields: Array(new_fields)
|
1063
|
+
end
|
1064
|
+
end
|
1065
|
+
|
1066
|
+
##
|
1067
|
+
# Sets the source URIs to load.
|
1068
|
+
#
|
1069
|
+
# The fully-qualified URIs that point to your data in Google Cloud.
|
1070
|
+
#
|
1071
|
+
# * For Google Cloud Storage URIs: Each URI can contain one '*'
|
1072
|
+
# wildcard character and it must come after the 'bucket' name. Size
|
1073
|
+
# limits related to load jobs apply to external data sources. For
|
1074
|
+
# * Google Cloud Bigtable URIs: Exactly one URI can be specified and
|
1075
|
+
# it has be a fully specified and valid HTTPS URL for a Google Cloud
|
1076
|
+
# Bigtable table.
|
1077
|
+
# * For Google Cloud Datastore backups: Exactly one URI can be
|
1078
|
+
# specified. Also, the '*' wildcard character is not allowed.
|
1079
|
+
#
|
1080
|
+
# @param [Array<String>] new_uris The new source URIs to load.
|
1081
|
+
#
|
1082
|
+
# @!group Attributes
|
1083
|
+
#
|
1084
|
+
def source_uris= new_uris
|
1085
|
+
if new_uris.nil?
|
1086
|
+
@gapi.configuration.load.update! source_uris: nil
|
1087
|
+
else
|
1088
|
+
@gapi.configuration.load.update! source_uris: Array(new_uris)
|
1089
|
+
end
|
1090
|
+
end
|
1091
|
+
|
1092
|
+
##
|
1093
|
+
# Sets flag for allowing jagged rows.
|
1094
|
+
#
|
1095
|
+
# Accept rows that are missing trailing optional columns. The missing
|
1096
|
+
# values are treated as nulls. If `false`, records with missing
|
1097
|
+
# trailing columns are treated as bad records, and if there are too
|
1098
|
+
# many bad records, an invalid error is returned in the job result.
|
1099
|
+
# The default value is `false`. Only applicable to CSV, ignored for
|
1100
|
+
# other formats.
|
1101
|
+
#
|
1102
|
+
# @param [Boolean] val Accept rows that are missing trailing optional
|
1103
|
+
# columns.
|
1104
|
+
#
|
1105
|
+
# @!group Attributes
|
1106
|
+
#
|
1107
|
+
def jagged_rows= val
|
1108
|
+
@gapi.configuration.load.update! allow_jagged_rows: val
|
1109
|
+
end
|
1110
|
+
|
1111
|
+
##
|
1112
|
+
# Allows quoted data sections to contain newline characters in CSV.
|
1113
|
+
#
|
1114
|
+
# @param [Boolean] val Indicates if BigQuery should allow quoted data
|
1115
|
+
# sections that contain newline characters in a CSV file. The
|
1116
|
+
# default value is `false`.
|
1117
|
+
#
|
1118
|
+
# @!group Attributes
|
1119
|
+
#
|
1120
|
+
def quoted_newlines= val
|
1121
|
+
@gapi.configuration.load.update! allow_quoted_newlines: val
|
1122
|
+
end
|
1123
|
+
|
1124
|
+
##
|
1125
|
+
# Allows BigQuery to autodetect the schema.
|
1126
|
+
#
|
1127
|
+
# @param [Boolean] val Indicates if BigQuery should automatically
|
1128
|
+
# infer the options and schema for CSV and JSON sources. The default
|
1129
|
+
# value is `false`.
|
1130
|
+
#
|
1131
|
+
# @!group Attributes
|
1132
|
+
#
|
1133
|
+
def autodetect= val
|
1134
|
+
@gapi.configuration.load.update! autodetect: val
|
1135
|
+
end
|
1136
|
+
|
1137
|
+
##
|
1138
|
+
# Sets the character encoding of the data.
|
1139
|
+
#
|
1140
|
+
# @param [String] val The character encoding of the data. The
|
1141
|
+
# supported values are `UTF-8` or `ISO-8859-1`. The default value
|
1142
|
+
# is `UTF-8`.
|
1143
|
+
#
|
1144
|
+
# @!group Attributes
|
1145
|
+
#
|
1146
|
+
def encoding= val
|
1147
|
+
@gapi.configuration.load.update! encoding: val
|
1148
|
+
end
|
1149
|
+
|
1150
|
+
##
|
1151
|
+
# Sets the separator for fields in a CSV file.
|
1152
|
+
#
|
1153
|
+
# @param [String] val Specifices the separator for fields in a CSV
|
1154
|
+
# file. BigQuery converts the string to `ISO-8859-1` encoding, and
|
1155
|
+
# then uses the first byte of the encoded string to split the data
|
1156
|
+
# in its raw, binary state. Default is <code>,</code>.
|
1157
|
+
#
|
1158
|
+
# @!group Attributes
|
1159
|
+
#
|
1160
|
+
def delimiter= val
|
1161
|
+
@gapi.configuration.load.update! field_delimiter: val
|
1162
|
+
end
|
1163
|
+
|
1164
|
+
##
|
1165
|
+
# Allows unknown columns to be ignored.
|
1166
|
+
#
|
1167
|
+
# @param [Boolean] val Indicates if BigQuery should allow extra
|
1168
|
+
# values that are not represented in the table schema. If true, the
|
1169
|
+
# extra values are ignored. If false, records with extra columns are
|
1170
|
+
# treated as bad records, and if there are too many bad records, an
|
1171
|
+
# invalid error is returned in the job result. The default value is
|
1172
|
+
# `false`.
|
1173
|
+
#
|
1174
|
+
# The `format` property determines what BigQuery treats as an extra
|
1175
|
+
# value:
|
1176
|
+
#
|
1177
|
+
# * `CSV`: Trailing columns
|
1178
|
+
# * `JSON`: Named values that don't match any column names
|
1179
|
+
#
|
1180
|
+
# @!group Attributes
|
1181
|
+
#
|
1182
|
+
def ignore_unknown= val
|
1183
|
+
@gapi.configuration.load.update! ignore_unknown_values: val
|
1184
|
+
end
|
1185
|
+
|
1186
|
+
##
|
1187
|
+
# Sets the maximum number of bad records that can be ignored.
|
1188
|
+
#
|
1189
|
+
# @param [Integer] val The maximum number of bad records that
|
1190
|
+
# BigQuery can ignore when running the job. If the number of bad
|
1191
|
+
# records exceeds this value, an invalid error is returned in the
|
1192
|
+
# job result. The default value is `0`, which requires that all
|
1193
|
+
# records are valid.
|
1194
|
+
#
|
1195
|
+
# @!group Attributes
|
1196
|
+
#
|
1197
|
+
def max_bad_records= val
|
1198
|
+
@gapi.configuration.load.update! max_bad_records: val
|
1199
|
+
end
|
1200
|
+
|
1201
|
+
##
|
1202
|
+
# Sets the string that represents a null value in a CSV file.
|
1203
|
+
#
|
1204
|
+
# @param [String] val Specifies a string that represents a null value
|
1205
|
+
# in a CSV file. For example, if you specify `\N`, BigQuery
|
1206
|
+
# interprets `\N` as a null value when loading a CSV file. The
|
1207
|
+
# default value is the empty string. If you set this property to a
|
1208
|
+
# custom value, BigQuery throws an error if an empty string is
|
1209
|
+
# present for all data types except for STRING and BYTE. For STRING
|
1210
|
+
# and BYTE columns, BigQuery interprets the empty string as an empty
|
1211
|
+
# value.
|
1212
|
+
#
|
1213
|
+
# @!group Attributes
|
1214
|
+
#
|
1215
|
+
def null_marker= val
|
1216
|
+
@gapi.configuration.load.update! null_marker: val
|
1217
|
+
end
|
1218
|
+
|
1219
|
+
          ##
          # Sets the character to use to quote string values in CSVs.
          #
          # @param [String] val The value that is used to quote data sections
          #   in a CSV file. BigQuery converts the string to ISO-8859-1
          #   encoding, and then uses the first byte of the encoded string to
          #   split the data in its raw, binary state. The default value is a
          #   double-quote <code>"</code>. If your data does not contain quoted
          #   sections, set the property value to an empty string. If your data
          #   contains quoted newline characters, you must also set the
          #   allowQuotedNewlines property to true.
          #
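          # A sketch using single-quoted fields follows; the names are
          # illustrative placeholders.
          #
          # @example Quoting fields with single quotes (placeholder names):
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #   dataset = bigquery.dataset "my_dataset"
          #
          #   dataset.load_job "my_new_table", "gs://my-bucket/file.csv" do |job|
          #     job.quote = "'" # fields are wrapped in single quotes
          #   end
          #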
          # @!group Attributes
          #
          def quote= val
            @gapi.configuration.load.update! quote: val
          end

          ##
          # Sets the schema update options, which allow the schema of the
          # destination table to be updated as a side effect of the load job if
          # a schema is autodetected or supplied in the job configuration.
          # Schema update options are supported in two cases: when write
          # disposition is `WRITE_APPEND`; and when write disposition is
          # `WRITE_TRUNCATE` and the destination table is a partition of a
          # table, specified by partition decorators. For normal tables,
          # `WRITE_TRUNCATE` will always overwrite the schema. One or more of
          # the following values may be specified:
          #
          # * `ALLOW_FIELD_ADDITION`: allow adding a nullable field to the
          #   schema.
          # * `ALLOW_FIELD_RELAXATION`: allow relaxing a required field in the
          #   original schema to nullable.
          #
          # @param [Array<String>] new_options The new schema update options.
          #
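          # The sketch below appends to an existing table while allowing
          # schema changes; the names are illustrative placeholders, and it
          # assumes the `write:` option of `Dataset#load_job`.
          #
          # @example Appending with schema changes allowed (placeholder names):
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #   dataset = bigquery.dataset "my_dataset"
          #
          #   dataset.load_job "my_table", "gs://my-bucket/file.csv", write: "append" do |job|
          #     # permit new nullable fields and REQUIRED-to-NULLABLE relaxation
          #     job.schema_update_options = ["ALLOW_FIELD_ADDITION", "ALLOW_FIELD_RELAXATION"]
          #   end
          #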
          # @!group Attributes
          #
          def schema_update_options= new_options
            if new_options.nil?
              @gapi.configuration.load.update! schema_update_options: nil
            else
              @gapi.configuration.load.update! schema_update_options: Array(new_options)
            end
          end

          ##
          # Sets the number of leading rows to skip in the file.
          #
          # @param [Integer] val The number of rows at the top of a CSV file
          #   that BigQuery will skip when loading the data. The default
          #   value is `0`. This property is useful if you have header rows in
          #   the file that should be skipped.
          #
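          # A sketch that skips a single header row follows; the names are
          # illustrative placeholders.
          #
          # @example Skipping a header row (placeholder names):
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #   dataset = bigquery.dataset "my_dataset"
          #
          #   dataset.load_job "my_new_table", "gs://my-bucket/file.csv" do |job|
          #     job.skip_leading = 1 # the first row contains column headers
          #   end
          #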
          # @!group Attributes
          #
          def skip_leading= val
            @gapi.configuration.load.update! skip_leading_rows: val
          end

          ##
          # Sets the encryption configuration of the destination table.
          #
          # @param [Google::Cloud::BigQuery::EncryptionConfiguration] val
          #   Custom encryption configuration (e.g., Cloud KMS keys).
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #   dataset = bigquery.dataset "my_dataset"
          #
          #   key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
          #   encrypt_config = bigquery.encryption kms_key: key_name
          #   job = dataset.load_job "my_table", "gs://abc/file" do |job|
          #     job.encryption = encrypt_config
          #   end
          #
          # @!group Attributes
          def encryption= val
            @gapi.configuration.load.update! destination_encryption_configuration: val.to_gapi
          end

          ##
          # Sets the labels to use for the load job.
          #
          # @param [Hash] val A hash of user-provided labels associated with
          #   the job. You can use these to organize and group your jobs. Label
          #   keys and values can be no longer than 63 characters, can only
          #   contain lowercase letters, numeric characters, underscores and
          #   dashes. International characters are allowed. Label values are
          #   optional. Label keys must start with a letter and each label in
          #   the list must have a different key.
          #
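          # A sketch of labeling a job follows; the label keys and values are
          # illustrative placeholders.
          #
          # @example Tagging a load job (placeholder names):
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #   dataset = bigquery.dataset "my_dataset"
          #
          #   dataset.load_job "my_new_table", "gs://my-bucket/file.csv" do |job|
          #     job.labels = { "env" => "staging", "team" => "data-eng" }
          #   end
          #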
          # @!group Attributes
          #
          def labels= val
            @gapi.configuration.update! labels: val
          end

          ##
          # Sets the field on which to range partition the table. See [Creating and using integer range partitioned
          # tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
          #
          # See {#range_partitioning_start=}, {#range_partitioning_interval=} and {#range_partitioning_end=}.
          #
          # You can only set range partitioning when creating a table. BigQuery does not allow you to change
          # partitioning on an existing table.
          #
          # @param [String] field The range partition field. The destination table is partitioned by this
          #   field. The field must be a top-level `NULLABLE/REQUIRED` field. The only supported
          #   type is `INTEGER/INT64`.
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #   dataset = bigquery.dataset "my_dataset"
          #
          #   gs_url = "gs://my-bucket/file-name.csv"
          #   load_job = dataset.load_job "my_new_table", gs_url do |job|
          #     job.schema do |schema|
          #       schema.integer "my_table_id", mode: :required
          #       schema.string "my_table_data", mode: :required
          #     end
          #     job.range_partitioning_field = "my_table_id"
          #     job.range_partitioning_start = 0
          #     job.range_partitioning_interval = 10
          #     job.range_partitioning_end = 100
          #   end
          #
          #   load_job.wait_until_done!
          #   load_job.done? #=> true
          #
          # @!group Attributes
          #
          def range_partitioning_field= field
            @gapi.configuration.load.range_partitioning ||= Google::Apis::BigqueryV2::RangePartitioning.new(
              range: Google::Apis::BigqueryV2::RangePartitioning::Range.new
            )
            @gapi.configuration.load.range_partitioning.field = field
          end

          ##
          # Sets the start of range partitioning, inclusive, for the destination table. See [Creating and using integer
          # range partitioned tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
          #
          # You can only set range partitioning when creating a table. BigQuery does not allow you to change
          # partitioning on an existing table.
          #
          # See {#range_partitioning_field=}, {#range_partitioning_interval=} and {#range_partitioning_end=}.
          #
          # @param [Integer] range_start The start of range partitioning, inclusive.
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #   dataset = bigquery.dataset "my_dataset"
          #
          #   gs_url = "gs://my-bucket/file-name.csv"
          #   load_job = dataset.load_job "my_new_table", gs_url do |job|
          #     job.schema do |schema|
          #       schema.integer "my_table_id", mode: :required
          #       schema.string "my_table_data", mode: :required
          #     end
          #     job.range_partitioning_field = "my_table_id"
          #     job.range_partitioning_start = 0
          #     job.range_partitioning_interval = 10
          #     job.range_partitioning_end = 100
          #   end
          #
          #   load_job.wait_until_done!
          #   load_job.done? #=> true
          #
          # @!group Attributes
          #
          def range_partitioning_start= range_start
            @gapi.configuration.load.range_partitioning ||= Google::Apis::BigqueryV2::RangePartitioning.new(
              range: Google::Apis::BigqueryV2::RangePartitioning::Range.new
            )
            @gapi.configuration.load.range_partitioning.range.start = range_start
          end

          ##
          # Sets the width of each interval for data in range partitions. See [Creating and using integer range
          # partitioned tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
          #
          # You can only set range partitioning when creating a table. BigQuery does not allow you to change
          # partitioning on an existing table.
          #
          # See {#range_partitioning_field=}, {#range_partitioning_start=} and {#range_partitioning_end=}.
          #
          # @param [Integer] range_interval The width of each interval for data in partitions.
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #   dataset = bigquery.dataset "my_dataset"
          #
          #   gs_url = "gs://my-bucket/file-name.csv"
          #   load_job = dataset.load_job "my_new_table", gs_url do |job|
          #     job.schema do |schema|
          #       schema.integer "my_table_id", mode: :required
          #       schema.string "my_table_data", mode: :required
          #     end
          #     job.range_partitioning_field = "my_table_id"
          #     job.range_partitioning_start = 0
          #     job.range_partitioning_interval = 10
          #     job.range_partitioning_end = 100
          #   end
          #
          #   load_job.wait_until_done!
          #   load_job.done? #=> true
          #
          # @!group Attributes
          #
          def range_partitioning_interval= range_interval
            @gapi.configuration.load.range_partitioning ||= Google::Apis::BigqueryV2::RangePartitioning.new(
              range: Google::Apis::BigqueryV2::RangePartitioning::Range.new
            )
            @gapi.configuration.load.range_partitioning.range.interval = range_interval
          end

          ##
          # Sets the end of range partitioning, exclusive, for the destination table. See [Creating and using integer
          # range partitioned tables](https://cloud.google.com/bigquery/docs/creating-integer-range-partitions).
          #
          # You can only set range partitioning when creating a table. BigQuery does not allow you to change
          # partitioning on an existing table.
          #
          # See {#range_partitioning_start=}, {#range_partitioning_interval=} and {#range_partitioning_field=}.
          #
          # @param [Integer] range_end The end of range partitioning, exclusive.
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #   dataset = bigquery.dataset "my_dataset"
          #
          #   gs_url = "gs://my-bucket/file-name.csv"
          #   load_job = dataset.load_job "my_new_table", gs_url do |job|
          #     job.schema do |schema|
          #       schema.integer "my_table_id", mode: :required
          #       schema.string "my_table_data", mode: :required
          #     end
          #     job.range_partitioning_field = "my_table_id"
          #     job.range_partitioning_start = 0
          #     job.range_partitioning_interval = 10
          #     job.range_partitioning_end = 100
          #   end
          #
          #   load_job.wait_until_done!
          #   load_job.done? #=> true
          #
          # @!group Attributes
          #
          def range_partitioning_end= range_end
            @gapi.configuration.load.range_partitioning ||= Google::Apis::BigqueryV2::RangePartitioning.new(
              range: Google::Apis::BigqueryV2::RangePartitioning::Range.new
            )
            @gapi.configuration.load.range_partitioning.range.end = range_end
          end

          ##
          # Sets the time partitioning for the destination table. See [Partitioned
          # Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
          #
          # You can only set the time partitioning field while creating a table.
          # BigQuery does not allow you to change partitioning on an existing
          # table.
          #
          # @param [String] type The time partition type. Currently the only
          #   supported value is "DAY".
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #   dataset = bigquery.dataset "my_dataset"
          #
          #   gs_url = "gs://my-bucket/file-name.csv"
          #   load_job = dataset.load_job "my_new_table", gs_url do |job|
          #     job.time_partitioning_type = "DAY"
          #   end
          #
          #   load_job.wait_until_done!
          #   load_job.done? #=> true
          #
          # @!group Attributes
          #
          def time_partitioning_type= type
            @gapi.configuration.load.time_partitioning ||= Google::Apis::BigqueryV2::TimePartitioning.new
            @gapi.configuration.load.time_partitioning.update! type: type
          end

          ##
          # Sets the field on which to time partition the destination table. If not
          # set, the destination table is time partitioned by pseudo column
          # `_PARTITIONTIME`; if set, the table is time partitioned by this field.
          # See [Partitioned
          # Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
          #
          # The destination table must also be time partitioned. See
          # {#time_partitioning_type=}.
          #
          # You can only set the time partitioning field while creating a table.
          # BigQuery does not allow you to change partitioning on an existing
          # table.
          #
          # @param [String] field The time partition field. The field must be a
          #   top-level TIMESTAMP or DATE field. Its mode must be NULLABLE or
          #   REQUIRED.
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #   dataset = bigquery.dataset "my_dataset"
          #
          #   gs_url = "gs://my-bucket/file-name.csv"
          #   load_job = dataset.load_job "my_new_table", gs_url do |job|
          #     job.time_partitioning_type = "DAY"
          #     job.time_partitioning_field = "dob"
          #     job.schema do |schema|
          #       schema.timestamp "dob", mode: :required
          #     end
          #   end
          #
          #   load_job.wait_until_done!
          #   load_job.done? #=> true
          #
          # @!group Attributes
          #
          def time_partitioning_field= field
            @gapi.configuration.load.time_partitioning ||= Google::Apis::BigqueryV2::TimePartitioning.new
            @gapi.configuration.load.time_partitioning.update! field: field
          end

          ##
          # Sets the time partition expiration for the destination table. See
          # [Partitioned
          # Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
          #
          # The destination table must also be time partitioned. See
          # {#time_partitioning_type=}.
          #
          # @param [Integer] expiration An expiration time, in seconds,
          #   for data in time partitions.
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #   dataset = bigquery.dataset "my_dataset"
          #
          #   gs_url = "gs://my-bucket/file-name.csv"
          #   load_job = dataset.load_job "my_new_table", gs_url do |job|
          #     job.time_partitioning_type = "DAY"
          #     job.time_partitioning_expiration = 86_400
          #   end
          #
          #   load_job.wait_until_done!
          #   load_job.done? #=> true
          #
          # @!group Attributes
          #
          def time_partitioning_expiration= expiration
            @gapi.configuration.load.time_partitioning ||= Google::Apis::BigqueryV2::TimePartitioning.new
            @gapi.configuration.load.time_partitioning.update! expiration_ms: expiration * 1000
          end

          ##
          # If set to true, queries over the destination table must specify a
          # time partition filter that can be used for time partition
          # elimination. See [Partitioned
          # Tables](https://cloud.google.com/bigquery/docs/partitioned-tables).
          #
          # @param [Boolean] val Indicates if queries over the destination table
          #   will require a time partition filter. The default value is `false`.
          #
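          # A sketch of requiring a partition filter follows; the names are
          # illustrative placeholders.
          #
          # @example Requiring a partition filter on queries (placeholder names):
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #   dataset = bigquery.dataset "my_dataset"
          #
          #   dataset.load_job "my_new_table", "gs://my-bucket/file.csv" do |job|
          #     job.time_partitioning_type = "DAY"
          #     job.time_partitioning_require_filter = true
          #   end
          #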
          # @!group Attributes
          #
          def time_partitioning_require_filter= val
            @gapi.configuration.load.time_partitioning ||= Google::Apis::BigqueryV2::TimePartitioning.new
            @gapi.configuration.load.time_partitioning.update! require_partition_filter: val
          end

          ##
          # Sets one or more fields on which the destination table should be
          # clustered. Must be specified with time-based partitioning; data in
          # the table will first be partitioned and subsequently clustered.
          #
          # Only top-level, non-repeated, simple-type fields are supported. When
          # you cluster a table using multiple columns, the order of columns you
          # specify is important. The order of the specified columns determines
          # the sort order of the data.
          #
          # See {LoadJob#clustering_fields}.
          #
          # @see https://cloud.google.com/bigquery/docs/partitioned-tables
          #   Partitioned Tables
          # @see https://cloud.google.com/bigquery/docs/clustered-tables
          #   Introduction to Clustered Tables
          # @see https://cloud.google.com/bigquery/docs/creating-clustered-tables
          #   Creating and Using Clustered Tables
          #
          # @param [Array<String>] fields The clustering fields. Only top-level,
          #   non-repeated, simple-type fields are supported.
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #   dataset = bigquery.dataset "my_dataset"
          #
          #   gs_url = "gs://my-bucket/file-name.csv"
          #   load_job = dataset.load_job "my_new_table", gs_url do |job|
          #     job.time_partitioning_type = "DAY"
          #     job.time_partitioning_field = "dob"
          #     job.schema do |schema|
          #       schema.timestamp "dob", mode: :required
          #       schema.string "first_name", mode: :required
          #       schema.string "last_name", mode: :required
          #     end
          #     job.clustering_fields = ["last_name", "first_name"]
          #   end
          #
          #   load_job.wait_until_done!
          #   load_job.done? #=> true
          #
          # @!group Attributes
          #
          def clustering_fields= fields
            @gapi.configuration.load.clustering ||= Google::Apis::BigqueryV2::Clustering.new
            @gapi.configuration.load.clustering.fields = fields
          end

          def cancel
            raise "not implemented in #{self.class}"
          end

          def rerun!
            raise "not implemented in #{self.class}"
          end

          def reload!
            raise "not implemented in #{self.class}"
          end
          alias refresh! reload!

          def wait_until_done!
            raise "not implemented in #{self.class}"
          end

          ##
          # @private Returns the Google API client library representation of
          # this job.
          #
          # @return [Google::Apis::BigqueryV2::Job] (See
          #   {Google::Apis::BigqueryV2::Job})
          def to_gapi
            check_for_mutated_schema!
            @gapi
          end

          protected

          ##
          # Changed to a no-op.
          def ensure_full_data!
            # Do nothing because we trust the gapi is full before we get here.
          end

          ##
          # Queue up all the updates instead of making them.
          def patch_gapi! attribute
            @updates << attribute
            @updates.uniq!
          end
        end
      end
    end
  end
end