google-cloud-bigquery 1.1.0 → 1.2.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cb7f8896ef2f04b07d335a2d619a60557246633ef343255c7fd6f2c31c1afbcf
4
- data.tar.gz: 8445e2df96afbd0be615ee17d891d3b192a2a6a32f21f010113809c020e6aa76
3
+ metadata.gz: 41d3da96cf5cfe992d89be7d76eae719bee105acab2b2621ae9d96637963c4fa
4
+ data.tar.gz: 2da003b89f6ab554f97941a56aa54bc35b0a9bb239b1bf19638a1ed148df8592
5
5
  SHA512:
6
- metadata.gz: ead4df8de6a2db97edf826bfdf82c49b48e7eebd999d3ee1448616a5b1d53d219bcc66dfe13fbe6e005282d7a59582ff4ead46adbe10a25cb365b19ebb0a5ba2
7
- data.tar.gz: f1de83104a82b84673b071395aa1527bc4ed3d992c2ed0b7b04b9cbb13f3f303277da04ee93e944411ae0cab10cdea573783da6ef9ece20782f429c34da76ac2
6
+ metadata.gz: a049fc85d22b3ea866a5beff61c46e987411acf3d946837adf465ebfabfade7d4fc633e487b29eb3b3b0be36c0f08e3b8740cc636a8c5c362b40b13fc47ad127
7
+ data.tar.gz: 7f8c25afa22dfb9259e73ac5b3ae6917629b84bfcde9cf52055d0a88bb680450ef1ed173d8c64edfae06afbb535b3f02e75f024a7032b14907908525d6c4d582
data/README.md CHANGED
@@ -23,26 +23,27 @@ Instructions and configuration options are covered in the [Authentication Guide]
23
23
  ```ruby
24
24
  require "google/cloud/bigquery"
25
25
 
26
- bigquery = Google::Cloud::Bigquery.new(
27
- project_id: "my-todo-project",
28
- credentials: "/path/to/keyfile.json"
29
- )
30
-
31
- # Create a new table to archive todos
32
- dataset = bigquery.dataset "my-todo-archive"
33
- table = dataset.create_table "todos",
34
- name: "Todos Archive",
35
- description: "Archive for completed TODO records"
36
-
37
- # Load data into the table
38
- file = File.open "/archive/todos/completed-todos.csv"
39
- table.load file
40
-
41
- # Run a query for the number of completed todos by owner
42
- count_sql = "SELECT owner, COUNT(*) AS complete_count FROM todos GROUP BY owner"
43
- data = bigquery.query count_sql
26
+ bigquery = Google::Cloud::Bigquery.new
27
+ dataset = bigquery.create_dataset "my_dataset"
28
+
29
+ table = dataset.create_table "my_table" do |t|
30
+ t.name = "My Table",
31
+ t.description = "A description of my table."
32
+ t.schema do |s|
33
+ s.string "first_name", mode: :required
34
+ s.string "last_name", mode: :required
35
+ s.integer "age", mode: :required
36
+ end
37
+ end
38
+
39
+ # Load data into the table from Google Cloud Storage
40
+ table.load "gs://my-bucket/file-name.csv"
41
+
42
+ # Run a query
43
+ data = dataset.query "SELECT first_name FROM my_table"
44
+
44
45
  data.each do |row|
45
- puts row[:name]
46
+ puts row[:first_name]
46
47
  end
47
48
  ```
48
49
 
@@ -50,6 +51,13 @@ end
50
51
 
51
52
  This library is supported on Ruby 2.0+.
52
53
 
54
+ However, Ruby 2.3 or later is strongly recommended, as earlier releases have
55
+ reached or are nearing end-of-life. After June 1, 2018, Google will provide
56
+ official support only for Ruby versions that are considered current and
57
+ supported by Ruby Core (that is, Ruby versions that are either in normal
58
+ maintenance or in security maintenance).
59
+ See https://www.ruby-lang.org/en/downloads/branches/ for further details.
60
+
53
61
  ## Versioning
54
62
 
55
63
  This library follows [Semantic Versioning](http://semver.org/).
@@ -22,7 +22,7 @@ require "date"
22
22
  module Google
23
23
  module Cloud
24
24
  module Bigquery
25
- # rubocop:disable all
25
+ # rubocop:disable Metrics/ModuleLength
26
26
 
27
27
  ##
28
28
  # @private
@@ -42,7 +42,6 @@ module Google
42
42
  # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
43
43
  # | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
44
44
  # | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
45
-
46
45
  module Convert
47
46
  ##
48
47
  # @private
@@ -62,6 +61,8 @@ module Google
62
61
  Hash[row_pairs]
63
62
  end
64
63
 
64
+ # rubocop:disable all
65
+
65
66
  def self.format_value value, field
66
67
  if value.nil?
67
68
  nil
@@ -212,16 +213,6 @@ module Google
212
213
  end
213
214
  end
214
215
 
215
- ##
216
- # @private
217
- def self.to_json_rows rows
218
- rows.map { |row| to_json_row row }
219
- end
220
- ##
221
- # @private
222
- def self.to_json_row row
223
- Hash[row.map { |k, v| [k.to_s, to_json_value(v)] }]
224
- end
225
216
  ##
226
217
  # @private
227
218
  def self.to_json_value value
@@ -245,14 +236,116 @@ module Google
245
236
  end
246
237
  end
247
238
 
239
+ # rubocop:enable all
240
+
241
+ ##
242
+ # @private
243
+ def self.to_json_rows rows
244
+ rows.map { |row| to_json_row row }
245
+ end
246
+
247
+ ##
248
+ # @private
249
+ def self.to_json_row row
250
+ Hash[row.map { |k, v| [k.to_s, to_json_value(v)] }]
251
+ end
252
+
248
253
  def self.resolve_legacy_sql standard_sql, legacy_sql
249
254
  return !standard_sql unless standard_sql.nil?
250
255
  return legacy_sql unless legacy_sql.nil?
251
256
  false
252
257
  end
253
258
 
254
- # rubocop:enable all
259
+ ##
260
+ # @private
261
+ #
262
+ # Converts create disposition strings to API values.
263
+ #
264
+ # @return [String] API representation of create disposition.
265
+ def self.create_disposition str
266
+ val = {
267
+ "create_if_needed" => "CREATE_IF_NEEDED",
268
+ "createifneeded" => "CREATE_IF_NEEDED",
269
+ "if_needed" => "CREATE_IF_NEEDED",
270
+ "needed" => "CREATE_IF_NEEDED",
271
+ "create_never" => "CREATE_NEVER",
272
+ "createnever" => "CREATE_NEVER",
273
+ "never" => "CREATE_NEVER"
274
+ }[str.to_s.downcase]
275
+ return val unless val.nil?
276
+ str
277
+ end
278
+
279
+ ##
280
+ # @private
281
+ #
282
+ # Converts write disposition strings to API values.
283
+ #
284
+ # @return [String] API representation of write disposition.
285
+ def self.write_disposition str
286
+ val = {
287
+ "write_truncate" => "WRITE_TRUNCATE",
288
+ "writetruncate" => "WRITE_TRUNCATE",
289
+ "truncate" => "WRITE_TRUNCATE",
290
+ "write_append" => "WRITE_APPEND",
291
+ "writeappend" => "WRITE_APPEND",
292
+ "append" => "WRITE_APPEND",
293
+ "write_empty" => "WRITE_EMPTY",
294
+ "writeempty" => "WRITE_EMPTY",
295
+ "empty" => "WRITE_EMPTY"
296
+ }[str.to_s.downcase]
297
+ return val unless val.nil?
298
+ str
299
+ end
300
+
301
+ ##
302
+ # @private
303
+ #
304
+ # Converts source format strings to API values.
305
+ #
306
+ # @return [String] API representation of source format.
307
+ def self.source_format format
308
+ val = {
309
+ "csv" => "CSV",
310
+ "json" => "NEWLINE_DELIMITED_JSON",
311
+ "newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
312
+ "avro" => "AVRO",
313
+ "datastore" => "DATASTORE_BACKUP",
314
+ "backup" => "DATASTORE_BACKUP",
315
+ "datastore_backup" => "DATASTORE_BACKUP"
316
+ }[format.to_s.downcase]
317
+ return val unless val.nil?
318
+ format
319
+ end
320
+
321
+ ##
322
+ # @private
323
+ #
324
+ # Converts file paths into source format by extension.
325
+ #
326
+ # @return [String] API representation of source format.
327
+ def self.derive_source_format_from_list paths
328
+ paths.map do |path|
329
+ derive_source_format path
330
+ end.compact.uniq.first
331
+ end
332
+
333
+ ##
334
+ # @private
335
+ #
336
+ # Converts file path into source format by extension.
337
+ #
338
+ # @return [String] API representation of source format.
339
+ def self.derive_source_format path
340
+ return "CSV" if path.end_with? ".csv"
341
+ return "NEWLINE_DELIMITED_JSON" if path.end_with? ".json"
342
+ return "AVRO" if path.end_with? ".avro"
343
+ return "DATASTORE_BACKUP" if path.end_with? ".backup_info"
344
+ nil
345
+ end
255
346
  end
347
+
348
+ # rubocop:enable Metrics/ModuleLength
256
349
  end
257
350
  end
258
351
  end
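The new Convert helpers added above normalize user-friendly disposition and format strings into the values the BigQuery API expects. A minimal sketch of the mappings, based only on the hashes shown in this hunk (Convert is marked `@private`, so these calls illustrate internal behavior rather than public API):

```ruby
require "google/cloud/bigquery"

# Internal helpers; shown only to illustrate the string mappings above.
conv = Google::Cloud::Bigquery::Convert
conv.create_disposition "needed"          #=> "CREATE_IF_NEEDED"
conv.write_disposition :truncate          #=> "WRITE_TRUNCATE"
conv.source_format "json"                 #=> "NEWLINE_DELIMITED_JSON"
conv.derive_source_format "data.avro"     #=> "AVRO"
conv.derive_source_format_from_list ["a.csv", "b.csv"] #=> "CSV"
```

Unrecognized values pass through unchanged, so the API can reject them with its own error message.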
@@ -12,6 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ require "google/cloud/bigquery/encryption_configuration"
15
16
 
16
17
  module Google
17
18
  module Cloud
@@ -129,6 +130,173 @@ module Google
129
130
  disp = @gapi.configuration.copy.write_disposition
130
131
  disp == "WRITE_EMPTY"
131
132
  end
133
+
134
+ ##
135
+ # The encryption configuration of the destination table.
136
+ #
137
+ # @return [Google::Cloud::BigQuery::EncryptionConfiguration] Custom
138
+ # encryption configuration (e.g., Cloud KMS keys).
139
+ #
140
+ # @!group Attributes
141
+ def encryption
142
+ EncryptionConfiguration.from_gapi(
143
+ @gapi.configuration.copy.destination_encryption_configuration
144
+ )
145
+ end
146
+
147
+ ##
148
+ # Yielded to a block to accumulate changes for an API request.
149
+ class Updater < CopyJob
150
+ ##
151
+ # @private Create an Updater object.
152
+ def initialize gapi
153
+ @gapi = gapi
154
+ end
155
+
156
+ ##
157
+ # @private Create an Updater from an options hash.
158
+ #
159
+ # @return [Google::Cloud::Bigquery::CopyJob::Updater] A job
160
+ # configuration object for setting copy options.
161
+ def self.from_options service, source, target, options = {}
162
+ job_ref = service.job_ref_from options[:job_id], options[:prefix]
163
+ req = Google::Apis::BigqueryV2::Job.new(
164
+ job_reference: job_ref,
165
+ configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
166
+ copy: Google::Apis::BigqueryV2::JobConfigurationTableCopy.new(
167
+ source_table: source,
168
+ destination_table: target
169
+ ),
170
+ dry_run: options[:dryrun]
171
+ )
172
+ )
173
+
174
+ updater = CopyJob::Updater.new req
175
+ updater.create = options[:create]
176
+ updater.write = options[:write]
177
+ updater.labels = options[:labels] if options[:labels]
178
+ updater
179
+ end
180
+
181
+ ##
182
+ # Sets the geographic location where the job should run. Required
183
+ # except for US and EU.
184
+ #
185
+ # @param [String] value A geographic location, such as "US", "EU" or
186
+ # "asia-northeast1". Required except for US and EU.
187
+ #
188
+ # @example
189
+ # require "google/cloud/bigquery"
190
+ #
191
+ # bigquery = Google::Cloud::Bigquery.new
192
+ # dataset = bigquery.dataset "my_dataset"
193
+ # table = dataset.table "my_table"
194
+ # destination_table = dataset.table "my_destination_table"
195
+ #
196
+ # copy_job = table.copy_job destination_table do |j|
197
+ # j.location = "EU"
198
+ # end
199
+ #
200
+ # copy_job.wait_until_done!
201
+ # copy_job.done? #=> true
202
+ #
203
+ # @!group Attributes
204
+ def location= value
205
+ @gapi.job_reference.location = value
206
+ end
207
+
208
+ ##
209
+ # Sets the create disposition.
210
+ #
211
+ # This specifies whether the job is allowed to create new tables. The
212
+ # default value is `needed`.
213
+ #
214
+ # The following values are supported:
215
+ #
216
+ # * `needed` - Create the table if it does not exist.
217
+ # * `never` - The table must already exist. A 'notFound' error is
218
+ # raised if the table does not exist.
219
+ #
220
+ # @param [String] new_create The new create disposition.
221
+ #
222
+ # @!group Attributes
223
+ def create= new_create
224
+ @gapi.configuration.copy.update! create_disposition:
225
+ Convert.create_disposition(new_create)
226
+ end
227
+
228
+ ##
229
+ # Sets the write disposition.
230
+ #
231
+ # This specifies how to handle data already present in the table. The
232
+ # default value is `append`.
233
+ #
234
+ # The following values are supported:
235
+ #
236
+ # * `truncate` - BigQuery overwrites the table data.
237
+ # * `append` - BigQuery appends the data to the table.
238
+ # * `empty` - An error will be returned if the table already contains
239
+ # data.
240
+ #
241
+ # @param [String] new_write The new write disposition.
242
+ #
243
+ # @!group Attributes
244
+ def write= new_write
245
+ @gapi.configuration.copy.update! write_disposition:
246
+ Convert.write_disposition(new_write)
247
+ end
248
+
249
+ ##
250
+ # Sets the encryption configuration of the destination table.
251
+ #
252
+ # @param [Google::Cloud::BigQuery::EncryptionConfiguration] val
253
+ # Custom encryption configuration (e.g., Cloud KMS keys).
254
+ #
255
+ # @example
256
+ # require "google/cloud/bigquery"
257
+ #
258
+ # bigquery = Google::Cloud::Bigquery.new
259
+ # dataset = bigquery.dataset "my_dataset"
260
+ # table = dataset.table "my_table"
261
+ #
262
+ # key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
263
+ # encrypt_config = bigquery.encryption kms_key: key_name
264
+ # job = table.copy_job "my_dataset.new_table" do |job|
265
+ # job.encryption = encrypt_config
266
+ # end
267
+ #
268
+ # @!group Attributes
269
+ def encryption= val
270
+ @gapi.configuration.copy.update!(
271
+ destination_encryption_configuration: val.to_gapi
272
+ )
273
+ end
274
+
275
+ ##
276
+ # Sets the labels to use for the job.
277
+ #
278
+ # @param [Hash] value A hash of user-provided labels associated with
279
+ # the job. You can use these to organize and group your jobs. Label
280
+ # keys and values can be no longer than 63 characters, can only
281
+ # contain lowercase letters, numeric characters, underscores and
282
+ # dashes. International characters are allowed. Label values are
283
+ # optional. Label keys must start with a letter and each label in
284
+ # the list must have a different key.
285
+ #
286
+ # @!group Attributes
287
+ def labels= value
288
+ @gapi.configuration.update! labels: value
289
+ end
290
+
291
+ ##
292
+ # @private Returns the Google API client library version of this job.
293
+ #
294
+ # @return [<Google::Apis::BigqueryV2::Job>] (See
295
+ # {Google::Apis::BigqueryV2::Job})
296
+ def to_gapi
297
+ @gapi
298
+ end
299
+ end
132
300
  end
133
301
  end
134
302
  end
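With the new `CopyJob::Updater`, copy options can now also be set in a block instead of only via keyword arguments. A hedged sketch combining the create and write dispositions documented above (dataset and table names are hypothetical):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"
table    = dataset.table "my_table"

copy_job = table.copy_job "my_dataset.my_table_backup" do |j|
  j.create = "needed"    # CREATE_IF_NEEDED: create the table if it is missing
  j.write  = "truncate"  # WRITE_TRUNCATE: overwrite any existing data
  j.labels = { "purpose" => "backup" }
end

copy_job.wait_until_done!
copy_job.done? #=> true
```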
@@ -266,8 +266,8 @@ module Google
266
266
  # The geographic location where the dataset should reside. Possible
267
267
  # values include `EU` and `US`. The default value is `US`.
268
268
  #
269
- # @return [String, nil] The location code, or `nil` if the object is a
270
- # reference (see {#reference?}).
269
+ # @return [String, nil] The geographic location, or `nil` if the object
270
+ # is a reference (see {#reference?}).
271
271
  #
272
272
  # @!group Attributes
273
273
  #
@@ -696,6 +696,12 @@ module Google
696
696
  # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types)
697
697
  # for an overview of each BigQuery data type, including allowed values.
698
698
  #
699
+ # The geographic location for the job ("US", "EU", etc.) can be set via
700
+ # {QueryJob::Updater#location=} in a block passed to this method. If the
701
+ # dataset is a full resource representation (see {#resource_full?}), the
702
+ # location of the job will be automatically set to the location of the
703
+ # dataset.
704
+ #
699
705
  # @param [String] query A query string, following the BigQuery [query
700
706
  # syntax](https://cloud.google.com/bigquery/query-reference), of the
701
707
  # query to execute. Example: "SELECT count(f1) FROM
@@ -761,11 +767,6 @@ module Google
761
767
  # Flattens all nested and repeated fields in the query results. The
762
768
  # default value is `true`. `large_results` parameter must be `true` if
763
769
  # this is set to `false`.
764
- # @param [Integer] maximum_billing_tier Limits the billing tier for this
765
- # job. Queries that have resource usage beyond this tier will fail
766
- # (without incurring a charge). Optional. If unspecified, this will be
767
- # set to your project default. For more information, see [High-Compute
768
- # queries](https://cloud.google.com/bigquery/pricing#high-compute).
769
770
  # @param [Integer] maximum_bytes_billed Limits the bytes billed for this
770
771
  # job. Queries that will have bytes billed beyond this limit will fail
771
772
  # (without incurring a charge). Optional. If unspecified, this will be
@@ -799,6 +800,11 @@ module Google
799
800
  # inline code resource is equivalent to providing a URI for a file
800
801
  # containing the same code. See [User-Defined
801
802
  # Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
803
+ # @param [Integer] maximum_billing_tier Deprecated: Change the billing
804
+ # tier to allow high-compute queries.
805
+ # @yield [job] a job configuration object
806
+ # @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
807
+ # configuration object for setting additional options for the query.
802
808
  #
803
809
  # @return [Google::Cloud::Bigquery::QueryJob] A new query job object.
804
810
  #
@@ -865,7 +871,7 @@ module Google
865
871
  # end
866
872
  # end
867
873
  #
868
- # @example Query using external data source:
874
+ # @example Query using external data source, set destination:
869
875
  # require "google/cloud/bigquery"
870
876
  #
871
877
  # bigquery = Google::Cloud::Bigquery.new
@@ -877,8 +883,10 @@ module Google
877
883
  # csv.skip_leading_rows = 1
878
884
  # end
879
885
  #
880
- # job = dataset.query_job "SELECT * FROM my_ext_table",
881
- # external: { my_ext_table: csv_table }
886
+ # job = dataset.query_job "SELECT * FROM my_ext_table" do |query|
887
+ # query.external = { my_ext_table: csv_table }
888
+ # query.table = dataset.table "my_table", skip_lookup: true
889
+ # end
882
890
  #
883
891
  # job.wait_until_done!
884
892
  # if !job.failed?
@@ -895,17 +903,23 @@ module Google
895
903
  legacy_sql: nil, large_results: nil, flatten: nil,
896
904
  maximum_billing_tier: nil, maximum_bytes_billed: nil,
897
905
  job_id: nil, prefix: nil, labels: nil, udfs: nil
906
+ ensure_service!
898
907
  options = { priority: priority, cache: cache, table: table,
899
908
  create: create, write: write,
900
909
  large_results: large_results, flatten: flatten,
901
910
  legacy_sql: legacy_sql, standard_sql: standard_sql,
902
911
  maximum_billing_tier: maximum_billing_tier,
903
912
  maximum_bytes_billed: maximum_bytes_billed,
904
- params: params, external: external, labels: labels,
905
- job_id: job_id, prefix: prefix, udfs: udfs }
906
- options[:dataset] ||= self
907
- ensure_service!
908
- gapi = service.query_job query, options
913
+ job_id: job_id, prefix: prefix, params: params,
914
+ external: external, labels: labels, udfs: udfs }
915
+
916
+ updater = QueryJob::Updater.from_options service, query, options
917
+ updater.dataset = self
918
+ updater.location = location if location # may be dataset reference
919
+
920
+ yield updater if block_given?
921
+
922
+ gapi = service.query_job updater.to_gapi
909
923
  Job.from_gapi gapi, service
910
924
  end
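As the added documentation notes, `query_job` now yields a `QueryJob::Updater`, so per-job options such as the location can be set in a block. A brief hedged sketch (table name and SQL are hypothetical):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

job = dataset.query_job "SELECT name FROM my_table" do |query|
  query.location = "EU" # QueryJob::Updater#location=
end

job.wait_until_done!
job.data.each { |row| puts row[:name] } unless job.failed?
```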
911
925
 
@@ -938,6 +952,12 @@ module Google
938
952
  # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types)
939
953
  # for an overview of each BigQuery data type, including allowed values.
940
954
  #
955
+ # The geographic location for the job ("US", "EU", etc.) can be set via
956
+ # {QueryJob::Updater#location=} in a block passed to this method. If the
957
+ # dataset is a full resource representation (see {#resource_full?}), the
958
+ # location of the job will be automatically set to the location of the
959
+ # dataset.
960
+ #
941
961
  # @see https://cloud.google.com/bigquery/querying-data Querying Data
942
962
  #
943
963
  # @param [String] query A query string, following the BigQuery [query
@@ -985,6 +1005,9 @@ module Google
985
1005
  # When set to false, the values of `large_results` and `flatten` are
986
1006
  # ignored; the query will be run as if `large_results` is true and
987
1007
  # `flatten` is false. Optional. The default value is false.
1008
+ # @yield [job] a job configuration object
1009
+ # @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
1010
+ # configuration object for setting additional options for the query.
988
1011
  #
989
1012
  # @return [Google::Cloud::Bigquery::Data] A new data object.
990
1013
  #
@@ -1039,7 +1062,7 @@ module Google
1039
1062
  # puts row[:name]
1040
1063
  # end
1041
1064
  #
1042
- # @example Query using external data source:
1065
+ # @example Query using external data source, set destination:
1043
1066
  # require "google/cloud/bigquery"
1044
1067
  #
1045
1068
  # bigquery = Google::Cloud::Bigquery.new
@@ -1051,8 +1074,10 @@ module Google
1051
1074
  # csv.skip_leading_rows = 1
1052
1075
  # end
1053
1076
  #
1054
- # data = dataset.query "SELECT * FROM my_ext_table",
1055
- # external: { my_ext_table: csv_table }
1077
+ # data = dataset.query "SELECT * FROM my_ext_table" do |query|
1078
+ # query.external = { my_ext_table: csv_table }
1079
+ # query.table = dataset.table "my_table", skip_lookup: true
1080
+ # end
1056
1081
  #
1057
1082
  # data.each do |row|
1058
1083
  # puts row[:name]
@@ -1063,21 +1088,19 @@ module Google
1063
1088
  def query query, params: nil, external: nil, max: nil, cache: true,
1064
1089
  standard_sql: nil, legacy_sql: nil
1065
1090
  ensure_service!
1066
- options = { params: params, external: external, cache: cache,
1067
- legacy_sql: legacy_sql, standard_sql: standard_sql }
1091
+ options = { priority: "INTERACTIVE", external: external, cache: cache,
1092
+ legacy_sql: legacy_sql, standard_sql: standard_sql,
1093
+ params: params }
1094
+ options[:dataset] ||= self
1095
+ updater = QueryJob::Updater.from_options service, query, options
1096
+ updater.location = location if location # may be dataset reference
1068
1097
 
1069
- job = query_job query, options
1070
- job.wait_until_done!
1098
+ yield updater if block_given?
1071
1099
 
1072
- if job.failed?
1073
- begin
1074
- # raise to activate ruby exception cause handling
1075
- raise job.gapi_error
1076
- rescue StandardError => e
1077
- # wrap Google::Apis::Error with Google::Cloud::Error
1078
- raise Google::Cloud::Error.from_error(e)
1079
- end
1080
- end
1100
+ gapi = service.query_job updater.to_gapi
1101
+ job = Job.from_gapi gapi, service
1102
+ job.wait_until_done!
1103
+ ensure_job_succeeded! job
1081
1104
 
1082
1105
  job.data max: max
1083
1106
  end
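`Dataset#query` gains the same block form and now builds the job through `QueryJob::Updater` before waiting on it and checking the result with `ensure_job_succeeded!`. A hedged sketch of the synchronous path (names hypothetical):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

data = dataset.query "SELECT name FROM my_table" do |query|
  query.location = "EU" # optional; inherited from the dataset when known
end

data.each { |row| puts row[:name] }
```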
@@ -1147,10 +1170,17 @@ module Google
1147
1170
  # file directly. See [Loading Data with a POST
1148
1171
  # Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
1149
1172
  #
1173
+ # The geographic location for the job ("US", "EU", etc.) can be set via
1174
+ # {LoadJob::Updater#location=} in a block passed to this method. If the
1175
+ # dataset is a full resource representation (see {#resource_full?}), the
1176
+ # location of the job will be automatically set to the location of the
1177
+ # dataset.
1178
+ #
1150
1179
  # @param [String] table_id The destination table to load the data into.
1151
- # @param [File, Google::Cloud::Storage::File, String, URI] file A file
1152
- # or the URI of a Google Cloud Storage file containing data to load
1153
- # into the table.
1180
+ # @param [File, Google::Cloud::Storage::File, String, URI,
1181
+ # Array<Google::Cloud::Storage::File, String, URI>] files
1182
+ # A file or the URI of a Google Cloud Storage file, or an Array of
1183
+ # those, containing data to load into the table.
1154
1184
  # @param [String] format The exported file format. The default value is
1155
1185
  # `csv`.
1156
1186
  #
@@ -1269,13 +1299,12 @@ module Google
1269
1299
  # optional. Label keys must start with a letter and each label in the
1270
1300
  # list must have a different key.
1271
1301
  #
1272
- # @yield [schema] A block for setting the schema for the destination
1273
- # table. The schema can be omitted if the destination table already
1274
- # exists, or if you're loading data from a Google Cloud Datastore
1275
- # backup.
1276
- # @yieldparam [Google::Cloud::Bigquery::Schema] schema The schema
1277
- # instance provided using the `schema` option, or a new, empty schema
1278
- # instance
1302
+ # @yield [updater] A block for setting the schema and other
1303
+ # options for the destination table. The schema can be omitted if the
1304
+ # destination table already exists, or if you're loading data from a
1305
+ # Google Cloud Datastore backup.
1306
+ # @yieldparam [Google::Cloud::Bigquery::LoadJob::Updater] updater An
1307
+ # updater to modify the load job and its schema.
1279
1308
  #
1280
1309
  # @return [Google::Cloud::Bigquery::LoadJob] A new load job object.
1281
1310
  #
@@ -1312,6 +1341,25 @@ module Google
1312
1341
  # end
1313
1342
  # end
1314
1343
  #
1344
+ # @example Pass a list of google-cloud-storage files:
1345
+ # require "google/cloud/bigquery"
1346
+ # require "google/cloud/storage"
1347
+ #
1348
+ # bigquery = Google::Cloud::Bigquery.new
1349
+ # dataset = bigquery.dataset "my_dataset"
1350
+ #
1351
+ # storage = Google::Cloud::Storage.new
1352
+ # bucket = storage.bucket "my-bucket"
1353
+ # file = bucket.file "file-name.csv"
1354
+ # list = [file, "gs://my-bucket/file-name2.csv"]
1355
+ # load_job = dataset.load_job "my_new_table", list do |schema|
1356
+ # schema.string "first_name", mode: :required
1357
+ # schema.record "cities_lived", mode: :repeated do |nested_schema|
1358
+ # nested_schema.string "place", mode: :required
1359
+ # nested_schema.integer "number_of_years", mode: :required
1360
+ # end
1361
+ # end
1362
+ #
1315
1363
  # @example Upload a file directly:
1316
1364
  # require "google/cloud/bigquery"
1317
1365
  #
@@ -1333,13 +1381,15 @@ module Google
1333
1381
  # bigquery = Google::Cloud::Bigquery.new
1334
1382
  # dataset = bigquery.dataset "my_dataset"
1335
1383
  #
1336
- # load_job = dataset.load_job "my_new_table",
1337
- # "gs://my-bucket/xxxx.kind_name.backup_info",
1338
- # format: "datastore_backup"
1384
+ # load_job = dataset.load_job(
1385
+ # "my_new_table",
1386
+ # "gs://my-bucket/xxxx.kind_name.backup_info") do |j|
1387
+ # j.format = "datastore_backup"
1388
+ # end
1339
1389
  #
1340
1390
  # @!group Data
1341
1391
  #
1342
- def load_job table_id, file, format: nil, create: nil, write: nil,
1392
+ def load_job table_id, files, format: nil, create: nil, write: nil,
1343
1393
  projection_fields: nil, jagged_rows: nil,
1344
1394
  quoted_newlines: nil, encoding: nil, delimiter: nil,
1345
1395
  ignore_unknown: nil, max_bad_records: nil, quote: nil,
@@ -1347,25 +1397,25 @@ module Google
1347
1397
  prefix: nil, labels: nil, autodetect: nil, null_marker: nil
1348
1398
  ensure_service!
1349
1399
 
1350
- if block_given?
1351
- schema ||= Schema.from_gapi
1352
- yield schema
1353
- end
1354
- schema_gapi = schema.to_gapi if schema
1355
-
1356
- options = { format: format, create: create, write: write,
1357
- projection_fields: projection_fields,
1358
- jagged_rows: jagged_rows,
1359
- quoted_newlines: quoted_newlines, encoding: encoding,
1360
- delimiter: delimiter, ignore_unknown: ignore_unknown,
1361
- max_bad_records: max_bad_records, quote: quote,
1362
- skip_leading: skip_leading, dryrun: dryrun,
1363
- schema: schema_gapi, job_id: job_id, prefix: prefix,
1364
- labels: labels, autodetect: autodetect,
1365
- null_marker: null_marker }
1366
- return load_storage(table_id, file, options) if storage_url? file
1367
- return load_local(table_id, file, options) if local_file? file
1368
- raise Google::Cloud::Error, "Don't know how to load #{file}"
1400
+ updater = load_job_updater table_id,
1401
+ format: format, create: create,
1402
+ write: write,
1403
+ projection_fields: projection_fields,
1404
+ jagged_rows: jagged_rows,
1405
+ quoted_newlines: quoted_newlines,
1406
+ encoding: encoding,
1407
+ delimiter: delimiter,
1408
+ ignore_unknown: ignore_unknown,
1409
+ max_bad_records: max_bad_records,
1410
+ quote: quote, skip_leading: skip_leading,
1411
+ dryrun: dryrun, schema: schema,
1412
+ job_id: job_id, prefix: prefix,
1413
+ labels: labels, autodetect: autodetect,
1414
+ null_marker: null_marker
1415
+
1416
+ yield updater if block_given?
1417
+
1418
+ load_local_or_uri files, updater
1369
1419
  end
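`load_job` now accepts a single file or an array of files, and the block receives a `LoadJob::Updater` instead of a bare schema, so CSV and job options can be set alongside the schema. A hedged sketch using setters that the new private helpers call on the updater (bucket and table names are hypothetical):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

load_job = dataset.load_job "my_new_table",
                            "gs://my-bucket/file-name.csv" do |job|
  job.skip_leading = 1      # skip the CSV header row
  job.autodetect   = true   # infer the schema from the data
  job.write        = "truncate"
end

load_job.wait_until_done!
```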
1370
1420
 
1371
1421
  ##
@@ -1379,10 +1429,17 @@ module Google
1379
1429
  # file directly. See [Loading Data with a POST
1380
1430
  # Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
1381
1431
  #
1432
+ # The geographic location for the job ("US", "EU", etc.) can be set via
1433
+ # {LoadJob::Updater#location=} in a block passed to this method. If the
1434
+ # dataset is a full resource representation (see {#resource_full?}), the
1435
+ # location of the job will be automatically set to the location of the
1436
+ # dataset.
1437
+ #
1382
1438
  # @param [String] table_id The destination table to load the data into.
1383
- # @param [File, Google::Cloud::Storage::File, String, URI] file A file
1384
- # or the URI of a Google Cloud Storage file containing data to load
1385
- # into the table.
1439
+ # @param [File, Google::Cloud::Storage::File, String, URI,
1440
+ # Array<Google::Cloud::Storage::File, String, URI>] files
1441
+ # A file or the URI of a Google Cloud Storage file, or an Array of
1442
+ # those, containing data to load into the table.
1386
1443
  # @param [String] format The exported file format. The default value is
1387
1444
  # `csv`.
1388
1445
  #
@@ -1479,13 +1536,12 @@ module Google
1479
1536
  # this option. Also note that for most use cases, the block yielded by
1480
1537
  # this method is a more convenient way to configure the schema.
1481
1538
  #
1482
- # @yield [schema] A block for setting the schema for the destination
1483
- # table. The schema can be omitted if the destination table already
1484
- # exists, or if you're loading data from a Google Cloud Datastore
1485
- # backup.
1486
- # @yieldparam [Google::Cloud::Bigquery::Schema] schema The schema
1487
- # instance provided using the `schema` option, or a new, empty schema
1488
- # instance
1539
+ # @yield [updater] A block for setting the schema of the destination
1540
+ # table and other options for the load job. The schema can be omitted
1541
+ # if the destination table already exists, or if you're loading data
1542
+ # from a Google Cloud Datastore backup.
1543
+ # @yieldparam [Google::Cloud::Bigquery::LoadJob::Updater] updater An
1544
+ # updater to modify the load job and its schema.
1489
1545
  #
1490
1546
  # @return [Boolean] Returns `true` if the load job was successful.
1491
1547
  #
@@ -1522,6 +1578,25 @@ module Google
1522
1578
  # end
1523
1579
  # end
1524
1580
  #
1581
+ # @example Pass a list of google-cloud-storage files:
1582
+ # require "google/cloud/bigquery"
1583
+ # require "google/cloud/storage"
1584
+ #
1585
+ # bigquery = Google::Cloud::Bigquery.new
1586
+ # dataset = bigquery.dataset "my_dataset"
1587
+ #
1588
+ # storage = Google::Cloud::Storage.new
1589
+ # bucket = storage.bucket "my-bucket"
1590
+ # file = bucket.file "file-name.csv"
1591
+ # list = [file, "gs://my-bucket/file-name2.csv"]
1592
+ # dataset.load "my_new_table", list do |schema|
1593
+ # schema.string "first_name", mode: :required
1594
+ # schema.record "cities_lived", mode: :repeated do |nested_schema|
1595
+ # nested_schema.string "place", mode: :required
1596
+ # nested_schema.integer "number_of_years", mode: :required
1597
+ # end
1598
+ # end
1599
+ #
1525
1600
  # @example Upload a file directly:
1526
1601
  # require "google/cloud/bigquery"
1527
1602
  #
@@ -1544,41 +1619,39 @@ module Google
1544
1619
  # dataset = bigquery.dataset "my_dataset"
1545
1620
  #
1546
1621
  # dataset.load "my_new_table",
1547
- # "gs://my-bucket/xxxx.kind_name.backup_info",
1548
- # format: "datastore_backup"
1622
+ # "gs://my-bucket/xxxx.kind_name.backup_info" do |j|
1623
+ # j.format = "datastore_backup"
1624
+ # end
1549
1625
  #
1550
1626
  # @!group Data
1551
1627
  #
1552
- def load table_id, file, format: nil, create: nil, write: nil,
1628
+ def load table_id, files, format: nil, create: nil, write: nil,
1553
1629
  projection_fields: nil, jagged_rows: nil, quoted_newlines: nil,
1554
1630
  encoding: nil, delimiter: nil, ignore_unknown: nil,
1555
1631
  max_bad_records: nil, quote: nil, skip_leading: nil,
1556
1632
  schema: nil, autodetect: nil, null_marker: nil
1633
+ ensure_service!
1557
1634
 
1558
- yield (schema ||= Schema.from_gapi) if block_given?
1635
+ updater = load_job_updater table_id,
1636
+ format: format, create: create,
1637
+ write: write,
1638
+ projection_fields: projection_fields,
1639
+ jagged_rows: jagged_rows,
1640
+ quoted_newlines: quoted_newlines,
1641
+ encoding: encoding,
1642
+ delimiter: delimiter,
1643
+ ignore_unknown: ignore_unknown,
1644
+ max_bad_records: max_bad_records,
1645
+ quote: quote, skip_leading: skip_leading,
1646
+ schema: schema,
1647
+ autodetect: autodetect,
1648
+ null_marker: null_marker
1559
1649
 
1560
- options = { format: format, create: create, write: write,
1561
- projection_fields: projection_fields,
1562
- jagged_rows: jagged_rows,
1563
- quoted_newlines: quoted_newlines, encoding: encoding,
1564
- delimiter: delimiter, ignore_unknown: ignore_unknown,
1565
- max_bad_records: max_bad_records, quote: quote,
1566
- skip_leading: skip_leading, schema: schema,
1567
- autodetect: autodetect, null_marker: null_marker }
1568
- job = load_job table_id, file, options
1650
+ yield updater if block_given?
1569
1651
 
1652
+ job = load_local_or_uri files, updater
1570
1653
  job.wait_until_done!
1571
-
1572
- if job.failed?
1573
- begin
1574
- # raise to activate ruby exception cause handling
1575
- raise job.gapi_error
1576
- rescue StandardError => e
1577
- # wrap Google::Apis::Error with Google::Cloud::Error
1578
- raise Google::Cloud::Error.from_error(e)
1579
- end
1580
- end
1581
-
1654
+ ensure_job_succeeded! job
1582
1655
  true
1583
1656
  end
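`Dataset#load` follows the same pattern but blocks until the job finishes: it raises a `Google::Cloud::Error` via `ensure_job_succeeded!` if the load fails, and returns `true` otherwise. A short hedged sketch with a list of Cloud Storage URIs (hypothetical names):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

dataset.load "my_new_table", ["gs://my-bucket/a.csv", "gs://my-bucket/b.csv"]
#=> true (raises Google::Cloud::Error if the job fails)
```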
1584
1657
 
@@ -1946,29 +2019,157 @@ module Google
1946
2019
  reload! if resource_partial?
1947
2020
  end
1948
2021
 
1949
- def load_storage table_id, url, options = {}
2022
+ def ensure_job_succeeded! job
2023
+ return unless job.failed?
2024
+ begin
2025
+ # raise to activate ruby exception cause handling
2026
+ raise job.gapi_error
2027
+ rescue StandardError => e
2028
+ # wrap Google::Apis::Error with Google::Cloud::Error
2029
+ raise Google::Cloud::Error.from_error(e)
2030
+ end
2031
+ end
2032
+
2033
+ def load_job_gapi table_id, dryrun, job_id: nil, prefix: nil
2034
+ job_ref = service.job_ref_from job_id, prefix
2035
+ Google::Apis::BigqueryV2::Job.new(
2036
+ job_reference: job_ref,
2037
+ configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
2038
+ load: Google::Apis::BigqueryV2::JobConfigurationLoad.new(
2039
+ destination_table: Google::Apis::BigqueryV2::TableReference.new(
2040
+ project_id: @service.project,
2041
+ dataset_id: dataset_id,
2042
+ table_id: table_id
2043
+ )
2044
+ ),
2045
+ dry_run: dryrun
2046
+ )
2047
+ )
2048
+ end
2049
+
2050
+ def load_job_csv_options! job, jagged_rows: nil,
2051
+ quoted_newlines: nil,
2052
+ delimiter: nil,
2053
+ quote: nil, skip_leading: nil,
2054
+ null_marker: nil
2055
+ job.jagged_rows = jagged_rows unless jagged_rows.nil?
2056
+ job.quoted_newlines = quoted_newlines unless quoted_newlines.nil?
2057
+ job.delimiter = delimiter unless delimiter.nil?
2058
+ job.null_marker = null_marker unless null_marker.nil?
2059
+ job.quote = quote unless quote.nil?
2060
+ job.skip_leading = skip_leading unless skip_leading.nil?
2061
+ end
2062
+
2063
+ def load_job_file_options! job, format: nil,
2064
+ projection_fields: nil,
2065
+ jagged_rows: nil, quoted_newlines: nil,
2066
+ encoding: nil, delimiter: nil,
2067
+ ignore_unknown: nil, max_bad_records: nil,
2068
+ quote: nil, skip_leading: nil,
2069
+ null_marker: nil
2070
+ job.format = format unless format.nil?
2071
+ unless projection_fields.nil?
2072
+ job.projection_fields = projection_fields
2073
+ end
2074
+ job.encoding = encoding unless encoding.nil?
2075
+ job.ignore_unknown = ignore_unknown unless ignore_unknown.nil?
2076
+ job.max_bad_records = max_bad_records unless max_bad_records.nil?
2077
+ load_job_csv_options! job, jagged_rows: jagged_rows,
2078
+ quoted_newlines: quoted_newlines,
2079
+ delimiter: delimiter,
2080
+ quote: quote,
2081
+ skip_leading: skip_leading,
2082
+ null_marker: null_marker
2083
+ end
2084
+
2085
+ def load_job_updater table_id, format: nil, create: nil,
2086
+ write: nil, projection_fields: nil,
2087
+ jagged_rows: nil, quoted_newlines: nil,
2088
+ encoding: nil, delimiter: nil,
2089
+ ignore_unknown: nil, max_bad_records: nil,
2090
+ quote: nil, skip_leading: nil, dryrun: nil,
2091
+ schema: nil, job_id: nil, prefix: nil, labels: nil,
2092
+ autodetect: nil, null_marker: nil
2093
+ new_job = load_job_gapi table_id, dryrun, job_id: job_id,
2094
+ prefix: prefix
2095
+ LoadJob::Updater.new(new_job).tap do |job|
2096
+ job.location = location if location # may be dataset reference
2097
+ job.create = create unless create.nil?
2098
+ job.write = write unless write.nil?
2099
+ job.schema = schema unless schema.nil?
2100
+ job.autodetect = autodetect unless autodetect.nil?
2101
+ job.labels = labels unless labels.nil?
2102
+ load_job_file_options! job, format: format,
2103
+ projection_fields: projection_fields,
2104
+ jagged_rows: jagged_rows,
2105
+ quoted_newlines: quoted_newlines,
2106
+ encoding: encoding,
2107
+ delimiter: delimiter,
2108
+ ignore_unknown: ignore_unknown,
2109
+ max_bad_records: max_bad_records,
2110
+ quote: quote,
2111
+ skip_leading: skip_leading,
2112
+ null_marker: null_marker
2113
+ end
2114
+ end
2115
+
2116
+ def load_storage urls, job_gapi
1950
2117
  # Convert to storage URL
1951
- url = url.to_gs_url if url.respond_to? :to_gs_url
1952
- url = url.to_s if url.is_a? URI
2118
+ urls = [urls].flatten.map do |url|
2119
+ if url.respond_to? :to_gs_url
2120
+ url.to_gs_url
2121
+ elsif url.is_a? URI
2122
+ url.to_s
2123
+ else
2124
+ url
2125
+ end
2126
+ end
1953
2127
 
1954
- gapi = service.load_table_gs_url dataset_id, table_id, url, options
2128
+ unless urls.nil?
2129
+ job_gapi.configuration.load.update! source_uris: urls
2130
+ if job_gapi.configuration.load.source_format.nil?
2131
+ source_format = Convert.derive_source_format_from_list urls
2132
+ unless source_format.nil?
2133
+ job_gapi.configuration.load.source_format = source_format
2134
+ end
2135
+ end
2136
+ end
2137
+
2138
+ gapi = service.load_table_gs_url job_gapi
1955
2139
  Job.from_gapi gapi, service
1956
2140
  end
1957
2141
 
1958
- def load_local table_id, file, options = {}
1959
- # Convert to storage URL
1960
- file = file.to_gs_url if file.respond_to? :to_gs_url
2142
+ def load_local file, job_gapi
2143
+ path = Pathname(file).to_path
2144
+ if job_gapi.configuration.load.source_format.nil?
2145
+ source_format = Convert.derive_source_format path
2146
+ unless source_format.nil?
2147
+ job_gapi.configuration.load.source_format = source_format
2148
+ end
2149
+ end
1961
2150
 
1962
- gapi = service.load_table_file dataset_id, table_id, file, options
2151
+ gapi = service.load_table_file file, job_gapi
1963
2152
  Job.from_gapi gapi, service
1964
2153
  end
1965
2154
 
1966
- def storage_url? file
1967
- file.respond_to?(:to_gs_url) ||
1968
- (file.respond_to?(:to_str) &&
1969
- file.to_str.downcase.start_with?("gs://")) ||
1970
- (file.is_a?(URI) &&
1971
- file.to_s.downcase.start_with?("gs://"))
2155
+ def load_local_or_uri file, updater
2156
+ job_gapi = updater.to_gapi
2157
+ job = if local_file? file
2158
+ load_local file, job_gapi
2159
+ else
2160
+ load_storage file, job_gapi
2161
+ end
2162
+ job
2163
+ end
2164
+
2165
+ def storage_url? files
2166
+ [files].flatten.all? do |file|
2167
+ file.respond_to?(:to_gs_url) ||
2168
+ (file.respond_to?(:to_str) &&
2169
+ file.to_str.downcase.start_with?("gs://")) ||
2170
+ (file.is_a?(URI) &&
2171
+ file.to_s.downcase.start_with?("gs://"))
2172
+ end
1972
2173
  end
1973
2174
 
1974
2175
  def local_file? file