google-cloud-bigquery 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in those public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: cb7f8896ef2f04b07d335a2d619a60557246633ef343255c7fd6f2c31c1afbcf
-   data.tar.gz: 8445e2df96afbd0be615ee17d891d3b192a2a6a32f21f010113809c020e6aa76
+   metadata.gz: 41d3da96cf5cfe992d89be7d76eae719bee105acab2b2621ae9d96637963c4fa
+   data.tar.gz: 2da003b89f6ab554f97941a56aa54bc35b0a9bb239b1bf19638a1ed148df8592
  SHA512:
-   metadata.gz: ead4df8de6a2db97edf826bfdf82c49b48e7eebd999d3ee1448616a5b1d53d219bcc66dfe13fbe6e005282d7a59582ff4ead46adbe10a25cb365b19ebb0a5ba2
-   data.tar.gz: f1de83104a82b84673b071395aa1527bc4ed3d992c2ed0b7b04b9cbb13f3f303277da04ee93e944411ae0cab10cdea573783da6ef9ece20782f429c34da76ac2
+   metadata.gz: a049fc85d22b3ea866a5beff61c46e987411acf3d946837adf465ebfabfade7d4fc633e487b29eb3b3b0be36c0f08e3b8740cc636a8c5c362b40b13fc47ad127
+   data.tar.gz: 7f8c25afa22dfb9259e73ac5b3ae6917629b84bfcde9cf52055d0a88bb680450ef1ed173d8c64edfae06afbb535b3f02e75f024a7032b14907908525d6c4d582
data/README.md CHANGED
@@ -23,26 +23,27 @@ Instructions and configuration options are covered in the [Authentication Guide]
  ```ruby
  require "google/cloud/bigquery"
 
- bigquery = Google::Cloud::Bigquery.new(
-   project_id: "my-todo-project",
-   credentials: "/path/to/keyfile.json"
- )
-
- # Create a new table to archive todos
- dataset = bigquery.dataset "my-todo-archive"
- table = dataset.create_table "todos",
-   name: "Todos Archive",
-   description: "Archive for completed TODO records"
-
- # Load data into the table
- file = File.open "/archive/todos/completed-todos.csv"
- table.load file
-
- # Run a query for the number of completed todos by owner
- count_sql = "SELECT owner, COUNT(*) AS complete_count FROM todos GROUP BY owner"
- data = bigquery.query count_sql
+ bigquery = Google::Cloud::Bigquery.new
+ dataset = bigquery.create_dataset "my_dataset"
+
+ table = dataset.create_table "my_table" do |t|
+   t.name = "My Table"
+   t.description = "A description of my table."
+   t.schema do |s|
+     s.string "first_name", mode: :required
+     s.string "last_name", mode: :required
+     s.integer "age", mode: :required
+   end
+ end
+
+ # Load data into the table from Google Cloud Storage
+ table.load "gs://my-bucket/file-name.csv"
+
+ # Run a query
+ data = dataset.query "SELECT first_name FROM my_table"
+
  data.each do |row|
-   puts row[:name]
+   puts row[:first_name]
  end
  ```
 
@@ -50,6 +51,13 @@ end
 
  This library is supported on Ruby 2.0+.
 
+ However, Ruby 2.3 or later is strongly recommended, as earlier releases have
+ reached or are nearing end-of-life. After June 1, 2018, Google will provide
+ official support only for Ruby versions that are considered current and
+ supported by Ruby Core (that is, Ruby versions that are either in normal
+ maintenance or in security maintenance).
+ See https://www.ruby-lang.org/en/downloads/branches/ for further details.
+
  ## Versioning
 
  This library follows [Semantic Versioning](http://semver.org/).
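
Note that the new quick-start relies on implicit authentication (environment variables or application default credentials, per the Authentication Guide) instead of the removed `project_id`/`credentials` arguments. For readers migrating from the old snippet, here is a minimal sketch of explicit configuration; the project ID and keyfile path are placeholders, not values taken from this package:

```ruby
require "google/cloud/bigquery"

# Explicit configuration still works alongside the new implicit style.
# Replace the placeholders with your own project and service-account keyfile.
bigquery = Google::Cloud::Bigquery.new(
  project_id: "my-project",
  credentials: "/path/to/keyfile.json"
)

dataset = bigquery.dataset "my_dataset"
```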
@@ -22,7 +22,7 @@ require "date"
22
22
  module Google
23
23
  module Cloud
24
24
  module Bigquery
25
- # rubocop:disable all
25
+ # rubocop:disable Metrics/ModuleLength
26
26
 
27
27
  ##
28
28
  # @private
@@ -42,7 +42,6 @@ module Google
42
42
  # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
43
43
  # | `ARRAY` | `Array` | Nested arrays, `nil` values are not supported. |
44
44
  # | `STRUCT` | `Hash` | Hash keys may be strings or symbols. |
45
-
46
45
  module Convert
47
46
  ##
48
47
  # @private
@@ -62,6 +61,8 @@ module Google
62
61
  Hash[row_pairs]
63
62
  end
64
63
 
64
+ # rubocop:disable all
65
+
65
66
  def self.format_value value, field
66
67
  if value.nil?
67
68
  nil
@@ -212,16 +213,6 @@ module Google
212
213
  end
213
214
  end
214
215
 
215
- ##
216
- # @private
217
- def self.to_json_rows rows
218
- rows.map { |row| to_json_row row }
219
- end
220
- ##
221
- # @private
222
- def self.to_json_row row
223
- Hash[row.map { |k, v| [k.to_s, to_json_value(v)] }]
224
- end
225
216
  ##
226
217
  # @private
227
218
  def self.to_json_value value
@@ -245,14 +236,116 @@ module Google
245
236
  end
246
237
  end
247
238
 
239
+ # rubocop:enable all
240
+
241
+ ##
242
+ # @private
243
+ def self.to_json_rows rows
244
+ rows.map { |row| to_json_row row }
245
+ end
246
+
247
+ ##
248
+ # @private
249
+ def self.to_json_row row
250
+ Hash[row.map { |k, v| [k.to_s, to_json_value(v)] }]
251
+ end
252
+
248
253
  def self.resolve_legacy_sql standard_sql, legacy_sql
249
254
  return !standard_sql unless standard_sql.nil?
250
255
  return legacy_sql unless legacy_sql.nil?
251
256
  false
252
257
  end
253
258
 
254
- # rubocop:enable all
259
+ ##
260
+ # @private
261
+ #
262
+ # Converts create disposition strings to API values.
263
+ #
264
+ # @return [String] API representation of create disposition.
265
+ def self.create_disposition str
266
+ val = {
267
+ "create_if_needed" => "CREATE_IF_NEEDED",
268
+ "createifneeded" => "CREATE_IF_NEEDED",
269
+ "if_needed" => "CREATE_IF_NEEDED",
270
+ "needed" => "CREATE_IF_NEEDED",
271
+ "create_never" => "CREATE_NEVER",
272
+ "createnever" => "CREATE_NEVER",
273
+ "never" => "CREATE_NEVER"
274
+ }[str.to_s.downcase]
275
+ return val unless val.nil?
276
+ str
277
+ end
278
+
279
+ ##
280
+ # @private
281
+ #
282
+ # Converts write disposition strings to API values.
283
+ #
284
+ # @return [String] API representation of write disposition.
285
+ def self.write_disposition str
286
+ val = {
287
+ "write_truncate" => "WRITE_TRUNCATE",
288
+ "writetruncate" => "WRITE_TRUNCATE",
289
+ "truncate" => "WRITE_TRUNCATE",
290
+ "write_append" => "WRITE_APPEND",
291
+ "writeappend" => "WRITE_APPEND",
292
+ "append" => "WRITE_APPEND",
293
+ "write_empty" => "WRITE_EMPTY",
294
+ "writeempty" => "WRITE_EMPTY",
295
+ "empty" => "WRITE_EMPTY"
296
+ }[str.to_s.downcase]
297
+ return val unless val.nil?
298
+ str
299
+ end
300
+
301
+ ##
302
+ # @private
303
+ #
304
+ # Converts source format strings to API values.
305
+ #
306
+ # @return [String] API representation of source format.
307
+ def self.source_format format
308
+ val = {
309
+ "csv" => "CSV",
310
+ "json" => "NEWLINE_DELIMITED_JSON",
311
+ "newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
312
+ "avro" => "AVRO",
313
+ "datastore" => "DATASTORE_BACKUP",
314
+ "backup" => "DATASTORE_BACKUP",
315
+ "datastore_backup" => "DATASTORE_BACKUP"
316
+ }[format.to_s.downcase]
317
+ return val unless val.nil?
318
+ format
319
+ end
320
+
321
+ ##
322
+ # @private
323
+ #
324
+ # Converts file paths into source format by extension.
325
+ #
326
+ # @return [String] API representation of source format.
327
+ def self.derive_source_format_from_list paths
328
+ paths.map do |path|
329
+ derive_source_format path
330
+ end.compact.uniq.first
331
+ end
332
+
333
+ ##
334
+ # @private
335
+ #
336
+ # Converts file path into source format by extension.
337
+ #
338
+ # @return [String] API representation of source format.
339
+ def self.derive_source_format path
340
+ return "CSV" if path.end_with? ".csv"
341
+ return "NEWLINE_DELIMITED_JSON" if path.end_with? ".json"
342
+ return "AVRO" if path.end_with? ".avro"
343
+ return "DATASTORE_BACKUP" if path.end_with? ".backup_info"
344
+ nil
345
+ end
255
346
  end
347
+
348
+ # rubocop:enable Metrics/ModuleLength
256
349
  end
257
350
  end
258
351
  end
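
The new `Convert` helpers above normalize user-friendly option strings and file extensions into the enum values the BigQuery API expects, and pass unrecognized values through unchanged. The module is documented as `@private`, so the sketch below illustrates the mapping rather than public API:

```ruby
require "google/cloud/bigquery"

convert = Google::Cloud::Bigquery::Convert

convert.create_disposition "needed"                      #=> "CREATE_IF_NEEDED"
convert.write_disposition :truncate                      #=> "WRITE_TRUNCATE"
convert.source_format "json"                             #=> "NEWLINE_DELIMITED_JSON"
convert.derive_source_format "gs://my-bucket/data.avro"  #=> "AVRO"

# Unknown values fall through so the service can report the error itself.
convert.source_format "parquet"                          #=> "parquet"
```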
@@ -12,6 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ require "google/cloud/bigquery/encryption_configuration"
15
16
 
16
17
  module Google
17
18
  module Cloud
@@ -129,6 +130,173 @@ module Google
129
130
  disp = @gapi.configuration.copy.write_disposition
130
131
  disp == "WRITE_EMPTY"
131
132
  end
133
+
134
+ ##
135
+ # The encryption configuration of the destination table.
136
+ #
137
+ # @return [Google::Cloud::BigQuery::EncryptionConfiguration] Custom
138
+ # encryption configuration (e.g., Cloud KMS keys).
139
+ #
140
+ # @!group Attributes
141
+ def encryption
142
+ EncryptionConfiguration.from_gapi(
143
+ @gapi.configuration.copy.destination_encryption_configuration
144
+ )
145
+ end
146
+
147
+ ##
148
+ # Yielded to a block to accumulate changes for an API request.
149
+ class Updater < CopyJob
150
+ ##
151
+ # @private Create an Updater object.
152
+ def initialize gapi
153
+ @gapi = gapi
154
+ end
155
+
156
+ ##
157
+ # @private Create an Updater from an options hash.
158
+ #
159
+ # @return [Google::Cloud::Bigquery::CopyJob::Updater] A job
160
+ # configuration object for setting copy options.
161
+ def self.from_options service, source, target, options = {}
162
+ job_ref = service.job_ref_from options[:job_id], options[:prefix]
163
+ req = Google::Apis::BigqueryV2::Job.new(
164
+ job_reference: job_ref,
165
+ configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
166
+ copy: Google::Apis::BigqueryV2::JobConfigurationTableCopy.new(
167
+ source_table: source,
168
+ destination_table: target
169
+ ),
170
+ dry_run: options[:dryrun]
171
+ )
172
+ )
173
+
174
+ updater = CopyJob::Updater.new req
175
+ updater.create = options[:create]
176
+ updater.write = options[:write]
177
+ updater.labels = options[:labels] if options[:labels]
178
+ updater
179
+ end
180
+
181
+ ##
182
+ # Sets the geographic location where the job should run. Required
183
+ # except for US and EU.
184
+ #
185
+ # @param [String] value A geographic location, such as "US", "EU" or
186
+ # "asia-northeast1". Required except for US and EU.
187
+ #
188
+ # @example
189
+ # require "google/cloud/bigquery"
190
+ #
191
+ # bigquery = Google::Cloud::Bigquery.new
192
+ # dataset = bigquery.dataset "my_dataset"
193
+ # table = dataset.table "my_table"
194
+ # destination_table = dataset.table "my_destination_table"
195
+ #
196
+ # copy_job = table.copy_job destination_table do |j|
197
+ # j.location = "EU"
198
+ # end
199
+ #
200
+ # copy_job.wait_until_done!
201
+ # copy_job.done? #=> true
202
+ #
203
+ # @!group Attributes
204
+ def location= value
205
+ @gapi.job_reference.location = value
206
+ end
207
+
208
+ ##
209
+ # Sets the create disposition.
210
+ #
211
+ # This specifies whether the job is allowed to create new tables. The
212
+ # default value is `needed`.
213
+ #
214
+ # The following values are supported:
215
+ #
216
+ # * `needed` - Create the table if it does not exist.
217
+ # * `never` - The table must already exist. A 'notFound' error is
218
+ # raised if the table does not exist.
219
+ #
220
+ # @param [String] new_create The new create disposition.
221
+ #
222
+ # @!group Attributes
223
+ def create= new_create
224
+ @gapi.configuration.copy.update! create_disposition:
225
+ Convert.create_disposition(new_create)
226
+ end
227
+
228
+ ##
229
+ # Sets the write disposition.
230
+ #
231
+ # This specifies how to handle data already present in the table. The
232
+ # default value is `append`.
233
+ #
234
+ # The following values are supported:
235
+ #
236
+ # * `truncate` - BigQuery overwrites the table data.
237
+ # * `append` - BigQuery appends the data to the table.
238
+ # * `empty` - An error will be returned if the table already contains
239
+ # data.
240
+ #
241
+ # @param [String] new_write The new write disposition.
242
+ #
243
+ # @!group Attributes
244
+ def write= new_write
245
+ @gapi.configuration.copy.update! write_disposition:
246
+ Convert.write_disposition(new_write)
247
+ end
248
+
249
+ ##
250
+ # Sets the encryption configuration of the destination table.
251
+ #
252
+ # @param [Google::Cloud::BigQuery::EncryptionConfiguration] val
253
+ # Custom encryption configuration (e.g., Cloud KMS keys).
254
+ #
255
+ # @example
256
+ # require "google/cloud/bigquery"
257
+ #
258
+ # bigquery = Google::Cloud::Bigquery.new
259
+ # dataset = bigquery.dataset "my_dataset"
260
+ # table = dataset.table "my_table"
261
+ #
262
+ # key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
263
+ # encrypt_config = bigquery.encryption kms_key: key_name
264
+ # job = table.copy_job "my_dataset.new_table" do |job|
265
+ # job.encryption = encrypt_config
266
+ # end
267
+ #
268
+ # @!group Attributes
269
+ def encryption= val
270
+ @gapi.configuration.copy.update!(
271
+ destination_encryption_configuration: val.to_gapi
272
+ )
273
+ end
274
+
275
+ ##
276
+ # Sets the labels to use for the job.
277
+ #
278
+ # @param [Hash] value A hash of user-provided labels associated with
279
+ # the job. You can use these to organize and group your jobs. Label
280
+ # keys and values can be no longer than 63 characters, can only
281
+ # contain lowercase letters, numeric characters, underscores and
282
+ # dashes. International characters are allowed. Label values are
283
+ # optional. Label keys must start with a letter and each label in
284
+ # the list must have a different key.
285
+ #
286
+ # @!group Attributes
287
+ def labels= value
288
+ @gapi.configuration.update! labels: value
289
+ end
290
+
291
+ ##
292
+ # @private Returns the Google API client library version of this job.
293
+ #
294
+ # @return [<Google::Apis::BigqueryV2::Job>] (See
295
+ # {Google::Apis::BigqueryV2::Job})
296
+ def to_gapi
297
+ @gapi
298
+ end
299
+ end
132
300
  end
133
301
  end
134
302
  end
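
Putting the new `CopyJob::Updater` setters together, a copy job can now be configured entirely in a block. A minimal sketch, assuming the dataset, tables, and KMS key name below are placeholders:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"
table    = dataset.table "my_table"

key_name       = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
encrypt_config = bigquery.encryption kms_key: key_name

copy_job = table.copy_job "my_dataset.new_table" do |j|
  j.create = "needed"    # mapped to CREATE_IF_NEEDED
  j.write  = "truncate"  # mapped to WRITE_TRUNCATE
  j.labels = { "team" => "analytics" }
  j.encryption = encrypt_config
end

copy_job.wait_until_done!
copy_job.done? #=> true
```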
@@ -266,8 +266,8 @@ module Google
266
266
  # The geographic location where the dataset should reside. Possible
267
267
  # values include `EU` and `US`. The default value is `US`.
268
268
  #
269
- # @return [String, nil] The location code, or `nil` if the object is a
270
- # reference (see {#reference?}).
269
+ # @return [String, nil] The geographic location, or `nil` if the object
270
+ # is a reference (see {#reference?}).
271
271
  #
272
272
  # @!group Attributes
273
273
  #
@@ -696,6 +696,12 @@ module Google
696
696
  # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types)
697
697
  # for an overview of each BigQuery data type, including allowed values.
698
698
  #
699
+ # The geographic location for the job ("US", "EU", etc.) can be set via
700
+ # {QueryJob::Updater#location=} in a block passed to this method. If the
701
+ # dataset is a full resource representation (see {#resource_full?}), the
702
+ # location of the job will be automatically set to the location of the
703
+ # dataset.
704
+ #
699
705
  # @param [String] query A query string, following the BigQuery [query
700
706
  # syntax](https://cloud.google.com/bigquery/query-reference), of the
701
707
  # query to execute. Example: "SELECT count(f1) FROM
@@ -761,11 +767,6 @@ module Google
761
767
  # Flattens all nested and repeated fields in the query results. The
762
768
  # default value is `true`. `large_results` parameter must be `true` if
763
769
  # this is set to `false`.
764
- # @param [Integer] maximum_billing_tier Limits the billing tier for this
765
- # job. Queries that have resource usage beyond this tier will fail
766
- # (without incurring a charge). Optional. If unspecified, this will be
767
- # set to your project default. For more information, see [High-Compute
768
- # queries](https://cloud.google.com/bigquery/pricing#high-compute).
769
770
  # @param [Integer] maximum_bytes_billed Limits the bytes billed for this
770
771
  # job. Queries that will have bytes billed beyond this limit will fail
771
772
  # (without incurring a charge). Optional. If unspecified, this will be
@@ -799,6 +800,11 @@ module Google
799
800
  # inline code resource is equivalent to providing a URI for a file
800
801
  # containing the same code. See [User-Defined
801
802
  # Functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
803
+ # @param [Integer] maximum_billing_tier Deprecated: Change the billing
804
+ # tier to allow high-compute queries.
805
+ # @yield [job] a job configuration object
806
+ # @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
807
+ # configuration object for setting additional options for the query.
802
808
  #
803
809
  # @return [Google::Cloud::Bigquery::QueryJob] A new query job object.
804
810
  #
@@ -865,7 +871,7 @@ module Google
865
871
  # end
866
872
  # end
867
873
  #
868
- # @example Query using external data source:
874
+ # @example Query using external data source, set destination:
869
875
  # require "google/cloud/bigquery"
870
876
  #
871
877
  # bigquery = Google::Cloud::Bigquery.new
@@ -877,8 +883,10 @@ module Google
877
883
  # csv.skip_leading_rows = 1
878
884
  # end
879
885
  #
880
- # job = dataset.query_job "SELECT * FROM my_ext_table",
881
- # external: { my_ext_table: csv_table }
886
+ # job = dataset.query_job "SELECT * FROM my_ext_table" do |query|
887
+ # query.external = { my_ext_table: csv_table }
888
+ # query.table = dataset.table "my_table", skip_lookup: true
889
+ # end
882
890
  #
883
891
  # job.wait_until_done!
884
892
  # if !job.failed?
@@ -895,17 +903,23 @@ module Google
895
903
  legacy_sql: nil, large_results: nil, flatten: nil,
896
904
  maximum_billing_tier: nil, maximum_bytes_billed: nil,
897
905
  job_id: nil, prefix: nil, labels: nil, udfs: nil
906
+ ensure_service!
898
907
  options = { priority: priority, cache: cache, table: table,
899
908
  create: create, write: write,
900
909
  large_results: large_results, flatten: flatten,
901
910
  legacy_sql: legacy_sql, standard_sql: standard_sql,
902
911
  maximum_billing_tier: maximum_billing_tier,
903
912
  maximum_bytes_billed: maximum_bytes_billed,
904
- params: params, external: external, labels: labels,
905
- job_id: job_id, prefix: prefix, udfs: udfs }
906
- options[:dataset] ||= self
907
- ensure_service!
908
- gapi = service.query_job query, options
913
+ job_id: job_id, prefix: prefix, params: params,
914
+ external: external, labels: labels, udfs: udfs }
915
+
916
+ updater = QueryJob::Updater.from_options service, query, options
917
+ updater.dataset = self
918
+ updater.location = location if location # may be dataset reference
919
+
920
+ yield updater if block_given?
921
+
922
+ gapi = service.query_job updater.to_gapi
909
923
  Job.from_gapi gapi, service
910
924
  end
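
With `#query_job` now yielding a `QueryJob::Updater`, job options can be set in the block instead of (or alongside) keyword arguments. A sketch, assuming `my_table` and `my_results` are placeholder tables in the dataset:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

job = dataset.query_job "SELECT first_name FROM my_table" do |query|
  query.location = "EU"   # see the location note in the method docs above
  query.table    = dataset.table "my_results", skip_lookup: true
end

job.wait_until_done!
job.data.each { |row| puts row[:first_name] } unless job.failed?
```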
911
925
 
@@ -938,6 +952,12 @@ module Google
938
952
  # See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types)
939
953
  # for an overview of each BigQuery data type, including allowed values.
940
954
  #
955
+ # The geographic location for the job ("US", "EU", etc.) can be set via
956
+ # {QueryJob::Updater#location=} in a block passed to this method. If the
957
+ # dataset is a full resource representation (see {#resource_full?}), the
958
+ # location of the job will be automatically set to the location of the
959
+ # dataset.
960
+ #
941
961
  # @see https://cloud.google.com/bigquery/querying-data Querying Data
942
962
  #
943
963
  # @param [String] query A query string, following the BigQuery [query
@@ -985,6 +1005,9 @@ module Google
985
1005
  # When set to false, the values of `large_results` and `flatten` are
986
1006
  # ignored; the query will be run as if `large_results` is true and
987
1007
  # `flatten` is false. Optional. The default value is false.
1008
+ # @yield [job] a job configuration object
1009
+ # @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
1010
+ # configuration object for setting additional options for the query.
988
1011
  #
989
1012
  # @return [Google::Cloud::Bigquery::Data] A new data object.
990
1013
  #
@@ -1039,7 +1062,7 @@ module Google
1039
1062
  # puts row[:name]
1040
1063
  # end
1041
1064
  #
1042
- # @example Query using external data source:
1065
+ # @example Query using external data source, set destination:
1043
1066
  # require "google/cloud/bigquery"
1044
1067
  #
1045
1068
  # bigquery = Google::Cloud::Bigquery.new
@@ -1051,8 +1074,10 @@ module Google
1051
1074
  # csv.skip_leading_rows = 1
1052
1075
  # end
1053
1076
  #
1054
- # data = dataset.query "SELECT * FROM my_ext_table",
1055
- # external: { my_ext_table: csv_table }
1077
+ # data = dataset.query "SELECT * FROM my_ext_table" do |query|
1078
+ # query.external = { my_ext_table: csv_table }
1079
+ # query.table = dataset.table "my_table", skip_lookup: true
1080
+ # end
1056
1081
  #
1057
1082
  # data.each do |row|
1058
1083
  # puts row[:name]
@@ -1063,21 +1088,19 @@ module Google
1063
1088
  def query query, params: nil, external: nil, max: nil, cache: true,
1064
1089
  standard_sql: nil, legacy_sql: nil
1065
1090
  ensure_service!
1066
- options = { params: params, external: external, cache: cache,
1067
- legacy_sql: legacy_sql, standard_sql: standard_sql }
1091
+ options = { priority: "INTERACTIVE", external: external, cache: cache,
1092
+ legacy_sql: legacy_sql, standard_sql: standard_sql,
1093
+ params: params }
1094
+ options[:dataset] ||= self
1095
+ updater = QueryJob::Updater.from_options service, query, options
1096
+ updater.location = location if location # may be dataset reference
1068
1097
 
1069
- job = query_job query, options
1070
- job.wait_until_done!
1098
+ yield updater if block_given?
1071
1099
 
1072
- if job.failed?
1073
- begin
1074
- # raise to activate ruby exception cause handling
1075
- raise job.gapi_error
1076
- rescue StandardError => e
1077
- # wrap Google::Apis::Error with Google::Cloud::Error
1078
- raise Google::Cloud::Error.from_error(e)
1079
- end
1080
- end
1100
+ gapi = service.query_job updater.to_gapi
1101
+ job = Job.from_gapi gapi, service
1102
+ job.wait_until_done!
1103
+ ensure_job_succeeded! job
1081
1104
 
1082
1105
  job.data max: max
1083
1106
  end
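
`#query` now builds the same `QueryJob::Updater` internally, waits for the job, and raises through `ensure_job_succeeded!` on failure, so the synchronous form accepts the same block. A sketch with placeholder table and column names:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

data = dataset.query "SELECT first_name FROM my_table WHERE age >= ?",
                     params: [21] do |query|
  query.location = "EU"
end

data.each { |row| puts row[:first_name] }
```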
@@ -1147,10 +1170,17 @@ module Google
1147
1170
  # file directly. See [Loading Data with a POST
1148
1171
  # Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
1149
1172
  #
1173
+ # The geographic location for the job ("US", "EU", etc.) can be set via
1174
+ # {LoadJob::Updater#location=} in a block passed to this method. If the
1175
+ # dataset is a full resource representation (see {#resource_full?}), the
1176
+ # location of the job will be automatically set to the location of the
1177
+ # dataset.
1178
+ #
1150
1179
  # @param [String] table_id The destination table to load the data into.
1151
- # @param [File, Google::Cloud::Storage::File, String, URI] file A file
1152
- # or the URI of a Google Cloud Storage file containing data to load
1153
- # into the table.
1180
+ # @param [File, Google::Cloud::Storage::File, String, URI,
1181
+ # Array<Google::Cloud::Storage::File, String, URI>] files
1182
+ # A file or the URI of a Google Cloud Storage file, or an Array of
1183
+ # those, containing data to load into the table.
1154
1184
  # @param [String] format The exported file format. The default value is
1155
1185
  # `csv`.
1156
1186
  #
@@ -1269,13 +1299,12 @@ module Google
1269
1299
  # optional. Label keys must start with a letter and each label in the
1270
1300
  # list must have a different key.
1271
1301
  #
1272
- # @yield [schema] A block for setting the schema for the destination
1273
- # table. The schema can be omitted if the destination table already
1274
- # exists, or if you're loading data from a Google Cloud Datastore
1275
- # backup.
1276
- # @yieldparam [Google::Cloud::Bigquery::Schema] schema The schema
1277
- # instance provided using the `schema` option, or a new, empty schema
1278
- # instance
1302
+ # @yield [updater] A block for setting the schema and other
1303
+ # options for the destination table. The schema can be omitted if the
1304
+ # destination table already exists, or if you're loading data from a
1305
+ # Google Cloud Datastore backup.
1306
+ # @yieldparam [Google::Cloud::Bigquery::LoadJob::Updater] updater An
1307
+ # updater to modify the load job and its schema.
1279
1308
  #
1280
1309
  # @return [Google::Cloud::Bigquery::LoadJob] A new load job object.
1281
1310
  #
@@ -1312,6 +1341,25 @@ module Google
1312
1341
  # end
1313
1342
  # end
1314
1343
  #
1344
+ # @example Pass a list of google-cloud-storage files:
1345
+ # require "google/cloud/bigquery"
1346
+ # require "google/cloud/storage"
1347
+ #
1348
+ # bigquery = Google::Cloud::Bigquery.new
1349
+ # dataset = bigquery.dataset "my_dataset"
1350
+ #
1351
+ # storage = Google::Cloud::Storage.new
1352
+ # bucket = storage.bucket "my-bucket"
1353
+ # file = bucket.file "file-name.csv"
1354
+ # list = [file, "gs://my-bucket/file-name2.csv"]
1355
+ # load_job = dataset.load_job "my_new_table", list do |schema|
1356
+ # schema.string "first_name", mode: :required
1357
+ # schema.record "cities_lived", mode: :repeated do |nested_schema|
1358
+ # nested_schema.string "place", mode: :required
1359
+ # nested_schema.integer "number_of_years", mode: :required
1360
+ # end
1361
+ # end
1362
+ #
1315
1363
  # @example Upload a file directly:
1316
1364
  # require "google/cloud/bigquery"
1317
1365
  #
@@ -1333,13 +1381,15 @@ module Google
1333
1381
  # bigquery = Google::Cloud::Bigquery.new
1334
1382
  # dataset = bigquery.dataset "my_dataset"
1335
1383
  #
1336
- # load_job = dataset.load_job "my_new_table",
1337
- # "gs://my-bucket/xxxx.kind_name.backup_info",
1338
- # format: "datastore_backup"
1384
+ # load_job = dataset.load_job(
1385
+ # "my_new_table",
1386
+ # "gs://my-bucket/xxxx.kind_name.backup_info") do |j|
1387
+ # j.format = "datastore_backup"
1388
+ # end
1339
1389
  #
1340
1390
  # @!group Data
1341
1391
  #
1342
- def load_job table_id, file, format: nil, create: nil, write: nil,
1392
+ def load_job table_id, files, format: nil, create: nil, write: nil,
1343
1393
  projection_fields: nil, jagged_rows: nil,
1344
1394
  quoted_newlines: nil, encoding: nil, delimiter: nil,
1345
1395
  ignore_unknown: nil, max_bad_records: nil, quote: nil,
@@ -1347,25 +1397,25 @@ module Google
1347
1397
  prefix: nil, labels: nil, autodetect: nil, null_marker: nil
1348
1398
  ensure_service!
1349
1399
 
1350
- if block_given?
1351
- schema ||= Schema.from_gapi
1352
- yield schema
1353
- end
1354
- schema_gapi = schema.to_gapi if schema
1355
-
1356
- options = { format: format, create: create, write: write,
1357
- projection_fields: projection_fields,
1358
- jagged_rows: jagged_rows,
1359
- quoted_newlines: quoted_newlines, encoding: encoding,
1360
- delimiter: delimiter, ignore_unknown: ignore_unknown,
1361
- max_bad_records: max_bad_records, quote: quote,
1362
- skip_leading: skip_leading, dryrun: dryrun,
1363
- schema: schema_gapi, job_id: job_id, prefix: prefix,
1364
- labels: labels, autodetect: autodetect,
1365
- null_marker: null_marker }
1366
- return load_storage(table_id, file, options) if storage_url? file
1367
- return load_local(table_id, file, options) if local_file? file
1368
- raise Google::Cloud::Error, "Don't know how to load #{file}"
1400
+ updater = load_job_updater table_id,
1401
+ format: format, create: create,
1402
+ write: write,
1403
+ projection_fields: projection_fields,
1404
+ jagged_rows: jagged_rows,
1405
+ quoted_newlines: quoted_newlines,
1406
+ encoding: encoding,
1407
+ delimiter: delimiter,
1408
+ ignore_unknown: ignore_unknown,
1409
+ max_bad_records: max_bad_records,
1410
+ quote: quote, skip_leading: skip_leading,
1411
+ dryrun: dryrun, schema: schema,
1412
+ job_id: job_id, prefix: prefix,
1413
+ labels: labels, autodetect: autodetect,
1414
+ null_marker: null_marker
1415
+
1416
+ yield updater if block_given?
1417
+
1418
+ load_local_or_uri files, updater
1369
1419
  end
1370
1420
 
1371
1421
  ##
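
Because `#load_job` now yields a `LoadJob::Updater` rather than just a `Schema`, file options, the job location, and the schema can all be set in one block. A minimal sketch; the bucket, file, and table names are placeholders:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

load_job = dataset.load_job "my_new_table",
                            "gs://my-bucket/file-name.csv" do |job|
  job.location     = "EU"
  job.skip_leading = 1                       # skip the CSV header row
  job.string  "first_name", mode: :required
  job.integer "age",        mode: :required
end

load_job.wait_until_done!
load_job.failed? #=> false
```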
@@ -1379,10 +1429,17 @@ module Google
1379
1429
  # file directly. See [Loading Data with a POST
1380
1430
  # Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
1381
1431
  #
1432
+ # The geographic location for the job ("US", "EU", etc.) can be set via
1433
+ # {LoadJob::Updater#location=} in a block passed to this method. If the
1434
+ # dataset is a full resource representation (see {#resource_full?}), the
1435
+ # location of the job will be automatically set to the location of the
1436
+ # dataset.
1437
+ #
1382
1438
  # @param [String] table_id The destination table to load the data into.
1383
- # @param [File, Google::Cloud::Storage::File, String, URI] file A file
1384
- # or the URI of a Google Cloud Storage file containing data to load
1385
- # into the table.
1439
+ # @param [File, Google::Cloud::Storage::File, String, URI,
1440
+ # Array<Google::Cloud::Storage::File, String, URI>] files
1441
+ # A file or the URI of a Google Cloud Storage file, or an Array of
1442
+ # those, containing data to load into the table.
1386
1443
  # @param [String] format The exported file format. The default value is
1387
1444
  # `csv`.
1388
1445
  #
@@ -1479,13 +1536,12 @@ module Google
1479
1536
  # this option. Also note that for most use cases, the block yielded by
1480
1537
  # this method is a more convenient way to configure the schema.
1481
1538
  #
1482
- # @yield [schema] A block for setting the schema for the destination
1483
- # table. The schema can be omitted if the destination table already
1484
- # exists, or if you're loading data from a Google Cloud Datastore
1485
- # backup.
1486
- # @yieldparam [Google::Cloud::Bigquery::Schema] schema The schema
1487
- # instance provided using the `schema` option, or a new, empty schema
1488
- # instance
1539
+ # @yield [updater] A block for setting the schema of the destination
1540
+ # table and other options for the load job. The schema can be omitted
1541
+ # if the destination table already exists, or if you're loading data
1542
+ # from a Google Cloud Datastore backup.
1543
+ # @yieldparam [Google::Cloud::Bigquery::LoadJob::Updater] updater An
1544
+ # updater to modify the load job and its schema.
1489
1545
  #
1490
1546
  # @return [Boolean] Returns `true` if the load job was successful.
1491
1547
  #
@@ -1522,6 +1578,25 @@ module Google
1522
1578
  # end
1523
1579
  # end
1524
1580
  #
1581
+ # @example Pass a list of google-cloud-storage files:
1582
+ # require "google/cloud/bigquery"
1583
+ # require "google/cloud/storage"
1584
+ #
1585
+ # bigquery = Google::Cloud::Bigquery.new
1586
+ # dataset = bigquery.dataset "my_dataset"
1587
+ #
1588
+ # storage = Google::Cloud::Storage.new
1589
+ # bucket = storage.bucket "my-bucket"
1590
+ # file = bucket.file "file-name.csv"
1591
+ # list = [file, "gs://my-bucket/file-name2.csv"]
1592
+ # dataset.load "my_new_table", list do |schema|
1593
+ # schema.string "first_name", mode: :required
1594
+ # schema.record "cities_lived", mode: :repeated do |nested_schema|
1595
+ # nested_schema.string "place", mode: :required
1596
+ # nested_schema.integer "number_of_years", mode: :required
1597
+ # end
1598
+ # end
1599
+ #
1525
1600
  # @example Upload a file directly:
1526
1601
  # require "google/cloud/bigquery"
1527
1602
  #
@@ -1544,41 +1619,39 @@ module Google
1544
1619
  # dataset = bigquery.dataset "my_dataset"
1545
1620
  #
1546
1621
  # dataset.load "my_new_table",
1547
- # "gs://my-bucket/xxxx.kind_name.backup_info",
1548
- # format: "datastore_backup"
1622
+ # "gs://my-bucket/xxxx.kind_name.backup_info" do |j|
1623
+ # j.format = "datastore_backup"
1624
+ # end
1549
1625
  #
1550
1626
  # @!group Data
1551
1627
  #
1552
- def load table_id, file, format: nil, create: nil, write: nil,
1628
+ def load table_id, files, format: nil, create: nil, write: nil,
1553
1629
  projection_fields: nil, jagged_rows: nil, quoted_newlines: nil,
1554
1630
  encoding: nil, delimiter: nil, ignore_unknown: nil,
1555
1631
  max_bad_records: nil, quote: nil, skip_leading: nil,
1556
1632
  schema: nil, autodetect: nil, null_marker: nil
1633
+ ensure_service!
1557
1634
 
1558
- yield (schema ||= Schema.from_gapi) if block_given?
1635
+ updater = load_job_updater table_id,
1636
+ format: format, create: create,
1637
+ write: write,
1638
+ projection_fields: projection_fields,
1639
+ jagged_rows: jagged_rows,
1640
+ quoted_newlines: quoted_newlines,
1641
+ encoding: encoding,
1642
+ delimiter: delimiter,
1643
+ ignore_unknown: ignore_unknown,
1644
+ max_bad_records: max_bad_records,
1645
+ quote: quote, skip_leading: skip_leading,
1646
+ schema: schema,
1647
+ autodetect: autodetect,
1648
+ null_marker: null_marker
1559
1649
 
1560
- options = { format: format, create: create, write: write,
1561
- projection_fields: projection_fields,
1562
- jagged_rows: jagged_rows,
1563
- quoted_newlines: quoted_newlines, encoding: encoding,
1564
- delimiter: delimiter, ignore_unknown: ignore_unknown,
1565
- max_bad_records: max_bad_records, quote: quote,
1566
- skip_leading: skip_leading, schema: schema,
1567
- autodetect: autodetect, null_marker: null_marker }
1568
- job = load_job table_id, file, options
1650
+ yield updater if block_given?
1569
1651
 
1652
+ job = load_local_or_uri files, updater
1570
1653
  job.wait_until_done!
1571
-
1572
- if job.failed?
1573
- begin
1574
- # raise to activate ruby exception cause handling
1575
- raise job.gapi_error
1576
- rescue StandardError => e
1577
- # wrap Google::Apis::Error with Google::Cloud::Error
1578
- raise Google::Cloud::Error.from_error(e)
1579
- end
1580
- end
1581
-
1654
+ ensure_job_succeeded! job
1582
1655
  true
1583
1656
  end
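
`#load` follows the same pattern synchronously: it builds the updater, yields it, waits on the job, raises via `ensure_job_succeeded!` on failure, and returns `true` on success. A sketch, again with placeholder bucket and table names, showing the new ability to pass a list of source URIs:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
dataset  = bigquery.dataset "my_dataset"

dataset.load "my_new_table", ["gs://my-bucket/part-1.csv",
                              "gs://my-bucket/part-2.csv"] do |job|
  job.autodetect = true  # let BigQuery infer the schema from the CSV files
end
#=> true
```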
1584
1657
 
@@ -1946,29 +2019,157 @@ module Google
1946
2019
  reload! if resource_partial?
1947
2020
  end
1948
2021
 
1949
- def load_storage table_id, url, options = {}
2022
+ def ensure_job_succeeded! job
2023
+ return unless job.failed?
2024
+ begin
2025
+ # raise to activate ruby exception cause handling
2026
+ raise job.gapi_error
2027
+ rescue StandardError => e
2028
+ # wrap Google::Apis::Error with Google::Cloud::Error
2029
+ raise Google::Cloud::Error.from_error(e)
2030
+ end
2031
+ end
2032
+
2033
+ def load_job_gapi table_id, dryrun, job_id: nil, prefix: nil
2034
+ job_ref = service.job_ref_from job_id, prefix
2035
+ Google::Apis::BigqueryV2::Job.new(
2036
+ job_reference: job_ref,
2037
+ configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
2038
+ load: Google::Apis::BigqueryV2::JobConfigurationLoad.new(
2039
+ destination_table: Google::Apis::BigqueryV2::TableReference.new(
2040
+ project_id: @service.project,
2041
+ dataset_id: dataset_id,
2042
+ table_id: table_id
2043
+ )
2044
+ ),
2045
+ dry_run: dryrun
2046
+ )
2047
+ )
2048
+ end
2049
+
2050
+ def load_job_csv_options! job, jagged_rows: nil,
2051
+ quoted_newlines: nil,
2052
+ delimiter: nil,
2053
+ quote: nil, skip_leading: nil,
2054
+ null_marker: nil
2055
+ job.jagged_rows = jagged_rows unless jagged_rows.nil?
2056
+ job.quoted_newlines = quoted_newlines unless quoted_newlines.nil?
2057
+ job.delimiter = delimiter unless delimiter.nil?
2058
+ job.null_marker = null_marker unless null_marker.nil?
2059
+ job.quote = quote unless quote.nil?
2060
+ job.skip_leading = skip_leading unless skip_leading.nil?
2061
+ end
2062
+
2063
+ def load_job_file_options! job, format: nil,
2064
+ projection_fields: nil,
2065
+ jagged_rows: nil, quoted_newlines: nil,
2066
+ encoding: nil, delimiter: nil,
2067
+ ignore_unknown: nil, max_bad_records: nil,
2068
+ quote: nil, skip_leading: nil,
2069
+ null_marker: nil
2070
+ job.format = format unless format.nil?
2071
+ unless projection_fields.nil?
2072
+ job.projection_fields = projection_fields
2073
+ end
2074
+ job.encoding = encoding unless encoding.nil?
2075
+ job.ignore_unknown = ignore_unknown unless ignore_unknown.nil?
2076
+ job.max_bad_records = max_bad_records unless max_bad_records.nil?
2077
+ load_job_csv_options! job, jagged_rows: jagged_rows,
2078
+ quoted_newlines: quoted_newlines,
2079
+ delimiter: delimiter,
2080
+ quote: quote,
2081
+ skip_leading: skip_leading,
2082
+ null_marker: null_marker
2083
+ end
2084
+
2085
+ def load_job_updater table_id, format: nil, create: nil,
2086
+ write: nil, projection_fields: nil,
2087
+ jagged_rows: nil, quoted_newlines: nil,
2088
+ encoding: nil, delimiter: nil,
2089
+ ignore_unknown: nil, max_bad_records: nil,
2090
+ quote: nil, skip_leading: nil, dryrun: nil,
2091
+ schema: nil, job_id: nil, prefix: nil, labels: nil,
2092
+ autodetect: nil, null_marker: nil
2093
+ new_job = load_job_gapi table_id, dryrun, job_id: job_id,
2094
+ prefix: prefix
2095
+ LoadJob::Updater.new(new_job).tap do |job|
2096
+ job.location = location if location # may be dataset reference
2097
+ job.create = create unless create.nil?
2098
+ job.write = write unless write.nil?
2099
+ job.schema = schema unless schema.nil?
2100
+ job.autodetect = autodetect unless autodetect.nil?
2101
+ job.labels = labels unless labels.nil?
2102
+ load_job_file_options! job, format: format,
2103
+ projection_fields: projection_fields,
2104
+ jagged_rows: jagged_rows,
2105
+ quoted_newlines: quoted_newlines,
2106
+ encoding: encoding,
2107
+ delimiter: delimiter,
2108
+ ignore_unknown: ignore_unknown,
2109
+ max_bad_records: max_bad_records,
2110
+ quote: quote,
2111
+ skip_leading: skip_leading,
2112
+ null_marker: null_marker
2113
+ end
2114
+ end
2115
+
2116
+ def load_storage urls, job_gapi
1950
2117
  # Convert to storage URL
1951
- url = url.to_gs_url if url.respond_to? :to_gs_url
1952
- url = url.to_s if url.is_a? URI
2118
+ urls = [urls].flatten.map do |url|
2119
+ if url.respond_to? :to_gs_url
2120
+ url.to_gs_url
2121
+ elsif url.is_a? URI
2122
+ url.to_s
2123
+ else
2124
+ url
2125
+ end
2126
+ end
1953
2127
 
1954
- gapi = service.load_table_gs_url dataset_id, table_id, url, options
2128
+ unless urls.nil?
2129
+ job_gapi.configuration.load.update! source_uris: urls
2130
+ if job_gapi.configuration.load.source_format.nil?
2131
+ source_format = Convert.derive_source_format_from_list urls
2132
+ unless source_format.nil?
2133
+ job_gapi.configuration.load.source_format = source_format
2134
+ end
2135
+ end
2136
+ end
2137
+
2138
+ gapi = service.load_table_gs_url job_gapi
1955
2139
  Job.from_gapi gapi, service
1956
2140
  end
1957
2141
 
1958
- def load_local table_id, file, options = {}
1959
- # Convert to storage URL
1960
- file = file.to_gs_url if file.respond_to? :to_gs_url
2142
+ def load_local file, job_gapi
2143
+ path = Pathname(file).to_path
2144
+ if job_gapi.configuration.load.source_format.nil?
2145
+ source_format = Convert.derive_source_format path
2146
+ unless source_format.nil?
2147
+ job_gapi.configuration.load.source_format = source_format
2148
+ end
2149
+ end
1961
2150
 
1962
- gapi = service.load_table_file dataset_id, table_id, file, options
2151
+ gapi = service.load_table_file file, job_gapi
1963
2152
  Job.from_gapi gapi, service
1964
2153
  end
1965
2154
 
1966
- def storage_url? file
1967
- file.respond_to?(:to_gs_url) ||
1968
- (file.respond_to?(:to_str) &&
1969
- file.to_str.downcase.start_with?("gs://")) ||
1970
- (file.is_a?(URI) &&
1971
- file.to_s.downcase.start_with?("gs://"))
2155
+ def load_local_or_uri file, updater
2156
+ job_gapi = updater.to_gapi
2157
+ job = if local_file? file
2158
+ load_local file, job_gapi
2159
+ else
2160
+ load_storage file, job_gapi
2161
+ end
2162
+ job
2163
+ end
2164
+
2165
+ def storage_url? files
2166
+ [files].flatten.all? do |file|
2167
+ file.respond_to?(:to_gs_url) ||
2168
+ (file.respond_to?(:to_str) &&
2169
+ file.to_str.downcase.start_with?("gs://")) ||
2170
+ (file.is_a?(URI) &&
2171
+ file.to_s.downcase.start_with?("gs://"))
2172
+ end
1972
2173
  end
1973
2174
 
1974
2175
  def local_file? file