google-cloud-bigquery 1.2.0 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 41d3da96cf5cfe992d89be7d76eae719bee105acab2b2621ae9d96637963c4fa
4
- data.tar.gz: 2da003b89f6ab554f97941a56aa54bc35b0a9bb239b1bf19638a1ed148df8592
3
+ metadata.gz: d79d312ec01c34460988a536e01fe9b32d937cd357baa16275921fa327f5474d
4
+ data.tar.gz: 2b24200431e4a883dbb914db75e3b0507f1b3c29d3c0401929772c1c7f4e54a4
5
5
  SHA512:
6
- metadata.gz: a049fc85d22b3ea866a5beff61c46e987411acf3d946837adf465ebfabfade7d4fc633e487b29eb3b3b0be36c0f08e3b8740cc636a8c5c362b40b13fc47ad127
7
- data.tar.gz: 7f8c25afa22dfb9259e73ac5b3ae6917629b84bfcde9cf52055d0a88bb680450ef1ed173d8c64edfae06afbb535b3f02e75f024a7032b14907908525d6c4d582
6
+ metadata.gz: ebf35594fb06f2aadd3a60de6dd44218c5e87e91ef4c735d76f640c36315df1380aa9f69d02f3ca39c89b13a1cbd2808e87c8c9fab33cb9cda588843797ae692
7
+ data.tar.gz: c2a19edf84a43c4c6f2cbc6e99145acf8db20a8385ce4a8e5553203811270f49dcd940ae234b192073800b2da20fb1930fe93474578fe36fd5325fb32335afb4
@@ -203,6 +203,11 @@ module Google
203
203
  # @!group Attributes
204
204
  def location= value
205
205
  @gapi.job_reference.location = value
206
+ return unless value.nil?
207
+
208
+ # Treat assigning value of nil the same as unsetting the value.
209
+ unset = @gapi.job_reference.instance_variables.include? :@location
210
+ @gapi.job_reference.remove_instance_variable :@location if unset
206
211
  end
207
212
 
208
213
  ##
@@ -433,7 +433,9 @@ module Google
433
433
  # @param [String] name A descriptive name for the table.
434
434
  # @param [String] description A user-friendly description of the table.
435
435
  # @yield [table] a block for setting the table
436
- # @yieldparam [Table] table the table object to be updated
436
+ # @yieldparam [Google::Cloud::Bigquery::Table::Updater] table An updater
437
+ # to set additional properties on the table in the API request to
438
+ # create it.
437
439
  #
438
440
  # @return [Google::Cloud::Bigquery::Table] A new table object.
439
441
  #
@@ -1086,19 +1088,10 @@ module Google
1086
1088
  # @!group Data
1087
1089
  #
1088
1090
  def query query, params: nil, external: nil, max: nil, cache: true,
1089
- standard_sql: nil, legacy_sql: nil
1090
- ensure_service!
1091
- options = { priority: "INTERACTIVE", external: external, cache: cache,
1092
- legacy_sql: legacy_sql, standard_sql: standard_sql,
1093
- params: params }
1094
- options[:dataset] ||= self
1095
- updater = QueryJob::Updater.from_options service, query, options
1096
- updater.location = location if location # may be dataset reference
1097
-
1098
- yield updater if block_given?
1099
-
1100
- gapi = service.query_job updater.to_gapi
1101
- job = Job.from_gapi gapi, service
1091
+ standard_sql: nil, legacy_sql: nil, &block
1092
+ job = query_job query, params: params, external: external,
1093
+ cache: cache, standard_sql: standard_sql,
1094
+ legacy_sql: legacy_sql, &block
1102
1095
  job.wait_until_done!
1103
1096
  ensure_job_succeeded! job
1104
1097
 
@@ -1629,27 +1622,19 @@ module Google
1629
1622
  projection_fields: nil, jagged_rows: nil, quoted_newlines: nil,
1630
1623
  encoding: nil, delimiter: nil, ignore_unknown: nil,
1631
1624
  max_bad_records: nil, quote: nil, skip_leading: nil,
1632
- schema: nil, autodetect: nil, null_marker: nil
1633
- ensure_service!
1634
-
1635
- updater = load_job_updater table_id,
1636
- format: format, create: create,
1637
- write: write,
1638
- projection_fields: projection_fields,
1639
- jagged_rows: jagged_rows,
1640
- quoted_newlines: quoted_newlines,
1641
- encoding: encoding,
1642
- delimiter: delimiter,
1643
- ignore_unknown: ignore_unknown,
1644
- max_bad_records: max_bad_records,
1645
- quote: quote, skip_leading: skip_leading,
1646
- schema: schema,
1647
- autodetect: autodetect,
1648
- null_marker: null_marker
1649
-
1650
- yield updater if block_given?
1625
+ schema: nil, autodetect: nil, null_marker: nil, &block
1626
+ job = load_job table_id, files,
1627
+ format: format, create: create, write: write,
1628
+ projection_fields: projection_fields,
1629
+ jagged_rows: jagged_rows,
1630
+ quoted_newlines: quoted_newlines,
1631
+ encoding: encoding, delimiter: delimiter,
1632
+ ignore_unknown: ignore_unknown,
1633
+ max_bad_records: max_bad_records,
1634
+ quote: quote, skip_leading: skip_leading,
1635
+ schema: schema, autodetect: autodetect,
1636
+ null_marker: null_marker, &block
1651
1637
 
1652
- job = load_local_or_uri files, updater
1653
1638
  job.wait_until_done!
1654
1639
  ensure_job_succeeded! job
1655
1640
  true
@@ -1829,6 +1814,12 @@ module Google
1829
1814
  # @param [String] table_id The ID of the destination table.
1830
1815
  # @param [Hash, Array<Hash>] rows A hash object or array of hash objects
1831
1816
  # containing the data. Required.
1817
+ # @param [Array<String>] insert_ids A unique ID for each row. BigQuery
1818
+ # uses this property to detect duplicate insertion requests on a
1819
+ # best-effort basis. For more information, see [data
1820
+ # consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency).
1821
+ # Optional. If not provided, the client library will assign a UUID to
1822
+ # each row before the request is sent.
1832
1823
  # @param [Boolean] skip_invalid Insert all valid rows of a request, even
1833
1824
  # if invalid rows exist. The default value is `false`, which causes
1834
1825
  # the entire request to fail if any invalid rows exist.
@@ -1884,12 +1875,19 @@ module Google
1884
1875
  #
1885
1876
  # @!group Data
1886
1877
  #
1887
- def insert table_id, rows, skip_invalid: nil, ignore_unknown: nil,
1888
- autocreate: nil
1878
+ def insert table_id, rows, insert_ids: nil, skip_invalid: nil,
1879
+ ignore_unknown: nil, autocreate: nil
1880
+ rows = [rows] if rows.is_a? Hash
1881
+ insert_ids = Array insert_ids
1882
+ if insert_ids.count > 0 && insert_ids.count != rows.count
1883
+ raise ArgumentError, "insert_ids must be the same size as rows"
1884
+ end
1885
+
1889
1886
  if autocreate
1890
1887
  begin
1891
1888
  insert_data table_id, rows, skip_invalid: skip_invalid,
1892
- ignore_unknown: ignore_unknown
1889
+ ignore_unknown: ignore_unknown,
1890
+ insert_ids: insert_ids
1893
1891
  rescue Google::Cloud::NotFoundError
1894
1892
  sleep rand(1..60)
1895
1893
  begin
@@ -1904,11 +1902,13 @@ module Google
1904
1902
  sleep 60
1905
1903
  insert table_id, rows, skip_invalid: skip_invalid,
1906
1904
  ignore_unknown: ignore_unknown,
1907
- autocreate: true
1905
+ autocreate: true,
1906
+ insert_ids: insert_ids
1908
1907
  end
1909
1908
  else
1910
1909
  insert_data table_id, rows, skip_invalid: skip_invalid,
1911
- ignore_unknown: ignore_unknown
1910
+ ignore_unknown: ignore_unknown,
1911
+ insert_ids: insert_ids
1912
1912
  end
1913
1913
  end
1914
1914
 
@@ -1976,12 +1976,14 @@ module Google
1976
1976
 
1977
1977
  protected
1978
1978
 
1979
- def insert_data table_id, rows, skip_invalid: nil, ignore_unknown: nil
1979
+ def insert_data table_id, rows, skip_invalid: nil, ignore_unknown: nil,
1980
+ insert_ids: nil
1980
1981
  rows = [rows] if rows.is_a? Hash
1981
1982
  raise ArgumentError, "No rows provided" if rows.empty?
1982
1983
  ensure_service!
1983
1984
  options = { skip_invalid: skip_invalid,
1984
- ignore_unknown: ignore_unknown }
1985
+ ignore_unknown: ignore_unknown,
1986
+ insert_ids: insert_ids }
1985
1987
  gapi = service.insert_tabledata dataset_id, table_id, rows, options
1986
1988
  InsertResponse.from_gapi rows, gapi
1987
1989
  end
@@ -224,6 +224,11 @@ module Google
224
224
  # @!group Attributes
225
225
  def location= value
226
226
  @gapi.job_reference.location = value
227
+ return unless value.nil?
228
+
229
+ # Treat assigning value of nil the same as unsetting the value.
230
+ unset = @gapi.job_reference.instance_variables.include? :@location
231
+ @gapi.job_reference.remove_instance_variable :@location if unset
227
232
  end
228
233
 
229
234
  ##
@@ -745,6 +745,11 @@ module Google
745
745
  # @!group Attributes
746
746
  def location= value
747
747
  @gapi.job_reference.location = value
748
+ return unless value.nil?
749
+
750
+ # Treat assigning value of nil the same as unsetting the value.
751
+ unset = @gapi.job_reference.instance_variables.include? :@location
752
+ @gapi.job_reference.remove_instance_variable :@location if unset
748
753
  end
749
754
 
750
755
  ##
@@ -84,6 +84,17 @@ module Google
84
84
  end
85
85
  alias project project_id
86
86
 
87
+ ##
88
+ # The email address of the service account for the project used to
89
+ # connect to BigQuery. (See also {#project_id}.)
90
+ #
91
+ # @return [String] The service account email address.
92
+ #
93
+ def service_account_email
94
+ @service_account_email ||= \
95
+ service.project_service_account.email
96
+ end
97
+
87
98
  ##
88
99
  # Queries data by creating a [query
89
100
  # job](https://cloud.google.com/bigquery/docs/query-overview#query_jobs).
@@ -514,18 +525,12 @@ module Google
514
525
  # end
515
526
  #
516
527
  def query query, params: nil, external: nil, max: nil, cache: true,
517
- dataset: nil, project: nil, standard_sql: nil, legacy_sql: nil
518
- ensure_service!
519
- options = { priority: "INTERACTIVE", cache: cache, dataset: dataset,
520
- project: project || self.project,
521
- legacy_sql: legacy_sql, standard_sql: standard_sql,
522
- params: params, external: external }
523
- updater = QueryJob::Updater.from_options service, query, options
524
-
525
- yield updater if block_given?
526
-
527
- gapi = service.query_job updater.to_gapi
528
- job = Job.from_gapi gapi, service
528
+ dataset: nil, project: nil, standard_sql: nil,
529
+ legacy_sql: nil, &block
530
+ job = query_job query, params: params, external: external,
531
+ cache: cache, dataset: dataset,
532
+ project: project, standard_sql: standard_sql,
533
+ legacy_sql: legacy_sql, &block
529
534
  job.wait_until_done!
530
535
 
531
536
  if job.failed?
@@ -402,6 +402,11 @@ module Google
402
402
  # @!group Attributes
403
403
  def location= value
404
404
  @gapi.job_reference.location = value
405
+ return unless value.nil?
406
+
407
+ # Treat assigning value of nil the same as unsetting the value.
408
+ unset = @gapi.job_reference.instance_variables.include? :@location
409
+ @gapi.job_reference.remove_instance_variable :@location if unset
405
410
  end
406
411
 
407
412
  ##
@@ -70,6 +70,10 @@ module Google
70
70
  end
71
71
  attr_accessor :mocked_service
72
72
 
73
+ def project_service_account
74
+ service.get_project_service_account project
75
+ end
76
+
73
77
  ##
74
78
  # Lists all datasets in the specified project to which you have
75
79
  # been granted the READER dataset role.
@@ -202,15 +206,17 @@ module Google
202
206
 
203
207
  def insert_tabledata dataset_id, table_id, rows, options = {}
204
208
  json_rows = Array(rows).map { |row| Convert.to_json_row row }
205
-
206
209
  insert_tabledata_json_rows dataset_id, table_id, json_rows, options
207
210
  end
208
211
 
209
212
  def insert_tabledata_json_rows dataset_id, table_id, json_rows,
210
213
  options = {}
211
- insert_rows = Array(json_rows).map do |json_row|
214
+
215
+ rows_and_ids = Array(json_rows).zip Array(options[:insert_ids])
216
+ insert_rows = rows_and_ids.map do |json_row, insert_id|
217
+ insert_id ||= SecureRandom.uuid
212
218
  {
213
- insertId: SecureRandom.uuid,
219
+ insertId: insert_id,
214
220
  json: json_row
215
221
  }
216
222
  end
@@ -1836,26 +1836,18 @@ module Google
1836
1836
  projection_fields: nil, jagged_rows: nil, quoted_newlines: nil,
1837
1837
  encoding: nil, delimiter: nil, ignore_unknown: nil,
1838
1838
  max_bad_records: nil, quote: nil, skip_leading: nil,
1839
- autodetect: nil, null_marker: nil
1840
- ensure_service!
1839
+ autodetect: nil, null_marker: nil, &block
1840
+ job = load_job files, format: format, create: create, write: write,
1841
+ projection_fields: projection_fields,
1842
+ jagged_rows: jagged_rows,
1843
+ quoted_newlines: quoted_newlines,
1844
+ encoding: encoding, delimiter: delimiter,
1845
+ ignore_unknown: ignore_unknown,
1846
+ max_bad_records: max_bad_records,
1847
+ quote: quote, skip_leading: skip_leading,
1848
+ autodetect: autodetect,
1849
+ null_marker: null_marker, &block
1841
1850
 
1842
- updater = load_job_updater format: format, create: create,
1843
- write: write,
1844
- projection_fields: projection_fields,
1845
- jagged_rows: jagged_rows,
1846
- quoted_newlines: quoted_newlines,
1847
- encoding: encoding,
1848
- delimiter: delimiter,
1849
- ignore_unknown: ignore_unknown,
1850
- max_bad_records: max_bad_records,
1851
- quote: quote, skip_leading: skip_leading,
1852
- schema: schema,
1853
- autodetect: autodetect,
1854
- null_marker: null_marker
1855
-
1856
- yield updater if block_given?
1857
-
1858
- job = load_local_or_uri files, updater
1859
1851
  job.wait_until_done!
1860
1852
  ensure_job_succeeded! job
1861
1853
  true
@@ -1871,6 +1863,12 @@ module Google
1871
1863
  #
1872
1864
  # @param [Hash, Array<Hash>] rows A hash object or array of hash objects
1873
1865
  # containing the data. Required.
1866
+ # @param [Array<String>] insert_ids A unique ID for each row. BigQuery
1867
+ # uses this property to detect duplicate insertion requests on a
1868
+ # best-effort basis. For more information, see [data
1869
+ # consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency).
1870
+ # Optional. If not provided, the client library will assign a UUID to
1871
+ # each row before the request is sent.
1874
1872
  # @param [Boolean] skip_invalid Insert all valid rows of a request, even
1875
1873
  # if invalid rows exist. The default value is `false`, which causes
1876
1874
  # the entire request to fail if any invalid rows exist.
@@ -1908,12 +1906,18 @@ module Google
1908
1906
  #
1909
1907
  # @!group Data
1910
1908
  #
1911
- def insert rows, skip_invalid: nil, ignore_unknown: nil
1909
+ def insert rows, insert_ids: nil, skip_invalid: nil, ignore_unknown: nil
1910
+ rows = [rows] if rows.is_a? Hash
1911
+ insert_ids = Array insert_ids
1912
+ if insert_ids.count > 0 && insert_ids.count != rows.count
1913
+ raise ArgumentError, "insert_ids must be the same size as rows"
1914
+ end
1912
1915
  rows = [rows] if rows.is_a? Hash
1913
1916
  raise ArgumentError, "No rows provided" if rows.empty?
1914
1917
  ensure_service!
1915
1918
  options = { skip_invalid: skip_invalid,
1916
- ignore_unknown: ignore_unknown }
1919
+ ignore_unknown: ignore_unknown,
1920
+ insert_ids: insert_ids }
1917
1921
  gapi = service.insert_tabledata dataset_id, table_id, rows, options
1918
1922
  InsertResponse.from_gapi rows, gapi
1919
1923
  end
@@ -16,6 +16,7 @@
16
16
  require "google/cloud/bigquery/convert"
17
17
  require "monitor"
18
18
  require "concurrent"
19
+ require "securerandom"
19
20
 
20
21
  module Google
21
22
  module Cloud
@@ -100,24 +101,30 @@ module Google
100
101
  #
101
102
  # @param [Hash, Array<Hash>] rows A hash object or array of hash
102
103
  # objects containing the data.
104
+ # @param [Array<String>] insert_ids A unique ID for each row. BigQuery
105
+ # uses this property to detect duplicate insertion requests on a
106
+ # best-effort basis. For more information, see [data
107
+ # consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency).
108
+ # Optional. If not provided, the client library will assign a UUID
109
+ # to each row before the request is sent.
103
110
  #
104
- def insert rows
111
+ def insert rows, insert_ids: nil
105
112
  return nil if rows.nil?
106
113
  return nil if rows.is_a?(Array) && rows.empty?
107
- rows = [rows] if rows.is_a? Hash
114
+ rows, insert_ids = validate_insert_args rows, insert_ids
108
115
 
109
116
  synchronize do
110
- rows.each do |row|
117
+ rows.zip(Array(insert_ids)).each do |row, insert_id|
111
118
  if @batch.nil?
112
119
  @batch = Batch.new max_bytes: @max_bytes, max_rows: @max_rows
113
- @batch.insert row
120
+ @batch.insert row, insert_id
114
121
  else
115
- unless @batch.try_insert row
122
+ unless @batch.try_insert row, insert_id
116
123
  push_batch_request!
117
124
 
118
125
  @batch = Batch.new max_bytes: @max_bytes,
119
126
  max_rows: @max_rows
120
- @batch.insert row
127
+ @batch.insert row, insert_id
121
128
  end
122
129
  end
123
130
 
@@ -204,6 +211,15 @@ module Google
204
211
 
205
212
  protected
206
213
 
214
+ def validate_insert_args rows, insert_ids
215
+ rows = [rows] if rows.is_a? Hash
216
+ insert_ids = Array insert_ids
217
+ if insert_ids.count > 0 && insert_ids.count != rows.count
218
+ raise ArgumentError, "insert_ids must be the same size as rows"
219
+ end
220
+ [rows, insert_ids]
221
+ end
222
+
207
223
  def run_background
208
224
  synchronize do
209
225
  until @stopped
@@ -230,11 +246,13 @@ module Google
230
246
 
231
247
  orig_rows = @batch.rows
232
248
  json_rows = @batch.json_rows
249
+ insert_ids = @batch.insert_ids
233
250
  Concurrent::Future.new(executor: @thread_pool) do
234
251
  begin
235
252
  raise ArgumentError, "No rows provided" if json_rows.empty?
236
253
  options = { skip_invalid: @skip_invalid,
237
- ignore_unknown: @ignore_unknown }
254
+ ignore_unknown: @ignore_unknown,
255
+ insert_ids: insert_ids }
238
256
  insert_resp = @table.service.insert_tabledata_json_rows(
239
257
  @table.dataset_id, @table.table_id, json_rows, options
240
258
  )
@@ -255,30 +273,37 @@ module Google
255
273
  ##
256
274
  # @private
257
275
  class Batch
258
- attr_reader :max_bytes, :max_rows, :rows, :json_rows
276
+ attr_reader :max_bytes, :max_rows, :rows, :json_rows, :insert_ids
259
277
 
260
278
  def initialize max_bytes: 10000000, max_rows: 500
261
279
  @max_bytes = max_bytes
262
280
  @max_rows = max_rows
263
281
  @rows = []
264
282
  @json_rows = []
265
- @current_bytes = 0
283
+ @insert_ids = []
284
+ # The default request byte size overhead is 63.
285
+ # "{\"rows\":[],\"ignoreUnknownValues\":false,
286
+ # \"skipInvalidRows\":false}".bytesize #=> 63
287
+ @current_bytes = 63
266
288
  end
267
289
 
268
- def insert row
290
+ def insert row, insert_id
291
+ insert_id ||= SecureRandom.uuid
269
292
  json_row = to_json_row row
270
293
 
271
- insert_rows_bytes row, json_row, addl_bytes_for_json_row(json_row)
294
+ insert_rows_bytes \
295
+ row, json_row, insert_id, addl_bytes_for(json_row, insert_id)
272
296
  end
273
297
 
274
- def try_insert row
298
+ def try_insert row, insert_id
299
+ insert_id ||= SecureRandom.uuid
275
300
  json_row = to_json_row row
276
- addl_bytes = addl_bytes_for_json_row json_row
301
+ addl_bytes = addl_bytes_for json_row, insert_id
277
302
 
278
303
  return false if @current_bytes + addl_bytes >= @max_bytes
279
304
  return false if @rows.count + 1 >= @max_rows
280
305
 
281
- insert_rows_bytes row, json_row, addl_bytes
306
+ insert_rows_bytes row, json_row, insert_id, addl_bytes
282
307
  true
283
308
  end
284
309
 
@@ -288,9 +313,10 @@ module Google
288
313
 
289
314
  private
290
315
 
291
- def insert_rows_bytes row, json_row, addl_bytes
316
+ def insert_rows_bytes row, json_row, insert_id, addl_bytes
292
317
  @rows << row
293
318
  @json_rows << json_row
319
+ @insert_ids << insert_id if insert_id
294
320
  @current_bytes += addl_bytes
295
321
  end
296
322
 
@@ -298,12 +324,9 @@ module Google
298
324
  Convert.to_json_row row
299
325
  end
300
326
 
301
- def addl_bytes_for row
302
- addl_bytes_for_json_row Convert.to_json_row(row)
303
- end
304
-
305
- def addl_bytes_for_json_row json_row
306
- json_row.to_json.bytesize + 1
327
+ def addl_bytes_for json_row, insert_id
328
+ # "{\"insertId\":\"\",\"json\":},".bytesize #=> 24
329
+ 24 + json_row.to_json.bytesize + insert_id.bytesize
307
330
  end
308
331
  end
309
332
 
@@ -16,7 +16,7 @@
16
16
  module Google
17
17
  module Cloud
18
18
  module Bigquery
19
- VERSION = "1.2.0".freeze
19
+ VERSION = "1.3.0".freeze
20
20
  end
21
21
  end
22
22
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: google-cloud-bigquery
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mike Moore
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2018-03-31 00:00:00.000000000 Z
12
+ date: 2018-04-05 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: google-cloud-core