google-cloud-bigquery 1.2.0 → 1.3.0

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
- metadata.gz: 41d3da96cf5cfe992d89be7d76eae719bee105acab2b2621ae9d96637963c4fa
- data.tar.gz: 2da003b89f6ab554f97941a56aa54bc35b0a9bb239b1bf19638a1ed148df8592
+ metadata.gz: d79d312ec01c34460988a536e01fe9b32d937cd357baa16275921fa327f5474d
+ data.tar.gz: 2b24200431e4a883dbb914db75e3b0507f1b3c29d3c0401929772c1c7f4e54a4
 SHA512:
- metadata.gz: a049fc85d22b3ea866a5beff61c46e987411acf3d946837adf465ebfabfade7d4fc633e487b29eb3b3b0be36c0f08e3b8740cc636a8c5c362b40b13fc47ad127
- data.tar.gz: 7f8c25afa22dfb9259e73ac5b3ae6917629b84bfcde9cf52055d0a88bb680450ef1ed173d8c64edfae06afbb535b3f02e75f024a7032b14907908525d6c4d582
+ metadata.gz: ebf35594fb06f2aadd3a60de6dd44218c5e87e91ef4c735d76f640c36315df1380aa9f69d02f3ca39c89b13a1cbd2808e87c8c9fab33cb9cda588843797ae692
+ data.tar.gz: c2a19edf84a43c4c6f2cbc6e99145acf8db20a8385ce4a8e5553203811270f49dcd940ae234b192073800b2da20fb1930fe93474578fe36fd5325fb32335afb4
@@ -203,6 +203,11 @@ module Google
   # @!group Attributes
   def location= value
     @gapi.job_reference.location = value
+    return unless value.nil?
+
+    # Treat assigning value of nil the same as unsetting the value.
+    unset = @gapi.job_reference.instance_variables.include? :@location
+    @gapi.job_reference.remove_instance_variable :@location if unset
   end
 
   ##
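With the added guard, assigning nil now removes the location from the job reference instead of serializing an explicit null. A rough sketch of the observable behavior (the job updater variable is a stand-in):

    job.location = "US"   # request carries jobReference.location = "US"
    job.location = nil    # @location is removed, so the field is omitted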
@@ -433,7 +433,9 @@ module Google
   # @param [String] name A descriptive name for the table.
   # @param [String] description A user-friendly description of the table.
   # @yield [table] a block for setting the table
-  # @yieldparam [Table] table the table object to be updated
+  # @yieldparam [Google::Cloud::Bigquery::Table::Updater] table An updater
+  #   to set additional properties on the table in the API request to
+  #   create it.
   #
   # @return [Google::Cloud::Bigquery::Table] A new table object.
   #
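A brief sketch of the documented block form, with illustrative names:

    table = dataset.create_table "my_table" do |t|
      t.name        = "My Table"
      t.description = "A description of my table."
    end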
@@ -1086,19 +1088,10 @@ module Google
   # @!group Data
   #
   def query query, params: nil, external: nil, max: nil, cache: true,
-            standard_sql: nil, legacy_sql: nil
-    ensure_service!
-    options = { priority: "INTERACTIVE", external: external, cache: cache,
-                legacy_sql: legacy_sql, standard_sql: standard_sql,
-                params: params }
-    options[:dataset] ||= self
-    updater = QueryJob::Updater.from_options service, query, options
-    updater.location = location if location # may be dataset reference
-
-    yield updater if block_given?
-
-    gapi = service.query_job updater.to_gapi
-    job = Job.from_gapi gapi, service
+            standard_sql: nil, legacy_sql: nil, &block
+    job = query_job query, params: params, external: external,
+                    cache: cache, standard_sql: standard_sql,
+                    legacy_sql: legacy_sql, &block
     job.wait_until_done!
     ensure_job_succeeded! job
 
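Callers are unaffected: Dataset#query now builds the job through query_job and still blocks until completion. An illustrative call (the table name is hypothetical):

    data = dataset.query "SELECT name FROM my_table"
    data.each { |row| puts row[:name] }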
@@ -1629,27 +1622,19 @@ module Google
             projection_fields: nil, jagged_rows: nil, quoted_newlines: nil,
             encoding: nil, delimiter: nil, ignore_unknown: nil,
             max_bad_records: nil, quote: nil, skip_leading: nil,
-            schema: nil, autodetect: nil, null_marker: nil
-    ensure_service!
-
-    updater = load_job_updater table_id,
-                               format: format, create: create,
-                               write: write,
-                               projection_fields: projection_fields,
-                               jagged_rows: jagged_rows,
-                               quoted_newlines: quoted_newlines,
-                               encoding: encoding,
-                               delimiter: delimiter,
-                               ignore_unknown: ignore_unknown,
-                               max_bad_records: max_bad_records,
-                               quote: quote, skip_leading: skip_leading,
-                               schema: schema,
-                               autodetect: autodetect,
-                               null_marker: null_marker
-
-    yield updater if block_given?
+            schema: nil, autodetect: nil, null_marker: nil, &block
+    job = load_job table_id, files,
+                   format: format, create: create, write: write,
+                   projection_fields: projection_fields,
+                   jagged_rows: jagged_rows,
+                   quoted_newlines: quoted_newlines,
+                   encoding: encoding, delimiter: delimiter,
+                   ignore_unknown: ignore_unknown,
+                   max_bad_records: max_bad_records,
+                   quote: quote, skip_leading: skip_leading,
+                   schema: schema, autodetect: autodetect,
+                   null_marker: null_marker, &block
 
-    job = load_local_or_uri files, updater
     job.wait_until_done!
     ensure_job_succeeded! job
     true
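Dataset#load likewise reduces to load_job plus a synchronous wait. A minimal sketch (bucket and table names are hypothetical):

    dataset.load "my_table", "gs://my-bucket/file.csv" #=> true on success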
@@ -1829,6 +1814,12 @@ module Google
   # @param [String] table_id The ID of the destination table.
   # @param [Hash, Array<Hash>] rows A hash object or array of hash objects
   #   containing the data. Required.
+  # @param [Array<String>] insert_ids A unique ID for each row. BigQuery
+  #   uses this property to detect duplicate insertion requests on a
+  #   best-effort basis. For more information, see [data
+  #   consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency).
+  #   Optional. If not provided, the client library will assign a UUID to
+  #   each row before the request is sent.
   # @param [Boolean] skip_invalid Insert all valid rows of a request, even
   #   if invalid rows exist. The default value is `false`, which causes
   #   the entire request to fail if any invalid rows exist.
@@ -1884,12 +1875,19 @@ module Google
   #
   # @!group Data
   #
-  def insert table_id, rows, skip_invalid: nil, ignore_unknown: nil,
-             autocreate: nil
+  def insert table_id, rows, insert_ids: nil, skip_invalid: nil,
+             ignore_unknown: nil, autocreate: nil
+    rows = [rows] if rows.is_a? Hash
+    insert_ids = Array insert_ids
+    if insert_ids.count > 0 && insert_ids.count != rows.count
+      raise ArgumentError, "insert_ids must be the same size as rows"
+    end
+
     if autocreate
       begin
         insert_data table_id, rows, skip_invalid: skip_invalid,
-                    ignore_unknown: ignore_unknown
+                    ignore_unknown: ignore_unknown,
+                    insert_ids: insert_ids
       rescue Google::Cloud::NotFoundError
         sleep rand(1..60)
         begin
@@ -1904,11 +1902,13 @@ module Google
         sleep 60
         insert table_id, rows, skip_invalid: skip_invalid,
                ignore_unknown: ignore_unknown,
-               autocreate: true
+               autocreate: true,
+               insert_ids: insert_ids
       end
     else
       insert_data table_id, rows, skip_invalid: skip_invalid,
-                  ignore_unknown: ignore_unknown
+                  ignore_unknown: ignore_unknown,
+                  insert_ids: insert_ids
     end
   end
 
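A sketch of the new insert_ids option on Dataset#insert; retrying the same call with the same IDs lets BigQuery de-duplicate on a best-effort basis (the table and IDs are illustrative):

    rows = [
      { "first_name" => "Alice", "age" => 21 },
      { "first_name" => "Bob",   "age" => 22 }
    ]
    dataset.insert "my_table", rows, insert_ids: ["id-1", "id-2"]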
@@ -1976,12 +1976,14 @@ module Google
 
   protected
 
-  def insert_data table_id, rows, skip_invalid: nil, ignore_unknown: nil
+  def insert_data table_id, rows, skip_invalid: nil, ignore_unknown: nil,
+                  insert_ids: nil
     rows = [rows] if rows.is_a? Hash
     raise ArgumentError, "No rows provided" if rows.empty?
     ensure_service!
     options = { skip_invalid: skip_invalid,
-                ignore_unknown: ignore_unknown }
+                ignore_unknown: ignore_unknown,
+                insert_ids: insert_ids }
     gapi = service.insert_tabledata dataset_id, table_id, rows, options
     InsertResponse.from_gapi rows, gapi
   end
@@ -224,6 +224,11 @@ module Google
   # @!group Attributes
   def location= value
     @gapi.job_reference.location = value
+    return unless value.nil?
+
+    # Treat assigning value of nil the same as unsetting the value.
+    unset = @gapi.job_reference.instance_variables.include? :@location
+    @gapi.job_reference.remove_instance_variable :@location if unset
   end
 
   ##
@@ -745,6 +745,11 @@ module Google
   # @!group Attributes
   def location= value
     @gapi.job_reference.location = value
+    return unless value.nil?
+
+    # Treat assigning value of nil the same as unsetting the value.
+    unset = @gapi.job_reference.instance_variables.include? :@location
+    @gapi.job_reference.remove_instance_variable :@location if unset
   end
 
   ##
@@ -84,6 +84,17 @@ module Google
   end
   alias project project_id
 
+  ##
+  # The email address of the service account for the project used to
+  # connect to BigQuery. (See also {#project_id}.)
+  #
+  # @return [String] The service account email address.
+  #
+  def service_account_email
+    @service_account_email ||= \
+      service.project_service_account.email
+  end
+
   ##
   # Queries data by creating a [query
   # job](https://cloud.google.com/bigquery/docs/query-overview#query_jobs).
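A short usage sketch of the new reader; the value is fetched once from the service and memoized:

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    email = bigquery.service_account_email # cached for subsequent calls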
@@ -514,18 +525,12 @@ module Google
   # end
   #
   def query query, params: nil, external: nil, max: nil, cache: true,
-            dataset: nil, project: nil, standard_sql: nil, legacy_sql: nil
-    ensure_service!
-    options = { priority: "INTERACTIVE", cache: cache, dataset: dataset,
-                project: project || self.project,
-                legacy_sql: legacy_sql, standard_sql: standard_sql,
-                params: params, external: external }
-    updater = QueryJob::Updater.from_options service, query, options
-
-    yield updater if block_given?
-
-    gapi = service.query_job updater.to_gapi
-    job = Job.from_gapi gapi, service
+            dataset: nil, project: nil, standard_sql: nil,
+            legacy_sql: nil, &block
+    job = query_job query, params: params, external: external,
+                    cache: cache, dataset: dataset,
+                    project: project, standard_sql: standard_sql,
+                    legacy_sql: legacy_sql, &block
     job.wait_until_done!
 
     if job.failed?
@@ -402,6 +402,11 @@ module Google
   # @!group Attributes
   def location= value
     @gapi.job_reference.location = value
+    return unless value.nil?
+
+    # Treat assigning value of nil the same as unsetting the value.
+    unset = @gapi.job_reference.instance_variables.include? :@location
+    @gapi.job_reference.remove_instance_variable :@location if unset
   end
 
   ##
@@ -70,6 +70,10 @@ module Google
   end
   attr_accessor :mocked_service
 
+  def project_service_account
+    service.get_project_service_account project
+  end
+
   ##
   # Lists all datasets in the specified project to which you have
   # been granted the READER dataset role.
@@ -202,15 +206,17 @@ module Google
 
   def insert_tabledata dataset_id, table_id, rows, options = {}
     json_rows = Array(rows).map { |row| Convert.to_json_row row }
-
     insert_tabledata_json_rows dataset_id, table_id, json_rows, options
   end
 
   def insert_tabledata_json_rows dataset_id, table_id, json_rows,
                                  options = {}
-    insert_rows = Array(json_rows).map do |json_row|
+
+    rows_and_ids = Array(json_rows).zip Array(options[:insert_ids])
+    insert_rows = rows_and_ids.map do |json_row, insert_id|
+      insert_id ||= SecureRandom.uuid
       {
-        insertId: SecureRandom.uuid,
+        insertId: insert_id,
         json: json_row
       }
     end
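Because zip pads missing IDs with nil, any row without a caller-supplied ID still gets a fresh UUID from the ||= fallback. A self-contained sketch of that pairing logic:

    require "securerandom"

    json_rows  = [{ name: "a" }, { name: "b" }]
    insert_ids = ["id-a"] # no ID for the second row
    json_rows.zip(insert_ids).map do |row, id|
      { insertId: id || SecureRandom.uuid, json: row }
    end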
@@ -1836,26 +1836,18 @@ module Google
             projection_fields: nil, jagged_rows: nil, quoted_newlines: nil,
             encoding: nil, delimiter: nil, ignore_unknown: nil,
             max_bad_records: nil, quote: nil, skip_leading: nil,
-            autodetect: nil, null_marker: nil
-    ensure_service!
+            autodetect: nil, null_marker: nil, &block
+    job = load_job files, format: format, create: create, write: write,
+                   projection_fields: projection_fields,
+                   jagged_rows: jagged_rows,
+                   quoted_newlines: quoted_newlines,
+                   encoding: encoding, delimiter: delimiter,
+                   ignore_unknown: ignore_unknown,
+                   max_bad_records: max_bad_records,
+                   quote: quote, skip_leading: skip_leading,
+                   autodetect: autodetect,
+                   null_marker: null_marker, &block
 
-    updater = load_job_updater format: format, create: create,
-                               write: write,
-                               projection_fields: projection_fields,
-                               jagged_rows: jagged_rows,
-                               quoted_newlines: quoted_newlines,
-                               encoding: encoding,
-                               delimiter: delimiter,
-                               ignore_unknown: ignore_unknown,
-                               max_bad_records: max_bad_records,
-                               quote: quote, skip_leading: skip_leading,
-                               schema: schema,
-                               autodetect: autodetect,
-                               null_marker: null_marker
-
-    yield updater if block_given?
-
-    job = load_local_or_uri files, updater
     job.wait_until_done!
     ensure_job_succeeded! job
     true
@@ -1871,6 +1863,12 @@ module Google
   #
   # @param [Hash, Array<Hash>] rows A hash object or array of hash objects
   #   containing the data. Required.
+  # @param [Array<String>] insert_ids A unique ID for each row. BigQuery
+  #   uses this property to detect duplicate insertion requests on a
+  #   best-effort basis. For more information, see [data
+  #   consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency).
+  #   Optional. If not provided, the client library will assign a UUID to
+  #   each row before the request is sent.
   # @param [Boolean] skip_invalid Insert all valid rows of a request, even
   #   if invalid rows exist. The default value is `false`, which causes
   #   the entire request to fail if any invalid rows exist.
@@ -1908,12 +1906,18 @@ module Google
   #
   # @!group Data
   #
-  def insert rows, skip_invalid: nil, ignore_unknown: nil
+  def insert rows, insert_ids: nil, skip_invalid: nil, ignore_unknown: nil
+    rows = [rows] if rows.is_a? Hash
+    insert_ids = Array insert_ids
+    if insert_ids.count > 0 && insert_ids.count != rows.count
+      raise ArgumentError, "insert_ids must be the same size as rows"
+    end
     rows = [rows] if rows.is_a? Hash
     raise ArgumentError, "No rows provided" if rows.empty?
     ensure_service!
     options = { skip_invalid: skip_invalid,
-                ignore_unknown: ignore_unknown }
+                ignore_unknown: ignore_unknown,
+                insert_ids: insert_ids }
     gapi = service.insert_tabledata dataset_id, table_id, rows, options
     InsertResponse.from_gapi rows, gapi
   end
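A sketch of the same option at the Table level (names illustrative); a single Hash row is wrapped in an array first, so one ID suffices:

    table.insert({ "name" => "Heidi" }, insert_ids: ["user-heidi"])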
@@ -16,6 +16,7 @@
 require "google/cloud/bigquery/convert"
 require "monitor"
 require "concurrent"
+require "securerandom"
 
 module Google
   module Cloud
@@ -100,24 +101,30 @@ module Google
   #
   # @param [Hash, Array<Hash>] rows A hash object or array of hash
   #   objects containing the data.
+  # @param [Array<String>] insert_ids A unique ID for each row. BigQuery
+  #   uses this property to detect duplicate insertion requests on a
+  #   best-effort basis. For more information, see [data
+  #   consistency](https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency).
+  #   Optional. If not provided, the client library will assign a UUID
+  #   to each row before the request is sent.
   #
-  def insert rows
+  def insert rows, insert_ids: nil
     return nil if rows.nil?
     return nil if rows.is_a?(Array) && rows.empty?
-    rows = [rows] if rows.is_a? Hash
+    rows, insert_ids = validate_insert_args rows, insert_ids
 
     synchronize do
-      rows.each do |row|
+      rows.zip(Array(insert_ids)).each do |row, insert_id|
         if @batch.nil?
           @batch = Batch.new max_bytes: @max_bytes, max_rows: @max_rows
-          @batch.insert row
+          @batch.insert row, insert_id
         else
-          unless @batch.try_insert row
+          unless @batch.try_insert row, insert_id
             push_batch_request!
 
             @batch = Batch.new max_bytes: @max_bytes,
                                max_rows: @max_rows
-            @batch.insert row
+            @batch.insert row, insert_id
          end
         end
 
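The asynchronous path accepts the same option; a hedged sketch (the inserter and rows variables are illustrative):

    inserter = table.insert_async
    inserter.insert rows, insert_ids: ["id-1", "id-2"]
    inserter.stop.wait! # flush pending batches before exiting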
@@ -204,6 +211,15 @@ module Google
 
   protected
 
+  def validate_insert_args rows, insert_ids
+    rows = [rows] if rows.is_a? Hash
+    insert_ids = Array insert_ids
+    if insert_ids.count > 0 && insert_ids.count != rows.count
+      raise ArgumentError, "insert_ids must be the same size as rows"
+    end
+    [rows, insert_ids]
+  end
+
   def run_background
     synchronize do
       until @stopped
@@ -230,11 +246,13 @@ module Google
 
     orig_rows = @batch.rows
     json_rows = @batch.json_rows
+    insert_ids = @batch.insert_ids
     Concurrent::Future.new(executor: @thread_pool) do
       begin
         raise ArgumentError, "No rows provided" if json_rows.empty?
         options = { skip_invalid: @skip_invalid,
-                    ignore_unknown: @ignore_unknown }
+                    ignore_unknown: @ignore_unknown,
+                    insert_ids: insert_ids }
         insert_resp = @table.service.insert_tabledata_json_rows(
           @table.dataset_id, @table.table_id, json_rows, options
         )
@@ -255,30 +273,37 @@ module Google
   ##
   # @private
   class Batch
-    attr_reader :max_bytes, :max_rows, :rows, :json_rows
+    attr_reader :max_bytes, :max_rows, :rows, :json_rows, :insert_ids
 
     def initialize max_bytes: 10000000, max_rows: 500
       @max_bytes = max_bytes
       @max_rows = max_rows
       @rows = []
       @json_rows = []
-      @current_bytes = 0
+      @insert_ids = []
+      # The default request byte size overhead is 63.
+      # "{\"rows\":[],\"ignoreUnknownValues\":false,
+      # \"skipInvalidRows\":false}".bytesize #=> 63
+      @current_bytes = 63
     end
 
-    def insert row
+    def insert row, insert_id
+      insert_id ||= SecureRandom.uuid
       json_row = to_json_row row
 
-      insert_rows_bytes row, json_row, addl_bytes_for_json_row(json_row)
+      insert_rows_bytes \
+        row, json_row, insert_id, addl_bytes_for(json_row, insert_id)
     end
 
-    def try_insert row
+    def try_insert row, insert_id
+      insert_id ||= SecureRandom.uuid
       json_row = to_json_row row
-      addl_bytes = addl_bytes_for_json_row json_row
+      addl_bytes = addl_bytes_for json_row, insert_id
 
       return false if @current_bytes + addl_bytes >= @max_bytes
      return false if @rows.count + 1 >= @max_rows
 
-      insert_rows_bytes row, json_row, addl_bytes
+      insert_rows_bytes row, json_row, insert_id, addl_bytes
       true
     end
 
@@ -288,9 +313,10 @@ module Google
 
     private
 
-    def insert_rows_bytes row, json_row, addl_bytes
+    def insert_rows_bytes row, json_row, insert_id, addl_bytes
       @rows << row
       @json_rows << json_row
+      @insert_ids << insert_id if insert_id
       @current_bytes += addl_bytes
     end
 
@@ -298,12 +324,9 @@ module Google
       Convert.to_json_row row
     end
 
-    def addl_bytes_for row
-      addl_bytes_for_json_row Convert.to_json_row(row)
-    end
-
-    def addl_bytes_for_json_row json_row
-      json_row.to_json.bytesize + 1
+    def addl_bytes_for json_row, insert_id
+      # "{\"insertId\":\"\",\"json\":},".bytesize #=> 24
+      24 + json_row.to_json.bytesize + insert_id.bytesize
     end
   end
 
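The running estimate now starts at the fixed 63-byte request envelope and grows, per row, by the 24-byte row wrapper plus the JSON payload and the insert ID. A quick check of the arithmetic in plain Ruby:

    require "json"

    json_row  = { name: "Heidi" } # to_json is 16 bytes
    insert_id = "row-1"           # 5 bytes
    24 + json_row.to_json.bytesize + insert_id.bytesize #=> 45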
@@ -16,7 +16,7 @@
 module Google
   module Cloud
     module Bigquery
-      VERSION = "1.2.0".freeze
+      VERSION = "1.3.0".freeze
     end
   end
 end
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: google-cloud-bigquery
 version: !ruby/object:Gem::Version
-  version: 1.2.0
+  version: 1.3.0
 platform: ruby
 authors:
 - Mike Moore
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-03-31 00:00:00.000000000 Z
+date: 2018-04-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: google-cloud-core