google-cloud-bigquery 1.21.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +16 -0
  3. data/AUTHENTICATION.md +158 -0
  4. data/CHANGELOG.md +397 -0
  5. data/CODE_OF_CONDUCT.md +40 -0
  6. data/CONTRIBUTING.md +188 -0
  7. data/LICENSE +201 -0
  8. data/LOGGING.md +27 -0
  9. data/OVERVIEW.md +463 -0
  10. data/TROUBLESHOOTING.md +31 -0
  11. data/lib/google-cloud-bigquery.rb +139 -0
  12. data/lib/google/cloud/bigquery.rb +145 -0
  13. data/lib/google/cloud/bigquery/argument.rb +197 -0
  14. data/lib/google/cloud/bigquery/convert.rb +383 -0
  15. data/lib/google/cloud/bigquery/copy_job.rb +316 -0
  16. data/lib/google/cloud/bigquery/credentials.rb +50 -0
  17. data/lib/google/cloud/bigquery/data.rb +526 -0
  18. data/lib/google/cloud/bigquery/dataset.rb +2845 -0
  19. data/lib/google/cloud/bigquery/dataset/access.rb +1021 -0
  20. data/lib/google/cloud/bigquery/dataset/list.rb +162 -0
  21. data/lib/google/cloud/bigquery/encryption_configuration.rb +123 -0
  22. data/lib/google/cloud/bigquery/external.rb +2432 -0
  23. data/lib/google/cloud/bigquery/extract_job.rb +368 -0
  24. data/lib/google/cloud/bigquery/insert_response.rb +180 -0
  25. data/lib/google/cloud/bigquery/job.rb +657 -0
  26. data/lib/google/cloud/bigquery/job/list.rb +162 -0
  27. data/lib/google/cloud/bigquery/load_job.rb +1704 -0
  28. data/lib/google/cloud/bigquery/model.rb +740 -0
  29. data/lib/google/cloud/bigquery/model/list.rb +164 -0
  30. data/lib/google/cloud/bigquery/project.rb +1655 -0
  31. data/lib/google/cloud/bigquery/project/list.rb +161 -0
  32. data/lib/google/cloud/bigquery/query_job.rb +1695 -0
  33. data/lib/google/cloud/bigquery/routine.rb +1108 -0
  34. data/lib/google/cloud/bigquery/routine/list.rb +165 -0
  35. data/lib/google/cloud/bigquery/schema.rb +564 -0
  36. data/lib/google/cloud/bigquery/schema/field.rb +668 -0
  37. data/lib/google/cloud/bigquery/service.rb +589 -0
  38. data/lib/google/cloud/bigquery/standard_sql.rb +495 -0
  39. data/lib/google/cloud/bigquery/table.rb +3340 -0
  40. data/lib/google/cloud/bigquery/table/async_inserter.rb +520 -0
  41. data/lib/google/cloud/bigquery/table/list.rb +172 -0
  42. data/lib/google/cloud/bigquery/time.rb +65 -0
  43. data/lib/google/cloud/bigquery/version.rb +22 -0
  44. metadata +297 -0
@@ -0,0 +1,368 @@
1
+ # Copyright 2015 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ module Google
17
+ module Cloud
18
+ module Bigquery
19
+ ##
20
+ # # ExtractJob
21
+ #
22
+ # A {Job} subclass representing an export operation that may be performed
23
+ # on a {Table}. An ExtractJob instance is created when you call
24
+ # {Table#extract_job}.
25
+ #
26
+ # @see https://cloud.google.com/bigquery/docs/exporting-data
27
+ # Exporting Data From BigQuery
28
+ # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
29
+ # reference
30
+ #
31
+ # @example
32
+ # require "google/cloud/bigquery"
33
+ #
34
+ # bigquery = Google::Cloud::Bigquery.new
35
+ # dataset = bigquery.dataset "my_dataset"
36
+ # table = dataset.table "my_table"
37
+ #
38
+ # extract_job = table.extract_job "gs://my-bucket/file-name.json",
39
+ # format: "json"
40
+ # extract_job.wait_until_done!
41
+ # extract_job.done? #=> true
42
+ #
43
+ class ExtractJob < Job
44
+ ##
45
+ # The URI or URIs representing the Google Cloud Storage files to which
46
+ # the data is exported.
47
+ def destinations
48
+ Array @gapi.configuration.extract.destination_uris
49
+ end
50
+
51
+ ##
52
+ # The table from which the data is exported. This is the table upon
53
+ # which {Table#extract_job} was called.
54
+ #
55
+ # @return [Table] A table instance.
56
+ #
57
+ def source
58
+ table = @gapi.configuration.extract.source_table
59
+ return nil unless table
60
+ retrieve_table table.project_id, table.dataset_id, table.table_id
61
+ end
62
+
63
+ ##
64
+ # Checks if the export operation compresses the data using gzip. The
65
+ # default is `false`.
66
+ #
67
+ # @return [Boolean] `true` when `GZIP`, `false` otherwise.
68
+ #
69
+ def compression?
70
+ val = @gapi.configuration.extract.compression
71
+ val == "GZIP"
72
+ end
73
+
74
+ ##
75
+ # Checks if the destination format for the data is [newline-delimited
76
+ # JSON](http://jsonlines.org/). The default is `false`.
77
+ #
78
+ # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false`
79
+ # otherwise.
80
+ #
81
+ def json?
82
+ val = @gapi.configuration.extract.destination_format
83
+ val == "NEWLINE_DELIMITED_JSON"
84
+ end
85
+
86
+ ##
87
+ # Checks if the destination format for the data is CSV. Tables with
88
+ # nested or repeated fields cannot be exported as CSV. The default is
89
+ # `true`.
90
+ #
91
+ # @return [Boolean] `true` when `CSV`, `false` otherwise.
92
+ #
93
+ def csv?
94
+ val = @gapi.configuration.extract.destination_format
95
+ return true if val.nil?
96
+ val == "CSV"
97
+ end
98
+
99
+ ##
100
+ # Checks if the destination format for the data is
101
+ # [Avro](http://avro.apache.org/). The default is `false`.
102
+ #
103
+ # @return [Boolean] `true` when `AVRO`, `false` otherwise.
104
+ #
105
+ def avro?
106
+ val = @gapi.configuration.extract.destination_format
107
+ val == "AVRO"
108
+ end
109
+
110
+ ##
111
+ # The character or symbol the operation uses to delimit fields in the
112
+ # exported data. The default is a comma (,).
113
+ #
114
+ # @return [String] A string containing the character, such as `","`.
115
+ #
116
+ def delimiter
117
+ val = @gapi.configuration.extract.field_delimiter
118
+ val = "," if val.nil?
119
+ val
120
+ end
121
+
122
+ ##
123
+ # Checks if the exported data contains a header row. The default is
124
+ # `true`.
125
+ #
126
+ # @return [Boolean] `true` when the print header configuration is
127
+ # present or `nil`, `false` otherwise.
128
+ #
129
+ def print_header?
130
+ val = @gapi.configuration.extract.print_header
131
+ val = true if val.nil?
132
+ val
133
+ end
134
+
135
+ ##
136
+ # The number of files per destination URI or URI pattern specified in
137
+ # {#destinations}.
138
+ #
139
+ # @return [Array<Integer>] An array of values in the same order as the
140
+ # URI patterns.
141
+ #
142
+ def destinations_file_counts
143
+ Array @gapi.statistics.extract.destination_uri_file_counts
144
+ end
145
+
146
+ ##
147
+ # A hash containing the URI or URI pattern specified in
148
+ # {#destinations} mapped to the counts of files per destination.
149
+ #
150
+ # @return [Hash<String, Integer>] A Hash with the URI patterns as keys
151
+ # and the counts as values.
152
+ #
153
+ def destinations_counts
154
+ Hash[destinations.zip destinations_file_counts]
155
+ end
156
+
157
+ ##
158
+ # If `#avro?` (`#format` is set to `"AVRO"`), this flag indicates
159
+ # whether to enable extracting applicable column types (such as
160
+ # `TIMESTAMP`) to their corresponding AVRO logical types
161
+ # (`timestamp-micros`), instead of only using their raw types
162
+ # (`avro-long`).
163
+ #
164
+ # @return [Boolean] `true` when applicable column types will use their
165
+ # corresponding AVRO logical types, `false` otherwise.
166
+ #
167
+ def use_avro_logical_types?
168
+ @gapi.configuration.extract.use_avro_logical_types
169
+ end
170
+
171
+ ##
172
+ # Yielded to a block to accumulate changes for an API request.
173
+ class Updater < ExtractJob
174
+ ##
175
+ # @private Create an Updater object.
176
+ def initialize gapi
177
+ @gapi = gapi
178
+ end
179
+
180
+ ##
181
+ # @private Create an Updater from an options hash.
182
+ #
183
+ # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
184
+ # configuration object for setting query options.
185
+ def self.from_options service, table, storage_files, options
186
+ job_ref = service.job_ref_from options[:job_id], options[:prefix]
187
+ storage_urls = Array(storage_files).map do |url|
188
+ url.respond_to?(:to_gs_url) ? url.to_gs_url : url
189
+ end
190
+ options[:format] ||= Convert.derive_source_format storage_urls.first
191
+ job = Google::Apis::BigqueryV2::Job.new(
192
+ job_reference: job_ref,
193
+ configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
194
+ extract: Google::Apis::BigqueryV2::JobConfigurationExtract.new(
195
+ destination_uris: Array(storage_urls),
196
+ source_table: table
197
+ ),
198
+ dry_run: options[:dryrun]
199
+ )
200
+ )
201
+
202
+ from_job_and_options job, options
203
+ end
204
+
205
+ ##
206
+ # @private Create an Updater from a Job and options hash.
207
+ #
208
+ # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
209
+ # configuration object for setting query options.
210
+ def self.from_job_and_options request, options
211
+ updater = ExtractJob::Updater.new request
212
+ updater.compression = options[:compression]
213
+ updater.delimiter = options[:delimiter]
214
+ updater.format = options[:format]
215
+ updater.header = options[:header]
216
+ updater.labels = options[:labels] if options[:labels]
217
+ unless options[:use_avro_logical_types].nil?
218
+ updater.use_avro_logical_types = options[:use_avro_logical_types]
219
+ end
220
+ updater
221
+ end
222
+
223
+ ##
224
+ # Sets the geographic location where the job should run. Required
225
+ # except for US and EU.
226
+ #
227
+ # @param [String] value A geographic location, such as "US", "EU" or
228
+ # "asia-northeast1". Required except for US and EU.
229
+ #
230
+ # @example
231
+ # require "google/cloud/bigquery"
232
+ #
233
+ # bigquery = Google::Cloud::Bigquery.new
234
+ # dataset = bigquery.dataset "my_dataset"
235
+ # table = dataset.table "my_table"
236
+ #
237
+ # destination = "gs://my-bucket/file-name.csv"
238
+ # extract_job = table.extract_job destination do |j|
239
+ # j.location = "EU"
240
+ # end
241
+ #
242
+ # extract_job.wait_until_done!
243
+ # extract_job.done? #=> true
244
+ #
245
+ # @!group Attributes
246
+ def location= value
247
+ @gapi.job_reference.location = value
248
+ return unless value.nil?
249
+
250
+ # Treat assigning value of nil the same as unsetting the value.
251
+ unset = @gapi.job_reference.instance_variables.include? :@location
252
+ @gapi.job_reference.remove_instance_variable :@location if unset
253
+ end
254
+
255
+ ##
256
+ # Sets the compression type.
257
+ #
258
+ # @param [String] value The compression type to use for exported
259
+ # files. Possible values include `GZIP` and `NONE`. The default
260
+ # value is `NONE`.
261
+ #
262
+ # @!group Attributes
263
+ def compression= value
264
+ @gapi.configuration.extract.compression = value
265
+ end
266
+
267
+ ##
268
+ # Sets the field delimiter.
269
+ #
270
+ # @param [String] value Delimiter to use between fields in the
271
+ # exported data. Default is <code>,</code>.
272
+ #
273
+ # @!group Attributes
274
+ def delimiter= value
275
+ @gapi.configuration.extract.field_delimiter = value
276
+ end
277
+
278
+ ##
279
+ # Sets the destination file format. The default value is `csv`.
280
+ #
281
+ # The following values are supported:
282
+ #
283
+ # * `csv` - CSV
284
+ # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
285
+ # * `avro` - [Avro](http://avro.apache.org/)
286
+ #
287
+ # @param [String] new_format The new source format.
288
+ #
289
+ # @!group Attributes
290
+ #
291
+ def format= new_format
292
+ @gapi.configuration.extract.update! destination_format: Convert.source_format(new_format)
293
+ end
294
+
295
+ ##
296
+ # Print a header row in the exported file.
297
+ #
298
+ # @param [Boolean] value Whether to print out a header row in the
299
+ # results. Default is `true`.
300
+ #
301
+ # @!group Attributes
302
+ def header= value
303
+ @gapi.configuration.extract.print_header = value
304
+ end
305
+
306
+ ##
307
+ # Sets the labels to use for the job.
308
+ #
309
+ # @param [Hash] value A hash of user-provided labels associated with
310
+ # the job. You can use these to organize and group your jobs. Label
311
+ # keys and values can be no longer than 63 characters, can only
312
+ # contain lowercase letters, numeric characters, underscores and
313
+ # dashes. International characters are allowed. Label values are
314
+ # optional. Label keys must start with a letter and each label in
315
+ # the list must have a different key.
316
+ #
317
+ # @!group Attributes
318
+ #
319
+ def labels= value
320
+ @gapi.configuration.update! labels: value
321
+ end
322
+
323
+ ##
324
+ # Indicate whether to enable extracting applicable column types (such
325
+ # as `TIMESTAMP`) to their corresponding AVRO logical types
326
+ # (`timestamp-micros`), instead of only using their raw types
327
+ # (`avro-long`).
328
+ #
329
+ # Only used when `#format` is set to `"AVRO"` (`#avro?`).
330
+ #
331
+ # @param [Boolean] value Whether applicable column types will use
332
+ # their corresponding AVRO logical types.
333
+ #
334
+ # @!group Attributes
335
+ def use_avro_logical_types= value
336
+ @gapi.configuration.extract.use_avro_logical_types = value
337
+ end
338
+
339
+ def cancel
340
+ raise "not implemented in #{self.class}"
341
+ end
342
+
343
+ def rerun!
344
+ raise "not implemented in #{self.class}"
345
+ end
346
+
347
+ def reload!
348
+ raise "not implemented in #{self.class}"
349
+ end
350
+ alias refresh! reload!
351
+
352
+ def wait_until_done!
353
+ raise "not implemented in #{self.class}"
354
+ end
355
+
356
+ ##
357
+ # @private Returns the Google API client library version of this job.
358
+ #
359
+ # @return [<Google::Apis::BigqueryV2::Job>] (See
360
+ # {Google::Apis::BigqueryV2::Job})
361
+ def to_gapi
362
+ @gapi
363
+ end
364
+ end
365
+ end
366
+ end
367
+ end
368
+ end
@@ -0,0 +1,180 @@
1
+ # Copyright 2015 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "json"
17
+
18
+ module Google
19
+ module Cloud
20
+ module Bigquery
21
+ ##
22
+ # InsertResponse
23
+ #
24
+ # Represents the response from BigQuery when data is inserted into a table
25
+ # for near-immediate querying, without the need to complete a load
26
+ # operation before the data can appear in query results. See
27
+ # {Dataset#insert} and {Table#insert}.
28
+ #
29
+ # @see https://cloud.google.com/bigquery/streaming-data-into-bigquery
30
+ # Streaming Data Into BigQuery
31
+ #
32
+ # @example
33
+ # require "google/cloud/bigquery"
34
+ #
35
+ # bigquery = Google::Cloud::Bigquery.new
36
+ # dataset = bigquery.dataset "my_dataset"
37
+ #
38
+ # rows = [
39
+ # { "first_name" => "Alice", "age" => 21 },
40
+ # { "first_name" => "Bob", "age" => 22 }
41
+ # ]
42
+ #
43
+ # insert_response = dataset.insert "my_table", rows
44
+ #
45
+ class InsertResponse
46
+ # @private
47
+ def initialize rows, gapi
48
+ @rows = rows
49
+ @gapi = gapi
50
+ end
51
+
52
+ ##
53
+ # Checks if the error count is zero, meaning that all of the rows were
54
+ # inserted. Use {#insert_errors} to access the errors.
55
+ #
56
+ # @return [Boolean] `true` when the error count is zero, `false`
57
+ # otherwise.
58
+ #
59
+ def success?
60
+ error_count.zero?
61
+ end
62
+
63
+ ##
64
+ # The count of rows in the response, minus the count of errors for rows
65
+ # that were not inserted.
66
+ #
67
+ # @return [Integer] The number of rows inserted.
68
+ #
69
+ def insert_count
70
+ @rows.count - error_count
71
+ end
72
+
73
+ ##
74
+ # The count of errors for rows that were not inserted.
75
+ #
76
+ # @return [Integer] The number of errors.
77
+ #
78
+ def error_count
79
+ Array(@gapi.insert_errors).count
80
+ end
81
+
82
+ ##
83
+ # The error objects for rows that were not inserted.
84
+ #
85
+ # @return [Array<InsertError>] An array containing error objects.
86
+ #
87
+ def insert_errors
88
+ Array(@gapi.insert_errors).map do |ie|
89
+ row = @rows[ie.index]
90
+ errors = ie.errors.map { |e| JSON.parse e.to_json }
91
+ InsertError.new ie.index, row, errors
92
+ end
93
+ end
94
+
95
+ ##
96
+ # The rows that were not inserted.
97
+ #
98
+ # @return [Array<Hash>] An array of hash objects containing the row
99
+ # data.
100
+ #
101
+ def error_rows
102
+ Array(@gapi.insert_errors).map { |ie| @rows[ie.index] }
103
+ end
104
+
105
+ ##
106
+ # Returns the error object for a row that was not inserted.
107
+ #
108
+ # @param [Hash] row A hash containing the data for a row.
109
+ #
110
+ # @return [InsertError, nil] An error object, or `nil` if no error is
111
+ # found in the response for the row.
112
+ #
113
+ def insert_error_for row
114
+ insert_errors.detect { |e| e.row == row }
115
+ end
116
+
117
+ ##
118
+ # Returns the error hashes for a row that was not inserted. Each error
119
+ # hash contains the following keys: `reason`, `location`, `debugInfo`,
120
+ # and `message`.
121
+ #
122
+ # @param [Hash] row A hash containing the data for a row.
123
+ #
124
+ # @return [Array<Hash>] An array of error hashes; an empty array if no
125
+ # errors are found in the response for the row.
126
+ #
127
+ def errors_for row
128
+ ie = insert_error_for row
129
+ return ie.errors if ie
130
+ []
131
+ end
132
+
133
+ ##
134
+ # Returns the index for a row that was not inserted.
135
+ #
136
+ # @param [Hash] row A hash containing the data for a row.
137
+ #
138
+ # @return [Integer, nil] The index of the row, or `nil` if no error is
139
+ # found in the response for the row.
140
+ #
141
+ def index_for row
142
+ ie = insert_error_for row
143
+ return ie.index if ie
144
+ nil
145
+ end
146
+
147
+ # @private New InsertResponse from the inserted rows and a
148
+ # Google::Apis::BigqueryV2::InsertAllTableDataResponse object.
149
+ def self.from_gapi rows, gapi
150
+ new rows, gapi
151
+ end
152
+
153
+ ##
154
+ # InsertError
155
+ #
156
+ # Represents the errors for a row that was not inserted.
157
+ #
158
+ # @attr_reader [Integer] index The index of the row that the error applies
159
+ # to.
160
+ # @attr_reader [Hash] row The row that the error applies to.
161
+ # @attr_reader [Hash] errors Error information for the row indicated by
162
+ # the index property, with the following keys: `reason`, `location`,
163
+ # `debugInfo`, and `message`.
164
+ #
165
+ class InsertError
166
+ attr_reader :index
167
+ attr_reader :row
168
+ attr_reader :errors
169
+
170
+ # @private
171
+ def initialize index, row, errors
172
+ @index = index
173
+ @row = row
174
+ @errors = errors
175
+ end
176
+ end
177
+ end
178
+ end
179
+ end
180
+ end