google-cloud-bigquery 1.21.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +16 -0
  3. data/AUTHENTICATION.md +158 -0
  4. data/CHANGELOG.md +397 -0
  5. data/CODE_OF_CONDUCT.md +40 -0
  6. data/CONTRIBUTING.md +188 -0
  7. data/LICENSE +201 -0
  8. data/LOGGING.md +27 -0
  9. data/OVERVIEW.md +463 -0
  10. data/TROUBLESHOOTING.md +31 -0
  11. data/lib/google-cloud-bigquery.rb +139 -0
  12. data/lib/google/cloud/bigquery.rb +145 -0
  13. data/lib/google/cloud/bigquery/argument.rb +197 -0
  14. data/lib/google/cloud/bigquery/convert.rb +383 -0
  15. data/lib/google/cloud/bigquery/copy_job.rb +316 -0
  16. data/lib/google/cloud/bigquery/credentials.rb +50 -0
  17. data/lib/google/cloud/bigquery/data.rb +526 -0
  18. data/lib/google/cloud/bigquery/dataset.rb +2845 -0
  19. data/lib/google/cloud/bigquery/dataset/access.rb +1021 -0
  20. data/lib/google/cloud/bigquery/dataset/list.rb +162 -0
  21. data/lib/google/cloud/bigquery/encryption_configuration.rb +123 -0
  22. data/lib/google/cloud/bigquery/external.rb +2432 -0
  23. data/lib/google/cloud/bigquery/extract_job.rb +368 -0
  24. data/lib/google/cloud/bigquery/insert_response.rb +180 -0
  25. data/lib/google/cloud/bigquery/job.rb +657 -0
  26. data/lib/google/cloud/bigquery/job/list.rb +162 -0
  27. data/lib/google/cloud/bigquery/load_job.rb +1704 -0
  28. data/lib/google/cloud/bigquery/model.rb +740 -0
  29. data/lib/google/cloud/bigquery/model/list.rb +164 -0
  30. data/lib/google/cloud/bigquery/project.rb +1655 -0
  31. data/lib/google/cloud/bigquery/project/list.rb +161 -0
  32. data/lib/google/cloud/bigquery/query_job.rb +1695 -0
  33. data/lib/google/cloud/bigquery/routine.rb +1108 -0
  34. data/lib/google/cloud/bigquery/routine/list.rb +165 -0
  35. data/lib/google/cloud/bigquery/schema.rb +564 -0
  36. data/lib/google/cloud/bigquery/schema/field.rb +668 -0
  37. data/lib/google/cloud/bigquery/service.rb +589 -0
  38. data/lib/google/cloud/bigquery/standard_sql.rb +495 -0
  39. data/lib/google/cloud/bigquery/table.rb +3340 -0
  40. data/lib/google/cloud/bigquery/table/async_inserter.rb +520 -0
  41. data/lib/google/cloud/bigquery/table/list.rb +172 -0
  42. data/lib/google/cloud/bigquery/time.rb +65 -0
  43. data/lib/google/cloud/bigquery/version.rb +22 -0
  44. metadata +297 -0
@@ -0,0 +1,368 @@
1
+ # Copyright 2015 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
module Google
  module Cloud
    module Bigquery
      ##
      # # ExtractJob
      #
      # A {Job} subclass representing an export operation that may be performed
      # on a {Table}. An ExtractJob instance is created when you call
      # {Table#extract_job}.
      #
      # @see https://cloud.google.com/bigquery/docs/exporting-data
      #   Exporting Data From BigQuery
      # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
      #   reference
      #
      # @example
      #   require "google/cloud/bigquery"
      #
      #   bigquery = Google::Cloud::Bigquery.new
      #   dataset = bigquery.dataset "my_dataset"
      #   table = dataset.table "my_table"
      #
      #   extract_job = table.extract_job "gs://my-bucket/file-name.json",
      #                                   format: "json"
      #   extract_job.wait_until_done!
      #   extract_job.done? #=> true
      #
      class ExtractJob < Job
        ##
        # The URI or URIs representing the Google Cloud Storage files to which
        # the data is exported.
        #
        # @return [Array<String>] The Cloud Storage destination URI patterns.
        #
        def destinations
          Array @gapi.configuration.extract.destination_uris
        end

        ##
        # The table from which the data is exported. This is the table upon
        # which {Table#extract_job} was called.
        #
        # @return [Table, nil] A table instance, or `nil` when the job
        #   configuration has no source table.
        #
        def source
          table = @gapi.configuration.extract.source_table
          return nil unless table
          retrieve_table table.project_id, table.dataset_id, table.table_id
        end

        ##
        # Checks if the export operation compresses the data using gzip. The
        # default is `false`.
        #
        # @return [Boolean] `true` when `GZIP`, `false` otherwise.
        #
        def compression?
          @gapi.configuration.extract.compression == "GZIP"
        end

        ##
        # Checks if the destination format for the data is [newline-delimited
        # JSON](http://jsonlines.org/). The default is `false`.
        #
        # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false`
        #   otherwise.
        #
        def json?
          @gapi.configuration.extract.destination_format == "NEWLINE_DELIMITED_JSON"
        end

        ##
        # Checks if the destination format for the data is CSV. Tables with
        # nested or repeated fields cannot be exported as CSV. The default is
        # `true`.
        #
        # @return [Boolean] `true` when `CSV` or when the format is unset
        #   (the service default), `false` otherwise.
        #
        def csv?
          val = @gapi.configuration.extract.destination_format
          return true if val.nil?
          val == "CSV"
        end

        ##
        # Checks if the destination format for the data is
        # [Avro](http://avro.apache.org/). The default is `false`.
        #
        # @return [Boolean] `true` when `AVRO`, `false` otherwise.
        #
        def avro?
          @gapi.configuration.extract.destination_format == "AVRO"
        end

        ##
        # The character or symbol the operation uses to delimit fields in the
        # exported data. The default is a comma (,).
        #
        # @return [String] A string containing the character, such as `","`.
        #
        def delimiter
          # field_delimiter is a String or nil, so `||` safely applies the
          # documented default.
          @gapi.configuration.extract.field_delimiter || ","
        end

        ##
        # Checks if the exported data contains a header row. The default is
        # `true`.
        #
        # @return [Boolean] `true` when the print header configuration is
        #   present or `nil`, `false` otherwise.
        #
        def print_header?
          val = @gapi.configuration.extract.print_header
          # An explicit `false` must be preserved; only nil means "unset".
          val.nil? ? true : val
        end

        ##
        # The number of files per destination URI or URI pattern specified in
        # {#destinations}.
        #
        # @return [Array<Integer>] An array of values in the same order as the
        #   URI patterns.
        #
        def destinations_file_counts
          Array @gapi.statistics.extract.destination_uri_file_counts
        end

        ##
        # A hash containing the URI or URI pattern specified in
        # {#destinations} mapped to the counts of files per destination.
        #
        # @return [Hash<String, Integer>] A Hash with the URI patterns as keys
        #   and the counts as values.
        #
        def destinations_counts
          # zip(...).to_h replaces the legacy Hash[] constructor form.
          destinations.zip(destinations_file_counts).to_h
        end

        ##
        # If `#avro?` (`#format` is set to `"AVRO"`), this flag indicates
        # whether to enable extracting applicable column types (such as
        # `TIMESTAMP`) to their corresponding AVRO logical types
        # (`timestamp-micros`), instead of only using their raw types
        # (`avro-long`).
        #
        # @return [Boolean] `true` when applicable column types will use their
        #   corresponding AVRO logical types, `false` otherwise.
        #
        def use_avro_logical_types?
          @gapi.configuration.extract.use_avro_logical_types
        end

        ##
        # Yielded to a block to accumulate changes for an API request.
        class Updater < ExtractJob
          ##
          # @private Create an Updater object.
          def initialize gapi
            @gapi = gapi
          end

          ##
          # @private Create an Updater from an options hash.
          #
          # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
          #   configuration object for setting query options.
          def self.from_options service, table, storage_files, options
            job_ref = service.job_ref_from options[:job_id], options[:prefix]
            # Accept Google::Cloud::Storage::File objects as well as URL strings.
            storage_urls = Array(storage_files).map do |url|
              url.respond_to?(:to_gs_url) ? url.to_gs_url : url
            end
            options[:format] ||= Convert.derive_source_format storage_urls.first
            job = Google::Apis::BigqueryV2::Job.new(
              job_reference: job_ref,
              configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
                extract: Google::Apis::BigqueryV2::JobConfigurationExtract.new(
                  destination_uris: Array(storage_urls),
                  source_table: table
                ),
                dry_run: options[:dryrun]
              )
            )

            from_job_and_options job, options
          end

          ##
          # @private Create an Updater from a Job and options hash.
          #
          # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
          #   configuration object for setting query options.
          def self.from_job_and_options request, options
            updater = ExtractJob::Updater.new request
            updater.compression = options[:compression]
            updater.delimiter = options[:delimiter]
            updater.format = options[:format]
            updater.header = options[:header]
            updater.labels = options[:labels] if options[:labels]
            unless options[:use_avro_logical_types].nil?
              updater.use_avro_logical_types = options[:use_avro_logical_types]
            end
            updater
          end

          ##
          # Sets the geographic location where the job should run. Required
          # except for US and EU.
          #
          # @param [String] value A geographic location, such as "US", "EU" or
          #   "asia-northeast1". Required except for US and EU.
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #   dataset = bigquery.dataset "my_dataset"
          #   table = dataset.table "my_table"
          #
          #   destination = "gs://my-bucket/file-name.csv"
          #   extract_job = table.extract_job destination do |j|
          #     j.location = "EU"
          #   end
          #
          #   extract_job.wait_until_done!
          #   extract_job.done? #=> true
          #
          # @!group Attributes
          def location= value
            @gapi.job_reference.location = value
            return unless value.nil?

            # Treat assigning value of nil the same as unsetting the value.
            unset = @gapi.job_reference.instance_variables.include? :@location
            @gapi.job_reference.remove_instance_variable :@location if unset
          end

          ##
          # Sets the compression type.
          #
          # @param [String] value The compression type to use for exported
          #   files. Possible values include `GZIP` and `NONE`. The default
          #   value is `NONE`.
          #
          # @!group Attributes
          def compression= value
            @gapi.configuration.extract.compression = value
          end

          ##
          # Sets the field delimiter.
          #
          # @param [String] value Delimiter to use between fields in the
          #   exported data. Default is <code>,</code>.
          #
          # @!group Attributes
          def delimiter= value
            @gapi.configuration.extract.field_delimiter = value
          end

          ##
          # Sets the destination file format. The default value is `csv`.
          #
          # The following values are supported:
          #
          # * `csv` - CSV
          # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
          # * `avro` - [Avro](http://avro.apache.org/)
          #
          # @param [String] new_format The new source format.
          #
          # @!group Attributes
          #
          def format= new_format
            @gapi.configuration.extract.update! destination_format: Convert.source_format(new_format)
          end

          ##
          # Print a header row in the exported file.
          #
          # @param [Boolean] value Whether to print out a header row in the
          #   results. Default is `true`.
          #
          # @!group Attributes
          def header= value
            @gapi.configuration.extract.print_header = value
          end

          ##
          # Sets the labels to use for the job.
          #
          # @param [Hash] value A hash of user-provided labels associated with
          #   the job. You can use these to organize and group your jobs. Label
          #   keys and values can be no longer than 63 characters, can only
          #   contain lowercase letters, numeric characters, underscores and
          #   dashes. International characters are allowed. Label values are
          #   optional. Label keys must start with a letter and each label in
          #   the list must have a different key.
          #
          # @!group Attributes
          #
          def labels= value
            @gapi.configuration.update! labels: value
          end

          ##
          # Indicate whether to enable extracting applicable column types (such
          # as `TIMESTAMP`) to their corresponding AVRO logical types
          # (`timestamp-micros`), instead of only using their raw types
          # (`avro-long`).
          #
          # Only used when `#format` is set to `"AVRO"` (`#avro?`).
          #
          # @param [Boolean] value Whether applicable column types will use
          #   their corresponding AVRO logical types.
          #
          # @!group Attributes
          def use_avro_logical_types= value
            @gapi.configuration.extract.use_avro_logical_types = value
          end

          # The Updater is a mutable configuration object, not a submitted
          # job, so the Job lifecycle operations are deliberately disabled.
          def cancel
            raise "not implemented in #{self.class}"
          end

          def rerun!
            raise "not implemented in #{self.class}"
          end

          def reload!
            raise "not implemented in #{self.class}"
          end
          alias refresh! reload!

          def wait_until_done!
            raise "not implemented in #{self.class}"
          end

          ##
          # @private Returns the Google API client library version of this job.
          #
          # @return [<Google::Apis::BigqueryV2::Job>] (See
          #   {Google::Apis::BigqueryV2::Job})
          def to_gapi
            @gapi
          end
        end
      end
    end
  end
end
@@ -0,0 +1,180 @@
1
+ # Copyright 2015 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
require "json"

module Google
  module Cloud
    module Bigquery
      ##
      # InsertResponse
      #
      # Represents the response from BigQuery when data is inserted into a table
      # for near-immediate querying, without the need to complete a load
      # operation before the data can appear in query results. See
      # {Dataset#insert} and {Table#insert}.
      #
      # @see https://cloud.google.com/bigquery/streaming-data-into-bigquery
      #   Streaming Data Into BigQuery
      #
      # @example
      #   require "google/cloud/bigquery"
      #
      #   bigquery = Google::Cloud::Bigquery.new
      #   dataset = bigquery.dataset "my_dataset"
      #
      #   rows = [
      #     { "first_name" => "Alice", "age" => 21 },
      #     { "first_name" => "Bob", "age" => 22 }
      #   ]
      #
      #   insert_response = dataset.insert "my_table", rows
      #
      class InsertResponse
        # @private Wraps the rows sent to the service and the raw API response.
        def initialize rows, gapi
          @rows = rows
          @gapi = gapi
        end

        ##
        # Checks if the error count is zero, meaning that all of the rows were
        # inserted. Use {#insert_errors} to access the errors.
        #
        # @return [Boolean] `true` when the error count is zero, `false`
        #   otherwise.
        #
        def success?
          error_count.zero?
        end

        ##
        # The count of rows in the response, minus the count of errors for rows
        # that were not inserted.
        #
        # @return [Integer] The number of rows inserted.
        #
        def insert_count
          @rows.count - error_count
        end

        ##
        # The count of errors for rows that were not inserted.
        #
        # @return [Integer] The number of errors.
        #
        def error_count
          # insert_errors may be nil when every row succeeded; Array() handles
          # that uniformly.
          Array(@gapi.insert_errors).length
        end

        ##
        # The error objects for rows that were not inserted.
        #
        # @return [Array<InsertError>] An array containing error objects.
        #
        def insert_errors
          Array(@gapi.insert_errors).map do |gapi_error|
            # Round-trip through JSON to convert the API error objects into
            # plain hashes.
            error_hashes = gapi_error.errors.map { |err| JSON.parse err.to_json }
            InsertError.new gapi_error.index, @rows[gapi_error.index], error_hashes
          end
        end

        ##
        # The rows that were not inserted.
        #
        # @return [Array<Hash>] An array of hash objects containing the row
        #   data.
        #
        def error_rows
          Array(@gapi.insert_errors).map { |gapi_error| @rows[gapi_error.index] }
        end

        ##
        # Returns the error object for a row that was not inserted.
        #
        # @param [Hash] row A hash containing the data for a row.
        #
        # @return [InsertError, nil] An error object, or `nil` if no error is
        #   found in the response for the row.
        #
        def insert_error_for row
          insert_errors.find { |candidate| candidate.row == row }
        end

        ##
        # Returns the error hashes for a row that was not inserted. Each error
        # hash contains the following keys: `reason`, `location`, `debugInfo`,
        # and `message`.
        #
        # @param [Hash] row A hash containing the data for a row.
        #
        # @return [Array<Hash>, nil] An array of error hashes, or `nil` if no
        #   errors are found in the response for the row.
        #
        def errors_for row
          error = insert_error_for row
          error ? error.errors : []
        end

        ##
        # Returns the index for a row that was not inserted.
        #
        # @param [Hash] row A hash containing the data for a row.
        #
        # @return [Integer, nil] The index of the row, or `nil` if no error is
        #   found in the response for the row.
        #
        def index_for row
          insert_error_for(row)&.index
        end

        # @private New InsertResponse from the inserted rows and a
        # Google::Apis::BigqueryV2::InsertAllTableDataResponse object.
        def self.from_gapi rows, gapi
          new rows, gapi
        end

        ##
        # InsertError
        #
        # Represents the errors for a row that was not inserted.
        #
        # @attr_reader [Integer] index The index of the row that error applies
        #   to.
        # @attr_reader [Hash] row The row that error applies to.
        # @attr_reader [Hash] errors Error information for the row indicated by
        #   the index property, with the following keys: `reason`, `location`,
        #   `debugInfo`, and `message`.
        #
        class InsertError
          attr_reader :index, :row, :errors

          # @private
          def initialize index, row, errors
            @index = index
            @row = row
            @errors = errors
          end
        end
      end
    end
  end
end