google-cloud-bigquery 0.28.0 → 0.29.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/google-cloud-bigquery.rb +2 -2
- data/lib/google/cloud/bigquery.rb +10 -12
- data/lib/google/cloud/bigquery/copy_job.rb +42 -6
- data/lib/google/cloud/bigquery/data.rb +129 -23
- data/lib/google/cloud/bigquery/dataset.rb +708 -66
- data/lib/google/cloud/bigquery/dataset/access.rb +533 -27
- data/lib/google/cloud/bigquery/dataset/list.rb +5 -3
- data/lib/google/cloud/bigquery/external.rb +2353 -0
- data/lib/google/cloud/bigquery/extract_job.rb +52 -11
- data/lib/google/cloud/bigquery/insert_response.rb +90 -2
- data/lib/google/cloud/bigquery/job.rb +160 -21
- data/lib/google/cloud/bigquery/load_job.rb +128 -11
- data/lib/google/cloud/bigquery/project.rb +187 -44
- data/lib/google/cloud/bigquery/query_job.rb +323 -13
- data/lib/google/cloud/bigquery/schema.rb +57 -1
- data/lib/google/cloud/bigquery/schema/field.rb +118 -17
- data/lib/google/cloud/bigquery/service.rb +196 -43
- data/lib/google/cloud/bigquery/table.rb +739 -49
- data/lib/google/cloud/bigquery/table/async_inserter.rb +280 -0
- data/lib/google/cloud/bigquery/version.rb +1 -1
- data/lib/google/cloud/bigquery/view.rb +306 -69
- metadata +18 -3
- data/lib/google/cloud/bigquery/query_data.rb +0 -234
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 58e73b43f4053457d2df061703e3314483552dbb
|
4
|
+
data.tar.gz: 927a49cb45ff1a1c2aac5fa319e19dc0b422af23
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8d86f37859a1a6cf2b682afd8b4e3f77b178fb8333e47606673e02e0a5e6ba76e61c6ee3c9bfbd5bd8d4763eff03f9c54bc067c10f39883772a6c730c0d8b443
|
7
|
+
data.tar.gz: d9d5afd3aac9c23e523909d78b7ae5fc886ce783e5b3cad0068aff21a815841a564e9baf6f35b2235adf0ed743df55d41c33b570f9b99a5d5a96ea50768882d0
|
data/README.md
CHANGED
@@ -36,7 +36,7 @@ table = dataset.create_table "todos",
|
|
36
36
|
|
37
37
|
# Load data into the table
|
38
38
|
file = File.open "/archive/todos/completed-todos.csv"
|
39
|
-
|
39
|
+
table.load file
|
40
40
|
|
41
41
|
# Run a query for the number of completed todos by owner
|
42
42
|
count_sql = "SELECT owner, COUNT(*) AS complete_count FROM todos GROUP BY owner"
|
@@ -39,7 +39,7 @@ module Google
|
|
39
39
|
#
|
40
40
|
# * `https://www.googleapis.com/auth/bigquery`
|
41
41
|
# @param [Integer] retries Number of times to retry requests on server
|
42
|
-
# error. The default value is `
|
42
|
+
# error. The default value is `5`. Optional.
|
43
43
|
# @param [Integer] timeout Default request timeout in seconds. Optional.
|
44
44
|
#
|
45
45
|
# @return [Google::Cloud::Bigquery::Project]
|
@@ -88,7 +88,7 @@ module Google
|
|
88
88
|
#
|
89
89
|
# * `https://www.googleapis.com/auth/bigquery`
|
90
90
|
# @param [Integer] retries Number of times to retry requests on server
|
91
|
-
# error. The default value is `
|
91
|
+
# error. The default value is `5`. Optional.
|
92
92
|
# @param [Integer] timeout Default timeout to use in requests. Optional.
|
93
93
|
#
|
94
94
|
# @return [Google::Cloud::Bigquery::Project]
|
@@ -232,7 +232,7 @@ module Google
|
|
232
232
|
# BigQuery API provides facilities for managing longer-running jobs. With
|
233
233
|
# the asynchronous approach to running a query, an instance of
|
234
234
|
# {Google::Cloud::Bigquery::QueryJob} is returned, rather than an instance
|
235
|
-
# of {Google::Cloud::Bigquery::
|
235
|
+
# of {Google::Cloud::Bigquery::Data}.
|
236
236
|
#
|
237
237
|
# ```ruby
|
238
238
|
# require "google/cloud/bigquery"
|
@@ -246,17 +246,17 @@ module Google
|
|
246
246
|
#
|
247
247
|
# job.wait_until_done!
|
248
248
|
# if !job.failed?
|
249
|
-
# job.
|
249
|
+
# job.data.first
|
250
250
|
#=> {:title=>[{:value=>"hamlet", :count=>5318}, ...]}
|
251
251
|
# end
|
252
252
|
# ```
|
253
253
|
#
|
254
254
|
# Once you have determined that the job is done and has not failed, you can
|
255
|
-
# obtain an instance of {Google::Cloud::Bigquery::
|
256
|
-
#
|
257
|
-
#
|
258
|
-
#
|
259
|
-
#
|
255
|
+
# obtain an instance of {Google::Cloud::Bigquery::Data} by calling `data` on
|
256
|
+
# the job instance. The query results for both of the above examples are
|
257
|
+
# stored in temporary tables with a lifetime of about 24 hours. See the
|
258
|
+
# final example below for a demonstration of how to store query results in a
|
259
|
+
# permanent table.
|
260
260
|
#
|
261
261
|
# ## Creating Datasets and Tables
|
262
262
|
#
|
@@ -370,7 +370,7 @@ module Google
|
|
370
370
|
# end
|
371
371
|
#
|
372
372
|
# file = File.open "names/yob2014.txt"
|
373
|
-
#
|
373
|
+
# table.load file, format: "csv"
|
374
374
|
# ```
|
375
375
|
#
|
376
376
|
# Because the names data, although formatted as CSV, is distributed in files
|
@@ -411,9 +411,7 @@ module Google
|
|
411
411
|
# bucket = storage.create_bucket bucket_id
|
412
412
|
# extract_url = "gs://#{bucket.id}/baby-names.csv"
|
413
413
|
#
|
414
|
-
#
|
415
|
-
#
|
416
|
-
# extract_job.wait_until_done!
|
414
|
+
# result_table.extract extract_url
|
417
415
|
#
|
418
416
|
# # Download to local filesystem
|
419
417
|
# bucket.files.first.download "baby-names.csv"
|
@@ -470,7 +468,7 @@ module Google
|
|
470
468
|
#
|
471
469
|
# * `https://www.googleapis.com/auth/bigquery`
|
472
470
|
# @param [Integer] retries Number of times to retry requests on server
|
473
|
-
# error. The default value is `
|
471
|
+
# error. The default value is `5`. Optional.
|
474
472
|
# @param [Integer] timeout Default timeout to use in requests. Optional.
|
475
473
|
#
|
476
474
|
# @return [Google::Cloud::Bigquery::Project]
|
@@ -20,17 +20,33 @@ module Google
|
|
20
20
|
# # CopyJob
|
21
21
|
#
|
22
22
|
# A {Job} subclass representing a copy operation that may be performed on
|
23
|
-
# a {Table}. A CopyJob instance is created when you call {Table#
|
23
|
+
# a {Table}. A CopyJob instance is created when you call {Table#copy_job}.
|
24
24
|
#
|
25
|
-
# @see https://cloud.google.com/bigquery/docs/tables#
|
25
|
+
# @see https://cloud.google.com/bigquery/docs/tables#copy-table Copying
|
26
26
|
# an Existing Table
|
27
27
|
# @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
|
28
28
|
# reference
|
29
29
|
#
|
30
|
+
# @example
|
31
|
+
# require "google/cloud/bigquery"
|
32
|
+
#
|
33
|
+
# bigquery = Google::Cloud::Bigquery.new
|
34
|
+
# dataset = bigquery.dataset "my_dataset"
|
35
|
+
# table = dataset.table "my_table"
|
36
|
+
# destination_table = dataset.table "my_destination_table"
|
37
|
+
#
|
38
|
+
# copy_job = table.copy_job destination_table
|
39
|
+
#
|
40
|
+
# copy_job.wait_until_done!
|
41
|
+
# copy_job.done? #=> true
|
42
|
+
#
|
30
43
|
class CopyJob < Job
|
31
44
|
##
|
32
45
|
# The table from which data is copied. This is the table on
|
33
|
-
# which {Table#
|
46
|
+
# which {Table#copy_job} was called.
|
47
|
+
#
|
48
|
+
# @return [Table] A table instance.
|
49
|
+
#
|
34
50
|
def source
|
35
51
|
table = @gapi.configuration.copy.source_table
|
36
52
|
return nil unless table
|
@@ -40,7 +56,10 @@ module Google
|
|
40
56
|
end
|
41
57
|
|
42
58
|
##
|
43
|
-
# The table to which data is copied.
|
59
|
+
# The table to which data is copied.
|
60
|
+
#
|
61
|
+
# @return [Table] A table instance.
|
62
|
+
#
|
44
63
|
def destination
|
45
64
|
table = @gapi.configuration.copy.destination_table
|
46
65
|
return nil unless table
|
@@ -52,7 +71,11 @@ module Google
|
|
52
71
|
##
|
53
72
|
# Checks if the create disposition for the job is `CREATE_IF_NEEDED`,
|
54
73
|
# which provides the following behavior: If the table does not exist,
|
55
|
-
# the copy operation creates the table. This is the default
|
74
|
+
# the copy operation creates the table. This is the default create
|
75
|
+
# disposition for copy jobs.
|
76
|
+
#
|
77
|
+
# @return [Boolean] `true` when `CREATE_IF_NEEDED`, `false` otherwise.
|
78
|
+
#
|
56
79
|
def create_if_needed?
|
57
80
|
disp = @gapi.configuration.copy.create_disposition
|
58
81
|
disp == "CREATE_IF_NEEDED"
|
@@ -62,6 +85,9 @@ module Google
|
|
62
85
|
# Checks if the create disposition for the job is `CREATE_NEVER`, which
|
63
86
|
# provides the following behavior: The table must already exist; if it
|
64
87
|
# does not, an error is returned in the job result.
|
88
|
+
#
|
89
|
+
# @return [Boolean] `true` when `CREATE_NEVER`, `false` otherwise.
|
90
|
+
#
|
65
91
|
def create_never?
|
66
92
|
disp = @gapi.configuration.copy.create_disposition
|
67
93
|
disp == "CREATE_NEVER"
|
@@ -71,6 +97,9 @@ module Google
|
|
71
97
|
# Checks if the write disposition for the job is `WRITE_TRUNCATE`, which
|
72
98
|
# provides the following behavior: If the table already exists, the copy
|
73
99
|
# operation overwrites the table data.
|
100
|
+
#
|
101
|
+
# @return [Boolean] `true` when `WRITE_TRUNCATE`, `false` otherwise.
|
102
|
+
#
|
74
103
|
def write_truncate?
|
75
104
|
disp = @gapi.configuration.copy.write_disposition
|
76
105
|
disp == "WRITE_TRUNCATE"
|
@@ -80,6 +109,9 @@ module Google
|
|
80
109
|
# Checks if the write disposition for the job is `WRITE_APPEND`, which
|
81
110
|
# provides the following behavior: If the table already exists, the copy
|
82
111
|
# operation appends the data to the table.
|
112
|
+
#
|
113
|
+
# @return [Boolean] `true` when `WRITE_APPEND`, `false` otherwise.
|
114
|
+
#
|
83
115
|
def write_append?
|
84
116
|
disp = @gapi.configuration.copy.write_disposition
|
85
117
|
disp == "WRITE_APPEND"
|
@@ -88,7 +120,11 @@ module Google
|
|
88
120
|
##
|
89
121
|
# Checks if the write disposition for the job is `WRITE_EMPTY`, which
|
90
122
|
# provides the following behavior: If the table already exists and
|
91
|
-
# contains data, the job will have an error. This is the default
|
123
|
+
# contains data, the job will have an error. This is the default write
|
124
|
+
# disposition for copy jobs.
|
125
|
+
#
|
126
|
+
# @return [Boolean] `true` when `WRITE_EMPTY`, `false` otherwise.
|
127
|
+
#
|
92
128
|
def write_empty?
|
93
129
|
disp = @gapi.configuration.copy.write_disposition
|
94
130
|
disp == "WRITE_EMPTY"
|
@@ -22,12 +22,31 @@ module Google
|
|
22
22
|
##
|
23
23
|
# # Data
|
24
24
|
#
|
25
|
-
# Represents {Table} Data as a list of name/value pairs.
|
26
|
-
# Also contains metadata such as `etag` and `total
|
25
|
+
# Represents {Table} Data as a list of name/value pairs (hashes).
|
26
|
+
# Also contains metadata such as `etag` and `total`, and provides access
|
27
|
+
# to the schema of the table from which the data was read.
|
28
|
+
#
|
29
|
+
# @example
|
30
|
+
# require "google/cloud/bigquery"
|
31
|
+
#
|
32
|
+
# bigquery = Google::Cloud::Bigquery.new
|
33
|
+
# dataset = bigquery.dataset "my_dataset"
|
34
|
+
# table = dataset.table "my_table"
|
35
|
+
#
|
36
|
+
# data = table.data
|
37
|
+
# puts "#{data.count} of #{data.total}"
|
38
|
+
# if data.next?
|
39
|
+
# next_data = data.next
|
40
|
+
# end
|
41
|
+
#
|
27
42
|
class Data < DelegateClass(::Array)
|
43
|
+
##
|
44
|
+
# @private The Service object.
|
45
|
+
attr_accessor :service
|
46
|
+
|
28
47
|
##
|
29
48
|
# @private The {Table} object the data belongs to.
|
30
|
-
attr_accessor :
|
49
|
+
attr_accessor :table_gapi
|
31
50
|
|
32
51
|
##
|
33
52
|
# @private The Google API Client object.
|
@@ -35,30 +54,58 @@ module Google
|
|
35
54
|
|
36
55
|
# @private
|
37
56
|
def initialize arr = []
|
38
|
-
@
|
39
|
-
@
|
57
|
+
@service = nil
|
58
|
+
@table_gapi = nil
|
59
|
+
@gapi = nil
|
40
60
|
super arr
|
41
61
|
end
|
42
62
|
|
43
63
|
##
|
44
64
|
# The resource type of the API response.
|
65
|
+
#
|
66
|
+
# @return [String] The resource type.
|
67
|
+
#
|
45
68
|
def kind
|
46
69
|
@gapi.kind
|
47
70
|
end
|
48
71
|
|
49
72
|
##
|
50
|
-
#
|
73
|
+
# An ETag hash for the page of results represented by the data instance.
|
74
|
+
#
|
75
|
+
# @return [String] The ETag hash.
|
76
|
+
#
|
51
77
|
def etag
|
52
78
|
@gapi.etag
|
53
79
|
end
|
54
80
|
|
55
81
|
##
|
56
|
-
# A token used for paging results.
|
82
|
+
# A token used for paging results. Used by the data instance to retrieve
|
83
|
+
# subsequent pages. See {#next}.
|
84
|
+
#
|
85
|
+
# @return [String] The pagination token.
|
86
|
+
#
|
57
87
|
def token
|
58
88
|
@gapi.page_token
|
59
89
|
end
|
60
90
|
|
91
|
+
##
|
61
92
|
# The total number of rows in the complete table.
|
93
|
+
#
|
94
|
+
# @return [Integer] The number of rows.
|
95
|
+
#
|
96
|
+
# @example
|
97
|
+
# require "google/cloud/bigquery"
|
98
|
+
#
|
99
|
+
# bigquery = Google::Cloud::Bigquery.new
|
100
|
+
# dataset = bigquery.dataset "my_dataset"
|
101
|
+
# table = dataset.table "my_table"
|
102
|
+
#
|
103
|
+
# data = table.data
|
104
|
+
# puts "#{data.count} of #{data.total}"
|
105
|
+
# if data.next?
|
106
|
+
# next_data = data.next
|
107
|
+
# end
|
108
|
+
#
|
62
109
|
def total
|
63
110
|
Integer @gapi.total_rows
|
64
111
|
rescue
|
@@ -66,19 +113,72 @@ module Google
|
|
66
113
|
end
|
67
114
|
|
68
115
|
##
|
69
|
-
# The schema of the data.
|
116
|
+
# The schema of the table from which the data was read.
|
117
|
+
#
|
118
|
+
# The returned object is frozen and changes are not allowed. Use
|
119
|
+
# {Table#schema} to update the schema.
|
120
|
+
#
|
121
|
+
# @return [Schema] A schema object.
|
122
|
+
#
|
123
|
+
# @example
|
124
|
+
# require "google/cloud/bigquery"
|
125
|
+
#
|
126
|
+
# bigquery = Google::Cloud::Bigquery.new
|
127
|
+
# dataset = bigquery.dataset "my_dataset"
|
128
|
+
# table = dataset.table "my_table"
|
129
|
+
#
|
130
|
+
# data = table.data
|
131
|
+
#
|
132
|
+
# schema = data.schema
|
133
|
+
# field = schema.field "name"
|
134
|
+
# field.required? #=> true
|
135
|
+
#
|
70
136
|
def schema
|
71
|
-
|
137
|
+
Schema.from_gapi(@table_gapi.schema).freeze
|
72
138
|
end
|
73
139
|
|
74
140
|
##
|
75
|
-
# The fields of the data
|
141
|
+
# The fields of the data, obtained from the schema of the table from
|
142
|
+
# which the data was read.
|
143
|
+
#
|
144
|
+
# @return [Array<Schema::Field>] An array of field objects.
|
145
|
+
#
|
146
|
+
# @example
|
147
|
+
# require "google/cloud/bigquery"
|
148
|
+
#
|
149
|
+
# bigquery = Google::Cloud::Bigquery.new
|
150
|
+
# dataset = bigquery.dataset "my_dataset"
|
151
|
+
# table = dataset.table "my_table"
|
152
|
+
#
|
153
|
+
# data = table.data
|
154
|
+
#
|
155
|
+
# data.fields.each do |field|
|
156
|
+
# puts field.name
|
157
|
+
# end
|
158
|
+
#
|
76
159
|
def fields
|
77
160
|
schema.fields
|
78
161
|
end
|
79
162
|
|
80
163
|
##
|
81
|
-
# The
|
164
|
+
# The names of the columns in the data, obtained from the schema of the
|
165
|
+
# table from which the data was read.
|
166
|
+
#
|
167
|
+
# @return [Array<Symbol>] An array of column names.
|
168
|
+
#
|
169
|
+
# @example
|
170
|
+
# require "google/cloud/bigquery"
|
171
|
+
#
|
172
|
+
# bigquery = Google::Cloud::Bigquery.new
|
173
|
+
# dataset = bigquery.dataset "my_dataset"
|
174
|
+
# table = dataset.table "my_table"
|
175
|
+
#
|
176
|
+
# data = table.data
|
177
|
+
#
|
178
|
+
# data.headers.each do |header|
|
179
|
+
# puts header
|
180
|
+
# end
|
181
|
+
#
|
82
182
|
def headers
|
83
183
|
schema.headers
|
84
184
|
end
|
@@ -86,7 +186,7 @@ module Google
|
|
86
186
|
##
|
87
187
|
# Whether there is a next page of data.
|
88
188
|
#
|
89
|
-
# @return [Boolean]
|
189
|
+
# @return [Boolean] `true` when there is a next page, `false` otherwise.
|
90
190
|
#
|
91
191
|
# @example
|
92
192
|
# require "google/cloud/bigquery"
|
@@ -105,9 +205,9 @@ module Google
|
|
105
205
|
end
|
106
206
|
|
107
207
|
##
|
108
|
-
#
|
208
|
+
# Retrieves the next page of data.
|
109
209
|
#
|
110
|
-
# @return [Data]
|
210
|
+
# @return [Data] A new instance providing the next page of data.
|
111
211
|
#
|
112
212
|
# @example
|
113
213
|
# require "google/cloud/bigquery"
|
@@ -123,8 +223,12 @@ module Google
|
|
123
223
|
#
|
124
224
|
def next
|
125
225
|
return nil unless next?
|
126
|
-
|
127
|
-
|
226
|
+
ensure_service!
|
227
|
+
data_gapi = service.list_tabledata \
|
228
|
+
@table_gapi.table_reference.dataset_id,
|
229
|
+
@table_gapi.table_reference.table_id,
|
230
|
+
token: token
|
231
|
+
self.class.from_gapi data_gapi, @table_gapi, @service
|
128
232
|
end
|
129
233
|
|
130
234
|
##
|
@@ -132,7 +236,7 @@ module Google
|
|
132
236
|
# returns `false`. Calls the given block once for each row, which is
|
133
237
|
# passed as the parameter.
|
134
238
|
#
|
135
|
-
# An
|
239
|
+
# An enumerator is returned if no block is given.
|
136
240
|
#
|
137
241
|
# This method may make several API calls until all rows are retrieved.
|
138
242
|
# Be sure to use as narrow a search criteria as possible. Please use
|
@@ -143,7 +247,8 @@ module Google
|
|
143
247
|
# @yield [row] The block for accessing each row of data.
|
144
248
|
# @yieldparam [Hash] row The row object.
|
145
249
|
#
|
146
|
-
# @return [Enumerator]
|
250
|
+
# @return [Enumerator] An enumerator providing access to all of the
|
251
|
+
# data.
|
147
252
|
#
|
148
253
|
# @example Iterating each row by passing a block:
|
149
254
|
# require "google/cloud/bigquery"
|
@@ -197,13 +302,14 @@ module Google
|
|
197
302
|
|
198
303
|
##
|
199
304
|
# @private New Data from a response object.
|
200
|
-
def self.from_gapi gapi,
|
305
|
+
def self.from_gapi gapi, table_gapi, service
|
201
306
|
formatted_rows = Convert.format_rows(gapi.rows,
|
202
|
-
|
307
|
+
table_gapi.schema.fields)
|
203
308
|
|
204
309
|
data = new formatted_rows
|
205
|
-
data.
|
310
|
+
data.table_gapi = table_gapi
|
206
311
|
data.gapi = gapi
|
312
|
+
data.service = service
|
207
313
|
data
|
208
314
|
end
|
209
315
|
|
@@ -211,8 +317,8 @@ module Google
|
|
211
317
|
|
212
318
|
##
|
213
319
|
# Raise an error unless an active service is available.
|
214
|
-
def
|
215
|
-
fail "Must have active connection" unless
|
320
|
+
def ensure_service!
|
321
|
+
fail "Must have active connection" unless service
|
216
322
|
end
|
217
323
|
end
|
218
324
|
end
|