google-cloud-bigquery 0.28.0 → 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/google-cloud-bigquery.rb +2 -2
- data/lib/google/cloud/bigquery.rb +10 -12
- data/lib/google/cloud/bigquery/copy_job.rb +42 -6
- data/lib/google/cloud/bigquery/data.rb +129 -23
- data/lib/google/cloud/bigquery/dataset.rb +708 -66
- data/lib/google/cloud/bigquery/dataset/access.rb +533 -27
- data/lib/google/cloud/bigquery/dataset/list.rb +5 -3
- data/lib/google/cloud/bigquery/external.rb +2353 -0
- data/lib/google/cloud/bigquery/extract_job.rb +52 -11
- data/lib/google/cloud/bigquery/insert_response.rb +90 -2
- data/lib/google/cloud/bigquery/job.rb +160 -21
- data/lib/google/cloud/bigquery/load_job.rb +128 -11
- data/lib/google/cloud/bigquery/project.rb +187 -44
- data/lib/google/cloud/bigquery/query_job.rb +323 -13
- data/lib/google/cloud/bigquery/schema.rb +57 -1
- data/lib/google/cloud/bigquery/schema/field.rb +118 -17
- data/lib/google/cloud/bigquery/service.rb +196 -43
- data/lib/google/cloud/bigquery/table.rb +739 -49
- data/lib/google/cloud/bigquery/table/async_inserter.rb +280 -0
- data/lib/google/cloud/bigquery/version.rb +1 -1
- data/lib/google/cloud/bigquery/view.rb +306 -69
- metadata +18 -3
- data/lib/google/cloud/bigquery/query_data.rb +0 -234
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 58e73b43f4053457d2df061703e3314483552dbb
|
4
|
+
data.tar.gz: 927a49cb45ff1a1c2aac5fa319e19dc0b422af23
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8d86f37859a1a6cf2b682afd8b4e3f77b178fb8333e47606673e02e0a5e6ba76e61c6ee3c9bfbd5bd8d4763eff03f9c54bc067c10f39883772a6c730c0d8b443
|
7
|
+
data.tar.gz: d9d5afd3aac9c23e523909d78b7ae5fc886ce783e5b3cad0068aff21a815841a564e9baf6f35b2235adf0ed743df55d41c33b570f9b99a5d5a96ea50768882d0
|
data/README.md
CHANGED
@@ -36,7 +36,7 @@ table = dataset.create_table "todos",
|
|
36
36
|
|
37
37
|
# Load data into the table
|
38
38
|
file = File.open "/archive/todos/completed-todos.csv"
|
39
|
-
|
39
|
+
table.load file
|
40
40
|
|
41
41
|
# Run a query for the number of completed todos by owner
|
42
42
|
count_sql = "SELECT owner, COUNT(*) AS complete_count FROM todos GROUP BY owner"
|
@@ -39,7 +39,7 @@ module Google
|
|
39
39
|
#
|
40
40
|
# * `https://www.googleapis.com/auth/bigquery`
|
41
41
|
# @param [Integer] retries Number of times to retry requests on server
|
42
|
-
# error. The default value is `
|
42
|
+
# error. The default value is `5`. Optional.
|
43
43
|
# @param [Integer] timeout Default request timeout in seconds. Optional.
|
44
44
|
#
|
45
45
|
# @return [Google::Cloud::Bigquery::Project]
|
@@ -88,7 +88,7 @@ module Google
|
|
88
88
|
#
|
89
89
|
# * `https://www.googleapis.com/auth/bigquery`
|
90
90
|
# @param [Integer] retries Number of times to retry requests on server
|
91
|
-
# error. The default value is `
|
91
|
+
# error. The default value is `5`. Optional.
|
92
92
|
# @param [Integer] timeout Default timeout to use in requests. Optional.
|
93
93
|
#
|
94
94
|
# @return [Google::Cloud::Bigquery::Project]
|
@@ -232,7 +232,7 @@ module Google
|
|
232
232
|
# BigQuery API provides facilities for managing longer-running jobs. With
|
233
233
|
# the asynchronous approach to running a query, an instance of
|
234
234
|
# {Google::Cloud::Bigquery::QueryJob} is returned, rather than an instance
|
235
|
-
# of {Google::Cloud::Bigquery::
|
235
|
+
# of {Google::Cloud::Bigquery::Data}.
|
236
236
|
#
|
237
237
|
# ```ruby
|
238
238
|
# require "google/cloud/bigquery"
|
@@ -246,17 +246,17 @@ module Google
|
|
246
246
|
#
|
247
247
|
# job.wait_until_done!
|
248
248
|
# if !job.failed?
|
249
|
-
# job.
|
249
|
+
# job.data.first
|
250
250
|
# #=> {:title=>[{:value=>"hamlet", :count=>5318}, ...]}
|
251
251
|
# end
|
252
252
|
# ```
|
253
253
|
#
|
254
254
|
# Once you have determined that the job is done and has not failed, you can
|
255
|
-
# obtain an instance of {Google::Cloud::Bigquery::
|
256
|
-
#
|
257
|
-
#
|
258
|
-
#
|
259
|
-
#
|
255
|
+
# obtain an instance of {Google::Cloud::Bigquery::Data} by calling `data` on
|
256
|
+
# the job instance. The query results for both of the above examples are
|
257
|
+
# stored in temporary tables with a lifetime of about 24 hours. See the
|
258
|
+
# final example below for a demonstration of how to store query results in a
|
259
|
+
# permanent table.
|
260
260
|
#
|
261
261
|
# ## Creating Datasets and Tables
|
262
262
|
#
|
@@ -370,7 +370,7 @@ module Google
|
|
370
370
|
# end
|
371
371
|
#
|
372
372
|
# file = File.open "names/yob2014.txt"
|
373
|
-
#
|
373
|
+
# table.load file, format: "csv"
|
374
374
|
# ```
|
375
375
|
#
|
376
376
|
# Because the names data, although formatted as CSV, is distributed in files
|
@@ -411,9 +411,7 @@ module Google
|
|
411
411
|
# bucket = storage.create_bucket bucket_id
|
412
412
|
# extract_url = "gs://#{bucket.id}/baby-names.csv"
|
413
413
|
#
|
414
|
-
#
|
415
|
-
#
|
416
|
-
# extract_job.wait_until_done!
|
414
|
+
# result_table.extract extract_url
|
417
415
|
#
|
418
416
|
# # Download to local filesystem
|
419
417
|
# bucket.files.first.download "baby-names.csv"
|
@@ -470,7 +468,7 @@ module Google
|
|
470
468
|
#
|
471
469
|
# * `https://www.googleapis.com/auth/bigquery`
|
472
470
|
# @param [Integer] retries Number of times to retry requests on server
|
473
|
-
# error. The default value is `
|
471
|
+
# error. The default value is `5`. Optional.
|
474
472
|
# @param [Integer] timeout Default timeout to use in requests. Optional.
|
475
473
|
#
|
476
474
|
# @return [Google::Cloud::Bigquery::Project]
|
@@ -20,17 +20,33 @@ module Google
|
|
20
20
|
# # CopyJob
|
21
21
|
#
|
22
22
|
# A {Job} subclass representing a copy operation that may be performed on
|
23
|
-
# a {Table}. A CopyJob instance is created when you call {Table#
|
23
|
+
# a {Table}. A CopyJob instance is created when you call {Table#copy_job}.
|
24
24
|
#
|
25
|
-
# @see https://cloud.google.com/bigquery/docs/tables#
|
25
|
+
# @see https://cloud.google.com/bigquery/docs/tables#copy-table Copying
|
26
26
|
# an Existing Table
|
27
27
|
# @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
|
28
28
|
# reference
|
29
29
|
#
|
30
|
+
# @example
|
31
|
+
# require "google/cloud/bigquery"
|
32
|
+
#
|
33
|
+
# bigquery = Google::Cloud::Bigquery.new
|
34
|
+
# dataset = bigquery.dataset "my_dataset"
|
35
|
+
# table = dataset.table "my_table"
|
36
|
+
# destination_table = dataset.table "my_destination_table"
|
37
|
+
#
|
38
|
+
# copy_job = table.copy_job destination_table
|
39
|
+
#
|
40
|
+
# copy_job.wait_until_done!
|
41
|
+
# copy_job.done? #=> true
|
42
|
+
#
|
30
43
|
class CopyJob < Job
|
31
44
|
##
|
32
45
|
# The table from which data is copied. This is the table on
|
33
|
-
# which {Table#
|
46
|
+
# which {Table#copy_job} was called.
|
47
|
+
#
|
48
|
+
# @return [Table] A table instance.
|
49
|
+
#
|
34
50
|
def source
|
35
51
|
table = @gapi.configuration.copy.source_table
|
36
52
|
return nil unless table
|
@@ -40,7 +56,10 @@ module Google
|
|
40
56
|
end
|
41
57
|
|
42
58
|
##
|
43
|
-
# The table to which data is copied.
|
59
|
+
# The table to which data is copied.
|
60
|
+
#
|
61
|
+
# @return [Table] A table instance.
|
62
|
+
#
|
44
63
|
def destination
|
45
64
|
table = @gapi.configuration.copy.destination_table
|
46
65
|
return nil unless table
|
@@ -52,7 +71,11 @@ module Google
|
|
52
71
|
##
|
53
72
|
# Checks if the create disposition for the job is `CREATE_IF_NEEDED`,
|
54
73
|
# which provides the following behavior: If the table does not exist,
|
55
|
-
# the copy operation creates the table. This is the default
|
74
|
+
# the copy operation creates the table. This is the default create
|
75
|
+
# disposition for copy jobs.
|
76
|
+
#
|
77
|
+
# @return [Boolean] `true` when `CREATE_IF_NEEDED`, `false` otherwise.
|
78
|
+
#
|
56
79
|
def create_if_needed?
|
57
80
|
disp = @gapi.configuration.copy.create_disposition
|
58
81
|
disp == "CREATE_IF_NEEDED"
|
@@ -62,6 +85,9 @@ module Google
|
|
62
85
|
# Checks if the create disposition for the job is `CREATE_NEVER`, which
|
63
86
|
# provides the following behavior: The table must already exist; if it
|
64
87
|
# does not, an error is returned in the job result.
|
88
|
+
#
|
89
|
+
# @return [Boolean] `true` when `CREATE_NEVER`, `false` otherwise.
|
90
|
+
#
|
65
91
|
def create_never?
|
66
92
|
disp = @gapi.configuration.copy.create_disposition
|
67
93
|
disp == "CREATE_NEVER"
|
@@ -71,6 +97,9 @@ module Google
|
|
71
97
|
# Checks if the write disposition for the job is `WRITE_TRUNCATE`, which
|
72
98
|
# provides the following behavior: If the table already exists, the copy
|
73
99
|
# operation overwrites the table data.
|
100
|
+
#
|
101
|
+
# @return [Boolean] `true` when `WRITE_TRUNCATE`, `false` otherwise.
|
102
|
+
#
|
74
103
|
def write_truncate?
|
75
104
|
disp = @gapi.configuration.copy.write_disposition
|
76
105
|
disp == "WRITE_TRUNCATE"
|
@@ -80,6 +109,9 @@ module Google
|
|
80
109
|
# Checks if the write disposition for the job is `WRITE_APPEND`, which
|
81
110
|
# provides the following behavior: If the table already exists, the copy
|
82
111
|
# operation appends the data to the table.
|
112
|
+
#
|
113
|
+
# @return [Boolean] `true` when `WRITE_APPEND`, `false` otherwise.
|
114
|
+
#
|
83
115
|
def write_append?
|
84
116
|
disp = @gapi.configuration.copy.write_disposition
|
85
117
|
disp == "WRITE_APPEND"
|
@@ -88,7 +120,11 @@ module Google
|
|
88
120
|
##
|
89
121
|
# Checks if the write disposition for the job is `WRITE_EMPTY`, which
|
90
122
|
# provides the following behavior: If the table already exists and
|
91
|
-
# contains data, the job will have an error. This is the default
|
123
|
+
# contains data, the job will have an error. This is the default write
|
124
|
+
# disposition for copy jobs.
|
125
|
+
#
|
126
|
+
# @return [Boolean] `true` when `WRITE_EMPTY`, `false` otherwise.
|
127
|
+
#
|
92
128
|
def write_empty?
|
93
129
|
disp = @gapi.configuration.copy.write_disposition
|
94
130
|
disp == "WRITE_EMPTY"
|
@@ -22,12 +22,31 @@ module Google
|
|
22
22
|
##
|
23
23
|
# # Data
|
24
24
|
#
|
25
|
-
# Represents {Table} Data as a list of name/value pairs.
|
26
|
-
# Also contains metadata such as `etag` and `total
|
25
|
+
# Represents {Table} Data as a list of name/value pairs (hashes).
|
26
|
+
# Also contains metadata such as `etag` and `total`, and provides access
|
27
|
+
# to the schema of the table from which the data was read.
|
28
|
+
#
|
29
|
+
# @example
|
30
|
+
# require "google/cloud/bigquery"
|
31
|
+
#
|
32
|
+
# bigquery = Google::Cloud::Bigquery.new
|
33
|
+
# dataset = bigquery.dataset "my_dataset"
|
34
|
+
# table = dataset.table "my_table"
|
35
|
+
#
|
36
|
+
# data = table.data
|
37
|
+
# puts "#{data.count} of #{data.total}"
|
38
|
+
# if data.next?
|
39
|
+
# next_data = data.next
|
40
|
+
# end
|
41
|
+
#
|
27
42
|
class Data < DelegateClass(::Array)
|
43
|
+
##
|
44
|
+
# @private The Service object.
|
45
|
+
attr_accessor :service
|
46
|
+
|
28
47
|
##
|
29
48
|
# @private The {Table} object the data belongs to.
|
30
|
-
attr_accessor :
|
49
|
+
attr_accessor :table_gapi
|
31
50
|
|
32
51
|
##
|
33
52
|
# @private The Google API Client object.
|
@@ -35,30 +54,58 @@ module Google
|
|
35
54
|
|
36
55
|
# @private
|
37
56
|
def initialize arr = []
|
38
|
-
@
|
39
|
-
@
|
57
|
+
@service = nil
|
58
|
+
@table_gapi = nil
|
59
|
+
@gapi = nil
|
40
60
|
super arr
|
41
61
|
end
|
42
62
|
|
43
63
|
##
|
44
64
|
# The resource type of the API response.
|
65
|
+
#
|
66
|
+
# @return [String] The resource type.
|
67
|
+
#
|
45
68
|
def kind
|
46
69
|
@gapi.kind
|
47
70
|
end
|
48
71
|
|
49
72
|
##
|
50
|
-
#
|
73
|
+
# An ETag hash for the page of results represented by the data instance.
|
74
|
+
#
|
75
|
+
# @return [String] The ETag hash.
|
76
|
+
#
|
51
77
|
def etag
|
52
78
|
@gapi.etag
|
53
79
|
end
|
54
80
|
|
55
81
|
##
|
56
|
-
# A token used for paging results.
|
82
|
+
# A token used for paging results. Used by the data instance to retrieve
|
83
|
+
# subsequent pages. See {#next}.
|
84
|
+
#
|
85
|
+
# @return [String] The pagination token.
|
86
|
+
#
|
57
87
|
def token
|
58
88
|
@gapi.page_token
|
59
89
|
end
|
60
90
|
|
91
|
+
##
|
61
92
|
# The total number of rows in the complete table.
|
93
|
+
#
|
94
|
+
# @return [Integer] The number of rows.
|
95
|
+
#
|
96
|
+
# @example
|
97
|
+
# require "google/cloud/bigquery"
|
98
|
+
#
|
99
|
+
# bigquery = Google::Cloud::Bigquery.new
|
100
|
+
# dataset = bigquery.dataset "my_dataset"
|
101
|
+
# table = dataset.table "my_table"
|
102
|
+
#
|
103
|
+
# data = table.data
|
104
|
+
# puts "#{data.count} of #{data.total}"
|
105
|
+
# if data.next?
|
106
|
+
# next_data = data.next
|
107
|
+
# end
|
108
|
+
#
|
62
109
|
def total
|
63
110
|
Integer @gapi.total_rows
|
64
111
|
rescue
|
@@ -66,19 +113,72 @@ module Google
|
|
66
113
|
end
|
67
114
|
|
68
115
|
##
|
69
|
-
# The schema of the data.
|
116
|
+
# The schema of the table from which the data was read.
|
117
|
+
#
|
118
|
+
# The returned object is frozen and changes are not allowed. Use
|
119
|
+
# {Table#schema} to update the schema.
|
120
|
+
#
|
121
|
+
# @return [Schema] A schema object.
|
122
|
+
#
|
123
|
+
# @example
|
124
|
+
# require "google/cloud/bigquery"
|
125
|
+
#
|
126
|
+
# bigquery = Google::Cloud::Bigquery.new
|
127
|
+
# dataset = bigquery.dataset "my_dataset"
|
128
|
+
# table = dataset.table "my_table"
|
129
|
+
#
|
130
|
+
# data = table.data
|
131
|
+
#
|
132
|
+
# schema = data.schema
|
133
|
+
# field = schema.field "name"
|
134
|
+
# field.required? #=> true
|
135
|
+
#
|
70
136
|
def schema
|
71
|
-
|
137
|
+
Schema.from_gapi(@table_gapi.schema).freeze
|
72
138
|
end
|
73
139
|
|
74
140
|
##
|
75
|
-
# The fields of the data
|
141
|
+
# The fields of the data, obtained from the schema of the table from
|
142
|
+
# which the data was read.
|
143
|
+
#
|
144
|
+
# @return [Array<Schema::Field>] An array of field objects.
|
145
|
+
#
|
146
|
+
# @example
|
147
|
+
# require "google/cloud/bigquery"
|
148
|
+
#
|
149
|
+
# bigquery = Google::Cloud::Bigquery.new
|
150
|
+
# dataset = bigquery.dataset "my_dataset"
|
151
|
+
# table = dataset.table "my_table"
|
152
|
+
#
|
153
|
+
# data = table.data
|
154
|
+
#
|
155
|
+
# data.fields.each do |field|
|
156
|
+
# puts field.name
|
157
|
+
# end
|
158
|
+
#
|
76
159
|
def fields
|
77
160
|
schema.fields
|
78
161
|
end
|
79
162
|
|
80
163
|
##
|
81
|
-
# The
|
164
|
+
# The names of the columns in the data, obtained from the schema of the
|
165
|
+
# table from which the data was read.
|
166
|
+
#
|
167
|
+
# @return [Array<Symbol>] An array of column names.
|
168
|
+
#
|
169
|
+
# @example
|
170
|
+
# require "google/cloud/bigquery"
|
171
|
+
#
|
172
|
+
# bigquery = Google::Cloud::Bigquery.new
|
173
|
+
# dataset = bigquery.dataset "my_dataset"
|
174
|
+
# table = dataset.table "my_table"
|
175
|
+
#
|
176
|
+
# data = table.data
|
177
|
+
#
|
178
|
+
# data.headers.each do |header|
|
179
|
+
# puts header
|
180
|
+
# end
|
181
|
+
#
|
82
182
|
def headers
|
83
183
|
schema.headers
|
84
184
|
end
|
@@ -86,7 +186,7 @@ module Google
|
|
86
186
|
##
|
87
187
|
# Whether there is a next page of data.
|
88
188
|
#
|
89
|
-
# @return [Boolean]
|
189
|
+
# @return [Boolean] `true` when there is a next page, `false` otherwise.
|
90
190
|
#
|
91
191
|
# @example
|
92
192
|
# require "google/cloud/bigquery"
|
@@ -105,9 +205,9 @@ module Google
|
|
105
205
|
end
|
106
206
|
|
107
207
|
##
|
108
|
-
#
|
208
|
+
# Retrieves the next page of data.
|
109
209
|
#
|
110
|
-
# @return [Data]
|
210
|
+
# @return [Data] A new instance providing the next page of data.
|
111
211
|
#
|
112
212
|
# @example
|
113
213
|
# require "google/cloud/bigquery"
|
@@ -123,8 +223,12 @@ module Google
|
|
123
223
|
#
|
124
224
|
def next
|
125
225
|
return nil unless next?
|
126
|
-
|
127
|
-
|
226
|
+
ensure_service!
|
227
|
+
data_gapi = service.list_tabledata \
|
228
|
+
@table_gapi.table_reference.dataset_id,
|
229
|
+
@table_gapi.table_reference.table_id,
|
230
|
+
token: token
|
231
|
+
self.class.from_gapi data_gapi, @table_gapi, @service
|
128
232
|
end
|
129
233
|
|
130
234
|
##
|
@@ -132,7 +236,7 @@ module Google
|
|
132
236
|
# returns `false`. Calls the given block once for each row, which is
|
133
237
|
# passed as the parameter.
|
134
238
|
#
|
135
|
-
# An
|
239
|
+
# An enumerator is returned if no block is given.
|
136
240
|
#
|
137
241
|
# This method may make several API calls until all rows are retrieved.
|
138
242
|
# Be sure to use as narrow a search criteria as possible. Please use
|
@@ -143,7 +247,8 @@ module Google
|
|
143
247
|
# @yield [row] The block for accessing each row of data.
|
144
248
|
# @yieldparam [Hash] row The row object.
|
145
249
|
#
|
146
|
-
# @return [Enumerator]
|
250
|
+
# @return [Enumerator] An enumerator providing access to all of the
|
251
|
+
# data.
|
147
252
|
#
|
148
253
|
# @example Iterating each row by passing a block:
|
149
254
|
# require "google/cloud/bigquery"
|
@@ -197,13 +302,14 @@ module Google
|
|
197
302
|
|
198
303
|
##
|
199
304
|
# @private New Data from a response object.
|
200
|
-
def self.from_gapi gapi,
|
305
|
+
def self.from_gapi gapi, table_gapi, service
|
201
306
|
formatted_rows = Convert.format_rows(gapi.rows,
|
202
|
-
|
307
|
+
table_gapi.schema.fields)
|
203
308
|
|
204
309
|
data = new formatted_rows
|
205
|
-
data.
|
310
|
+
data.table_gapi = table_gapi
|
206
311
|
data.gapi = gapi
|
312
|
+
data.service = service
|
207
313
|
data
|
208
314
|
end
|
209
315
|
|
@@ -211,8 +317,8 @@ module Google
|
|
211
317
|
|
212
318
|
##
|
213
319
|
# Raise an error unless an active service is available.
|
214
|
-
def
|
215
|
-
fail "Must have active connection" unless
|
320
|
+
def ensure_service!
|
321
|
+
fail "Must have active connection" unless service
|
216
322
|
end
|
217
323
|
end
|
218
324
|
end
|