google-cloud-bigquery 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,99 @@
1
+ # Copyright 2015 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ module Google
17
+ module Cloud
18
+ module Bigquery
19
+ ##
20
+ # # CopyJob
21
+ #
22
+ # A {Job} subclass representing a copy operation that may be performed on
23
+ # a {Table}. A CopyJob instance is created when you call {Table#copy}.
24
+ #
25
+ # @see https://cloud.google.com/bigquery/docs/tables#copyingtable Copying
26
+ # an Existing Table
27
+ # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
28
+ # reference
29
+ #
30
+ class CopyJob < Job
31
+ ##
32
+ # The table from which data is copied. This is the table on
33
+ # which {Table#copy} was called. Returns a {Table} instance.
34
+ def source
35
+ table = @gapi.configuration.copy.source_table
36
+ return nil unless table
37
+ retrieve_table table.project_id,
38
+ table.dataset_id,
39
+ table.table_id
40
+ end
41
+
42
+ ##
43
+ # The table to which data is copied. Returns a {Table} instance.
44
+ def destination
45
+ table = @gapi.configuration.copy.destination_table
46
+ return nil unless table
47
+ retrieve_table table.project_id,
48
+ table.dataset_id,
49
+ table.table_id
50
+ end
51
+
52
+ ##
53
+ # Checks if the create disposition for the job is `CREATE_IF_NEEDED`,
54
+ # which provides the following behavior: If the table does not exist,
55
+ # the copy operation creates the table. This is the default.
56
+ def create_if_needed?
57
+ disp = @gapi.configuration.copy.create_disposition
58
+ disp == "CREATE_IF_NEEDED"
59
+ end
60
+
61
+ ##
62
+ # Checks if the create disposition for the job is `CREATE_NEVER`, which
63
+ # provides the following behavior: The table must already exist; if it
64
+ # does not, an error is returned in the job result.
65
+ def create_never?
66
+ disp = @gapi.configuration.copy.create_disposition
67
+ disp == "CREATE_NEVER"
68
+ end
69
+
70
+ ##
71
+ # Checks if the write disposition for the job is `WRITE_TRUNCATE`, which
72
+ # provides the following behavior: If the table already exists, the copy
73
+ # operation overwrites the table data.
74
+ def write_truncate?
75
+ disp = @gapi.configuration.copy.write_disposition
76
+ disp == "WRITE_TRUNCATE"
77
+ end
78
+
79
+ ##
80
+ # Checks if the write disposition for the job is `WRITE_APPEND`, which
81
+ # provides the following behavior: If the table already exists, the copy
82
+ # operation appends the data to the table.
83
+ def write_append?
84
+ disp = @gapi.configuration.copy.write_disposition
85
+ disp == "WRITE_APPEND"
86
+ end
87
+
88
+ ##
89
+ # Checks if the write disposition for the job is `WRITE_EMPTY`, which
90
+ # provides the following behavior: If the table already exists and
91
+ # contains data, the job will have an error. This is the default.
92
+ def write_empty?
93
+ disp = @gapi.configuration.copy.write_disposition
94
+ disp == "WRITE_EMPTY"
95
+ end
96
+ end
97
+ end
98
+ end
99
+ end
@@ -0,0 +1,31 @@
1
+ # Copyright 2015 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/cloud/credentials"
17
+
18
+ module Google
19
+ module Cloud
20
+ module Bigquery
21
+ ##
22
+ # @private Represents the Oauth2 signing logic for Bigquery.
23
+ class Credentials < Google::Cloud::Credentials
24
+ SCOPE = ["https://www.googleapis.com/auth/bigquery"]
25
+ PATH_ENV_VARS = %w(BIGQUERY_KEYFILE GOOGLE_CLOUD_KEYFILE GCLOUD_KEYFILE)
26
+ JSON_ENV_VARS = %w(BIGQUERY_KEYFILE_JSON GOOGLE_CLOUD_KEYFILE_JSON
27
+ GCLOUD_KEYFILE_JSON)
28
+ end
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,244 @@
1
+ # Copyright 2015 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "delegate"
17
+ require "google/cloud/bigquery/service"
18
+
19
+ module Google
20
+ module Cloud
21
+ module Bigquery
22
+ ##
23
+ # # Data
24
+ #
25
+ # Represents {Table} Data as a list of name/value pairs.
26
+ # Also contains metadata such as `etag` and `total`.
27
+ class Data < DelegateClass(::Array)
28
+ ##
29
+ # @private The {Table} object the data belongs to.
30
+ attr_accessor :table
31
+
32
+ ##
33
+ # @private The Google API Client object.
34
+ attr_accessor :gapi
35
+
36
+ # @private
37
+ def initialize arr = []
38
+ @table = nil
39
+ @gapi = {}
40
+ super arr
41
+ end
42
+
43
+ ##
44
+ # The resource type of the API response.
45
+ def kind
46
+ @gapi.kind
47
+ end
48
+
49
+ ##
50
+ # The etag.
51
+ def etag
52
+ @gapi.etag
53
+ end
54
+
55
+ ##
56
+ # A token used for paging results.
57
+ def token
58
+ @gapi.page_token
59
+ end
60
+
61
+ # The total number of rows in the complete table.
62
+ def total
63
+ Integer @gapi.total_rows
64
+ rescue
65
+ nil
66
+ end
67
+
68
+ ##
69
+ # Whether there is a next page of data.
70
+ #
71
+ # @return [Boolean]
72
+ #
73
+ # @example
74
+ # require "google/cloud"
75
+ #
76
+ # gcloud = Google::Cloud.new
77
+ # bigquery = gcloud.bigquery
78
+ # dataset = bigquery.dataset "my_dataset"
+ # table = dataset.table "my_table"
79
+ #
80
+ # data = table.data
81
+ # if data.next?
82
+ # next_data = data.next
83
+ # end
84
+ #
85
+ def next?
86
+ !token.nil?
87
+ end
88
+
89
+ ##
90
+ # Retrieve the next page of data.
91
+ #
92
+ # @return [Data]
93
+ #
94
+ # @example
95
+ # require "google/cloud"
96
+ #
97
+ # gcloud = Google::Cloud.new
98
+ # bigquery = gcloud.bigquery
99
+ # dataset = bigquery.dataset "my_dataset"
+ # table = dataset.table "my_table"
100
+ #
101
+ # data = table.data
102
+ # if data.next?
103
+ # next_data = data.next
104
+ # end
105
+ #
106
+ def next
107
+ return nil unless next?
108
+ ensure_table!
109
+ table.data token: token
110
+ end
111
+
112
+ ##
113
+ # Retrieves all rows by repeatedly loading {#next} until {#next?}
114
+ # returns `false`. Calls the given block once for each row, which is
115
+ # passed as the parameter.
116
+ #
117
+ # An Enumerator is returned if no block is given.
118
+ #
119
+ # This method may make several API calls until all rows are retrieved.
120
+ # Be sure to use as narrow a search criteria as possible. Please use
121
+ # with caution.
122
+ #
123
+ # @param [Integer] request_limit The upper limit of API requests to make
124
+ # to load all data. Default is no limit.
125
+ # @yield [row] The block for accessing each row of data.
126
+ # @yieldparam [Hash] row The row object.
127
+ #
128
+ # @return [Enumerator]
129
+ #
130
+ # @example Iterating each row by passing a block:
131
+ # require "google/cloud"
132
+ #
133
+ # gcloud = Google::Cloud.new
134
+ # bigquery = gcloud.bigquery
135
+ # dataset = bigquery.dataset "my_dataset"
+ # table = dataset.table "my_table"
136
+ #
137
+ # table.data.all do |row|
138
+ # puts row["word"]
139
+ # end
140
+ #
141
+ # @example Using the enumerator by not passing a block:
142
+ # require "google/cloud"
143
+ #
144
+ # gcloud = Google::Cloud.new
145
+ # bigquery = gcloud.bigquery
146
+ # dataset = bigquery.dataset "my_dataset"
+ # table = dataset.table "my_table"
147
+ #
148
+ # words = table.data.all.map do |row|
149
+ # row["word"]
150
+ # end
151
+ #
152
+ # @example Limit the number of API calls made:
153
+ # require "google/cloud"
154
+ #
155
+ # gcloud = Google::Cloud.new
156
+ # bigquery = gcloud.bigquery
157
+ # dataset = bigquery.dataset "my_dataset"
+ # table = dataset.table "my_table"
158
+ #
159
+ # table.data.all(request_limit: 10) do |row|
160
+ # puts row["word"]
161
+ # end
162
+ #
163
+ def all request_limit: nil
164
+ request_limit = request_limit.to_i if request_limit
165
+ unless block_given?
166
+ return enum_for(:all, request_limit: request_limit)
167
+ end
168
+ results = self
169
+ loop do
170
+ results.each { |r| yield r }
171
+ if request_limit
172
+ request_limit -= 1
173
+ break if request_limit < 0
174
+ end
175
+ break unless results.next?
176
+ results = results.next
177
+ end
178
+ end
179
+
180
+ ##
181
+ # Represents Table Data as a list of positional values (array of
182
+ # arrays). No type conversion is made, e.g. numbers are formatted as
183
+ # strings.
184
+ def raw
185
+ Array(gapi.rows).map { |row| row.f.map(&:v) }
186
+ end
187
+
188
+ ##
189
+ # @private New Data from a response object.
190
+ def self.from_gapi gapi, table
191
+ formatted_rows = format_rows gapi.rows, table.fields
192
+
193
+ data = new formatted_rows
194
+ data.table = table
195
+ data.gapi = gapi
196
+ data
197
+ end
198
+
199
+ # rubocop:disable all
200
+ # Disabled rubocop because this implementation will not last.
201
+
202
+ def self.format_rows rows, fields
203
+ headers = Array(fields).map { |f| f.name }
204
+ field_types = Array(fields).map { |f| f.type }
205
+
206
+ Array(rows).map do |row|
207
+ values = row.f.map { |f| f.v }
208
+ formatted_values = format_values field_types, values
209
+ Hash[headers.zip formatted_values]
210
+ end
211
+ end
212
+
213
+ def self.format_values field_types, values
214
+ field_types.zip(values).map do |type, value|
215
+ begin
216
+ if value.nil?
217
+ nil
218
+ elsif type == "INTEGER"
219
+ Integer value
220
+ elsif type == "FLOAT"
221
+ Float value
222
+ elsif type == "BOOLEAN"
223
+ (value == "true" ? true : (value == "false" ? false : nil))
224
+ else
225
+ value
226
+ end
227
+ rescue
228
+ value
229
+ end
230
+ end
231
+ end
232
+ # rubocop:enable all
233
+
234
+ protected
235
+
236
+ ##
237
+ # Raise an error unless an active table is available.
238
+ def ensure_table!
239
+ fail "Must have active connection" unless table
240
+ end
241
+ end
242
+ end
243
+ end
244
+ end
@@ -0,0 +1,758 @@
1
+ # Copyright 2015 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "json"
17
+ require "google/cloud/errors"
18
+ require "google/cloud/bigquery/service"
19
+ require "google/cloud/bigquery/table"
20
+ require "google/cloud/bigquery/dataset/list"
21
+ require "google/cloud/bigquery/dataset/access"
22
+ require "google/apis/bigquery_v2"
23
+
24
+ module Google
25
+ module Cloud
26
+ module Bigquery
27
+ ##
28
+ # # Dataset
29
+ #
30
+ # Represents a Dataset. A dataset is a grouping mechanism that holds zero
31
+ # or more tables. Datasets are the lowest level unit of access control;
32
+ # you cannot control access at the table level. A dataset is contained
33
+ # within a specific project.
34
+ #
35
+ # @example
36
+ # require "google/cloud"
37
+ #
38
+ # gcloud = Google::Cloud.new
39
+ # bigquery = gcloud.bigquery
40
+ #
41
+ # dataset = bigquery.create_dataset "my_dataset",
42
+ # name: "My Dataset",
43
+ # description: "This is my Dataset"
44
+ #
45
+ class Dataset
46
+ ##
47
+ # @private The Connection object.
48
+ attr_accessor :service
49
+
50
+ ##
51
+ # @private The Google API Client object.
52
+ attr_accessor :gapi
53
+
54
+ ##
55
+ # @private Create an empty Dataset object.
56
+ def initialize
57
+ @service = nil
58
+ @gapi = {}
59
+ end
60
+
61
+ ##
62
+ # A unique ID for this dataset, without the project name.
63
+ # The ID must contain only letters (a-z, A-Z), numbers (0-9),
64
+ # or underscores (_). The maximum length is 1,024 characters.
65
+ #
66
+ # @!group Attributes
67
+ #
68
+ def dataset_id
69
+ @gapi.dataset_reference.dataset_id
70
+ end
71
+
72
+ ##
73
+ # The ID of the project containing this dataset.
74
+ #
75
+ # @!group Attributes
76
+ #
77
+ def project_id
78
+ @gapi.dataset_reference.project_id
79
+ end
80
+
81
+ ##
82
+ # @private
83
+ # The gapi fragment containing the Project ID and Dataset ID as a
84
+ # camel-cased hash.
85
+ def dataset_ref
86
+ dataset_ref = @gapi.dataset_reference
87
+ dataset_ref = dataset_ref.to_h if dataset_ref.respond_to? :to_h
88
+ dataset_ref
89
+ end
90
+
91
+ ##
92
+ # A descriptive name for the dataset.
93
+ #
94
+ # @!group Attributes
95
+ #
96
+ def name
97
+ @gapi.friendly_name
98
+ end
99
+
100
+ ##
101
+ # Updates the descriptive name for the dataset.
102
+ #
103
+ # @!group Attributes
104
+ #
105
+ def name= new_name
106
+ @gapi.update! friendly_name: new_name
107
+ patch_gapi! :friendly_name
108
+ end
109
+
110
+ ##
111
+ # A string hash of the dataset.
112
+ #
113
+ # @!group Attributes
114
+ #
115
+ def etag
116
+ ensure_full_data!
117
+ @gapi.etag
118
+ end
119
+
120
+ ##
121
+ # A URL that can be used to access the dataset using the REST API.
122
+ #
123
+ # @!group Attributes
124
+ #
125
+ def api_url
126
+ ensure_full_data!
127
+ @gapi.self_link
128
+ end
129
+
130
+ ##
131
+ # A user-friendly description of the dataset.
132
+ #
133
+ # @!group Attributes
134
+ #
135
+ def description
136
+ ensure_full_data!
137
+ @gapi.description
138
+ end
139
+
140
+ ##
141
+ # Updates the user-friendly description of the dataset.
142
+ #
143
+ # @!group Attributes
144
+ #
145
+ def description= new_description
146
+ @gapi.update! description: new_description
147
+ patch_gapi! :description
148
+ end
149
+
150
+ ##
151
+ # The default lifetime of all tables in the dataset, in milliseconds.
152
+ #
153
+ # @!group Attributes
154
+ #
155
+ def default_expiration
156
+ ensure_full_data!
157
+ begin
158
+ Integer @gapi.default_table_expiration_ms
159
+ rescue
160
+ nil
161
+ end
162
+ end
163
+
164
+ ##
165
+ # Updates the default lifetime of all tables in the dataset, in
166
+ # milliseconds.
167
+ #
168
+ # @!group Attributes
169
+ #
170
+ def default_expiration= new_default_expiration
171
+ @gapi.update! default_table_expiration_ms: new_default_expiration
172
+ patch_gapi! :default_table_expiration_ms
173
+ end
174
+
175
+ ##
176
+ # The time when this dataset was created.
177
+ #
178
+ # @!group Attributes
179
+ #
180
+ def created_at
181
+ ensure_full_data!
182
+ begin
183
+ Time.at(Integer(@gapi.creation_time) / 1000.0)
184
+ rescue
185
+ nil
186
+ end
187
+ end
188
+
189
+ ##
190
+ # The date when this dataset or any of its tables was last modified.
191
+ #
192
+ # @!group Attributes
193
+ #
194
+ def modified_at
195
+ ensure_full_data!
196
+ begin
197
+ Time.at(Integer(@gapi.last_modified_time) / 1000.0)
198
+ rescue
199
+ nil
200
+ end
201
+ end
202
+
203
+ ##
204
+ # The geographic location where the dataset should reside. Possible
205
+ # values include EU and US. The default value is US.
206
+ #
207
+ # @!group Attributes
208
+ #
209
+ def location
210
+ ensure_full_data!
211
+ @gapi.location
212
+ end
213
+
214
+ ##
215
+ # Retrieves the access rules for a Dataset. The rules can be updated
216
+ # when passing a block, see {Dataset::Access} for all the methods
217
+ # available.
218
+ #
219
+ # @see https://cloud.google.com/bigquery/access-control BigQuery Access
220
+ # Control
221
+ #
222
+ # @yield [access] a block for setting rules
223
+ # @yieldparam [Dataset::Access] access the object accepting rules
224
+ #
225
+ # @return [Google::Cloud::Bigquery::Dataset::Access]
226
+ #
227
+ # @example
228
+ # require "google/cloud"
229
+ #
230
+ # gcloud = Google::Cloud.new
231
+ # bigquery = gcloud.bigquery
232
+ # dataset = bigquery.dataset "my_dataset"
233
+ #
234
+ # dataset.access #=> [{"role"=>"OWNER",
235
+ # # "specialGroup"=>"projectOwners"},
236
+ # # {"role"=>"WRITER",
237
+ # # "specialGroup"=>"projectWriters"},
238
+ # # {"role"=>"READER",
239
+ # # "specialGroup"=>"projectReaders"},
240
+ # # {"role"=>"OWNER",
241
+ # # "userByEmail"=>"123456789-...com"}]
242
+ #
243
+ # @example Manage the access rules by passing a block:
244
+ # require "google/cloud"
245
+ #
246
+ # gcloud = Google::Cloud.new
247
+ # bigquery = gcloud.bigquery
248
+ # dataset = bigquery.dataset "my_dataset"
249
+ #
250
+ # dataset.access do |access|
251
+ # access.add_owner_group "owners@example.com"
252
+ # access.add_writer_user "writer@example.com"
253
+ # access.remove_writer_user "readers@example.com"
254
+ # access.add_reader_special :all
255
+ # access.add_reader_view other_dataset_view_object
256
+ # end
257
+ #
258
+ def access
259
+ ensure_full_data!
260
+ access_builder = Access.from_gapi @gapi
261
+ if block_given?
262
+ yield access_builder
263
+ if access_builder.changed?
264
+ @gapi.update! access: access_builder.to_gapi
265
+ patch_gapi! :access
266
+ end
267
+ end
268
+ access_builder.freeze
269
+ end
270
+
271
+ ##
272
+ # Permanently deletes the dataset. The dataset must be empty before it
273
+ # can be deleted unless the `force` option is set to `true`.
274
+ #
275
+ # @param [Boolean] force If `true`, delete all the tables in the
276
+ # dataset. If `false` and the dataset contains tables, the request
277
+ # will fail. Default is `false`.
278
+ #
279
+ # @return [Boolean] Returns `true` if the dataset was deleted.
280
+ #
281
+ # @example
282
+ # require "google/cloud"
283
+ #
284
+ # gcloud = Google::Cloud.new
285
+ # bigquery = gcloud.bigquery
286
+ #
287
+ # dataset = bigquery.dataset "my_dataset"
288
+ # dataset.delete
289
+ #
290
+ # @!group Lifecycle
291
+ #
292
+ def delete force: nil
293
+ ensure_service!
294
+ service.delete_dataset dataset_id, force
295
+ true
296
+ end
297
+
298
+ ##
299
+ # Creates a new table. If you are adapting existing code that was
300
+ # written for the [Rest API
301
+ # ](https://cloud.google.com/bigquery/docs/reference/v2/tables#resource),
302
+ # you can pass the table's schema as a hash (see example.)
303
+ #
304
+ # @param [String] table_id The ID of the table. The ID must contain only
305
+ # letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum
306
+ # length is 1,024 characters.
307
+ # @param [String] name A descriptive name for the table.
308
+ # @param [String] description A user-friendly description of the table.
309
+ # @param [Array<Schema::Field>] fields An array of Schema::Field objects
310
+ # specifying the schema's data types for the table. The schema may
311
+ # also be configured when passing a block.
312
+ # @yield [table] a block for setting the table
313
+ # @yieldparam [Table] table the table object to be updated
314
+ #
315
+ # @return [Google::Cloud::Bigquery::Table]
316
+ #
317
+ # @example
318
+ # require "google/cloud"
319
+ #
320
+ # gcloud = Google::Cloud.new
321
+ # bigquery = gcloud.bigquery
322
+ # dataset = bigquery.dataset "my_dataset"
323
+ # table = dataset.create_table "my_table"
324
+ #
325
+ # @example You can also pass name and description options.
326
+ # require "google/cloud"
327
+ #
328
+ # gcloud = Google::Cloud.new
329
+ # bigquery = gcloud.bigquery
330
+ # dataset = bigquery.dataset "my_dataset"
331
+ # table = dataset.create_table "my_table",
332
+ # name: "My Table",
333
+ # description: "A description of table."
334
+ #
335
+ # @example The table's schema fields can be passed as an argument.
336
+ # require "google/cloud"
337
+ #
338
+ # gcloud = Google::Cloud.new
339
+ # bigquery = gcloud.bigquery
340
+ # dataset = bigquery.dataset "my_dataset"
341
+ #
342
+ # schema_fields = [
343
+ # Google::Cloud::Bigquery::Schema::Field.new(
344
+ # "first_name", :string, mode: :required),
345
+ # Google::Cloud::Bigquery::Schema::Field.new(
346
+ # "cities_lived", :record, mode: :repeated,
347
+ # fields: [
348
+ # Google::Cloud::Bigquery::Schema::Field.new(
349
+ # "place", :string, mode: :required),
350
+ # Google::Cloud::Bigquery::Schema::Field.new(
351
+ # "number_of_years", :integer, mode: :required),
352
+ # ])
353
+ # ]
354
+ # table = dataset.create_table "my_table", fields: schema_fields
355
+ #
356
+ # @example Or the table's schema can be configured with the block.
357
+ # require "google/cloud"
358
+ #
359
+ # gcloud = Google::Cloud.new
360
+ # bigquery = gcloud.bigquery
361
+ # dataset = bigquery.dataset "my_dataset"
362
+ #
363
+ # table = dataset.create_table "my_table" do |t|
364
+ # t.schema.string "first_name", mode: :required
365
+ # t.schema.record "cities_lived", mode: :repeated do |s|
366
+ # s.string "place", mode: :required
367
+ # s.integer "number_of_years", mode: :required
368
+ # end
369
+ # end
370
+ #
371
+ # @example You can define the schema using a nested block.
372
+ # require "google/cloud"
373
+ #
374
+ # gcloud = Google::Cloud.new
375
+ # bigquery = gcloud.bigquery
376
+ # dataset = bigquery.dataset "my_dataset"
377
+ # table = dataset.create_table "my_table" do |t|
378
+ # t.name = "My Table"
379
+ # t.description = "A description of my table."
380
+ # t.schema do |s|
381
+ # s.string "first_name", mode: :required
382
+ # s.record "cities_lived", mode: :repeated do |r|
383
+ # r.string "place", mode: :required
384
+ # r.integer "number_of_years", mode: :required
385
+ # end
386
+ # end
387
+ # end
388
+ #
389
+ # @!group Table
390
+ #
391
+ def create_table table_id, name: nil, description: nil, fields: nil
392
+ ensure_service!
393
+ new_tb = Google::Apis::BigqueryV2::Table.new(
394
+ table_reference: Google::Apis::BigqueryV2::TableReference.new(
395
+ project_id: project_id, dataset_id: dataset_id,
396
+ table_id: table_id))
397
+ updater = Table::Updater.new(new_tb).tap do |tb|
398
+ tb.name = name unless name.nil?
399
+ tb.description = description unless description.nil?
400
+ tb.schema.fields = fields unless fields.nil?
401
+ end
402
+
403
+ yield updater if block_given?
404
+
405
+ gapi = service.insert_table dataset_id, updater.to_gapi
406
+ Table.from_gapi gapi, service
407
+ end
408
+
409
+ ##
410
+ # Creates a new view table from the given query.
411
+ #
412
+ # @param [String] table_id The ID of the view table. The ID must contain
413
+ # only letters (a-z, A-Z), numbers (0-9), or underscores (_). The
414
+ # maximum length is 1,024 characters.
415
+ # @param [String] query The query that BigQuery executes when the view
416
+ # is referenced.
417
+ # @param [String] name A descriptive name for the table.
418
+ # @param [String] description A user-friendly description of the table.
419
+ #
420
+ # @return [Google::Cloud::Bigquery::View]
421
+ #
422
+ # @example
423
+ # require "google/cloud"
424
+ #
425
+ # gcloud = Google::Cloud.new
426
+ # bigquery = gcloud.bigquery
427
+ # dataset = bigquery.dataset "my_dataset"
428
+ # view = dataset.create_view "my_view",
429
+ # "SELECT name, age FROM [proj:dataset.users]"
430
+ #
431
+ # @example A name and description can be provided:
432
+ # require "google/cloud"
433
+ #
434
+ # gcloud = Google::Cloud.new
435
+ # bigquery = gcloud.bigquery
436
+ # dataset = bigquery.dataset "my_dataset"
437
+ # view = dataset.create_view "my_view",
438
+ # "SELECT name, age FROM [proj:dataset.users]",
439
+ # name: "My View", description: "This is my view"
440
+ #
441
+ # @!group Table
442
+ #
443
+ def create_view table_id, query, name: nil, description: nil
444
+ new_view_opts = {
445
+ table_reference: Google::Apis::BigqueryV2::TableReference.new(
446
+ project_id: project_id, dataset_id: dataset_id, table_id: table_id
447
+ ),
448
+ friendly_name: name,
449
+ description: description,
450
+ view: Google::Apis::BigqueryV2::ViewDefinition.new(
451
+ query: query
452
+ )
453
+ }.delete_if { |_, v| v.nil? }
454
+ new_view = Google::Apis::BigqueryV2::Table.new new_view_opts
455
+
456
+ gapi = service.insert_table dataset_id, new_view
457
+ Table.from_gapi gapi, service
458
+ end
459
+
460
+ ##
461
+ # Retrieves an existing table by ID.
462
+ #
463
+ # @param [String] table_id The ID of a table.
464
+ #
465
+ # @return [Google::Cloud::Bigquery::Table,
466
+ # Google::Cloud::Bigquery::View, nil] Returns `nil` if the table does
467
+ # not exist
468
+ #
469
+ # @example
470
+ # require "google/cloud"
471
+ #
472
+ # gcloud = Google::Cloud.new
473
+ # bigquery = gcloud.bigquery
474
+ # dataset = bigquery.dataset "my_dataset"
475
+ # table = dataset.table "my_table"
476
+ # puts table.name
477
+ #
478
+ # @!group Table
479
+ #
480
+ def table table_id
481
+ ensure_service!
482
+ gapi = service.get_table dataset_id, table_id
483
+ Table.from_gapi gapi, service
484
+ rescue Google::Cloud::NotFoundError
485
+ nil
486
+ end
487
+
488
+ ##
489
+ # Retrieves the list of tables belonging to the dataset.
490
+ #
491
+ # @param [String] token A previously-returned page token representing
492
+ # part of the larger set of results to view.
493
+ # @param [Integer] max Maximum number of tables to return.
494
+ #
495
+ # @return [Array<Google::Cloud::Bigquery::Table>,
496
+ # Array<Google::Cloud::Bigquery::View>] (See
497
+ # {Google::Cloud::Bigquery::Table::List})
498
+ #
499
+ # @example
500
+ # require "google/cloud"
501
+ #
502
+ # gcloud = Google::Cloud.new
503
+ # bigquery = gcloud.bigquery
504
+ # dataset = bigquery.dataset "my_dataset"
505
+ # tables = dataset.tables
506
+ # tables.each do |table|
507
+ # puts table.name
508
+ # end
509
+ #
510
+ # @example Retrieve all tables: (See {Table::List#all})
511
+ # require "google/cloud"
512
+ #
513
+ # gcloud = Google::Cloud.new
514
+ # bigquery = gcloud.bigquery
515
+ # dataset = bigquery.dataset "my_dataset"
516
+ # tables = dataset.tables
517
+ # tables.all do |table|
518
+ # puts table.name
519
+ # end
520
+ #
521
+ # @!group Table
522
+ #
523
+ def tables token: nil, max: nil
524
+ ensure_service!
525
+ options = { token: token, max: max }
526
+ gapi = service.list_tables dataset_id, options
527
+ Table::List.from_gapi gapi, service, dataset_id, max
528
+ end
529
+
530
+ ##
531
+ # Queries data using the [asynchronous
532
+ # method](https://cloud.google.com/bigquery/querying-data).
533
+ #
534
+ # Sets the current dataset as the default dataset in the query. Useful
535
+ # for using unqualified table names.
536
+ #
537
+ # @param [String] query A query string, following the BigQuery [query
538
+ # syntax](https://cloud.google.com/bigquery/query-reference), of the
539
+ # query to execute. Example: "SELECT count(f1) FROM
540
+ # [myProjectId:myDatasetId.myTableId]".
541
+ # @param [String] priority Specifies a priority for the query. Possible
542
+ # values include `INTERACTIVE` and `BATCH`. The default value is
543
+ # `INTERACTIVE`.
544
+ # @param [Boolean] cache Whether to look for the result in the query
545
+ # cache. The query cache is a best-effort cache that will be flushed
546
+ # whenever tables in the query are modified. The default value is
547
+ # true. For more information, see [query
548
+ # caching](https://developers.google.com/bigquery/querying-data).
549
+ # @param [Table] table The destination table where the query results
550
+ # should be stored. If not present, a new table will be created to
551
+ # store the results.
552
+ # @param [String] create Specifies whether the job is allowed to create
553
+ # new tables.
554
+ #
555
+ # The following values are supported:
556
+ #
557
+ # * `needed` - Create the table if it does not exist.
558
+ # * `never` - The table must already exist. A 'notFound' error is
559
+ # raised if the table does not exist.
560
+ # @param [String] write Specifies the action that occurs if the
561
+ # destination table already exists.
562
+ #
563
+ # The following values are supported:
564
+ #
565
+ # * `truncate` - BigQuery overwrites the table data.
566
+ # * `append` - BigQuery appends the data to the table.
567
+ # * `empty` - A 'duplicate' error is returned in the job result if the
568
+ # table exists and contains data.
569
+ # @param [Boolean] large_results If `true`, allows the query to produce
570
+ # arbitrarily large result tables at a slight cost in performance.
571
+ # Requires `table` parameter to be set.
572
+ # @param [Boolean] flatten Flattens all nested and repeated fields in
573
+ # the query results. The default value is `true`. `large_results`
574
+ # parameter must be `true` if this is set to `false`.
575
+ #
576
+ # @return [Google::Cloud::Bigquery::QueryJob]
577
+ #
578
+ # @example
579
+ # require "google/cloud"
580
+ #
581
+ # gcloud = Google::Cloud.new
582
+ # bigquery = gcloud.bigquery
583
+ # dataset = bigquery.dataset "my_dataset"
584
+ # job = dataset.query_job "SELECT name FROM my_table"
585
+ #
586
+ # job.wait_until_done!
587
+ # if !job.failed?
588
+ # job.query_results.each do |row|
589
+ # puts row["name"]
590
+ # end
591
+ # end
592
+ #
593
+ # @!group Data
594
+ #
595
+ def query_job query, priority: "INTERACTIVE", cache: true, table: nil,
596
+ create: nil, write: nil, large_results: nil, flatten: nil
597
+ options = { priority: priority, cache: cache, table: table,
598
+ create: create, write: write,
599
+ large_results: large_results, flatten: flatten }
600
+ options[:dataset] ||= self
601
+ ensure_service!
602
+ gapi = service.query_job query, options
603
+ Job.from_gapi gapi, service
604
+ end
605
+
606
+ ##
607
+ # Queries data using the [synchronous
608
+ # method](https://cloud.google.com/bigquery/querying-data).
609
+ #
610
+ # Sets the current dataset as the default dataset in the query. Useful
611
+ # for using unqualified table names.
612
+ #
613
+ # @param [String] query A query string, following the BigQuery [query
614
+ # syntax](https://cloud.google.com/bigquery/query-reference), of the
615
+ # query to execute. Example: "SELECT count(f1) FROM
616
+ # [myProjectId:myDatasetId.myTableId]".
617
+ # @param [Integer] max The maximum number of rows of data to return per
618
+ # page of results. Setting this flag to a small value such as 1000 and
619
+ # then paging through results might improve reliability when the query
620
+ # result set is large. In addition to this limit, responses are also
621
+ # limited to 10 MB. By default, there is no maximum row count, and
622
+ # only the byte limit applies.
623
+ # @param [Integer] timeout How long to wait for the query to complete,
624
+ # in milliseconds, before the request times out and returns. Note that
625
+ # this is only a timeout for the request, not the query. If the query
626
+ # takes longer to run than the timeout value, the call returns without
627
+ # any results and with QueryData#complete? set to false. The default
628
+ # value is 10000 milliseconds (10 seconds).
629
+ # @param [Boolean] dryrun If set to `true`, BigQuery doesn't run the
630
+ # job. Instead, if the query is valid, BigQuery returns statistics
631
+ # about the job such as how many bytes would be processed. If the
632
+ # query is invalid, an error returns. The default value is `false`.
633
+ # @param [Boolean] cache Whether to look for the result in the query
634
+ # cache. The query cache is a best-effort cache that will be flushed
635
+ # whenever tables in the query are modified. The default value is
636
+ # true. For more information, see [query
637
+ # caching](https://developers.google.com/bigquery/querying-data).
638
+ #
639
+ # @return [Google::Cloud::Bigquery::QueryData]
640
+ #
641
+ # @example
642
+ # require "google/cloud"
643
+ #
644
+ # gcloud = Google::Cloud.new
645
+ # bigquery = gcloud.bigquery
646
+ # dataset = bigquery.dataset "my_dataset"
647
+ # data = dataset.query "SELECT name FROM my_table"
648
+ # data.each do |row|
649
+ # puts row["name"]
650
+ # end
651
+ #
652
+ # @!group Data
653
+ #
654
+ def query query, max: nil, timeout: 10000, dryrun: nil, cache: true
655
+ options = { max: max, timeout: timeout, dryrun: dryrun, cache: cache }
656
+ options[:dataset] ||= dataset_id
657
+ options[:project] ||= project_id
658
+ ensure_service!
659
+ gapi = service.query query, options
660
+ QueryData.from_gapi gapi, service
661
+ end
662
+
663
+ ##
664
+ # @private New Dataset from a Google API Client object.
665
+ def self.from_gapi gapi, conn
666
+ new.tap do |f|
667
+ f.gapi = gapi
668
+ f.service = conn
669
+ end
670
+ end
671
+
672
+ protected
673
+
674
+ ##
675
+ # Raise an error unless an active service is available.
676
+ def ensure_service!
677
+ fail "Must have active connection" unless service
678
+ end
679
+
680
+ def patch_gapi! *attributes
681
+ return if attributes.empty?
682
+ ensure_service!
683
+ patch_args = Hash[attributes.map do |attr|
684
+ [attr, @gapi.send(attr)]
685
+ end]
686
+ patch_gapi = Google::Apis::BigqueryV2::Dataset.new patch_args
687
+ @gapi = service.patch_dataset dataset_id, patch_gapi
688
+ end
689
+
690
+ ##
691
+ # Load the complete representation of the dataset if it has been
692
+ # only partially loaded by a request to the API list method.
693
+ def ensure_full_data!
694
+ reload_gapi! unless data_complete?
695
+ end
696
+
697
+ def reload_gapi!
698
+ ensure_service!
699
+ gapi = service.get_dataset dataset_id
700
+ @gapi = gapi
701
+ end
702
+
703
+ def data_complete?
704
+ @gapi.is_a? Google::Apis::BigqueryV2::Dataset
705
+ end
706
+
707
+ ##
708
+ # Yielded to a block to accumulate changes for a patch request.
709
+ class Updater < Dataset
710
+ ##
711
+ # A list of attributes that were updated.
712
+ attr_reader :updates
713
+
714
+ ##
715
+ # Create an Updater object.
716
+ def initialize gapi
717
+ @updates = []
718
+ @gapi = gapi
719
+ end
720
+
721
+ def access
722
+ # TODO: make sure to call ensure_full_data! on Dataset#update
723
+ @access ||= Access.from_gapi @gapi
724
+ if block_given?
725
+ yield @access
726
+ check_for_mutated_access!
727
+ end
728
+ # Same as Dataset#access, but not frozen
729
+ @access
730
+ end
731
+
732
+ ##
733
+ # Make sure any access changes are saved
734
+ def check_for_mutated_access!
735
+ return if @access.nil?
736
+ return unless @access.changed?
737
+ @gapi.update! access: @access.to_gapi
738
+ patch_gapi! :access
739
+ end
740
+
741
+ def to_gapi
742
+ check_for_mutated_access!
743
+ @gapi
744
+ end
745
+
746
+ protected
747
+
748
+ ##
749
+ # Queue up all the updates instead of making them.
750
+ def patch_gapi! attribute
751
+ @updates << attribute
752
+ @updates.uniq!
753
+ end
754
+ end
755
+ end
756
+ end
757
+ end
758
+ end