google-cloud-bigquery 0.28.0 → 0.29.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 41cd0596408cd35451c4db0c52fe0b7901dff7ea
- data.tar.gz: fb36dac2c6d09940b4335980b808fc6d9564b84d
+ metadata.gz: 58e73b43f4053457d2df061703e3314483552dbb
+ data.tar.gz: 927a49cb45ff1a1c2aac5fa319e19dc0b422af23
  SHA512:
- metadata.gz: cf8190937417f431a6221c408b911c958c2c83be1b4cce3688a130e2f9f44a5645c68e81f16368a3b3e524e9c5ae87ea095e4b3791bbd8970d93dc5afafb4d32
- data.tar.gz: 2ad979a3b853e4e2fc6813f5e98279181a39d082b843079752d3d4fd63c1732f37e261584b5b77c3aa0f47f6b3a45386bd1ea5e3ac97cb4acaf35d84d3d83ff7
+ metadata.gz: 8d86f37859a1a6cf2b682afd8b4e3f77b178fb8333e47606673e02e0a5e6ba76e61c6ee3c9bfbd5bd8d4763eff03f9c54bc067c10f39883772a6c730c0d8b443
+ data.tar.gz: d9d5afd3aac9c23e523909d78b7ae5fc886ce783e5b3cad0068aff21a815841a564e9baf6f35b2235adf0ed743df55d41c33b570f9b99a5d5a96ea50768882d0
data/README.md CHANGED
@@ -36,7 +36,7 @@ table = dataset.create_table "todos",

  # Load data into the table
  file = File.open "/archive/todos/completed-todos.csv"
- load_job = table.load file
+ table.load file

  # Run a query for the number of completed todos by owner
  count_sql = "SELECT owner, COUNT(*) AS complete_count FROM todos GROUP BY owner"
@@ -39,7 +39,7 @@ module Google
  #
  # * `https://www.googleapis.com/auth/bigquery`
  # @param [Integer] retries Number of times to retry requests on server
- # error. The default value is `3`. Optional.
+ # error. The default value is `5`. Optional.
  # @param [Integer] timeout Default request timeout in seconds. Optional.
  #
  # @return [Google::Cloud::Bigquery::Project]
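This hunk (and two identical ones later in the diff) only updates the documented default for `retries` from 3 to 5. For reference, both options are plain keyword arguments on the constructor used throughout these docs; a short sketch, with illustrative values and project/credential discovery left to the library's defaults:

```ruby
require "google/cloud/bigquery"

# Retry transient server errors up to 10 times and allow 120-second requests.
# Both values are illustrative, not the library's defaults.
bigquery = Google::Cloud::Bigquery.new retries: 10, timeout: 120
```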
@@ -88,7 +88,7 @@ module Google
  #
  # * `https://www.googleapis.com/auth/bigquery`
  # @param [Integer] retries Number of times to retry requests on server
- # error. The default value is `3`. Optional.
+ # error. The default value is `5`. Optional.
  # @param [Integer] timeout Default timeout to use in requests. Optional.
  #
  # @return [Google::Cloud::Bigquery::Project]
@@ -232,7 +232,7 @@ module Google
  # BigQuery API provides facilities for managing longer-running jobs. With
  # the asynchronous approach to running a query, an instance of
  # {Google::Cloud::Bigquery::QueryJob} is returned, rather than an instance
- # of {Google::Cloud::Bigquery::QueryData}.
+ # of {Google::Cloud::Bigquery::Data}.
  #
  # ```ruby
  # require "google/cloud/bigquery"
@@ -246,17 +246,17 @@ module Google
  #
  # job.wait_until_done!
  # if !job.failed?
- # job.query_results.first
+ # job.data.first
  # #=> {:title=>[{:value=>"hamlet", :count=>5318}, ...}
  # end
  # ```
  #
  # Once you have determined that the job is done and has not failed, you can
- # obtain an instance of {Google::Cloud::Bigquery::QueryData} by calling
- # `query_results` on the job instance. The query results for both of the
- # above examples are stored in temporary tables with a lifetime of about 24
- # hours. See the final example below for a demonstration of how to store
- # query results in a permanent table.
+ # obtain an instance of {Google::Cloud::Bigquery::Data} by calling `data` on
+ # the job instance. The query results for both of the above examples are
+ # stored in temporary tables with a lifetime of about 24 hours. See the
+ # final example below for a demonstration of how to store query results in a
+ # permanent table.
  #
  # ## Creating Datasets and Tables
  #
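Taken together, these two hunks rename the result class from `QueryData` to `Data` and the accessor from `query_results` to `data`. A compact sketch of the asynchronous flow they document; the SQL string and dataset/table names are placeholders:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# query_job returns a QueryJob immediately instead of blocking.
job = bigquery.query_job "SELECT owner, COUNT(*) AS cnt FROM my_dataset.todos GROUP BY owner"

job.wait_until_done!
unless job.failed?
  # data replaces query_results and yields one hash per row.
  job.data.each { |row| puts row }
end
```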
@@ -370,7 +370,7 @@ module Google
  # end
  #
  # file = File.open "names/yob2014.txt"
- # load_job = table.load file, format: "csv"
+ # table.load file, format: "csv"
  # ```
  #
  # Because the names data, although formatted as CSV, is distributed in files
@@ -411,9 +411,7 @@ module Google
  # bucket = storage.create_bucket bucket_id
  # extract_url = "gs://#{bucket.id}/baby-names.csv"
  #
- # extract_job = result_table.extract extract_url
- #
- # extract_job.wait_until_done!
+ # result_table.extract extract_url
  #
  # # Download to local filesystem
  # bucket.files.first.download "baby-names.csv"
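The extract example loses its explicit `wait_until_done!` for the same reason as the load example earlier: the plain `extract` call now blocks, and a job-returning `extract_job` method (assumed here by analogy with `copy_job` in this diff) covers the non-blocking case. The bucket URL and table names are placeholders:

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
result_table = bigquery.dataset("my_dataset").table("baby_names") # hypothetical names

# Blocking form, as shown in the updated docs.
result_table.extract "gs://my-bucket/baby-names.csv"

# Assumed job-returning form for callers that want to poll.
extract_job = result_table.extract_job "gs://my-bucket/baby-names.csv"
extract_job.wait_until_done!
extract_job.done? #=> true
```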
@@ -470,7 +468,7 @@ module Google
  #
  # * `https://www.googleapis.com/auth/bigquery`
  # @param [Integer] retries Number of times to retry requests on server
- # error. The default value is `3`. Optional.
+ # error. The default value is `5`. Optional.
  # @param [Integer] timeout Default timeout to use in requests. Optional.
  #
  # @return [Google::Cloud::Bigquery::Project]
@@ -20,17 +20,33 @@ module Google
  # # CopyJob
  #
  # A {Job} subclass representing a copy operation that may be performed on
- # a {Table}. A CopyJob instance is created when you call {Table#copy}.
+ # a {Table}. A CopyJob instance is created when you call {Table#copy_job}.
  #
- # @see https://cloud.google.com/bigquery/docs/tables#copyingtable Copying
+ # @see https://cloud.google.com/bigquery/docs/tables#copy-table Copying
  # an Existing Table
  # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
  # reference
  #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ # table = dataset.table "my_table"
+ # destination_table = dataset.table "my_destination_table"
+ #
+ # copy_job = table.copy_job destination_table
+ #
+ # copy_job.wait_until_done!
+ # copy_job.done? #=> true
+ #
  class CopyJob < Job
  ##
  # The table from which data is copied. This is the table on
- # which {Table#copy} was called. Returns a {Table} instance.
+ # which {Table#copy_job} was called.
+ #
+ # @return [Table] A table instance.
+ #
  def source
  table = @gapi.configuration.copy.source_table
  return nil unless table
@@ -40,7 +56,10 @@ module Google
  end

  ##
- # The table to which data is copied. Returns a {Table} instance.
+ # The table to which data is copied.
+ #
+ # @return [Table] A table instance.
+ #
  def destination
  table = @gapi.configuration.copy.destination_table
  return nil unless table
@@ -52,7 +71,11 @@ module Google
  ##
  # Checks if the create disposition for the job is `CREATE_IF_NEEDED`,
  # which provides the following behavior: If the table does not exist,
- # the copy operation creates the table. This is the default.
+ # the copy operation creates the table. This is the default create
+ # disposition for copy jobs.
+ #
+ # @return [Boolean] `true` when `CREATE_IF_NEEDED`, `false` otherwise.
+ #
  def create_if_needed?
  disp = @gapi.configuration.copy.create_disposition
  disp == "CREATE_IF_NEEDED"
@@ -62,6 +85,9 @@ module Google
  # Checks if the create disposition for the job is `CREATE_NEVER`, which
  # provides the following behavior: The table must already exist; if it
  # does not, an error is returned in the job result.
+ #
+ # @return [Boolean] `true` when `CREATE_NEVER`, `false` otherwise.
+ #
  def create_never?
  disp = @gapi.configuration.copy.create_disposition
  disp == "CREATE_NEVER"
@@ -71,6 +97,9 @@ module Google
  # Checks if the write disposition for the job is `WRITE_TRUNCATE`, which
  # provides the following behavior: If the table already exists, the copy
  # operation overwrites the table data.
+ #
+ # @return [Boolean] `true` when `WRITE_TRUNCATE`, `false` otherwise.
+ #
  def write_truncate?
  disp = @gapi.configuration.copy.write_disposition
  disp == "WRITE_TRUNCATE"
@@ -80,6 +109,9 @@ module Google
  # Checks if the write disposition for the job is `WRITE_APPEND`, which
  # provides the following behavior: If the table already exists, the copy
  # operation appends the data to the table.
+ #
+ # @return [Boolean] `true` when `WRITE_APPEND`, `false` otherwise.
+ #
  def write_append?
  disp = @gapi.configuration.copy.write_disposition
  disp == "WRITE_APPEND"
@@ -88,7 +120,11 @@ module Google
  ##
  # Checks if the write disposition for the job is `WRITE_EMPTY`, which
  # provides the following behavior: If the table already exists and
- # contains data, the job will have an error. This is the default.
+ # contains data, the job will have an error. This is the default write
+ # disposition for copy jobs.
+ #
+ # @return [Boolean] `true` when `WRITE_EMPTY`, `false` otherwise.
+ #
  def write_empty?
  disp = @gapi.configuration.copy.write_disposition
  disp == "WRITE_EMPTY"
@@ -22,12 +22,31 @@ module Google
  ##
  # # Data
  #
- # Represents {Table} Data as a list of name/value pairs.
- # Also contains metadata such as `etag` and `total`.
+ # Represents {Table} Data as a list of name/value pairs (hashes.)
+ # Also contains metadata such as `etag` and `total`, and provides access
+ # to the schema of the table from which the data was read.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ # table = dataset.table "my_table"
+ #
+ # data = table.data
+ # puts "#{data.count} of #{data.total}"
+ # if data.next?
+ # next_data = data.next
+ # end
+ #
  class Data < DelegateClass(::Array)
+ ##
+ # @private The Service object.
+ attr_accessor :service
+
  ##
  # @private The {Table} object the data belongs to.
- attr_accessor :table
+ attr_accessor :table_gapi

  ##
  # @private The Google API Client object.
@@ -35,30 +54,58 @@ module Google

  # @private
  def initialize arr = []
- @table = nil
- @gapi = {}
+ @service = nil
+ @table_gapi = nil
+ @gapi = nil
  super arr
  end

  ##
  # The resource type of the API response.
+ #
+ # @return [String] The resource type.
+ #
  def kind
  @gapi.kind
  end

  ##
- # The etag.
+ # An ETag hash for the page of results represented by the data instance.
+ #
+ # @return [String] The ETag hash.
+ #
  def etag
  @gapi.etag
  end

  ##
- # A token used for paging results.
+ # A token used for paging results. Used by the data instance to retrieve
+ # subsequent pages. See {#next}.
+ #
+ # @return [String] The pagination token.
+ #
  def token
  @gapi.page_token
  end

+ ##
  # The total number of rows in the complete table.
+ #
+ # @return [Integer] The number of rows.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ # table = dataset.table "my_table"
+ #
+ # data = table.data
+ # puts "#{data.count} of #{data.total}"
+ # if data.next?
+ # next_data = data.next
+ # end
+ #
  def total
  Integer @gapi.total_rows
  rescue
@@ -66,19 +113,72 @@ module Google
  end

  ##
- # The schema of the data.
+ # The schema of the table from which the data was read.
+ #
+ # The returned object is frozen and changes are not allowed. Use
+ # {Table#schema} to update the schema.
+ #
+ # @return [Schema] A schema object.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ # table = dataset.table "my_table"
+ #
+ # data = table.data
+ #
+ # schema = data.schema
+ # field = schema.field "name"
+ # field.required? #=> true
+ #
  def schema
- table.schema
+ Schema.from_gapi(@table_gapi.schema).freeze
  end

  ##
- # The fields of the data.
+ # The fields of the data, obtained from the schema of the table from
+ # which the data was read.
+ #
+ # @return [Array<Schema::Field>] An array of field objects.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ # table = dataset.table "my_table"
+ #
+ # data = table.data
+ #
+ # data.fields.each do |field|
+ # puts field.name
+ # end
+ #
  def fields
  schema.fields
  end

  ##
- # The name of the columns in the data.
+ # The names of the columns in the data, obtained from the schema of the
+ # table from which the data was read.
+ #
+ # @return [Array<Symbol>] An array of column names.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ # table = dataset.table "my_table"
+ #
+ # data = table.data
+ #
+ # data.headers.each do |header|
+ # puts header
+ # end
+ #
  def headers
  schema.headers
  end
@@ -86,7 +186,7 @@ module Google
  ##
  # Whether there is a next page of data.
  #
- # @return [Boolean]
+ # @return [Boolean] `true` when there is a next page, `false` otherwise.
  #
  # @example
  # require "google/cloud/bigquery"
@@ -105,9 +205,9 @@ module Google
  end

  ##
- # Retrieve the next page of data.
+ # Retrieves the next page of data.
  #
- # @return [Data]
+ # @return [Data] A new instance providing the next page of data.
  #
  # @example
  # require "google/cloud/bigquery"
@@ -123,8 +223,12 @@ module Google
  #
  def next
  return nil unless next?
- ensure_table!
- table.data token: token
+ ensure_service!
+ data_gapi = service.list_tabledata \
+ @table_gapi.table_reference.dataset_id,
+ @table_gapi.table_reference.table_id,
+ token: token
+ self.class.from_gapi data_gapi, @table_gapi, @service
  end

  ##
@@ -132,7 +236,7 @@ module Google
  # returns `false`. Calls the given block once for each row, which is
  # passed as the parameter.
  #
- # An Enumerator is returned if no block is given.
+ # An enumerator is returned if no block is given.
  #
  # This method may make several API calls until all rows are retrieved.
  # Be sure to use as narrow a search criteria as possible. Please use
@@ -143,7 +247,8 @@ module Google
  # @yield [row] The block for accessing each row of data.
  # @yieldparam [Hash] row The row object.
  #
- # @return [Enumerator]
+ # @return [Enumerator] An enumerator providing access to all of the
+ # data.
  #
  # @example Iterating each rows by passing a block:
  # require "google/cloud/bigquery"
@@ -197,13 +302,14 @@ module Google

  ##
  # @private New Data from a response object.
- def self.from_gapi gapi, table
+ def self.from_gapi gapi, table_gapi, service
  formatted_rows = Convert.format_rows(gapi.rows,
- table.gapi.schema.fields)
+ table_gapi.schema.fields)

  data = new formatted_rows
- data.table = table
+ data.table_gapi = table_gapi
  data.gapi = gapi
+ data.service = service
  data
  end

@@ -211,8 +317,8 @@ module Google

  ##
  # Raise an error unless an active service is available.
- def ensure_table!
- fail "Must have active connection" unless table
+ def ensure_service!
+ fail "Must have active connection" unless service
  end
  end
  end