google-cloud-bigquery 0.28.0 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 41cd0596408cd35451c4db0c52fe0b7901dff7ea
4
- data.tar.gz: fb36dac2c6d09940b4335980b808fc6d9564b84d
3
+ metadata.gz: 58e73b43f4053457d2df061703e3314483552dbb
4
+ data.tar.gz: 927a49cb45ff1a1c2aac5fa319e19dc0b422af23
5
5
  SHA512:
6
- metadata.gz: cf8190937417f431a6221c408b911c958c2c83be1b4cce3688a130e2f9f44a5645c68e81f16368a3b3e524e9c5ae87ea095e4b3791bbd8970d93dc5afafb4d32
7
- data.tar.gz: 2ad979a3b853e4e2fc6813f5e98279181a39d082b843079752d3d4fd63c1732f37e261584b5b77c3aa0f47f6b3a45386bd1ea5e3ac97cb4acaf35d84d3d83ff7
6
+ metadata.gz: 8d86f37859a1a6cf2b682afd8b4e3f77b178fb8333e47606673e02e0a5e6ba76e61c6ee3c9bfbd5bd8d4763eff03f9c54bc067c10f39883772a6c730c0d8b443
7
+ data.tar.gz: d9d5afd3aac9c23e523909d78b7ae5fc886ce783e5b3cad0068aff21a815841a564e9baf6f35b2235adf0ed743df55d41c33b570f9b99a5d5a96ea50768882d0
data/README.md CHANGED
@@ -36,7 +36,7 @@ table = dataset.create_table "todos",
36
36
 
37
37
  # Load data into the table
38
38
  file = File.open "/archive/todos/completed-todos.csv"
39
- load_job = table.load file
39
+ table.load file
40
40
 
41
41
  # Run a query for the number of completed todos by owner
42
42
  count_sql = "SELECT owner, COUNT(*) AS complete_count FROM todos GROUP BY owner"
@@ -39,7 +39,7 @@ module Google
39
39
  #
40
40
  # * `https://www.googleapis.com/auth/bigquery`
41
41
  # @param [Integer] retries Number of times to retry requests on server
42
- # error. The default value is `3`. Optional.
42
+ # error. The default value is `5`. Optional.
43
43
  # @param [Integer] timeout Default request timeout in seconds. Optional.
44
44
  #
45
45
  # @return [Google::Cloud::Bigquery::Project]
@@ -88,7 +88,7 @@ module Google
88
88
  #
89
89
  # * `https://www.googleapis.com/auth/bigquery`
90
90
  # @param [Integer] retries Number of times to retry requests on server
91
- # error. The default value is `3`. Optional.
91
+ # error. The default value is `5`. Optional.
92
92
  # @param [Integer] timeout Default timeout to use in requests. Optional.
93
93
  #
94
94
  # @return [Google::Cloud::Bigquery::Project]
@@ -232,7 +232,7 @@ module Google
232
232
  # BigQuery API provides facilities for managing longer-running jobs. With
233
233
  # the asynchronous approach to running a query, an instance of
234
234
  # {Google::Cloud::Bigquery::QueryJob} is returned, rather than an instance
235
- # of {Google::Cloud::Bigquery::QueryData}.
235
+ # of {Google::Cloud::Bigquery::Data}.
236
236
  #
237
237
  # ```ruby
238
238
  # require "google/cloud/bigquery"
@@ -246,17 +246,17 @@ module Google
246
246
  #
247
247
  # job.wait_until_done!
248
248
  # if !job.failed?
249
- # job.query_results.first
249
+ # job.data.first
250
250
  # #=> {:title=>[{:value=>"hamlet", :count=>5318}, ...}
251
251
  # end
252
252
  # ```
253
253
  #
254
254
  # Once you have determined that the job is done and has not failed, you can
255
- # obtain an instance of {Google::Cloud::Bigquery::QueryData} by calling
256
- # `query_results` on the job instance. The query results for both of the
257
- # above examples are stored in temporary tables with a lifetime of about 24
258
- # hours. See the final example below for a demonstration of how to store
259
- # query results in a permanent table.
255
+ # obtain an instance of {Google::Cloud::Bigquery::Data} by calling `data` on
256
+ # the job instance. The query results for both of the above examples are
257
+ # stored in temporary tables with a lifetime of about 24 hours. See the
258
+ # final example below for a demonstration of how to store query results in a
259
+ # permanent table.
260
260
  #
261
261
  # ## Creating Datasets and Tables
262
262
  #
@@ -370,7 +370,7 @@ module Google
370
370
  # end
371
371
  #
372
372
  # file = File.open "names/yob2014.txt"
373
- # load_job = table.load file, format: "csv"
373
+ # table.load file, format: "csv"
374
374
  # ```
375
375
  #
376
376
  # Because the names data, although formatted as CSV, is distributed in files
@@ -411,9 +411,7 @@ module Google
411
411
  # bucket = storage.create_bucket bucket_id
412
412
  # extract_url = "gs://#{bucket.id}/baby-names.csv"
413
413
  #
414
- # extract_job = result_table.extract extract_url
415
- #
416
- # extract_job.wait_until_done!
414
+ # result_table.extract extract_url
417
415
  #
418
416
  # # Download to local filesystem
419
417
  # bucket.files.first.download "baby-names.csv"
@@ -470,7 +468,7 @@ module Google
470
468
  #
471
469
  # * `https://www.googleapis.com/auth/bigquery`
472
470
  # @param [Integer] retries Number of times to retry requests on server
473
- # error. The default value is `3`. Optional.
471
+ # error. The default value is `5`. Optional.
474
472
  # @param [Integer] timeout Default timeout to use in requests. Optional.
475
473
  #
476
474
  # @return [Google::Cloud::Bigquery::Project]
@@ -20,17 +20,33 @@ module Google
20
20
  # # CopyJob
21
21
  #
22
22
  # A {Job} subclass representing a copy operation that may be performed on
23
- # a {Table}. A CopyJob instance is created when you call {Table#copy}.
23
+ # a {Table}. A CopyJob instance is created when you call {Table#copy_job}.
24
24
  #
25
- # @see https://cloud.google.com/bigquery/docs/tables#copyingtable Copying
25
+ # @see https://cloud.google.com/bigquery/docs/tables#copy-table Copying
26
26
  # an Existing Table
27
27
  # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
28
28
  # reference
29
29
  #
30
+ # @example
31
+ # require "google/cloud/bigquery"
32
+ #
33
+ # bigquery = Google::Cloud::Bigquery.new
34
+ # dataset = bigquery.dataset "my_dataset"
35
+ # table = dataset.table "my_table"
36
+ # destination_table = dataset.table "my_destination_table"
37
+ #
38
+ # copy_job = table.copy_job destination_table
39
+ #
40
+ # copy_job.wait_until_done!
41
+ # copy_job.done? #=> true
42
+ #
30
43
  class CopyJob < Job
31
44
  ##
32
45
  # The table from which data is copied. This is the table on
33
- # which {Table#copy} was called. Returns a {Table} instance.
46
+ # which {Table#copy_job} was called.
47
+ #
48
+ # @return [Table] A table instance.
49
+ #
34
50
  def source
35
51
  table = @gapi.configuration.copy.source_table
36
52
  return nil unless table
@@ -40,7 +56,10 @@ module Google
40
56
  end
41
57
 
42
58
  ##
43
- # The table to which data is copied. Returns a {Table} instance.
59
+ # The table to which data is copied.
60
+ #
61
+ # @return [Table] A table instance.
62
+ #
44
63
  def destination
45
64
  table = @gapi.configuration.copy.destination_table
46
65
  return nil unless table
@@ -52,7 +71,11 @@ module Google
52
71
  ##
53
72
  # Checks if the create disposition for the job is `CREATE_IF_NEEDED`,
54
73
  # which provides the following behavior: If the table does not exist,
55
- # the copy operation creates the table. This is the default.
74
+ # the copy operation creates the table. This is the default create
75
+ # disposition for copy jobs.
76
+ #
77
+ # @return [Boolean] `true` when `CREATE_IF_NEEDED`, `false` otherwise.
78
+ #
56
79
  def create_if_needed?
57
80
  disp = @gapi.configuration.copy.create_disposition
58
81
  disp == "CREATE_IF_NEEDED"
@@ -62,6 +85,9 @@ module Google
62
85
  # Checks if the create disposition for the job is `CREATE_NEVER`, which
63
86
  # provides the following behavior: The table must already exist; if it
64
87
  # does not, an error is returned in the job result.
88
+ #
89
+ # @return [Boolean] `true` when `CREATE_NEVER`, `false` otherwise.
90
+ #
65
91
  def create_never?
66
92
  disp = @gapi.configuration.copy.create_disposition
67
93
  disp == "CREATE_NEVER"
@@ -71,6 +97,9 @@ module Google
71
97
  # Checks if the write disposition for the job is `WRITE_TRUNCATE`, which
72
98
  # provides the following behavior: If the table already exists, the copy
73
99
  # operation overwrites the table data.
100
+ #
101
+ # @return [Boolean] `true` when `WRITE_TRUNCATE`, `false` otherwise.
102
+ #
74
103
  def write_truncate?
75
104
  disp = @gapi.configuration.copy.write_disposition
76
105
  disp == "WRITE_TRUNCATE"
@@ -80,6 +109,9 @@ module Google
80
109
  # Checks if the write disposition for the job is `WRITE_APPEND`, which
81
110
  # provides the following behavior: If the table already exists, the copy
82
111
  # operation appends the data to the table.
112
+ #
113
+ # @return [Boolean] `true` when `WRITE_APPEND`, `false` otherwise.
114
+ #
83
115
  def write_append?
84
116
  disp = @gapi.configuration.copy.write_disposition
85
117
  disp == "WRITE_APPEND"
@@ -88,7 +120,11 @@ module Google
88
120
  ##
89
121
  # Checks if the write disposition for the job is `WRITE_EMPTY`, which
90
122
  # provides the following behavior: If the table already exists and
91
- # contains data, the job will have an error. This is the default.
123
+ # contains data, the job will have an error. This is the default write
124
+ # disposition for copy jobs.
125
+ #
126
+ # @return [Boolean] `true` when `WRITE_EMPTY`, `false` otherwise.
127
+ #
92
128
  def write_empty?
93
129
  disp = @gapi.configuration.copy.write_disposition
94
130
  disp == "WRITE_EMPTY"
@@ -22,12 +22,31 @@ module Google
22
22
  ##
23
23
  # # Data
24
24
  #
25
- # Represents {Table} Data as a list of name/value pairs.
26
- # Also contains metadata such as `etag` and `total`.
25
+ # Represents {Table} Data as a list of name/value pairs (hashes).
26
+ # Also contains metadata such as `etag` and `total`, and provides access
27
+ # to the schema of the table from which the data was read.
28
+ #
29
+ # @example
30
+ # require "google/cloud/bigquery"
31
+ #
32
+ # bigquery = Google::Cloud::Bigquery.new
33
+ # dataset = bigquery.dataset "my_dataset"
34
+ # table = dataset.table "my_table"
35
+ #
36
+ # data = table.data
37
+ # puts "#{data.count} of #{data.total}"
38
+ # if data.next?
39
+ # next_data = data.next
40
+ # end
41
+ #
27
42
  class Data < DelegateClass(::Array)
43
+ ##
44
+ # @private The Service object.
45
+ attr_accessor :service
46
+
28
47
  ##
29
48
  # @private The gapi object of the {Table} the data belongs to.
30
- attr_accessor :table
49
+ attr_accessor :table_gapi
31
50
 
32
51
  ##
33
52
  # @private The Google API Client object.
@@ -35,30 +54,58 @@ module Google
35
54
 
36
55
  # @private
37
56
  def initialize arr = []
38
- @table = nil
39
- @gapi = {}
57
+ @service = nil
58
+ @table_gapi = nil
59
+ @gapi = nil
40
60
  super arr
41
61
  end
42
62
 
43
63
  ##
44
64
  # The resource type of the API response.
65
+ #
66
+ # @return [String] The resource type.
67
+ #
45
68
  def kind
46
69
  @gapi.kind
47
70
  end
48
71
 
49
72
  ##
50
- # The etag.
73
+ # An ETag hash for the page of results represented by the data instance.
74
+ #
75
+ # @return [String] The ETag hash.
76
+ #
51
77
  def etag
52
78
  @gapi.etag
53
79
  end
54
80
 
55
81
  ##
56
- # A token used for paging results.
82
+ # A token used for paging results. Used by the data instance to retrieve
83
+ # subsequent pages. See {#next}.
84
+ #
85
+ # @return [String] The pagination token.
86
+ #
57
87
  def token
58
88
  @gapi.page_token
59
89
  end
60
90
 
91
+ ##
61
92
  # The total number of rows in the complete table.
93
+ #
94
+ # @return [Integer] The number of rows.
95
+ #
96
+ # @example
97
+ # require "google/cloud/bigquery"
98
+ #
99
+ # bigquery = Google::Cloud::Bigquery.new
100
+ # dataset = bigquery.dataset "my_dataset"
101
+ # table = dataset.table "my_table"
102
+ #
103
+ # data = table.data
104
+ # puts "#{data.count} of #{data.total}"
105
+ # if data.next?
106
+ # next_data = data.next
107
+ # end
108
+ #
62
109
  def total
63
110
  Integer @gapi.total_rows
64
111
  rescue
@@ -66,19 +113,72 @@ module Google
66
113
  end
67
114
 
68
115
  ##
69
- # The schema of the data.
116
+ # The schema of the table from which the data was read.
117
+ #
118
+ # The returned object is frozen and changes are not allowed. Use
119
+ # {Table#schema} to update the schema.
120
+ #
121
+ # @return [Schema] A schema object.
122
+ #
123
+ # @example
124
+ # require "google/cloud/bigquery"
125
+ #
126
+ # bigquery = Google::Cloud::Bigquery.new
127
+ # dataset = bigquery.dataset "my_dataset"
128
+ # table = dataset.table "my_table"
129
+ #
130
+ # data = table.data
131
+ #
132
+ # schema = data.schema
133
+ # field = schema.field "name"
134
+ # field.required? #=> true
135
+ #
70
136
  def schema
71
- table.schema
137
+ Schema.from_gapi(@table_gapi.schema).freeze
72
138
  end
73
139
 
74
140
  ##
75
- # The fields of the data.
141
+ # The fields of the data, obtained from the schema of the table from
142
+ # which the data was read.
143
+ #
144
+ # @return [Array<Schema::Field>] An array of field objects.
145
+ #
146
+ # @example
147
+ # require "google/cloud/bigquery"
148
+ #
149
+ # bigquery = Google::Cloud::Bigquery.new
150
+ # dataset = bigquery.dataset "my_dataset"
151
+ # table = dataset.table "my_table"
152
+ #
153
+ # data = table.data
154
+ #
155
+ # data.fields.each do |field|
156
+ # puts field.name
157
+ # end
158
+ #
76
159
  def fields
77
160
  schema.fields
78
161
  end
79
162
 
80
163
  ##
81
- # The name of the columns in the data.
164
+ # The names of the columns in the data, obtained from the schema of the
165
+ # table from which the data was read.
166
+ #
167
+ # @return [Array<Symbol>] An array of column names.
168
+ #
169
+ # @example
170
+ # require "google/cloud/bigquery"
171
+ #
172
+ # bigquery = Google::Cloud::Bigquery.new
173
+ # dataset = bigquery.dataset "my_dataset"
174
+ # table = dataset.table "my_table"
175
+ #
176
+ # data = table.data
177
+ #
178
+ # data.headers.each do |header|
179
+ # puts header
180
+ # end
181
+ #
82
182
  def headers
83
183
  schema.headers
84
184
  end
@@ -86,7 +186,7 @@ module Google
86
186
  ##
87
187
  # Whether there is a next page of data.
88
188
  #
89
- # @return [Boolean]
189
+ # @return [Boolean] `true` when there is a next page, `false` otherwise.
90
190
  #
91
191
  # @example
92
192
  # require "google/cloud/bigquery"
@@ -105,9 +205,9 @@ module Google
105
205
  end
106
206
 
107
207
  ##
108
- # Retrieve the next page of data.
208
+ # Retrieves the next page of data.
109
209
  #
110
- # @return [Data]
210
+ # @return [Data] A new instance providing the next page of data.
111
211
  #
112
212
  # @example
113
213
  # require "google/cloud/bigquery"
@@ -123,8 +223,12 @@ module Google
123
223
  #
124
224
  def next
125
225
  return nil unless next?
126
- ensure_table!
127
- table.data token: token
226
+ ensure_service!
227
+ data_gapi = service.list_tabledata \
228
+ @table_gapi.table_reference.dataset_id,
229
+ @table_gapi.table_reference.table_id,
230
+ token: token
231
+ self.class.from_gapi data_gapi, @table_gapi, @service
128
232
  end
129
233
 
130
234
  ##
@@ -132,7 +236,7 @@ module Google
132
236
  # returns `false`. Calls the given block once for each row, which is
133
237
  # passed as the parameter.
134
238
  #
135
- # An Enumerator is returned if no block is given.
239
+ # An enumerator is returned if no block is given.
136
240
  #
137
241
  # This method may make several API calls until all rows are retrieved.
138
242
  # Be sure to use as narrow a search criteria as possible. Please use
@@ -143,7 +247,8 @@ module Google
143
247
  # @yield [row] The block for accessing each row of data.
144
248
  # @yieldparam [Hash] row The row object.
145
249
  #
146
- # @return [Enumerator]
250
+ # @return [Enumerator] An enumerator providing access to all of the
251
+ # data.
147
252
  #
148
253
  # @example Iterating each row by passing a block:
149
254
  # require "google/cloud/bigquery"
@@ -197,13 +302,14 @@ module Google
197
302
 
198
303
  ##
199
304
  # @private New Data from a response object.
200
- def self.from_gapi gapi, table
305
+ def self.from_gapi gapi, table_gapi, service
201
306
  formatted_rows = Convert.format_rows(gapi.rows,
202
- table.gapi.schema.fields)
307
+ table_gapi.schema.fields)
203
308
 
204
309
  data = new formatted_rows
205
- data.table = table
310
+ data.table_gapi = table_gapi
206
311
  data.gapi = gapi
312
+ data.service = service
207
313
  data
208
314
  end
209
315
 
@@ -211,8 +317,8 @@ module Google
211
317
 
212
318
  ##
213
319
  # Raise an error unless an active service is available.
214
- def ensure_table!
215
- fail "Must have active connection" unless table
320
+ def ensure_service!
321
+ fail "Must have active connection" unless service
216
322
  end
217
323
  end
218
324
  end