google-cloud-bigquery 1.14.0 → 1.42.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/AUTHENTICATION.md +17 -54
  3. data/CHANGELOG.md +377 -0
  4. data/CONTRIBUTING.md +328 -116
  5. data/LOGGING.md +1 -1
  6. data/OVERVIEW.md +21 -20
  7. data/TROUBLESHOOTING.md +2 -8
  8. data/lib/google/cloud/bigquery/argument.rb +197 -0
  9. data/lib/google/cloud/bigquery/convert.rb +155 -173
  10. data/lib/google/cloud/bigquery/copy_job.rb +74 -26
  11. data/lib/google/cloud/bigquery/credentials.rb +5 -12
  12. data/lib/google/cloud/bigquery/data.rb +109 -18
  13. data/lib/google/cloud/bigquery/dataset/access.rb +474 -52
  14. data/lib/google/cloud/bigquery/dataset/list.rb +7 -13
  15. data/lib/google/cloud/bigquery/dataset/tag.rb +67 -0
  16. data/lib/google/cloud/bigquery/dataset.rb +1044 -287
  17. data/lib/google/cloud/bigquery/external/avro_source.rb +107 -0
  18. data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
  19. data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
  20. data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
  21. data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
  22. data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
  23. data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
  24. data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
  25. data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
  26. data/lib/google/cloud/bigquery/external.rb +50 -2256
  27. data/lib/google/cloud/bigquery/extract_job.rb +226 -61
  28. data/lib/google/cloud/bigquery/insert_response.rb +1 -3
  29. data/lib/google/cloud/bigquery/job/list.rb +10 -14
  30. data/lib/google/cloud/bigquery/job.rb +289 -14
  31. data/lib/google/cloud/bigquery/load_job.rb +810 -136
  32. data/lib/google/cloud/bigquery/model/list.rb +5 -9
  33. data/lib/google/cloud/bigquery/model.rb +247 -16
  34. data/lib/google/cloud/bigquery/policy.rb +432 -0
  35. data/lib/google/cloud/bigquery/project/list.rb +6 -11
  36. data/lib/google/cloud/bigquery/project.rb +509 -250
  37. data/lib/google/cloud/bigquery/query_job.rb +594 -128
  38. data/lib/google/cloud/bigquery/routine/list.rb +165 -0
  39. data/lib/google/cloud/bigquery/routine.rb +1227 -0
  40. data/lib/google/cloud/bigquery/schema/field.rb +413 -63
  41. data/lib/google/cloud/bigquery/schema.rb +221 -48
  42. data/lib/google/cloud/bigquery/service.rb +204 -112
  43. data/lib/google/cloud/bigquery/standard_sql.rb +269 -53
  44. data/lib/google/cloud/bigquery/table/async_inserter.rb +86 -43
  45. data/lib/google/cloud/bigquery/table/list.rb +6 -11
  46. data/lib/google/cloud/bigquery/table.rb +1470 -377
  47. data/lib/google/cloud/bigquery/time.rb +6 -0
  48. data/lib/google/cloud/bigquery/version.rb +1 -1
  49. data/lib/google/cloud/bigquery.rb +4 -6
  50. data/lib/google-cloud-bigquery.rb +14 -13
  51. metadata +66 -38
@@ -0,0 +1,170 @@
1
+ # Copyright 2021 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ module Google
17
+ module Cloud
18
+ module Bigquery
19
+ module External
20
+ ##
21
+ # # JsonSource
22
+ #
23
+ # {External::JsonSource} is a subclass of {External::DataSource} and
24
+ # represents a JSON external data source, such as a file in Google
25
+ # Cloud Storage or Google Drive, that can be queried directly even though
26
+ # the data is not stored in BigQuery. Instead of loading or streaming
27
+ # the data, this object references the external data source.
28
+ #
29
+ # @example
30
+ # require "google/cloud/bigquery"
31
+ #
32
+ # bigquery = Google::Cloud::Bigquery.new
33
+ #
34
+ # require "google/cloud/bigquery"
35
+ #
36
+ # bigquery = Google::Cloud::Bigquery.new
37
+ #
38
+ # json_url = "gs://bucket/path/to/data.json"
39
+ # json_table = bigquery.external json_url do |json|
40
+ # json.schema do |schema|
41
+ # schema.string "name", mode: :required
42
+ # schema.string "email", mode: :required
43
+ # schema.integer "age", mode: :required
44
+ # schema.boolean "active", mode: :required
45
+ # end
46
+ # end
47
+ #
48
+ # data = bigquery.query "SELECT * FROM my_ext_table",
49
+ # external: { my_ext_table: json_table }
50
+ #
51
+ # # Iterate over the first page of results
52
+ # data.each do |row|
53
+ # puts row[:name]
54
+ # end
55
+ # # Retrieve the next page of results
56
+ # data = data.next if data.next?
57
+ #
58
+ class JsonSource < External::DataSource
59
+ ##
60
+ # The schema for the data.
61
+ #
62
+ # @param [Boolean] replace Whether to replace the existing schema with
63
+ # the new schema. If `true`, the fields will replace the existing
64
+ # schema. If `false`, the fields will be added to the existing
65
+ # schema. The default value is `false`.
66
+ # @yield [schema] a block for setting the schema
67
+ # @yieldparam [Schema] schema the object accepting the schema
68
+ #
69
+ # @return [Google::Cloud::Bigquery::Schema]
70
+ #
71
+ # @example
72
+ # require "google/cloud/bigquery"
73
+ #
74
+ # bigquery = Google::Cloud::Bigquery.new
75
+ #
76
+ # json_url = "gs://bucket/path/to/data.json"
77
+ # json_table = bigquery.external json_url do |json|
78
+ # json.schema do |schema|
79
+ # schema.string "name", mode: :required
80
+ # schema.string "email", mode: :required
81
+ # schema.integer "age", mode: :required
82
+ # schema.boolean "active", mode: :required
83
+ # end
84
+ # end
85
+ #
86
+ def schema replace: false
87
+ @schema ||= Schema.from_gapi @gapi.schema
88
+ if replace
89
+ frozen_check!
90
+ @schema = Schema.from_gapi
91
+ end
92
+ @schema.freeze if frozen?
93
+ yield @schema if block_given?
94
+ @schema
95
+ end
96
+
97
+ ##
98
+ # Set the schema for the data.
99
+ #
100
+ # @param [Schema] new_schema The schema object.
101
+ #
102
+ # @example
103
+ # require "google/cloud/bigquery"
104
+ #
105
+ # bigquery = Google::Cloud::Bigquery.new
106
+ #
107
+ # json_schema = bigquery.schema do |schema|
108
+ # schema.string "name", mode: :required
109
+ # schema.string "email", mode: :required
110
+ # schema.integer "age", mode: :required
111
+ # schema.boolean "active", mode: :required
112
+ # end
113
+ #
114
+ # json_url = "gs://bucket/path/to/data.json"
115
+ # json_table = bigquery.external json_url
116
+ # json_table.schema = json_schema
117
+ #
118
+ def schema= new_schema
119
+ frozen_check!
120
+ @schema = new_schema
121
+ end
122
+
123
+ ##
124
+ # The fields of the schema.
125
+ #
126
+ # @return [Array<Schema::Field>] An array of field objects.
127
+ #
128
+ def fields
129
+ schema.fields
130
+ end
131
+
132
+ ##
133
+ # The names of the columns in the schema.
134
+ #
135
+ # @return [Array<Symbol>] An array of column names.
136
+ #
137
+ def headers
138
+ schema.headers
139
+ end
140
+
141
+ ##
142
+ # The types of the fields in the data in the schema, using the same
143
+ # format as the optional query parameter types.
144
+ #
145
+ # @return [Hash] A hash with field names as keys, and types as values.
146
+ #
147
+ def param_types
148
+ schema.param_types
149
+ end
150
+
151
+ ##
152
+ # @private Google API Client object.
153
+ def to_gapi
154
+ @gapi.schema = @schema.to_gapi if @schema
155
+ @gapi
156
+ end
157
+
158
+ ##
159
+ # @private Google API Client object.
160
+ def self.from_gapi gapi
161
+ new_table = super
162
+ schema = Schema.from_gapi gapi.schema
163
+ new_table.instance_variable_set :@schema, schema
164
+ new_table
165
+ end
166
+ end
167
+ end
168
+ end
169
+ end
170
+ end
@@ -0,0 +1,148 @@
1
+ # Copyright 2021 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/apis/bigquery_v2"
17
+
18
+ module Google
19
+ module Cloud
20
+ module Bigquery
21
+ module External
22
+ ##
23
+ # # ParquetSource
24
+ #
25
+ # {External::ParquetSource} is a subclass of {External::DataSource} and
26
+ # represents a Parquet external data source that can be queried
27
+ # from directly, even though the data is not stored in BigQuery. Instead
28
+ # of loading or streaming the data, this object references the external
29
+ # data source.
30
+ #
31
+ # @example
32
+ # require "google/cloud/bigquery"
33
+ #
34
+ # bigquery = Google::Cloud::Bigquery.new
35
+ #
36
+ # parquet_url = "gs://bucket/path/to/data.parquet"
37
+ # parquet_table = bigquery.external parquet_url do |parquet|
38
+ # parquet.enable_list_inference = true
39
+ # end
40
+ #
41
+ # data = bigquery.query "SELECT * FROM my_ext_table",
42
+ # external: { my_ext_table: parquet_table }
43
+ #
44
+ # # Iterate over the first page of results
45
+ # data.each do |row|
46
+ # puts row[:name]
47
+ # end
48
+ # # Retrieve the next page of results
49
+ # data = data.next if data.next?
50
+ #
51
+ class ParquetSource < External::DataSource
52
+ ##
53
+ # @private Create an empty ParquetSource object.
54
+ def initialize
55
+ super
56
+ @gapi.parquet_options = Google::Apis::BigqueryV2::ParquetOptions.new
57
+ end
58
+
59
+ ##
60
+ # Indicates whether to use schema inference specifically for Parquet `LIST` logical type.
61
+ #
62
+ # @return [Boolean]
63
+ #
64
+ # @example
65
+ # require "google/cloud/bigquery"
66
+ #
67
+ # bigquery = Google::Cloud::Bigquery.new
68
+ #
69
+ # parquet_url = "gs://bucket/path/to/data.parquet"
70
+ # parquet_table = bigquery.external parquet_url do |parquet|
71
+ # parquet.enable_list_inference = true
72
+ # end
73
+ #
74
+ # parquet_table.enable_list_inference #=> true
75
+ #
76
+ def enable_list_inference
77
+ @gapi.parquet_options.enable_list_inference
78
+ end
79
+
80
+ ##
81
+ # Sets whether to use schema inference specifically for Parquet `LIST` logical type.
82
+ #
83
+ # @param [Boolean] new_enable_list_inference The new `enable_list_inference` value.
84
+ #
85
+ # @example
86
+ # require "google/cloud/bigquery"
87
+ #
88
+ # bigquery = Google::Cloud::Bigquery.new
89
+ #
90
+ # parquet_url = "gs://bucket/path/to/data.parquet"
91
+ # parquet_table = bigquery.external parquet_url do |parquet|
92
+ # parquet.enable_list_inference = true
93
+ # end
94
+ #
95
+ # parquet_table.enable_list_inference #=> true
96
+ #
97
+ def enable_list_inference= new_enable_list_inference
98
+ frozen_check!
99
+ @gapi.parquet_options.enable_list_inference = new_enable_list_inference
100
+ end
101
+
102
+ ##
103
+ # Indicates whether to infer Parquet `ENUM` logical type as `STRING` instead of `BYTES` by default.
104
+ #
105
+ # @return [Boolean]
106
+ #
107
+ # @example
108
+ # require "google/cloud/bigquery"
109
+ #
110
+ # bigquery = Google::Cloud::Bigquery.new
111
+ #
112
+ # parquet_url = "gs://bucket/path/to/data.parquet"
113
+ # parquet_table = bigquery.external parquet_url do |parquet|
114
+ # parquet.enum_as_string = true
115
+ # end
116
+ #
117
+ # parquet_table.enum_as_string #=> true
118
+ #
119
+ def enum_as_string
120
+ @gapi.parquet_options.enum_as_string
121
+ end
122
+
123
+ ##
124
+ # Sets whether to infer Parquet `ENUM` logical type as `STRING` instead of `BYTES` by default.
125
+ #
126
+ # @param [Boolean] new_enum_as_string The new `enum_as_string` value.
127
+ #
128
+ # @example
129
+ # require "google/cloud/bigquery"
130
+ #
131
+ # bigquery = Google::Cloud::Bigquery.new
132
+ #
133
+ # parquet_url = "gs://bucket/path/to/data.parquet"
134
+ # parquet_table = bigquery.external parquet_url do |parquet|
135
+ # parquet.enum_as_string = true
136
+ # end
137
+ #
138
+ # parquet_table.enum_as_string #=> true
139
+ #
140
+ def enum_as_string= new_enum_as_string
141
+ frozen_check!
142
+ @gapi.parquet_options.enum_as_string = new_enum_as_string
143
+ end
144
+ end
145
+ end
146
+ end
147
+ end
148
+ end
@@ -0,0 +1,166 @@
1
+ # Copyright 2021 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/apis/bigquery_v2"
17
+
18
+ module Google
19
+ module Cloud
20
+ module Bigquery
21
+ module External
22
+ ##
23
+ # # SheetsSource
24
+ #
25
+ # {External::SheetsSource} is a subclass of {External::DataSource} and
26
+ # represents a Google Sheets external data source that can be queried
27
+ # from directly, even though the data is not stored in BigQuery. Instead
28
+ # of loading or streaming the data, this object references the external
29
+ # data source.
30
+ #
31
+ # @example
32
+ # require "google/cloud/bigquery"
33
+ #
34
+ # bigquery = Google::Cloud::Bigquery.new
35
+ #
36
+ # sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
37
+ # sheets_table = bigquery.external sheets_url do |sheets|
38
+ # sheets.skip_leading_rows = 1
39
+ # end
40
+ #
41
+ # data = bigquery.query "SELECT * FROM my_ext_table",
42
+ # external: { my_ext_table: sheets_table }
43
+ #
44
+ # # Iterate over the first page of results
45
+ # data.each do |row|
46
+ # puts row[:name]
47
+ # end
48
+ # # Retrieve the next page of results
49
+ # data = data.next if data.next?
50
+ #
51
+ class SheetsSource < External::DataSource
52
+ ##
53
+ # @private Create an empty SheetsSource object.
54
+ def initialize
55
+ super
56
+ @gapi.google_sheets_options = Google::Apis::BigqueryV2::GoogleSheetsOptions.new
57
+ end
58
+
59
+ ##
60
+ # The number of rows at the top of a sheet that BigQuery will skip
61
+ # when reading the data. The default value is `0`.
62
+ #
63
+ # This property is useful if you have header rows that should be
64
+ # skipped. When `autodetect` is on, behavior is the following:
65
+ #
66
+ # * `nil` - Autodetect tries to detect headers in the first row. If
67
+ # they are not detected, the row is read as data. Otherwise data is
68
+ # read starting from the second row.
69
+ # * `0` - Instructs autodetect that there are no headers and data
70
+ # should be read starting from the first row.
71
+ # * `N > 0` - Autodetect skips `N-1` rows and tries to detect headers
72
+ # in row `N`. If headers are not detected, row `N` is just skipped.
73
+ # Otherwise row `N` is used to extract column names for the detected
74
+ # schema.
75
+ #
76
+ # @return [Integer]
77
+ #
78
+ # @example
79
+ # require "google/cloud/bigquery"
80
+ #
81
+ # bigquery = Google::Cloud::Bigquery.new
82
+ #
83
+ # sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
84
+ # sheets_table = bigquery.external sheets_url do |sheets|
85
+ # sheets.skip_leading_rows = 1
86
+ # end
87
+ #
88
+ # sheets_table.skip_leading_rows #=> 1
89
+ #
90
+ def skip_leading_rows
91
+ @gapi.google_sheets_options.skip_leading_rows
92
+ end
93
+
94
+ ##
95
+ # Set the number of rows at the top of a sheet that BigQuery will skip
96
+ # when reading the data.
97
+ #
98
+ # @param [Integer] row_count New skip_leading_rows value
99
+ #
100
+ # @example
101
+ # require "google/cloud/bigquery"
102
+ #
103
+ # bigquery = Google::Cloud::Bigquery.new
104
+ #
105
+ # sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
106
+ # sheets_table = bigquery.external sheets_url do |sheets|
107
+ # sheets.skip_leading_rows = 1
108
+ # end
109
+ #
110
+ # sheets_table.skip_leading_rows #=> 1
111
+ #
112
+ def skip_leading_rows= row_count
113
+ frozen_check!
114
+ @gapi.google_sheets_options.skip_leading_rows = row_count
115
+ end
116
+
117
+ ##
118
+ # Range of a sheet to query from. Only used when non-empty. Typical
119
+ # format: `{sheet_name}!{top_left_cell_id}:{bottom_right_cell_id}`.
120
+ #
121
+ # @return [String] Range of a sheet to query from.
122
+ #
123
+ # @example
124
+ # require "google/cloud/bigquery"
125
+ #
126
+ # bigquery = Google::Cloud::Bigquery.new
127
+ #
128
+ # sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
129
+ # sheets_table = bigquery.external sheets_url do |sheets|
130
+ # sheets.range = "sheet1!A1:B20"
131
+ # end
132
+ #
133
+ # sheets_table.range #=> "sheet1!A1:B20"
134
+ #
135
+ def range
136
+ @gapi.google_sheets_options.range
137
+ end
138
+
139
+ ##
140
+ # Set the range of a sheet to query from. Only used when non-empty.
141
+ # Typical format:
142
+ # `{sheet_name}!{top_left_cell_id}:{bottom_right_cell_id}`.
143
+ #
144
+ # @param [String] new_range New range of a sheet to query from.
145
+ #
146
+ # @example
147
+ # require "google/cloud/bigquery"
148
+ #
149
+ # bigquery = Google::Cloud::Bigquery.new
150
+ #
151
+ # sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
152
+ # sheets_table = bigquery.external sheets_url do |sheets|
153
+ # sheets.range = "sheet1!A1:B20"
154
+ # end
155
+ #
156
+ # sheets_table.range #=> "sheet1!A1:B20"
157
+ #
158
+ def range= new_range
159
+ frozen_check!
160
+ @gapi.google_sheets_options.range = new_range
161
+ end
162
+ end
163
+ end
164
+ end
165
+ end
166
+ end