google-cloud-bigquery 1.31.0 → 1.32.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,170 @@
1
+ # Copyright 2021 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
module Google
  module Cloud
    module Bigquery
      module External
        ##
        # # JsonSource
        #
        # {External::JsonSource} is the {External::DataSource} subclass for
        # JSON data that lives outside of BigQuery, for example in Google
        # Cloud Storage or Google Drive. The data is neither loaded nor
        # streamed into BigQuery; this object only references the external
        # source so that it can be queried in place.
        #
        # @example
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   json_url = "gs://bucket/path/to/data.json"
        #   json_table = bigquery.external json_url do |json|
        #     json.schema do |schema|
        #       schema.string "name", mode: :required
        #       schema.string "email", mode: :required
        #       schema.integer "age", mode: :required
        #       schema.boolean "active", mode: :required
        #     end
        #   end
        #
        #   data = bigquery.query "SELECT * FROM my_ext_table",
        #                         external: { my_ext_table: json_table }
        #
        #   # Iterate over the first page of results
        #   data.each do |row|
        #     puts row[:name]
        #   end
        #   # Retrieve the next page of results
        #   data = data.next if data.next?
        #
        class JsonSource < External::DataSource
          ##
          # Returns the schema describing the external data, optionally
          # yielding it to a block so that fields can be declared.
          #
          # @param [Boolean] replace Whether the existing schema should be
          #   discarded first. When `true`, fields declared in the block
          #   replace the existing schema; when `false` (the default), they
          #   are added to it.
          # @yield [schema] a block for setting the schema
          # @yieldparam [Schema] schema the object accepting the schema
          #
          # @return [Google::Cloud::Bigquery::Schema]
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   json_url = "gs://bucket/path/to/data.json"
          #   json_table = bigquery.external json_url do |json|
          #     json.schema do |schema|
          #       schema.string "name", mode: :required
          #       schema.string "email", mode: :required
          #       schema.integer "age", mode: :required
          #       schema.boolean "active", mode: :required
          #     end
          #   end
          #
          def schema replace: false
            # Memoize a Schema wrapper around whatever the gapi object holds.
            @schema ||= Schema.from_gapi @gapi.schema
            if replace
              # Replacing mutates this source, so run the inherited frozen
              # guard before swapping in a Schema built without a gapi.
              frozen_check!
              @schema = Schema.from_gapi
            end
            # A frozen source hands out a frozen schema as well.
            @schema.freeze if frozen?
            yield @schema if block_given?
            @schema
          end

          ##
          # Replaces the schema for the data with the given schema object.
          #
          # @param [Schema] new_schema The schema object.
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   json_schema = bigquery.schema do |schema|
          #     schema.string "name", mode: :required
          #     schema.string "email", mode: :required
          #     schema.integer "age", mode: :required
          #     schema.boolean "active", mode: :required
          #   end
          #
          #   json_url = "gs://bucket/path/to/data.json"
          #   json_table = bigquery.external json_url
          #   json_table.schema = json_schema
          #
          def schema= new_schema
            frozen_check!
            @schema = new_schema
          end

          ##
          # The fields of the schema, as reported by {#schema}.
          #
          # @return [Array<Schema::Field>] An array of field objects.
          #
          def fields
            schema.fields
          end

          ##
          # The names of the columns in the schema, as reported by {#schema}.
          #
          # @return [Array<Symbol>] An array of column names.
          #
          def headers
            schema.headers
          end

          ##
          # The types of the fields in the schema, expressed in the same
          # format as the optional query parameter types.
          #
          # @return [Hash] A hash with field names as keys, and types as values.
          #
          def param_types
            schema.param_types
          end

          ##
          # @private Google API Client object. Copies the memoized schema
          # (when one exists) onto the gapi object before returning it.
          def to_gapi
            @gapi.tap { |gapi| gapi.schema = @schema.to_gapi if @schema }
          end

          ##
          # @private Builds a JsonSource from a Google API Client object and
          # pre-populates its schema from the same gapi object.
          def self.from_gapi gapi
            super.tap do |source|
              source.instance_variable_set :@schema, Schema.from_gapi(gapi.schema)
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,148 @@
1
+ # Copyright 2021 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/apis/bigquery_v2"
17
+
18
module Google
  module Cloud
    module Bigquery
      module External
        ##
        # # ParquetSource
        #
        # {External::ParquetSource} is the {External::DataSource} subclass for
        # Parquet data that lives outside of BigQuery. The data is neither
        # loaded nor streamed into BigQuery; this object only references the
        # external source so that it can be queried in place.
        #
        # @example
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   parquet_url = "gs://bucket/path/to/data.parquet"
        #   parquet_table = bigquery.external parquet_url do |parquet|
        #     parquet.enable_list_inference = true
        #   end
        #
        #   data = bigquery.query "SELECT * FROM my_ext_table",
        #                         external: { my_ext_table: parquet_table }
        #
        #   # Iterate over the first page of results
        #   data.each do |row|
        #     puts row[:name]
        #   end
        #   # Retrieve the next page of results
        #   data = data.next if data.next?
        #
        class ParquetSource < External::DataSource
          ##
          # @private Create an empty ParquetSource object whose gapi object
          # carries a fresh, empty ParquetOptions.
          def initialize
            super
            parquet_options = Google::Apis::BigqueryV2::ParquetOptions.new
            @gapi.parquet_options = parquet_options
          end

          ##
          # Whether schema inference is used specifically for the Parquet
          # `LIST` logical type.
          #
          # @return [Boolean]
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   parquet_url = "gs://bucket/path/to/data.parquet"
          #   parquet_table = bigquery.external parquet_url do |parquet|
          #     parquet.enable_list_inference = true
          #   end
          #
          #   parquet_table.enable_list_inference #=> true
          #
          def enable_list_inference
            options = @gapi.parquet_options
            options.enable_list_inference
          end

          ##
          # Sets whether schema inference is used specifically for the Parquet
          # `LIST` logical type.
          #
          # @param [Boolean] new_enable_list_inference The new
          #   `enable_list_inference` value.
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   parquet_url = "gs://bucket/path/to/data.parquet"
          #   parquet_table = bigquery.external parquet_url do |parquet|
          #     parquet.enable_list_inference = true
          #   end
          #
          #   parquet_table.enable_list_inference #=> true
          #
          def enable_list_inference= new_enable_list_inference
            # Inherited guard, run before any mutation of the options.
            frozen_check!
            options = @gapi.parquet_options
            options.enable_list_inference = new_enable_list_inference
          end

          ##
          # Whether the Parquet `ENUM` logical type is inferred as `STRING`
          # instead of `BYTES` by default.
          #
          # @return [Boolean]
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   parquet_url = "gs://bucket/path/to/data.parquet"
          #   parquet_table = bigquery.external parquet_url do |parquet|
          #     parquet.enum_as_string = true
          #   end
          #
          #   parquet_table.enum_as_string #=> true
          #
          def enum_as_string
            options = @gapi.parquet_options
            options.enum_as_string
          end

          ##
          # Sets whether the Parquet `ENUM` logical type is inferred as
          # `STRING` instead of `BYTES` by default.
          #
          # @param [Boolean] new_enum_as_string The new `enum_as_string` value.
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   parquet_url = "gs://bucket/path/to/data.parquet"
          #   parquet_table = bigquery.external parquet_url do |parquet|
          #     parquet.enum_as_string = true
          #   end
          #
          #   parquet_table.enum_as_string #=> true
          #
          def enum_as_string= new_enum_as_string
            # Inherited guard, run before any mutation of the options.
            frozen_check!
            options = @gapi.parquet_options
            options.enum_as_string = new_enum_as_string
          end
        end
      end
    end
  end
end
@@ -0,0 +1,166 @@
1
+ # Copyright 2021 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ require "google/apis/bigquery_v2"
17
+
18
module Google
  module Cloud
    module Bigquery
      module External
        ##
        # # SheetsSource
        #
        # {External::SheetsSource} is the {External::DataSource} subclass for
        # Google Sheets data that lives outside of BigQuery. The data is
        # neither loaded nor streamed into BigQuery; this object only
        # references the external source so that it can be queried in place.
        #
        # @example
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
        #   sheets_table = bigquery.external sheets_url do |sheets|
        #     sheets.skip_leading_rows = 1
        #   end
        #
        #   data = bigquery.query "SELECT * FROM my_ext_table",
        #                         external: { my_ext_table: sheets_table }
        #
        #   # Iterate over the first page of results
        #   data.each do |row|
        #     puts row[:name]
        #   end
        #   # Retrieve the next page of results
        #   data = data.next if data.next?
        #
        class SheetsSource < External::DataSource
          ##
          # @private Create an empty SheetsSource object whose gapi object
          # carries a fresh, empty GoogleSheetsOptions.
          def initialize
            super
            sheets_options = Google::Apis::BigqueryV2::GoogleSheetsOptions.new
            @gapi.google_sheets_options = sheets_options
          end

          ##
          # The number of rows at the top of a sheet that BigQuery skips when
          # reading the data. The default value is `0`.
          #
          # Use this property to skip header rows. When `autodetect` is on,
          # the value is interpreted as follows:
          #
          # * `nil` - Autodetect tries to detect headers in the first row. If
          #   they are not detected, the row is read as data. Otherwise data is
          #   read starting from the second row.
          # * `0` - Instructs autodetect that there are no headers and data
          #   should be read starting from the first row.
          # * `N > 0` - Autodetect skips `N-1` rows and tries to detect headers
          #   in row `N`. If headers are not detected, row `N` is just skipped.
          #   Otherwise row `N` is used to extract column names for the detected
          #   schema.
          #
          # @return [Integer]
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
          #   sheets_table = bigquery.external sheets_url do |sheets|
          #     sheets.skip_leading_rows = 1
          #   end
          #
          #   sheets_table.skip_leading_rows #=> 1
          #
          def skip_leading_rows
            options = @gapi.google_sheets_options
            options.skip_leading_rows
          end

          ##
          # Sets the number of rows at the top of a sheet that BigQuery skips
          # when reading the data.
          #
          # @param [Integer] row_count New skip_leading_rows value
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
          #   sheets_table = bigquery.external sheets_url do |sheets|
          #     sheets.skip_leading_rows = 1
          #   end
          #
          #   sheets_table.skip_leading_rows #=> 1
          #
          def skip_leading_rows= row_count
            # Inherited guard, run before any mutation of the options.
            frozen_check!
            options = @gapi.google_sheets_options
            options.skip_leading_rows = row_count
          end

          ##
          # The range of a sheet to query from. Only used when non-empty.
          # Typical format:
          # `{sheet_name}!{top_left_cell_id}:{bottom_right_cell_id}`.
          #
          # @return [String] Range of a sheet to query from.
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
          #   sheets_table = bigquery.external sheets_url do |sheets|
          #     sheets.range = "sheet1!A1:B20"
          #   end
          #
          #   sheets_table.range #=> "sheet1!A1:B20"
          #
          def range
            options = @gapi.google_sheets_options
            options.range
          end

          ##
          # Sets the range of a sheet to query from. Only used when non-empty.
          # Typical format:
          # `{sheet_name}!{top_left_cell_id}:{bottom_right_cell_id}`.
          #
          # @param [String] new_range New range of a sheet to query from.
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #
          #   sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
          #   sheets_table = bigquery.external sheets_url do |sheets|
          #     sheets.range = "sheet1!A1:B20"
          #   end
          #
          #   sheets_table.range #=> "sheet1!A1:B20"
          #
          def range= new_range
            # Inherited guard, run before any mutation of the options.
            frozen_check!
            options = @gapi.google_sheets_options
            options.range = new_range
          end
        end
      end
    end
  end
end