google-cloud-bigquery 1.31.0 → 1.32.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/lib/google/cloud/bigquery/external.rb +9 -2619
- data/lib/google/cloud/bigquery/external/bigtable_source.rb +230 -0
- data/lib/google/cloud/bigquery/external/bigtable_source/column.rb +404 -0
- data/lib/google/cloud/bigquery/external/bigtable_source/column_family.rb +945 -0
- data/lib/google/cloud/bigquery/external/csv_source.rb +481 -0
- data/lib/google/cloud/bigquery/external/data_source.rb +771 -0
- data/lib/google/cloud/bigquery/external/json_source.rb +170 -0
- data/lib/google/cloud/bigquery/external/parquet_source.rb +148 -0
- data/lib/google/cloud/bigquery/external/sheets_source.rb +166 -0
- data/lib/google/cloud/bigquery/load_job.rb +103 -0
- data/lib/google/cloud/bigquery/version.rb +1 -1
- metadata +23 -9
@@ -0,0 +1,170 @@
|
|
1
|
+
# Copyright 2021 Google LLC
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
module Google
|
17
|
+
module Cloud
|
18
|
+
module Bigquery
|
19
|
+
module External
|
20
|
+
##
|
21
|
+
# # JsonSource
|
22
|
+
#
|
23
|
+
# {External::JsonSource} is a subclass of {External::DataSource} and
|
24
|
+
# represents a JSON external data source that can be queried from
|
25
|
+
# directly, such as Google Cloud Storage or Google Drive, even though
|
26
|
+
# the data is not stored in BigQuery. Instead of loading or streaming
|
27
|
+
# the data, this object references the external data source.
|
28
|
+
#
|
29
|
+
# @example
|
30
|
+
# require "google/cloud/bigquery"
|
31
|
+
#
|
32
|
+
# bigquery = Google::Cloud::Bigquery.new
|
33
|
+
#
|
34
|
+
# require "google/cloud/bigquery"
|
35
|
+
#
|
36
|
+
# bigquery = Google::Cloud::Bigquery.new
|
37
|
+
#
|
38
|
+
# json_url = "gs://bucket/path/to/data.json"
|
39
|
+
# json_table = bigquery.external json_url do |json|
|
40
|
+
# json.schema do |schema|
|
41
|
+
# schema.string "name", mode: :required
|
42
|
+
# schema.string "email", mode: :required
|
43
|
+
# schema.integer "age", mode: :required
|
44
|
+
# schema.boolean "active", mode: :required
|
45
|
+
# end
|
46
|
+
# end
|
47
|
+
#
|
48
|
+
# data = bigquery.query "SELECT * FROM my_ext_table",
|
49
|
+
# external: { my_ext_table: json_table }
|
50
|
+
#
|
51
|
+
# # Iterate over the first page of results
|
52
|
+
# data.each do |row|
|
53
|
+
# puts row[:name]
|
54
|
+
# end
|
55
|
+
# # Retrieve the next page of results
|
56
|
+
# data = data.next if data.next?
|
57
|
+
#
|
58
|
+
class JsonSource < External::DataSource
|
59
|
+
##
|
60
|
+
# The schema for the data.
|
61
|
+
#
|
62
|
+
# @param [Boolean] replace Whether to replace the existing schema with
|
63
|
+
# the new schema. If `true`, the fields will replace the existing
|
64
|
+
# schema. If `false`, the fields will be added to the existing
|
65
|
+
# schema. The default value is `false`.
|
66
|
+
# @yield [schema] a block for setting the schema
|
67
|
+
# @yieldparam [Schema] schema the object accepting the schema
|
68
|
+
#
|
69
|
+
# @return [Google::Cloud::Bigquery::Schema]
|
70
|
+
#
|
71
|
+
# @example
|
72
|
+
# require "google/cloud/bigquery"
|
73
|
+
#
|
74
|
+
# bigquery = Google::Cloud::Bigquery.new
|
75
|
+
#
|
76
|
+
# json_url = "gs://bucket/path/to/data.json"
|
77
|
+
# json_table = bigquery.external json_url do |json|
|
78
|
+
# json.schema do |schema|
|
79
|
+
# schema.string "name", mode: :required
|
80
|
+
# schema.string "email", mode: :required
|
81
|
+
# schema.integer "age", mode: :required
|
82
|
+
# schema.boolean "active", mode: :required
|
83
|
+
# end
|
84
|
+
# end
|
85
|
+
#
|
86
|
+
def schema replace: false
|
87
|
+
@schema ||= Schema.from_gapi @gapi.schema
|
88
|
+
if replace
|
89
|
+
frozen_check!
|
90
|
+
@schema = Schema.from_gapi
|
91
|
+
end
|
92
|
+
@schema.freeze if frozen?
|
93
|
+
yield @schema if block_given?
|
94
|
+
@schema
|
95
|
+
end
|
96
|
+
|
97
|
+
##
|
98
|
+
# Set the schema for the data.
|
99
|
+
#
|
100
|
+
# @param [Schema] new_schema The schema object.
|
101
|
+
#
|
102
|
+
# @example
|
103
|
+
# require "google/cloud/bigquery"
|
104
|
+
#
|
105
|
+
# bigquery = Google::Cloud::Bigquery.new
|
106
|
+
#
|
107
|
+
# json_schema = bigquery.schema do |schema|
|
108
|
+
# schema.string "name", mode: :required
|
109
|
+
# schema.string "email", mode: :required
|
110
|
+
# schema.integer "age", mode: :required
|
111
|
+
# schema.boolean "active", mode: :required
|
112
|
+
# end
|
113
|
+
#
|
114
|
+
# json_url = "gs://bucket/path/to/data.json"
|
115
|
+
# json_table = bigquery.external json_url
|
116
|
+
# json_table.schema = json_schema
|
117
|
+
#
|
118
|
+
def schema= new_schema
|
119
|
+
frozen_check!
|
120
|
+
@schema = new_schema
|
121
|
+
end
|
122
|
+
|
123
|
+
##
|
124
|
+
# The fields of the schema.
|
125
|
+
#
|
126
|
+
# @return [Array<Schema::Field>] An array of field objects.
|
127
|
+
#
|
128
|
+
def fields
|
129
|
+
schema.fields
|
130
|
+
end
|
131
|
+
|
132
|
+
##
|
133
|
+
# The names of the columns in the schema.
|
134
|
+
#
|
135
|
+
# @return [Array<Symbol>] An array of column names.
|
136
|
+
#
|
137
|
+
def headers
|
138
|
+
schema.headers
|
139
|
+
end
|
140
|
+
|
141
|
+
##
|
142
|
+
# The types of the fields in the data in the schema, using the same
|
143
|
+
# format as the optional query parameter types.
|
144
|
+
#
|
145
|
+
# @return [Hash] A hash with field names as keys, and types as values.
|
146
|
+
#
|
147
|
+
def param_types
|
148
|
+
schema.param_types
|
149
|
+
end
|
150
|
+
|
151
|
+
##
|
152
|
+
# @private Google API Client object.
|
153
|
+
def to_gapi
|
154
|
+
@gapi.schema = @schema.to_gapi if @schema
|
155
|
+
@gapi
|
156
|
+
end
|
157
|
+
|
158
|
+
##
|
159
|
+
# @private Google API Client object.
|
160
|
+
def self.from_gapi gapi
|
161
|
+
new_table = super
|
162
|
+
schema = Schema.from_gapi gapi.schema
|
163
|
+
new_table.instance_variable_set :@schema, schema
|
164
|
+
new_table
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
168
|
+
end
|
169
|
+
end
|
170
|
+
end
|
@@ -0,0 +1,148 @@
|
|
1
|
+
# Copyright 2021 Google LLC
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
require "google/apis/bigquery_v2"
|
17
|
+
|
18
|
+
module Google
|
19
|
+
module Cloud
|
20
|
+
module Bigquery
|
21
|
+
module External
|
22
|
+
##
|
23
|
+
# # ParquetSource
|
24
|
+
#
|
25
|
+
# {External::ParquetSource} is a subclass of {External::DataSource} and
|
26
|
+
# represents a Parquet external data source that can be queried
|
27
|
+
# from directly, even though the data is not stored in BigQuery. Instead
|
28
|
+
# of loading or streaming the data, this object references the external
|
29
|
+
# data source.
|
30
|
+
#
|
31
|
+
# @example
|
32
|
+
# require "google/cloud/bigquery"
|
33
|
+
#
|
34
|
+
# bigquery = Google::Cloud::Bigquery.new
|
35
|
+
#
|
36
|
+
# parquet_url = "gs://bucket/path/to/data.parquet"
|
37
|
+
# parquet_table = bigquery.external parquet_url do |parquet|
|
38
|
+
# parquet.enable_list_inference = true
|
39
|
+
# end
|
40
|
+
#
|
41
|
+
# data = bigquery.query "SELECT * FROM my_ext_table",
|
42
|
+
# external: { my_ext_table: parquet_table }
|
43
|
+
#
|
44
|
+
# # Iterate over the first page of results
|
45
|
+
# data.each do |row|
|
46
|
+
# puts row[:name]
|
47
|
+
# end
|
48
|
+
# # Retrieve the next page of results
|
49
|
+
# data = data.next if data.next?
|
50
|
+
#
|
51
|
+
class ParquetSource < External::DataSource
|
52
|
+
##
|
53
|
+
# @private Create an empty ParquetSource object.
|
54
|
+
def initialize
|
55
|
+
super
|
56
|
+
@gapi.parquet_options = Google::Apis::BigqueryV2::ParquetOptions.new
|
57
|
+
end
|
58
|
+
|
59
|
+
##
|
60
|
+
# Indicates whether to use schema inference specifically for Parquet `LIST` logical type.
|
61
|
+
#
|
62
|
+
# @return [Boolean]
|
63
|
+
#
|
64
|
+
# @example
|
65
|
+
# require "google/cloud/bigquery"
|
66
|
+
#
|
67
|
+
# bigquery = Google::Cloud::Bigquery.new
|
68
|
+
#
|
69
|
+
# parquet_url = "gs://bucket/path/to/data.parquet"
|
70
|
+
# parquet_table = bigquery.external parquet_url do |parquet|
|
71
|
+
# parquet.enable_list_inference = true
|
72
|
+
# end
|
73
|
+
#
|
74
|
+
# parquet_table.enable_list_inference #=> true
|
75
|
+
#
|
76
|
+
def enable_list_inference
|
77
|
+
@gapi.parquet_options.enable_list_inference
|
78
|
+
end
|
79
|
+
|
80
|
+
##
|
81
|
+
# Sets whether to use schema inference specifically for Parquet `LIST` logical type.
|
82
|
+
#
|
83
|
+
# @param [Boolean] new_enable_list_inference The new `enable_list_inference` value.
|
84
|
+
#
|
85
|
+
# @example
|
86
|
+
# require "google/cloud/bigquery"
|
87
|
+
#
|
88
|
+
# bigquery = Google::Cloud::Bigquery.new
|
89
|
+
#
|
90
|
+
# parquet_url = "gs://bucket/path/to/data.parquet"
|
91
|
+
# parquet_table = bigquery.external parquet_url do |parquet|
|
92
|
+
# parquet.enable_list_inference = true
|
93
|
+
# end
|
94
|
+
#
|
95
|
+
# parquet_table.enable_list_inference #=> true
|
96
|
+
#
|
97
|
+
def enable_list_inference= new_enable_list_inference
|
98
|
+
frozen_check!
|
99
|
+
@gapi.parquet_options.enable_list_inference = new_enable_list_inference
|
100
|
+
end
|
101
|
+
|
102
|
+
##
|
103
|
+
# Indicates whether to infer Parquet `ENUM` logical type as `STRING` instead of `BYTES` by default.
|
104
|
+
#
|
105
|
+
# @return [Boolean]
|
106
|
+
#
|
107
|
+
# @example
|
108
|
+
# require "google/cloud/bigquery"
|
109
|
+
#
|
110
|
+
# bigquery = Google::Cloud::Bigquery.new
|
111
|
+
#
|
112
|
+
# parquet_url = "gs://bucket/path/to/data.parquet"
|
113
|
+
# parquet_table = bigquery.external parquet_url do |parquet|
|
114
|
+
# parquet.enum_as_string = true
|
115
|
+
# end
|
116
|
+
#
|
117
|
+
# parquet_table.enum_as_string #=> true
|
118
|
+
#
|
119
|
+
def enum_as_string
|
120
|
+
@gapi.parquet_options.enum_as_string
|
121
|
+
end
|
122
|
+
|
123
|
+
##
|
124
|
+
# Sets whether to infer Parquet `ENUM` logical type as `STRING` instead of `BYTES` by default.
|
125
|
+
#
|
126
|
+
# @param [Boolean] new_enum_as_string The new `enum_as_string` value.
|
127
|
+
#
|
128
|
+
# @example
|
129
|
+
# require "google/cloud/bigquery"
|
130
|
+
#
|
131
|
+
# bigquery = Google::Cloud::Bigquery.new
|
132
|
+
#
|
133
|
+
# parquet_url = "gs://bucket/path/to/data.parquet"
|
134
|
+
# parquet_table = bigquery.external parquet_url do |parquet|
|
135
|
+
# parquet.enum_as_string = true
|
136
|
+
# end
|
137
|
+
#
|
138
|
+
# parquet_table.enum_as_string #=> true
|
139
|
+
#
|
140
|
+
def enum_as_string= new_enum_as_string
|
141
|
+
frozen_check!
|
142
|
+
@gapi.parquet_options.enum_as_string = new_enum_as_string
|
143
|
+
end
|
144
|
+
end
|
145
|
+
end
|
146
|
+
end
|
147
|
+
end
|
148
|
+
end
|
@@ -0,0 +1,166 @@
|
|
1
|
+
# Copyright 2021 Google LLC
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
require "google/apis/bigquery_v2"
|
17
|
+
|
18
|
+
module Google
|
19
|
+
module Cloud
|
20
|
+
module Bigquery
|
21
|
+
module External
|
22
|
+
##
|
23
|
+
# # SheetsSource
|
24
|
+
#
|
25
|
+
# {External::SheetsSource} is a subclass of {External::DataSource} and
|
26
|
+
# represents a Google Sheets external data source that can be queried
|
27
|
+
# from directly, even though the data is not stored in BigQuery. Instead
|
28
|
+
# of loading or streaming the data, this object references the external
|
29
|
+
# data source.
|
30
|
+
#
|
31
|
+
# @example
|
32
|
+
# require "google/cloud/bigquery"
|
33
|
+
#
|
34
|
+
# bigquery = Google::Cloud::Bigquery.new
|
35
|
+
#
|
36
|
+
# sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
|
37
|
+
# sheets_table = bigquery.external sheets_url do |sheets|
|
38
|
+
# sheets.skip_leading_rows = 1
|
39
|
+
# end
|
40
|
+
#
|
41
|
+
# data = bigquery.query "SELECT * FROM my_ext_table",
|
42
|
+
# external: { my_ext_table: sheets_table }
|
43
|
+
#
|
44
|
+
# # Iterate over the first page of results
|
45
|
+
# data.each do |row|
|
46
|
+
# puts row[:name]
|
47
|
+
# end
|
48
|
+
# # Retrieve the next page of results
|
49
|
+
# data = data.next if data.next?
|
50
|
+
#
|
51
|
+
class SheetsSource < External::DataSource
|
52
|
+
##
|
53
|
+
# @private Create an empty SheetsSource object.
|
54
|
+
def initialize
|
55
|
+
super
|
56
|
+
@gapi.google_sheets_options = Google::Apis::BigqueryV2::GoogleSheetsOptions.new
|
57
|
+
end
|
58
|
+
|
59
|
+
##
|
60
|
+
# The number of rows at the top of a sheet that BigQuery will skip
|
61
|
+
# when reading the data. The default value is `0`.
|
62
|
+
#
|
63
|
+
# This property is useful if you have header rows that should be
|
64
|
+
# skipped. When `autodetect` is on, behavior is the following:
|
65
|
+
#
|
66
|
+
# * `nil` - Autodetect tries to detect headers in the first row. If
|
67
|
+
# they are not detected, the row is read as data. Otherwise data is
|
68
|
+
# read starting from the second row.
|
69
|
+
# * `0` - Instructs autodetect that there are no headers and data
|
70
|
+
# should be read starting from the first row.
|
71
|
+
# * `N > 0` - Autodetect skips `N-1` rows and tries to detect headers
|
72
|
+
# in row `N`. If headers are not detected, row `N` is just skipped.
|
73
|
+
# Otherwise row `N` is used to extract column names for the detected
|
74
|
+
# schema.
|
75
|
+
#
|
76
|
+
# @return [Integer]
|
77
|
+
#
|
78
|
+
# @example
|
79
|
+
# require "google/cloud/bigquery"
|
80
|
+
#
|
81
|
+
# bigquery = Google::Cloud::Bigquery.new
|
82
|
+
#
|
83
|
+
# sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
|
84
|
+
# sheets_table = bigquery.external sheets_url do |sheets|
|
85
|
+
# sheets.skip_leading_rows = 1
|
86
|
+
# end
|
87
|
+
#
|
88
|
+
# sheets_table.skip_leading_rows #=> 1
|
89
|
+
#
|
90
|
+
def skip_leading_rows
|
91
|
+
@gapi.google_sheets_options.skip_leading_rows
|
92
|
+
end
|
93
|
+
|
94
|
+
##
|
95
|
+
# Set the number of rows at the top of a sheet that BigQuery will skip
|
96
|
+
# when reading the data.
|
97
|
+
#
|
98
|
+
# @param [Integer] row_count New skip_leading_rows value
|
99
|
+
#
|
100
|
+
# @example
|
101
|
+
# require "google/cloud/bigquery"
|
102
|
+
#
|
103
|
+
# bigquery = Google::Cloud::Bigquery.new
|
104
|
+
#
|
105
|
+
# sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
|
106
|
+
# sheets_table = bigquery.external sheets_url do |sheets|
|
107
|
+
# sheets.skip_leading_rows = 1
|
108
|
+
# end
|
109
|
+
#
|
110
|
+
# sheets_table.skip_leading_rows #=> 1
|
111
|
+
#
|
112
|
+
def skip_leading_rows= row_count
|
113
|
+
frozen_check!
|
114
|
+
@gapi.google_sheets_options.skip_leading_rows = row_count
|
115
|
+
end
|
116
|
+
|
117
|
+
##
|
118
|
+
# Range of a sheet to query from. Only used when non-empty. Typical
|
119
|
+
# format: `{sheet_name}!{top_left_cell_id}:{bottom_right_cell_id}`.
|
120
|
+
#
|
121
|
+
# @return [String] Range of a sheet to query from.
|
122
|
+
#
|
123
|
+
# @example
|
124
|
+
# require "google/cloud/bigquery"
|
125
|
+
#
|
126
|
+
# bigquery = Google::Cloud::Bigquery.new
|
127
|
+
#
|
128
|
+
# sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
|
129
|
+
# sheets_table = bigquery.external sheets_url do |sheets|
|
130
|
+
# sheets.range = "sheet1!A1:B20"
|
131
|
+
# end
|
132
|
+
#
|
133
|
+
# sheets_table.range #=> "sheet1!A1:B20"
|
134
|
+
#
|
135
|
+
def range
|
136
|
+
@gapi.google_sheets_options.range
|
137
|
+
end
|
138
|
+
|
139
|
+
##
|
140
|
+
# Set the range of a sheet to query from. Only used when non-empty.
|
141
|
+
# Typical format:
|
142
|
+
# `{sheet_name}!{top_left_cell_id}:{bottom_right_cell_id}`.
|
143
|
+
#
|
144
|
+
# @param [String] new_range New range of a sheet to query from.
|
145
|
+
#
|
146
|
+
# @example
|
147
|
+
# require "google/cloud/bigquery"
|
148
|
+
#
|
149
|
+
# bigquery = Google::Cloud::Bigquery.new
|
150
|
+
#
|
151
|
+
# sheets_url = "https://docs.google.com/spreadsheets/d/1234567980"
|
152
|
+
# sheets_table = bigquery.external sheets_url do |sheets|
|
153
|
+
# sheets.range = "sheet1!A1:B20"
|
154
|
+
# end
|
155
|
+
#
|
156
|
+
# sheets_table.range #=> "sheet1!A1:B20"
|
157
|
+
#
|
158
|
+
def range= new_range
|
159
|
+
frozen_check!
|
160
|
+
@gapi.google_sheets_options.range = new_range
|
161
|
+
end
|
162
|
+
end
|
163
|
+
end
|
164
|
+
end
|
165
|
+
end
|
166
|
+
end
|