google-cloud-bigquery 1.44.2 → 1.46.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/lib/google/cloud/bigquery/dataset.rb +19 -8
- data/lib/google/cloud/bigquery/load_job.rb +31 -0
- data/lib/google/cloud/bigquery/project.rb +367 -0
- data/lib/google/cloud/bigquery/service.rb +13 -1
- data/lib/google/cloud/bigquery/table.rb +17 -5
- data/lib/google/cloud/bigquery/version.rb +1 -1
- data/lib/google/cloud/bigquery.rb +4 -2
- data/lib/google-cloud-bigquery.rb +1 -0
- metadata +9 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: fd9801fc2474d21f9508686c06ff273871ccdefd23abd2a9783da879fdeeb48e
+  data.tar.gz: a18f34abee5a59bb94b9816ab0fefea986a5e368986e92f9517b4b4026b828eb
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ed6b34f928dd213a2b8bfa5dd75ddc09c310f4ba8ed68e3092280c9bbb3c9e4bc5644e2a89478fdae3cfb38c1eb822cb8ac4f8051cd2b54fc7fafa98ff9bbb48
+  data.tar.gz: 1996a4f5a9ac8b61e645093589522b772b175c5fdf1a9b9c2687c8e6742d9fcc8705fd270d714d2fa5b20fad8b52291a89b4eb91eb757bf672613c299c40fd75
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,17 @@
 # Release History
 
+### 1.46.0 (2024-01-25)
+
+#### Features
+
+* Support for universe_domain ([#24448](https://github.com/googleapis/google-cloud-ruby/issues/24448))
+
+### 1.45.0 (2023-09-25)
+
+#### Features
+
+* support load job with session ([#23320](https://github.com/googleapis/google-cloud-ruby/issues/23320))
+
 ### 1.44.2 (2023-09-12)
 
 #### Bug Fixes
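Taken together, the two feature entries map directly onto the API changes in the per-file diffs below. A minimal sketch of how they might combine (the bucket paths and table names are illustrative, not from this diff; `universe_domain` is optional and shown with its standard default):

```ruby
require "google/cloud/bigquery"

# 1.46.0: the client can be pointed at a specific universe domain.
bigquery = Google::Cloud::Bigquery.new universe_domain: "googleapis.com"

# 1.45.0: create a session with one load job...
job = bigquery.load_job "temp_table", "gs://my-bucket/file-name.csv",
                        autodetect: true, create_session: true
job.wait_until_done!
session_id = job.statistics["sessionInfo"]["sessionId"]

# ...then run a follow-up load in the same session.
bigquery.load "temp_table", "gs://my-bucket/file-two.csv", session_id: session_id
```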
data/lib/google/cloud/bigquery/dataset.rb
CHANGED
@@ -2030,15 +2030,20 @@ module Google
 # * The key portion of a label must be unique. However, you can use the
 #   same key with multiple resources.
 # * Keys must start with a lowercase letter or international character.
+# @param [Boolean] dryrun If set, don't actually run this job. Behavior
+#   is undefined however for non-query jobs and may result in an error.
+#   Deprecated.
+# @param [Boolean] create_session If set to true, a new session will be created
+#   and the load job will happen in the table created within that session.
+#   Note: This will work only for the _SESSION dataset.
+# @param [String] session_id Session ID in which the load job must run.
+#
 # @yield [updater] A block for setting the schema and other
 #   options for the destination table. The schema can be omitted if the
 #   destination table already exists, or if you're loading data from a
 #   Google Cloud Datastore backup.
 # @yieldparam [Google::Cloud::Bigquery::LoadJob::Updater] updater An
 #   updater to modify the load job and its schema.
-# @param [Boolean] dryrun If set, don't actually run this job. Behavior
-#   is undefined however for non-query jobs and may result in an error.
-#   Deprecated.
 #
 # @return [Google::Cloud::Bigquery::LoadJob] A new load job object.
 #
@@ -2126,7 +2131,7 @@ module Google
 def load_job table_id, files, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
              quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil,
              quote: nil, skip_leading: nil, schema: nil, job_id: nil, prefix: nil, labels: nil, autodetect: nil,
-             null_marker: nil, dryrun: nil
+             null_marker: nil, dryrun: nil, create_session: nil, session_id: nil
   ensure_service!
 
   updater = load_job_updater table_id,
@@ -2135,7 +2140,8 @@ module Google
                              delimiter: delimiter, ignore_unknown: ignore_unknown,
                              max_bad_records: max_bad_records, quote: quote, skip_leading: skip_leading,
                              dryrun: dryrun, schema: schema, job_id: job_id, prefix: prefix, labels: labels,
-                             autodetect: autodetect, null_marker: null_marker
+                             autodetect: autodetect, null_marker: null_marker, create_session: create_session,
+                             session_id: session_id
 
   yield updater if block_given?
 
@@ -2261,6 +2267,8 @@ module Google
 #   See {Project#schema} for the creation of the schema for use with
 #   this option. Also note that for most use cases, the block yielded by
 #   this method is a more convenient way to configure the schema.
+# @param [String] session_id Session ID in which the load job must run.
+#
 #
 # @yield [updater] A block for setting the schema of the destination
 #   table and other options for the load job. The schema can be omitted
@@ -2353,13 +2361,13 @@ module Google
 #
 def load table_id, files, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
          quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil,
-         quote: nil, skip_leading: nil, schema: nil, autodetect: nil, null_marker: nil, &block
+         quote: nil, skip_leading: nil, schema: nil, autodetect: nil, null_marker: nil, session_id: nil, &block
   job = load_job table_id, files,
                  format: format, create: create, write: write, projection_fields: projection_fields,
                  jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
                  delimiter: delimiter, ignore_unknown: ignore_unknown, max_bad_records: max_bad_records,
                  quote: quote, skip_leading: skip_leading, schema: schema, autodetect: autodetect,
-                 null_marker: null_marker, &block
+                 null_marker: null_marker, session_id: session_id, &block
 
   job.wait_until_done!
   ensure_job_succeeded! job
@@ -2903,7 +2911,8 @@ module Google
 def load_job_updater table_id, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
                      quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil,
                      max_bad_records: nil, quote: nil, skip_leading: nil, dryrun: nil, schema: nil, job_id: nil,
-                     prefix: nil, labels: nil, autodetect: nil, null_marker: nil
+                     prefix: nil, labels: nil, autodetect: nil, null_marker: nil, create_session: nil,
+                     session_id: nil
   new_job = load_job_gapi table_id, dryrun, job_id: job_id, prefix: prefix
   LoadJob::Updater.new(new_job).tap do |job|
     job.location = location if location # may be dataset reference
@@ -2912,6 +2921,8 @@ module Google
     job.schema = schema unless schema.nil?
     job.autodetect = autodetect unless autodetect.nil?
     job.labels = labels unless labels.nil?
+    job.create_session = create_session unless create_session.nil?
+    job.session_id = session_id unless session_id.nil?
     load_job_file_options! job, format: format,
                                 projection_fields: projection_fields,
                                 jagged_rows: jagged_rows,
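Dataset#load_job and Dataset#load simply thread the new keywords through to load_job_updater, which copies them onto the LoadJob::Updater. A hedged usage sketch (the dataset name is illustrative, and the session ID is assumed to come from an earlier `create_session: true` job):

```ruby
require "google/cloud/bigquery"

bigquery   = Google::Cloud::Bigquery.new
dataset    = bigquery.dataset "my_dataset"            # illustrative dataset
session_id = "CgwaCmZvb2JhcmJheg"                     # illustrative ID from a prior session

# The new kwargs flow into load_job_updater, which sets
# job.create_session / job.session_id on the updater.
job = dataset.load_job "my_table", "gs://my-bucket/file-name.csv",
                       session_id: session_id
job.wait_until_done!
```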
data/lib/google/cloud/bigquery/load_job.rb
CHANGED
@@ -1703,6 +1703,37 @@ module Google
   @gapi.configuration.load.update! write_disposition: Convert.write_disposition(new_write)
 end
 
+##
+# Sets the create_session property. If true, creates a new session,
+# where the session ID will be a server-generated random ID. If false,
+# runs the load job with an existing {#session_id=}; otherwise runs the
+# load job in non-session mode. The default value is `false`.
+#
+# @param [Boolean] value The create_session property. The default
+#   value is `false`.
+#
+# @!group Attributes
+def create_session= value
+  @gapi.configuration.load.create_session = value
+end
+
+##
+# Sets the session ID for a load job run in session mode. See {#create_session=}.
+#
+# @param [String] value The session ID. The default value is `nil`.
+#
+# @!group Attributes
+def session_id= value
+  @gapi.configuration.load.connection_properties ||= []
+  prop = @gapi.configuration.load.connection_properties.find { |cp| cp.key == "session_id" }
+  if prop
+    prop.value = value
+  else
+    prop = Google::Apis::BigqueryV2::ConnectionProperty.new key: "session_id", value: value
+    @gapi.configuration.load.connection_properties << prop
+  end
+end
+
 ##
 # Sets the projection fields.
 #
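Note the asymmetry between the two setters: create_session is a first-class field on the load configuration, while the session ID travels as a connectionProperties entry. A minimal sketch of the request objects the setters produce, using the google-apis-bigquery_v2 types that appear in the diff itself (the ID value is illustrative):

```ruby
require "google/apis/bigquery_v2"

config = Google::Apis::BigqueryV2::JobConfigurationLoad.new

# What LoadJob::Updater#create_session= assigns.
config.create_session = true

# What LoadJob::Updater#session_id= appends (or updates in place if a
# "session_id" property already exists).
config.connection_properties = [
  Google::Apis::BigqueryV2::ConnectionProperty.new(key: "session_id", value: "CgwaCmZvb2JhcmJheg")
]
```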
data/lib/google/cloud/bigquery/project.rb
CHANGED
@@ -67,6 +67,15 @@ module Google
   @service = service
 end
 
+##
+# The universe domain the client is connected to.
+#
+# @return [String]
+#
+def universe_domain
+  service.universe_domain
+end
+
 ##
 # The BigQuery project connected to.
 #
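The reader simply surfaces the service-level value; a one-liner to check which universe a client targets (the printed default is an assumption, the standard public domain):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
puts bigquery.universe_domain # => "googleapis.com" unless configured otherwise
```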
@@ -942,6 +951,364 @@ module Google
   job.data max: max
 end
 
+##
+# Loads data into the provided destination table using an asynchronous
+# method. In this method, a {LoadJob} is immediately returned. The
+# caller may poll the service by repeatedly calling {Job#reload!} and
+# {Job#done?} to detect when the job is done, or simply block until the
+# job is done by calling {Job#wait_until_done!}. See also {#load}.
+#
+# For the source of the data, you can pass a google-cloud storage file
+# path or a google-cloud-storage `File` instance. Or, you can upload a
+# file directly. See [Loading Data with a POST
+# Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
+#
+# The geographic location for the job ("US", "EU", etc.) can be set via
+# {LoadJob::Updater#location=} in a block passed to this method.
+#
+# @param [String] table_id The destination table to load the data into.
+# @param [File, Google::Cloud::Storage::File, String, URI,
+#   Array<Google::Cloud::Storage::File, String, URI>] files
+#   A file or the URI of a Google Cloud Storage file, or an Array of
+#   those, containing data to load into the table.
+# @param [String] format The exported file format. The default value is
+#   `csv`.
+#
+#   The following values are supported:
+#
+#   * `csv` - CSV
+#   * `json` - [Newline-delimited JSON](https://jsonlines.org/)
+#   * `avro` - [Avro](http://avro.apache.org/)
+#   * `orc` - [ORC](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-orc)
+#   * `parquet` - [Parquet](https://parquet.apache.org/)
+#   * `datastore_backup` - Cloud Datastore backup
+# @param [String] dataset_id The destination dataset to load the data into.
+#   For a load job with create_session/session_id it defaults to "_SESSION".
+# @param [String] create Specifies whether the job is allowed to create
+#   new tables. The default value is `needed`.
+#
+#   The following values are supported:
+#
+#   * `needed` - Create the table if it does not exist.
+#   * `never` - The table must already exist. A 'notFound' error is
+#     raised if the table does not exist.
+# @param [String] write Specifies how to handle data already present in
+#   the table. The default value is `append`.
+#
+#   The following values are supported:
+#
+#   * `truncate` - BigQuery overwrites the table data.
+#   * `append` - BigQuery appends the data to the table.
+#   * `empty` - An error will be returned if the table already contains
+#     data.
+# @param [Array<String>] projection_fields If the `format` option is set
+#   to `datastore_backup`, indicates which entity properties to load
+#   from a Cloud Datastore backup. Property names are case sensitive and
+#   must be top-level properties. If not set, BigQuery loads all
+#   properties. If any named property isn't found in the Cloud Datastore
+#   backup, an invalid error is returned.
+# @param [Boolean] jagged_rows Accept rows that are missing trailing
+#   optional columns. The missing values are treated as nulls. If
+#   `false`, records with missing trailing columns are treated as bad
+#   records, and if there are too many bad records, an invalid error is
+#   returned in the job result. The default value is `false`. Only
+#   applicable to CSV, ignored for other formats.
+# @param [Boolean] quoted_newlines Indicates if BigQuery should allow
+#   quoted data sections that contain newline characters in a CSV file.
+#   The default value is `false`.
+# @param [Boolean] autodetect Indicates if BigQuery should
+#   automatically infer the options and schema for CSV and JSON sources.
+#   The default value is `false`.
+# @param [String] encoding The character encoding of the data. The
+#   supported values are `UTF-8` or `ISO-8859-1`. The default value is
+#   `UTF-8`.
+# @param [String] delimiter Specifies the separator for fields in a CSV
+#   file. BigQuery converts the string to `ISO-8859-1` encoding, and
+#   then uses the first byte of the encoded string to split the data in
+#   its raw, binary state. Default is <code>,</code>.
+# @param [Boolean] ignore_unknown Indicates if BigQuery should allow
+#   extra values that are not represented in the table schema. If true,
+#   the extra values are ignored. If false, records with extra columns
+#   are treated as bad records, and if there are too many bad records,
+#   an invalid error is returned in the job result. The default value is
+#   `false`.
+#
+#   The `format` property determines what BigQuery treats as an extra
+#   value:
+#
+#   * `CSV`: Trailing columns
+#   * `JSON`: Named values that don't match any column names
+# @param [Integer] max_bad_records The maximum number of bad records
+#   that BigQuery can ignore when running the job. If the number of bad
+#   records exceeds this value, an invalid error is returned in the job
+#   result. The default value is `0`, which requires that all records
+#   are valid.
+# @param [String] null_marker Specifies a string that represents a null
+#   value in a CSV file. For example, if you specify `\N`, BigQuery
+#   interprets `\N` as a null value when loading a CSV file. The default
+#   value is the empty string. If you set this property to a custom
+#   value, BigQuery throws an error if an empty string is present for
+#   all data types except for STRING and BYTE. For STRING and BYTE
+#   columns, BigQuery interprets the empty string as an empty value.
+# @param [String] quote The value that is used to quote data sections in
+#   a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
+#   then uses the first byte of the encoded string to split the data in
+#   its raw, binary state. The default value is a double-quote
+#   <code>"</code>. If your data does not contain quoted sections, set
+#   the property value to an empty string. If your data contains quoted
+#   newline characters, you must also set the allowQuotedNewlines
+#   property to true.
+# @param [Integer] skip_leading The number of rows at the top of a CSV
+#   file that BigQuery will skip when loading the data. The default
+#   value is `0`. This property is useful if you have header rows in the
+#   file that should be skipped.
+# @param [Google::Cloud::Bigquery::Schema] schema The schema for the
+#   destination table. Optional. The schema can be omitted if the
+#   destination table already exists, or if you're loading data from a
+#   Google Cloud Datastore backup.
+#
+#   See {Project#schema} for the creation of the schema for use with
+#   this option. Also note that for most use cases, the block yielded by
+#   this method is a more convenient way to configure the schema.
+# @param [String] job_id A user-defined ID for the load job. The ID
+#   must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
+#   (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
+#   `job_id` is provided, then `prefix` will not be used.
+#
+#   See [Generating a job
+#   ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+# @param [String] prefix A string, usually human-readable, that will be
+#   prepended to a generated value to produce a unique job ID. For
+#   example, the prefix `daily_import_job_` can be given to generate a
+#   job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
+#   prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
+#   underscores (`_`), or dashes (`-`). The maximum length of the entire ID
+#   is 1,024 characters. If `job_id` is provided, then `prefix` will not
+#   be used.
+# @param [Hash] labels A hash of user-provided labels associated with
+#   the job. You can use these to organize and group your jobs.
+#
+#   The labels applied to a resource must meet the following requirements:
+#
+#   * Each resource can have multiple labels, up to a maximum of 64.
+#   * Each label must be a key-value pair.
+#   * Keys have a minimum length of 1 character and a maximum length of
+#     63 characters, and cannot be empty. Values can be empty, and have
+#     a maximum length of 63 characters.
+#   * Keys and values can contain only lowercase letters, numeric characters,
+#     underscores, and dashes. All characters must use UTF-8 encoding, and
+#     international characters are allowed.
+#   * The key portion of a label must be unique. However, you can use the
+#     same key with multiple resources.
+#   * Keys must start with a lowercase letter or international character.
+# @param [Boolean] create_session If set to true, a new session will be created
+#   and the load job will happen in the table created within that session.
+#   Note: This will work only for tables in the _SESSION dataset;
+#   otherwise the property will be ignored by the backend.
+# @param [String] session_id Session ID in which the load job must run.
+#
+# @yield [updater] A block for setting the schema and other
+#   options for the destination table. The schema can be omitted if the
+#   destination table already exists, or if you're loading data from a
+#   Google Cloud Datastore backup.
+# @yieldparam [Google::Cloud::Bigquery::LoadJob::Updater] updater An
+#   updater to modify the load job and its schema.
+# @param [Boolean] dryrun If set, don't actually run this job. Behavior
+#   is undefined however for non-query jobs and may result in an error.
+#   Deprecated.
+#
+# @return [Google::Cloud::Bigquery::LoadJob] A new load job object.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   gs_url = "gs://my-bucket/file-name.csv"
+#   load_job = bigquery.load_job "temp_table", gs_url, autodetect: true, create_session: true
+#   load_job.wait_until_done!
+#   session_id = load_job.statistics["sessionInfo"]["sessionId"]
+#
+def load_job table_id, files, dataset_id: nil, format: nil, create: nil, write: nil,
+             projection_fields: nil, jagged_rows: nil, quoted_newlines: nil, encoding: nil,
+             delimiter: nil, ignore_unknown: nil, max_bad_records: nil, quote: nil,
+             skip_leading: nil, schema: nil, job_id: nil, prefix: nil, labels: nil, autodetect: nil,
+             null_marker: nil, dryrun: nil, create_session: nil, session_id: nil, &block
+  ensure_service!
+  dataset_id ||= "_SESSION" unless create_session.nil? && session_id.nil?
+  session_dataset = dataset dataset_id, skip_lookup: true
+  table = session_dataset.table table_id, skip_lookup: true
+  table.load_job files,
+                 format: format, create: create, write: write, projection_fields: projection_fields,
+                 jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
+                 delimiter: delimiter, ignore_unknown: ignore_unknown,
+                 max_bad_records: max_bad_records, quote: quote, skip_leading: skip_leading,
+                 dryrun: dryrun, schema: schema, job_id: job_id, prefix: prefix, labels: labels,
+                 autodetect: autodetect, null_marker: null_marker, create_session: create_session,
+                 session_id: session_id, &block
+end
+
+##
+# Loads data into the provided destination table using a synchronous
+# method that blocks for a response. Timeouts and transient errors are
+# generally handled as needed to complete the job. See also
+# {#load_job}.
+#
+# For the source of the data, you can pass a google-cloud storage file
+# path or a google-cloud-storage `File` instance. Or, you can upload a
+# file directly. See [Loading Data with a POST
+# Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
+#
+# The geographic location for the job ("US", "EU", etc.) can be set via
+# {LoadJob::Updater#location=} in a block passed to this method.
+#
+# @param [String] table_id The destination table to load the data into.
+# @param [File, Google::Cloud::Storage::File, String, URI,
+#   Array<Google::Cloud::Storage::File, String, URI>] files
+#   A file or the URI of a Google Cloud Storage file, or an Array of
+#   those, containing data to load into the table.
+# @param [String] format The exported file format. The default value is
+#   `csv`.
+#
+#   The following values are supported:
+#
+#   * `csv` - CSV
+#   * `json` - [Newline-delimited JSON](https://jsonlines.org/)
+#   * `avro` - [Avro](http://avro.apache.org/)
+#   * `orc` - [ORC](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-orc)
+#   * `parquet` - [Parquet](https://parquet.apache.org/)
+#   * `datastore_backup` - Cloud Datastore backup
+# @param [String] create Specifies whether the job is allowed to create
+#   new tables. The default value is `needed`.
+#
+#   The following values are supported:
+#
+#   * `needed` - Create the table if it does not exist.
+#   * `never` - The table must already exist. A 'notFound' error is
+#     raised if the table does not exist.
+# @param [String] dataset_id The destination dataset to load the data into.
+#   For a load job with session it defaults to "_SESSION".
+# @param [String] write Specifies how to handle data already present in
+#   the table. The default value is `append`.
+#
+#   The following values are supported:
+#
+#   * `truncate` - BigQuery overwrites the table data.
+#   * `append` - BigQuery appends the data to the table.
+#   * `empty` - An error will be returned if the table already contains
+#     data.
+# @param [Array<String>] projection_fields If the `format` option is set
+#   to `datastore_backup`, indicates which entity properties to load
+#   from a Cloud Datastore backup. Property names are case sensitive and
+#   must be top-level properties. If not set, BigQuery loads all
+#   properties. If any named property isn't found in the Cloud Datastore
+#   backup, an invalid error is returned.
+# @param [Boolean] jagged_rows Accept rows that are missing trailing
+#   optional columns. The missing values are treated as nulls. If
+#   `false`, records with missing trailing columns are treated as bad
+#   records, and if there are too many bad records, an invalid error is
+#   returned in the job result. The default value is `false`. Only
+#   applicable to CSV, ignored for other formats.
+# @param [Boolean] quoted_newlines Indicates if BigQuery should allow
+#   quoted data sections that contain newline characters in a CSV file.
+#   The default value is `false`.
+# @param [Boolean] autodetect Indicates if BigQuery should
+#   automatically infer the options and schema for CSV and JSON sources.
+#   The default value is `false`.
+# @param [String] encoding The character encoding of the data. The
+#   supported values are `UTF-8` or `ISO-8859-1`. The default value is
+#   `UTF-8`.
+# @param [String] delimiter Specifies the separator for fields in a CSV
+#   file. BigQuery converts the string to `ISO-8859-1` encoding, and
+#   then uses the first byte of the encoded string to split the data in
+#   its raw, binary state. Default is <code>,</code>.
+# @param [Boolean] ignore_unknown Indicates if BigQuery should allow
+#   extra values that are not represented in the table schema. If true,
+#   the extra values are ignored. If false, records with extra columns
+#   are treated as bad records, and if there are too many bad records,
+#   an invalid error is returned in the job result. The default value is
+#   `false`.
+#
+#   The `format` property determines what BigQuery treats as an extra
+#   value:
+#
+#   * `CSV`: Trailing columns
+#   * `JSON`: Named values that don't match any column names
+# @param [Integer] max_bad_records The maximum number of bad records
+#   that BigQuery can ignore when running the job. If the number of bad
+#   records exceeds this value, an invalid error is returned in the job
+#   result. The default value is `0`, which requires that all records
+#   are valid.
+# @param [String] null_marker Specifies a string that represents a null
+#   value in a CSV file. For example, if you specify `\N`, BigQuery
+#   interprets `\N` as a null value when loading a CSV file. The default
+#   value is the empty string. If you set this property to a custom
+#   value, BigQuery throws an error if an empty string is present for
+#   all data types except for STRING and BYTE. For STRING and BYTE
+#   columns, BigQuery interprets the empty string as an empty value.
+# @param [String] quote The value that is used to quote data sections in
+#   a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
+#   then uses the first byte of the encoded string to split the data in
+#   its raw, binary state. The default value is a double-quote
+#   <code>"</code>. If your data does not contain quoted sections, set
+#   the property value to an empty string. If your data contains quoted
+#   newline characters, you must also set the allowQuotedNewlines
+#   property to true.
+# @param [Integer] skip_leading The number of rows at the top of a CSV
+#   file that BigQuery will skip when loading the data. The default
+#   value is `0`. This property is useful if you have header rows in the
+#   file that should be skipped.
+# @param [Google::Cloud::Bigquery::Schema] schema The schema for the
+#   destination table. Optional. The schema can be omitted if the
+#   destination table already exists, or if you're loading data from a
+#   Google Cloud Datastore backup.
+#
+#   See {Project#schema} for the creation of the schema for use with
+#   this option. Also note that for most use cases, the block yielded by
+#   this method is a more convenient way to configure the schema.
+# @param [String] session_id Session ID in which the load job must run.
+#
+# @yield [updater] A block for setting the schema of the destination
+#   table and other options for the load job. The schema can be omitted
+#   if the destination table already exists, or if you're loading data
+#   from a Google Cloud Datastore backup.
+# @yieldparam [Google::Cloud::Bigquery::LoadJob::Updater] updater An
+#   updater to modify the load job and its schema.
+#
+# @return [Boolean] Returns `true` if the load job was successful.
+#
+# @example
+#   require "google/cloud/bigquery"
+#
+#   bigquery = Google::Cloud::Bigquery.new
+#
+#   gs_url = "gs://my-bucket/file-name.csv"
+#   bigquery.load "my_new_table", gs_url, dataset_id: "my_dataset" do |schema|
+#     schema.string "first_name", mode: :required
+#     schema.record "cities_lived", mode: :repeated do |nested_schema|
+#       nested_schema.string "place", mode: :required
+#       nested_schema.integer "number_of_years", mode: :required
+#     end
+#   end
+#
+# @!group Data
+#
+def load table_id, files, dataset_id: "_SESSION", format: nil, create: nil, write: nil,
+         projection_fields: nil, jagged_rows: nil, quoted_newlines: nil, encoding: nil,
+         delimiter: nil, ignore_unknown: nil, max_bad_records: nil, quote: nil,
+         skip_leading: nil, schema: nil, autodetect: nil, null_marker: nil, session_id: nil, &block
+  job = load_job table_id, files, dataset_id: dataset_id,
+                 format: format, create: create, write: write, projection_fields: projection_fields,
+                 jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
+                 delimiter: delimiter, ignore_unknown: ignore_unknown, max_bad_records: max_bad_records,
+                 quote: quote, skip_leading: skip_leading, schema: schema, autodetect: autodetect,
+                 null_marker: null_marker, session_id: session_id, &block
+
+  job.wait_until_done!
+  ensure_job_succeeded! job
+  true
+end
+
 ##
 # Creates a new External::DataSource (or subclass) object that
 # represents the external data source that can be queried from directly,
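Worth noting from the two new method bodies: Project#load defaults dataset_id to "_SESSION", while Project#load_job leaves it nil and only falls back to "_SESSION" when create_session or session_id is given. A sketch of the three resulting call shapes (names and bucket path illustrative):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
gs_url   = "gs://my-bucket/file-name.csv"

# Plain load job: dataset_id must name a real dataset.
bigquery.load_job "my_table", gs_url, dataset_id: "my_dataset"

# Session creation: dataset_id falls back to "_SESSION" automatically.
job = bigquery.load_job "temp_table", gs_url, create_session: true
job.wait_until_done!
session_id = job.statistics["sessionInfo"]["sessionId"]

# Synchronous load reusing the session; load already defaults dataset_id to "_SESSION".
bigquery.load "temp_table", gs_url, session_id: session_id
```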
data/lib/google/cloud/bigquery/service.rb
CHANGED
@@ -41,15 +41,26 @@ module Google
 # @private
 attr_reader :retries, :timeout, :host
 
+# @private
+def universe_domain
+  service.universe_domain
+end
+
 ##
 # Creates a new Service instance.
-def initialize project, credentials, retries: nil, timeout: nil, host: nil, quota_project: nil
+def initialize project, credentials,
+               retries: nil,
+               timeout: nil,
+               host: nil,
+               quota_project: nil,
+               universe_domain: nil
   @project = project
   @credentials = credentials
   @retries = retries
   @timeout = timeout
   @host = host
   @quota_project = quota_project
+  @universe_domain = universe_domain
 end
 
 def service
@@ -69,6 +80,7 @@ module Google
   service.request_options.query["prettyPrint"] = false
   service.request_options.quota_project = @quota_project if @quota_project
   service.authorization = @credentials.client
+  service.universe_domain = @universe_domain
   service.root_url = host if host
   service
 end
data/lib/google/cloud/bigquery/table.rb
CHANGED
@@ -2372,6 +2372,11 @@ module Google
 # @param [Boolean] dryrun If set, don't actually run this job. Behavior
 #   is undefined however for non-query jobs and may result in an error.
 #   Deprecated.
+# @param [Boolean] create_session If set to true, a new session will be created
+#   and the load job will happen in the table created within that session.
+#   Note: This will work only for tables in the _SESSION dataset;
+#   otherwise the property will be ignored by the backend.
+# @param [String] session_id Session ID in which the load job must run.
 #
 # @yield [load_job] a block for setting the load job
 # @yieldparam [LoadJob] load_job the load job object to be updated
@@ -2428,7 +2433,7 @@ module Google
 def load_job files, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
              quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil,
              quote: nil, skip_leading: nil, job_id: nil, prefix: nil, labels: nil, autodetect: nil,
-             null_marker: nil, dryrun: nil
+             null_marker: nil, dryrun: nil, create_session: nil, session_id: nil, schema: self.schema
   ensure_service!
 
   updater = load_job_updater format: format, create: create, write: write, projection_fields: projection_fields,
@@ -2436,7 +2441,9 @@ module Google
                              delimiter: delimiter, ignore_unknown: ignore_unknown,
                              max_bad_records: max_bad_records, quote: quote, skip_leading: skip_leading,
                              dryrun: dryrun, job_id: job_id, prefix: prefix, schema: schema, labels: labels,
-                             autodetect: autodetect, null_marker: null_marker
+                             autodetect: autodetect, null_marker: null_marker, create_session: create_session,
+                             session_id: session_id
+
 
   yield updater if block_given?
 
@@ -2551,6 +2558,7 @@ module Google
 #   file that BigQuery will skip when loading the data. The default
 #   value is `0`. This property is useful if you have header rows in the
 #   file that should be skipped.
+# @param [String] session_id Session ID in which the load job must run.
 #
 # @yield [updater] A block for setting the schema of the destination
 #   table and other options for the load job. The schema can be omitted
@@ -2612,12 +2620,13 @@ module Google
 #
 def load files, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
          quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil,
-         quote: nil, skip_leading: nil, autodetect: nil, null_marker: nil, &block
+         quote: nil, skip_leading: nil, autodetect: nil, null_marker: nil, session_id: nil,
+         schema: self.schema, &block
   job = load_job files, format: format, create: create, write: write, projection_fields: projection_fields,
                  jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
                  delimiter: delimiter, ignore_unknown: ignore_unknown, max_bad_records: max_bad_records,
                  quote: quote, skip_leading: skip_leading, autodetect: autodetect,
-                 null_marker: null_marker, &block
+                 null_marker: null_marker, session_id: session_id, schema: schema, &block
 
   job.wait_until_done!
   ensure_job_succeeded! job
@@ -3114,7 +3123,8 @@ module Google
 def load_job_updater format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
                      quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil,
                      max_bad_records: nil, quote: nil, skip_leading: nil, dryrun: nil, schema: nil, job_id: nil,
-                     prefix: nil, labels: nil, autodetect: nil, null_marker: nil
+                     prefix: nil, labels: nil, autodetect: nil, null_marker: nil,
+                     create_session: nil, session_id: nil
   new_job = load_job_gapi table_id, dryrun, job_id: job_id, prefix: prefix
   LoadJob::Updater.new(new_job).tap do |job|
     job.location = location if location # may be table reference
@@ -3123,6 +3133,8 @@ module Google
     job.schema = schema unless schema.nil?
     job.autodetect = autodetect unless autodetect.nil?
     job.labels = labels unless labels.nil?
+    job.create_session = create_session unless create_session.nil?
+    job.session_id = session_id unless session_id.nil?
     load_job_file_options! job, format: format,
                                 projection_fields: projection_fields,
                                 jagged_rows: jagged_rows,
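At the Table level the same session options apply; note also that load_job and load now take `schema: self.schema`, so the table's current schema is threaded through to the updater by default. A hedged sketch against a session-scoped table (the session ID is illustrative, from a prior `create_session: true` job):

```ruby
require "google/cloud/bigquery"

bigquery   = Google::Cloud::Bigquery.new
session_id = "CgwaCmZvb2JhcmJheg" # illustrative ID from a previous create_session load job

# skip_lookup avoids a GET against the pseudo "_SESSION" dataset/table.
table = bigquery.dataset("_SESSION", skip_lookup: true)
                .table("temp_table", skip_lookup: true)
table.load "gs://my-bucket/file-name.csv", session_id: session_id
```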
data/lib/google/cloud/bigquery.rb
CHANGED
@@ -67,12 +67,13 @@ module Google
 #   table = dataset.table "my_table"
 #
 def self.new project_id: nil, credentials: nil, scope: nil, retries: nil, timeout: nil, endpoint: nil,
-             project: nil, keyfile: nil
+             project: nil, keyfile: nil, universe_domain: nil
   scope ||= configure.scope
   retries ||= configure.retries
   timeout ||= configure.timeout
   endpoint ||= configure.endpoint
   credentials ||= (keyfile || default_credentials(scope: scope))
+  universe_domain ||= configure.universe_domain
 
   unless credentials.is_a? Google::Auth::Credentials
     credentials = Bigquery::Credentials.new credentials, scope: scope
@@ -84,7 +85,8 @@ module Google
   Bigquery::Project.new(
     Bigquery::Service.new(
       project_id, credentials,
-      retries: retries, timeout: timeout, host: endpoint, quota_project: configure.quota_project
+      retries: retries, timeout: timeout, host: endpoint,
+      quota_project: configure.quota_project, universe_domain: universe_domain
     )
   )
 end
data/google-cloud-bigquery.rb
CHANGED
@@ -140,4 +140,5 @@ Google::Cloud.configure.add_config! :bigquery do |config|
   config.add_field! :retries, nil, match: Integer
   config.add_field! :timeout, nil, match: Integer
   config.add_field! :endpoint, default_endpoint, match: String, allow_nil: true
+  config.add_field! :universe_domain, nil, match: String, allow_nil: true
 end
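With the new config field, the domain can also be set once, process-wide, rather than per client; an explicit `universe_domain:` argument to `Google::Cloud::Bigquery.new` still takes precedence. A sketch (the value shown is the assumed public default):

```ruby
require "google/cloud/bigquery"

Google::Cloud::Bigquery.configure do |config|
  config.universe_domain = "googleapis.com"
end

bigquery = Google::Cloud::Bigquery.new # picks up configure.universe_domain
```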
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: google-cloud-bigquery
 version: !ruby/object:Gem::Version
-  version: 1.44.2
+  version: 1.46.0
 platform: ruby
 authors:
 - Mike Moore
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2024-01-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: concurrent-ruby
@@ -31,34 +31,28 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '0.
+        version: '0.62'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '0.
+        version: '0.62'
 - !ruby/object:Gem::Dependency
   name: googleauth
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: 0.16.2
-    - - "<"
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: 2.a
+        version: '1.9'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: 0.16.2
-    - - "<"
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: 2.a
+        version: '1.9'
 - !ruby/object:Gem::Dependency
   name: google-cloud-core
   requirement: !ruby/object:Gem::Requirement
@@ -321,7 +315,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubygems_version: 3.
+rubygems_version: 3.5.3
 signing_key:
 specification_version: 4
 summary: API Client library for Google BigQuery