google-cloud-bigquery 1.44.1 → 1.45.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/lib/google/cloud/bigquery/dataset.rb +19 -9
- data/lib/google/cloud/bigquery/load_job.rb +31 -0
- data/lib/google/cloud/bigquery/project.rb +358 -0
- data/lib/google/cloud/bigquery/table.rb +17 -5
- data/lib/google/cloud/bigquery/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: '03038d9349c8061b4259b0d954f722cbe1cbe52da5126b1684729bd23ab771ac'
+  data.tar.gz: d56ca5d84af79507553d866be87cf0b38ac14341c00b94fddf71ce6a5275171e
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: aacd27d7aa0e1301009bacee0569ed5f20280c4f0f7e025acfa0a99251874ee1220079ac2b94478ebb0664262c7ce5720e3df59c0cde71e52adc70ab1e05c766
+  data.tar.gz: 7bb66689b79db32fdc6e1183f365107f99b95272dbca2329acbbb5520e1516da3310ce317a364d19a18cff29d304943d1350f61cc107ee9d6256e8d02855f64e
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,17 @@
 # Release History
 
+### 1.45.0 (2023-09-25)
+
+#### Features
+
+* support load job with session ([#23320](https://github.com/googleapis/google-cloud-ruby/issues/23320))
+
+### 1.44.2 (2023-09-12)
+
+#### Bug Fixes
+
+* Avoid dataset reload when accessing location ([#22905](https://github.com/googleapis/google-cloud-ruby/issues/22905))
+
 ### 1.44.1 (2023-09-08)
 
 #### Bug Fixes
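In practice the headline 1.45.0 feature, "support load job with session", looks roughly like the following. This is a sketch against the new `Project#load_job`/`Project#load` surface shown further down; the bucket URL and table name are illustrative.

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
gs_url   = "gs://my-bucket/file-name.csv"

# Start a session with the first load job...
job = bigquery.load_job "temp_table", gs_url, autodetect: true, create_session: true
job.wait_until_done!
session_id = job.statistics["sessionInfo"]["sessionId"]

# ...then run further loads inside the same session.
bigquery.load "temp_table", gs_url, autodetect: true, session_id: session_id
```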
data/lib/google/cloud/bigquery/dataset.rb
CHANGED
@@ -269,7 +269,6 @@ module Google
         #
         def location
           return nil if reference?
-          ensure_full_data!
           @gapi.location
         end
 
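This removal is the 1.44.2 fix noted in the changelog ("Avoid dataset reload when accessing location"): `Dataset#location` now returns the value already present on the resource instead of forcing a full metadata fetch first. A sketch of where this matters; datasets returned by `Project#datasets` are partial resources, and reading their location should no longer trigger an extra `datasets.get` call per item.

```ruby
bigquery.datasets.each do |dataset|
  # location is served from the list response in 1.44.2+
  puts "#{dataset.dataset_id}: #{dataset.location}"
end
```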
@@ -2031,15 +2030,20 @@ module Google
         # * The key portion of a label must be unique. However, you can use the
         #   same key with multiple resources.
         # * Keys must start with a lowercase letter or international character.
+        # @param [Boolean] dryrun If set, don't actually run this job. Behavior
+        #   is undefined however for non-query jobs and may result in an error.
+        #   Deprecated.
+        # @param [Boolean] create_session If set to true a new session will be created
+        #   and the load job will happen in the table created within that session.
+        #   Note: This will work only for _SESSION dataset.
+        # @param [string] session_id Session ID in which the load job must run.
+        #
         # @yield [updater] A block for setting the schema and other
         #   options for the destination table. The schema can be omitted if the
         #   destination table already exists, or if you're loading data from a
         #   Google Cloud Datastore backup.
         # @yieldparam [Google::Cloud::Bigquery::LoadJob::Updater] updater An
         #   updater to modify the load job and its schema.
-        # @param [Boolean] dryrun If set, don't actually run this job. Behavior
-        #   is undefined however for non-query jobs and may result in an error.
-        #   Deprecated.
         #
         # @return [Google::Cloud::Bigquery::LoadJob] A new load job object.
         #
@@ -2127,7 +2131,7 @@ module Google
         def load_job table_id, files, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
                      quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil,
                      quote: nil, skip_leading: nil, schema: nil, job_id: nil, prefix: nil, labels: nil, autodetect: nil,
-                     null_marker: nil, dryrun: nil
+                     null_marker: nil, dryrun: nil, create_session: nil, session_id: nil
           ensure_service!
 
           updater = load_job_updater table_id,
@@ -2136,7 +2140,8 @@ module Google
                                      delimiter: delimiter, ignore_unknown: ignore_unknown,
                                      max_bad_records: max_bad_records, quote: quote, skip_leading: skip_leading,
                                      dryrun: dryrun, schema: schema, job_id: job_id, prefix: prefix, labels: labels,
-                                     autodetect: autodetect, null_marker: null_marker
+                                     autodetect: autodetect, null_marker: null_marker, create_session: create_session,
+                                     session_id: session_id
 
           yield updater if block_given?
 
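With the plumbing above, session options flow straight through `Dataset#load_job`. A minimal sketch; `gs_url` points at an existing Cloud Storage file, `session_id` was captured from an earlier `create_session: true` job, and the dataset is a `_SESSION` reference as the new docs require.

```ruby
session_dataset = bigquery.dataset "_SESSION", skip_lookup: true

job = session_dataset.load_job "temp_table", gs_url, session_id: session_id
job.wait_until_done!
```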
@@ -2262,6 +2267,8 @@ module Google
         #   See {Project#schema} for the creation of the schema for use with
         #   this option. Also note that for most use cases, the block yielded by
         #   this method is a more convenient way to configure the schema.
+        # @param [string] session_id Session ID in which the load job must run.
+        #
         #
         # @yield [updater] A block for setting the schema of the destination
         #   table and other options for the load job. The schema can be omitted
@@ -2354,13 +2361,13 @@ module Google
         #
         def load table_id, files, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
                  quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil,
-                 quote: nil, skip_leading: nil, schema: nil, autodetect: nil, null_marker: nil, &block
+                 quote: nil, skip_leading: nil, schema: nil, autodetect: nil, null_marker: nil, session_id: nil, &block
           job = load_job table_id, files,
                          format: format, create: create, write: write, projection_fields: projection_fields,
                          jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
                          delimiter: delimiter, ignore_unknown: ignore_unknown, max_bad_records: max_bad_records,
                          quote: quote, skip_leading: skip_leading, schema: schema, autodetect: autodetect,
-                         null_marker: null_marker, &block
+                         null_marker: null_marker, session_id: session_id, &block
 
           job.wait_until_done!
           ensure_job_succeeded! job
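The synchronous `Dataset#load` gains the same `session_id` passthrough: it builds the load job, blocks until it finishes, and raises if it failed. A sketch under the same assumptions as above:

```ruby
# Returns true on success; raises if the load job failed.
session_dataset.load "temp_table", gs_url, session_id: session_id
```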
@@ -2904,7 +2911,8 @@ module Google
         def load_job_updater table_id, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
                              quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil,
                              max_bad_records: nil, quote: nil, skip_leading: nil, dryrun: nil, schema: nil, job_id: nil,
-                             prefix: nil, labels: nil, autodetect: nil, null_marker: nil
+                             prefix: nil, labels: nil, autodetect: nil, null_marker: nil, create_session: nil,
+                             session_id: nil
           new_job = load_job_gapi table_id, dryrun, job_id: job_id, prefix: prefix
           LoadJob::Updater.new(new_job).tap do |job|
             job.location = location if location # may be dataset reference
@@ -2913,6 +2921,8 @@ module Google
             job.schema = schema unless schema.nil?
             job.autodetect = autodetect unless autodetect.nil?
             job.labels = labels unless labels.nil?
+            job.create_session = create_session unless create_session.nil?
+            job.session_id = session_id unless session_id.nil?
             load_job_file_options! job, format: format,
                                         projection_fields: projection_fields,
                                         jagged_rows: jagged_rows,
data/lib/google/cloud/bigquery/load_job.rb
CHANGED
@@ -1703,6 +1703,37 @@ module Google
           @gapi.configuration.load.update! write_disposition: Convert.write_disposition(new_write)
         end
 
+        ##
+        # Sets the create_session property. If true, creates a new session,
+        # where session id will be a server generated random id. If false,
+        # runs query with an existing {#session_id=}, otherwise runs query in
+        # non-session mode. The default value is `false`.
+        #
+        # @param [Boolean] value The create_session property. The default
+        #   value is `false`.
+        #
+        # @!group Attributes
+        def create_session= value
+          @gapi.configuration.load.create_session = value
+        end
+
+        ##
+        # Sets the session ID for a query run in session mode. See {#create_session=}.
+        #
+        # @param [String] value The session ID. The default value is `nil`.
+        #
+        # @!group Attributes
+        def session_id= value
+          @gapi.configuration.load.connection_properties ||= []
+          prop = @gapi.configuration.load.connection_properties.find { |cp| cp.key == "session_id" }
+          if prop
+            prop.value = value
+          else
+            prop = Google::Apis::BigqueryV2::ConnectionProperty.new key: "session_id", value: value
+            @gapi.configuration.load.connection_properties << prop
+          end
+        end
+
         ##
         # Sets the projection fields.
         #
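The two new `LoadJob::Updater` setters are the low-level hooks everything above routes through: `create_session=` writes the flag onto the load configuration, while `session_id=` is stored as a `session_id` connection property on the job. They can also be set directly in the block yielded by `load_job`; a sketch, with `gs_url` and `dataset` assumed to exist:

```ruby
job = dataset.load_job "temp_table", gs_url do |updater|
  updater.create_session = true          # maps to configuration.load.create_session
  # updater.session_id = "existing-id"   # adds a key: "session_id" connection property
end
```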
data/lib/google/cloud/bigquery/project.rb
CHANGED
@@ -942,6 +942,364 @@ module Google
           job.data max: max
         end
 
+        ##
+        # Loads data into the provided destination table using an asynchronous
+        # method. In this method, a {LoadJob} is immediately returned. The
+        # caller may poll the service by repeatedly calling {Job#reload!} and
+        # {Job#done?} to detect when the job is done, or simply block until the
+        # job is done by calling #{Job#wait_until_done!}. See also {#load}.
+        #
+        # For the source of the data, you can pass a google-cloud storage file
+        # path or a google-cloud-storage `File` instance. Or, you can upload a
+        # file directly. See [Loading Data with a POST
+        # Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
+        #
+        # The geographic location for the job ("US", "EU", etc.) can be set via
+        # {LoadJob::Updater#location=} in a block passed to this method.
+        #
+        # @param [String] table_id The destination table to load the data into.
+        # @param [File, Google::Cloud::Storage::File, String, URI,
+        #   Array<Google::Cloud::Storage::File, String, URI>] files
+        #   A file or the URI of a Google Cloud Storage file, or an Array of
+        #   those, containing data to load into the table.
+        # @param [String] format The exported file format. The default value is
+        #   `csv`.
+        #
+        #   The following values are supported:
+        #
+        #   * `csv` - CSV
+        #   * `json` - [Newline-delimited JSON](https://jsonlines.org/)
+        #   * `avro` - [Avro](http://avro.apache.org/)
+        #   * `orc` - [ORC](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-orc)
+        #   * `parquet` - [Parquet](https://parquet.apache.org/)
+        #   * `datastore_backup` - Cloud Datastore backup
+        # @param [String] dataset_id The destination table to load the data into.
+        #   For load job with create_session/session_id it defaults to "_SESSION"
+        # @param [String] create Specifies whether the job is allowed to create
+        #   new tables. The default value is `needed`.
+        #
+        #   The following values are supported:
+        #
+        #   * `needed` - Create the table if it does not exist.
+        #   * `never` - The table must already exist. A 'notFound' error is
+        #     raised if the table does not exist.
+        # @param [String] write Specifies how to handle data already present in
+        #   the table. The default value is `append`.
+        #
+        #   The following values are supported:
+        #
+        #   * `truncate` - BigQuery overwrites the table data.
+        #   * `append` - BigQuery appends the data to the table.
+        #   * `empty` - An error will be returned if the table already contains
+        #     data.
+        # @param [Array<String>] projection_fields If the `format` option is set
+        #   to `datastore_backup`, indicates which entity properties to load
+        #   from a Cloud Datastore backup. Property names are case sensitive and
+        #   must be top-level properties. If not set, BigQuery loads all
+        #   properties. If any named property isn't found in the Cloud Datastore
+        #   backup, an invalid error is returned.
+        # @param [Boolean] jagged_rows Accept rows that are missing trailing
+        #   optional columns. The missing values are treated as nulls. If
+        #   `false`, records with missing trailing columns are treated as bad
+        #   records, and if there are too many bad records, an invalid error is
+        #   returned in the job result. The default value is `false`. Only
+        #   applicable to CSV, ignored for other formats.
+        # @param [Boolean] quoted_newlines Indicates if BigQuery should allow
+        #   quoted data sections that contain newline characters in a CSV file.
+        #   The default value is `false`.
+        # @param [Boolean] autodetect Indicates if BigQuery should
+        #   automatically infer the options and schema for CSV and JSON sources.
+        #   The default value is `false`.
+        # @param [String] encoding The character encoding of the data. The
+        #   supported values are `UTF-8` or `ISO-8859-1`. The default value is
+        #   `UTF-8`.
+        # @param [String] delimiter Specifices the separator for fields in a CSV
+        #   file. BigQuery converts the string to `ISO-8859-1` encoding, and
+        #   then uses the first byte of the encoded string to split the data in
+        #   its raw, binary state. Default is <code>,</code>.
+        # @param [Boolean] ignore_unknown Indicates if BigQuery should allow
+        #   extra values that are not represented in the table schema. If true,
+        #   the extra values are ignored. If false, records with extra columns
+        #   are treated as bad records, and if there are too many bad records,
+        #   an invalid error is returned in the job result. The default value is
+        #   `false`.
+        #
+        #   The `format` property determines what BigQuery treats as an extra
+        #   value:
+        #
+        #   * `CSV`: Trailing columns
+        #   * `JSON`: Named values that don't match any column names
+        # @param [Integer] max_bad_records The maximum number of bad records
+        #   that BigQuery can ignore when running the job. If the number of bad
+        #   records exceeds this value, an invalid error is returned in the job
+        #   result. The default value is `0`, which requires that all records
+        #   are valid.
+        # @param [String] null_marker Specifies a string that represents a null
+        #   value in a CSV file. For example, if you specify `\N`, BigQuery
+        #   interprets `\N` as a null value when loading a CSV file. The default
+        #   value is the empty string. If you set this property to a custom
+        #   value, BigQuery throws an error if an empty string is present for
+        #   all data types except for STRING and BYTE. For STRING and BYTE
+        #   columns, BigQuery interprets the empty string as an empty value.
+        # @param [String] quote The value that is used to quote data sections in
+        #   a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
+        #   then uses the first byte of the encoded string to split the data in
+        #   its raw, binary state. The default value is a double-quote
+        #   <code>"</code>. If your data does not contain quoted sections, set
+        #   the property value to an empty string. If your data contains quoted
+        #   newline characters, you must also set the allowQuotedNewlines
+        #   property to true.
+        # @param [Integer] skip_leading The number of rows at the top of a CSV
+        #   file that BigQuery will skip when loading the data. The default
+        #   value is `0`. This property is useful if you have header rows in the
+        #   file that should be skipped.
+        # @param [Google::Cloud::Bigquery::Schema] schema The schema for the
+        #   destination table. Optional. The schema can be omitted if the
+        #   destination table already exists, or if you're loading data from a
+        #   Google Cloud Datastore backup.
+        #
+        #   See {Project#schema} for the creation of the schema for use with
+        #   this option. Also note that for most use cases, the block yielded by
+        #   this method is a more convenient way to configure the schema.
+        # @param [String] job_id A user-defined ID for the load job. The ID
+        #   must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
+        #   (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
+        #   `job_id` is provided, then `prefix` will not be used.
+        #
+        #   See [Generating a job
+        #   ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+        # @param [String] prefix A string, usually human-readable, that will be
+        #   prepended to a generated value to produce a unique job ID. For
+        #   example, the prefix `daily_import_job_` can be given to generate a
+        #   job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
+        #   prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
+        #   underscores (`_`), or dashes (`-`). The maximum length of the entire ID
+        #   is 1,024 characters. If `job_id` is provided, then `prefix` will not
+        #   be used.
+        # @param [Hash] labels A hash of user-provided labels associated with
+        #   the job. You can use these to organize and group your jobs.
+        #
+        #   The labels applied to a resource must meet the following requirements:
+        #
+        #   * Each resource can have multiple labels, up to a maximum of 64.
+        #   * Each label must be a key-value pair.
+        #   * Keys have a minimum length of 1 character and a maximum length of
+        #     63 characters, and cannot be empty. Values can be empty, and have
+        #     a maximum length of 63 characters.
+        #   * Keys and values can contain only lowercase letters, numeric characters,
+        #     underscores, and dashes. All characters must use UTF-8 encoding, and
+        #     international characters are allowed.
+        #   * The key portion of a label must be unique. However, you can use the
+        #     same key with multiple resources.
+        #   * Keys must start with a lowercase letter or international character.
+        # @param [Boolean] create_session If set to true a new session will be created
+        #   and the load job will happen in the table created within that session.
+        #   Note: This will work only for tables in _SESSION dataset
+        #   else the property will be ignored by the backend.
+        # @param [string] session_id Session ID in which the load job must run.
+        #
+        # @yield [updater] A block for setting the schema and other
+        #   options for the destination table. The schema can be omitted if the
+        #   destination table already exists, or if you're loading data from a
+        #   Google Cloud Datastore backup.
+        # @yieldparam [Google::Cloud::Bigquery::LoadJob::Updater] updater An
+        #   updater to modify the load job and its schema.
+        # @param [Boolean] dryrun If set, don't actually run this job. Behavior
+        #   is undefined however for non-query jobs and may result in an error.
+        #   Deprecated.
+        #
+        # @return [Google::Cloud::Bigquery::LoadJob] A new load job object.
+        #
+        # @example
+        #   require "google/cloud/bigquery"
+        #
+        #   bigquery = Google::Cloud::Bigquery.new
+        #
+        #   gs_url = "gs://my-bucket/file-name.csv"
+        #   load_job = bigquery.load_job "temp_table", gs_url, autodetect: true, create_session: true
+        #   load_job.wait_until_done!
+        #   session_id = load_job.statistics["sessionInfo"]["sessionId"]
+        #
+        def load_job table_id, files, dataset_id: nil, format: nil, create: nil, write: nil,
+                     projection_fields: nil, jagged_rows: nil, quoted_newlines: nil, encoding: nil,
+                     delimiter: nil, ignore_unknown: nil, max_bad_records: nil, quote: nil,
+                     skip_leading: nil, schema: nil, job_id: nil, prefix: nil, labels: nil, autodetect: nil,
+                     null_marker: nil, dryrun: nil, create_session: nil, session_id: nil, &block
+          ensure_service!
+          dataset_id ||= "_SESSION" unless create_session.nil? && session_id.nil?
+          session_dataset = dataset dataset_id, skip_lookup: true
+          table = session_dataset.table table_id, skip_lookup: true
+          table.load_job files,
+                         format: format, create: create, write: write, projection_fields: projection_fields,
+                         jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
+                         delimiter: delimiter, ignore_unknown: ignore_unknown,
+                         max_bad_records: max_bad_records, quote: quote, skip_leading: skip_leading,
+                         dryrun: dryrun, schema: schema, job_id: job_id, prefix: prefix, labels: labels,
+                         autodetect: autodetect, null_marker: null_marker, create_session: create_session,
+                         session_id: session_id, &block
+        end
+
+        ##
+        # Loads data into the provided destination table using a synchronous
+        # method that blocks for a response. Timeouts and transient errors are
+        # generally handled as needed to complete the job. See also
+        # {#load_job}.
+        #
+        # For the source of the data, you can pass a google-cloud storage file
+        # path or a google-cloud-storage `File` instance. Or, you can upload a
+        # file directly. See [Loading Data with a POST
+        # Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
+        #
+        # The geographic location for the job ("US", "EU", etc.) can be set via
+        # {LoadJob::Updater#location=} in a block passed to this method.
+        #
+        # @param [String] table_id The destination table to load the data into.
+        # @param [File, Google::Cloud::Storage::File, String, URI,
+        #   Array<Google::Cloud::Storage::File, String, URI>] files
+        #   A file or the URI of a Google Cloud Storage file, or an Array of
+        #   those, containing data to load into the table.
+        # @param [String] format The exported file format. The default value is
+        #   `csv`.
+        #
+        #   The following values are supported:
+        #
+        #   * `csv` - CSV
+        #   * `json` - [Newline-delimited JSON](https://jsonlines.org/)
+        #   * `avro` - [Avro](http://avro.apache.org/)
+        #   * `orc` - [ORC](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-orc)
+        #   * `parquet` - [Parquet](https://parquet.apache.org/)
+        #   * `datastore_backup` - Cloud Datastore backup
+        # @param [String] create Specifies whether the job is allowed to create
+        #   new tables. The default value is `needed`.
+        #
+        #   The following values are supported:
+        #
+        #   * `needed` - Create the table if it does not exist.
+        #   * `never` - The table must already exist. A 'notFound' error is
+        #     raised if the table does not exist.
+        # @param [String] dataset_id The destination table to load the data into.
+        #   For load job with session it defaults to "_SESSION"
+        # @param [String] write Specifies how to handle data already present in
+        #   the table. The default value is `append`.
+        #
+        #   The following values are supported:
+        #
+        #   * `truncate` - BigQuery overwrites the table data.
+        #   * `append` - BigQuery appends the data to the table.
+        #   * `empty` - An error will be returned if the table already contains
+        #     data.
+        # @param [Array<String>] projection_fields If the `format` option is set
+        #   to `datastore_backup`, indicates which entity properties to load
+        #   from a Cloud Datastore backup. Property names are case sensitive and
+        #   must be top-level properties. If not set, BigQuery loads all
+        #   properties. If any named property isn't found in the Cloud Datastore
+        #   backup, an invalid error is returned.
+        # @param [Boolean] jagged_rows Accept rows that are missing trailing
+        #   optional columns. The missing values are treated as nulls. If
+        #   `false`, records with missing trailing columns are treated as bad
+        #   records, and if there are too many bad records, an invalid error is
+        #   returned in the job result. The default value is `false`. Only
+        #   applicable to CSV, ignored for other formats.
+        # @param [Boolean] quoted_newlines Indicates if BigQuery should allow
+        #   quoted data sections that contain newline characters in a CSV file.
+        #   The default value is `false`.
+        # @param [Boolean] autodetect Indicates if BigQuery should
+        #   automatically infer the options and schema for CSV and JSON sources.
+        #   The default value is `false`.
+        # @param [String] encoding The character encoding of the data. The
+        #   supported values are `UTF-8` or `ISO-8859-1`. The default value is
+        #   `UTF-8`.
+        # @param [String] delimiter Specifices the separator for fields in a CSV
+        #   file. BigQuery converts the string to `ISO-8859-1` encoding, and
+        #   then uses the first byte of the encoded string to split the data in
+        #   its raw, binary state. Default is <code>,</code>.
+        # @param [Boolean] ignore_unknown Indicates if BigQuery should allow
+        #   extra values that are not represented in the table schema. If true,
+        #   the extra values are ignored. If false, records with extra columns
+        #   are treated as bad records, and if there are too many bad records,
+        #   an invalid error is returned in the job result. The default value is
+        #   `false`.
+        #
+        #   The `format` property determines what BigQuery treats as an extra
+        #   value:
+        #
+        #   * `CSV`: Trailing columns
+        #   * `JSON`: Named values that don't match any column names
+        # @param [Integer] max_bad_records The maximum number of bad records
+        #   that BigQuery can ignore when running the job. If the number of bad
+        #   records exceeds this value, an invalid error is returned in the job
+        #   result. The default value is `0`, which requires that all records
+        #   are valid.
+        # @param [String] null_marker Specifies a string that represents a null
+        #   value in a CSV file. For example, if you specify `\N`, BigQuery
+        #   interprets `\N` as a null value when loading a CSV file. The default
+        #   value is the empty string. If you set this property to a custom
+        #   value, BigQuery throws an error if an empty string is present for
+        #   all data types except for STRING and BYTE. For STRING and BYTE
+        #   columns, BigQuery interprets the empty string as an empty value.
+        # @param [String] quote The value that is used to quote data sections in
+        #   a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
+        #   then uses the first byte of the encoded string to split the data in
+        #   its raw, binary state. The default value is a double-quote
+        #   <code>"</code>. If your data does not contain quoted sections, set
+        #   the property value to an empty string. If your data contains quoted
+        #   newline characters, you must also set the allowQuotedNewlines
+        #   property to true.
+        # @param [Integer] skip_leading The number of rows at the top of a CSV
+        #   file that BigQuery will skip when loading the data. The default
+        #   value is `0`. This property is useful if you have header rows in the
+        #   file that should be skipped.
+        # @param [Google::Cloud::Bigquery::Schema] schema The schema for the
+        #   destination table. Optional. The schema can be omitted if the
+        #   destination table already exists, or if you're loading data from a
+        #   Google Cloud Datastore backup.
+        #
+        #   See {Project#schema} for the creation of the schema for use with
+        #   this option. Also note that for most use cases, the block yielded by
+        #   this method is a more convenient way to configure the schema.
+        # @param [string] session_id Session ID in which the load job must run.
+        #
+        # @yield [updater] A block for setting the schema of the destination
+        #   table and other options for the load job. The schema can be omitted
+        #   if the destination table already exists, or if you're loading data
+        #   from a Google Cloud Datastore backup.
+        # @yieldparam [Google::Cloud::Bigquery::LoadJob::Updater] updater An
+        #   updater to modify the load job and its schema.
+        #
+        # @return [Boolean] Returns `true` if the load job was successful.
+        #
+        # @example
+        #   require "google/cloud/bigquery"
+        #
+        #   bigquery = Google::Cloud::Bigquery.new
+        #
+        #   gs_url = "gs://my-bucket/file-name.csv"
+        #   bigquery.load "my_new_table", gs_url, dataset_id: "my_dataset" do |schema|
+        #     schema.string "first_name", mode: :required
+        #     schema.record "cities_lived", mode: :repeated do |nested_schema|
+        #       nested_schema.string "place", mode: :required
+        #       nested_schema.integer "number_of_years", mode: :required
+        #     end
+        #   end
+        #
+        # @!group Data
+        #
+        def load table_id, files, dataset_id: "_SESSION", format: nil, create: nil, write: nil,
+                 projection_fields: nil, jagged_rows: nil, quoted_newlines: nil, encoding: nil,
+                 delimiter: nil, ignore_unknown: nil, max_bad_records: nil, quote: nil,
+                 skip_leading: nil, schema: nil, autodetect: nil, null_marker: nil, session_id: nil, &block
+          job = load_job table_id, files, dataset_id: dataset_id,
+                         format: format, create: create, write: write, projection_fields: projection_fields,
+                         jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
+                         delimiter: delimiter, ignore_unknown: ignore_unknown, max_bad_records: max_bad_records,
+                         quote: quote, skip_leading: skip_leading, schema: schema, autodetect: autodetect,
+                         null_marker: null_marker, session_id: session_id, &block
+
+          job.wait_until_done!
+          ensure_job_succeeded! job
+          true
+        end
+
         ##
         # Creates a new External::DataSource (or subclass) object that
         # represents the external data source that can be queried from directly,
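Under the hood the new project-level helpers are thin wrappers: they build `skip_lookup` references and delegate to `Table#load_job`, so the two calls below should be roughly equivalent. A sketch that continues the docstring example (`bigquery` and `gs_url` as defined there):

```ruby
# Project-level helper...
bigquery.load_job "temp_table", gs_url, create_session: true

# ...is roughly shorthand for building skip_lookup references and delegating:
bigquery.dataset("_SESSION", skip_lookup: true)
        .table("temp_table", skip_lookup: true)
        .load_job(gs_url, create_session: true)
```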
data/lib/google/cloud/bigquery/table.rb
CHANGED
@@ -2372,6 +2372,11 @@ module Google
         # @param [Boolean] dryrun If set, don't actually run this job. Behavior
         #   is undefined however for non-query jobs and may result in an error.
         #   Deprecated.
+        # @param [Boolean] create_session If set to true a new session will be created
+        #   and the load job will happen in the table created within that session.
+        #   Note: This will work only for tables in _SESSION dataset
+        #   else the property will be ignored by the backend.
+        # @param [string] session_id Session ID in which the load job must run.
         #
         # @yield [load_job] a block for setting the load job
         # @yieldparam [LoadJob] load_job the load job object to be updated
@@ -2428,7 +2433,7 @@ module Google
         def load_job files, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
                      quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil,
                      quote: nil, skip_leading: nil, job_id: nil, prefix: nil, labels: nil, autodetect: nil,
-                     null_marker: nil, dryrun: nil
+                     null_marker: nil, dryrun: nil, create_session: nil, session_id: nil, schema: self.schema
           ensure_service!
 
           updater = load_job_updater format: format, create: create, write: write, projection_fields: projection_fields,
@@ -2436,7 +2441,9 @@ module Google
                                      delimiter: delimiter, ignore_unknown: ignore_unknown,
                                      max_bad_records: max_bad_records, quote: quote, skip_leading: skip_leading,
                                      dryrun: dryrun, job_id: job_id, prefix: prefix, schema: schema, labels: labels,
-                                     autodetect: autodetect, null_marker: null_marker
+                                     autodetect: autodetect, null_marker: null_marker, create_session: create_session,
+                                     session_id: session_id
+
 
           yield updater if block_given?
 
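At the lowest level, `Table#load_job` accepts the same two options, so a session can be driven from a table reference directly. A sketch, assuming the table lives in the special `_SESSION` dataset and `session_id` comes from an earlier `create_session: true` job:

```ruby
table = bigquery.dataset("_SESSION", skip_lookup: true).table("temp_table", skip_lookup: true)

job = table.load_job gs_url, session_id: session_id
job.wait_until_done!
```

Note also that `Table#load_job` (and `Table#load` below) now defaults its `schema` argument to the table's current schema.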
@@ -2551,6 +2558,7 @@ module Google
         #   file that BigQuery will skip when loading the data. The default
         #   value is `0`. This property is useful if you have header rows in the
         #   file that should be skipped.
+        # @param [string] session_id Session ID in which the load job must run.
         #
         # @yield [updater] A block for setting the schema of the destination
         #   table and other options for the load job. The schema can be omitted
@@ -2612,12 +2620,13 @@ module Google
         #
         def load files, format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
                  quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil, max_bad_records: nil,
-                 quote: nil, skip_leading: nil, autodetect: nil, null_marker: nil, &block
+                 quote: nil, skip_leading: nil, autodetect: nil, null_marker: nil, session_id: nil,
+                 schema: self.schema, &block
           job = load_job files, format: format, create: create, write: write, projection_fields: projection_fields,
                          jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
                          delimiter: delimiter, ignore_unknown: ignore_unknown, max_bad_records: max_bad_records,
                          quote: quote, skip_leading: skip_leading, autodetect: autodetect,
-                         null_marker: null_marker, &block
+                         null_marker: null_marker, session_id: session_id, schema: schema, &block
 
           job.wait_until_done!
           ensure_job_succeeded! job
@@ -3114,7 +3123,8 @@ module Google
         def load_job_updater format: nil, create: nil, write: nil, projection_fields: nil, jagged_rows: nil,
                              quoted_newlines: nil, encoding: nil, delimiter: nil, ignore_unknown: nil,
                              max_bad_records: nil, quote: nil, skip_leading: nil, dryrun: nil, schema: nil, job_id: nil,
-                             prefix: nil, labels: nil, autodetect: nil, null_marker: nil
+                             prefix: nil, labels: nil, autodetect: nil, null_marker: nil,
+                             create_session: nil, session_id: nil
           new_job = load_job_gapi table_id, dryrun, job_id: job_id, prefix: prefix
           LoadJob::Updater.new(new_job).tap do |job|
             job.location = location if location # may be table reference
@@ -3123,6 +3133,8 @@ module Google
             job.schema = schema unless schema.nil?
             job.autodetect = autodetect unless autodetect.nil?
             job.labels = labels unless labels.nil?
+            job.create_session = create_session unless create_session.nil?
+            job.session_id = session_id unless session_id.nil?
             load_job_file_options! job, format: format,
                                         projection_fields: projection_fields,
                                         jagged_rows: jagged_rows,
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: google-cloud-bigquery
 version: !ruby/object:Gem::Version
-  version: 1.44.1
+  version: 1.45.0
 platform: ruby
 authors:
 - Mike Moore
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2023-09-08 00:00:00.000000000 Z
+date: 2023-09-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: concurrent-ruby