google-cloud-bigquery 1.42.0 → 1.49.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +72 -0
- data/lib/google/cloud/bigquery/convert.rb +2 -3
- data/lib/google/cloud/bigquery/dataset.rb +78 -13
- data/lib/google/cloud/bigquery/load_job.rb +495 -26
- data/lib/google/cloud/bigquery/project.rb +373 -1
- data/lib/google/cloud/bigquery/query_job.rb +6 -4
- data/lib/google/cloud/bigquery/schema/field.rb +83 -0
- data/lib/google/cloud/bigquery/schema.rb +351 -44
- data/lib/google/cloud/bigquery/service.rb +25 -5
- data/lib/google/cloud/bigquery/table/async_inserter.rb +1 -0
- data/lib/google/cloud/bigquery/table.rb +458 -32
- data/lib/google/cloud/bigquery/version.rb +1 -1
- data/lib/google/cloud/bigquery.rb +5 -3
- data/lib/google-cloud-bigquery.rb +9 -3
- metadata +18 -164
@@ -67,6 +67,15 @@ module Google
|
|
67
67
|
@service = service
|
68
68
|
end
|
69
69
|
|
70
|
+
##
|
71
|
+
# The universe domain the client is connected to
|
72
|
+
#
|
73
|
+
# @return [String]
|
74
|
+
#
|
75
|
+
def universe_domain
|
76
|
+
service.universe_domain
|
77
|
+
end
|
78
|
+
|
70
79
|
##
|
71
80
|
# The BigQuery project connected to.
|
72
81
|
#
|
@@ -304,6 +313,7 @@ module Google
|
|
304
313
|
# | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
|
305
314
|
# | `DATE` | `Date` | |
|
306
315
|
# | `GEOGRAPHY` | `String` (WKT or GeoJSON) | NOT AUTOMATIC: Must be mapped using `types`, below.|
|
316
|
+
# | `JSON` | `String` (Stringified JSON) | String, as JSON does not have a schema to verify. |
|
307
317
|
# | `TIMESTAMP` | `Time` | |
|
308
318
|
# | `TIME` | `Google::Cloud::BigQuery::Time` | |
|
309
319
|
# | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
|
@@ -330,6 +340,7 @@ module Google
|
|
330
340
|
# * `:DATETIME`
|
331
341
|
# * `:DATE`
|
332
342
|
# * `:GEOGRAPHY`
|
343
|
+
# * `:JSON`
|
333
344
|
# * `:TIMESTAMP`
|
334
345
|
# * `:TIME`
|
335
346
|
# * `:BYTES`
|
@@ -622,6 +633,7 @@ module Google
|
|
622
633
|
create_session: nil,
|
623
634
|
session_id: nil
|
624
635
|
ensure_service!
|
636
|
+
project ||= self.project
|
625
637
|
options = {
|
626
638
|
params: params,
|
627
639
|
types: types,
|
@@ -633,7 +645,7 @@ module Google
|
|
633
645
|
write: write,
|
634
646
|
dryrun: dryrun,
|
635
647
|
dataset: dataset,
|
636
|
-
project:
|
648
|
+
project: project,
|
637
649
|
standard_sql: standard_sql,
|
638
650
|
legacy_sql: legacy_sql,
|
639
651
|
large_results: large_results,
|
@@ -691,6 +703,7 @@ module Google
|
|
691
703
|
# | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
|
692
704
|
# | `DATE` | `Date` | |
|
693
705
|
# | `GEOGRAPHY` | `String` (WKT or GeoJSON) | NOT AUTOMATIC: Must be mapped using `types`, below.|
|
706
|
+
# | `JSON` | `String` (Stringified JSON) | String, as JSON does not have a schema to verify. |
|
694
707
|
# | `TIMESTAMP` | `Time` | |
|
695
708
|
# | `TIME` | `Google::Cloud::BigQuery::Time` | |
|
696
709
|
# | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
|
@@ -717,6 +730,7 @@ module Google
|
|
717
730
|
# * `:DATETIME`
|
718
731
|
# * `:DATE`
|
719
732
|
# * `:GEOGRAPHY`
|
733
|
+
# * `:JSON`
|
720
734
|
# * `:TIMESTAMP`
|
721
735
|
# * `:TIME`
|
722
736
|
# * `:BYTES`
|
@@ -942,6 +956,364 @@ module Google
|
|
942
956
|
job.data max: max
|
943
957
|
end
|
944
958
|
|
959
|
+
##
|
960
|
+
# Loads data into the provided destination table using an asynchronous
|
961
|
+
# method. In this method, a {LoadJob} is immediately returned. The
|
962
|
+
# caller may poll the service by repeatedly calling {Job#reload!} and
|
963
|
+
# {Job#done?} to detect when the job is done, or simply block until the
|
964
|
+
# job is done by calling {Job#wait_until_done!}. See also {#load}.
|
965
|
+
#
|
966
|
+
# For the source of the data, you can pass a google-cloud storage file
|
967
|
+
# path or a google-cloud-storage `File` instance. Or, you can upload a
|
968
|
+
# file directly. See [Loading Data with a POST
|
969
|
+
# Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
|
970
|
+
#
|
971
|
+
# The geographic location for the job ("US", "EU", etc.) can be set via
|
972
|
+
# {LoadJob::Updater#location=} in a block passed to this method.
|
973
|
+
#
|
974
|
+
# @param [String] table_id The destination table to load the data into.
|
975
|
+
# @param [File, Google::Cloud::Storage::File, String, URI,
|
976
|
+
# Array<Google::Cloud::Storage::File, String, URI>] files
|
977
|
+
# A file or the URI of a Google Cloud Storage file, or an Array of
|
978
|
+
# those, containing data to load into the table.
|
979
|
+
# @param [String] format The exported file format. The default value is
|
980
|
+
# `csv`.
|
981
|
+
#
|
982
|
+
# The following values are supported:
|
983
|
+
#
|
984
|
+
# * `csv` - CSV
|
985
|
+
# * `json` - [Newline-delimited JSON](https://jsonlines.org/)
|
986
|
+
# * `avro` - [Avro](http://avro.apache.org/)
|
987
|
+
# * `orc` - [ORC](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-orc)
|
988
|
+
# * `parquet` - [Parquet](https://parquet.apache.org/)
|
989
|
+
# * `datastore_backup` - Cloud Datastore backup
|
990
|
+
# @param [String] dataset_id The ID of the dataset containing the destination table.
|
991
|
+
# For load job with create_session/session_id it defaults to "_SESSION"
|
992
|
+
# @param [String] create Specifies whether the job is allowed to create
|
993
|
+
# new tables. The default value is `needed`.
|
994
|
+
#
|
995
|
+
# The following values are supported:
|
996
|
+
#
|
997
|
+
# * `needed` - Create the table if it does not exist.
|
998
|
+
# * `never` - The table must already exist. A 'notFound' error is
|
999
|
+
# raised if the table does not exist.
|
1000
|
+
# @param [String] write Specifies how to handle data already present in
|
1001
|
+
# the table. The default value is `append`.
|
1002
|
+
#
|
1003
|
+
# The following values are supported:
|
1004
|
+
#
|
1005
|
+
# * `truncate` - BigQuery overwrites the table data.
|
1006
|
+
# * `append` - BigQuery appends the data to the table.
|
1007
|
+
# * `empty` - An error will be returned if the table already contains
|
1008
|
+
# data.
|
1009
|
+
# @param [Array<String>] projection_fields If the `format` option is set
|
1010
|
+
# to `datastore_backup`, indicates which entity properties to load
|
1011
|
+
# from a Cloud Datastore backup. Property names are case sensitive and
|
1012
|
+
# must be top-level properties. If not set, BigQuery loads all
|
1013
|
+
# properties. If any named property isn't found in the Cloud Datastore
|
1014
|
+
# backup, an invalid error is returned.
|
1015
|
+
# @param [Boolean] jagged_rows Accept rows that are missing trailing
|
1016
|
+
# optional columns. The missing values are treated as nulls. If
|
1017
|
+
# `false`, records with missing trailing columns are treated as bad
|
1018
|
+
# records, and if there are too many bad records, an invalid error is
|
1019
|
+
# returned in the job result. The default value is `false`. Only
|
1020
|
+
# applicable to CSV, ignored for other formats.
|
1021
|
+
# @param [Boolean] quoted_newlines Indicates if BigQuery should allow
|
1022
|
+
# quoted data sections that contain newline characters in a CSV file.
|
1023
|
+
# The default value is `false`.
|
1024
|
+
# @param [Boolean] autodetect Indicates if BigQuery should
|
1025
|
+
# automatically infer the options and schema for CSV and JSON sources.
|
1026
|
+
# The default value is `false`.
|
1027
|
+
# @param [String] encoding The character encoding of the data. The
|
1028
|
+
# supported values are `UTF-8` or `ISO-8859-1`. The default value is
|
1029
|
+
# `UTF-8`.
|
1030
|
+
# @param [String] delimiter Specifies the separator for fields in a CSV
|
1031
|
+
# file. BigQuery converts the string to `ISO-8859-1` encoding, and
|
1032
|
+
# then uses the first byte of the encoded string to split the data in
|
1033
|
+
# its raw, binary state. Default is <code>,</code>.
|
1034
|
+
# @param [Boolean] ignore_unknown Indicates if BigQuery should allow
|
1035
|
+
# extra values that are not represented in the table schema. If true,
|
1036
|
+
# the extra values are ignored. If false, records with extra columns
|
1037
|
+
# are treated as bad records, and if there are too many bad records,
|
1038
|
+
# an invalid error is returned in the job result. The default value is
|
1039
|
+
# `false`.
|
1040
|
+
#
|
1041
|
+
# The `format` property determines what BigQuery treats as an extra
|
1042
|
+
# value:
|
1043
|
+
#
|
1044
|
+
# * `CSV`: Trailing columns
|
1045
|
+
# * `JSON`: Named values that don't match any column names
|
1046
|
+
# @param [Integer] max_bad_records The maximum number of bad records
|
1047
|
+
# that BigQuery can ignore when running the job. If the number of bad
|
1048
|
+
# records exceeds this value, an invalid error is returned in the job
|
1049
|
+
# result. The default value is `0`, which requires that all records
|
1050
|
+
# are valid.
|
1051
|
+
# @param [String] null_marker Specifies a string that represents a null
|
1052
|
+
# value in a CSV file. For example, if you specify `\N`, BigQuery
|
1053
|
+
# interprets `\N` as a null value when loading a CSV file. The default
|
1054
|
+
# value is the empty string. If you set this property to a custom
|
1055
|
+
# value, BigQuery throws an error if an empty string is present for
|
1056
|
+
# all data types except for STRING and BYTE. For STRING and BYTE
|
1057
|
+
# columns, BigQuery interprets the empty string as an empty value.
|
1058
|
+
# @param [String] quote The value that is used to quote data sections in
|
1059
|
+
# a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
|
1060
|
+
# then uses the first byte of the encoded string to split the data in
|
1061
|
+
# its raw, binary state. The default value is a double-quote
|
1062
|
+
# <code>"</code>. If your data does not contain quoted sections, set
|
1063
|
+
# the property value to an empty string. If your data contains quoted
|
1064
|
+
# newline characters, you must also set the allowQuotedNewlines
|
1065
|
+
# property to true.
|
1066
|
+
# @param [Integer] skip_leading The number of rows at the top of a CSV
|
1067
|
+
# file that BigQuery will skip when loading the data. The default
|
1068
|
+
# value is `0`. This property is useful if you have header rows in the
|
1069
|
+
# file that should be skipped.
|
1070
|
+
# @param [Google::Cloud::Bigquery::Schema] schema The schema for the
|
1071
|
+
# destination table. Optional. The schema can be omitted if the
|
1072
|
+
# destination table already exists, or if you're loading data from a
|
1073
|
+
# Google Cloud Datastore backup.
|
1074
|
+
#
|
1075
|
+
# See {Project#schema} for the creation of the schema for use with
|
1076
|
+
# this option. Also note that for most use cases, the block yielded by
|
1077
|
+
# this method is a more convenient way to configure the schema.
|
1078
|
+
# @param [String] job_id A user-defined ID for the load job. The ID
|
1079
|
+
# must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
|
1080
|
+
# (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
|
1081
|
+
# `job_id` is provided, then `prefix` will not be used.
|
1082
|
+
#
|
1083
|
+
# See [Generating a job
|
1084
|
+
# ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
|
1085
|
+
# @param [String] prefix A string, usually human-readable, that will be
|
1086
|
+
# prepended to a generated value to produce a unique job ID. For
|
1087
|
+
# example, the prefix `daily_import_job_` can be given to generate a
|
1088
|
+
# job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
|
1089
|
+
# prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
|
1090
|
+
# underscores (`_`), or dashes (`-`). The maximum length of the entire ID
|
1091
|
+
# is 1,024 characters. If `job_id` is provided, then `prefix` will not
|
1092
|
+
# be used.
|
1093
|
+
# @param [Hash] labels A hash of user-provided labels associated with
|
1094
|
+
# the job. You can use these to organize and group your jobs.
|
1095
|
+
#
|
1096
|
+
# The labels applied to a resource must meet the following requirements:
|
1097
|
+
#
|
1098
|
+
# * Each resource can have multiple labels, up to a maximum of 64.
|
1099
|
+
# * Each label must be a key-value pair.
|
1100
|
+
# * Keys have a minimum length of 1 character and a maximum length of
|
1101
|
+
# 63 characters, and cannot be empty. Values can be empty, and have
|
1102
|
+
# a maximum length of 63 characters.
|
1103
|
+
# * Keys and values can contain only lowercase letters, numeric characters,
|
1104
|
+
# underscores, and dashes. All characters must use UTF-8 encoding, and
|
1105
|
+
# international characters are allowed.
|
1106
|
+
# * The key portion of a label must be unique. However, you can use the
|
1107
|
+
# same key with multiple resources.
|
1108
|
+
# * Keys must start with a lowercase letter or international character.
|
1109
|
+
# @param [Boolean] create_session If set to true a new session will be created
|
1110
|
+
# and the load job will happen in the table created within that session.
|
1111
|
+
# Note: This will work only for tables in _SESSION dataset
|
1112
|
+
# else the property will be ignored by the backend.
|
1113
|
+
# @param [String] session_id Session ID in which the load job must run.
|
1114
|
+
#
|
1115
|
+
# @yield [updater] A block for setting the schema and other
|
1116
|
+
# options for the destination table. The schema can be omitted if the
|
1117
|
+
# destination table already exists, or if you're loading data from a
|
1118
|
+
# Google Cloud Datastore backup.
|
1119
|
+
# @yieldparam [Google::Cloud::Bigquery::LoadJob::Updater] updater An
|
1120
|
+
# updater to modify the load job and its schema.
|
1121
|
+
# @param [Boolean] dryrun If set, don't actually run this job. Behavior
|
1122
|
+
# is undefined however for non-query jobs and may result in an error.
|
1123
|
+
# Deprecated.
|
1124
|
+
#
|
1125
|
+
# @return [Google::Cloud::Bigquery::LoadJob] A new load job object.
|
1126
|
+
#
|
1127
|
+
# @example
|
1128
|
+
# require "google/cloud/bigquery"
|
1129
|
+
#
|
1130
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1131
|
+
#
|
1132
|
+
# gs_url = "gs://my-bucket/file-name.csv"
|
1133
|
+
# load_job = bigquery.load_job "temp_table", gs_url, autodetect: true, create_session: true
|
1134
|
+
# load_job.wait_until_done!
|
1135
|
+
# session_id = load_job.statistics["sessionInfo"]["sessionId"]
|
1136
|
+
#
|
1137
|
+
def load_job table_id, files, dataset_id: nil, format: nil, create: nil, write: nil,
|
1138
|
+
projection_fields: nil, jagged_rows: nil, quoted_newlines: nil, encoding: nil,
|
1139
|
+
delimiter: nil, ignore_unknown: nil, max_bad_records: nil, quote: nil,
|
1140
|
+
skip_leading: nil, schema: nil, job_id: nil, prefix: nil, labels: nil, autodetect: nil,
|
1141
|
+
null_marker: nil, dryrun: nil, create_session: nil, session_id: nil, &block
|
1142
|
+
ensure_service!
|
1143
|
+
dataset_id ||= "_SESSION" unless create_session.nil? && session_id.nil?
|
1144
|
+
session_dataset = dataset dataset_id, skip_lookup: true
|
1145
|
+
table = session_dataset.table table_id, skip_lookup: true
|
1146
|
+
table.load_job files,
|
1147
|
+
format: format, create: create, write: write, projection_fields: projection_fields,
|
1148
|
+
jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
|
1149
|
+
delimiter: delimiter, ignore_unknown: ignore_unknown,
|
1150
|
+
max_bad_records: max_bad_records, quote: quote, skip_leading: skip_leading,
|
1151
|
+
dryrun: dryrun, schema: schema, job_id: job_id, prefix: prefix, labels: labels,
|
1152
|
+
autodetect: autodetect, null_marker: null_marker, create_session: create_session,
|
1153
|
+
session_id: session_id, &block
|
1154
|
+
end
|
1155
|
+
|
1156
|
+
##
|
1157
|
+
# Loads data into the provided destination table using a synchronous
|
1158
|
+
# method that blocks for a response. Timeouts and transient errors are
|
1159
|
+
# generally handled as needed to complete the job. See also
|
1160
|
+
# {#load_job}.
|
1161
|
+
#
|
1162
|
+
# For the source of the data, you can pass a google-cloud storage file
|
1163
|
+
# path or a google-cloud-storage `File` instance. Or, you can upload a
|
1164
|
+
# file directly. See [Loading Data with a POST
|
1165
|
+
# Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
|
1166
|
+
#
|
1167
|
+
# The geographic location for the job ("US", "EU", etc.) can be set via
|
1168
|
+
# {LoadJob::Updater#location=} in a block passed to this method.
|
1169
|
+
#
|
1170
|
+
# @param [String] table_id The destination table to load the data into.
|
1171
|
+
# @param [File, Google::Cloud::Storage::File, String, URI,
|
1172
|
+
# Array<Google::Cloud::Storage::File, String, URI>] files
|
1173
|
+
# A file or the URI of a Google Cloud Storage file, or an Array of
|
1174
|
+
# those, containing data to load into the table.
|
1175
|
+
# @param [String] format The exported file format. The default value is
|
1176
|
+
# `csv`.
|
1177
|
+
#
|
1178
|
+
# The following values are supported:
|
1179
|
+
#
|
1180
|
+
# * `csv` - CSV
|
1181
|
+
# * `json` - [Newline-delimited JSON](https://jsonlines.org/)
|
1182
|
+
# * `avro` - [Avro](http://avro.apache.org/)
|
1183
|
+
# * `orc` - [ORC](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-orc)
|
1184
|
+
# * `parquet` - [Parquet](https://parquet.apache.org/)
|
1185
|
+
# * `datastore_backup` - Cloud Datastore backup
|
1186
|
+
# @param [String] create Specifies whether the job is allowed to create
|
1187
|
+
# new tables. The default value is `needed`.
|
1188
|
+
#
|
1189
|
+
# The following values are supported:
|
1190
|
+
#
|
1191
|
+
# * `needed` - Create the table if it does not exist.
|
1192
|
+
# * `never` - The table must already exist. A 'notFound' error is
|
1193
|
+
# raised if the table does not exist.
|
1194
|
+
# @param [String] dataset_id The ID of the dataset containing the destination table.
|
1195
|
+
# Defaults to "_SESSION".
|
1196
|
+
# @param [String] write Specifies how to handle data already present in
|
1197
|
+
# the table. The default value is `append`.
|
1198
|
+
#
|
1199
|
+
# The following values are supported:
|
1200
|
+
#
|
1201
|
+
# * `truncate` - BigQuery overwrites the table data.
|
1202
|
+
# * `append` - BigQuery appends the data to the table.
|
1203
|
+
# * `empty` - An error will be returned if the table already contains
|
1204
|
+
# data.
|
1205
|
+
# @param [Array<String>] projection_fields If the `format` option is set
|
1206
|
+
# to `datastore_backup`, indicates which entity properties to load
|
1207
|
+
# from a Cloud Datastore backup. Property names are case sensitive and
|
1208
|
+
# must be top-level properties. If not set, BigQuery loads all
|
1209
|
+
# properties. If any named property isn't found in the Cloud Datastore
|
1210
|
+
# backup, an invalid error is returned.
|
1211
|
+
# @param [Boolean] jagged_rows Accept rows that are missing trailing
|
1212
|
+
# optional columns. The missing values are treated as nulls. If
|
1213
|
+
# `false`, records with missing trailing columns are treated as bad
|
1214
|
+
# records, and if there are too many bad records, an invalid error is
|
1215
|
+
# returned in the job result. The default value is `false`. Only
|
1216
|
+
# applicable to CSV, ignored for other formats.
|
1217
|
+
# @param [Boolean] quoted_newlines Indicates if BigQuery should allow
|
1218
|
+
# quoted data sections that contain newline characters in a CSV file.
|
1219
|
+
# The default value is `false`.
|
1220
|
+
# @param [Boolean] autodetect Indicates if BigQuery should
|
1221
|
+
# automatically infer the options and schema for CSV and JSON sources.
|
1222
|
+
# The default value is `false`.
|
1223
|
+
# @param [String] encoding The character encoding of the data. The
|
1224
|
+
# supported values are `UTF-8` or `ISO-8859-1`. The default value is
|
1225
|
+
# `UTF-8`.
|
1226
|
+
# @param [String] delimiter Specifies the separator for fields in a CSV
|
1227
|
+
# file. BigQuery converts the string to `ISO-8859-1` encoding, and
|
1228
|
+
# then uses the first byte of the encoded string to split the data in
|
1229
|
+
# its raw, binary state. Default is <code>,</code>.
|
1230
|
+
# @param [Boolean] ignore_unknown Indicates if BigQuery should allow
|
1231
|
+
# extra values that are not represented in the table schema. If true,
|
1232
|
+
# the extra values are ignored. If false, records with extra columns
|
1233
|
+
# are treated as bad records, and if there are too many bad records,
|
1234
|
+
# an invalid error is returned in the job result. The default value is
|
1235
|
+
# `false`.
|
1236
|
+
#
|
1237
|
+
# The `format` property determines what BigQuery treats as an extra
|
1238
|
+
# value:
|
1239
|
+
#
|
1240
|
+
# * `CSV`: Trailing columns
|
1241
|
+
# * `JSON`: Named values that don't match any column names
|
1242
|
+
# @param [Integer] max_bad_records The maximum number of bad records
|
1243
|
+
# that BigQuery can ignore when running the job. If the number of bad
|
1244
|
+
# records exceeds this value, an invalid error is returned in the job
|
1245
|
+
# result. The default value is `0`, which requires that all records
|
1246
|
+
# are valid.
|
1247
|
+
# @param [String] null_marker Specifies a string that represents a null
|
1248
|
+
# value in a CSV file. For example, if you specify `\N`, BigQuery
|
1249
|
+
# interprets `\N` as a null value when loading a CSV file. The default
|
1250
|
+
# value is the empty string. If you set this property to a custom
|
1251
|
+
# value, BigQuery throws an error if an empty string is present for
|
1252
|
+
# all data types except for STRING and BYTE. For STRING and BYTE
|
1253
|
+
# columns, BigQuery interprets the empty string as an empty value.
|
1254
|
+
# @param [String] quote The value that is used to quote data sections in
|
1255
|
+
# a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
|
1256
|
+
# then uses the first byte of the encoded string to split the data in
|
1257
|
+
# its raw, binary state. The default value is a double-quote
|
1258
|
+
# <code>"</code>. If your data does not contain quoted sections, set
|
1259
|
+
# the property value to an empty string. If your data contains quoted
|
1260
|
+
# newline characters, you must also set the allowQuotedNewlines
|
1261
|
+
# property to true.
|
1262
|
+
# @param [Integer] skip_leading The number of rows at the top of a CSV
|
1263
|
+
# file that BigQuery will skip when loading the data. The default
|
1264
|
+
# value is `0`. This property is useful if you have header rows in the
|
1265
|
+
# file that should be skipped.
|
1266
|
+
# @param [Google::Cloud::Bigquery::Schema] schema The schema for the
|
1267
|
+
# destination table. Optional. The schema can be omitted if the
|
1268
|
+
# destination table already exists, or if you're loading data from a
|
1269
|
+
# Google Cloud Datastore backup.
|
1270
|
+
#
|
1271
|
+
# See {Project#schema} for the creation of the schema for use with
|
1272
|
+
# this option. Also note that for most use cases, the block yielded by
|
1273
|
+
# this method is a more convenient way to configure the schema.
|
1274
|
+
# @param [String] session_id Session ID in which the load job must run.
|
1275
|
+
#
|
1276
|
+
# @yield [updater] A block for setting the schema of the destination
|
1277
|
+
# table and other options for the load job. The schema can be omitted
|
1278
|
+
# if the destination table already exists, or if you're loading data
|
1279
|
+
# from a Google Cloud Datastore backup.
|
1280
|
+
# @yieldparam [Google::Cloud::Bigquery::LoadJob::Updater] updater An
|
1281
|
+
# updater to modify the load job and its schema.
|
1282
|
+
#
|
1283
|
+
# @return [Boolean] Returns `true` if the load job was successful.
|
1284
|
+
#
|
1285
|
+
# @example
|
1286
|
+
# require "google/cloud/bigquery"
|
1287
|
+
#
|
1288
|
+
# bigquery = Google::Cloud::Bigquery.new
|
1289
|
+
#
|
1290
|
+
# gs_url = "gs://my-bucket/file-name.csv"
|
1291
|
+
# bigquery.load "my_new_table", gs_url, dataset_id: "my_dataset" do |schema|
|
1292
|
+
# schema.string "first_name", mode: :required
|
1293
|
+
# schema.record "cities_lived", mode: :repeated do |nested_schema|
|
1294
|
+
# nested_schema.string "place", mode: :required
|
1295
|
+
# nested_schema.integer "number_of_years", mode: :required
|
1296
|
+
# end
|
1297
|
+
# end
|
1298
|
+
#
|
1299
|
+
# @!group Data
|
1300
|
+
#
|
1301
|
+
def load table_id, files, dataset_id: "_SESSION", format: nil, create: nil, write: nil,
|
1302
|
+
projection_fields: nil, jagged_rows: nil, quoted_newlines: nil, encoding: nil,
|
1303
|
+
delimiter: nil, ignore_unknown: nil, max_bad_records: nil, quote: nil,
|
1304
|
+
skip_leading: nil, schema: nil, autodetect: nil, null_marker: nil, session_id: nil, &block
|
1305
|
+
job = load_job table_id, files, dataset_id: dataset_id,
|
1306
|
+
format: format, create: create, write: write, projection_fields: projection_fields,
|
1307
|
+
jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
|
1308
|
+
delimiter: delimiter, ignore_unknown: ignore_unknown, max_bad_records: max_bad_records,
|
1309
|
+
quote: quote, skip_leading: skip_leading, schema: schema, autodetect: autodetect,
|
1310
|
+
null_marker: null_marker, session_id: session_id, &block
|
1311
|
+
|
1312
|
+
job.wait_until_done!
|
1313
|
+
ensure_job_succeeded! job
|
1314
|
+
true
|
1315
|
+
end
|
1316
|
+
|
945
1317
|
##
|
946
1318
|
# Creates a new External::DataSource (or subclass) object that
|
947
1319
|
# represents the external data source that can be queried from directly,
|
@@ -748,11 +748,10 @@ module Google
|
|
748
748
|
def data token: nil, max: nil, start: nil
|
749
749
|
return nil unless done?
|
750
750
|
return Data.from_gapi_json({ rows: [] }, nil, @gapi, service) if dryrun?
|
751
|
-
if ddl? || dml?
|
751
|
+
if ddl? || dml? || !ensure_schema!
|
752
752
|
data_hash = { totalRows: nil, rows: [] }
|
753
753
|
return Data.from_gapi_json data_hash, nil, @gapi, service
|
754
754
|
end
|
755
|
-
ensure_schema!
|
756
755
|
|
757
756
|
data_hash = service.list_tabledata destination_table_dataset_id,
|
758
757
|
destination_table_table_id,
|
@@ -928,6 +927,7 @@ module Google
|
|
928
927
|
# | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
|
929
928
|
# | `DATE` | `Date` | |
|
930
929
|
# | `GEOGRAPHY` | `String` (WKT or GeoJSON) | NOT AUTOMATIC: Must be mapped using `types`. |
|
930
|
+
# | `JSON` | `String` (Stringified JSON) | String, as JSON does not have a schema to verify.|
|
931
931
|
# | `TIMESTAMP` | `Time` | |
|
932
932
|
# | `TIME` | `Google::Cloud::BigQuery::Time` | |
|
933
933
|
# | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
|
@@ -965,6 +965,7 @@ module Google
|
|
965
965
|
# | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
|
966
966
|
# | `DATE` | `Date` | |
|
967
967
|
# | `GEOGRAPHY` | `String` (WKT or GeoJSON) | NOT AUTOMATIC: Must be mapped using `types`. |
|
968
|
+
# | `JSON` | `String` (Stringified JSON) | String, as JSON does not have a schema to verify.|
|
968
969
|
# | `TIMESTAMP` | `Time` | |
|
969
970
|
# | `TIME` | `Google::Cloud::BigQuery::Time` | |
|
970
971
|
# | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
|
@@ -991,6 +992,7 @@ module Google
|
|
991
992
|
# * `:DATETIME`
|
992
993
|
# * `:DATE`
|
993
994
|
# * `:GEOGRAPHY`
|
995
|
+
# * `:JSON`
|
994
996
|
# * `:TIMESTAMP`
|
995
997
|
# * `:TIME`
|
996
998
|
# * `:BYTES`
|
@@ -1791,10 +1793,10 @@ module Google
|
|
1791
1793
|
protected
|
1792
1794
|
|
1793
1795
|
def ensure_schema!
|
1794
|
-
return unless destination_schema.nil?
|
1796
|
+
return true unless destination_schema.nil?
|
1795
1797
|
|
1796
1798
|
query_results_gapi = service.job_query_results job_id, location: location, max: 0
|
1797
|
-
|
1799
|
+
return false if query_results_gapi.schema.nil?
|
1798
1800
|
@destination_schema_gapi = query_results_gapi.schema
|
1799
1801
|
end
|
1800
1802
|
|
@@ -52,6 +52,7 @@ module Google
|
|
52
52
|
"GEOGRAPHY",
|
53
53
|
"INTEGER",
|
54
54
|
"INT64",
|
55
|
+
"JSON",
|
55
56
|
"NUMERIC",
|
56
57
|
"RECORD",
|
57
58
|
"STRING",
|
@@ -98,6 +99,7 @@ module Google
|
|
98
99
|
# * `FLOAT`
|
99
100
|
# * `FLOAT64` (same as `FLOAT`)
|
100
101
|
# * `GEOGRAPHY`
|
102
|
+
# * `JSON`
|
101
103
|
# * `INTEGER`
|
102
104
|
# * `INT64` (same as `INTEGER`)
|
103
105
|
# * `NUMERIC`
|
@@ -125,6 +127,7 @@ module Google
|
|
125
127
|
# * `FLOAT`
|
126
128
|
# * `FLOAT64` (same as `FLOAT`)
|
127
129
|
# * `GEOGRAPHY`
|
130
|
+
# * `JSON`
|
128
131
|
# * `INTEGER`
|
129
132
|
# * `INT64` (same as `INTEGER`)
|
130
133
|
# * `NUMERIC`
|
@@ -263,6 +266,53 @@ module Google
|
|
263
266
|
@gapi.update! policy_tags: policy_tag_list
|
264
267
|
end
|
265
268
|
|
269
|
+
##
|
270
|
+
# The default value of a field using a SQL expression. It can only
|
271
|
+
# be set for top level fields (columns). Default value for the entire struct or
|
272
|
+
# array is set using a struct or array expression. The valid SQL expressions are:
|
273
|
+
# - Literals for all data types, including STRUCT and ARRAY.
|
274
|
+
# - The following functions:
|
275
|
+
# `CURRENT_TIMESTAMP`
|
276
|
+
# `CURRENT_TIME`
|
277
|
+
# `CURRENT_DATE`
|
278
|
+
# `CURRENT_DATETIME`
|
279
|
+
# `GENERATE_UUID`
|
280
|
+
# `RAND`
|
281
|
+
# `SESSION_USER`
|
282
|
+
# `ST_GEOPOINT`
|
283
|
+
# - Struct or array composed with the above allowed functions, for example:
|
284
|
+
# "[CURRENT_DATE(), DATE '2020-01-01']"
|
285
|
+
#
|
286
|
+
# @return [String] The default value expression of the field.
|
287
|
+
#
|
288
|
+
def default_value_expression
|
289
|
+
@gapi.default_value_expression
|
290
|
+
end
|
291
|
+
|
292
|
+
##
|
293
|
+
# Updates the default value expression of the field.
|
294
|
+
#
|
295
|
+
# @param default_value_expression [String] The default value of a field
|
296
|
+
# using a SQL expression. It can only be set for top level fields (columns).
|
297
|
+
# Use a struct or array expression to specify default value for the entire struct or
|
298
|
+
# array. The valid SQL expressions are:
|
299
|
+
# - Literals for all data types, including STRUCT and ARRAY.
|
300
|
+
# - The following functions:
|
301
|
+
# `CURRENT_TIMESTAMP`
|
302
|
+
# `CURRENT_TIME`
|
303
|
+
# `CURRENT_DATE`
|
304
|
+
# `CURRENT_DATETIME`
|
305
|
+
# `GENERATE_UUID`
|
306
|
+
# `RAND`
|
307
|
+
# `SESSION_USER`
|
308
|
+
# `ST_GEOPOINT`
|
309
|
+
# - Struct or array composed with the above allowed functions, for example:
|
310
|
+
# "[CURRENT_DATE(), DATE '2020-01-01']"
|
311
|
+
#
|
312
|
+
def default_value_expression= default_value_expression
|
313
|
+
@gapi.update! default_value_expression: default_value_expression
|
314
|
+
end
|
315
|
+
|
266
316
|
##
|
267
317
|
# The maximum length of values of this field for {#string?} or {#bytes?} fields. If `max_length` is not
|
268
318
|
# specified, no maximum length constraint is imposed on this field. If type = `STRING`, then `max_length`
|
@@ -409,6 +459,15 @@ module Google
|
|
409
459
|
type == "GEOGRAPHY"
|
410
460
|
end
|
411
461
|
|
462
|
+
##
|
463
|
+
# Checks if the type of the field is `JSON`.
|
464
|
+
#
|
465
|
+
# @return [Boolean] `true` when `JSON`, `false` otherwise.
|
466
|
+
#
|
467
|
+
def json?
|
468
|
+
type == "JSON"
|
469
|
+
end
|
470
|
+
|
412
471
|
##
|
413
472
|
# Checks if the type of the field is `RECORD`.
|
414
473
|
#
|
@@ -846,6 +905,30 @@ module Google
|
|
846
905
|
add_field name, :geography, description: description, mode: mode, policy_tags: policy_tags
|
847
906
|
end
|
848
907
|
|
908
|
+
##
|
909
|
+
# Adds a json field to the nested schema of a record field.
|
910
|
+
#
|
911
|
+
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#json_type
|
912
|
+
#
|
913
|
+
# @param [String] name The field name. The name must contain only
|
914
|
+
# letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
|
915
|
+
# start with a letter or underscore. The maximum length is 128
|
916
|
+
# characters.
|
917
|
+
# @param [String] description A description of the field.
|
918
|
+
# @param [Symbol] mode The field's mode. The possible values are
|
919
|
+
# `:nullable`, `:required`, and `:repeated`. The default value is
|
920
|
+
# `:nullable`.
|
921
|
+
# @param [Array<String>, String] policy_tags The policy tag list or
|
922
|
+
# single policy tag for the field. Policy tag identifiers are of
|
923
|
+
# the form `projects/*/locations/*/taxonomies/*/policyTags/*`.
|
924
|
+
# At most 1 policy tag is currently allowed.
|
925
|
+
#
|
926
|
+
def json name, description: nil, mode: :nullable, policy_tags: nil
|
927
|
+
record_check!
|
928
|
+
|
929
|
+
add_field name, :json, description: description, mode: mode, policy_tags: policy_tags
|
930
|
+
end
|
931
|
+
|
849
932
|
##
|
850
933
|
# Adds a record field to the nested schema of a record field. A block
|
851
934
|
# must be passed describing the nested fields of the record. For more
|