google-cloud-bigquery 1.42.0 → 1.49.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +72 -0
- data/lib/google/cloud/bigquery/convert.rb +2 -3
- data/lib/google/cloud/bigquery/dataset.rb +78 -13
- data/lib/google/cloud/bigquery/load_job.rb +495 -26
- data/lib/google/cloud/bigquery/project.rb +373 -1
- data/lib/google/cloud/bigquery/query_job.rb +6 -4
- data/lib/google/cloud/bigquery/schema/field.rb +83 -0
- data/lib/google/cloud/bigquery/schema.rb +351 -44
- data/lib/google/cloud/bigquery/service.rb +25 -5
- data/lib/google/cloud/bigquery/table/async_inserter.rb +1 -0
- data/lib/google/cloud/bigquery/table.rb +458 -32
- data/lib/google/cloud/bigquery/version.rb +1 -1
- data/lib/google/cloud/bigquery.rb +5 -3
- data/lib/google-cloud-bigquery.rb +9 -3
- metadata +18 -164
@@ -67,6 +67,15 @@ module Google
         @service = service
       end

+      ##
+      # The universe domain the client is connected to.
+      #
+      # @return [String]
+      #
+      def universe_domain
+        service.universe_domain
+      end
+
       ##
       # The BigQuery project connected to.
       #
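The new `universe_domain` reader simply delegates to the service object. A minimal usage sketch (assuming default credentials; the public Google Cloud universe reports `googleapis.com`):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# Reports which universe the client is configured against,
# e.g. "googleapis.com" for the public Google Cloud universe.
puts bigquery.universe_domain
```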
@@ -304,6 +313,7 @@ module Google
       # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
       # | `DATE` | `Date` | |
       # | `GEOGRAPHY` | `String` (WKT or GeoJSON) | NOT AUTOMATIC: Must be mapped using `types`, below. |
+      # | `JSON` | `String` (Stringified JSON) | String, as JSON does not have a schema to verify. |
       # | `TIMESTAMP` | `Time` | |
       # | `TIME` | `Google::Cloud::BigQuery::Time` | |
       # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
@@ -330,6 +340,7 @@ module Google
       # * `:DATETIME`
       # * `:DATE`
       # * `:GEOGRAPHY`
+      # * `:JSON`
       # * `:TIMESTAMP`
       # * `:TIME`
       # * `:BYTES`
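Per the table and list above, `JSON` parameters travel as stringified JSON and are never inferred, so they must be declared in `types`. A sketch of what a query with a JSON parameter could look like (parameter and column names are illustrative):

```ruby
require "google/cloud/bigquery"
require "json"

bigquery = Google::Cloud::Bigquery.new

# JSON has no schema to verify, so the value is passed as a string
# and the parameter type must be mapped explicitly.
payload = JSON.generate name: "Alice", age: 30
data = bigquery.query "SELECT @payload AS payload",
                      params: { payload: payload },
                      types:  { payload: :JSON }
data.each { |row| puts row[:payload] }
```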
@@ -622,6 +633,7 @@ module Google
                      create_session: nil,
                      session_id: nil
         ensure_service!
+        project ||= self.project
         options = {
           params: params,
           types: types,
@@ -633,7 +645,7 @@ module Google
           write: write,
           dryrun: dryrun,
           dataset: dataset,
-          project:
+          project: project,
           standard_sql: standard_sql,
           legacy_sql: legacy_sql,
           large_results: large_results,
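Together these two hunks make the `project` option fall back to the client's own project rather than passing `nil` through to the service. A sketch, assuming the `project:` keyword on `query_job` (the project ID is illustrative):

```ruby
# Run the query job under a different project; omitting project:
# now defaults to bigquery.project rather than nil.
job = bigquery.query_job "SELECT 1", project: "my-other-project"
job.wait_until_done!
```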
@@ -691,6 +703,7 @@ module Google
       # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
       # | `DATE` | `Date` | |
       # | `GEOGRAPHY` | `String` (WKT or GeoJSON) | NOT AUTOMATIC: Must be mapped using `types`, below. |
+      # | `JSON` | `String` (Stringified JSON) | String, as JSON does not have a schema to verify. |
       # | `TIMESTAMP` | `Time` | |
       # | `TIME` | `Google::Cloud::BigQuery::Time` | |
       # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
@@ -717,6 +730,7 @@ module Google
       # * `:DATETIME`
       # * `:DATE`
       # * `:GEOGRAPHY`
+      # * `:JSON`
       # * `:TIMESTAMP`
       # * `:TIME`
       # * `:BYTES`
@@ -942,6 +956,364 @@ module Google
         job.data max: max
       end

+      ##
+      # Loads data into the provided destination table using an asynchronous
+      # method. In this method, a {LoadJob} is immediately returned. The
+      # caller may poll the service by repeatedly calling {Job#reload!} and
+      # {Job#done?} to detect when the job is done, or simply block until the
+      # job is done by calling {Job#wait_until_done!}. See also {#load}.
+      #
+      # For the source of the data, you can pass a google-cloud storage file
+      # path or a google-cloud-storage `File` instance. Or, you can upload a
+      # file directly. See [Loading Data with a POST
+      # Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
+      #
+      # The geographic location for the job ("US", "EU", etc.) can be set via
+      # {LoadJob::Updater#location=} in a block passed to this method.
+      #
+      # @param [String] table_id The destination table to load the data into.
+      # @param [File, Google::Cloud::Storage::File, String, URI,
+      #   Array<Google::Cloud::Storage::File, String, URI>] files
+      #   A file or the URI of a Google Cloud Storage file, or an Array of
+      #   those, containing data to load into the table.
+      # @param [String] format The source file format. The default value is
+      #   `csv`.
+      #
+      #   The following values are supported:
+      #
+      #   * `csv` - CSV
+      #   * `json` - [Newline-delimited JSON](https://jsonlines.org/)
+      #   * `avro` - [Avro](http://avro.apache.org/)
+      #   * `orc` - [ORC](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-orc)
+      #   * `parquet` - [Parquet](https://parquet.apache.org/)
+      #   * `datastore_backup` - Cloud Datastore backup
+      # @param [String] dataset_id The ID of the dataset containing the
+      #   destination table. For a load job with create_session/session_id it
+      #   defaults to "_SESSION".
+      # @param [String] create Specifies whether the job is allowed to create
+      #   new tables. The default value is `needed`.
+      #
+      #   The following values are supported:
+      #
+      #   * `needed` - Create the table if it does not exist.
+      #   * `never` - The table must already exist. A 'notFound' error is
+      #     raised if the table does not exist.
+      # @param [String] write Specifies how to handle data already present in
+      #   the table. The default value is `append`.
+      #
+      #   The following values are supported:
+      #
+      #   * `truncate` - BigQuery overwrites the table data.
+      #   * `append` - BigQuery appends the data to the table.
+      #   * `empty` - An error will be returned if the table already contains
+      #     data.
+      # @param [Array<String>] projection_fields If the `format` option is set
+      #   to `datastore_backup`, indicates which entity properties to load
+      #   from a Cloud Datastore backup. Property names are case sensitive and
+      #   must be top-level properties. If not set, BigQuery loads all
+      #   properties. If any named property isn't found in the Cloud Datastore
+      #   backup, an invalid error is returned.
+      # @param [Boolean] jagged_rows Accept rows that are missing trailing
+      #   optional columns. The missing values are treated as nulls. If
+      #   `false`, records with missing trailing columns are treated as bad
+      #   records, and if there are too many bad records, an invalid error is
+      #   returned in the job result. The default value is `false`. Only
+      #   applicable to CSV, ignored for other formats.
+      # @param [Boolean] quoted_newlines Indicates if BigQuery should allow
+      #   quoted data sections that contain newline characters in a CSV file.
+      #   The default value is `false`.
+      # @param [Boolean] autodetect Indicates if BigQuery should
+      #   automatically infer the options and schema for CSV and JSON sources.
+      #   The default value is `false`.
+      # @param [String] encoding The character encoding of the data. The
+      #   supported values are `UTF-8` or `ISO-8859-1`. The default value is
+      #   `UTF-8`.
+      # @param [String] delimiter Specifies the separator for fields in a CSV
+      #   file. BigQuery converts the string to `ISO-8859-1` encoding, and
+      #   then uses the first byte of the encoded string to split the data in
+      #   its raw, binary state. Default is <code>,</code>.
+      # @param [Boolean] ignore_unknown Indicates if BigQuery should allow
+      #   extra values that are not represented in the table schema. If true,
+      #   the extra values are ignored. If false, records with extra columns
+      #   are treated as bad records, and if there are too many bad records,
+      #   an invalid error is returned in the job result. The default value is
+      #   `false`.
+      #
+      #   The `format` property determines what BigQuery treats as an extra
+      #   value:
+      #
+      #   * `CSV`: Trailing columns
+      #   * `JSON`: Named values that don't match any column names
+      # @param [Integer] max_bad_records The maximum number of bad records
+      #   that BigQuery can ignore when running the job. If the number of bad
+      #   records exceeds this value, an invalid error is returned in the job
+      #   result. The default value is `0`, which requires that all records
+      #   are valid.
+      # @param [String] null_marker Specifies a string that represents a null
+      #   value in a CSV file. For example, if you specify `\N`, BigQuery
+      #   interprets `\N` as a null value when loading a CSV file. The default
+      #   value is the empty string. If you set this property to a custom
+      #   value, BigQuery throws an error if an empty string is present for
+      #   all data types except for STRING and BYTE. For STRING and BYTE
+      #   columns, BigQuery interprets the empty string as an empty value.
+      # @param [String] quote The value that is used to quote data sections in
+      #   a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
+      #   then uses the first byte of the encoded string to split the data in
+      #   its raw, binary state. The default value is a double-quote
+      #   <code>"</code>. If your data does not contain quoted sections, set
+      #   the property value to an empty string. If your data contains quoted
+      #   newline characters, you must also set the allowQuotedNewlines
+      #   property to true.
+      # @param [Integer] skip_leading The number of rows at the top of a CSV
+      #   file that BigQuery will skip when loading the data. The default
+      #   value is `0`. This property is useful if you have header rows in the
+      #   file that should be skipped.
+      # @param [Google::Cloud::Bigquery::Schema] schema The schema for the
+      #   destination table. Optional. The schema can be omitted if the
+      #   destination table already exists, or if you're loading data from a
+      #   Google Cloud Datastore backup.
+      #
+      #   See {Project#schema} for the creation of the schema for use with
+      #   this option. Also note that for most use cases, the block yielded by
+      #   this method is a more convenient way to configure the schema.
+      # @param [String] job_id A user-defined ID for the load job. The ID
+      #   must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
+      #   (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
+      #   `job_id` is provided, then `prefix` will not be used.
+      #
+      #   See [Generating a job
+      #   ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+      # @param [String] prefix A string, usually human-readable, that will be
+      #   prepended to a generated value to produce a unique job ID. For
+      #   example, the prefix `daily_import_job_` can be given to generate a
+      #   job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
+      #   prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
+      #   underscores (`_`), or dashes (`-`). The maximum length of the entire ID
+      #   is 1,024 characters. If `job_id` is provided, then `prefix` will not
+      #   be used.
+      # @param [Hash] labels A hash of user-provided labels associated with
+      #   the job. You can use these to organize and group your jobs.
+      #
+      #   The labels applied to a resource must meet the following requirements:
+      #
+      #   * Each resource can have multiple labels, up to a maximum of 64.
+      #   * Each label must be a key-value pair.
+      #   * Keys have a minimum length of 1 character and a maximum length of
+      #     63 characters, and cannot be empty. Values can be empty, and have
+      #     a maximum length of 63 characters.
+      #   * Keys and values can contain only lowercase letters, numeric characters,
+      #     underscores, and dashes. All characters must use UTF-8 encoding, and
+      #     international characters are allowed.
+      #   * The key portion of a label must be unique. However, you can use the
+      #     same key with multiple resources.
+      #   * Keys must start with a lowercase letter or international character.
+      # @param [Boolean] create_session If set to true, a new session will be
+      #   created and the load job will happen in the table created within that
+      #   session. Note: This works only for tables in the `_SESSION` dataset;
+      #   otherwise the property is ignored by the backend.
+      # @param [String] session_id Session ID in which the load job must run.
+      #
+      # @yield [updater] A block for setting the schema and other
+      #   options for the destination table. The schema can be omitted if the
+      #   destination table already exists, or if you're loading data from a
+      #   Google Cloud Datastore backup.
+      # @yieldparam [Google::Cloud::Bigquery::LoadJob::Updater] updater An
+      #   updater to modify the load job and its schema.
+      # @param [Boolean] dryrun If set, don't actually run this job. Behavior
+      #   is undefined however for non-query jobs and may result in an error.
+      #   Deprecated.
+      #
+      # @return [Google::Cloud::Bigquery::LoadJob] A new load job object.
+      #
+      # @example
+      #   require "google/cloud/bigquery"
+      #
+      #   bigquery = Google::Cloud::Bigquery.new
+      #
+      #   gs_url = "gs://my-bucket/file-name.csv"
+      #   load_job = bigquery.load_job "temp_table", gs_url, autodetect: true, create_session: true
+      #   load_job.wait_until_done!
+      #   session_id = load_job.statistics["sessionInfo"]["sessionId"]
+      #
+      def load_job table_id, files, dataset_id: nil, format: nil, create: nil, write: nil,
+                   projection_fields: nil, jagged_rows: nil, quoted_newlines: nil, encoding: nil,
+                   delimiter: nil, ignore_unknown: nil, max_bad_records: nil, quote: nil,
+                   skip_leading: nil, schema: nil, job_id: nil, prefix: nil, labels: nil, autodetect: nil,
+                   null_marker: nil, dryrun: nil, create_session: nil, session_id: nil, &block
+        ensure_service!
+        dataset_id ||= "_SESSION" unless create_session.nil? && session_id.nil?
+        session_dataset = dataset dataset_id, skip_lookup: true
+        table = session_dataset.table table_id, skip_lookup: true
+        table.load_job files,
+                       format: format, create: create, write: write, projection_fields: projection_fields,
+                       jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
+                       delimiter: delimiter, ignore_unknown: ignore_unknown,
+                       max_bad_records: max_bad_records, quote: quote, skip_leading: skip_leading,
+                       dryrun: dryrun, schema: schema, job_id: job_id, prefix: prefix, labels: labels,
+                       autodetect: autodetect, null_marker: null_marker, create_session: create_session,
+                       session_id: session_id, &block
+      end
+
+      ##
+      # Loads data into the provided destination table using a synchronous
+      # method that blocks for a response. Timeouts and transient errors are
+      # generally handled as needed to complete the job. See also
+      # {#load_job}.
+      #
+      # For the source of the data, you can pass a google-cloud storage file
+      # path or a google-cloud-storage `File` instance. Or, you can upload a
+      # file directly. See [Loading Data with a POST
+      # Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
+      #
+      # The geographic location for the job ("US", "EU", etc.) can be set via
+      # {LoadJob::Updater#location=} in a block passed to this method.
+      #
+      # @param [String] table_id The destination table to load the data into.
+      # @param [File, Google::Cloud::Storage::File, String, URI,
+      #   Array<Google::Cloud::Storage::File, String, URI>] files
+      #   A file or the URI of a Google Cloud Storage file, or an Array of
+      #   those, containing data to load into the table.
+      # @param [String] format The source file format. The default value is
+      #   `csv`.
+      #
+      #   The following values are supported:
+      #
+      #   * `csv` - CSV
+      #   * `json` - [Newline-delimited JSON](https://jsonlines.org/)
+      #   * `avro` - [Avro](http://avro.apache.org/)
+      #   * `orc` - [ORC](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-orc)
+      #   * `parquet` - [Parquet](https://parquet.apache.org/)
+      #   * `datastore_backup` - Cloud Datastore backup
+      # @param [String] create Specifies whether the job is allowed to create
+      #   new tables. The default value is `needed`.
+      #
+      #   The following values are supported:
+      #
+      #   * `needed` - Create the table if it does not exist.
+      #   * `never` - The table must already exist. A 'notFound' error is
+      #     raised if the table does not exist.
+      # @param [String] dataset_id The ID of the dataset containing the
+      #   destination table. For a load job with a session it defaults to
+      #   "_SESSION".
+      # @param [String] write Specifies how to handle data already present in
+      #   the table. The default value is `append`.
+      #
+      #   The following values are supported:
+      #
+      #   * `truncate` - BigQuery overwrites the table data.
+      #   * `append` - BigQuery appends the data to the table.
+      #   * `empty` - An error will be returned if the table already contains
+      #     data.
+      # @param [Array<String>] projection_fields If the `format` option is set
+      #   to `datastore_backup`, indicates which entity properties to load
+      #   from a Cloud Datastore backup. Property names are case sensitive and
+      #   must be top-level properties. If not set, BigQuery loads all
+      #   properties. If any named property isn't found in the Cloud Datastore
+      #   backup, an invalid error is returned.
+      # @param [Boolean] jagged_rows Accept rows that are missing trailing
+      #   optional columns. The missing values are treated as nulls. If
+      #   `false`, records with missing trailing columns are treated as bad
+      #   records, and if there are too many bad records, an invalid error is
+      #   returned in the job result. The default value is `false`. Only
+      #   applicable to CSV, ignored for other formats.
+      # @param [Boolean] quoted_newlines Indicates if BigQuery should allow
+      #   quoted data sections that contain newline characters in a CSV file.
+      #   The default value is `false`.
+      # @param [Boolean] autodetect Indicates if BigQuery should
+      #   automatically infer the options and schema for CSV and JSON sources.
+      #   The default value is `false`.
+      # @param [String] encoding The character encoding of the data. The
+      #   supported values are `UTF-8` or `ISO-8859-1`. The default value is
+      #   `UTF-8`.
+      # @param [String] delimiter Specifies the separator for fields in a CSV
+      #   file. BigQuery converts the string to `ISO-8859-1` encoding, and
+      #   then uses the first byte of the encoded string to split the data in
+      #   its raw, binary state. Default is <code>,</code>.
+      # @param [Boolean] ignore_unknown Indicates if BigQuery should allow
+      #   extra values that are not represented in the table schema. If true,
+      #   the extra values are ignored. If false, records with extra columns
+      #   are treated as bad records, and if there are too many bad records,
+      #   an invalid error is returned in the job result. The default value is
+      #   `false`.
+      #
+      #   The `format` property determines what BigQuery treats as an extra
+      #   value:
+      #
+      #   * `CSV`: Trailing columns
+      #   * `JSON`: Named values that don't match any column names
+      # @param [Integer] max_bad_records The maximum number of bad records
+      #   that BigQuery can ignore when running the job. If the number of bad
+      #   records exceeds this value, an invalid error is returned in the job
+      #   result. The default value is `0`, which requires that all records
+      #   are valid.
+      # @param [String] null_marker Specifies a string that represents a null
+      #   value in a CSV file. For example, if you specify `\N`, BigQuery
+      #   interprets `\N` as a null value when loading a CSV file. The default
+      #   value is the empty string. If you set this property to a custom
+      #   value, BigQuery throws an error if an empty string is present for
+      #   all data types except for STRING and BYTE. For STRING and BYTE
+      #   columns, BigQuery interprets the empty string as an empty value.
+      # @param [String] quote The value that is used to quote data sections in
+      #   a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
+      #   then uses the first byte of the encoded string to split the data in
+      #   its raw, binary state. The default value is a double-quote
+      #   <code>"</code>. If your data does not contain quoted sections, set
+      #   the property value to an empty string. If your data contains quoted
+      #   newline characters, you must also set the allowQuotedNewlines
+      #   property to true.
+      # @param [Integer] skip_leading The number of rows at the top of a CSV
+      #   file that BigQuery will skip when loading the data. The default
+      #   value is `0`. This property is useful if you have header rows in the
+      #   file that should be skipped.
+      # @param [Google::Cloud::Bigquery::Schema] schema The schema for the
+      #   destination table. Optional. The schema can be omitted if the
+      #   destination table already exists, or if you're loading data from a
+      #   Google Cloud Datastore backup.
+      #
+      #   See {Project#schema} for the creation of the schema for use with
+      #   this option. Also note that for most use cases, the block yielded by
+      #   this method is a more convenient way to configure the schema.
+      # @param [String] session_id Session ID in which the load job must run.
+      #
+      # @yield [updater] A block for setting the schema of the destination
+      #   table and other options for the load job. The schema can be omitted
+      #   if the destination table already exists, or if you're loading data
+      #   from a Google Cloud Datastore backup.
+      # @yieldparam [Google::Cloud::Bigquery::LoadJob::Updater] updater An
+      #   updater to modify the load job and its schema.
+      #
+      # @return [Boolean] Returns `true` if the load job was successful.
+      #
+      # @example
+      #   require "google/cloud/bigquery"
+      #
+      #   bigquery = Google::Cloud::Bigquery.new
+      #
+      #   gs_url = "gs://my-bucket/file-name.csv"
+      #   bigquery.load "my_new_table", gs_url, dataset_id: "my_dataset" do |schema|
+      #     schema.string "first_name", mode: :required
+      #     schema.record "cities_lived", mode: :repeated do |nested_schema|
+      #       nested_schema.string "place", mode: :required
+      #       nested_schema.integer "number_of_years", mode: :required
+      #     end
+      #   end
+      #
+      # @!group Data
+      #
+      def load table_id, files, dataset_id: "_SESSION", format: nil, create: nil, write: nil,
+               projection_fields: nil, jagged_rows: nil, quoted_newlines: nil, encoding: nil,
+               delimiter: nil, ignore_unknown: nil, max_bad_records: nil, quote: nil,
+               skip_leading: nil, schema: nil, autodetect: nil, null_marker: nil, session_id: nil, &block
+        job = load_job table_id, files, dataset_id: dataset_id,
+                       format: format, create: create, write: write, projection_fields: projection_fields,
+                       jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
+                       delimiter: delimiter, ignore_unknown: ignore_unknown, max_bad_records: max_bad_records,
+                       quote: quote, skip_leading: skip_leading, schema: schema, autodetect: autodetect,
+                       null_marker: null_marker, session_id: session_id, &block
+
+        job.wait_until_done!
+        ensure_job_succeeded! job
+        true
+      end
+
       ##
       # Creates a new External::DataSource (or subclass) object that
       # represents the external data source that can be queried from directly,
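The session workflow these project-level methods enable: the first `load_job` with `create_session: true` creates a session (with `dataset_id` defaulting to `_SESSION`), and follow-up loads reuse the session ID. A sketch extending the `@example` above (bucket and object names are illustrative):

```ruby
require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# First load creates the session; the table lives in the _SESSION dataset.
load_job = bigquery.load_job "temp_table", "gs://my-bucket/day1.csv",
                             autodetect: true, create_session: true
load_job.wait_until_done!
session_id = load_job.statistics["sessionInfo"]["sessionId"]

# Follow-up load reuses the session; blocks until the job succeeds.
bigquery.load "temp_table", "gs://my-bucket/day2.csv", session_id: session_id
```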
@@ -748,11 +748,10 @@ module Google
       def data token: nil, max: nil, start: nil
         return nil unless done?
         return Data.from_gapi_json({ rows: [] }, nil, @gapi, service) if dryrun?
-        if ddl? || dml?
+        if ddl? || dml? || !ensure_schema!
           data_hash = { totalRows: nil, rows: [] }
           return Data.from_gapi_json data_hash, nil, @gapi, service
         end
-        ensure_schema!

         data_hash = service.list_tabledata destination_table_dataset_id,
                                            destination_table_table_id,
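The effect of folding `ensure_schema!` into the guard: when no destination schema can be fetched, `data` now returns an empty result set instead of proceeding with a `nil` schema. A sketch (the DML statement is illustrative):

```ruby
job = bigquery.query_job "DELETE FROM my_dataset.events WHERE stale"
job.wait_until_done!

# For DDL/DML -- and now also when no schema is available --
# data yields a zero-row collection rather than failing.
job.data.each { |row| puts row }
```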
@@ -928,6 +927,7 @@ module Google
       # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
       # | `DATE` | `Date` | |
       # | `GEOGRAPHY` | `String` (WKT or GeoJSON) | NOT AUTOMATIC: Must be mapped using `types`. |
+      # | `JSON` | `String` (Stringified JSON) | String, as JSON does not have a schema to verify. |
       # | `TIMESTAMP` | `Time` | |
       # | `TIME` | `Google::Cloud::BigQuery::Time` | |
       # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
@@ -965,6 +965,7 @@ module Google
       # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
       # | `DATE` | `Date` | |
       # | `GEOGRAPHY` | `String` (WKT or GeoJSON) | NOT AUTOMATIC: Must be mapped using `types`. |
+      # | `JSON` | `String` (Stringified JSON) | String, as JSON does not have a schema to verify. |
       # | `TIMESTAMP` | `Time` | |
       # | `TIME` | `Google::Cloud::BigQuery::Time` | |
       # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
@@ -991,6 +992,7 @@ module Google
       # * `:DATETIME`
       # * `:DATE`
       # * `:GEOGRAPHY`
+      # * `:JSON`
       # * `:TIMESTAMP`
       # * `:TIME`
       # * `:BYTES`
@@ -1791,10 +1793,10 @@ module Google
       protected

       def ensure_schema!
-        return unless destination_schema.nil?
+        return true unless destination_schema.nil?

         query_results_gapi = service.job_query_results job_id, location: location, max: 0
-
+        return false if query_results_gapi.schema.nil?
         @destination_schema_gapi = query_results_gapi.schema
       end

@@ -52,6 +52,7 @@ module Google
          "GEOGRAPHY",
          "INTEGER",
          "INT64",
+         "JSON",
          "NUMERIC",
          "RECORD",
          "STRING",
@@ -98,6 +99,7 @@ module Google
        # * `FLOAT`
        # * `FLOAT64` (same as `FLOAT`)
        # * `GEOGRAPHY`
+       # * `JSON`
        # * `INTEGER`
        # * `INT64` (same as `INTEGER`)
        # * `NUMERIC`
@@ -125,6 +127,7 @@ module Google
        # * `FLOAT`
        # * `FLOAT64` (same as `FLOAT`)
        # * `GEOGRAPHY`
+       # * `JSON`
        # * `INTEGER`
        # * `INT64` (same as `INTEGER`)
        # * `NUMERIC`
@@ -263,6 +266,53 @@ module Google
          @gapi.update! policy_tags: policy_tag_list
        end

+       ##
+       # The default value of a field, as a SQL expression. It can only be
+       # set for top-level fields (columns). A default value for the entire
+       # struct or array is set using a struct or array expression. The
+       # valid SQL expressions are:
+       # - Literals for all data types, including STRUCT and ARRAY.
+       # - The following functions:
+       #     `CURRENT_TIMESTAMP`
+       #     `CURRENT_TIME`
+       #     `CURRENT_DATE`
+       #     `CURRENT_DATETIME`
+       #     `GENERATE_UUID`
+       #     `RAND`
+       #     `SESSION_USER`
+       #     `ST_GEOGPOINT`
+       # - Struct or array composed with the above allowed functions, for example:
+       #     `[CURRENT_DATE(), DATE '2020-01-01']`
+       #
+       # @return [String] The default value expression of the field.
+       #
+       def default_value_expression
+         @gapi.default_value_expression
+       end
+
+       ##
+       # Updates the default value expression of the field.
+       #
+       # @param default_value_expression [String] The default value of a field
+       #   using a SQL expression. It can only be set for top-level fields
+       #   (columns). Use a struct or array expression to specify a default
+       #   value for the entire struct or array. The valid SQL expressions are:
+       #   - Literals for all data types, including STRUCT and ARRAY.
+       #   - The following functions:
+       #       `CURRENT_TIMESTAMP`
+       #       `CURRENT_TIME`
+       #       `CURRENT_DATE`
+       #       `CURRENT_DATETIME`
+       #       `GENERATE_UUID`
+       #       `RAND`
+       #       `SESSION_USER`
+       #       `ST_GEOGPOINT`
+       #   - Struct or array composed with the above allowed functions, for example:
+       #       `[CURRENT_DATE(), DATE '2020-01-01']`
+       #
+       def default_value_expression= default_value_expression
+         @gapi.update! default_value_expression: default_value_expression
+       end
+
        ##
        # The maximum length of values of this field for {#string?} or {#bytes?} fields. If `max_length` is not
        # specified, no maximum length constraint is imposed on this field. If type = `STRING`, then `max_length`
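A sketch of setting a column default through the new writer, assuming an existing table and using `Schema#field` to reach the top-level column (dataset, table, and column names are illustrative):

```ruby
dataset = bigquery.dataset "my_dataset"
table   = dataset.table "my_table"

table.schema do |schema|
  # Give a top-level column a server-side default expression.
  schema.field("created_at").default_value_expression = "CURRENT_TIMESTAMP"
end
```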
@@ -409,6 +459,15 @@ module Google
          type == "GEOGRAPHY"
        end

+       ##
+       # Checks if the type of the field is `JSON`.
+       #
+       # @return [Boolean] `true` when `JSON`, `false` otherwise.
+       #
+       def json?
+         type == "JSON"
+       end
+
        ##
        # Checks if the type of the field is `RECORD`.
        #
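The predicate joins the existing family (`string?`, `geography?`, and so on), e.g. for finding JSON columns in a schema (table and dataset names are illustrative):

```ruby
table = bigquery.dataset("my_dataset").table("my_table")

json_columns = table.schema.fields.select(&:json?).map(&:name)
puts "JSON columns: #{json_columns.join(', ')}"
```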
@@ -846,6 +905,30 @@ module Google
          add_field name, :geography, description: description, mode: mode, policy_tags: policy_tags
        end

+       ##
+       # Adds a json field to the nested schema of a record field.
+       #
+       # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#json_type
+       #
+       # @param [String] name The field name. The name must contain only
+       #   letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
+       #   start with a letter or underscore. The maximum length is 128
+       #   characters.
+       # @param [String] description A description of the field.
+       # @param [Symbol] mode The field's mode. The possible values are
+       #   `:nullable`, `:required`, and `:repeated`. The default value is
+       #   `:nullable`.
+       # @param [Array<String>, String] policy_tags The policy tag list or
+       #   single policy tag for the field. Policy tag identifiers are of
+       #   the form `projects/*/locations/*/taxonomies/*/policyTags/*`.
+       #   At most 1 policy tag is currently allowed.
+       #
+       def json name, description: nil, mode: :nullable, policy_tags: nil
+         record_check!
+
+         add_field name, :json, description: description, mode: mode, policy_tags: policy_tags
+       end
+
        ##
        # Adds a record field to the nested schema of a record field. A block
        # must be passed describing the nested fields of the record. For more
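This hunk adds the nested-record variant; per the file list above, schema.rb gains the same `json` helper at the top level. A sketch of defining JSON columns with both (table and field names are illustrative):

```ruby
dataset.create_table "events" do |table|
  table.schema do |schema|
    schema.string "name", mode: :required
    # Top-level JSON column (helper added in schema.rb).
    schema.json "payload", mode: :nullable
    schema.record "audit", mode: :repeated do |nested|
      # Nested JSON field (the method added in this hunk).
      nested.json "details"
    end
  end
end
```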