google-cloud-bigquery 1.42.0 → 1.49.0

This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
@@ -67,6 +67,15 @@ module Google
  @service = service
  end

+ ##
+ # The universe domain the client is connected to
+ #
+ # @return [String]
+ #
+ def universe_domain
+ service.universe_domain
+ end
+
  ##
  # The BigQuery project connected to.
  #
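
A quick sketch of the new accessor in use (default credentials assumed; the project ID below is a placeholder):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new project_id: "my-project"
    # Clients built against the default endpoint typically report "googleapis.com".
    puts bigquery.universe_domain
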
@@ -304,6 +313,7 @@ module Google
  # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
  # | `DATE` | `Date` | |
  # | `GEOGRAPHY` | `String` (WKT or GeoJSON) | NOT AUTOMATIC: Must be mapped using `types`, below.|
+ # | `JSON` | `String` (Stringified JSON) | String, as JSON does not have a schema to verify. |
  # | `TIMESTAMP` | `Time` | |
  # | `TIME` | `Google::Cloud::BigQuery::Time` | |
  # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
@@ -330,6 +340,7 @@ module Google
  # * `:DATETIME`
  # * `:DATE`
  # * `:GEOGRAPHY`
+ # * `:JSON`
  # * `:TIMESTAMP`
  # * `:TIME`
  # * `:BYTES`
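
With `:JSON` now accepted in `types`, a JSON query parameter can be bound as stringified JSON. A minimal sketch (the query, parameter, and column names are placeholders):

    require "google/cloud/bigquery"
    require "json"

    bigquery = Google::Cloud::Bigquery.new

    payload = JSON.generate({ "name" => "Alice", "age" => 30 })
    data = bigquery.query "SELECT @payload AS payload",
                          params: { payload: payload },
                          types:  { payload: :JSON }
    data.each { |row| puts row[:payload] }
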
@@ -622,6 +633,7 @@ module Google
  create_session: nil,
  session_id: nil
  ensure_service!
+ project ||= self.project
  options = {
  params: params,
  types: types,
@@ -633,7 +645,7 @@ module Google
  write: write,
  dryrun: dryrun,
  dataset: dataset,
- project: (project || self.project),
+ project: project,
  standard_sql: standard_sql,
  legacy_sql: legacy_sql,
  large_results: large_results,
@@ -691,6 +703,7 @@ module Google
  # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
  # | `DATE` | `Date` | |
  # | `GEOGRAPHY` | `String` (WKT or GeoJSON) | NOT AUTOMATIC: Must be mapped using `types`, below.|
+ # | `JSON` | `String` (Stringified JSON) | String, as JSON does not have a schema to verify. |
  # | `TIMESTAMP` | `Time` | |
  # | `TIME` | `Google::Cloud::BigQuery::Time` | |
  # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
@@ -717,6 +730,7 @@ module Google
  # * `:DATETIME`
  # * `:DATE`
  # * `:GEOGRAPHY`
+ # * `:JSON`
  # * `:TIMESTAMP`
  # * `:TIME`
  # * `:BYTES`
@@ -942,6 +956,364 @@ module Google
  job.data max: max
  end

+ ##
+ # Loads data into the provided destination table using an asynchronous
+ # method. In this method, a {LoadJob} is immediately returned. The
+ # caller may poll the service by repeatedly calling {Job#reload!} and
+ # {Job#done?} to detect when the job is done, or simply block until the
+ # job is done by calling #{Job#wait_until_done!}. See also {#load}.
+ #
+ # For the source of the data, you can pass a google-cloud storage file
+ # path or a google-cloud-storage `File` instance. Or, you can upload a
+ # file directly. See [Loading Data with a POST
+ # Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
+ #
+ # The geographic location for the job ("US", "EU", etc.) can be set via
+ # {LoadJob::Updater#location=} in a block passed to this method.
+ #
+ # @param [String] table_id The destination table to load the data into.
+ # @param [File, Google::Cloud::Storage::File, String, URI,
+ # Array<Google::Cloud::Storage::File, String, URI>] files
+ # A file or the URI of a Google Cloud Storage file, or an Array of
+ # those, containing data to load into the table.
+ # @param [String] format The exported file format. The default value is
+ # `csv`.
+ #
+ # The following values are supported:
+ #
+ # * `csv` - CSV
+ # * `json` - [Newline-delimited JSON](https://jsonlines.org/)
+ # * `avro` - [Avro](http://avro.apache.org/)
+ # * `orc` - [ORC](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-orc)
+ # * `parquet` - [Parquet](https://parquet.apache.org/)
+ # * `datastore_backup` - Cloud Datastore backup
+ # @param [String] dataset_id The destination table to load the data into.
+ # For load job with create_session/session_id it defaults to "_SESSION"
+ # @param [String] create Specifies whether the job is allowed to create
+ # new tables. The default value is `needed`.
+ #
+ # The following values are supported:
+ #
+ # * `needed` - Create the table if it does not exist.
+ # * `never` - The table must already exist. A 'notFound' error is
+ # raised if the table does not exist.
+ # @param [String] write Specifies how to handle data already present in
+ # the table. The default value is `append`.
+ #
+ # The following values are supported:
+ #
+ # * `truncate` - BigQuery overwrites the table data.
+ # * `append` - BigQuery appends the data to the table.
+ # * `empty` - An error will be returned if the table already contains
+ # data.
+ # @param [Array<String>] projection_fields If the `format` option is set
+ # to `datastore_backup`, indicates which entity properties to load
+ # from a Cloud Datastore backup. Property names are case sensitive and
+ # must be top-level properties. If not set, BigQuery loads all
+ # properties. If any named property isn't found in the Cloud Datastore
+ # backup, an invalid error is returned.
+ # @param [Boolean] jagged_rows Accept rows that are missing trailing
+ # optional columns. The missing values are treated as nulls. If
+ # `false`, records with missing trailing columns are treated as bad
+ # records, and if there are too many bad records, an invalid error is
+ # returned in the job result. The default value is `false`. Only
+ # applicable to CSV, ignored for other formats.
+ # @param [Boolean] quoted_newlines Indicates if BigQuery should allow
+ # quoted data sections that contain newline characters in a CSV file.
+ # The default value is `false`.
+ # @param [Boolean] autodetect Indicates if BigQuery should
+ # automatically infer the options and schema for CSV and JSON sources.
+ # The default value is `false`.
+ # @param [String] encoding The character encoding of the data. The
+ # supported values are `UTF-8` or `ISO-8859-1`. The default value is
+ # `UTF-8`.
+ # @param [String] delimiter Specifices the separator for fields in a CSV
+ # file. BigQuery converts the string to `ISO-8859-1` encoding, and
+ # then uses the first byte of the encoded string to split the data in
+ # its raw, binary state. Default is <code>,</code>.
+ # @param [Boolean] ignore_unknown Indicates if BigQuery should allow
+ # extra values that are not represented in the table schema. If true,
+ # the extra values are ignored. If false, records with extra columns
+ # are treated as bad records, and if there are too many bad records,
+ # an invalid error is returned in the job result. The default value is
+ # `false`.
+ #
+ # The `format` property determines what BigQuery treats as an extra
+ # value:
+ #
+ # * `CSV`: Trailing columns
+ # * `JSON`: Named values that don't match any column names
+ # @param [Integer] max_bad_records The maximum number of bad records
+ # that BigQuery can ignore when running the job. If the number of bad
+ # records exceeds this value, an invalid error is returned in the job
+ # result. The default value is `0`, which requires that all records
+ # are valid.
+ # @param [String] null_marker Specifies a string that represents a null
+ # value in a CSV file. For example, if you specify `\N`, BigQuery
+ # interprets `\N` as a null value when loading a CSV file. The default
+ # value is the empty string. If you set this property to a custom
+ # value, BigQuery throws an error if an empty string is present for
+ # all data types except for STRING and BYTE. For STRING and BYTE
+ # columns, BigQuery interprets the empty string as an empty value.
+ # @param [String] quote The value that is used to quote data sections in
+ # a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
+ # then uses the first byte of the encoded string to split the data in
+ # its raw, binary state. The default value is a double-quote
+ # <code>"</code>. If your data does not contain quoted sections, set
+ # the property value to an empty string. If your data contains quoted
+ # newline characters, you must also set the allowQuotedNewlines
+ # property to true.
+ # @param [Integer] skip_leading The number of rows at the top of a CSV
+ # file that BigQuery will skip when loading the data. The default
+ # value is `0`. This property is useful if you have header rows in the
+ # file that should be skipped.
+ # @param [Google::Cloud::Bigquery::Schema] schema The schema for the
+ # destination table. Optional. The schema can be omitted if the
+ # destination table already exists, or if you're loading data from a
+ # Google Cloud Datastore backup.
+ #
+ # See {Project#schema} for the creation of the schema for use with
+ # this option. Also note that for most use cases, the block yielded by
+ # this method is a more convenient way to configure the schema.
+ # @param [String] job_id A user-defined ID for the load job. The ID
+ # must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
+ # (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
+ # `job_id` is provided, then `prefix` will not be used.
+ #
+ # See [Generating a job
+ # ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+ # @param [String] prefix A string, usually human-readable, that will be
+ # prepended to a generated value to produce a unique job ID. For
+ # example, the prefix `daily_import_job_` can be given to generate a
+ # job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
+ # prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
+ # underscores (`_`), or dashes (`-`). The maximum length of the entire ID
+ # is 1,024 characters. If `job_id` is provided, then `prefix` will not
+ # be used.
+ # @param [Hash] labels A hash of user-provided labels associated with
+ # the job. You can use these to organize and group your jobs.
+ #
+ # The labels applied to a resource must meet the following requirements:
+ #
+ # * Each resource can have multiple labels, up to a maximum of 64.
+ # * Each label must be a key-value pair.
+ # * Keys have a minimum length of 1 character and a maximum length of
+ # 63 characters, and cannot be empty. Values can be empty, and have
+ # a maximum length of 63 characters.
+ # * Keys and values can contain only lowercase letters, numeric characters,
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
+ # international characters are allowed.
+ # * The key portion of a label must be unique. However, you can use the
+ # same key with multiple resources.
+ # * Keys must start with a lowercase letter or international character.
+ # @param [Boolean] create_session If set to true a new session will be created
+ # and the load job will happen in the table created within that session.
+ # Note: This will work only for tables in _SESSION dataset
+ # else the property will be ignored by the backend.
+ # @param [string] session_id Session ID in which the load job must run.
+ #
+ # @yield [updater] A block for setting the schema and other
+ # options for the destination table. The schema can be omitted if the
+ # destination table already exists, or if you're loading data from a
+ # Google Cloud Datastore backup.
+ # @yieldparam [Google::Cloud::Bigquery::LoadJob::Updater] updater An
+ # updater to modify the load job and its schema.
+ # @param [Boolean] dryrun If set, don't actually run this job. Behavior
+ # is undefined however for non-query jobs and may result in an error.
+ # Deprecated.
+ #
+ # @return [Google::Cloud::Bigquery::LoadJob] A new load job object.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gs_url = "gs://my-bucket/file-name.csv"
+ # load_job = bigquery.load_job "temp_table", gs_url, autodetect: true, create_session: true
+ # load_job.wait_until_done!
+ # session_id = load_job.statistics["sessionInfo"]["sessionId"]
+ #
+ def load_job table_id, files, dataset_id: nil, format: nil, create: nil, write: nil,
+ projection_fields: nil, jagged_rows: nil, quoted_newlines: nil, encoding: nil,
+ delimiter: nil, ignore_unknown: nil, max_bad_records: nil, quote: nil,
+ skip_leading: nil, schema: nil, job_id: nil, prefix: nil, labels: nil, autodetect: nil,
+ null_marker: nil, dryrun: nil, create_session: nil, session_id: nil, &block
+ ensure_service!
+ dataset_id ||= "_SESSION" unless create_session.nil? && session_id.nil?
+ session_dataset = dataset dataset_id, skip_lookup: true
+ table = session_dataset.table table_id, skip_lookup: true
+ table.load_job files,
+ format: format, create: create, write: write, projection_fields: projection_fields,
+ jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
+ delimiter: delimiter, ignore_unknown: ignore_unknown,
+ max_bad_records: max_bad_records, quote: quote, skip_leading: skip_leading,
+ dryrun: dryrun, schema: schema, job_id: job_id, prefix: prefix, labels: labels,
+ autodetect: autodetect, null_marker: null_marker, create_session: create_session,
+ session_id: session_id, &block
+ end
+
+ ##
+ # Loads data into the provided destination table using a synchronous
+ # method that blocks for a response. Timeouts and transient errors are
+ # generally handled as needed to complete the job. See also
+ # {#load_job}.
+ #
+ # For the source of the data, you can pass a google-cloud storage file
+ # path or a google-cloud-storage `File` instance. Or, you can upload a
+ # file directly. See [Loading Data with a POST
+ # Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
+ #
+ # The geographic location for the job ("US", "EU", etc.) can be set via
+ # {LoadJob::Updater#location=} in a block passed to this method.
+ #
+ # @param [String] table_id The destination table to load the data into.
+ # @param [File, Google::Cloud::Storage::File, String, URI,
+ # Array<Google::Cloud::Storage::File, String, URI>] files
+ # A file or the URI of a Google Cloud Storage file, or an Array of
+ # those, containing data to load into the table.
+ # @param [String] format The exported file format. The default value is
+ # `csv`.
+ #
+ # The following values are supported:
+ #
+ # * `csv` - CSV
+ # * `json` - [Newline-delimited JSON](https://jsonlines.org/)
+ # * `avro` - [Avro](http://avro.apache.org/)
+ # * `orc` - [ORC](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-orc)
+ # * `parquet` - [Parquet](https://parquet.apache.org/)
+ # * `datastore_backup` - Cloud Datastore backup
+ # @param [String] create Specifies whether the job is allowed to create
+ # new tables. The default value is `needed`.
+ #
+ # The following values are supported:
+ #
+ # * `needed` - Create the table if it does not exist.
+ # * `never` - The table must already exist. A 'notFound' error is
+ # raised if the table does not exist.
+ # @param [String] dataset_id The destination table to load the data into.
+ # For load job with session it defaults to "_SESSION"
+ # @param [String] write Specifies how to handle data already present in
+ # the table. The default value is `append`.
+ #
+ # The following values are supported:
+ #
+ # * `truncate` - BigQuery overwrites the table data.
+ # * `append` - BigQuery appends the data to the table.
+ # * `empty` - An error will be returned if the table already contains
+ # data.
+ # @param [Array<String>] projection_fields If the `format` option is set
+ # to `datastore_backup`, indicates which entity properties to load
+ # from a Cloud Datastore backup. Property names are case sensitive and
+ # must be top-level properties. If not set, BigQuery loads all
+ # properties. If any named property isn't found in the Cloud Datastore
+ # backup, an invalid error is returned.
+ # @param [Boolean] jagged_rows Accept rows that are missing trailing
+ # optional columns. The missing values are treated as nulls. If
+ # `false`, records with missing trailing columns are treated as bad
+ # records, and if there are too many bad records, an invalid error is
+ # returned in the job result. The default value is `false`. Only
+ # applicable to CSV, ignored for other formats.
+ # @param [Boolean] quoted_newlines Indicates if BigQuery should allow
+ # quoted data sections that contain newline characters in a CSV file.
+ # The default value is `false`.
+ # @param [Boolean] autodetect Indicates if BigQuery should
+ # automatically infer the options and schema for CSV and JSON sources.
+ # The default value is `false`.
+ # @param [String] encoding The character encoding of the data. The
+ # supported values are `UTF-8` or `ISO-8859-1`. The default value is
+ # `UTF-8`.
+ # @param [String] delimiter Specifices the separator for fields in a CSV
+ # file. BigQuery converts the string to `ISO-8859-1` encoding, and
+ # then uses the first byte of the encoded string to split the data in
+ # its raw, binary state. Default is <code>,</code>.
+ # @param [Boolean] ignore_unknown Indicates if BigQuery should allow
+ # extra values that are not represented in the table schema. If true,
+ # the extra values are ignored. If false, records with extra columns
+ # are treated as bad records, and if there are too many bad records,
+ # an invalid error is returned in the job result. The default value is
+ # `false`.
+ #
+ # The `format` property determines what BigQuery treats as an extra
+ # value:
+ #
+ # * `CSV`: Trailing columns
+ # * `JSON`: Named values that don't match any column names
+ # @param [Integer] max_bad_records The maximum number of bad records
+ # that BigQuery can ignore when running the job. If the number of bad
+ # records exceeds this value, an invalid error is returned in the job
+ # result. The default value is `0`, which requires that all records
+ # are valid.
+ # @param [String] null_marker Specifies a string that represents a null
+ # value in a CSV file. For example, if you specify `\N`, BigQuery
+ # interprets `\N` as a null value when loading a CSV file. The default
+ # value is the empty string. If you set this property to a custom
+ # value, BigQuery throws an error if an empty string is present for
+ # all data types except for STRING and BYTE. For STRING and BYTE
+ # columns, BigQuery interprets the empty string as an empty value.
+ # @param [String] quote The value that is used to quote data sections in
+ # a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
+ # then uses the first byte of the encoded string to split the data in
+ # its raw, binary state. The default value is a double-quote
+ # <code>"</code>. If your data does not contain quoted sections, set
+ # the property value to an empty string. If your data contains quoted
+ # newline characters, you must also set the allowQuotedNewlines
+ # property to true.
+ # @param [Integer] skip_leading The number of rows at the top of a CSV
+ # file that BigQuery will skip when loading the data. The default
+ # value is `0`. This property is useful if you have header rows in the
+ # file that should be skipped.
+ # @param [Google::Cloud::Bigquery::Schema] schema The schema for the
+ # destination table. Optional. The schema can be omitted if the
+ # destination table already exists, or if you're loading data from a
+ # Google Cloud Datastore backup.
+ #
+ # See {Project#schema} for the creation of the schema for use with
+ # this option. Also note that for most use cases, the block yielded by
+ # this method is a more convenient way to configure the schema.
+ # @param [string] session_id Session ID in which the load job must run.
+ #
+ # @yield [updater] A block for setting the schema of the destination
+ # table and other options for the load job. The schema can be omitted
+ # if the destination table already exists, or if you're loading data
+ # from a Google Cloud Datastore backup.
+ # @yieldparam [Google::Cloud::Bigquery::LoadJob::Updater] updater An
+ # updater to modify the load job and its schema.
+ #
+ # @return [Boolean] Returns `true` if the load job was successful.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gs_url = "gs://my-bucket/file-name.csv"
+ # bigquery.load "my_new_table", gs_url, dataset_id: "my_dataset" do |schema|
+ # schema.string "first_name", mode: :required
+ # schema.record "cities_lived", mode: :repeated do |nested_schema|
+ # nested_schema.string "place", mode: :required
+ # nested_schema.integer "number_of_years", mode: :required
+ # end
+ # end
+ #
+ # @!group Data
+ #
+ def load table_id, files, dataset_id: "_SESSION", format: nil, create: nil, write: nil,
+ projection_fields: nil, jagged_rows: nil, quoted_newlines: nil, encoding: nil,
+ delimiter: nil, ignore_unknown: nil, max_bad_records: nil, quote: nil,
+ skip_leading: nil, schema: nil, autodetect: nil, null_marker: nil, session_id: nil, &block
+ job = load_job table_id, files, dataset_id: dataset_id,
+ format: format, create: create, write: write, projection_fields: projection_fields,
+ jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
+ delimiter: delimiter, ignore_unknown: ignore_unknown, max_bad_records: max_bad_records,
+ quote: quote, skip_leading: skip_leading, schema: schema, autodetect: autodetect,
+ null_marker: null_marker, session_id: session_id, &block
+
+ job.wait_until_done!
+ ensure_job_succeeded! job
+ true
+ end
+
  ##
  # Creates a new External::DataSource (or subclass) object that
  # represents the external data source that can be queried from directly,
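
Combining the two new project-level methods with the session workflow documented above, a minimal sketch (bucket, object, and table names are placeholders):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    gs_url   = "gs://my-bucket/file-name.csv"

    # Asynchronous: start a session and load into a temporary _SESSION table.
    load_job = bigquery.load_job "temp_table", gs_url, autodetect: true, create_session: true
    load_job.wait_until_done!
    session_id = load_job.statistics["sessionInfo"]["sessionId"]

    # Synchronous: block on a follow-up load that appends to the same session table.
    bigquery.load "temp_table", gs_url, autodetect: true, session_id: session_id
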
@@ -748,11 +748,10 @@ module Google
  def data token: nil, max: nil, start: nil
  return nil unless done?
  return Data.from_gapi_json({ rows: [] }, nil, @gapi, service) if dryrun?
- if ddl? || dml?
+ if ddl? || dml? || !ensure_schema!
  data_hash = { totalRows: nil, rows: [] }
  return Data.from_gapi_json data_hash, nil, @gapi, service
  end
- ensure_schema!

  data_hash = service.list_tabledata destination_table_dataset_id,
  destination_table_table_id,
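
Together with the `ensure_schema!` change further down, the practical effect is that a finished job without a retrievable result schema is treated like DDL/DML: `data` returns an empty result set instead of raising. A minimal sketch using a DDL statement to show the empty-result shape (dataset and table names are placeholders):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    job = bigquery.query_job "CREATE TABLE IF NOT EXISTS my_dataset.my_table (id INT64)"
    job.wait_until_done!

    data = job.data
    puts data.count # => 0, there are no result rows to page through
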
@@ -928,6 +927,7 @@ module Google
  # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
  # | `DATE` | `Date` | |
  # | `GEOGRAPHY` | `String` (WKT or GeoJSON) | NOT AUTOMATIC: Must be mapped using `types`. |
+ # | `JSON` | `String` (Stringified JSON) | String, as JSON does not have a schema to verify.|
  # | `TIMESTAMP` | `Time` | |
  # | `TIME` | `Google::Cloud::BigQuery::Time` | |
  # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
@@ -965,6 +965,7 @@ module Google
  # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
  # | `DATE` | `Date` | |
  # | `GEOGRAPHY` | `String` (WKT or GeoJSON) | NOT AUTOMATIC: Must be mapped using `types`. |
+ # | `JSON` | `String` (Stringified JSON) | String, as JSON does not have a schema to verify.|
  # | `TIMESTAMP` | `Time` | |
  # | `TIME` | `Google::Cloud::BigQuery::Time` | |
  # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
@@ -991,6 +992,7 @@ module Google
  # * `:DATETIME`
  # * `:DATE`
  # * `:GEOGRAPHY`
+ # * `:JSON`
  # * `:TIMESTAMP`
  # * `:TIME`
  # * `:BYTES`
@@ -1791,10 +1793,10 @@ module Google
  protected

  def ensure_schema!
- return unless destination_schema.nil?
+ return true unless destination_schema.nil?

  query_results_gapi = service.job_query_results job_id, location: location, max: 0
- # raise "unable to retrieve schema" if query_results_gapi.schema.nil?
+ return false if query_results_gapi.schema.nil?
  @destination_schema_gapi = query_results_gapi.schema
  end

@@ -52,6 +52,7 @@ module Google
  "GEOGRAPHY",
  "INTEGER",
  "INT64",
+ "JSON",
  "NUMERIC",
  "RECORD",
  "STRING",
@@ -98,6 +99,7 @@ module Google
  # * `FLOAT`
  # * `FLOAT64` (same as `FLOAT`)
  # * `GEOGRAPHY`
+ # * `JSON`
  # * `INTEGER`
  # * `INT64` (same as `INTEGER`)
  # * `NUMERIC`
@@ -125,6 +127,7 @@ module Google
  # * `FLOAT`
  # * `FLOAT64` (same as `FLOAT`)
  # * `GEOGRAPHY`
+ # * `JSON`
  # * `INTEGER`
  # * `INT64` (same as `INTEGER`)
  # * `NUMERIC`
@@ -263,6 +266,53 @@ module Google
  @gapi.update! policy_tags: policy_tag_list
  end

+ ##
+ # The default value of a field using a SQL expression. It can only
+ # be set for top level fields (columns). Default value for the entire struct or
+ # array is set using a struct or array expression. The valid SQL expressions are:
+ # - Literals for all data types, including STRUCT and ARRAY.
+ # - The following functions:
+ # `CURRENT_TIMESTAMP`
+ # `CURRENT_TIME`
+ # `CURRENT_DATE`
+ # `CURRENT_DATETIME`
+ # `GENERATE_UUID`
+ # `RAND`
+ # `SESSION_USER`
+ # `ST_GEOPOINT`
+ # - Struct or array composed with the above allowed functions, for example:
+ # "[CURRENT_DATE(), DATE '2020-01-01'"]
+ #
+ # @return [String] The default value expression of the field.
+ #
+ def default_value_expression
+ @gapi.default_value_expression
+ end
+
+ ##
+ # Updates the default value expression of the field.
+ #
+ # @param default_value_expression [String] The default value of a field
+ # using a SQL expression. It can only be set for top level fields (columns).
+ # Use a struct or array expression to specify default value for the entire struct or
+ # array. The valid SQL expressions are:
+ # - Literals for all data types, including STRUCT and ARRAY.
+ # - The following functions:
+ # `CURRENT_TIMESTAMP`
+ # `CURRENT_TIME`
+ # `CURRENT_DATE`
+ # `CURRENT_DATETIME`
+ # `GENERATE_UUID`
+ # `RAND`
+ # `SESSION_USER`
+ # `ST_GEOPOINT`
+ # - Struct or array composed with the above allowed functions, for example:
+ # "[CURRENT_DATE(), DATE '2020-01-01'"]
+ #
+ def default_value_expression= default_value_expression
+ @gapi.update! default_value_expression: default_value_expression
+ end
+
  ##
  # The maximum length of values of this field for {#string?} or {bytes?} fields. If `max_length` is not
  # specified, no maximum length constraint is imposed on this field. If type = `STRING`, then `max_length`
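
The new attribute can be read back from any table's schema fields; a minimal sketch (dataset and table names are placeholders):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    table    = bigquery.dataset("my_dataset").table("my_table")

    table.schema.fields.each do |field|
      # Nil unless a SQL default expression has been configured for the column.
      puts "#{field.name}: #{field.default_value_expression.inspect}"
    end
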
@@ -409,6 +459,15 @@ module Google
  type == "GEOGRAPHY"
  end

+ ##
+ # Checks if the type of the field is `JSON`.
+ #
+ # @return [Boolean] `true` when `JSON`, `false` otherwise.
+ #
+ def json?
+ type == "JSON"
+ end
+
  ##
  # Checks if the type of the field is `RECORD`.
  #
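
A minimal sketch of the new predicate, listing the JSON columns on an existing table (dataset and table names are placeholders):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    table    = bigquery.dataset("my_dataset").table("my_table")

    json_columns = table.schema.fields.select(&:json?).map(&:name)
    puts "JSON columns: #{json_columns.join(', ')}"
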
@@ -846,6 +905,30 @@ module Google
  add_field name, :geography, description: description, mode: mode, policy_tags: policy_tags
  end

+ ##
+ # Adds a json field to the nested schema of a record field.
+ #
+ # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#json_type
+ #
+ # @param [String] name The field name. The name must contain only
+ # letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
+ # start with a letter or underscore. The maximum length is 128
+ # characters.
+ # @param [String] description A description of the field.
+ # @param [Symbol] mode The field's mode. The possible values are
+ # `:nullable`, `:required`, and `:repeated`. The default value is
+ # `:nullable`.
+ # @param [Array<String>, String] policy_tags The policy tag list or
+ # single policy tag for the field. Policy tag identifiers are of
+ # the form `projects/*/locations/*/taxonomies/*/policyTags/*`.
+ # At most 1 policy tag is currently allowed.
+ #
+ def json name, description: nil, mode: :nullable, policy_tags: nil
+ record_check!
+
+ add_field name, :json, description: description, mode: mode, policy_tags: policy_tags
+ end
+
  ##
  # Adds a record field to the nested schema of a record field. A block
  # must be passed describing the nested fields of the record. For more
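
A minimal sketch of the nested `json` helper when creating a table (table and field names are placeholders; JSON values are supplied as stringified JSON when rows are inserted):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset  = bigquery.dataset "my_dataset"

    table = dataset.create_table "events" do |t|
      t.schema do |schema|
        schema.string "name", mode: :required
        schema.record "detail", mode: :nullable do |nested|
          # The json helper added above defines a JSON column inside the record.
          nested.json "payload"
        end
      end
    end
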