google-cloud-bigquery 1.42.0 → 1.49.0

@@ -67,6 +67,15 @@ module Google
  @service = service
  end

+ ##
+ # The universe domain the client is connected to
+ #
+ # @return [String]
+ #
+ def universe_domain
+ service.universe_domain
+ end
+
  ##
  # The BigQuery project connected to.
  #
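The new #universe_domain reader simply exposes the universe domain configured on the underlying service object. A minimal usage sketch, not part of the diff (the value shown is the standard default universe):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    # Reports which universe the client is talking to,
    # e.g. "googleapis.com" for the default universe.
    puts bigquery.universe_domain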
@@ -304,6 +313,7 @@ module Google
  # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
  # | `DATE` | `Date` | |
  # | `GEOGRAPHY` | `String` (WKT or GeoJSON) | NOT AUTOMATIC: Must be mapped using `types`, below.|
+ # | `JSON` | `String` (Stringified JSON) | String, as JSON does not have a schema to verify. |
  # | `TIMESTAMP` | `Time` | |
  # | `TIME` | `Google::Cloud::BigQuery::Time` | |
  # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
@@ -330,6 +340,7 @@ module Google
  # * `:DATETIME`
  # * `:DATE`
  # * `:GEOGRAPHY`
+ # * `:JSON`
  # * `:TIMESTAMP`
  # * `:TIME`
  # * `:BYTES`
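With `JSON` added to the parameter tables and the `types` lists above, a stringified JSON value can be bound to a query parameter explicitly. A hedged sketch, not part of the diff (the query and parameter names are invented; the `types` mapping is needed because a plain Ruby `String` would otherwise bind as `STRING`):

    require "google/cloud/bigquery"
    require "json"

    bigquery = Google::Cloud::Bigquery.new

    # A JSON parameter is passed as a string and mapped to :JSON via types.
    doc = JSON.generate({ "name" => "Ada", "tags" => ["pioneer"] })

    data = bigquery.query "SELECT JSON_VALUE(@doc, '$.name') AS name",
                          params: { doc: doc },
                          types:  { doc: :JSON }

    data.each { |row| puts row[:name] }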
@@ -622,6 +633,7 @@ module Google
  create_session: nil,
  session_id: nil
  ensure_service!
+ project ||= self.project
  options = {
  params: params,
  types: types,
@@ -633,7 +645,7 @@ module Google
  write: write,
  dryrun: dryrun,
  dataset: dataset,
- project: (project || self.project),
+ project: project,
  standard_sql: standard_sql,
  legacy_sql: legacy_sql,
  large_results: large_results,
@@ -691,6 +703,7 @@ module Google
  # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
  # | `DATE` | `Date` | |
  # | `GEOGRAPHY` | `String` (WKT or GeoJSON) | NOT AUTOMATIC: Must be mapped using `types`, below.|
+ # | `JSON` | `String` (Stringified JSON) | String, as JSON does not have a schema to verify. |
  # | `TIMESTAMP` | `Time` | |
  # | `TIME` | `Google::Cloud::BigQuery::Time` | |
  # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
@@ -717,6 +730,7 @@ module Google
  # * `:DATETIME`
  # * `:DATE`
  # * `:GEOGRAPHY`
+ # * `:JSON`
  # * `:TIMESTAMP`
  # * `:TIME`
  # * `:BYTES`
@@ -942,6 +956,364 @@ module Google
  job.data max: max
  end

+ ##
+ # Loads data into the provided destination table using an asynchronous
+ # method. In this method, a {LoadJob} is immediately returned. The
+ # caller may poll the service by repeatedly calling {Job#reload!} and
+ # {Job#done?} to detect when the job is done, or simply block until the
+ # job is done by calling {Job#wait_until_done!}. See also {#load}.
+ #
+ # For the source of the data, you can pass a google-cloud storage file
+ # path or a google-cloud-storage `File` instance. Or, you can upload a
+ # file directly. See [Loading Data with a POST
+ # Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
+ #
+ # The geographic location for the job ("US", "EU", etc.) can be set via
+ # {LoadJob::Updater#location=} in a block passed to this method.
+ #
+ # @param [String] table_id The destination table to load the data into.
+ # @param [File, Google::Cloud::Storage::File, String, URI,
+ # Array<Google::Cloud::Storage::File, String, URI>] files
+ # A file or the URI of a Google Cloud Storage file, or an Array of
+ # those, containing data to load into the table.
+ # @param [String] format The exported file format. The default value is
+ # `csv`.
+ #
+ # The following values are supported:
+ #
+ # * `csv` - CSV
+ # * `json` - [Newline-delimited JSON](https://jsonlines.org/)
+ # * `avro` - [Avro](http://avro.apache.org/)
+ # * `orc` - [ORC](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-orc)
+ # * `parquet` - [Parquet](https://parquet.apache.org/)
+ # * `datastore_backup` - Cloud Datastore backup
+ # @param [String] dataset_id The destination dataset to load the data into.
+ # For a load job with create_session/session_id it defaults to "_SESSION".
+ # @param [String] create Specifies whether the job is allowed to create
+ # new tables. The default value is `needed`.
+ #
+ # The following values are supported:
+ #
+ # * `needed` - Create the table if it does not exist.
+ # * `never` - The table must already exist. A 'notFound' error is
+ # raised if the table does not exist.
+ # @param [String] write Specifies how to handle data already present in
+ # the table. The default value is `append`.
+ #
+ # The following values are supported:
+ #
+ # * `truncate` - BigQuery overwrites the table data.
+ # * `append` - BigQuery appends the data to the table.
+ # * `empty` - An error will be returned if the table already contains
+ # data.
+ # @param [Array<String>] projection_fields If the `format` option is set
+ # to `datastore_backup`, indicates which entity properties to load
+ # from a Cloud Datastore backup. Property names are case sensitive and
+ # must be top-level properties. If not set, BigQuery loads all
+ # properties. If any named property isn't found in the Cloud Datastore
+ # backup, an invalid error is returned.
+ # @param [Boolean] jagged_rows Accept rows that are missing trailing
+ # optional columns. The missing values are treated as nulls. If
+ # `false`, records with missing trailing columns are treated as bad
+ # records, and if there are too many bad records, an invalid error is
+ # returned in the job result. The default value is `false`. Only
+ # applicable to CSV, ignored for other formats.
+ # @param [Boolean] quoted_newlines Indicates if BigQuery should allow
+ # quoted data sections that contain newline characters in a CSV file.
+ # The default value is `false`.
+ # @param [Boolean] autodetect Indicates if BigQuery should
+ # automatically infer the options and schema for CSV and JSON sources.
+ # The default value is `false`.
+ # @param [String] encoding The character encoding of the data. The
+ # supported values are `UTF-8` or `ISO-8859-1`. The default value is
+ # `UTF-8`.
+ # @param [String] delimiter Specifies the separator for fields in a CSV
+ # file. BigQuery converts the string to `ISO-8859-1` encoding, and
+ # then uses the first byte of the encoded string to split the data in
+ # its raw, binary state. Default is <code>,</code>.
+ # @param [Boolean] ignore_unknown Indicates if BigQuery should allow
+ # extra values that are not represented in the table schema. If true,
+ # the extra values are ignored. If false, records with extra columns
+ # are treated as bad records, and if there are too many bad records,
+ # an invalid error is returned in the job result. The default value is
+ # `false`.
+ #
+ # The `format` property determines what BigQuery treats as an extra
+ # value:
+ #
+ # * `CSV`: Trailing columns
+ # * `JSON`: Named values that don't match any column names
+ # @param [Integer] max_bad_records The maximum number of bad records
+ # that BigQuery can ignore when running the job. If the number of bad
+ # records exceeds this value, an invalid error is returned in the job
+ # result. The default value is `0`, which requires that all records
+ # are valid.
+ # @param [String] null_marker Specifies a string that represents a null
+ # value in a CSV file. For example, if you specify `\N`, BigQuery
+ # interprets `\N` as a null value when loading a CSV file. The default
+ # value is the empty string. If you set this property to a custom
+ # value, BigQuery throws an error if an empty string is present for
+ # all data types except for STRING and BYTE. For STRING and BYTE
+ # columns, BigQuery interprets the empty string as an empty value.
+ # @param [String] quote The value that is used to quote data sections in
+ # a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
+ # then uses the first byte of the encoded string to split the data in
+ # its raw, binary state. The default value is a double-quote
+ # <code>"</code>. If your data does not contain quoted sections, set
+ # the property value to an empty string. If your data contains quoted
+ # newline characters, you must also set the allowQuotedNewlines
+ # property to true.
+ # @param [Integer] skip_leading The number of rows at the top of a CSV
+ # file that BigQuery will skip when loading the data. The default
+ # value is `0`. This property is useful if you have header rows in the
+ # file that should be skipped.
+ # @param [Google::Cloud::Bigquery::Schema] schema The schema for the
+ # destination table. Optional. The schema can be omitted if the
+ # destination table already exists, or if you're loading data from a
+ # Google Cloud Datastore backup.
+ #
+ # See {Project#schema} for the creation of the schema for use with
+ # this option. Also note that for most use cases, the block yielded by
+ # this method is a more convenient way to configure the schema.
+ # @param [String] job_id A user-defined ID for the load job. The ID
+ # must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
+ # (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
+ # `job_id` is provided, then `prefix` will not be used.
+ #
+ # See [Generating a job
+ # ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
+ # @param [String] prefix A string, usually human-readable, that will be
+ # prepended to a generated value to produce a unique job ID. For
+ # example, the prefix `daily_import_job_` can be given to generate a
+ # job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
+ # prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
+ # underscores (`_`), or dashes (`-`). The maximum length of the entire ID
+ # is 1,024 characters. If `job_id` is provided, then `prefix` will not
+ # be used.
+ # @param [Hash] labels A hash of user-provided labels associated with
+ # the job. You can use these to organize and group your jobs.
+ #
+ # The labels applied to a resource must meet the following requirements:
+ #
+ # * Each resource can have multiple labels, up to a maximum of 64.
+ # * Each label must be a key-value pair.
+ # * Keys have a minimum length of 1 character and a maximum length of
+ # 63 characters, and cannot be empty. Values can be empty, and have
+ # a maximum length of 63 characters.
+ # * Keys and values can contain only lowercase letters, numeric characters,
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
+ # international characters are allowed.
+ # * The key portion of a label must be unique. However, you can use the
+ # same key with multiple resources.
+ # * Keys must start with a lowercase letter or international character.
+ # @param [Boolean] create_session If set to true a new session will be created
+ # and the load job will happen in the table created within that session.
+ # Note: This will work only for tables in the _SESSION dataset;
+ # otherwise the property will be ignored by the backend.
+ # @param [String] session_id Session ID in which the load job must run.
+ #
+ # @yield [updater] A block for setting the schema and other
+ # options for the destination table. The schema can be omitted if the
+ # destination table already exists, or if you're loading data from a
+ # Google Cloud Datastore backup.
+ # @yieldparam [Google::Cloud::Bigquery::LoadJob::Updater] updater An
+ # updater to modify the load job and its schema.
+ # @param [Boolean] dryrun If set, don't actually run this job. Behavior
+ # is undefined however for non-query jobs and may result in an error.
+ # Deprecated.
+ #
+ # @return [Google::Cloud::Bigquery::LoadJob] A new load job object.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gs_url = "gs://my-bucket/file-name.csv"
+ # load_job = bigquery.load_job "temp_table", gs_url, autodetect: true, create_session: true
+ # load_job.wait_until_done!
+ # session_id = load_job.statistics["sessionInfo"]["sessionId"]
+ #
+ def load_job table_id, files, dataset_id: nil, format: nil, create: nil, write: nil,
+ projection_fields: nil, jagged_rows: nil, quoted_newlines: nil, encoding: nil,
+ delimiter: nil, ignore_unknown: nil, max_bad_records: nil, quote: nil,
+ skip_leading: nil, schema: nil, job_id: nil, prefix: nil, labels: nil, autodetect: nil,
+ null_marker: nil, dryrun: nil, create_session: nil, session_id: nil, &block
+ ensure_service!
+ dataset_id ||= "_SESSION" unless create_session.nil? && session_id.nil?
+ session_dataset = dataset dataset_id, skip_lookup: true
+ table = session_dataset.table table_id, skip_lookup: true
+ table.load_job files,
+ format: format, create: create, write: write, projection_fields: projection_fields,
+ jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
+ delimiter: delimiter, ignore_unknown: ignore_unknown,
+ max_bad_records: max_bad_records, quote: quote, skip_leading: skip_leading,
+ dryrun: dryrun, schema: schema, job_id: job_id, prefix: prefix, labels: labels,
+ autodetect: autodetect, null_marker: null_marker, create_session: create_session,
+ session_id: session_id, &block
+ end
+
+ ##
+ # Loads data into the provided destination table using a synchronous
+ # method that blocks for a response. Timeouts and transient errors are
+ # generally handled as needed to complete the job. See also
+ # {#load_job}.
+ #
+ # For the source of the data, you can pass a google-cloud storage file
+ # path or a google-cloud-storage `File` instance. Or, you can upload a
+ # file directly. See [Loading Data with a POST
+ # Request](https://cloud.google.com/bigquery/loading-data-post-request#multipart).
+ #
+ # The geographic location for the job ("US", "EU", etc.) can be set via
+ # {LoadJob::Updater#location=} in a block passed to this method.
+ #
+ # @param [String] table_id The destination table to load the data into.
+ # @param [File, Google::Cloud::Storage::File, String, URI,
+ # Array<Google::Cloud::Storage::File, String, URI>] files
+ # A file or the URI of a Google Cloud Storage file, or an Array of
+ # those, containing data to load into the table.
+ # @param [String] format The exported file format. The default value is
+ # `csv`.
+ #
+ # The following values are supported:
+ #
+ # * `csv` - CSV
+ # * `json` - [Newline-delimited JSON](https://jsonlines.org/)
+ # * `avro` - [Avro](http://avro.apache.org/)
+ # * `orc` - [ORC](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-orc)
+ # * `parquet` - [Parquet](https://parquet.apache.org/)
+ # * `datastore_backup` - Cloud Datastore backup
+ # @param [String] create Specifies whether the job is allowed to create
+ # new tables. The default value is `needed`.
+ #
+ # The following values are supported:
+ #
+ # * `needed` - Create the table if it does not exist.
+ # * `never` - The table must already exist. A 'notFound' error is
+ # raised if the table does not exist.
+ # @param [String] dataset_id The destination dataset to load the data into.
+ # For a load job with a session it defaults to "_SESSION".
+ # @param [String] write Specifies how to handle data already present in
+ # the table. The default value is `append`.
+ #
+ # The following values are supported:
+ #
+ # * `truncate` - BigQuery overwrites the table data.
+ # * `append` - BigQuery appends the data to the table.
+ # * `empty` - An error will be returned if the table already contains
+ # data.
+ # @param [Array<String>] projection_fields If the `format` option is set
+ # to `datastore_backup`, indicates which entity properties to load
+ # from a Cloud Datastore backup. Property names are case sensitive and
+ # must be top-level properties. If not set, BigQuery loads all
+ # properties. If any named property isn't found in the Cloud Datastore
+ # backup, an invalid error is returned.
+ # @param [Boolean] jagged_rows Accept rows that are missing trailing
+ # optional columns. The missing values are treated as nulls. If
+ # `false`, records with missing trailing columns are treated as bad
+ # records, and if there are too many bad records, an invalid error is
+ # returned in the job result. The default value is `false`. Only
+ # applicable to CSV, ignored for other formats.
+ # @param [Boolean] quoted_newlines Indicates if BigQuery should allow
+ # quoted data sections that contain newline characters in a CSV file.
+ # The default value is `false`.
+ # @param [Boolean] autodetect Indicates if BigQuery should
+ # automatically infer the options and schema for CSV and JSON sources.
+ # The default value is `false`.
+ # @param [String] encoding The character encoding of the data. The
+ # supported values are `UTF-8` or `ISO-8859-1`. The default value is
+ # `UTF-8`.
+ # @param [String] delimiter Specifies the separator for fields in a CSV
+ # file. BigQuery converts the string to `ISO-8859-1` encoding, and
+ # then uses the first byte of the encoded string to split the data in
+ # its raw, binary state. Default is <code>,</code>.
+ # @param [Boolean] ignore_unknown Indicates if BigQuery should allow
+ # extra values that are not represented in the table schema. If true,
+ # the extra values are ignored. If false, records with extra columns
+ # are treated as bad records, and if there are too many bad records,
+ # an invalid error is returned in the job result. The default value is
+ # `false`.
+ #
+ # The `format` property determines what BigQuery treats as an extra
+ # value:
+ #
+ # * `CSV`: Trailing columns
+ # * `JSON`: Named values that don't match any column names
+ # @param [Integer] max_bad_records The maximum number of bad records
+ # that BigQuery can ignore when running the job. If the number of bad
+ # records exceeds this value, an invalid error is returned in the job
+ # result. The default value is `0`, which requires that all records
+ # are valid.
+ # @param [String] null_marker Specifies a string that represents a null
+ # value in a CSV file. For example, if you specify `\N`, BigQuery
+ # interprets `\N` as a null value when loading a CSV file. The default
+ # value is the empty string. If you set this property to a custom
+ # value, BigQuery throws an error if an empty string is present for
+ # all data types except for STRING and BYTE. For STRING and BYTE
+ # columns, BigQuery interprets the empty string as an empty value.
+ # @param [String] quote The value that is used to quote data sections in
+ # a CSV file. BigQuery converts the string to ISO-8859-1 encoding, and
+ # then uses the first byte of the encoded string to split the data in
+ # its raw, binary state. The default value is a double-quote
+ # <code>"</code>. If your data does not contain quoted sections, set
+ # the property value to an empty string. If your data contains quoted
+ # newline characters, you must also set the allowQuotedNewlines
+ # property to true.
+ # @param [Integer] skip_leading The number of rows at the top of a CSV
+ # file that BigQuery will skip when loading the data. The default
+ # value is `0`. This property is useful if you have header rows in the
+ # file that should be skipped.
+ # @param [Google::Cloud::Bigquery::Schema] schema The schema for the
+ # destination table. Optional. The schema can be omitted if the
+ # destination table already exists, or if you're loading data from a
+ # Google Cloud Datastore backup.
+ #
+ # See {Project#schema} for the creation of the schema for use with
+ # this option. Also note that for most use cases, the block yielded by
+ # this method is a more convenient way to configure the schema.
+ # @param [String] session_id Session ID in which the load job must run.
+ #
+ # @yield [updater] A block for setting the schema of the destination
+ # table and other options for the load job. The schema can be omitted
+ # if the destination table already exists, or if you're loading data
+ # from a Google Cloud Datastore backup.
+ # @yieldparam [Google::Cloud::Bigquery::LoadJob::Updater] updater An
+ # updater to modify the load job and its schema.
+ #
+ # @return [Boolean] Returns `true` if the load job was successful.
+ #
+ # @example
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ #
+ # gs_url = "gs://my-bucket/file-name.csv"
+ # bigquery.load "my_new_table", gs_url, dataset_id: "my_dataset" do |schema|
+ # schema.string "first_name", mode: :required
+ # schema.record "cities_lived", mode: :repeated do |nested_schema|
+ # nested_schema.string "place", mode: :required
+ # nested_schema.integer "number_of_years", mode: :required
+ # end
+ # end
+ #
+ # @!group Data
+ #
+ def load table_id, files, dataset_id: "_SESSION", format: nil, create: nil, write: nil,
+ projection_fields: nil, jagged_rows: nil, quoted_newlines: nil, encoding: nil,
+ delimiter: nil, ignore_unknown: nil, max_bad_records: nil, quote: nil,
+ skip_leading: nil, schema: nil, autodetect: nil, null_marker: nil, session_id: nil, &block
+ job = load_job table_id, files, dataset_id: dataset_id,
+ format: format, create: create, write: write, projection_fields: projection_fields,
+ jagged_rows: jagged_rows, quoted_newlines: quoted_newlines, encoding: encoding,
+ delimiter: delimiter, ignore_unknown: ignore_unknown, max_bad_records: max_bad_records,
+ quote: quote, skip_leading: skip_leading, schema: schema, autodetect: autodetect,
+ null_marker: null_marker, session_id: session_id, &block
+
+ job.wait_until_done!
+ ensure_job_succeeded! job
+ true
+ end
+
  ##
  # Creates a new External::DataSource (or subclass) object that
  # represents the external data source that can be queried from directly,
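The new Project-level #load_job and #load mirror the Dataset-level methods but default the dataset to `_SESSION` whenever a session is involved, which is what makes loading into session-scoped temporary tables practical. A hedged sketch of the create-then-reuse flow, not part of the diff (bucket, file, and table names are placeholders):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new

    # The first load creates a session and a temporary table inside it.
    load_job = bigquery.load_job "temp_table", "gs://my-bucket/file-name.csv",
                                 autodetect: true, create_session: true
    load_job.wait_until_done!
    session_id = load_job.statistics["sessionInfo"]["sessionId"]

    # Later loads reuse the session; dataset_id falls back to "_SESSION".
    bigquery.load "temp_table", "gs://my-bucket/more-rows.csv",
                  autodetect: true, session_id: session_id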
@@ -748,11 +748,10 @@ module Google
  def data token: nil, max: nil, start: nil
  return nil unless done?
  return Data.from_gapi_json({ rows: [] }, nil, @gapi, service) if dryrun?
- if ddl? || dml?
+ if ddl? || dml? || !ensure_schema!
  data_hash = { totalRows: nil, rows: [] }
  return Data.from_gapi_json data_hash, nil, @gapi, service
  end
- ensure_schema!

  data_hash = service.list_tabledata destination_table_dataset_id,
  destination_table_table_id,
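Read together with the #ensure_schema! change further down, the new guard means #data degrades gracefully: when no result schema can be retrieved for the job, the method now returns an empty result through the same path already used for DDL and DML statements. A hedged sketch of the resulting behavior, not part of the diff (assuming a finished query job whose result schema is unavailable):

    job.wait_until_done!

    # No exception when the schema lookup comes back empty; an empty
    # Data collection is returned instead.
    rows = job.data
    rows.count # => 0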
@@ -928,6 +927,7 @@ module Google
  # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
  # | `DATE` | `Date` | |
  # | `GEOGRAPHY` | `String` (WKT or GeoJSON) | NOT AUTOMATIC: Must be mapped using `types`. |
+ # | `JSON` | `String` (Stringified JSON) | String, as JSON does not have a schema to verify.|
  # | `TIMESTAMP` | `Time` | |
  # | `TIME` | `Google::Cloud::BigQuery::Time` | |
  # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
@@ -965,6 +965,7 @@ module Google
  # | `DATETIME` | `DateTime` | `DATETIME` does not support time zone. |
  # | `DATE` | `Date` | |
  # | `GEOGRAPHY` | `String` (WKT or GeoJSON) | NOT AUTOMATIC: Must be mapped using `types`. |
+ # | `JSON` | `String` (Stringified JSON) | String, as JSON does not have a schema to verify.|
  # | `TIMESTAMP` | `Time` | |
  # | `TIME` | `Google::Cloud::BigQuery::Time` | |
  # | `BYTES` | `File`, `IO`, `StringIO`, or similar | |
@@ -991,6 +992,7 @@ module Google
  # * `:DATETIME`
  # * `:DATE`
  # * `:GEOGRAPHY`
+ # * `:JSON`
  # * `:TIMESTAMP`
  # * `:TIME`
  # * `:BYTES`
@@ -1791,10 +1793,10 @@ module Google
  protected

  def ensure_schema!
- return unless destination_schema.nil?
+ return true unless destination_schema.nil?

  query_results_gapi = service.job_query_results job_id, location: location, max: 0
- # raise "unable to retrieve schema" if query_results_gapi.schema.nil?
+ return false if query_results_gapi.schema.nil?
  @destination_schema_gapi = query_results_gapi.schema
  end

@@ -52,6 +52,7 @@ module Google
  "GEOGRAPHY",
  "INTEGER",
  "INT64",
+ "JSON",
  "NUMERIC",
  "RECORD",
  "STRING",
@@ -98,6 +99,7 @@ module Google
  # * `FLOAT`
  # * `FLOAT64` (same as `FLOAT`)
  # * `GEOGRAPHY`
+ # * `JSON`
  # * `INTEGER`
  # * `INT64` (same as `INTEGER`)
  # * `NUMERIC`
@@ -125,6 +127,7 @@ module Google
  # * `FLOAT`
  # * `FLOAT64` (same as `FLOAT`)
  # * `GEOGRAPHY`
+ # * `JSON`
  # * `INTEGER`
  # * `INT64` (same as `INTEGER`)
  # * `NUMERIC`
@@ -263,6 +266,53 @@ module Google
  @gapi.update! policy_tags: policy_tag_list
  end

+ ##
+ # The default value of a field using a SQL expression. It can only
+ # be set for top level fields (columns). Default value for the entire struct or
+ # array is set using a struct or array expression. The valid SQL expressions are:
+ # - Literals for all data types, including STRUCT and ARRAY.
+ # - The following functions:
+ # `CURRENT_TIMESTAMP`
+ # `CURRENT_TIME`
+ # `CURRENT_DATE`
+ # `CURRENT_DATETIME`
+ # `GENERATE_UUID`
+ # `RAND`
+ # `SESSION_USER`
+ # `ST_GEOPOINT`
+ # - Struct or array composed with the above allowed functions, for example:
+ # "[CURRENT_DATE(), DATE '2020-01-01']"
+ #
+ # @return [String] The default value expression of the field.
+ #
+ def default_value_expression
+ @gapi.default_value_expression
+ end
+
+ ##
+ # Updates the default value expression of the field.
+ #
+ # @param default_value_expression [String] The default value of a field
+ # using a SQL expression. It can only be set for top level fields (columns).
+ # Use a struct or array expression to specify default value for the entire struct or
+ # array. The valid SQL expressions are:
+ # - Literals for all data types, including STRUCT and ARRAY.
+ # - The following functions:
+ # `CURRENT_TIMESTAMP`
+ # `CURRENT_TIME`
+ # `CURRENT_DATE`
+ # `CURRENT_DATETIME`
+ # `GENERATE_UUID`
+ # `RAND`
+ # `SESSION_USER`
+ # `ST_GEOPOINT`
+ # - Struct or array composed with the above allowed functions, for example:
+ # "[CURRENT_DATE(), DATE '2020-01-01']"
+ #
+ def default_value_expression= default_value_expression
+ @gapi.update! default_value_expression: default_value_expression
+ end
+
  ##
  # The maximum length of values of this field for {#string?} or {bytes?} fields. If `max_length` is not
  # specified, no maximum length constraint is imposed on this field. If type = `STRING`, then `max_length`
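The new #default_value_expression accessor pair reads and writes the field's default value expression on the underlying resource. A hedged sketch of setting one during a schema update, not part of the diff (the dataset, table, and column names are invented, and locating the field through Schema#fields is only one way to reach it):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    table = bigquery.dataset("my_dataset").table("my_table")

    table.schema do |schema|
      # Give an existing top-level column a SQL default expression.
      created_at = schema.fields.find { |f| f.name == "created_at" }
      created_at.default_value_expression = "CURRENT_TIMESTAMP"
    end

    table.schema.fields.each do |field|
      puts "#{field.name}: #{field.default_value_expression.inspect}"
    end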
@@ -409,6 +459,15 @@ module Google
  type == "GEOGRAPHY"
  end

+ ##
+ # Checks if the type of the field is `JSON`.
+ #
+ # @return [Boolean] `true` when `JSON`, `false` otherwise.
+ #
+ def json?
+ type == "JSON"
+ end
+
  ##
  # Checks if the type of the field is `RECORD`.
  #
@@ -846,6 +905,30 @@ module Google
  add_field name, :geography, description: description, mode: mode, policy_tags: policy_tags
  end

+ ##
+ # Adds a json field to the nested schema of a record field.
+ #
+ # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#json_type
+ #
+ # @param [String] name The field name. The name must contain only
+ # letters (a-z, A-Z), numbers (0-9), or underscores (_), and must
+ # start with a letter or underscore. The maximum length is 128
+ # characters.
+ # @param [String] description A description of the field.
+ # @param [Symbol] mode The field's mode. The possible values are
+ # `:nullable`, `:required`, and `:repeated`. The default value is
+ # `:nullable`.
+ # @param [Array<String>, String] policy_tags The policy tag list or
+ # single policy tag for the field. Policy tag identifiers are of
+ # the form `projects/*/locations/*/taxonomies/*/policyTags/*`.
+ # At most 1 policy tag is currently allowed.
+ #
+ def json name, description: nil, mode: :nullable, policy_tags: nil
+ record_check!
+
+ add_field name, :json, description: description, mode: mode, policy_tags: policy_tags
+ end
+
  ##
  # Adds a record field to the nested schema of a record field. A block
  # must be passed describing the nested fields of the record. For more
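Taken together, the `JSON` entry in the type list, the #json? predicate, and the nested #json adder allow a record field to carry a JSON column. A hedged sketch, not part of the diff (dataset, table, and field names are invented):

    require "google/cloud/bigquery"

    bigquery = Google::Cloud::Bigquery.new
    dataset  = bigquery.dataset "my_dataset"

    table = dataset.create_table "events" do |t|
      t.schema do |schema|
        schema.string "name", mode: :required
        # Field#json adds a JSON column inside the nested record schema.
        schema.record "payload" do |nested|
          nested.json "attributes"
        end
      end
    end

    nested_json = table.schema.fields.flat_map(&:fields).select(&:json?)
    puts nested_json.map(&:name).inspect # => ["attributes"]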