aws-sdk-glue 1.137.0 → 1.139.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/VERSION +1 -1
- data/lib/aws-sdk-glue/client.rb +166 -55
- data/lib/aws-sdk-glue/client_api.rb +34 -0
- data/lib/aws-sdk-glue/types.rb +298 -94
- data/lib/aws-sdk-glue.rb +1 -1
- metadata +2 -2
data/lib/aws-sdk-glue/types.rb
CHANGED
@@ -155,14 +155,14 @@ module Aws::Glue
|
|
155
155
|
include Aws::Structure
|
156
156
|
end
|
157
157
|
|
158
|
-
# Specifies an
|
158
|
+
# Specifies an optional value when connecting to the Redshift cluster.
|
159
159
|
#
|
160
160
|
# @!attribute [rw] key
|
161
|
-
# The key
|
161
|
+
# The key for the additional connection option.
|
162
162
|
# @return [String]
|
163
163
|
#
|
164
164
|
# @!attribute [rw] value
|
165
|
-
# The value
|
165
|
+
# The value for the additional connection option.
|
166
166
|
# @return [String]
|
167
167
|
#
|
168
168
|
# @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/AmazonRedshiftAdvancedOption AWS API Documentation
|
@@ -2214,6 +2214,11 @@ module Aws::Glue
|
|
2214
2214
|
# Specifies a target that writes to a data target in Amazon Redshift.
|
2215
2215
|
# @return [Types::AmazonRedshiftTarget]
|
2216
2216
|
#
|
2217
|
+
# @!attribute [rw] evaluate_data_quality_multi_frame
|
2218
|
+
# Specifies your data quality evaluation criteria. Allows multiple
|
2219
|
+
# input data and returns a collection of Dynamic Frames.
|
2220
|
+
# @return [Types::EvaluateDataQualityMultiFrame]
|
2221
|
+
#
|
2217
2222
|
# @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/CodeGenConfigurationNode AWS API Documentation
|
2218
2223
|
#
|
2219
2224
|
class CodeGenConfigurationNode < Struct.new(
|
@@ -2281,7 +2286,8 @@ module Aws::Glue
|
|
2281
2286
|
:s3_delta_catalog_target,
|
2282
2287
|
:s3_delta_direct_target,
|
2283
2288
|
:amazon_redshift_source,
|
2284
|
-
:amazon_redshift_target
|
2289
|
+
:amazon_redshift_target,
|
2290
|
+
:evaluate_data_quality_multi_frame)
|
2285
2291
|
SENSITIVE = []
|
2286
2292
|
include Aws::Structure
|
2287
2293
|
end
|
@@ -4245,7 +4251,8 @@ module Aws::Glue
|
|
4245
4251
|
# @return [Types::JobCommand]
|
4246
4252
|
#
|
4247
4253
|
# @!attribute [rw] default_arguments
|
4248
|
-
# The default arguments for this job
|
4254
|
+
# The default arguments for every run of this job, specified as
|
4255
|
+
# name-value pairs.
|
4249
4256
|
#
|
4250
4257
|
# You can specify arguments here that your own job-execution script
|
4251
4258
|
# consumes, as well as arguments that Glue itself consumes.
|
@@ -4259,19 +4266,24 @@ module Aws::Glue
|
|
4259
4266
|
# arguments, see the [Calling Glue APIs in Python][1] topic in the
|
4260
4267
|
# developer guide.
|
4261
4268
|
#
|
4262
|
-
# For information about the
|
4263
|
-
#
|
4264
|
-
# the developer guide.
|
4269
|
+
# For information about the arguments you can provide to this field
|
4270
|
+
# when configuring Spark jobs, see the [Special Parameters Used by
|
4271
|
+
# Glue][2] topic in the developer guide.
|
4272
|
+
#
|
4273
|
+
# For information about the arguments you can provide to this field
|
4274
|
+
# when configuring Ray jobs, see [Using job parameters in Ray jobs][3]
|
4275
|
+
# in the developer guide.
|
4265
4276
|
#
|
4266
4277
|
#
|
4267
4278
|
#
|
4268
4279
|
# [1]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html
|
4269
4280
|
# [2]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
|
4281
|
+
# [3]: https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-job-parameters.html
|
4270
4282
|
# @return [Hash<String,String>]
|
4271
4283
|
#
|
4272
4284
|
# @!attribute [rw] non_overridable_arguments
|
4273
|
-
#
|
4274
|
-
# pairs.
|
4285
|
+
# Arguments for this job that are not overridden when providing job
|
4286
|
+
# arguments in a job run, specified as name-value pairs.
|
4275
4287
|
# @return [Hash<String,String>]
|
4276
4288
|
#
|
4277
4289
|
# @!attribute [rw] connections
|
@@ -4307,13 +4319,18 @@ module Aws::Glue
|
|
4307
4319
|
# type, the number of Glue data processing units (DPUs) that can be
|
4308
4320
|
# allocated when this job runs. A DPU is a relative measure of
|
4309
4321
|
# processing power that consists of 4 vCPUs of compute capacity and 16
|
4310
|
-
# GB of memory. For more information, see the [Glue pricing page][1].
|
4322
|
+
# GB of memory. For more information, see the [Glue pricing page][1].
|
4311
4323
|
#
|
4312
|
-
#
|
4324
|
+
# For Glue version 2.0+ jobs, you cannot specify a `Maximum capacity`.
|
4325
|
+
# Instead, you should specify a `Worker type` and the `Number of
|
4326
|
+
# workers`.
|
4327
|
+
#
|
4328
|
+
# Do not set `MaxCapacity` if using `WorkerType` and
|
4313
4329
|
# `NumberOfWorkers`.
|
4314
4330
|
#
|
4315
4331
|
# The value that can be allocated for `MaxCapacity` depends on whether
|
4316
|
-
# you are running a Python shell job
|
4332
|
+
# you are running a Python shell job, an Apache Spark ETL job, or an
|
4333
|
+
# Apache Spark streaming ETL job:
|
4317
4334
|
#
|
4318
4335
|
# * When you specify a Python shell job
|
4319
4336
|
# (`JobCommand.Name`="pythonshell"), you can allocate either
|
@@ -4321,14 +4338,10 @@ module Aws::Glue
|
|
4321
4338
|
#
|
4322
4339
|
# * When you specify an Apache Spark ETL job
|
4323
4340
|
# (`JobCommand.Name`="glueetl") or Apache Spark streaming ETL job
|
4324
|
-
# (`JobCommand.Name`="gluestreaming"), you can allocate
|
4325
|
-
#
|
4341
|
+
# (`JobCommand.Name`="gluestreaming"), you can allocate from 2 to
|
4342
|
+
# 100 DPUs. The default is 10 DPUs. This job type cannot have a
|
4326
4343
|
# fractional DPU allocation.
|
4327
4344
|
#
|
4328
|
-
# For Glue version 2.0 jobs, you cannot instead specify a `Maximum
|
4329
|
-
# capacity`. Instead, you should specify a `Worker type` and the
|
4330
|
-
# `Number of workers`.
|
4331
|
-
#
|
4332
4345
|
#
|
4333
4346
|
#
|
4334
4347
|
# [1]: https://aws.amazon.com/glue/pricing/
|
@@ -4354,9 +4367,14 @@ module Aws::Glue
|
|
4354
4367
|
# @return [Types::NotificationProperty]
|
4355
4368
|
#
|
4356
4369
|
# @!attribute [rw] glue_version
|
4357
|
-
#
|
4358
|
-
# Glue
|
4359
|
-
# for jobs of type Spark.
|
4370
|
+
# In Spark jobs, `GlueVersion` determines the versions of Apache Spark
|
4371
|
+
# and Python that Glue makes available in a job. The Python version
|
4372
|
+
# indicates the version supported for jobs of type Spark.
|
4373
|
+
#
|
4374
|
+
# Ray jobs should set `GlueVersion` to `4.0` or greater. However, the
|
4375
|
+
# versions of Ray, Python and additional libraries available in your
|
4376
|
+
# Ray job are determined by the `Runtime` parameter of the Job
|
4377
|
+
# command.
|
4360
4378
|
#
|
4361
4379
|
# For more information about the available Glue versions and
|
4362
4380
|
# corresponding Spark and Python versions, see [Glue version][1] in
|
@@ -4377,7 +4395,8 @@ module Aws::Glue
|
|
4377
4395
|
#
|
4378
4396
|
# @!attribute [rw] worker_type
|
4379
4397
|
# The type of predefined worker that is allocated when a job runs.
|
4380
|
-
# Accepts a value of Standard, G.1X, G.2X, or G.025X.
|
4398
|
+
# Accepts a value of Standard, G.1X, G.2X, or G.025X for Spark jobs.
|
4399
|
+
# Accepts the value Z.2X for Ray jobs.
|
4381
4400
|
#
|
4382
4401
|
# * For the `Standard` worker type, each worker provides 4 vCPU, 16 GB
|
4383
4402
|
# of memory and a 50GB disk, and 2 executors per worker.
|
@@ -4395,6 +4414,10 @@ module Aws::Glue
|
|
4395
4414
|
# worker. We recommend this worker type for low volume streaming
|
4396
4415
|
# jobs. This worker type is only available for Glue version 3.0
|
4397
4416
|
# streaming jobs.
|
4417
|
+
#
|
4418
|
+
# * For the `Z.2X` worker type, each worker maps to 2 M-DPU (8 vCPU, 64
|
4419
|
+
# GB of memory, 128 GB disk), and provides up to 8 Ray workers
|
4420
|
+
# based on the autoscaler.
|
4398
4421
|
# @return [String]
|
4399
4422
|
#
|
4400
4423
|
# @!attribute [rw] code_gen_configuration_nodes
|
@@ -5851,13 +5874,18 @@ module Aws::Glue
|
|
5851
5874
|
# A pass or fail status for the rule.
|
5852
5875
|
# @return [String]
|
5853
5876
|
#
|
5877
|
+
# @!attribute [rw] evaluated_metrics
|
5878
|
+
# A map of metrics associated with the evaluation of the rule.
|
5879
|
+
# @return [Hash<String,Float>]
|
5880
|
+
#
|
5854
5881
|
# @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/DataQualityRuleResult AWS API Documentation
|
5855
5882
|
#
|
5856
5883
|
class DataQualityRuleResult < Struct.new(
|
5857
5884
|
:name,
|
5858
5885
|
:description,
|
5859
5886
|
:evaluation_message,
|
5860
|
-
:result
|
5887
|
+
:result,
|
5888
|
+
:evaluated_metrics)
|
5861
5889
|
SENSITIVE = []
|
5862
5890
|
include Aws::Structure
|
5863
5891
|
end
|
@@ -6015,11 +6043,16 @@ module Aws::Glue
|
|
6015
6043
|
# The name of the database where the Glue table exists.
|
6016
6044
|
# @return [String]
|
6017
6045
|
#
|
6046
|
+
# @!attribute [rw] catalog_id
|
6047
|
+
# The catalog id where the Glue table exists.
|
6048
|
+
# @return [String]
|
6049
|
+
#
|
6018
6050
|
# @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/DataQualityTargetTable AWS API Documentation
|
6019
6051
|
#
|
6020
6052
|
class DataQualityTargetTable < Struct.new(
|
6021
6053
|
:table_name,
|
6022
|
-
:database_name
|
6054
|
+
:database_name,
|
6055
|
+
:catalog_id)
|
6023
6056
|
SENSITIVE = []
|
6024
6057
|
include Aws::Structure
|
6025
6058
|
end
|
@@ -7759,6 +7792,52 @@ module Aws::Glue
|
|
7759
7792
|
include Aws::Structure
|
7760
7793
|
end
|
7761
7794
|
|
7795
|
+
# Specifies your data quality evaluation criteria.
|
7796
|
+
#
|
7797
|
+
# @!attribute [rw] name
|
7798
|
+
# The name of the data quality evaluation.
|
7799
|
+
# @return [String]
|
7800
|
+
#
|
7801
|
+
# @!attribute [rw] inputs
|
7802
|
+
# The inputs of your data quality evaluation. The first input in this
|
7803
|
+
# list is the primary data source.
|
7804
|
+
# @return [Array<String>]
|
7805
|
+
#
|
7806
|
+
# @!attribute [rw] additional_data_sources
|
7807
|
+
# The aliases of all data sources except primary.
|
7808
|
+
# @return [Hash<String,String>]
|
7809
|
+
#
|
7810
|
+
# @!attribute [rw] ruleset
|
7811
|
+
# The ruleset for your data quality evaluation.
|
7812
|
+
# @return [String]
|
7813
|
+
#
|
7814
|
+
# @!attribute [rw] publishing_options
|
7815
|
+
# Options to configure how your results are published.
|
7816
|
+
# @return [Types::DQResultsPublishingOptions]
|
7817
|
+
#
|
7818
|
+
# @!attribute [rw] additional_options
|
7819
|
+
# Options to configure runtime behavior of the transform.
|
7820
|
+
# @return [Hash<String,String>]
|
7821
|
+
#
|
7822
|
+
# @!attribute [rw] stop_job_on_failure_options
|
7823
|
+
# Options to configure how your job will stop if your data quality
|
7824
|
+
# evaluation fails.
|
7825
|
+
# @return [Types::DQStopJobOnFailureOptions]
|
7826
|
+
#
|
7827
|
+
# @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/EvaluateDataQualityMultiFrame AWS API Documentation
|
7828
|
+
#
|
7829
|
+
class EvaluateDataQualityMultiFrame < Struct.new(
|
7830
|
+
:name,
|
7831
|
+
:inputs,
|
7832
|
+
:additional_data_sources,
|
7833
|
+
:ruleset,
|
7834
|
+
:publishing_options,
|
7835
|
+
:additional_options,
|
7836
|
+
:stop_job_on_failure_options)
|
7837
|
+
SENSITIVE = []
|
7838
|
+
include Aws::Structure
|
7839
|
+
end
|
7840
|
+
|
7762
7841
|
# Evaluation metrics provide an estimate of the quality of your machine
|
7763
7842
|
# learning transform.
|
7764
7843
|
#
|
@@ -9008,6 +9087,11 @@ module Aws::Glue
|
|
9008
9087
|
# A list of result IDs for the data quality results for the run.
|
9009
9088
|
# @return [Array<String>]
|
9010
9089
|
#
|
9090
|
+
# @!attribute [rw] additional_data_sources
|
9091
|
+
# A map of reference strings to additional data sources you can
|
9092
|
+
# specify for an evaluation run.
|
9093
|
+
# @return [Hash<String,Types::DataSource>]
|
9094
|
+
#
|
9011
9095
|
# @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/GetDataQualityRulesetEvaluationRunResponse AWS API Documentation
|
9012
9096
|
#
|
9013
9097
|
class GetDataQualityRulesetEvaluationRunResponse < Struct.new(
|
@@ -9024,7 +9108,8 @@ module Aws::Glue
|
|
9024
9108
|
:completed_on,
|
9025
9109
|
:execution_time,
|
9026
9110
|
:ruleset_names,
|
9027
|
-
:result_ids
|
9111
|
+
:result_ids,
|
9112
|
+
:additional_data_sources)
|
9028
9113
|
SENSITIVE = []
|
9029
9114
|
include Aws::Structure
|
9030
9115
|
end
|
@@ -12097,28 +12182,39 @@ module Aws::Glue
|
|
12097
12182
|
# @return [Types::JobCommand]
|
12098
12183
|
#
|
12099
12184
|
# @!attribute [rw] default_arguments
|
12100
|
-
# The default arguments for this job, specified as
|
12185
|
+
# The default arguments for every run of this job, specified as
|
12186
|
+
# name-value pairs.
|
12101
12187
|
#
|
12102
12188
|
# You can specify arguments here that your own job-execution script
|
12103
12189
|
# consumes, as well as arguments that Glue itself consumes.
|
12104
12190
|
#
|
12191
|
+
# Job arguments may be logged. Do not pass plaintext secrets as
|
12192
|
+
# arguments. Retrieve secrets from a Glue Connection, Secrets Manager
|
12193
|
+
# or other secret management mechanism if you intend to keep them
|
12194
|
+
# within the Job.
|
12195
|
+
#
|
12105
12196
|
# For information about how to specify and consume your own Job
|
12106
12197
|
# arguments, see the [Calling Glue APIs in Python][1] topic in the
|
12107
12198
|
# developer guide.
|
12108
12199
|
#
|
12109
|
-
# For information about the
|
12110
|
-
#
|
12111
|
-
# the developer guide.
|
12200
|
+
# For information about the arguments you can provide to this field
|
12201
|
+
# when configuring Spark jobs, see the [Special Parameters Used by
|
12202
|
+
# Glue][2] topic in the developer guide.
|
12203
|
+
#
|
12204
|
+
# For information about the arguments you can provide to this field
|
12205
|
+
# when configuring Ray jobs, see [Using job parameters in Ray jobs][3]
|
12206
|
+
# in the developer guide.
|
12112
12207
|
#
|
12113
12208
|
#
|
12114
12209
|
#
|
12115
12210
|
# [1]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html
|
12116
12211
|
# [2]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
|
12212
|
+
# [3]: https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-job-parameters.html
|
12117
12213
|
# @return [Hash<String,String>]
|
12118
12214
|
#
|
12119
12215
|
# @!attribute [rw] non_overridable_arguments
|
12120
|
-
#
|
12121
|
-
# pairs.
|
12216
|
+
# Arguments for this job that are not overridden when providing job
|
12217
|
+
# arguments in a job run, specified as name-value pairs.
|
12122
12218
|
# @return [Hash<String,String>]
|
12123
12219
|
#
|
12124
12220
|
# @!attribute [rw] connections
|
@@ -12156,7 +12252,7 @@ module Aws::Glue
|
|
12156
12252
|
# type, the number of Glue data processing units (DPUs) that can be
|
12157
12253
|
# allocated when this job runs. A DPU is a relative measure of
|
12158
12254
|
# processing power that consists of 4 vCPUs of compute capacity and 16
|
12159
|
-
# GB of memory. For more information, see the [Glue pricing page][1].
|
12255
|
+
# GB of memory. For more information, see the [Glue pricing page][1].
|
12160
12256
|
#
|
12161
12257
|
# For Glue version 2.0 or later jobs, you cannot specify a `Maximum
|
12162
12258
|
# capacity`. Instead, you should specify a `Worker type` and the
|
@@ -12186,7 +12282,8 @@ module Aws::Glue
|
|
12186
12282
|
#
|
12187
12283
|
# @!attribute [rw] worker_type
|
12188
12284
|
# The type of predefined worker that is allocated when a job runs.
|
12189
|
-
# Accepts a value of Standard, G.1X, G.2X, or G.025X
|
12285
|
+
# Accepts a value of Standard, G.1X, G.2X, G.4X, G.8X, or G.025X for
|
12286
|
+
# Spark jobs. Accepts the value Z.2X for Ray jobs.
|
12190
12287
|
#
|
12191
12288
|
# * For the `Standard` worker type, each worker provides 4 vCPU, 16 GB
|
12192
12289
|
# of memory and a 50GB disk, and 2 executors per worker.
|
@@ -12207,20 +12304,30 @@ module Aws::Glue
|
|
12207
12304
|
# GB of memory, 256 GB disk), and provides 1 executor per worker. We
|
12208
12305
|
# recommend this worker type for jobs whose workloads contain your
|
12209
12306
|
# most demanding transforms, aggregations, joins, and queries. This
|
12210
|
-
# worker type is available only for Glue version 3.0 or later
|
12307
|
+
# worker type is available only for Glue version 3.0 or later Spark
|
12308
|
+
# ETL jobs in the following Amazon Web Services Regions: US East
|
12309
|
+
# (Ohio), US East (N. Virginia), US West (Oregon), Asia Pacific
|
12310
|
+
# (Singapore), Asia Pacific (Sydney), Asia Pacific (Tokyo), Canada
|
12311
|
+
# (Central), Europe (Frankfurt), Europe (Ireland), and Europe
|
12312
|
+
# (Stockholm).
|
12211
12313
|
#
|
12212
12314
|
# * For the `G.8X` worker type, each worker maps to 8 DPU (32 vCPU,
|
12213
12315
|
# 128 GB of memory, 512 GB disk), and provides 1 executor per
|
12214
12316
|
# worker. We recommend this worker type for jobs whose workloads
|
12215
12317
|
# contain your most demanding transforms, aggregations, joins, and
|
12216
12318
|
# queries. This worker type is available only for Glue version 3.0
|
12217
|
-
# or later jobs
|
12319
|
+
# or later Spark ETL jobs, in the same Amazon Web Services Regions
|
12320
|
+
# as supported for the `G.4X` worker type.
|
12218
12321
|
#
|
12219
12322
|
# * For the `G.025X` worker type, each worker maps to 0.25 DPU (2
|
12220
12323
|
# vCPU, 4 GB of memory, 64 GB disk), and provides 1 executor per
|
12221
12324
|
# worker. We recommend this worker type for low volume streaming
|
12222
12325
|
# jobs. This worker type is only available for Glue version 3.0
|
12223
12326
|
# streaming jobs.
|
12327
|
+
#
|
12328
|
+
# * For the `Z.2X` worker type, each worker maps to 2 M-DPU (8 vCPU, 64
|
12329
|
+
# GB of memory, 128 GB disk), and provides a default of 8 Ray
|
12330
|
+
# workers (1 per vCPU).
|
12224
12331
|
# @return [String]
|
12225
12332
|
#
|
12226
12333
|
# @!attribute [rw] number_of_workers
|
@@ -12238,9 +12345,14 @@ module Aws::Glue
|
|
12238
12345
|
# @return [Types::NotificationProperty]
|
12239
12346
|
#
|
12240
12347
|
# @!attribute [rw] glue_version
|
12241
|
-
#
|
12242
|
-
# Glue
|
12243
|
-
# for jobs of type Spark.
|
12348
|
+
# In Spark jobs, `GlueVersion` determines the versions of Apache Spark
|
12349
|
+
# and Python that Glue makes available in a job. The Python version
|
12350
|
+
# indicates the version supported for jobs of type Spark.
|
12351
|
+
#
|
12352
|
+
# Ray jobs should set `GlueVersion` to `4.0` or greater. However, the
|
12353
|
+
# versions of Ray, Python and additional libraries available in your
|
12354
|
+
# Ray job are determined by the `Runtime` parameter of the Job
|
12355
|
+
# command.
|
12244
12356
|
#
|
12245
12357
|
# For more information about the available Glue versions and
|
12246
12358
|
# corresponding Spark and Python versions, see [Glue version][1] in
|
@@ -12378,7 +12490,8 @@ module Aws::Glue
|
|
12378
12490
|
# @!attribute [rw] name
|
12379
12491
|
# The name of the job command. For an Apache Spark ETL job, this must
|
12380
12492
|
# be `glueetl`. For a Python shell job, it must be `pythonshell`. For
|
12381
|
-
# an Apache Spark streaming ETL job, this must be `gluestreaming`.
|
12493
|
+
# an Apache Spark streaming ETL job, this must be `gluestreaming`. For
|
12494
|
+
# a Ray job, this must be `glueray`.
|
12382
12495
|
# @return [String]
|
12383
12496
|
#
|
12384
12497
|
# @!attribute [rw] script_location
|
@@ -12391,12 +12504,24 @@ module Aws::Glue
|
|
12391
12504
|
# values are 2 or 3.
|
12392
12505
|
# @return [String]
|
12393
12506
|
#
|
12507
|
+
# @!attribute [rw] runtime
|
12508
|
+
# In Ray jobs, Runtime is used to specify the versions of Ray, Python
|
12509
|
+
# and additional libraries available in your environment. This field
|
12510
|
+
# is not used in other job types. For supported runtime environment
|
12511
|
+
# values, see [Working with Ray jobs][1] in the Glue Developer Guide.
|
12512
|
+
#
|
12513
|
+
#
|
12514
|
+
#
|
12515
|
+
# [1]: https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-runtimes.html
|
12516
|
+
# @return [String]
|
12517
|
+
#
|
12394
12518
|
# @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/JobCommand AWS API Documentation
|
12395
12519
|
#
|
12396
12520
|
class JobCommand < Struct.new(
|
12397
12521
|
:name,
|
12398
12522
|
:script_location,
|
12399
|
-
:python_version
|
12523
|
+
:python_version,
|
12524
|
+
:runtime)
|
12400
12525
|
SENSITIVE = []
|
12401
12526
|
include Aws::Structure
|
12402
12527
|
end
|
@@ -12467,18 +12592,28 @@ module Aws::Glue
|
|
12467
12592
|
# You can specify arguments here that your own job-execution script
|
12468
12593
|
# consumes, as well as arguments that Glue itself consumes.
|
12469
12594
|
#
|
12470
|
-
#
|
12595
|
+
# Job arguments may be logged. Do not pass plaintext secrets as
|
12596
|
+
# arguments. Retrieve secrets from a Glue Connection, Secrets Manager
|
12597
|
+
# or other secret management mechanism if you intend to keep them
|
12598
|
+
# within the Job.
|
12599
|
+
#
|
12600
|
+
# For information about how to specify and consume your own Job
|
12471
12601
|
# arguments, see the [Calling Glue APIs in Python][1] topic in the
|
12472
12602
|
# developer guide.
|
12473
12603
|
#
|
12474
|
-
# For information about the
|
12475
|
-
#
|
12476
|
-
# the developer guide.
|
12604
|
+
# For information about the arguments you can provide to this field
|
12605
|
+
# when configuring Spark jobs, see the [Special Parameters Used by
|
12606
|
+
# Glue][2] topic in the developer guide.
|
12607
|
+
#
|
12608
|
+
# For information about the arguments you can provide to this field
|
12609
|
+
# when configuring Ray jobs, see [Using job parameters in Ray jobs][3]
|
12610
|
+
# in the developer guide.
|
12477
12611
|
#
|
12478
12612
|
#
|
12479
12613
|
#
|
12480
12614
|
# [1]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html
|
12481
12615
|
# [2]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
|
12616
|
+
# [3]: https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-job-parameters.html
|
12482
12617
|
# @return [Hash<String,String>]
|
12483
12618
|
#
|
12484
12619
|
# @!attribute [rw] error_message
|
@@ -12518,24 +12653,31 @@ module Aws::Glue
|
|
12518
12653
|
# @return [Integer]
|
12519
12654
|
#
|
12520
12655
|
# @!attribute [rw] max_capacity
|
12521
|
-
#
|
12656
|
+
# For Glue version 1.0 or earlier jobs, using the standard worker
|
12657
|
+
# type, the number of Glue data processing units (DPUs) that can be
|
12522
12658
|
# allocated when this job runs. A DPU is a relative measure of
|
12523
12659
|
# processing power that consists of 4 vCPUs of compute capacity and 16
|
12524
|
-
# GB of memory. For more information, see the [Glue pricing page][1].
|
12660
|
+
# GB of memory. For more information, see the [Glue pricing page][1].
|
12661
|
+
#
|
12662
|
+
# For Glue version 2.0+ jobs, you cannot specify a `Maximum capacity`.
|
12663
|
+
# Instead, you should specify a `Worker type` and the `Number of
|
12664
|
+
# workers`.
|
12525
12665
|
#
|
12526
|
-
# Do not set `
|
12666
|
+
# Do not set `MaxCapacity` if using `WorkerType` and
|
12527
12667
|
# `NumberOfWorkers`.
|
12528
12668
|
#
|
12529
12669
|
# The value that can be allocated for `MaxCapacity` depends on whether
|
12530
|
-
# you are running a Python shell job
|
12670
|
+
# you are running a Python shell job, an Apache Spark ETL job, or an
|
12671
|
+
# Apache Spark streaming ETL job:
|
12531
12672
|
#
|
12532
12673
|
# * When you specify a Python shell job
|
12533
12674
|
# (`JobCommand.Name`="pythonshell"), you can allocate either
|
12534
12675
|
# 0.0625 or 1 DPU. The default is 0.0625 DPU.
|
12535
12676
|
#
|
12536
12677
|
# * When you specify an Apache Spark ETL job
|
12537
|
-
# (`JobCommand.Name`="glueetl")
|
12538
|
-
#
|
12678
|
+
# (`JobCommand.Name`="glueetl") or Apache Spark streaming ETL job
|
12679
|
+
# (`JobCommand.Name`="gluestreaming"), you can allocate from 2 to
|
12680
|
+
# 100 DPUs. The default is 10 DPUs. This job type cannot have a
|
12539
12681
|
# fractional DPU allocation.
|
12540
12682
|
#
|
12541
12683
|
#
|
@@ -12545,22 +12687,29 @@ module Aws::Glue
|
|
12545
12687
|
#
|
12546
12688
|
# @!attribute [rw] worker_type
|
12547
12689
|
# The type of predefined worker that is allocated when a job runs.
|
12548
|
-
# Accepts a value of Standard, G.1X, G.2X, or G.025X.
|
12690
|
+
# Accepts a value of Standard, G.1X, G.2X, or G.025X for Spark jobs.
|
12691
|
+
# Accepts the value Z.2X for Ray jobs.
|
12549
12692
|
#
|
12550
12693
|
# * For the `Standard` worker type, each worker provides 4 vCPU, 16 GB
|
12551
12694
|
# of memory and a 50GB disk, and 2 executors per worker.
|
12552
12695
|
#
|
12553
|
-
# * For the `G.1X` worker type, each worker
|
12554
|
-
# memory
|
12696
|
+
# * For the `G.1X` worker type, each worker maps to 1 DPU (4 vCPU, 16
|
12697
|
+
# GB of memory, 64 GB disk), and provides 1 executor per worker. We
|
12698
|
+
# recommend this worker type for memory-intensive jobs.
|
12555
12699
|
#
|
12556
|
-
# * For the `G.2X` worker type, each worker
|
12557
|
-
# memory
|
12700
|
+
# * For the `G.2X` worker type, each worker maps to 2 DPU (8 vCPU, 32
|
12701
|
+
# GB of memory, 128 GB disk), and provides 1 executor per worker. We
|
12702
|
+
# recommend this worker type for memory-intensive jobs.
|
12558
12703
|
#
|
12559
12704
|
# * For the `G.025X` worker type, each worker maps to 0.25 DPU (2
|
12560
12705
|
# vCPU, 4 GB of memory, 64 GB disk), and provides 1 executor per
|
12561
12706
|
# worker. We recommend this worker type for low volume streaming
|
12562
12707
|
# jobs. This worker type is only available for Glue version 3.0
|
12563
12708
|
# streaming jobs.
|
12709
|
+
#
|
12710
|
+
# * For the `Z.2X` worker type, each worker maps to 2 M-DPU (8 vCPU, 64
|
12711
|
+
# GB of memory, 128 GB disk), and provides up to 8 Ray workers (one
|
12712
|
+
# per vCPU) based on the autoscaler.
|
12564
12713
|
# @return [String]
|
12565
12714
|
#
|
12566
12715
|
# @!attribute [rw] number_of_workers
|
@@ -12588,9 +12737,14 @@ module Aws::Glue
|
|
12588
12737
|
# @return [Types::NotificationProperty]
|
12589
12738
|
#
|
12590
12739
|
# @!attribute [rw] glue_version
|
12591
|
-
#
|
12592
|
-
# Glue
|
12593
|
-
# for jobs of type Spark.
|
12740
|
+
# In Spark jobs, `GlueVersion` determines the versions of Apache Spark
|
12741
|
+
# and Python that Glue makes available in a job. The Python version
|
12742
|
+
# indicates the version supported for jobs of type Spark.
|
12743
|
+
#
|
12744
|
+
# Ray jobs should set `GlueVersion` to `4.0` or greater. However, the
|
12745
|
+
# versions of Ray, Python and additional libraries available in your
|
12746
|
+
# Ray job are determined by the `Runtime` parameter of the Job
|
12747
|
+
# command.
|
12594
12748
|
#
|
12595
12749
|
# For more information about the available Glue versions and
|
12596
12750
|
# corresponding Spark and Python versions, see [Glue version][1] in
|
@@ -12687,28 +12841,39 @@ module Aws::Glue
|
|
12687
12841
|
# @return [Types::JobCommand]
|
12688
12842
|
#
|
12689
12843
|
# @!attribute [rw] default_arguments
|
12690
|
-
# The default arguments for this job
|
12844
|
+
# The default arguments for every run of this job, specified as
|
12845
|
+
# name-value pairs.
|
12691
12846
|
#
|
12692
12847
|
# You can specify arguments here that your own job-execution script
|
12693
12848
|
# consumes, as well as arguments that Glue itself consumes.
|
12694
12849
|
#
|
12850
|
+
# Job arguments may be logged. Do not pass plaintext secrets as
|
12851
|
+
# arguments. Retrieve secrets from a Glue Connection, Secrets Manager
|
12852
|
+
# or other secret management mechanism if you intend to keep them
|
12853
|
+
# within the Job.
|
12854
|
+
#
|
12695
12855
|
# For information about how to specify and consume your own Job
|
12696
12856
|
# arguments, see the [Calling Glue APIs in Python][1] topic in the
|
12697
12857
|
# developer guide.
|
12698
12858
|
#
|
12699
|
-
# For information about the
|
12700
|
-
#
|
12701
|
-
# the developer guide.
|
12859
|
+
# For information about the arguments you can provide to this field
|
12860
|
+
# when configuring Spark jobs, see the [Special Parameters Used by
|
12861
|
+
# Glue][2] topic in the developer guide.
|
12862
|
+
#
|
12863
|
+
# For information about the arguments you can provide to this field
|
12864
|
+
# when configuring Ray jobs, see [Using job parameters in Ray jobs][3]
|
12865
|
+
# in the developer guide.
|
12702
12866
|
#
|
12703
12867
|
#
|
12704
12868
|
#
|
12705
12869
|
# [1]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html
|
12706
12870
|
# [2]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
|
12871
|
+
# [3]: https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-job-parameters.html
|
12707
12872
|
# @return [Hash<String,String>]
|
12708
12873
|
#
|
12709
12874
|
# @!attribute [rw] non_overridable_arguments
|
12710
|
-
#
|
12711
|
-
# pairs.
|
12875
|
+
# Arguments for this job that are not overridden when providing job
|
12876
|
+
# arguments in a job run, specified as name-value pairs.
|
12712
12877
|
# @return [Hash<String,String>]
|
12713
12878
|
#
|
12714
12879
|
# @!attribute [rw] connections
|
@@ -12744,13 +12909,18 @@ module Aws::Glue
|
|
12744
12909
|
# type, the number of Glue data processing units (DPUs) that can be
|
12745
12910
|
# allocated when this job runs. A DPU is a relative measure of
|
12746
12911
|
# processing power that consists of 4 vCPUs of compute capacity and 16
|
12747
|
-
# GB of memory. For more information, see the [Glue pricing page][1].
|
12912
|
+
# GB of memory. For more information, see the [Glue pricing page][1].
|
12913
|
+
#
|
12914
|
+
# For Glue version 2.0+ jobs, you cannot specify a `Maximum capacity`.
|
12915
|
+
# Instead, you should specify a `Worker type` and the `Number of
|
12916
|
+
# workers`.
|
12748
12917
|
#
|
12749
|
-
# Do not set `
|
12918
|
+
# Do not set `MaxCapacity` if using `WorkerType` and
|
12750
12919
|
# `NumberOfWorkers`.
|
12751
12920
|
#
|
12752
12921
|
# The value that can be allocated for `MaxCapacity` depends on whether
|
12753
|
-
# you are running a Python shell job
|
12922
|
+
# you are running a Python shell job, an Apache Spark ETL job, or an
|
12923
|
+
# Apache Spark streaming ETL job:
|
12754
12924
|
#
|
12755
12925
|
# * When you specify a Python shell job
|
12756
12926
|
# (`JobCommand.Name`="pythonshell"), you can allocate either
|
@@ -12758,14 +12928,10 @@ module Aws::Glue
|
|
12758
12928
|
#
|
12759
12929
|
# * When you specify an Apache Spark ETL job
|
12760
12930
|
# (`JobCommand.Name`="glueetl") or Apache Spark streaming ETL job
|
12761
|
-
# (`JobCommand.Name`="gluestreaming"), you can allocate
|
12762
|
-
#
|
12931
|
+
# (`JobCommand.Name`="gluestreaming"), you can allocate from 2 to
|
12932
|
+
# 100 DPUs. The default is 10 DPUs. This job type cannot have a
|
12763
12933
|
# fractional DPU allocation.
|
12764
12934
|
#
|
12765
|
-
# For Glue version 2.0 jobs, you cannot instead specify a `Maximum
|
12766
|
-
# capacity`. Instead, you should specify a `Worker type` and the
|
12767
|
-
# `Number of workers`.
|
12768
|
-
#
|
12769
12935
|
#
|
12770
12936
|
#
|
12771
12937
|
# [1]: https://aws.amazon.com/glue/pricing/
|
@@ -12773,7 +12939,8 @@ module Aws::Glue
|
|
12773
12939
|
#
|
12774
12940
|
# @!attribute [rw] worker_type
|
12775
12941
|
# The type of predefined worker that is allocated when a job runs.
|
12776
|
-
# Accepts a value of Standard, G.1X, G.2X, or G.025X.
|
12942
|
+
# Accepts a value of Standard, G.1X, G.2X, or G.025X for Spark jobs.
|
12943
|
+
# Accepts the value Z.2X for Ray jobs.
|
12777
12944
|
#
|
12778
12945
|
# * For the `Standard` worker type, each worker provides 4 vCPU, 16 GB
|
12779
12946
|
# of memory and a 50GB disk, and 2 executors per worker.
|
@@ -12791,6 +12958,10 @@ module Aws::Glue
|
|
12791
12958
|
# worker. We recommend this worker type for low volume streaming
|
12792
12959
|
# jobs. This worker type is only available for Glue version 3.0
|
12793
12960
|
# streaming jobs.
|
12961
|
+
#
|
12962
|
+
# * For the `Z.2X` worker type, each worker maps to 2 M-DPU (8 vCPU, 64
|
12963
|
+
# GB of memory, 128 GB disk), and provides up to 8 Ray workers
|
12964
|
+
# based on the autoscaler.
|
12794
12965
|
# @return [String]
|
12795
12966
|
#
|
12796
12967
|
# @!attribute [rw] number_of_workers
|
@@ -12808,14 +12979,22 @@ module Aws::Glue
|
|
12808
12979
|
# @return [Types::NotificationProperty]
|
12809
12980
|
#
|
12810
12981
|
# @!attribute [rw] glue_version
|
12811
|
-
#
|
12812
|
-
# Glue
|
12813
|
-
# for jobs of type Spark.
|
12982
|
+
# In Spark jobs, `GlueVersion` determines the versions of Apache Spark
|
12983
|
+
# and Python that Glue makes available in a job. The Python version
|
12984
|
+
# indicates the version supported for jobs of type Spark.
|
12985
|
+
#
|
12986
|
+
# Ray jobs should set `GlueVersion` to `4.0` or greater. However, the
|
12987
|
+
# versions of Ray, Python and additional libraries available in your
|
12988
|
+
# Ray job are determined by the `Runtime` parameter of the Job
|
12989
|
+
# command.
|
12814
12990
|
#
|
12815
12991
|
# For more information about the available Glue versions and
|
12816
12992
|
# corresponding Spark and Python versions, see [Glue version][1] in
|
12817
12993
|
# the developer guide.
|
12818
12994
|
#
|
12995
|
+
# Jobs that are created without specifying a Glue version default to
|
12996
|
+
# Glue 0.9.
|
12997
|
+
#
|
12819
12998
|
#
|
12820
12999
|
#
|
12821
13000
|
# [1]: https://docs.aws.amazon.com/glue/latest/dg/add-job.html
|
@@ -18167,6 +18346,11 @@ module Aws::Glue
|
|
18167
18346
|
# A list of ruleset names.
|
18168
18347
|
# @return [Array<String>]
|
18169
18348
|
#
|
18349
|
+
# @!attribute [rw] additional_data_sources
|
18350
|
+
# A map of reference strings to additional data sources you can
|
18351
|
+
# specify for an evaluation run.
|
18352
|
+
# @return [Hash<String,Types::DataSource>]
|
18353
|
+
#
|
18170
18354
|
# @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/StartDataQualityRulesetEvaluationRunRequest AWS API Documentation
|
18171
18355
|
#
|
18172
18356
|
class StartDataQualityRulesetEvaluationRunRequest < Struct.new(
|
@@ -18176,7 +18360,8 @@ module Aws::Glue
|
|
18176
18360
|
:timeout,
|
18177
18361
|
:client_token,
|
18178
18362
|
:additional_run_options,
|
18179
|
-
:ruleset_names
|
18363
|
+
:ruleset_names,
|
18364
|
+
:additional_data_sources)
|
18180
18365
|
SENSITIVE = []
|
18181
18366
|
include Aws::Structure
|
18182
18367
|
end
|
@@ -18266,7 +18451,7 @@ module Aws::Glue
|
|
18266
18451
|
# @return [String]
|
18267
18452
|
#
|
18268
18453
|
# @!attribute [rw] arguments
|
18269
|
-
# The job arguments
|
18454
|
+
# The job arguments associated with this run. For this job run, they
|
18270
18455
|
# replace the default arguments set in the job definition itself.
|
18271
18456
|
#
|
18272
18457
|
# You can specify arguments here that your own job-execution script
|
@@ -18281,14 +18466,19 @@ module Aws::Glue
|
|
18281
18466
|
# arguments, see the [Calling Glue APIs in Python][1] topic in the
|
18282
18467
|
# developer guide.
|
18283
18468
|
#
|
18284
|
-
# For information about the
|
18285
|
-
#
|
18286
|
-
# the developer guide.
|
18469
|
+
# For information about the arguments you can provide to this field
|
18470
|
+
# when configuring Spark jobs, see the [Special Parameters Used by
|
18471
|
+
# Glue][2] topic in the developer guide.
|
18472
|
+
#
|
18473
|
+
# For information about the arguments you can provide to this field
|
18474
|
+
# when configuring Ray jobs, see [Using job parameters in Ray jobs][3]
|
18475
|
+
# in the developer guide.
|
18287
18476
|
#
|
18288
18477
|
#
|
18289
18478
|
#
|
18290
18479
|
# [1]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html
|
18291
18480
|
# [2]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
|
18481
|
+
# [3]: https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-job-parameters.html
|
18292
18482
|
# @return [Hash<String,String>]
|
18293
18483
|
#
|
18294
18484
|
# @!attribute [rw] allocated_capacity
|
@@ -18316,24 +18506,31 @@ module Aws::Glue
|
|
18316
18506
|
# @return [Integer]
|
18317
18507
|
#
|
18318
18508
|
# @!attribute [rw] max_capacity
|
18319
|
-
#
|
18509
|
+
# For Glue version 1.0 or earlier jobs, using the standard worker
|
18510
|
+
# type, the number of Glue data processing units (DPUs) that can be
|
18320
18511
|
# allocated when this job runs. A DPU is a relative measure of
|
18321
18512
|
# processing power that consists of 4 vCPUs of compute capacity and 16
|
18322
|
-
# GB of memory. For more information, see the [Glue pricing page][1].
|
18513
|
+
# GB of memory. For more information, see the [Glue pricing page][1].
|
18323
18514
|
#
|
18324
|
-
#
|
18515
|
+
# For Glue version 2.0+ jobs, you cannot specify a `Maximum capacity`.
|
18516
|
+
# Instead, you should specify a `Worker type` and the `Number of
|
18517
|
+
# workers`.
|
18518
|
+
#
|
18519
|
+
# Do not set `MaxCapacity` if using `WorkerType` and
|
18325
18520
|
# `NumberOfWorkers`.
|
18326
18521
|
#
|
18327
18522
|
# The value that can be allocated for `MaxCapacity` depends on whether
|
18328
|
-
# you are running a Python shell job,
|
18523
|
+
# you are running a Python shell job, an Apache Spark ETL job, or an
|
18524
|
+
# Apache Spark streaming ETL job:
|
18329
18525
|
#
|
18330
18526
|
# * When you specify a Python shell job
|
18331
18527
|
# (`JobCommand.Name`="pythonshell"), you can allocate either
|
18332
18528
|
# 0.0625 or 1 DPU. The default is 0.0625 DPU.
|
18333
18529
|
#
|
18334
18530
|
# * When you specify an Apache Spark ETL job
|
18335
|
-
# (`JobCommand.Name`="glueetl")
|
18336
|
-
#
|
18531
|
+
# (`JobCommand.Name`="glueetl") or Apache Spark streaming ETL job
|
18532
|
+
# (`JobCommand.Name`="gluestreaming"), you can allocate from 2 to
|
18533
|
+
# 100 DPUs. The default is 10 DPUs. This job type cannot have a
|
18337
18534
|
# fractional DPU allocation.
|
18338
18535
|
#
|
18339
18536
|
#
|
@@ -18352,22 +18549,29 @@ module Aws::Glue
|
|
18352
18549
|
#
|
18353
18550
|
# @!attribute [rw] worker_type
|
18354
18551
|
# The type of predefined worker that is allocated when a job runs.
|
18355
|
-
# Accepts a value of Standard, G.1X, G.2X, or G.025X.
|
18552
|
+
# Accepts a value of Standard, G.1X, G.2X, or G.025X for Spark jobs.
|
18553
|
+
# Accepts the value Z.2X for Ray jobs.
|
18356
18554
|
#
|
18357
18555
|
# * For the `Standard` worker type, each worker provides 4 vCPU, 16 GB
|
18358
18556
|
# of memory and a 50GB disk, and 2 executors per worker.
|
18359
18557
|
#
|
18360
|
-
# * For the `G.1X` worker type, each worker
|
18361
|
-
# memory
|
18558
|
+
# * For the `G.1X` worker type, each worker maps to 1 DPU (4 vCPU, 16
|
18559
|
+
# GB of memory, 64 GB disk), and provides 1 executor per worker. We
|
18560
|
+
# recommend this worker type for memory-intensive jobs.
|
18362
18561
|
#
|
18363
|
-
# * For the `G.2X` worker type, each worker
|
18364
|
-
# memory
|
18562
|
+
# * For the `G.2X` worker type, each worker maps to 2 DPU (8 vCPU, 32
|
18563
|
+
# GB of memory, 128 GB disk), and provides 1 executor per worker. We
|
18564
|
+
# recommend this worker type for memory-intensive jobs.
|
18365
18565
|
#
|
18366
18566
|
# * For the `G.025X` worker type, each worker maps to 0.25 DPU (2
|
18367
18567
|
# vCPU, 4 GB of memory, 64 GB disk), and provides 1 executor per
|
18368
18568
|
# worker. We recommend this worker type for low volume streaming
|
18369
18569
|
# jobs. This worker type is only available for Glue version 3.0
|
18370
18570
|
# streaming jobs.
|
18571
|
+
#
|
18572
|
+
# * For the `Z.2X` worker type, each worker maps to 2 DPU (8 vCPU, 64
|
18573
|
+
# GB of memory, 128 GB disk), and provides up to 8 Ray workers (one
|
18574
|
+
# per vCPU) based on the autoscaler.
|
18371
18575
|
# @return [String]
|
18372
18576
|
#
|
18373
18577
|
# @!attribute [rw] number_of_workers
|