aws-sdk-glue 1.137.0 → 1.139.0

@@ -155,14 +155,14 @@ module Aws::Glue
  include Aws::Structure
  end

- # Specifies an Amazon Redshift data store.
+ # Specifies an optional value when connecting to the Redshift cluster.
  #
  # @!attribute [rw] key
- # The key when specifying a key-value pair.
+ # The key for the additional connection option.
  # @return [String]
  #
  # @!attribute [rw] value
- # The value when specifying a key-value pair.
+ # The value for the additional connection option.
  # @return [String]
  #
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/AmazonRedshiftAdvancedOption AWS API Documentation
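A minimal sketch of populating the renamed structure from application code; the "ssl"/"true" pair is an illustrative placeholder, not an option defined by the SDK.

```ruby
require "aws-sdk-glue"

# Build an additional connection option for a Redshift node as a key-value
# pair. The key and value here are hypothetical, for illustration only.
advanced_option = Aws::Glue::Types::AmazonRedshiftAdvancedOption.new(
  key:   "ssl",
  value: "true"
)

puts advanced_option.key   # => "ssl"
puts advanced_option.value # => "true"
```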
@@ -2214,6 +2214,11 @@ module Aws::Glue
  # Specifies a target that writes to a data target in Amazon Redshift.
  # @return [Types::AmazonRedshiftTarget]
  #
+ # @!attribute [rw] evaluate_data_quality_multi_frame
+ # Specifies your data quality evaluation criteria. Allows multiple
+ # input datasets and returns a collection of Dynamic Frames.
+ # @return [Types::EvaluateDataQualityMultiFrame]
+ #
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/CodeGenConfigurationNode AWS API Documentation
  #
  class CodeGenConfigurationNode < Struct.new(
@@ -2281,7 +2286,8 @@ module Aws::Glue
  :s3_delta_catalog_target,
  :s3_delta_direct_target,
  :amazon_redshift_source,
- :amazon_redshift_target)
+ :amazon_redshift_target,
+ :evaluate_data_quality_multi_frame)
  SENSITIVE = []
  include Aws::Structure
  end
@@ -4245,7 +4251,8 @@ module Aws::Glue
  # @return [Types::JobCommand]
  #
  # @!attribute [rw] default_arguments
- # The default arguments for this job.
+ # The default arguments for every run of this job, specified as
+ # name-value pairs.
  #
  # You can specify arguments here that your own job-execution script
  # consumes, as well as arguments that Glue itself consumes.
@@ -4259,19 +4266,24 @@ module Aws::Glue
  # arguments, see the [Calling Glue APIs in Python][1] topic in the
  # developer guide.
  #
- # For information about the key-value pairs that Glue consumes to set
- # up your job, see the [Special Parameters Used by Glue][2] topic in
- # the developer guide.
+ # For information about the arguments you can provide to this field
+ # when configuring Spark jobs, see the [Special Parameters Used by
+ # Glue][2] topic in the developer guide.
+ #
+ # For information about the arguments you can provide to this field
+ # when configuring Ray jobs, see [Using job parameters in Ray jobs][3]
+ # in the developer guide.
  #
  #
  #
  # [1]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html
  # [2]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
+ # [3]: https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-job-parameters.html
  # @return [Hash<String,String>]
  #
  # @!attribute [rw] non_overridable_arguments
- # Non-overridable arguments for this job, specified as name-value
- # pairs.
+ # Arguments for this job that are not overridden when providing job
+ # arguments in a job run, specified as name-value pairs.
  # @return [Hash<String,String>]
  #
  # @!attribute [rw] connections
@@ -4307,13 +4319,18 @@ module Aws::Glue
  # type, the number of Glue data processing units (DPUs) that can be
  # allocated when this job runs. A DPU is a relative measure of
  # processing power that consists of 4 vCPUs of compute capacity and 16
- # GB of memory. For more information, see the [Glue pricing page][1].
+ # GB of memory. For more information, see the [ Glue pricing page][1].
  #
- # Do not set `Max Capacity` if using `WorkerType` and
+ # For Glue version 2.0+ jobs, you cannot specify a `Maximum capacity`.
+ # Instead, you should specify a `Worker type` and the `Number of
+ # workers`.
+ #
+ # Do not set `MaxCapacity` if using `WorkerType` and
  # `NumberOfWorkers`.
  #
  # The value that can be allocated for `MaxCapacity` depends on whether
- # you are running a Python shell job or an Apache Spark ETL job:
+ # you are running a Python shell job, an Apache Spark ETL job, or an
+ # Apache Spark streaming ETL job:
  #
  # * When you specify a Python shell job
  # (`JobCommand.Name`="pythonshell"), you can allocate either
@@ -4321,14 +4338,10 @@ module Aws::Glue
  #
  # * When you specify an Apache Spark ETL job
  # (`JobCommand.Name`="glueetl") or Apache Spark streaming ETL job
- # (`JobCommand.Name`="gluestreaming"), you can allocate a minimum
- # of 2 DPUs. The default is 10 DPUs. This job type cannot have a
+ # (`JobCommand.Name`="gluestreaming"), you can allocate from 2 to
+ # 100 DPUs. The default is 10 DPUs. This job type cannot have a
  # fractional DPU allocation.
  #
- # For Glue version 2.0 jobs, you cannot instead specify a `Maximum
- # capacity`. Instead, you should specify a `Worker type` and the
- # `Number of workers`.
- #
  #
  #
  # [1]: https://aws.amazon.com/glue/pricing/
@@ -4354,9 +4367,14 @@ module Aws::Glue
  # @return [Types::NotificationProperty]
  #
  # @!attribute [rw] glue_version
- # Glue version determines the versions of Apache Spark and Python that
- # Glue supports. The Python version indicates the version supported
- # for jobs of type Spark.
+ # In Spark jobs, `GlueVersion` determines the versions of Apache Spark
+ # and Python that Glue makes available in a job. The Python version
+ # indicates the version supported for jobs of type Spark.
+ #
+ # Ray jobs should set `GlueVersion` to `4.0` or greater. However, the
+ # versions of Ray, Python and additional libraries available in your
+ # Ray job are determined by the `Runtime` parameter of the Job
+ # command.
  #
  # For more information about the available Glue versions and
  # corresponding Spark and Python versions, see [Glue version][1] in
@@ -4377,7 +4395,8 @@ module Aws::Glue
  #
  # @!attribute [rw] worker_type
  # The type of predefined worker that is allocated when a job runs.
- # Accepts a value of Standard, G.1X, G.2X, or G.025X.
+ # Accepts a value of Standard, G.1X, G.2X, or G.025X for Spark jobs.
+ # Accepts the value Z.2X for Ray jobs.
  #
  # * For the `Standard` worker type, each worker provides 4 vCPU, 16 GB
  # of memory and a 50GB disk, and 2 executors per worker.
@@ -4395,6 +4414,10 @@ module Aws::Glue
  # worker. We recommend this worker type for low volume streaming
  # jobs. This worker type is only available for Glue version 3.0
  # streaming jobs.
+ #
+ # * For the `Z.2X` worker type, each worker maps to 2 M-DPU (8 vCPU, 64
+ # GB of memory, 128 GB disk), and provides up to 8 Ray workers
+ # based on the autoscaler.
  # @return [String]
  #
  # @!attribute [rw] code_gen_configuration_nodes
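To show how the new Ray-related fields on CreateJob fit together, here is a hedged sketch that combines `glue_version`, the `Z.2X` worker type, the `glueray` command, and Ray-style default arguments. The job name, role ARN, script path, runtime label, and argument values are assumptions for illustration, not values defined by the SDK.

```ruby
require "aws-sdk-glue"

glue = Aws::Glue::Client.new(region: "us-east-1")

# Hypothetical Ray job; all identifiers are placeholders.
glue.create_job(
  name: "example-ray-job",
  role: "arn:aws:iam::123456789012:role/GlueJobRole",
  glue_version: "4.0",                 # Ray jobs require Glue 4.0 or greater
  worker_type: "Z.2X",                 # Ray worker type described above
  number_of_workers: 5,
  command: {
    name: "glueray",
    runtime: "Ray2.4",                 # assumed runtime label; see the Ray runtimes doc
    script_location: "s3://example-bucket/scripts/ray_job.py"
  },
  default_arguments: {
    "--pip-install" => "pandas"        # assumed Ray job parameter; see the job parameters doc linked above
  }
)
```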
@@ -5851,13 +5874,18 @@ module Aws::Glue
  # A pass or fail status for the rule.
  # @return [String]
  #
+ # @!attribute [rw] evaluated_metrics
+ # A map of metrics associated with the evaluation of the rule.
+ # @return [Hash<String,Float>]
+ #
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/DataQualityRuleResult AWS API Documentation
  #
  class DataQualityRuleResult < Struct.new(
  :name,
  :description,
  :evaluation_message,
- :result)
+ :result,
+ :evaluated_metrics)
  SENSITIVE = []
  include Aws::Structure
  end
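A short, hedged sketch of reading the new `evaluated_metrics` map back from a data quality result; the result ID is a placeholder.

```ruby
require "aws-sdk-glue"

glue = Aws::Glue::Client.new(region: "us-east-1")

# "dqresult-abc123" is a placeholder result ID.
result = glue.get_data_quality_result(result_id: "dqresult-abc123")

result.rule_results.each do |rule|
  puts "#{rule.name}: #{rule.result}"
  # evaluated_metrics is a Hash<String,Float> of observed values for the rule.
  rule.evaluated_metrics&.each do |metric, value|
    puts "  #{metric} = #{value}"
  end
end
```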
@@ -6015,11 +6043,16 @@ module Aws::Glue
  # The name of the database where the Glue table exists.
  # @return [String]
  #
+ # @!attribute [rw] catalog_id
+ # The catalog id where the Glue table exists.
+ # @return [String]
+ #
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/DataQualityTargetTable AWS API Documentation
  #
  class DataQualityTargetTable < Struct.new(
  :table_name,
- :database_name)
+ :database_name,
+ :catalog_id)
  SENSITIVE = []
  include Aws::Structure
  end
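A hedged sketch of passing the new `catalog_id` through the `target_table` of `create_data_quality_ruleset`; the ruleset name, table names, and account ID are placeholders.

```ruby
require "aws-sdk-glue"

glue = Aws::Glue::Client.new(region: "us-east-1")

# All identifiers below are placeholders.
glue.create_data_quality_ruleset(
  name: "orders-ruleset",
  ruleset: 'Rules = [ RowCount > 0 ]',
  target_table: {
    table_name: "orders",
    database_name: "sales_db",
    catalog_id: "123456789012"   # new optional field: the catalog that owns the table
  }
)
```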
@@ -7759,6 +7792,52 @@ module Aws::Glue
  include Aws::Structure
  end

+ # Specifies your data quality evaluation criteria.
+ #
+ # @!attribute [rw] name
+ # The name of the data quality evaluation.
+ # @return [String]
+ #
+ # @!attribute [rw] inputs
+ # The inputs of your data quality evaluation. The first input in this
+ # list is the primary data source.
+ # @return [Array<String>]
+ #
+ # @!attribute [rw] additional_data_sources
+ # The aliases of all data sources except primary.
+ # @return [Hash<String,String>]
+ #
+ # @!attribute [rw] ruleset
+ # The ruleset for your data quality evaluation.
+ # @return [String]
+ #
+ # @!attribute [rw] publishing_options
+ # Options to configure how your results are published.
+ # @return [Types::DQResultsPublishingOptions]
+ #
+ # @!attribute [rw] additional_options
+ # Options to configure runtime behavior of the transform.
+ # @return [Hash<String,String>]
+ #
+ # @!attribute [rw] stop_job_on_failure_options
+ # Options to configure how your job will stop if your data quality
+ # evaluation fails.
+ # @return [Types::DQStopJobOnFailureOptions]
+ #
+ # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/EvaluateDataQualityMultiFrame AWS API Documentation
+ #
+ class EvaluateDataQualityMultiFrame < Struct.new(
+ :name,
+ :inputs,
+ :additional_data_sources,
+ :ruleset,
+ :publishing_options,
+ :additional_options,
+ :stop_job_on_failure_options)
+ SENSITIVE = []
+ include Aws::Structure
+ end
+
  # Evaluation metrics provide an estimate of the quality of your machine
  # learning transform.
  #
@@ -9008,6 +9087,11 @@ module Aws::Glue
  # A list of result IDs for the data quality results for the run.
  # @return [Array<String>]
  #
+ # @!attribute [rw] additional_data_sources
+ # A map of reference strings to additional data sources you can
+ # specify for an evaluation run.
+ # @return [Hash<String,Types::DataSource>]
+ #
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/GetDataQualityRulesetEvaluationRunResponse AWS API Documentation
  #
  class GetDataQualityRulesetEvaluationRunResponse < Struct.new(
@@ -9024,7 +9108,8 @@ module Aws::Glue
  :completed_on,
  :execution_time,
  :ruleset_names,
- :result_ids)
+ :result_ids,
+ :additional_data_sources)
  SENSITIVE = []
  include Aws::Structure
  end
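A hedged sketch of reading the new `additional_data_sources` map back from an evaluation run; the run ID is a placeholder.

```ruby
require "aws-sdk-glue"

glue = Aws::Glue::Client.new(region: "us-east-1")

# "dq-run-id" is a placeholder run ID.
run = glue.get_data_quality_ruleset_evaluation_run(run_id: "dq-run-id")

# Each entry maps a reference alias used by the ruleset to a DataSource.
run.additional_data_sources&.each do |alias_name, source|
  table = source.glue_table
  puts "#{alias_name}: #{table.database_name}.#{table.table_name}"
end
```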
@@ -12097,28 +12182,39 @@ module Aws::Glue
  # @return [Types::JobCommand]
  #
  # @!attribute [rw] default_arguments
- # The default arguments for this job, specified as name-value pairs.
+ # The default arguments for every run of this job, specified as
+ # name-value pairs.
  #
  # You can specify arguments here that your own job-execution script
  # consumes, as well as arguments that Glue itself consumes.
  #
+ # Job arguments may be logged. Do not pass plaintext secrets as
+ # arguments. Retrieve secrets from a Glue Connection, Secrets Manager
+ # or other secret management mechanism if you intend to keep them
+ # within the Job.
+ #
  # For information about how to specify and consume your own Job
  # arguments, see the [Calling Glue APIs in Python][1] topic in the
  # developer guide.
  #
- # For information about the key-value pairs that Glue consumes to set
- # up your job, see the [Special Parameters Used by Glue][2] topic in
- # the developer guide.
+ # For information about the arguments you can provide to this field
+ # when configuring Spark jobs, see the [Special Parameters Used by
+ # Glue][2] topic in the developer guide.
+ #
+ # For information about the arguments you can provide to this field
+ # when configuring Ray jobs, see [Using job parameters in Ray jobs][3]
+ # in the developer guide.
  #
  #
  #
  # [1]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html
  # [2]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
+ # [3]: https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-job-parameters.html
  # @return [Hash<String,String>]
  #
  # @!attribute [rw] non_overridable_arguments
- # Non-overridable arguments for this job, specified as name-value
- # pairs.
+ # Arguments for this job that are not overridden when providing job
+ # arguments in a job run, specified as name-value pairs.
  # @return [Hash<String,String>]
  #
  # @!attribute [rw] connections
@@ -12156,7 +12252,7 @@ module Aws::Glue
  # type, the number of Glue data processing units (DPUs) that can be
  # allocated when this job runs. A DPU is a relative measure of
  # processing power that consists of 4 vCPUs of compute capacity and 16
- # GB of memory. For more information, see the [Glue pricing page][1].
+ # GB of memory. For more information, see the [ Glue pricing page][1].
  #
  # For Glue version 2.0 or later jobs, you cannot specify a `Maximum
  # capacity`. Instead, you should specify a `Worker type` and the
@@ -12186,7 +12282,8 @@ module Aws::Glue
  #
  # @!attribute [rw] worker_type
  # The type of predefined worker that is allocated when a job runs.
- # Accepts a value of Standard, G.1X, G.2X, or G.025X.
+ # Accepts a value of Standard, G.1X, G.2X, G.4X, G.8X, or G.025X for
+ # Spark jobs. Accepts the value Z.2X for Ray jobs.
  #
  # * For the `Standard` worker type, each worker provides 4 vCPU, 16 GB
  # of memory and a 50GB disk, and 2 executors per worker.
@@ -12207,20 +12304,30 @@ module Aws::Glue
  # GB of memory, 256 GB disk), and provides 1 executor per worker. We
  # recommend this worker type for jobs whose workloads contain your
  # most demanding transforms, aggregations, joins, and queries. This
- # worker type is available only for Glue version 3.0 or later jobs.
+ # worker type is available only for Glue version 3.0 or later Spark
+ # ETL jobs in the following Amazon Web Services Regions: US East
+ # (Ohio), US East (N. Virginia), US West (Oregon), Asia Pacific
+ # (Singapore), Asia Pacific (Sydney), Asia Pacific (Tokyo), Canada
+ # (Central), Europe (Frankfurt), Europe (Ireland), and Europe
+ # (Stockholm).
  #
  # * For the `G.8X` worker type, each worker maps to 8 DPU (32 vCPU,
  # 128 GB of memory, 512 GB disk), and provides 1 executor per
  # worker. We recommend this worker type for jobs whose workloads
  # contain your most demanding transforms, aggregations, joins, and
  # queries. This worker type is available only for Glue version 3.0
- # or later jobs.
+ # or later Spark ETL jobs, in the same Amazon Web Services Regions
+ # as supported for the `G.4X` worker type.
  #
  # * For the `G.025X` worker type, each worker maps to 0.25 DPU (2
  # vCPU, 4 GB of memory, 64 GB disk), and provides 1 executor per
  # worker. We recommend this worker type for low volume streaming
  # jobs. This worker type is only available for Glue version 3.0
  # streaming jobs.
+ #
+ # * For the `Z.2X` worker type, each worker maps to 2 M-DPU (8 vCPU, 64
+ # GB of memory, 128 GB disk), and provides a default of 8 Ray
+ # workers (1 per vCPU).
  # @return [String]
  #
  # @!attribute [rw] number_of_workers
@@ -12238,9 +12345,14 @@ module Aws::Glue
  # @return [Types::NotificationProperty]
  #
  # @!attribute [rw] glue_version
- # Glue version determines the versions of Apache Spark and Python that
- # Glue supports. The Python version indicates the version supported
- # for jobs of type Spark.
+ # In Spark jobs, `GlueVersion` determines the versions of Apache Spark
+ # and Python that Glue makes available in a job. The Python version
+ # indicates the version supported for jobs of type Spark.
+ #
+ # Ray jobs should set `GlueVersion` to `4.0` or greater. However, the
+ # versions of Ray, Python and additional libraries available in your
+ # Ray job are determined by the `Runtime` parameter of the Job
+ # command.
  #
  # For more information about the available Glue versions and
  # corresponding Spark and Python versions, see [Glue version][1] in
@@ -12378,7 +12490,8 @@ module Aws::Glue
  # @!attribute [rw] name
  # The name of the job command. For an Apache Spark ETL job, this must
  # be `glueetl`. For a Python shell job, it must be `pythonshell`. For
- # an Apache Spark streaming ETL job, this must be `gluestreaming`.
+ # an Apache Spark streaming ETL job, this must be `gluestreaming`. For
+ # a Ray job, this must be `glueray`.
  # @return [String]
  #
  # @!attribute [rw] script_location
@@ -12391,12 +12504,24 @@ module Aws::Glue
  # values are 2 or 3.
  # @return [String]
  #
+ # @!attribute [rw] runtime
+ # In Ray jobs, Runtime is used to specify the versions of Ray, Python
+ # and additional libraries available in your environment. This field
+ # is not used in other job types. For supported runtime environment
+ # values, see [Working with Ray jobs][1] in the Glue Developer Guide.
+ #
+ #
+ #
+ # [1]: https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-runtimes.html
+ # @return [String]
+ #
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/JobCommand AWS API Documentation
  #
  class JobCommand < Struct.new(
  :name,
  :script_location,
- :python_version)
+ :python_version,
+ :runtime)
  SENSITIVE = []
  include Aws::Structure
  end
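A hedged sketch of inspecting the new `runtime` field on an existing job's command; the job name is a placeholder, and `runtime` is only populated for Ray jobs.

```ruby
require "aws-sdk-glue"

glue = Aws::Glue::Client.new(region: "us-east-1")

# "example-ray-job" is a placeholder job name.
command = glue.get_job(job_name: "example-ray-job").job.command

puts command.name     # e.g. "glueray" for a Ray job
puts command.runtime  # e.g. "Ray2.4"; nil for Spark and Python shell jobs
```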
@@ -12467,18 +12592,28 @@ module Aws::Glue
  # You can specify arguments here that your own job-execution script
  # consumes, as well as arguments that Glue itself consumes.
  #
- # For information about how to specify and consume your own job
+ # Job arguments may be logged. Do not pass plaintext secrets as
+ # arguments. Retrieve secrets from a Glue Connection, Secrets Manager
+ # or other secret management mechanism if you intend to keep them
+ # within the Job.
+ #
+ # For information about how to specify and consume your own Job
  # arguments, see the [Calling Glue APIs in Python][1] topic in the
  # developer guide.
  #
- # For information about the key-value pairs that Glue consumes to set
- # up your job, see the [Special Parameters Used by Glue][2] topic in
- # the developer guide.
+ # For information about the arguments you can provide to this field
+ # when configuring Spark jobs, see the [Special Parameters Used by
+ # Glue][2] topic in the developer guide.
+ #
+ # For information about the arguments you can provide to this field
+ # when configuring Ray jobs, see [Using job parameters in Ray jobs][3]
+ # in the developer guide.
  #
  #
  #
  # [1]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html
  # [2]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
+ # [3]: https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-job-parameters.html
  # @return [Hash<String,String>]
  #
  # @!attribute [rw] error_message
@@ -12518,24 +12653,31 @@ module Aws::Glue
  # @return [Integer]
  #
  # @!attribute [rw] max_capacity
- # The number of Glue data processing units (DPUs) that can be
+ # For Glue version 1.0 or earlier jobs, using the standard worker
+ # type, the number of Glue data processing units (DPUs) that can be
  # allocated when this job runs. A DPU is a relative measure of
  # processing power that consists of 4 vCPUs of compute capacity and 16
- # GB of memory. For more information, see the [Glue pricing page][1].
+ # GB of memory. For more information, see the [ Glue pricing page][1].
+ #
+ # For Glue version 2.0+ jobs, you cannot specify a `Maximum capacity`.
+ # Instead, you should specify a `Worker type` and the `Number of
+ # workers`.
  #
- # Do not set `Max Capacity` if using `WorkerType` and
+ # Do not set `MaxCapacity` if using `WorkerType` and
  # `NumberOfWorkers`.
  #
  # The value that can be allocated for `MaxCapacity` depends on whether
- # you are running a Python shell job or an Apache Spark ETL job:
+ # you are running a Python shell job, an Apache Spark ETL job, or an
+ # Apache Spark streaming ETL job:
  #
  # * When you specify a Python shell job
  # (`JobCommand.Name`="pythonshell"), you can allocate either
  # 0.0625 or 1 DPU. The default is 0.0625 DPU.
  #
  # * When you specify an Apache Spark ETL job
- # (`JobCommand.Name`="glueetl"), you can allocate a minimum of 2
- # DPUs. The default is 10 DPUs. This job type cannot have a
+ # (`JobCommand.Name`="glueetl") or Apache Spark streaming ETL job
+ # (`JobCommand.Name`="gluestreaming"), you can allocate from 2 to
+ # 100 DPUs. The default is 10 DPUs. This job type cannot have a
  # fractional DPU allocation.
  #
  #
@@ -12545,22 +12687,29 @@ module Aws::Glue
  #
  # @!attribute [rw] worker_type
  # The type of predefined worker that is allocated when a job runs.
- # Accepts a value of Standard, G.1X, G.2X, or G.025X.
+ # Accepts a value of Standard, G.1X, G.2X, or G.025X for Spark jobs.
+ # Accepts the value Z.2X for Ray jobs.
  #
  # * For the `Standard` worker type, each worker provides 4 vCPU, 16 GB
  # of memory and a 50GB disk, and 2 executors per worker.
  #
- # * For the `G.1X` worker type, each worker provides 4 vCPU, 16 GB of
- # memory and a 64GB disk, and 1 executor per worker.
+ # * For the `G.1X` worker type, each worker maps to 1 DPU (4 vCPU, 16
+ # GB of memory, 64 GB disk), and provides 1 executor per worker. We
+ # recommend this worker type for memory-intensive jobs.
  #
- # * For the `G.2X` worker type, each worker provides 8 vCPU, 32 GB of
- # memory and a 128GB disk, and 1 executor per worker.
+ # * For the `G.2X` worker type, each worker maps to 2 DPU (8 vCPU, 32
+ # GB of memory, 128 GB disk), and provides 1 executor per worker. We
+ # recommend this worker type for memory-intensive jobs.
  #
  # * For the `G.025X` worker type, each worker maps to 0.25 DPU (2
  # vCPU, 4 GB of memory, 64 GB disk), and provides 1 executor per
  # worker. We recommend this worker type for low volume streaming
  # jobs. This worker type is only available for Glue version 3.0
  # streaming jobs.
+ #
+ # * For the `Z.2X` worker type, each worker maps to 2 M-DPU (8 vCPU, 64
+ # GB of memory, 128 GB disk), and provides up to 8 Ray workers (one
+ # per vCPU) based on the autoscaler.
  # @return [String]
  #
  # @!attribute [rw] number_of_workers
@@ -12588,9 +12737,14 @@ module Aws::Glue
  # @return [Types::NotificationProperty]
  #
  # @!attribute [rw] glue_version
- # Glue version determines the versions of Apache Spark and Python that
- # Glue supports. The Python version indicates the version supported
- # for jobs of type Spark.
+ # In Spark jobs, `GlueVersion` determines the versions of Apache Spark
+ # and Python that Glue makes available in a job. The Python version
+ # indicates the version supported for jobs of type Spark.
+ #
+ # Ray jobs should set `GlueVersion` to `4.0` or greater. However, the
+ # versions of Ray, Python and additional libraries available in your
+ # Ray job are determined by the `Runtime` parameter of the Job
+ # command.
  #
  # For more information about the available Glue versions and
  # corresponding Spark and Python versions, see [Glue version][1] in
@@ -12687,28 +12841,39 @@ module Aws::Glue
  # @return [Types::JobCommand]
  #
  # @!attribute [rw] default_arguments
- # The default arguments for this job.
+ # The default arguments for every run of this job, specified as
+ # name-value pairs.
  #
  # You can specify arguments here that your own job-execution script
  # consumes, as well as arguments that Glue itself consumes.
  #
+ # Job arguments may be logged. Do not pass plaintext secrets as
+ # arguments. Retrieve secrets from a Glue Connection, Secrets Manager
+ # or other secret management mechanism if you intend to keep them
+ # within the Job.
+ #
  # For information about how to specify and consume your own Job
  # arguments, see the [Calling Glue APIs in Python][1] topic in the
  # developer guide.
  #
- # For information about the key-value pairs that Glue consumes to set
- # up your job, see the [Special Parameters Used by Glue][2] topic in
- # the developer guide.
+ # For information about the arguments you can provide to this field
+ # when configuring Spark jobs, see the [Special Parameters Used by
+ # Glue][2] topic in the developer guide.
+ #
+ # For information about the arguments you can provide to this field
+ # when configuring Ray jobs, see [Using job parameters in Ray jobs][3]
+ # in the developer guide.
  #
  #
  #
  # [1]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html
  # [2]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
+ # [3]: https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-job-parameters.html
  # @return [Hash<String,String>]
  #
  # @!attribute [rw] non_overridable_arguments
- # Non-overridable arguments for this job, specified as name-value
- # pairs.
+ # Arguments for this job that are not overridden when providing job
+ # arguments in a job run, specified as name-value pairs.
  # @return [Hash<String,String>]
  #
  # @!attribute [rw] connections
@@ -12744,13 +12909,18 @@ module Aws::Glue
  # type, the number of Glue data processing units (DPUs) that can be
  # allocated when this job runs. A DPU is a relative measure of
  # processing power that consists of 4 vCPUs of compute capacity and 16
- # GB of memory. For more information, see the [Glue pricing page][1].
+ # GB of memory. For more information, see the [ Glue pricing page][1].
+ #
+ # For Glue version 2.0+ jobs, you cannot specify a `Maximum capacity`.
+ # Instead, you should specify a `Worker type` and the `Number of
+ # workers`.
  #
- # Do not set `Max Capacity` if using `WorkerType` and
+ # Do not set `MaxCapacity` if using `WorkerType` and
  # `NumberOfWorkers`.
  #
  # The value that can be allocated for `MaxCapacity` depends on whether
- # you are running a Python shell job or an Apache Spark ETL job:
+ # you are running a Python shell job, an Apache Spark ETL job, or an
+ # Apache Spark streaming ETL job:
  #
  # * When you specify a Python shell job
  # (`JobCommand.Name`="pythonshell"), you can allocate either
@@ -12758,14 +12928,10 @@ module Aws::Glue
  #
  # * When you specify an Apache Spark ETL job
  # (`JobCommand.Name`="glueetl") or Apache Spark streaming ETL job
- # (`JobCommand.Name`="gluestreaming"), you can allocate a minimum
- # of 2 DPUs. The default is 10 DPUs. This job type cannot have a
+ # (`JobCommand.Name`="gluestreaming"), you can allocate from 2 to
+ # 100 DPUs. The default is 10 DPUs. This job type cannot have a
  # fractional DPU allocation.
  #
- # For Glue version 2.0 jobs, you cannot instead specify a `Maximum
- # capacity`. Instead, you should specify a `Worker type` and the
- # `Number of workers`.
- #
  #
  #
  # [1]: https://aws.amazon.com/glue/pricing/
@@ -12773,7 +12939,8 @@ module Aws::Glue
  #
  # @!attribute [rw] worker_type
  # The type of predefined worker that is allocated when a job runs.
- # Accepts a value of Standard, G.1X, G.2X, or G.025X.
+ # Accepts a value of Standard, G.1X, G.2X, or G.025X for Spark jobs.
+ # Accepts the value Z.2X for Ray jobs.
  #
  # * For the `Standard` worker type, each worker provides 4 vCPU, 16 GB
  # of memory and a 50GB disk, and 2 executors per worker.
@@ -12791,6 +12958,10 @@ module Aws::Glue
  # worker. We recommend this worker type for low volume streaming
  # jobs. This worker type is only available for Glue version 3.0
  # streaming jobs.
+ #
+ # * For the `Z.2X` worker type, each worker maps to 2 M-DPU (8 vCPU, 64
+ # GB of memory, 128 GB disk), and provides up to 8 Ray workers
+ # based on the autoscaler.
  # @return [String]
  #
  # @!attribute [rw] number_of_workers
@@ -12808,14 +12979,22 @@ module Aws::Glue
  # @return [Types::NotificationProperty]
  #
  # @!attribute [rw] glue_version
- # Glue version determines the versions of Apache Spark and Python that
- # Glue supports. The Python version indicates the version supported
- # for jobs of type Spark.
+ # In Spark jobs, `GlueVersion` determines the versions of Apache Spark
+ # and Python that Glue makes available in a job. The Python version
+ # indicates the version supported for jobs of type Spark.
+ #
+ # Ray jobs should set `GlueVersion` to `4.0` or greater. However, the
+ # versions of Ray, Python and additional libraries available in your
+ # Ray job are determined by the `Runtime` parameter of the Job
+ # command.
  #
  # For more information about the available Glue versions and
  # corresponding Spark and Python versions, see [Glue version][1] in
  # the developer guide.
  #
+ # Jobs that are created without specifying a Glue version default to
+ # Glue 0.9.
+ #
  #
  #
  # [1]: https://docs.aws.amazon.com/glue/latest/dg/add-job.html
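A hedged sketch of moving an existing job onto the settings described above via `update_job`; the job name, role ARN, and script path are placeholders.

```ruby
require "aws-sdk-glue"

glue = Aws::Glue::Client.new(region: "us-east-1")

# Placeholders throughout. UpdateJob replaces the job definition, so the
# role and command are re-supplied along with the new version settings.
glue.update_job(
  job_name: "example-etl-job",
  job_update: {
    role: "arn:aws:iam::123456789012:role/GlueJobRole",
    command: { name: "glueetl", script_location: "s3://example-bucket/scripts/etl.py" },
    glue_version: "4.0",        # avoid the Glue 0.9 default noted above
    worker_type: "G.1X",
    number_of_workers: 10
  }
)
```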
@@ -18167,6 +18346,11 @@ module Aws::Glue
  # A list of ruleset names.
  # @return [Array<String>]
  #
+ # @!attribute [rw] additional_data_sources
+ # A map of reference strings to additional data sources you can
+ # specify for an evaluation run.
+ # @return [Hash<String,Types::DataSource>]
+ #
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/StartDataQualityRulesetEvaluationRunRequest AWS API Documentation
  #
  class StartDataQualityRulesetEvaluationRunRequest < Struct.new(
@@ -18176,7 +18360,8 @@ module Aws::Glue
  :timeout,
  :client_token,
  :additional_run_options,
- :ruleset_names)
+ :ruleset_names,
+ :additional_data_sources)
  SENSITIVE = []
  include Aws::Structure
  end
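A hedged sketch of the new `additional_data_sources` request field on `start_data_quality_ruleset_evaluation_run`; every name, ARN, and alias is a placeholder, and the alias keys are assumed to match references used inside the ruleset.

```ruby
require "aws-sdk-glue"

glue = Aws::Glue::Client.new(region: "us-east-1")

# Placeholders throughout.
resp = glue.start_data_quality_ruleset_evaluation_run(
  role: "arn:aws:iam::123456789012:role/GlueDataQualityRole",
  ruleset_names: ["orders-ruleset"],
  data_source: {
    glue_table: { database_name: "sales_db", table_name: "orders" }
  },
  additional_data_sources: {
    # alias => DataSource; the alias is what the ruleset refers to
    "reference" => {
      glue_table: { database_name: "sales_db", table_name: "orders_reference" }
    }
  }
)

puts resp.run_id
```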
@@ -18266,7 +18451,7 @@ module Aws::Glue
  # @return [String]
  #
  # @!attribute [rw] arguments
- # The job arguments specifically for this run. For this job run, they
+ # The job arguments associated with this run. For this job run, they
  # replace the default arguments set in the job definition itself.
  #
  # You can specify arguments here that your own job-execution script
@@ -18281,14 +18466,19 @@ module Aws::Glue
  # arguments, see the [Calling Glue APIs in Python][1] topic in the
  # developer guide.
  #
- # For information about the key-value pairs that Glue consumes to set
- # up your job, see the [Special Parameters Used by Glue][2] topic in
- # the developer guide.
+ # For information about the arguments you can provide to this field
+ # when configuring Spark jobs, see the [Special Parameters Used by
+ # Glue][2] topic in the developer guide.
+ #
+ # For information about the arguments you can provide to this field
+ # when configuring Ray jobs, see [Using job parameters in Ray jobs][3]
+ # in the developer guide.
  #
  #
  #
  # [1]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html
  # [2]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
+ # [3]: https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-job-parameters.html
  # @return [Hash<String,String>]
  #
  # @!attribute [rw] allocated_capacity
@@ -18316,24 +18506,31 @@ module Aws::Glue
  # @return [Integer]
  #
  # @!attribute [rw] max_capacity
- # The number of Glue data processing units (DPUs) that can be
+ # For Glue version 1.0 or earlier jobs, using the standard worker
+ # type, the number of Glue data processing units (DPUs) that can be
  # allocated when this job runs. A DPU is a relative measure of
  # processing power that consists of 4 vCPUs of compute capacity and 16
- # GB of memory. For more information, see the [Glue pricing page][1].
+ # GB of memory. For more information, see the [ Glue pricing page][1].
  #
- # Do not set `Max Capacity` if using `WorkerType` and
+ # For Glue version 2.0+ jobs, you cannot specify a `Maximum capacity`.
+ # Instead, you should specify a `Worker type` and the `Number of
+ # workers`.
+ #
+ # Do not set `MaxCapacity` if using `WorkerType` and
  # `NumberOfWorkers`.
  #
  # The value that can be allocated for `MaxCapacity` depends on whether
- # you are running a Python shell job, or an Apache Spark ETL job:
+ # you are running a Python shell job, an Apache Spark ETL job, or an
+ # Apache Spark streaming ETL job:
  #
  # * When you specify a Python shell job
  # (`JobCommand.Name`="pythonshell"), you can allocate either
  # 0.0625 or 1 DPU. The default is 0.0625 DPU.
  #
  # * When you specify an Apache Spark ETL job
- # (`JobCommand.Name`="glueetl"), you can allocate a minimum of 2
- # DPUs. The default is 10 DPUs. This job type cannot have a
+ # (`JobCommand.Name`="glueetl") or Apache Spark streaming ETL job
+ # (`JobCommand.Name`="gluestreaming"), you can allocate from 2 to
+ # 100 DPUs. The default is 10 DPUs. This job type cannot have a
  # fractional DPU allocation.
  #
  #
@@ -18352,22 +18549,29 @@ module Aws::Glue
  #
  # @!attribute [rw] worker_type
  # The type of predefined worker that is allocated when a job runs.
- # Accepts a value of Standard, G.1X, G.2X, or G.025X.
+ # Accepts a value of Standard, G.1X, G.2X, or G.025X for Spark jobs.
+ # Accepts the value Z.2X for Ray jobs.
  #
  # * For the `Standard` worker type, each worker provides 4 vCPU, 16 GB
  # of memory and a 50GB disk, and 2 executors per worker.
  #
- # * For the `G.1X` worker type, each worker provides 4 vCPU, 16 GB of
- # memory and a 64GB disk, and 1 executor per worker.
+ # * For the `G.1X` worker type, each worker maps to 1 DPU (4 vCPU, 16
+ # GB of memory, 64 GB disk), and provides 1 executor per worker. We
+ # recommend this worker type for memory-intensive jobs.
  #
- # * For the `G.2X` worker type, each worker provides 8 vCPU, 32 GB of
- # memory and a 128GB disk, and 1 executor per worker.
+ # * For the `G.2X` worker type, each worker maps to 2 DPU (8 vCPU, 32
+ # GB of memory, 128 GB disk), and provides 1 executor per worker. We
+ # recommend this worker type for memory-intensive jobs.
  #
  # * For the `G.025X` worker type, each worker maps to 0.25 DPU (2
  # vCPU, 4 GB of memory, 64 GB disk), and provides 1 executor per
  # worker. We recommend this worker type for low volume streaming
  # jobs. This worker type is only available for Glue version 3.0
  # streaming jobs.
+ #
+ # * For the `Z.2X` worker type, each worker maps to 2 DPU (8 vCPU, 64
+ # GB of memory, 128 GB disk), and provides up to 8 Ray workers (one
+ # per vCPU) based on the autoscaler.
  # @return [String]
  #
  # @!attribute [rw] number_of_workers
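Finally, a hedged sketch of a `start_job_run` call that overrides the default arguments and worker settings described above; the job name and argument values are placeholders.

```ruby
require "aws-sdk-glue"

glue = Aws::Glue::Client.new(region: "us-east-1")

# Placeholders throughout. Arguments given here replace the job's
# default_arguments for this run only.
resp = glue.start_job_run(
  job_name: "example-etl-job",
  worker_type: "G.2X",
  number_of_workers: 20,
  arguments: {
    "--enable-metrics" => "true"   # assumed Spark job parameter; see the special parameters doc
  }
)

puts resp.job_run_id
```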