aws-sdk-glue 1.137.0 → 1.139.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -155,14 +155,14 @@ module Aws::Glue
155
155
  include Aws::Structure
156
156
  end
157
157
 
158
- # Specifies an Amazon Redshift data store.
158
+ # Specifies an optional value when connecting to the Redshift cluster.
159
159
  #
160
160
  # @!attribute [rw] key
161
- # The key when specifying a key-value pair.
161
+ # The key for the additional connection option.
162
162
  # @return [String]
163
163
  #
164
164
  # @!attribute [rw] value
165
- # The value when specifying a key-value pair.
165
+ # The value for the additional connection option.
166
166
  # @return [String]
167
167
  #
168
168
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/AmazonRedshiftAdvancedOption AWS API Documentation
@@ -2214,6 +2214,11 @@ module Aws::Glue
2214
2214
  # Specifies a target that writes to a data target in Amazon Redshift.
2215
2215
  # @return [Types::AmazonRedshiftTarget]
2216
2216
  #
2217
+ # @!attribute [rw] evaluate_data_quality_multi_frame
2218
+ # Specifies your data quality evaluation criteria. Allows multiple
2219
+ # input data and returns a collection of Dynamic Frames.
2220
+ # @return [Types::EvaluateDataQualityMultiFrame]
2221
+ #
2217
2222
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/CodeGenConfigurationNode AWS API Documentation
2218
2223
  #
2219
2224
  class CodeGenConfigurationNode < Struct.new(
@@ -2281,7 +2286,8 @@ module Aws::Glue
2281
2286
  :s3_delta_catalog_target,
2282
2287
  :s3_delta_direct_target,
2283
2288
  :amazon_redshift_source,
2284
- :amazon_redshift_target)
2289
+ :amazon_redshift_target,
2290
+ :evaluate_data_quality_multi_frame)
2285
2291
  SENSITIVE = []
2286
2292
  include Aws::Structure
2287
2293
  end
@@ -4245,7 +4251,8 @@ module Aws::Glue
4245
4251
  # @return [Types::JobCommand]
4246
4252
  #
4247
4253
  # @!attribute [rw] default_arguments
4248
- # The default arguments for this job.
4254
+ # The default arguments for every run of this job, specified as
4255
+ # name-value pairs.
4249
4256
  #
4250
4257
  # You can specify arguments here that your own job-execution script
4251
4258
  # consumes, as well as arguments that Glue itself consumes.
@@ -4259,19 +4266,24 @@ module Aws::Glue
4259
4266
  # arguments, see the [Calling Glue APIs in Python][1] topic in the
4260
4267
  # developer guide.
4261
4268
  #
4262
- # For information about the key-value pairs that Glue consumes to set
4263
- # up your job, see the [Special Parameters Used by Glue][2] topic in
4264
- # the developer guide.
4269
+ # For information about the arguments you can provide to this field
4270
+ # when configuring Spark jobs, see the [Special Parameters Used by
4271
+ # Glue][2] topic in the developer guide.
4272
+ #
4273
+ # For information about the arguments you can provide to this field
4274
+ # when configuring Ray jobs, see [Using job parameters in Ray jobs][3]
4275
+ # in the developer guide.
4265
4276
  #
4266
4277
  #
4267
4278
  #
4268
4279
  # [1]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html
4269
4280
  # [2]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
4281
+ # [3]: https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-job-parameters.html
4270
4282
  # @return [Hash<String,String>]
4271
4283
  #
4272
4284
  # @!attribute [rw] non_overridable_arguments
4273
- # Non-overridable arguments for this job, specified as name-value
4274
- # pairs.
4285
+ # Arguments for this job that are not overridden when providing job
4286
+ # arguments in a job run, specified as name-value pairs.
4275
4287
  # @return [Hash<String,String>]
4276
4288
  #
4277
4289
  # @!attribute [rw] connections
@@ -4307,13 +4319,18 @@ module Aws::Glue
4307
4319
  # type, the number of Glue data processing units (DPUs) that can be
4308
4320
  # allocated when this job runs. A DPU is a relative measure of
4309
4321
  # processing power that consists of 4 vCPUs of compute capacity and 16
4310
- # GB of memory. For more information, see the [Glue pricing page][1].
4322
+ # GB of memory. For more information, see the [ Glue pricing page][1].
4311
4323
  #
4312
- # Do not set `Max Capacity` if using `WorkerType` and
4324
+ # For Glue version 2.0+ jobs, you cannot specify a `Maximum capacity`.
4325
+ # Instead, you should specify a `Worker type` and the `Number of
4326
+ # workers`.
4327
+ #
4328
+ # Do not set `MaxCapacity` if using `WorkerType` and
4313
4329
  # `NumberOfWorkers`.
4314
4330
  #
4315
4331
  # The value that can be allocated for `MaxCapacity` depends on whether
4316
- # you are running a Python shell job or an Apache Spark ETL job:
4332
+ # you are running a Python shell job, an Apache Spark ETL job, or an
4333
+ # Apache Spark streaming ETL job:
4317
4334
  #
4318
4335
  # * When you specify a Python shell job
4319
4336
  # (`JobCommand.Name`="pythonshell"), you can allocate either
@@ -4321,14 +4338,10 @@ module Aws::Glue
4321
4338
  #
4322
4339
  # * When you specify an Apache Spark ETL job
4323
4340
  # (`JobCommand.Name`="glueetl") or Apache Spark streaming ETL job
4324
- # (`JobCommand.Name`="gluestreaming"), you can allocate a minimum
4325
- # of 2 DPUs. The default is 10 DPUs. This job type cannot have a
4341
+ # (`JobCommand.Name`="gluestreaming"), you can allocate from 2 to
4342
+ # 100 DPUs. The default is 10 DPUs. This job type cannot have a
4326
4343
  # fractional DPU allocation.
4327
4344
  #
4328
- # For Glue version 2.0 jobs, you cannot instead specify a `Maximum
4329
- # capacity`. Instead, you should specify a `Worker type` and the
4330
- # `Number of workers`.
4331
- #
4332
4345
  #
4333
4346
  #
4334
4347
  # [1]: https://aws.amazon.com/glue/pricing/
@@ -4354,9 +4367,14 @@ module Aws::Glue
4354
4367
  # @return [Types::NotificationProperty]
4355
4368
  #
4356
4369
  # @!attribute [rw] glue_version
4357
- # Glue version determines the versions of Apache Spark and Python that
4358
- # Glue supports. The Python version indicates the version supported
4359
- # for jobs of type Spark.
4370
+ # In Spark jobs, `GlueVersion` determines the versions of Apache Spark
4371
+ # and Python that Glue makes available in a job. The Python version
4372
+ # indicates the version supported for jobs of type Spark.
4373
+ #
4374
+ # Ray jobs should set `GlueVersion` to `4.0` or greater. However, the
4375
+ # versions of Ray, Python and additional libraries available in your
4376
+ # Ray job are determined by the `Runtime` parameter of the Job
4377
+ # command.
4360
4378
  #
4361
4379
  # For more information about the available Glue versions and
4362
4380
  # corresponding Spark and Python versions, see [Glue version][1] in
@@ -4377,7 +4395,8 @@ module Aws::Glue
4377
4395
  #
4378
4396
  # @!attribute [rw] worker_type
4379
4397
  # The type of predefined worker that is allocated when a job runs.
4380
- # Accepts a value of Standard, G.1X, G.2X, or G.025X.
4398
+ # Accepts a value of Standard, G.1X, G.2X, or G.025X for Spark jobs.
4399
+ # Accepts the value Z.2X for Ray jobs.
4381
4400
  #
4382
4401
  # * For the `Standard` worker type, each worker provides 4 vCPU, 16 GB
4383
4402
  # of memory and a 50GB disk, and 2 executors per worker.
@@ -4395,6 +4414,10 @@ module Aws::Glue
4395
4414
  # worker. We recommend this worker type for low volume streaming
4396
4415
  # jobs. This worker type is only available for Glue version 3.0
4397
4416
  # streaming jobs.
4417
+ #
4418
+ # * For the `Z.2X` worker type, each worker maps to 2 M-DPU (8 vCPU, 64
4419
+ # GB of memory, 128 GB disk), and provides up to 8 Ray workers
4420
+ # based on the autoscaler.
4398
4421
  # @return [String]
4399
4422
  #
4400
4423
  # @!attribute [rw] code_gen_configuration_nodes
@@ -5851,13 +5874,18 @@ module Aws::Glue
5851
5874
  # A pass or fail status for the rule.
5852
5875
  # @return [String]
5853
5876
  #
5877
+ # @!attribute [rw] evaluated_metrics
5878
+ # A map of metrics associated with the evaluation of the rule.
5879
+ # @return [Hash<String,Float>]
5880
+ #
5854
5881
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/DataQualityRuleResult AWS API Documentation
5855
5882
  #
5856
5883
  class DataQualityRuleResult < Struct.new(
5857
5884
  :name,
5858
5885
  :description,
5859
5886
  :evaluation_message,
5860
- :result)
5887
+ :result,
5888
+ :evaluated_metrics)
5861
5889
  SENSITIVE = []
5862
5890
  include Aws::Structure
5863
5891
  end
@@ -6015,11 +6043,16 @@ module Aws::Glue
6015
6043
  # The name of the database where the Glue table exists.
6016
6044
  # @return [String]
6017
6045
  #
6046
+ # @!attribute [rw] catalog_id
6047
+ # The ID of the catalog where the Glue table exists.
6048
+ # @return [String]
6049
+ #
6018
6050
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/DataQualityTargetTable AWS API Documentation
6019
6051
  #
6020
6052
  class DataQualityTargetTable < Struct.new(
6021
6053
  :table_name,
6022
- :database_name)
6054
+ :database_name,
6055
+ :catalog_id)
6023
6056
  SENSITIVE = []
6024
6057
  include Aws::Structure
6025
6058
  end
@@ -7759,6 +7792,52 @@ module Aws::Glue
7759
7792
  include Aws::Structure
7760
7793
  end
7761
7794
 
7795
+ # Specifies your data quality evaluation criteria.
7796
+ #
7797
+ # @!attribute [rw] name
7798
+ # The name of the data quality evaluation.
7799
+ # @return [String]
7800
+ #
7801
+ # @!attribute [rw] inputs
7802
+ # The inputs of your data quality evaluation. The first input in this
7803
+ # list is the primary data source.
7804
+ # @return [Array<String>]
7805
+ #
7806
+ # @!attribute [rw] additional_data_sources
7807
+ # The aliases of all data sources except primary.
7808
+ # @return [Hash<String,String>]
7809
+ #
7810
+ # @!attribute [rw] ruleset
7811
+ # The ruleset for your data quality evaluation.
7812
+ # @return [String]
7813
+ #
7814
+ # @!attribute [rw] publishing_options
7815
+ # Options to configure how your results are published.
7816
+ # @return [Types::DQResultsPublishingOptions]
7817
+ #
7818
+ # @!attribute [rw] additional_options
7819
+ # Options to configure runtime behavior of the transform.
7820
+ # @return [Hash<String,String>]
7821
+ #
7822
+ # @!attribute [rw] stop_job_on_failure_options
7823
+ # Options to configure how your job will stop if your data quality
7824
+ # evaluation fails.
7825
+ # @return [Types::DQStopJobOnFailureOptions]
7826
+ #
7827
+ # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/EvaluateDataQualityMultiFrame AWS API Documentation
7828
+ #
7829
+ class EvaluateDataQualityMultiFrame < Struct.new(
7830
+ :name,
7831
+ :inputs,
7832
+ :additional_data_sources,
7833
+ :ruleset,
7834
+ :publishing_options,
7835
+ :additional_options,
7836
+ :stop_job_on_failure_options)
7837
+ SENSITIVE = []
7838
+ include Aws::Structure
7839
+ end
7840
+
7762
7841
  # Evaluation metrics provide an estimate of the quality of your machine
7763
7842
  # learning transform.
7764
7843
  #
@@ -9008,6 +9087,11 @@ module Aws::Glue
9008
9087
  # A list of result IDs for the data quality results for the run.
9009
9088
  # @return [Array<String>]
9010
9089
  #
9090
+ # @!attribute [rw] additional_data_sources
9091
+ # A map of reference strings to additional data sources you can
9092
+ # specify for an evaluation run.
9093
+ # @return [Hash<String,Types::DataSource>]
9094
+ #
9011
9095
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/GetDataQualityRulesetEvaluationRunResponse AWS API Documentation
9012
9096
  #
9013
9097
  class GetDataQualityRulesetEvaluationRunResponse < Struct.new(
@@ -9024,7 +9108,8 @@ module Aws::Glue
9024
9108
  :completed_on,
9025
9109
  :execution_time,
9026
9110
  :ruleset_names,
9027
- :result_ids)
9111
+ :result_ids,
9112
+ :additional_data_sources)
9028
9113
  SENSITIVE = []
9029
9114
  include Aws::Structure
9030
9115
  end
@@ -12097,28 +12182,39 @@ module Aws::Glue
12097
12182
  # @return [Types::JobCommand]
12098
12183
  #
12099
12184
  # @!attribute [rw] default_arguments
12100
- # The default arguments for this job, specified as name-value pairs.
12185
+ # The default arguments for every run of this job, specified as
12186
+ # name-value pairs.
12101
12187
  #
12102
12188
  # You can specify arguments here that your own job-execution script
12103
12189
  # consumes, as well as arguments that Glue itself consumes.
12104
12190
  #
12191
+ # Job arguments may be logged. Do not pass plaintext secrets as
12192
+ # arguments. Retrieve secrets from a Glue Connection, Secrets Manager
12193
+ # or other secret management mechanism if you intend to keep them
12194
+ # within the Job.
12195
+ #
12105
12196
  # For information about how to specify and consume your own Job
12106
12197
  # arguments, see the [Calling Glue APIs in Python][1] topic in the
12107
12198
  # developer guide.
12108
12199
  #
12109
- # For information about the key-value pairs that Glue consumes to set
12110
- # up your job, see the [Special Parameters Used by Glue][2] topic in
12111
- # the developer guide.
12200
+ # For information about the arguments you can provide to this field
12201
+ # when configuring Spark jobs, see the [Special Parameters Used by
12202
+ # Glue][2] topic in the developer guide.
12203
+ #
12204
+ # For information about the arguments you can provide to this field
12205
+ # when configuring Ray jobs, see [Using job parameters in Ray jobs][3]
12206
+ # in the developer guide.
12112
12207
  #
12113
12208
  #
12114
12209
  #
12115
12210
  # [1]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html
12116
12211
  # [2]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
12212
+ # [3]: https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-job-parameters.html
12117
12213
  # @return [Hash<String,String>]
12118
12214
  #
12119
12215
  # @!attribute [rw] non_overridable_arguments
12120
- # Non-overridable arguments for this job, specified as name-value
12121
- # pairs.
12216
+ # Arguments for this job that are not overridden when providing job
12217
+ # arguments in a job run, specified as name-value pairs.
12122
12218
  # @return [Hash<String,String>]
12123
12219
  #
12124
12220
  # @!attribute [rw] connections
@@ -12156,7 +12252,7 @@ module Aws::Glue
12156
12252
  # type, the number of Glue data processing units (DPUs) that can be
12157
12253
  # allocated when this job runs. A DPU is a relative measure of
12158
12254
  # processing power that consists of 4 vCPUs of compute capacity and 16
12159
- # GB of memory. For more information, see the [Glue pricing page][1].
12255
+ # GB of memory. For more information, see the [ Glue pricing page][1].
12160
12256
  #
12161
12257
  # For Glue version 2.0 or later jobs, you cannot specify a `Maximum
12162
12258
  # capacity`. Instead, you should specify a `Worker type` and the
@@ -12186,7 +12282,8 @@ module Aws::Glue
12186
12282
  #
12187
12283
  # @!attribute [rw] worker_type
12188
12284
  # The type of predefined worker that is allocated when a job runs.
12189
- # Accepts a value of Standard, G.1X, G.2X, or G.025X.
12285
+ # Accepts a value of Standard, G.1X, G.2X, G.4X, G.8X, or G.025X for
12286
+ # Spark jobs. Accepts the value Z.2X for Ray jobs.
12190
12287
  #
12191
12288
  # * For the `Standard` worker type, each worker provides 4 vCPU, 16 GB
12192
12289
  # of memory and a 50GB disk, and 2 executors per worker.
@@ -12207,20 +12304,30 @@ module Aws::Glue
12207
12304
  # GB of memory, 256 GB disk), and provides 1 executor per worker. We
12208
12305
  # recommend this worker type for jobs whose workloads contain your
12209
12306
  # most demanding transforms, aggregations, joins, and queries. This
12210
- # worker type is available only for Glue version 3.0 or later jobs.
12307
+ # worker type is available only for Glue version 3.0 or later Spark
12308
+ # ETL jobs in the following Amazon Web Services Regions: US East
12309
+ # (Ohio), US East (N. Virginia), US West (Oregon), Asia Pacific
12310
+ # (Singapore), Asia Pacific (Sydney), Asia Pacific (Tokyo), Canada
12311
+ # (Central), Europe (Frankfurt), Europe (Ireland), and Europe
12312
+ # (Stockholm).
12211
12313
  #
12212
12314
  # * For the `G.8X` worker type, each worker maps to 8 DPU (32 vCPU,
12213
12315
  # 128 GB of memory, 512 GB disk), and provides 1 executor per
12214
12316
  # worker. We recommend this worker type for jobs whose workloads
12215
12317
  # contain your most demanding transforms, aggregations, joins, and
12216
12318
  # queries. This worker type is available only for Glue version 3.0
12217
- # or later jobs.
12319
+ # or later Spark ETL jobs, in the same Amazon Web Services Regions
12320
+ # as supported for the `G.4X` worker type.
12218
12321
  #
12219
12322
  # * For the `G.025X` worker type, each worker maps to 0.25 DPU (2
12220
12323
  # vCPU, 4 GB of memory, 64 GB disk), and provides 1 executor per
12221
12324
  # worker. We recommend this worker type for low volume streaming
12222
12325
  # jobs. This worker type is only available for Glue version 3.0
12223
12326
  # streaming jobs.
12327
+ #
12328
+ # * For the `Z.2X` worker type, each worker maps to 2 M-DPU (8 vCPU, 64
12329
+ # GB of memory, 128 GB disk), and provides a default of 8 Ray
12330
+ # workers (1 per vCPU).
12224
12331
  # @return [String]
12225
12332
  #
12226
12333
  # @!attribute [rw] number_of_workers
@@ -12238,9 +12345,14 @@ module Aws::Glue
12238
12345
  # @return [Types::NotificationProperty]
12239
12346
  #
12240
12347
  # @!attribute [rw] glue_version
12241
- # Glue version determines the versions of Apache Spark and Python that
12242
- # Glue supports. The Python version indicates the version supported
12243
- # for jobs of type Spark.
12348
+ # In Spark jobs, `GlueVersion` determines the versions of Apache Spark
12349
+ # and Python that Glue makes available in a job. The Python version
12350
+ # indicates the version supported for jobs of type Spark.
12351
+ #
12352
+ # Ray jobs should set `GlueVersion` to `4.0` or greater. However, the
12353
+ # versions of Ray, Python and additional libraries available in your
12354
+ # Ray job are determined by the `Runtime` parameter of the Job
12355
+ # command.
12244
12356
  #
12245
12357
  # For more information about the available Glue versions and
12246
12358
  # corresponding Spark and Python versions, see [Glue version][1] in
@@ -12378,7 +12490,8 @@ module Aws::Glue
12378
12490
  # @!attribute [rw] name
12379
12491
  # The name of the job command. For an Apache Spark ETL job, this must
12380
12492
  # be `glueetl`. For a Python shell job, it must be `pythonshell`. For
12381
- # an Apache Spark streaming ETL job, this must be `gluestreaming`.
12493
+ # an Apache Spark streaming ETL job, this must be `gluestreaming`. For
12494
+ # a Ray job, this must be `glueray`.
12382
12495
  # @return [String]
12383
12496
  #
12384
12497
  # @!attribute [rw] script_location
@@ -12391,12 +12504,24 @@ module Aws::Glue
12391
12504
  # values are 2 or 3.
12392
12505
  # @return [String]
12393
12506
  #
12507
+ # @!attribute [rw] runtime
12508
+ # In Ray jobs, Runtime is used to specify the versions of Ray, Python
12509
+ # and additional libraries available in your environment. This field
12510
+ # is not used in other job types. For supported runtime environment
12511
+ # values, see [Working with Ray jobs][1] in the Glue Developer Guide.
12512
+ #
12513
+ #
12514
+ #
12515
+ # [1]: https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-runtimes.html
12516
+ # @return [String]
12517
+ #
12394
12518
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/JobCommand AWS API Documentation
12395
12519
  #
12396
12520
  class JobCommand < Struct.new(
12397
12521
  :name,
12398
12522
  :script_location,
12399
- :python_version)
12523
+ :python_version,
12524
+ :runtime)
12400
12525
  SENSITIVE = []
12401
12526
  include Aws::Structure
12402
12527
  end
@@ -12467,18 +12592,28 @@ module Aws::Glue
12467
12592
  # You can specify arguments here that your own job-execution script
12468
12593
  # consumes, as well as arguments that Glue itself consumes.
12469
12594
  #
12470
- # For information about how to specify and consume your own job
12595
+ # Job arguments may be logged. Do not pass plaintext secrets as
12596
+ # arguments. Retrieve secrets from a Glue Connection, Secrets Manager
12597
+ # or other secret management mechanism if you intend to keep them
12598
+ # within the Job.
12599
+ #
12600
+ # For information about how to specify and consume your own Job
12471
12601
  # arguments, see the [Calling Glue APIs in Python][1] topic in the
12472
12602
  # developer guide.
12473
12603
  #
12474
- # For information about the key-value pairs that Glue consumes to set
12475
- # up your job, see the [Special Parameters Used by Glue][2] topic in
12476
- # the developer guide.
12604
+ # For information about the arguments you can provide to this field
12605
+ # when configuring Spark jobs, see the [Special Parameters Used by
12606
+ # Glue][2] topic in the developer guide.
12607
+ #
12608
+ # For information about the arguments you can provide to this field
12609
+ # when configuring Ray jobs, see [Using job parameters in Ray jobs][3]
12610
+ # in the developer guide.
12477
12611
  #
12478
12612
  #
12479
12613
  #
12480
12614
  # [1]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html
12481
12615
  # [2]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
12616
+ # [3]: https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-job-parameters.html
12482
12617
  # @return [Hash<String,String>]
12483
12618
  #
12484
12619
  # @!attribute [rw] error_message
@@ -12518,24 +12653,31 @@ module Aws::Glue
12518
12653
  # @return [Integer]
12519
12654
  #
12520
12655
  # @!attribute [rw] max_capacity
12521
- # The number of Glue data processing units (DPUs) that can be
12656
+ # For Glue version 1.0 or earlier jobs, using the standard worker
12657
+ # type, the number of Glue data processing units (DPUs) that can be
12522
12658
  # allocated when this job runs. A DPU is a relative measure of
12523
12659
  # processing power that consists of 4 vCPUs of compute capacity and 16
12524
- # GB of memory. For more information, see the [Glue pricing page][1].
12660
+ # GB of memory. For more information, see the [ Glue pricing page][1].
12661
+ #
12662
+ # For Glue version 2.0+ jobs, you cannot specify a `Maximum capacity`.
12663
+ # Instead, you should specify a `Worker type` and the `Number of
12664
+ # workers`.
12525
12665
  #
12526
- # Do not set `Max Capacity` if using `WorkerType` and
12666
+ # Do not set `MaxCapacity` if using `WorkerType` and
12527
12667
  # `NumberOfWorkers`.
12528
12668
  #
12529
12669
  # The value that can be allocated for `MaxCapacity` depends on whether
12530
- # you are running a Python shell job or an Apache Spark ETL job:
12670
+ # you are running a Python shell job, an Apache Spark ETL job, or an
12671
+ # Apache Spark streaming ETL job:
12531
12672
  #
12532
12673
  # * When you specify a Python shell job
12533
12674
  # (`JobCommand.Name`="pythonshell"), you can allocate either
12534
12675
  # 0.0625 or 1 DPU. The default is 0.0625 DPU.
12535
12676
  #
12536
12677
  # * When you specify an Apache Spark ETL job
12537
- # (`JobCommand.Name`="glueetl"), you can allocate a minimum of 2
12538
- # DPUs. The default is 10 DPUs. This job type cannot have a
12678
+ # (`JobCommand.Name`="glueetl") or Apache Spark streaming ETL job
12679
+ # (`JobCommand.Name`="gluestreaming"), you can allocate from 2 to
12680
+ # 100 DPUs. The default is 10 DPUs. This job type cannot have a
12539
12681
  # fractional DPU allocation.
12540
12682
  #
12541
12683
  #
@@ -12545,22 +12687,29 @@ module Aws::Glue
12545
12687
  #
12546
12688
  # @!attribute [rw] worker_type
12547
12689
  # The type of predefined worker that is allocated when a job runs.
12548
- # Accepts a value of Standard, G.1X, G.2X, or G.025X.
12690
+ # Accepts a value of Standard, G.1X, G.2X, or G.025X for Spark jobs.
12691
+ # Accepts the value Z.2X for Ray jobs.
12549
12692
  #
12550
12693
  # * For the `Standard` worker type, each worker provides 4 vCPU, 16 GB
12551
12694
  # of memory and a 50GB disk, and 2 executors per worker.
12552
12695
  #
12553
- # * For the `G.1X` worker type, each worker provides 4 vCPU, 16 GB of
12554
- # memory and a 64GB disk, and 1 executor per worker.
12696
+ # * For the `G.1X` worker type, each worker maps to 1 DPU (4 vCPU, 16
12697
+ # GB of memory, 64 GB disk), and provides 1 executor per worker. We
12698
+ # recommend this worker type for memory-intensive jobs.
12555
12699
  #
12556
- # * For the `G.2X` worker type, each worker provides 8 vCPU, 32 GB of
12557
- # memory and a 128GB disk, and 1 executor per worker.
12700
+ # * For the `G.2X` worker type, each worker maps to 2 DPU (8 vCPU, 32
12701
+ # GB of memory, 128 GB disk), and provides 1 executor per worker. We
12702
+ # recommend this worker type for memory-intensive jobs.
12558
12703
  #
12559
12704
  # * For the `G.025X` worker type, each worker maps to 0.25 DPU (2
12560
12705
  # vCPU, 4 GB of memory, 64 GB disk), and provides 1 executor per
12561
12706
  # worker. We recommend this worker type for low volume streaming
12562
12707
  # jobs. This worker type is only available for Glue version 3.0
12563
12708
  # streaming jobs.
12709
+ #
12710
+ # * For the `Z.2X` worker type, each worker maps to 2 M-DPU (8 vCPU, 64
12711
+ # GB of memory, 128 GB disk), and provides up to 8 Ray workers (one
12712
+ # per vCPU) based on the autoscaler.
12564
12713
  # @return [String]
12565
12714
  #
12566
12715
  # @!attribute [rw] number_of_workers
@@ -12588,9 +12737,14 @@ module Aws::Glue
12588
12737
  # @return [Types::NotificationProperty]
12589
12738
  #
12590
12739
  # @!attribute [rw] glue_version
12591
- # Glue version determines the versions of Apache Spark and Python that
12592
- # Glue supports. The Python version indicates the version supported
12593
- # for jobs of type Spark.
12740
+ # In Spark jobs, `GlueVersion` determines the versions of Apache Spark
12741
+ # and Python that Glue makes available in a job. The Python version
12742
+ # indicates the version supported for jobs of type Spark.
12743
+ #
12744
+ # Ray jobs should set `GlueVersion` to `4.0` or greater. However, the
12745
+ # versions of Ray, Python and additional libraries available in your
12746
+ # Ray job are determined by the `Runtime` parameter of the Job
12747
+ # command.
12594
12748
  #
12595
12749
  # For more information about the available Glue versions and
12596
12750
  # corresponding Spark and Python versions, see [Glue version][1] in
@@ -12687,28 +12841,39 @@ module Aws::Glue
12687
12841
  # @return [Types::JobCommand]
12688
12842
  #
12689
12843
  # @!attribute [rw] default_arguments
12690
- # The default arguments for this job.
12844
+ # The default arguments for every run of this job, specified as
12845
+ # name-value pairs.
12691
12846
  #
12692
12847
  # You can specify arguments here that your own job-execution script
12693
12848
  # consumes, as well as arguments that Glue itself consumes.
12694
12849
  #
12850
+ # Job arguments may be logged. Do not pass plaintext secrets as
12851
+ # arguments. Retrieve secrets from a Glue Connection, Secrets Manager
12852
+ # or other secret management mechanism if you intend to keep them
12853
+ # within the Job.
12854
+ #
12695
12855
  # For information about how to specify and consume your own Job
12696
12856
  # arguments, see the [Calling Glue APIs in Python][1] topic in the
12697
12857
  # developer guide.
12698
12858
  #
12699
- # For information about the key-value pairs that Glue consumes to set
12700
- # up your job, see the [Special Parameters Used by Glue][2] topic in
12701
- # the developer guide.
12859
+ # For information about the arguments you can provide to this field
12860
+ # when configuring Spark jobs, see the [Special Parameters Used by
12861
+ # Glue][2] topic in the developer guide.
12862
+ #
12863
+ # For information about the arguments you can provide to this field
12864
+ # when configuring Ray jobs, see [Using job parameters in Ray jobs][3]
12865
+ # in the developer guide.
12702
12866
  #
12703
12867
  #
12704
12868
  #
12705
12869
  # [1]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html
12706
12870
  # [2]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
12871
+ # [3]: https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-job-parameters.html
12707
12872
  # @return [Hash<String,String>]
12708
12873
  #
12709
12874
  # @!attribute [rw] non_overridable_arguments
12710
- # Non-overridable arguments for this job, specified as name-value
12711
- # pairs.
12875
+ # Arguments for this job that are not overridden when providing job
12876
+ # arguments in a job run, specified as name-value pairs.
12712
12877
  # @return [Hash<String,String>]
12713
12878
  #
12714
12879
  # @!attribute [rw] connections
@@ -12744,13 +12909,18 @@ module Aws::Glue
12744
12909
  # type, the number of Glue data processing units (DPUs) that can be
12745
12910
  # allocated when this job runs. A DPU is a relative measure of
12746
12911
  # processing power that consists of 4 vCPUs of compute capacity and 16
12747
- # GB of memory. For more information, see the [Glue pricing page][1].
12912
+ # GB of memory. For more information, see the [ Glue pricing page][1].
12913
+ #
12914
+ # For Glue version 2.0+ jobs, you cannot specify a `Maximum capacity`.
12915
+ # Instead, you should specify a `Worker type` and the `Number of
12916
+ # workers`.
12748
12917
  #
12749
- # Do not set `Max Capacity` if using `WorkerType` and
12918
+ # Do not set `MaxCapacity` if using `WorkerType` and
12750
12919
  # `NumberOfWorkers`.
12751
12920
  #
12752
12921
  # The value that can be allocated for `MaxCapacity` depends on whether
12753
- # you are running a Python shell job or an Apache Spark ETL job:
12922
+ # you are running a Python shell job, an Apache Spark ETL job, or an
12923
+ # Apache Spark streaming ETL job:
12754
12924
  #
12755
12925
  # * When you specify a Python shell job
12756
12926
  # (`JobCommand.Name`="pythonshell"), you can allocate either
@@ -12758,14 +12928,10 @@ module Aws::Glue
12758
12928
  #
12759
12929
  # * When you specify an Apache Spark ETL job
12760
12930
  # (`JobCommand.Name`="glueetl") or Apache Spark streaming ETL job
12761
- # (`JobCommand.Name`="gluestreaming"), you can allocate a minimum
12762
- # of 2 DPUs. The default is 10 DPUs. This job type cannot have a
12931
+ # (`JobCommand.Name`="gluestreaming"), you can allocate from 2 to
12932
+ # 100 DPUs. The default is 10 DPUs. This job type cannot have a
12763
12933
  # fractional DPU allocation.
12764
12934
  #
12765
- # For Glue version 2.0 jobs, you cannot instead specify a `Maximum
12766
- # capacity`. Instead, you should specify a `Worker type` and the
12767
- # `Number of workers`.
12768
- #
12769
12935
  #
12770
12936
  #
12771
12937
  # [1]: https://aws.amazon.com/glue/pricing/
@@ -12773,7 +12939,8 @@ module Aws::Glue
12773
12939
  #
12774
12940
  # @!attribute [rw] worker_type
12775
12941
  # The type of predefined worker that is allocated when a job runs.
12776
- # Accepts a value of Standard, G.1X, G.2X, or G.025X.
12942
+ # Accepts a value of Standard, G.1X, G.2X, or G.025X for Spark jobs.
12943
+ # Accepts the value Z.2X for Ray jobs.
12777
12944
  #
12778
12945
  # * For the `Standard` worker type, each worker provides 4 vCPU, 16 GB
12779
12946
  # of memory and a 50GB disk, and 2 executors per worker.
@@ -12791,6 +12958,10 @@ module Aws::Glue
12791
12958
  # worker. We recommend this worker type for low volume streaming
12792
12959
  # jobs. This worker type is only available for Glue version 3.0
12793
12960
  # streaming jobs.
12961
+ #
12962
+ # * For the `Z.2X` worker type, each worker maps to 2 M-DPU (8 vCPU, 64
12963
+ # GB of memory, 128 GB disk), and provides up to 8 Ray workers
12964
+ # based on the autoscaler.
12794
12965
  # @return [String]
12795
12966
  #
12796
12967
  # @!attribute [rw] number_of_workers
@@ -12808,14 +12979,22 @@ module Aws::Glue
12808
12979
  # @return [Types::NotificationProperty]
12809
12980
  #
12810
12981
  # @!attribute [rw] glue_version
12811
- # Glue version determines the versions of Apache Spark and Python that
12812
- # Glue supports. The Python version indicates the version supported
12813
- # for jobs of type Spark.
12982
+ # In Spark jobs, `GlueVersion` determines the versions of Apache Spark
12983
+ # and Python that Glue makes available in a job. The Python version
12984
+ # indicates the version supported for jobs of type Spark.
12985
+ #
12986
+ # Ray jobs should set `GlueVersion` to `4.0` or greater. However, the
12987
+ # versions of Ray, Python and additional libraries available in your
12988
+ # Ray job are determined by the `Runtime` parameter of the Job
12989
+ # command.
12814
12990
  #
12815
12991
  # For more information about the available Glue versions and
12816
12992
  # corresponding Spark and Python versions, see [Glue version][1] in
12817
12993
  # the developer guide.
12818
12994
  #
12995
+ # Jobs that are created without specifying a Glue version default to
12996
+ # Glue 0.9.
12997
+ #
12819
12998
  #
12820
12999
  #
12821
13000
  # [1]: https://docs.aws.amazon.com/glue/latest/dg/add-job.html
@@ -18167,6 +18346,11 @@ module Aws::Glue
18167
18346
  # A list of ruleset names.
18168
18347
  # @return [Array<String>]
18169
18348
  #
18349
+ # @!attribute [rw] additional_data_sources
18350
+ # A map of reference strings to additional data sources you can
18351
+ # specify for an evaluation run.
18352
+ # @return [Hash<String,Types::DataSource>]
18353
+ #
18170
18354
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/StartDataQualityRulesetEvaluationRunRequest AWS API Documentation
18171
18355
  #
18172
18356
  class StartDataQualityRulesetEvaluationRunRequest < Struct.new(
@@ -18176,7 +18360,8 @@ module Aws::Glue
18176
18360
  :timeout,
18177
18361
  :client_token,
18178
18362
  :additional_run_options,
18179
- :ruleset_names)
18363
+ :ruleset_names,
18364
+ :additional_data_sources)
18180
18365
  SENSITIVE = []
18181
18366
  include Aws::Structure
18182
18367
  end
@@ -18266,7 +18451,7 @@ module Aws::Glue
18266
18451
  # @return [String]
18267
18452
  #
18268
18453
  # @!attribute [rw] arguments
18269
- # The job arguments specifically for this run. For this job run, they
18454
+ # The job arguments associated with this run. For this job run, they
18270
18455
  # replace the default arguments set in the job definition itself.
18271
18456
  #
18272
18457
  # You can specify arguments here that your own job-execution script
@@ -18281,14 +18466,19 @@ module Aws::Glue
18281
18466
  # arguments, see the [Calling Glue APIs in Python][1] topic in the
18282
18467
  # developer guide.
18283
18468
  #
18284
- # For information about the key-value pairs that Glue consumes to set
18285
- # up your job, see the [Special Parameters Used by Glue][2] topic in
18286
- # the developer guide.
18469
+ # For information about the arguments you can provide to this field
18470
+ # when configuring Spark jobs, see the [Special Parameters Used by
18471
+ # Glue][2] topic in the developer guide.
18472
+ #
18473
+ # For information about the arguments you can provide to this field
18474
+ # when configuring Ray jobs, see [Using job parameters in Ray jobs][3]
18475
+ # in the developer guide.
18287
18476
  #
18288
18477
  #
18289
18478
  #
18290
18479
  # [1]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html
18291
18480
  # [2]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
18481
+ # [3]: https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-job-parameters.html
18292
18482
  # @return [Hash<String,String>]
18293
18483
  #
18294
18484
  # @!attribute [rw] allocated_capacity
@@ -18316,24 +18506,31 @@ module Aws::Glue
18316
18506
  # @return [Integer]
18317
18507
  #
18318
18508
  # @!attribute [rw] max_capacity
18319
- # The number of Glue data processing units (DPUs) that can be
18509
+ # For Glue version 1.0 or earlier jobs, using the standard worker
18510
+ # type, the number of Glue data processing units (DPUs) that can be
18320
18511
  # allocated when this job runs. A DPU is a relative measure of
18321
18512
  # processing power that consists of 4 vCPUs of compute capacity and 16
18322
- # GB of memory. For more information, see the [Glue pricing page][1].
18513
+ # GB of memory. For more information, see the [Glue pricing page][1].
18323
18514
  #
18324
- # Do not set `Max Capacity` if using `WorkerType` and
18515
+ # For Glue version 2.0+ jobs, you cannot specify a `Maximum capacity`.
18516
+ # Instead, you should specify a `Worker type` and the `Number of
18517
+ # workers`.
18518
+ #
18519
+ # Do not set `MaxCapacity` if using `WorkerType` and
18325
18520
  # `NumberOfWorkers`.
18326
18521
  #
18327
18522
  # The value that can be allocated for `MaxCapacity` depends on whether
18328
- # you are running a Python shell job, or an Apache Spark ETL job:
18523
+ # you are running a Python shell job, an Apache Spark ETL job, or an
18524
+ # Apache Spark streaming ETL job:
18329
18525
  #
18330
18526
  # * When you specify a Python shell job
18331
18527
  # (`JobCommand.Name`="pythonshell"), you can allocate either
18332
18528
  # 0.0625 or 1 DPU. The default is 0.0625 DPU.
18333
18529
  #
18334
18530
  # * When you specify an Apache Spark ETL job
18335
- # (`JobCommand.Name`="glueetl"), you can allocate a minimum of 2
18336
- # DPUs. The default is 10 DPUs. This job type cannot have a
18531
+ # (`JobCommand.Name`="glueetl") or Apache Spark streaming ETL job
18532
+ # (`JobCommand.Name`="gluestreaming"), you can allocate from 2 to
18533
+ # 100 DPUs. The default is 10 DPUs. This job type cannot have a
18337
18534
  # fractional DPU allocation.
18338
18535
  #
18339
18536
  #
@@ -18352,22 +18549,29 @@ module Aws::Glue
18352
18549
  #
18353
18550
  # @!attribute [rw] worker_type
18354
18551
  # The type of predefined worker that is allocated when a job runs.
18355
- # Accepts a value of Standard, G.1X, G.2X, or G.025X.
18552
+ # Accepts a value of Standard, G.1X, G.2X, or G.025X for Spark jobs.
18553
+ # Accepts the value Z.2X for Ray jobs.
18356
18554
  #
18357
18555
  # * For the `Standard` worker type, each worker provides 4 vCPU, 16 GB
18358
18556
  # of memory and a 50GB disk, and 2 executors per worker.
18359
18557
  #
18360
- # * For the `G.1X` worker type, each worker provides 4 vCPU, 16 GB of
18361
- # memory and a 64GB disk, and 1 executor per worker.
18558
+ # * For the `G.1X` worker type, each worker maps to 1 DPU (4 vCPU, 16
18559
+ # GB of memory, 64 GB disk), and provides 1 executor per worker. We
18560
+ # recommend this worker type for memory-intensive jobs.
18362
18561
  #
18363
- # * For the `G.2X` worker type, each worker provides 8 vCPU, 32 GB of
18364
- # memory and a 128GB disk, and 1 executor per worker.
18562
+ # * For the `G.2X` worker type, each worker maps to 2 DPU (8 vCPU, 32
18563
+ # GB of memory, 128 GB disk), and provides 1 executor per worker. We
18564
+ # recommend this worker type for memory-intensive jobs.
18365
18565
  #
18366
18566
  # * For the `G.025X` worker type, each worker maps to 0.25 DPU (2
18367
18567
  # vCPU, 4 GB of memory, 64 GB disk), and provides 1 executor per
18368
18568
  # worker. We recommend this worker type for low volume streaming
18369
18569
  # jobs. This worker type is only available for Glue version 3.0
18370
18570
  # streaming jobs.
18571
+ #
18572
+ # * For the `Z.2X` worker type, each worker maps to 2 DPU (8 vCPU, 64
18573
+ # GB of memory, 128 GB disk), and provides up to 8 Ray workers (one
18574
+ # per vCPU) based on the autoscaler.
18371
18575
  # @return [String]
18372
18576
  #
18373
18577
  # @!attribute [rw] number_of_workers