aws-sdk-glue 1.137.0 → 1.139.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/VERSION +1 -1
- data/lib/aws-sdk-glue/client.rb +166 -55
- data/lib/aws-sdk-glue/client_api.rb +34 -0
- data/lib/aws-sdk-glue/types.rb +298 -94
- data/lib/aws-sdk-glue.rb +1 -1
- metadata +2 -2
data/lib/aws-sdk-glue/types.rb
CHANGED
@@ -155,14 +155,14 @@ module Aws::Glue
|
|
155
155
|
include Aws::Structure
|
156
156
|
end
|
157
157
|
|
158
|
-
# Specifies an
|
158
|
+
# Specifies an optional value when connecting to the Redshift cluster.
|
159
159
|
#
|
160
160
|
# @!attribute [rw] key
|
161
|
-
# The key
|
161
|
+
# The key for the additional connection option.
|
162
162
|
# @return [String]
|
163
163
|
#
|
164
164
|
# @!attribute [rw] value
|
165
|
-
# The value
|
165
|
+
# The value for the additional connection option.
|
166
166
|
# @return [String]
|
167
167
|
#
|
168
168
|
# @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/AmazonRedshiftAdvancedOption AWS API Documentation
|
@@ -2214,6 +2214,11 @@ module Aws::Glue
|
|
2214
2214
|
# Specifies a target that writes to a data target in Amazon Redshift.
|
2215
2215
|
# @return [Types::AmazonRedshiftTarget]
|
2216
2216
|
#
|
2217
|
+
# @!attribute [rw] evaluate_data_quality_multi_frame
|
2218
|
+
# Specifies your data quality evaluation criteria. Allows multiple
|
2219
|
+
# input data and returns a collection of Dynamic Frames.
|
2220
|
+
# @return [Types::EvaluateDataQualityMultiFrame]
|
2221
|
+
#
|
2217
2222
|
# @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/CodeGenConfigurationNode AWS API Documentation
|
2218
2223
|
#
|
2219
2224
|
class CodeGenConfigurationNode < Struct.new(
|
@@ -2281,7 +2286,8 @@ module Aws::Glue
|
|
2281
2286
|
:s3_delta_catalog_target,
|
2282
2287
|
:s3_delta_direct_target,
|
2283
2288
|
:amazon_redshift_source,
|
2284
|
-
:amazon_redshift_target
|
2289
|
+
:amazon_redshift_target,
|
2290
|
+
:evaluate_data_quality_multi_frame)
|
2285
2291
|
SENSITIVE = []
|
2286
2292
|
include Aws::Structure
|
2287
2293
|
end
|
@@ -4245,7 +4251,8 @@ module Aws::Glue
|
|
4245
4251
|
# @return [Types::JobCommand]
|
4246
4252
|
#
|
4247
4253
|
# @!attribute [rw] default_arguments
|
4248
|
-
# The default arguments for this job
|
4254
|
+
# The default arguments for every run of this job, specified as
|
4255
|
+
# name-value pairs.
|
4249
4256
|
#
|
4250
4257
|
# You can specify arguments here that your own job-execution script
|
4251
4258
|
# consumes, as well as arguments that Glue itself consumes.
|
@@ -4259,19 +4266,24 @@ module Aws::Glue
|
|
4259
4266
|
# arguments, see the [Calling Glue APIs in Python][1] topic in the
|
4260
4267
|
# developer guide.
|
4261
4268
|
#
|
4262
|
-
# For information about the
|
4263
|
-
#
|
4264
|
-
# the developer guide.
|
4269
|
+
# For information about the arguments you can provide to this field
|
4270
|
+
# when configuring Spark jobs, see the [Special Parameters Used by
|
4271
|
+
# Glue][2] topic in the developer guide.
|
4272
|
+
#
|
4273
|
+
# For information about the arguments you can provide to this field
|
4274
|
+
# when configuring Ray jobs, see [Using job parameters in Ray jobs][3]
|
4275
|
+
# in the developer guide.
|
4265
4276
|
#
|
4266
4277
|
#
|
4267
4278
|
#
|
4268
4279
|
# [1]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html
|
4269
4280
|
# [2]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
|
4281
|
+
# [3]: https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-job-parameters.html
|
4270
4282
|
# @return [Hash<String,String>]
|
4271
4283
|
#
|
4272
4284
|
# @!attribute [rw] non_overridable_arguments
|
4273
|
-
#
|
4274
|
-
# pairs.
|
4285
|
+
# Arguments for this job that are not overridden when providing job
|
4286
|
+
# arguments in a job run, specified as name-value pairs.
|
4275
4287
|
# @return [Hash<String,String>]
|
4276
4288
|
#
|
4277
4289
|
# @!attribute [rw] connections
|
@@ -4307,13 +4319,18 @@ module Aws::Glue
|
|
4307
4319
|
# type, the number of Glue data processing units (DPUs) that can be
|
4308
4320
|
# allocated when this job runs. A DPU is a relative measure of
|
4309
4321
|
# processing power that consists of 4 vCPUs of compute capacity and 16
|
4310
|
-
# GB of memory. For more information, see the [Glue pricing page][1].
|
4322
|
+
# GB of memory. For more information, see the [Glue pricing page][1].
|
4311
4323
|
#
|
4312
|
-
#
|
4324
|
+
# For Glue version 2.0+ jobs, you cannot specify a `Maximum capacity`.
|
4325
|
+
# Instead, you should specify a `Worker type` and the `Number of
|
4326
|
+
# workers`.
|
4327
|
+
#
|
4328
|
+
# Do not set `MaxCapacity` if using `WorkerType` and
|
4313
4329
|
# `NumberOfWorkers`.
|
4314
4330
|
#
|
4315
4331
|
# The value that can be allocated for `MaxCapacity` depends on whether
|
4316
|
-
# you are running a Python shell job
|
4332
|
+
# you are running a Python shell job, an Apache Spark ETL job, or an
|
4333
|
+
# Apache Spark streaming ETL job:
|
4317
4334
|
#
|
4318
4335
|
# * When you specify a Python shell job
|
4319
4336
|
# (`JobCommand.Name`="pythonshell"), you can allocate either
|
@@ -4321,14 +4338,10 @@ module Aws::Glue
|
|
4321
4338
|
#
|
4322
4339
|
# * When you specify an Apache Spark ETL job
|
4323
4340
|
# (`JobCommand.Name`="glueetl") or Apache Spark streaming ETL job
|
4324
|
-
# (`JobCommand.Name`="gluestreaming"), you can allocate
|
4325
|
-
#
|
4341
|
+
# (`JobCommand.Name`="gluestreaming"), you can allocate from 2 to
|
4342
|
+
# 100 DPUs. The default is 10 DPUs. This job type cannot have a
|
4326
4343
|
# fractional DPU allocation.
|
4327
4344
|
#
|
4328
|
-
# For Glue version 2.0 jobs, you cannot instead specify a `Maximum
|
4329
|
-
# capacity`. Instead, you should specify a `Worker type` and the
|
4330
|
-
# `Number of workers`.
|
4331
|
-
#
|
4332
4345
|
#
|
4333
4346
|
#
|
4334
4347
|
# [1]: https://aws.amazon.com/glue/pricing/
|
@@ -4354,9 +4367,14 @@ module Aws::Glue
|
|
4354
4367
|
# @return [Types::NotificationProperty]
|
4355
4368
|
#
|
4356
4369
|
# @!attribute [rw] glue_version
|
4357
|
-
#
|
4358
|
-
# Glue
|
4359
|
-
# for jobs of type Spark.
|
4370
|
+
# In Spark jobs, `GlueVersion` determines the versions of Apache Spark
|
4371
|
+
# and Python that Glue makes available in a job. The Python version
|
4372
|
+
# indicates the version supported for jobs of type Spark.
|
4373
|
+
#
|
4374
|
+
# Ray jobs should set `GlueVersion` to `4.0` or greater. However, the
|
4375
|
+
# versions of Ray, Python and additional libraries available in your
|
4376
|
+
# Ray job are determined by the `Runtime` parameter of the Job
|
4377
|
+
# command.
|
4360
4378
|
#
|
4361
4379
|
# For more information about the available Glue versions and
|
4362
4380
|
# corresponding Spark and Python versions, see [Glue version][1] in
|
@@ -4377,7 +4395,8 @@ module Aws::Glue
|
|
4377
4395
|
#
|
4378
4396
|
# @!attribute [rw] worker_type
|
4379
4397
|
# The type of predefined worker that is allocated when a job runs.
|
4380
|
-
# Accepts a value of Standard, G.1X, G.2X, or G.025X.
|
4398
|
+
# Accepts a value of Standard, G.1X, G.2X, or G.025X for Spark jobs.
|
4399
|
+
# Accepts the value Z.2X for Ray jobs.
|
4381
4400
|
#
|
4382
4401
|
# * For the `Standard` worker type, each worker provides 4 vCPU, 16 GB
|
4383
4402
|
# of memory and a 50GB disk, and 2 executors per worker.
|
@@ -4395,6 +4414,10 @@ module Aws::Glue
|
|
4395
4414
|
# worker. We recommend this worker type for low volume streaming
|
4396
4415
|
# jobs. This worker type is only available for Glue version 3.0
|
4397
4416
|
# streaming jobs.
|
4417
|
+
#
|
4418
|
+
# * For the `Z.2X` worker type, each worker maps to 2 M-DPU (8 vCPU, 64
|
4419
|
+
# GB of memory, 128 GB disk), and provides up to 8 Ray workers
|
4420
|
+
# based on the autoscaler.
|
4398
4421
|
# @return [String]
|
4399
4422
|
#
|
4400
4423
|
# @!attribute [rw] code_gen_configuration_nodes
|
@@ -5851,13 +5874,18 @@ module Aws::Glue
|
|
5851
5874
|
# A pass or fail status for the rule.
|
5852
5875
|
# @return [String]
|
5853
5876
|
#
|
5877
|
+
# @!attribute [rw] evaluated_metrics
|
5878
|
+
# A map of metrics associated with the evaluation of the rule.
|
5879
|
+
# @return [Hash<String,Float>]
|
5880
|
+
#
|
5854
5881
|
# @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/DataQualityRuleResult AWS API Documentation
|
5855
5882
|
#
|
5856
5883
|
class DataQualityRuleResult < Struct.new(
|
5857
5884
|
:name,
|
5858
5885
|
:description,
|
5859
5886
|
:evaluation_message,
|
5860
|
-
:result
|
5887
|
+
:result,
|
5888
|
+
:evaluated_metrics)
|
5861
5889
|
SENSITIVE = []
|
5862
5890
|
include Aws::Structure
|
5863
5891
|
end
|
@@ -6015,11 +6043,16 @@ module Aws::Glue
|
|
6015
6043
|
# The name of the database where the Glue table exists.
|
6016
6044
|
# @return [String]
|
6017
6045
|
#
|
6046
|
+
# @!attribute [rw] catalog_id
|
6047
|
+
# The catalog ID where the Glue table exists.
|
6048
|
+
# @return [String]
|
6049
|
+
#
|
6018
6050
|
# @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/DataQualityTargetTable AWS API Documentation
|
6019
6051
|
#
|
6020
6052
|
class DataQualityTargetTable < Struct.new(
|
6021
6053
|
:table_name,
|
6022
|
-
:database_name
|
6054
|
+
:database_name,
|
6055
|
+
:catalog_id)
|
6023
6056
|
SENSITIVE = []
|
6024
6057
|
include Aws::Structure
|
6025
6058
|
end
|
@@ -7759,6 +7792,52 @@ module Aws::Glue
|
|
7759
7792
|
include Aws::Structure
|
7760
7793
|
end
|
7761
7794
|
|
7795
|
+
# Specifies your data quality evaluation criteria.
|
7796
|
+
#
|
7797
|
+
# @!attribute [rw] name
|
7798
|
+
# The name of the data quality evaluation.
|
7799
|
+
# @return [String]
|
7800
|
+
#
|
7801
|
+
# @!attribute [rw] inputs
|
7802
|
+
# The inputs of your data quality evaluation. The first input in this
|
7803
|
+
# list is the primary data source.
|
7804
|
+
# @return [Array<String>]
|
7805
|
+
#
|
7806
|
+
# @!attribute [rw] additional_data_sources
|
7807
|
+
# The aliases of all data sources except primary.
|
7808
|
+
# @return [Hash<String,String>]
|
7809
|
+
#
|
7810
|
+
# @!attribute [rw] ruleset
|
7811
|
+
# The ruleset for your data quality evaluation.
|
7812
|
+
# @return [String]
|
7813
|
+
#
|
7814
|
+
# @!attribute [rw] publishing_options
|
7815
|
+
# Options to configure how your results are published.
|
7816
|
+
# @return [Types::DQResultsPublishingOptions]
|
7817
|
+
#
|
7818
|
+
# @!attribute [rw] additional_options
|
7819
|
+
# Options to configure runtime behavior of the transform.
|
7820
|
+
# @return [Hash<String,String>]
|
7821
|
+
#
|
7822
|
+
# @!attribute [rw] stop_job_on_failure_options
|
7823
|
+
# Options to configure how your job will stop if your data quality
|
7824
|
+
# evaluation fails.
|
7825
|
+
# @return [Types::DQStopJobOnFailureOptions]
|
7826
|
+
#
|
7827
|
+
# @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/EvaluateDataQualityMultiFrame AWS API Documentation
|
7828
|
+
#
|
7829
|
+
class EvaluateDataQualityMultiFrame < Struct.new(
|
7830
|
+
:name,
|
7831
|
+
:inputs,
|
7832
|
+
:additional_data_sources,
|
7833
|
+
:ruleset,
|
7834
|
+
:publishing_options,
|
7835
|
+
:additional_options,
|
7836
|
+
:stop_job_on_failure_options)
|
7837
|
+
SENSITIVE = []
|
7838
|
+
include Aws::Structure
|
7839
|
+
end
|
7840
|
+
|
7762
7841
|
# Evaluation metrics provide an estimate of the quality of your machine
|
7763
7842
|
# learning transform.
|
7764
7843
|
#
|
@@ -9008,6 +9087,11 @@ module Aws::Glue
|
|
9008
9087
|
# A list of result IDs for the data quality results for the run.
|
9009
9088
|
# @return [Array<String>]
|
9010
9089
|
#
|
9090
|
+
# @!attribute [rw] additional_data_sources
|
9091
|
+
# A map of reference strings to additional data sources you can
|
9092
|
+
# specify for an evaluation run.
|
9093
|
+
# @return [Hash<String,Types::DataSource>]
|
9094
|
+
#
|
9011
9095
|
# @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/GetDataQualityRulesetEvaluationRunResponse AWS API Documentation
|
9012
9096
|
#
|
9013
9097
|
class GetDataQualityRulesetEvaluationRunResponse < Struct.new(
|
@@ -9024,7 +9108,8 @@ module Aws::Glue
|
|
9024
9108
|
:completed_on,
|
9025
9109
|
:execution_time,
|
9026
9110
|
:ruleset_names,
|
9027
|
-
:result_ids
|
9111
|
+
:result_ids,
|
9112
|
+
:additional_data_sources)
|
9028
9113
|
SENSITIVE = []
|
9029
9114
|
include Aws::Structure
|
9030
9115
|
end
|
@@ -12097,28 +12182,39 @@ module Aws::Glue
|
|
12097
12182
|
# @return [Types::JobCommand]
|
12098
12183
|
#
|
12099
12184
|
# @!attribute [rw] default_arguments
|
12100
|
-
# The default arguments for this job, specified as
|
12185
|
+
# The default arguments for every run of this job, specified as
|
12186
|
+
# name-value pairs.
|
12101
12187
|
#
|
12102
12188
|
# You can specify arguments here that your own job-execution script
|
12103
12189
|
# consumes, as well as arguments that Glue itself consumes.
|
12104
12190
|
#
|
12191
|
+
# Job arguments may be logged. Do not pass plaintext secrets as
|
12192
|
+
# arguments. Retrieve secrets from a Glue Connection, Secrets Manager
|
12193
|
+
# or other secret management mechanism if you intend to keep them
|
12194
|
+
# within the Job.
|
12195
|
+
#
|
12105
12196
|
# For information about how to specify and consume your own Job
|
12106
12197
|
# arguments, see the [Calling Glue APIs in Python][1] topic in the
|
12107
12198
|
# developer guide.
|
12108
12199
|
#
|
12109
|
-
# For information about the
|
12110
|
-
#
|
12111
|
-
# the developer guide.
|
12200
|
+
# For information about the arguments you can provide to this field
|
12201
|
+
# when configuring Spark jobs, see the [Special Parameters Used by
|
12202
|
+
# Glue][2] topic in the developer guide.
|
12203
|
+
#
|
12204
|
+
# For information about the arguments you can provide to this field
|
12205
|
+
# when configuring Ray jobs, see [Using job parameters in Ray jobs][3]
|
12206
|
+
# in the developer guide.
|
12112
12207
|
#
|
12113
12208
|
#
|
12114
12209
|
#
|
12115
12210
|
# [1]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html
|
12116
12211
|
# [2]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
|
12212
|
+
# [3]: https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-job-parameters.html
|
12117
12213
|
# @return [Hash<String,String>]
|
12118
12214
|
#
|
12119
12215
|
# @!attribute [rw] non_overridable_arguments
|
12120
|
-
#
|
12121
|
-
# pairs.
|
12216
|
+
# Arguments for this job that are not overridden when providing job
|
12217
|
+
# arguments in a job run, specified as name-value pairs.
|
12122
12218
|
# @return [Hash<String,String>]
|
12123
12219
|
#
|
12124
12220
|
# @!attribute [rw] connections
|
@@ -12156,7 +12252,7 @@ module Aws::Glue
|
|
12156
12252
|
# type, the number of Glue data processing units (DPUs) that can be
|
12157
12253
|
# allocated when this job runs. A DPU is a relative measure of
|
12158
12254
|
# processing power that consists of 4 vCPUs of compute capacity and 16
|
12159
|
-
# GB of memory. For more information, see the [Glue pricing page][1].
|
12255
|
+
# GB of memory. For more information, see the [Glue pricing page][1].
|
12160
12256
|
#
|
12161
12257
|
# For Glue version 2.0 or later jobs, you cannot specify a `Maximum
|
12162
12258
|
# capacity`. Instead, you should specify a `Worker type` and the
|
@@ -12186,7 +12282,8 @@ module Aws::Glue
|
|
12186
12282
|
#
|
12187
12283
|
# @!attribute [rw] worker_type
|
12188
12284
|
# The type of predefined worker that is allocated when a job runs.
|
12189
|
-
# Accepts a value of Standard, G.1X, G.2X, or G.025X
|
12285
|
+
# Accepts a value of Standard, G.1X, G.2X, G.4X, G.8X, or G.025X for
|
12286
|
+
# Spark jobs. Accepts the value Z.2X for Ray jobs.
|
12190
12287
|
#
|
12191
12288
|
# * For the `Standard` worker type, each worker provides 4 vCPU, 16 GB
|
12192
12289
|
# of memory and a 50GB disk, and 2 executors per worker.
|
@@ -12207,20 +12304,30 @@ module Aws::Glue
|
|
12207
12304
|
# GB of memory, 256 GB disk), and provides 1 executor per worker. We
|
12208
12305
|
# recommend this worker type for jobs whose workloads contain your
|
12209
12306
|
# most demanding transforms, aggregations, joins, and queries. This
|
12210
|
-
# worker type is available only for Glue version 3.0 or later
|
12307
|
+
# worker type is available only for Glue version 3.0 or later Spark
|
12308
|
+
# ETL jobs in the following Amazon Web Services Regions: US East
|
12309
|
+
# (Ohio), US East (N. Virginia), US West (Oregon), Asia Pacific
|
12310
|
+
# (Singapore), Asia Pacific (Sydney), Asia Pacific (Tokyo), Canada
|
12311
|
+
# (Central), Europe (Frankfurt), Europe (Ireland), and Europe
|
12312
|
+
# (Stockholm).
|
12211
12313
|
#
|
12212
12314
|
# * For the `G.8X` worker type, each worker maps to 8 DPU (32 vCPU,
|
12213
12315
|
# 128 GB of memory, 512 GB disk), and provides 1 executor per
|
12214
12316
|
# worker. We recommend this worker type for jobs whose workloads
|
12215
12317
|
# contain your most demanding transforms, aggregations, joins, and
|
12216
12318
|
# queries. This worker type is available only for Glue version 3.0
|
12217
|
-
# or later jobs
|
12319
|
+
# or later Spark ETL jobs, in the same Amazon Web Services Regions
|
12320
|
+
# as supported for the `G.4X` worker type.
|
12218
12321
|
#
|
12219
12322
|
# * For the `G.025X` worker type, each worker maps to 0.25 DPU (2
|
12220
12323
|
# vCPU, 4 GB of memory, 64 GB disk), and provides 1 executor per
|
12221
12324
|
# worker. We recommend this worker type for low volume streaming
|
12222
12325
|
# jobs. This worker type is only available for Glue version 3.0
|
12223
12326
|
# streaming jobs.
|
12327
|
+
#
|
12328
|
+
# * For the `Z.2X` worker type, each worker maps to 2 M-DPU (8 vCPU, 64
|
12329
|
+
# GB of memory, 128 GB disk), and provides a default of 8 Ray
|
12330
|
+
# workers (1 per vCPU).
|
12224
12331
|
# @return [String]
|
12225
12332
|
#
|
12226
12333
|
# @!attribute [rw] number_of_workers
|
@@ -12238,9 +12345,14 @@ module Aws::Glue
|
|
12238
12345
|
# @return [Types::NotificationProperty]
|
12239
12346
|
#
|
12240
12347
|
# @!attribute [rw] glue_version
|
12241
|
-
#
|
12242
|
-
# Glue
|
12243
|
-
# for jobs of type Spark.
|
12348
|
+
# In Spark jobs, `GlueVersion` determines the versions of Apache Spark
|
12349
|
+
# and Python that Glue makes available in a job. The Python version
|
12350
|
+
# indicates the version supported for jobs of type Spark.
|
12351
|
+
#
|
12352
|
+
# Ray jobs should set `GlueVersion` to `4.0` or greater. However, the
|
12353
|
+
# versions of Ray, Python and additional libraries available in your
|
12354
|
+
# Ray job are determined by the `Runtime` parameter of the Job
|
12355
|
+
# command.
|
12244
12356
|
#
|
12245
12357
|
# For more information about the available Glue versions and
|
12246
12358
|
# corresponding Spark and Python versions, see [Glue version][1] in
|
@@ -12378,7 +12490,8 @@ module Aws::Glue
|
|
12378
12490
|
# @!attribute [rw] name
|
12379
12491
|
# The name of the job command. For an Apache Spark ETL job, this must
|
12380
12492
|
# be `glueetl`. For a Python shell job, it must be `pythonshell`. For
|
12381
|
-
# an Apache Spark streaming ETL job, this must be `gluestreaming`.
|
12493
|
+
# an Apache Spark streaming ETL job, this must be `gluestreaming`. For
|
12494
|
+
# a Ray job, this must be `glueray`.
|
12382
12495
|
# @return [String]
|
12383
12496
|
#
|
12384
12497
|
# @!attribute [rw] script_location
|
@@ -12391,12 +12504,24 @@ module Aws::Glue
|
|
12391
12504
|
# values are 2 or 3.
|
12392
12505
|
# @return [String]
|
12393
12506
|
#
|
12507
|
+
# @!attribute [rw] runtime
|
12508
|
+
# In Ray jobs, Runtime is used to specify the versions of Ray, Python
|
12509
|
+
# and additional libraries available in your environment. This field
|
12510
|
+
# is not used in other job types. For supported runtime environment
|
12511
|
+
# values, see [Working with Ray jobs][1] in the Glue Developer Guide.
|
12512
|
+
#
|
12513
|
+
#
|
12514
|
+
#
|
12515
|
+
# [1]: https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-runtimes.html
|
12516
|
+
# @return [String]
|
12517
|
+
#
|
12394
12518
|
# @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/JobCommand AWS API Documentation
|
12395
12519
|
#
|
12396
12520
|
class JobCommand < Struct.new(
|
12397
12521
|
:name,
|
12398
12522
|
:script_location,
|
12399
|
-
:python_version
|
12523
|
+
:python_version,
|
12524
|
+
:runtime)
|
12400
12525
|
SENSITIVE = []
|
12401
12526
|
include Aws::Structure
|
12402
12527
|
end
|
@@ -12467,18 +12592,28 @@ module Aws::Glue
|
|
12467
12592
|
# You can specify arguments here that your own job-execution script
|
12468
12593
|
# consumes, as well as arguments that Glue itself consumes.
|
12469
12594
|
#
|
12470
|
-
#
|
12595
|
+
# Job arguments may be logged. Do not pass plaintext secrets as
|
12596
|
+
# arguments. Retrieve secrets from a Glue Connection, Secrets Manager
|
12597
|
+
# or other secret management mechanism if you intend to keep them
|
12598
|
+
# within the Job.
|
12599
|
+
#
|
12600
|
+
# For information about how to specify and consume your own Job
|
12471
12601
|
# arguments, see the [Calling Glue APIs in Python][1] topic in the
|
12472
12602
|
# developer guide.
|
12473
12603
|
#
|
12474
|
-
# For information about the
|
12475
|
-
#
|
12476
|
-
# the developer guide.
|
12604
|
+
# For information about the arguments you can provide to this field
|
12605
|
+
# when configuring Spark jobs, see the [Special Parameters Used by
|
12606
|
+
# Glue][2] topic in the developer guide.
|
12607
|
+
#
|
12608
|
+
# For information about the arguments you can provide to this field
|
12609
|
+
# when configuring Ray jobs, see [Using job parameters in Ray jobs][3]
|
12610
|
+
# in the developer guide.
|
12477
12611
|
#
|
12478
12612
|
#
|
12479
12613
|
#
|
12480
12614
|
# [1]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html
|
12481
12615
|
# [2]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
|
12616
|
+
# [3]: https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-job-parameters.html
|
12482
12617
|
# @return [Hash<String,String>]
|
12483
12618
|
#
|
12484
12619
|
# @!attribute [rw] error_message
|
@@ -12518,24 +12653,31 @@ module Aws::Glue
|
|
12518
12653
|
# @return [Integer]
|
12519
12654
|
#
|
12520
12655
|
# @!attribute [rw] max_capacity
|
12521
|
-
#
|
12656
|
+
# For Glue version 1.0 or earlier jobs, using the standard worker
|
12657
|
+
# type, the number of Glue data processing units (DPUs) that can be
|
12522
12658
|
# allocated when this job runs. A DPU is a relative measure of
|
12523
12659
|
# processing power that consists of 4 vCPUs of compute capacity and 16
|
12524
|
-
# GB of memory. For more information, see the [Glue pricing page][1].
|
12660
|
+
# GB of memory. For more information, see the [Glue pricing page][1].
|
12661
|
+
#
|
12662
|
+
# For Glue version 2.0+ jobs, you cannot specify a `Maximum capacity`.
|
12663
|
+
# Instead, you should specify a `Worker type` and the `Number of
|
12664
|
+
# workers`.
|
12525
12665
|
#
|
12526
|
-
# Do not set `
|
12666
|
+
# Do not set `MaxCapacity` if using `WorkerType` and
|
12527
12667
|
# `NumberOfWorkers`.
|
12528
12668
|
#
|
12529
12669
|
# The value that can be allocated for `MaxCapacity` depends on whether
|
12530
|
-
# you are running a Python shell job
|
12670
|
+
# you are running a Python shell job, an Apache Spark ETL job, or an
|
12671
|
+
# Apache Spark streaming ETL job:
|
12531
12672
|
#
|
12532
12673
|
# * When you specify a Python shell job
|
12533
12674
|
# (`JobCommand.Name`="pythonshell"), you can allocate either
|
12534
12675
|
# 0.0625 or 1 DPU. The default is 0.0625 DPU.
|
12535
12676
|
#
|
12536
12677
|
# * When you specify an Apache Spark ETL job
|
12537
|
-
# (`JobCommand.Name`="glueetl")
|
12538
|
-
#
|
12678
|
+
# (`JobCommand.Name`="glueetl") or Apache Spark streaming ETL job
|
12679
|
+
# (`JobCommand.Name`="gluestreaming"), you can allocate from 2 to
|
12680
|
+
# 100 DPUs. The default is 10 DPUs. This job type cannot have a
|
12539
12681
|
# fractional DPU allocation.
|
12540
12682
|
#
|
12541
12683
|
#
|
@@ -12545,22 +12687,29 @@ module Aws::Glue
|
|
12545
12687
|
#
|
12546
12688
|
# @!attribute [rw] worker_type
|
12547
12689
|
# The type of predefined worker that is allocated when a job runs.
|
12548
|
-
# Accepts a value of Standard, G.1X, G.2X, or G.025X.
|
12690
|
+
# Accepts a value of Standard, G.1X, G.2X, or G.025X for Spark jobs.
|
12691
|
+
# Accepts the value Z.2X for Ray jobs.
|
12549
12692
|
#
|
12550
12693
|
# * For the `Standard` worker type, each worker provides 4 vCPU, 16 GB
|
12551
12694
|
# of memory and a 50GB disk, and 2 executors per worker.
|
12552
12695
|
#
|
12553
|
-
# * For the `G.1X` worker type, each worker
|
12554
|
-
# memory
|
12696
|
+
# * For the `G.1X` worker type, each worker maps to 1 DPU (4 vCPU, 16
|
12697
|
+
# GB of memory, 64 GB disk), and provides 1 executor per worker. We
|
12698
|
+
# recommend this worker type for memory-intensive jobs.
|
12555
12699
|
#
|
12556
|
-
# * For the `G.2X` worker type, each worker
|
12557
|
-
# memory
|
12700
|
+
# * For the `G.2X` worker type, each worker maps to 2 DPU (8 vCPU, 32
|
12701
|
+
# GB of memory, 128 GB disk), and provides 1 executor per worker. We
|
12702
|
+
# recommend this worker type for memory-intensive jobs.
|
12558
12703
|
#
|
12559
12704
|
# * For the `G.025X` worker type, each worker maps to 0.25 DPU (2
|
12560
12705
|
# vCPU, 4 GB of memory, 64 GB disk), and provides 1 executor per
|
12561
12706
|
# worker. We recommend this worker type for low volume streaming
|
12562
12707
|
# jobs. This worker type is only available for Glue version 3.0
|
12563
12708
|
# streaming jobs.
|
12709
|
+
#
|
12710
|
+
# * For the `Z.2X` worker type, each worker maps to 2 M-DPU (8 vCPU, 64
|
12711
|
+
# GB of memory, 128 GB disk), and provides up to 8 Ray workers (one
|
12712
|
+
# per vCPU) based on the autoscaler.
|
12564
12713
|
# @return [String]
|
12565
12714
|
#
|
12566
12715
|
# @!attribute [rw] number_of_workers
|
@@ -12588,9 +12737,14 @@ module Aws::Glue
|
|
12588
12737
|
# @return [Types::NotificationProperty]
|
12589
12738
|
#
|
12590
12739
|
# @!attribute [rw] glue_version
|
12591
|
-
#
|
12592
|
-
# Glue
|
12593
|
-
# for jobs of type Spark.
|
12740
|
+
# In Spark jobs, `GlueVersion` determines the versions of Apache Spark
|
12741
|
+
# and Python that Glue makes available in a job. The Python version
|
12742
|
+
# indicates the version supported for jobs of type Spark.
|
12743
|
+
#
|
12744
|
+
# Ray jobs should set `GlueVersion` to `4.0` or greater. However, the
|
12745
|
+
# versions of Ray, Python and additional libraries available in your
|
12746
|
+
# Ray job are determined by the `Runtime` parameter of the Job
|
12747
|
+
# command.
|
12594
12748
|
#
|
12595
12749
|
# For more information about the available Glue versions and
|
12596
12750
|
# corresponding Spark and Python versions, see [Glue version][1] in
|
@@ -12687,28 +12841,39 @@ module Aws::Glue
|
|
12687
12841
|
# @return [Types::JobCommand]
|
12688
12842
|
#
|
12689
12843
|
# @!attribute [rw] default_arguments
|
12690
|
-
# The default arguments for this job
|
12844
|
+
# The default arguments for every run of this job, specified as
|
12845
|
+
# name-value pairs.
|
12691
12846
|
#
|
12692
12847
|
# You can specify arguments here that your own job-execution script
|
12693
12848
|
# consumes, as well as arguments that Glue itself consumes.
|
12694
12849
|
#
|
12850
|
+
# Job arguments may be logged. Do not pass plaintext secrets as
|
12851
|
+
# arguments. Retrieve secrets from a Glue Connection, Secrets Manager
|
12852
|
+
# or other secret management mechanism if you intend to keep them
|
12853
|
+
# within the Job.
|
12854
|
+
#
|
12695
12855
|
# For information about how to specify and consume your own Job
|
12696
12856
|
# arguments, see the [Calling Glue APIs in Python][1] topic in the
|
12697
12857
|
# developer guide.
|
12698
12858
|
#
|
12699
|
-
# For information about the
|
12700
|
-
#
|
12701
|
-
# the developer guide.
|
12859
|
+
# For information about the arguments you can provide to this field
|
12860
|
+
# when configuring Spark jobs, see the [Special Parameters Used by
|
12861
|
+
# Glue][2] topic in the developer guide.
|
12862
|
+
#
|
12863
|
+
# For information about the arguments you can provide to this field
|
12864
|
+
# when configuring Ray jobs, see [Using job parameters in Ray jobs][3]
|
12865
|
+
# in the developer guide.
|
12702
12866
|
#
|
12703
12867
|
#
|
12704
12868
|
#
|
12705
12869
|
# [1]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html
|
12706
12870
|
# [2]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
|
12871
|
+
# [3]: https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-job-parameters.html
|
12707
12872
|
# @return [Hash<String,String>]
|
12708
12873
|
#
|
12709
12874
|
# @!attribute [rw] non_overridable_arguments
|
12710
|
-
#
|
12711
|
-
# pairs.
|
12875
|
+
# Arguments for this job that are not overridden when providing job
|
12876
|
+
# arguments in a job run, specified as name-value pairs.
|
12712
12877
|
# @return [Hash<String,String>]
|
12713
12878
|
#
|
12714
12879
|
# @!attribute [rw] connections
|
@@ -12744,13 +12909,18 @@ module Aws::Glue
|
|
12744
12909
|
# type, the number of Glue data processing units (DPUs) that can be
|
12745
12910
|
# allocated when this job runs. A DPU is a relative measure of
|
12746
12911
|
# processing power that consists of 4 vCPUs of compute capacity and 16
|
12747
|
-
# GB of memory. For more information, see the [Glue pricing page][1].
|
12912
|
+
# GB of memory. For more information, see the [Glue pricing page][1].
|
12913
|
+
#
|
12914
|
+
# For Glue version 2.0+ jobs, you cannot specify a `Maximum capacity`.
|
12915
|
+
# Instead, you should specify a `Worker type` and the `Number of
|
12916
|
+
# workers`.
|
12748
12917
|
#
|
12749
|
-
# Do not set `
|
12918
|
+
# Do not set `MaxCapacity` if using `WorkerType` and
|
12750
12919
|
# `NumberOfWorkers`.
|
12751
12920
|
#
|
12752
12921
|
# The value that can be allocated for `MaxCapacity` depends on whether
|
12753
|
-
# you are running a Python shell job
|
12922
|
+
# you are running a Python shell job, an Apache Spark ETL job, or an
|
12923
|
+
# Apache Spark streaming ETL job:
|
12754
12924
|
#
|
12755
12925
|
# * When you specify a Python shell job
|
12756
12926
|
# (`JobCommand.Name`="pythonshell"), you can allocate either
|
@@ -12758,14 +12928,10 @@ module Aws::Glue
|
|
12758
12928
|
#
|
12759
12929
|
# * When you specify an Apache Spark ETL job
|
12760
12930
|
# (`JobCommand.Name`="glueetl") or Apache Spark streaming ETL job
|
12761
|
-
# (`JobCommand.Name`="gluestreaming"), you can allocate
|
12762
|
-
#
|
12931
|
+
# (`JobCommand.Name`="gluestreaming"), you can allocate from 2 to
|
12932
|
+
# 100 DPUs. The default is 10 DPUs. This job type cannot have a
|
12763
12933
|
# fractional DPU allocation.
|
12764
12934
|
#
|
12765
|
-
# For Glue version 2.0 jobs, you cannot instead specify a `Maximum
|
12766
|
-
# capacity`. Instead, you should specify a `Worker type` and the
|
12767
|
-
# `Number of workers`.
|
12768
|
-
#
|
12769
12935
|
#
|
12770
12936
|
#
|
12771
12937
|
# [1]: https://aws.amazon.com/glue/pricing/
|
@@ -12773,7 +12939,8 @@ module Aws::Glue
|
|
12773
12939
|
#
|
12774
12940
|
# @!attribute [rw] worker_type
|
12775
12941
|
# The type of predefined worker that is allocated when a job runs.
|
12776
|
-
# Accepts a value of Standard, G.1X, G.2X, or G.025X.
|
12942
|
+
# Accepts a value of Standard, G.1X, G.2X, or G.025X for Spark jobs.
|
12943
|
+
# Accepts the value Z.2X for Ray jobs.
|
12777
12944
|
#
|
12778
12945
|
# * For the `Standard` worker type, each worker provides 4 vCPU, 16 GB
|
12779
12946
|
# of memory and a 50GB disk, and 2 executors per worker.
|
@@ -12791,6 +12958,10 @@ module Aws::Glue
|
|
12791
12958
|
# worker. We recommend this worker type for low volume streaming
|
12792
12959
|
# jobs. This worker type is only available for Glue version 3.0
|
12793
12960
|
# streaming jobs.
|
12961
|
+
#
|
12962
|
+
# * For the `Z.2X` worker type, each worker maps to 2 M-DPU (8 vCPU, 64
|
12963
|
+
# GB of memory, 128 GB disk), and provides up to 8 Ray workers
|
12964
|
+
# based on the autoscaler.
|
12794
12965
|
# @return [String]
|
12795
12966
|
#
|
12796
12967
|
# @!attribute [rw] number_of_workers
|
@@ -12808,14 +12979,22 @@ module Aws::Glue
|
|
12808
12979
|
# @return [Types::NotificationProperty]
|
12809
12980
|
#
|
12810
12981
|
# @!attribute [rw] glue_version
|
12811
|
-
#
|
12812
|
-
# Glue
|
12813
|
-
# for jobs of type Spark.
|
12982
|
+
# In Spark jobs, `GlueVersion` determines the versions of Apache Spark
|
12983
|
+
# and Python that Glue makes available in a job. The Python version
|
12984
|
+
# indicates the version supported for jobs of type Spark.
|
12985
|
+
#
|
12986
|
+
# Ray jobs should set `GlueVersion` to `4.0` or greater. However, the
|
12987
|
+
# versions of Ray, Python and additional libraries available in your
|
12988
|
+
# Ray job are determined by the `Runtime` parameter of the Job
|
12989
|
+
# command.
|
12814
12990
|
#
|
12815
12991
|
# For more information about the available Glue versions and
|
12816
12992
|
# corresponding Spark and Python versions, see [Glue version][1] in
|
12817
12993
|
# the developer guide.
|
12818
12994
|
#
|
12995
|
+
# Jobs that are created without specifying a Glue version default to
|
12996
|
+
# Glue 0.9.
|
12997
|
+
#
|
12819
12998
|
#
|
12820
12999
|
#
|
12821
13000
|
# [1]: https://docs.aws.amazon.com/glue/latest/dg/add-job.html
|
@@ -18167,6 +18346,11 @@ module Aws::Glue
|
|
18167
18346
|
# A list of ruleset names.
|
18168
18347
|
# @return [Array<String>]
|
18169
18348
|
#
|
18349
|
+
# @!attribute [rw] additional_data_sources
|
18350
|
+
# A map of reference strings to additional data sources you can
|
18351
|
+
# specify for an evaluation run.
|
18352
|
+
# @return [Hash<String,Types::DataSource>]
|
18353
|
+
#
|
18170
18354
|
# @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/StartDataQualityRulesetEvaluationRunRequest AWS API Documentation
|
18171
18355
|
#
|
18172
18356
|
class StartDataQualityRulesetEvaluationRunRequest < Struct.new(
|
@@ -18176,7 +18360,8 @@ module Aws::Glue
|
|
18176
18360
|
:timeout,
|
18177
18361
|
:client_token,
|
18178
18362
|
:additional_run_options,
|
18179
|
-
:ruleset_names
|
18363
|
+
:ruleset_names,
|
18364
|
+
:additional_data_sources)
|
18180
18365
|
SENSITIVE = []
|
18181
18366
|
include Aws::Structure
|
18182
18367
|
end
|
@@ -18266,7 +18451,7 @@ module Aws::Glue
|
|
18266
18451
|
# @return [String]
|
18267
18452
|
#
|
18268
18453
|
# @!attribute [rw] arguments
|
18269
|
-
# The job arguments
|
18454
|
+
# The job arguments associated with this run. For this job run, they
|
18270
18455
|
# replace the default arguments set in the job definition itself.
|
18271
18456
|
#
|
18272
18457
|
# You can specify arguments here that your own job-execution script
|
@@ -18281,14 +18466,19 @@ module Aws::Glue
|
|
18281
18466
|
# arguments, see the [Calling Glue APIs in Python][1] topic in the
|
18282
18467
|
# developer guide.
|
18283
18468
|
#
|
18284
|
-
# For information about the
|
18285
|
-
#
|
18286
|
-
# the developer guide.
|
18469
|
+
# For information about the arguments you can provide to this field
|
18470
|
+
# when configuring Spark jobs, see the [Special Parameters Used by
|
18471
|
+
# Glue][2] topic in the developer guide.
|
18472
|
+
#
|
18473
|
+
# For information about the arguments you can provide to this field
|
18474
|
+
# when configuring Ray jobs, see [Using job parameters in Ray jobs][3]
|
18475
|
+
# in the developer guide.
|
18287
18476
|
#
|
18288
18477
|
#
|
18289
18478
|
#
|
18290
18479
|
# [1]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html
|
18291
18480
|
# [2]: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
|
18481
|
+
# [3]: https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-job-parameters.html
|
18292
18482
|
# @return [Hash<String,String>]
|
18293
18483
|
#
|
18294
18484
|
# @!attribute [rw] allocated_capacity
|
@@ -18316,24 +18506,31 @@ module Aws::Glue
|
|
18316
18506
|
# @return [Integer]
|
18317
18507
|
#
|
18318
18508
|
# @!attribute [rw] max_capacity
|
18319
|
-
#
|
18509
|
+
# For Glue version 1.0 or earlier jobs, using the standard worker
|
18510
|
+
# type, the number of Glue data processing units (DPUs) that can be
|
18320
18511
|
# allocated when this job runs. A DPU is a relative measure of
|
18321
18512
|
# processing power that consists of 4 vCPUs of compute capacity and 16
|
18322
|
-
# GB of memory. For more information, see the [Glue pricing page][1].
|
18513
|
+
# GB of memory. For more information, see the [Glue pricing page][1].
|
18323
18514
|
#
|
18324
|
-
#
|
18515
|
+
# For Glue version 2.0+ jobs, you cannot specify a `Maximum capacity`.
|
18516
|
+
# Instead, you should specify a `Worker type` and the `Number of
|
18517
|
+
# workers`.
|
18518
|
+
#
|
18519
|
+
# Do not set `MaxCapacity` if using `WorkerType` and
|
18325
18520
|
# `NumberOfWorkers`.
|
18326
18521
|
#
|
18327
18522
|
# The value that can be allocated for `MaxCapacity` depends on whether
|
18328
|
-
# you are running a Python shell job,
|
18523
|
+
# you are running a Python shell job, an Apache Spark ETL job, or an
|
18524
|
+
# Apache Spark streaming ETL job:
|
18329
18525
|
#
|
18330
18526
|
# * When you specify a Python shell job
|
18331
18527
|
# (`JobCommand.Name`="pythonshell"), you can allocate either
|
18332
18528
|
# 0.0625 or 1 DPU. The default is 0.0625 DPU.
|
18333
18529
|
#
|
18334
18530
|
# * When you specify an Apache Spark ETL job
|
18335
|
-
# (`JobCommand.Name`="glueetl")
|
18336
|
-
#
|
18531
|
+
# (`JobCommand.Name`="glueetl") or Apache Spark streaming ETL job
|
18532
|
+
# (`JobCommand.Name`="gluestreaming"), you can allocate from 2 to
|
18533
|
+
# 100 DPUs. The default is 10 DPUs. This job type cannot have a
|
18337
18534
|
# fractional DPU allocation.
|
18338
18535
|
#
|
18339
18536
|
#
|
@@ -18352,22 +18549,29 @@ module Aws::Glue
|
|
18352
18549
|
#
|
18353
18550
|
# @!attribute [rw] worker_type
|
18354
18551
|
# The type of predefined worker that is allocated when a job runs.
|
18355
|
-
# Accepts a value of Standard, G.1X, G.2X, or G.025X.
|
18552
|
+
# Accepts a value of Standard, G.1X, G.2X, or G.025X for Spark jobs.
|
18553
|
+
# Accepts the value Z.2X for Ray jobs.
|
18356
18554
|
#
|
18357
18555
|
# * For the `Standard` worker type, each worker provides 4 vCPU, 16 GB
|
18358
18556
|
# of memory and a 50GB disk, and 2 executors per worker.
|
18359
18557
|
#
|
18360
|
-
# * For the `G.1X` worker type, each worker
|
18361
|
-
# memory
|
18558
|
+
# * For the `G.1X` worker type, each worker maps to 1 DPU (4 vCPU, 16
|
18559
|
+
# GB of memory, 64 GB disk), and provides 1 executor per worker. We
|
18560
|
+
# recommend this worker type for memory-intensive jobs.
|
18362
18561
|
#
|
18363
|
-
# * For the `G.2X` worker type, each worker
|
18364
|
-
# memory
|
18562
|
+
# * For the `G.2X` worker type, each worker maps to 2 DPU (8 vCPU, 32
|
18563
|
+
# GB of memory, 128 GB disk), and provides 1 executor per worker. We
|
18564
|
+
# recommend this worker type for memory-intensive jobs.
|
18365
18565
|
#
|
18366
18566
|
# * For the `G.025X` worker type, each worker maps to 0.25 DPU (2
|
18367
18567
|
# vCPU, 4 GB of memory, 64 GB disk), and provides 1 executor per
|
18368
18568
|
# worker. We recommend this worker type for low volume streaming
|
18369
18569
|
# jobs. This worker type is only available for Glue version 3.0
|
18370
18570
|
# streaming jobs.
|
18571
|
+
#
|
18572
|
+
# * For the `Z.2X` worker type, each worker maps to 2 DPU (8 vCPU, 64
|
18573
|
+
# GB of memory, 128 GB disk), and provides up to 8 Ray workers (one
|
18574
|
+
# per vCPU) based on the autoscaler.
|
18371
18575
|
# @return [String]
|
18372
18576
|
#
|
18373
18577
|
# @!attribute [rw] number_of_workers
|