aws-sdk-glue 1.227.0 → 1.228.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -657,6 +657,30 @@ module Aws::Glue
657
657
  include Aws::Structure
658
658
  end
659
659
 
660
+ # Specifies configuration options for automatic data quality evaluation
661
+ # in Glue jobs. This structure enables automated data quality checks and
662
+ # monitoring during ETL operations, helping to ensure data integrity and
663
+ # reliability without manual intervention.
664
+ #
665
+ # @!attribute [rw] is_enabled
666
+ # Specifies whether automatic data quality evaluation is enabled. When
667
+ # set to `true`, data quality checks are performed automatically.
668
+ # @return [Boolean]
669
+ #
670
+ # @!attribute [rw] evaluation_context
671
+ # The evaluation context for the automatic data quality checks. This
672
+ # defines the scope and parameters for the data quality evaluation.
673
+ # @return [String]
674
+ #
675
+ # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/AutoDataQuality AWS API Documentation
676
+ #
677
+ class AutoDataQuality < Struct.new(
678
+ :is_enabled,
679
+ :evaluation_context)
680
+ SENSITIVE = []
681
+ include Aws::Structure
682
+ end
683
+
660
684
  # A list of errors that can occur when registering partition indexes for
661
685
  # an existing table.
662
686
  #
@@ -2105,6 +2129,41 @@ module Aws::Glue
2105
2129
  include Aws::Structure
2106
2130
  end
2107
2131
 
2132
+ # Specifies an Apache Iceberg data source that is registered in the Glue
2133
+ # Data Catalog.
2134
+ #
2135
+ # @!attribute [rw] name
2136
+ # The name of the Iceberg data source.
2137
+ # @return [String]
2138
+ #
2139
+ # @!attribute [rw] database
2140
+ # The name of the database to read from.
2141
+ # @return [String]
2142
+ #
2143
+ # @!attribute [rw] table
2144
+ # The name of the table in the database to read from.
2145
+ # @return [String]
2146
+ #
2147
+ # @!attribute [rw] additional_iceberg_options
2148
+ # Specifies additional connection options for the Iceberg data source.
2149
+ # @return [Hash<String,String>]
2150
+ #
2151
+ # @!attribute [rw] output_schemas
2152
+ # Specifies the data schema for the Iceberg source.
2153
+ # @return [Array<Types::GlueSchema>]
2154
+ #
2155
+ # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/CatalogIcebergSource AWS API Documentation
2156
+ #
2157
+ class CatalogIcebergSource < Struct.new(
2158
+ :name,
2159
+ :database,
2160
+ :table,
2161
+ :additional_iceberg_options,
2162
+ :output_schemas)
2163
+ SENSITIVE = []
2164
+ include Aws::Structure
2165
+ end
2166
+
2108
2167
  # A structure containing migration status information.
2109
2168
  #
2110
2169
  # @!attribute [rw] import_completed
@@ -2362,12 +2421,23 @@ module Aws::Glue
2362
2421
  # The name of the table in the database to read from.
2363
2422
  # @return [String]
2364
2423
  #
2424
+ # @!attribute [rw] partition_predicate
2425
+ # Partitions satisfying this predicate are deleted. Files within the
2426
+ # retention period in these partitions are not deleted.
2427
+ # @return [String]
2428
+ #
2429
+ # @!attribute [rw] output_schemas
2430
+ # Specifies the data schema for the catalog source.
2431
+ # @return [Array<Types::GlueSchema>]
2432
+ #
2365
2433
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/CatalogSource AWS API Documentation
2366
2434
  #
2367
2435
  class CatalogSource < Struct.new(
2368
2436
  :name,
2369
2437
  :database,
2370
- :table)
2438
+ :table,
2439
+ :partition_predicate,
2440
+ :output_schemas)
2371
2441
  SENSITIVE = []
2372
2442
  include Aws::Structure
2373
2443
  end
@@ -2536,11 +2606,6 @@ module Aws::Glue
2536
2606
  # Amazon S3.
2537
2607
  # @return [Types::S3CsvSource]
2538
2608
  #
2539
- # @!attribute [rw] s3_excel_source
2540
- # Defines configuration parameters for reading Excel files from Amazon
2541
- # S3.
2542
- # @return [Types::S3ExcelSource]
2543
- #
2544
2609
  # @!attribute [rw] s3_json_source
2545
2610
  # Specifies a JSON data store stored in Amazon S3.
2546
2611
  # @return [Types::S3JsonSource]
@@ -2584,20 +2649,10 @@ module Aws::Glue
2584
2649
  # columnar storage.
2585
2650
  # @return [Types::S3GlueParquetTarget]
2586
2651
  #
2587
- # @!attribute [rw] s3_hyper_direct_target
2588
- # Defines configuration parameters for writing data to Amazon S3 using
2589
- # HyperDirect optimization.
2590
- # @return [Types::S3HyperDirectTarget]
2591
- #
2592
2652
  # @!attribute [rw] s3_direct_target
2593
2653
  # Specifies a data target that writes to Amazon S3.
2594
2654
  # @return [Types::S3DirectTarget]
2595
2655
  #
2596
- # @!attribute [rw] s3_iceberg_direct_target
2597
- # Defines configuration parameters for writing data to Amazon S3 as an
2598
- # Apache Iceberg table.
2599
- # @return [Types::S3IcebergDirectTarget]
2600
- #
2601
2656
  # @!attribute [rw] apply_mapping
2602
2657
  # Specifies a transform that maps data property keys in the data
2603
2658
  # source to data property keys in the data target. You can rename
@@ -2757,6 +2812,11 @@ module Aws::Glue
2757
2812
  # Specifies a target that uses Postgres SQL.
2758
2813
  # @return [Types::PostgreSQLCatalogTarget]
2759
2814
  #
2815
+ # @!attribute [rw] route
2816
+ # Specifies a route node that directs data to different output paths
2817
+ # based on defined filtering conditions.
2818
+ # @return [Types::Route]
2819
+ #
2760
2820
  # @!attribute [rw] dynamic_transform
2761
2821
  # Specifies a custom visual transform created by a user.
2762
2822
  # @return [Types::DynamicTransform]
@@ -2849,6 +2909,42 @@ module Aws::Glue
2849
2909
  # Specifies a target generated with standard connection options.
2850
2910
  # @return [Types::ConnectorDataTarget]
2851
2911
  #
2912
+ # @!attribute [rw] s3_catalog_iceberg_source
2913
+ # Specifies an Apache Iceberg data source that is registered in the
2914
+ # Glue Data Catalog. The Iceberg data source must be stored in Amazon
2915
+ # S3.
2916
+ # @return [Types::S3CatalogIcebergSource]
2917
+ #
2918
+ # @!attribute [rw] catalog_iceberg_source
2919
+ # Specifies an Apache Iceberg data source that is registered in the
2920
+ # Glue Data Catalog.
2921
+ # @return [Types::CatalogIcebergSource]
2922
+ #
2923
+ # @!attribute [rw] s3_iceberg_catalog_target
2924
+ # Specifies an Apache Iceberg catalog target that writes data to
2925
+ # Amazon S3 and registers the table in the Glue Data Catalog.
2926
+ # @return [Types::S3IcebergCatalogTarget]
2927
+ #
2928
+ # @!attribute [rw] s3_iceberg_direct_target
2929
+ # Defines configuration parameters for writing data to Amazon S3 as an
2930
+ # Apache Iceberg table.
2931
+ # @return [Types::S3IcebergDirectTarget]
2932
+ #
2933
+ # @!attribute [rw] s3_excel_source
2934
+ # Defines configuration parameters for reading Excel files from Amazon
2935
+ # S3.
2936
+ # @return [Types::S3ExcelSource]
2937
+ #
2938
+ # @!attribute [rw] s3_hyper_direct_target
2939
+ # Defines configuration parameters for writing data to Amazon S3 using
2940
+ # HyperDirect optimization.
2941
+ # @return [Types::S3HyperDirectTarget]
2942
+ #
2943
+ # @!attribute [rw] dynamo_dbelt_connector_source
2944
+ # Specifies a DynamoDB ELT connector source for extracting data from
2945
+ # DynamoDB tables.
2946
+ # @return [Types::DynamoDBELTConnectorSource]
2947
+ #
2852
2948
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/CodeGenConfigurationNode AWS API Documentation
2853
2949
  #
2854
2950
  class CodeGenConfigurationNode < Struct.new(
@@ -2859,7 +2955,6 @@ module Aws::Glue
2859
2955
  :redshift_source,
2860
2956
  :s3_catalog_source,
2861
2957
  :s3_csv_source,
2862
- :s3_excel_source,
2863
2958
  :s3_json_source,
2864
2959
  :s3_parquet_source,
2865
2960
  :relational_catalog_source,
@@ -2870,9 +2965,7 @@ module Aws::Glue
2870
2965
  :redshift_target,
2871
2966
  :s3_catalog_target,
2872
2967
  :s3_glue_parquet_target,
2873
- :s3_hyper_direct_target,
2874
2968
  :s3_direct_target,
2875
- :s3_iceberg_direct_target,
2876
2969
  :apply_mapping,
2877
2970
  :select_fields,
2878
2971
  :drop_fields,
@@ -2905,6 +2998,7 @@ module Aws::Glue
2905
2998
  :my_sql_catalog_target,
2906
2999
  :oracle_sql_catalog_target,
2907
3000
  :postgre_sql_catalog_target,
3001
+ :route,
2908
3002
  :dynamic_transform,
2909
3003
  :evaluate_data_quality,
2910
3004
  :s3_catalog_hudi_source,
@@ -2925,7 +3019,14 @@ module Aws::Glue
2925
3019
  :snowflake_source,
2926
3020
  :snowflake_target,
2927
3021
  :connector_data_source,
2928
- :connector_data_target)
3022
+ :connector_data_target,
3023
+ :s3_catalog_iceberg_source,
3024
+ :catalog_iceberg_source,
3025
+ :s3_iceberg_catalog_target,
3026
+ :s3_iceberg_direct_target,
3027
+ :s3_excel_source,
3028
+ :s3_hyper_direct_target,
3029
+ :dynamo_dbelt_connector_source)
2929
3030
  SENSITIVE = []
2930
3031
  include Aws::Structure
2931
3032
  end
@@ -6212,10 +6313,12 @@ module Aws::Glue
6212
6313
  # contain your most demanding transforms, aggregations, joins, and
6213
6314
  # queries. This worker type is available only for Glue version 3.0
6214
6315
  # or later Spark ETL jobs in the following Amazon Web Services
6215
- # Regions: US East (Ohio), US East (N. Virginia), US West (Oregon),
6216
- # Asia Pacific (Singapore), Asia Pacific (Sydney), Asia Pacific
6217
- # (Tokyo), Canada (Central), Europe (Frankfurt), Europe (Ireland),
6218
- # and Europe (Stockholm).
6316
+ # Regions: US East (Ohio), US East (N. Virginia), US West (N.
6317
+ # California), US West (Oregon), Asia Pacific (Mumbai), Asia Pacific
6318
+ # (Seoul), Asia Pacific (Singapore), Asia Pacific (Sydney), Asia
6319
+ # Pacific (Tokyo), Canada (Central), Europe (Frankfurt), Europe
6320
+ # (Ireland), Europe (London), Europe (Spain), Europe (Stockholm),
6321
+ # and South America (São Paulo).
6219
6322
  #
6220
6323
  # * For the `G.8X` worker type, each worker maps to 8 DPU (32 vCPUs,
6221
6324
  # 128 GB of memory) with 512GB disk, and provides 1 executor per
@@ -7513,6 +7616,92 @@ module Aws::Glue
7513
7616
  include Aws::Structure
7514
7617
  end
7515
7618
 
7619
+ # Specifies additional options for DynamoDB ELT catalog operations.
7620
+ #
7621
+ # @!attribute [rw] dynamodb_export
7622
+ # Specifies the DynamoDB export configuration for the ELT operation.
7623
+ # @return [String]
7624
+ #
7625
+ # @!attribute [rw] dynamodb_unnest_ddb_json
7626
+ # Specifies whether to unnest DynamoDB JSON format. When set to
7627
+ # `true`, nested JSON structures in DynamoDB items are flattened.
7628
+ # @return [Boolean]
7629
+ #
7630
+ # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/DDBELTCatalogAdditionalOptions AWS API Documentation
7631
+ #
7632
+ class DDBELTCatalogAdditionalOptions < Struct.new(
7633
+ :dynamodb_export,
7634
+ :dynamodb_unnest_ddb_json)
7635
+ SENSITIVE = []
7636
+ include Aws::Structure
7637
+ end
7638
+
7639
+ # Specifies connection options for DynamoDB ELT (Extract, Load,
7640
+ # Transform) operations. This structure contains configuration
7641
+ # parameters for connecting to and extracting data from DynamoDB tables
7642
+ # using the ELT connector.
7643
+ #
7644
+ # @!attribute [rw] dynamodb_export
7645
+ # Specifies the export type for DynamoDB data extraction. This
7646
+ # parameter determines how data is exported from the DynamoDB table
7647
+ # during the ELT process.
7648
+ # @return [String]
7649
+ #
7650
+ # @!attribute [rw] dynamodb_unnest_ddb_json
7651
+ # A boolean value that specifies whether to unnest DynamoDB JSON
7652
+ # format during data extraction. When set to `true`, the connector
7653
+ # will flatten nested JSON structures from DynamoDB items. When set to
7654
+ # `false`, the original DynamoDB JSON structure is preserved.
7655
+ # @return [Boolean]
7656
+ #
7657
+ # @!attribute [rw] dynamodb_table_arn
7658
+ # The Amazon Resource Name (ARN) of the DynamoDB table to extract data
7659
+ # from. This parameter specifies the source table for the ELT
7660
+ # operation.
7661
+ # @return [String]
7662
+ #
7663
+ # @!attribute [rw] dynamodb_s3_bucket
7664
+ # The name of the Amazon S3 bucket used for intermediate storage
7665
+ # during the DynamoDB ELT process. This bucket is used to temporarily
7666
+ # store exported DynamoDB data before it is processed by the ELT job.
7667
+ # @return [String]
7668
+ #
7669
+ # @!attribute [rw] dynamodb_s3_prefix
7670
+ # The S3 object key prefix for files stored in the intermediate S3
7671
+ # bucket during the DynamoDB ELT process. This prefix helps organize
7672
+ # and identify the temporary files created during data extraction.
7673
+ # @return [String]
7674
+ #
7675
+ # @!attribute [rw] dynamodb_s3_bucket_owner
7676
+ # The Amazon Web Services account ID of the owner of the S3 bucket
7677
+ # specified in `DynamodbS3Bucket`. This parameter is required when the
7678
+ # S3 bucket is owned by a different Amazon Web Services account than
7679
+ # the one running the ELT job, enabling cross-account access to the
7680
+ # intermediate storage bucket.
7681
+ # @return [String]
7682
+ #
7683
+ # @!attribute [rw] dynamodb_sts_role_arn
7684
+ # The Amazon Resource Name (ARN) of the Amazon Web Services Security
7685
+ # Token Service (STS) role to assume for accessing DynamoDB and S3
7686
+ # resources during the ELT operation. This role must have the
7687
+ # necessary permissions to read from the DynamoDB table and write to
7688
+ # the intermediate S3 bucket.
7689
+ # @return [String]
7690
+ #
7691
+ # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/DDBELTConnectionOptions AWS API Documentation
7692
+ #
7693
+ class DDBELTConnectionOptions < Struct.new(
7694
+ :dynamodb_export,
7695
+ :dynamodb_unnest_ddb_json,
7696
+ :dynamodb_table_arn,
7697
+ :dynamodb_s3_bucket,
7698
+ :dynamodb_s3_prefix,
7699
+ :dynamodb_s3_bucket_owner,
7700
+ :dynamodb_sts_role_arn)
7701
+ SENSITIVE = []
7702
+ include Aws::Structure
7703
+ end
7704
+
7516
7705
  # Options to configure how your data quality evaluation results are
7517
7706
  # published.
7518
7707
  #
@@ -9982,6 +10171,10 @@ module Aws::Glue
9982
10171
  # The temp directory of the JDBC Redshift source.
9983
10172
  # @return [String]
9984
10173
  #
10174
+ # @!attribute [rw] output_schemas
10175
+ # Specifies the data schema for the direct JDBC source.
10176
+ # @return [Array<Types::GlueSchema>]
10177
+ #
9985
10178
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/DirectJDBCSource AWS API Documentation
9986
10179
  #
9987
10180
  class DirectJDBCSource < Struct.new(
@@ -9990,7 +10183,8 @@ module Aws::Glue
9990
10183
  :table,
9991
10184
  :connection_name,
9992
10185
  :connection_type,
9993
- :redshift_tmp_dir)
10186
+ :redshift_tmp_dir,
10187
+ :output_schemas)
9994
10188
  SENSITIVE = []
9995
10189
  include Aws::Structure
9996
10190
  end
@@ -10282,12 +10476,50 @@ module Aws::Glue
10282
10476
  # The name of the table in the database to read from.
10283
10477
  # @return [String]
10284
10478
  #
10479
+ # @!attribute [rw] pitr_enabled
10480
+ # Specifies whether Point-in-Time Recovery (PITR) is enabled for the
10481
+ # DynamoDB table. When set to `true`, allows reading from a specific
10482
+ # point in time. The default value is `false`.
10483
+ # @return [Boolean]
10484
+ #
10485
+ # @!attribute [rw] additional_options
10486
+ # Specifies additional connection options for the DynamoDB data
10487
+ # source.
10488
+ # @return [Types::DDBELTCatalogAdditionalOptions]
10489
+ #
10285
10490
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/DynamoDBCatalogSource AWS API Documentation
10286
10491
  #
10287
10492
  class DynamoDBCatalogSource < Struct.new(
10288
10493
  :name,
10289
10494
  :database,
10290
- :table)
10495
+ :table,
10496
+ :pitr_enabled,
10497
+ :additional_options)
10498
+ SENSITIVE = []
10499
+ include Aws::Structure
10500
+ end
10501
+
10502
+ # Specifies a DynamoDB ELT connector source for extracting data from
10503
+ # DynamoDB tables.
10504
+ #
10505
+ # @!attribute [rw] name
10506
+ # The name of the DynamoDB ELT connector source.
10507
+ # @return [String]
10508
+ #
10509
+ # @!attribute [rw] connection_options
10510
+ # The connection options for the DynamoDB ELT connector source.
10511
+ # @return [Types::DDBELTConnectionOptions]
10512
+ #
10513
+ # @!attribute [rw] output_schemas
10514
+ # Specifies the data schema for the DynamoDB ELT connector source.
10515
+ # @return [Array<Types::GlueSchema>]
10516
+ #
10517
+ # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/DynamoDBELTConnectorSource AWS API Documentation
10518
+ #
10519
+ class DynamoDBELTConnectorSource < Struct.new(
10520
+ :name,
10521
+ :connection_options,
10522
+ :output_schemas)
10291
10523
  SENSITIVE = []
10292
10524
  include Aws::Structure
10293
10525
  end
@@ -15306,11 +15538,16 @@ module Aws::Glue
15306
15538
  # The hive type for this column in the Glue Studio schema.
15307
15539
  # @return [String]
15308
15540
  #
15541
+ # @!attribute [rw] glue_studio_type
15542
+ # The data type of the column as defined in Glue Studio.
15543
+ # @return [String]
15544
+ #
15309
15545
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/GlueStudioSchemaColumn AWS API Documentation
15310
15546
  #
15311
15547
  class GlueStudioSchemaColumn < Struct.new(
15312
15548
  :name,
15313
- :type)
15549
+ :type,
15550
+ :glue_studio_type)
15314
15551
  SENSITIVE = []
15315
15552
  include Aws::Structure
15316
15553
  end
@@ -15489,6 +15726,34 @@ module Aws::Glue
15489
15726
  include Aws::Structure
15490
15727
  end
15491
15728
 
15729
+ # Specifies a group of filters with a logical operator that determines
15730
+ # how the filters are combined to evaluate routing conditions.
15731
+ #
15732
+ # @!attribute [rw] group_name
15733
+ # The name of the filter group.
15734
+ # @return [String]
15735
+ #
15736
+ # @!attribute [rw] filters
15737
+ # A list of filter expressions that define the conditions for this
15738
+ # group.
15739
+ # @return [Array<Types::FilterExpression>]
15740
+ #
15741
+ # @!attribute [rw] logical_operator
15742
+ # The logical operator used to combine the filters in this group.
15743
+ # Determines whether all filters must match (AND) or any filter can
15744
+ # match (OR).
15745
+ # @return [String]
15746
+ #
15747
+ # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/GroupFilters AWS API Documentation
15748
+ #
15749
+ class GroupFilters < Struct.new(
15750
+ :group_name,
15751
+ :filters,
15752
+ :logical_operator)
15753
+ SENSITIVE = []
15754
+ include Aws::Structure
15755
+ end
15756
+
15492
15757
  # Specifies an Apache Hudi data source.
15493
15758
  #
15494
15759
  # @!attribute [rw] paths
@@ -16904,49 +17169,33 @@ module Aws::Glue
16904
17169
  #
16905
17170
  # @!attribute [rw] worker_type
16906
17171
  # The type of predefined worker that is allocated when a job runs.
16907
- # Accepts a value of G.1X, G.2X, G.4X, G.8X or G.025X for Spark jobs.
16908
- # Accepts the value Z.2X for Ray jobs.
16909
17172
  #
16910
- # * For the `G.1X` worker type, each worker maps to 1 DPU (4 vCPUs, 16
16911
- # GB of memory) with 94GB disk, and provides 1 executor per worker.
16912
- # We recommend this worker type for workloads such as data
16913
- # transforms, joins, and queries, to offers a scalable and cost
16914
- # effective way to run most jobs.
17173
+ # Glue provides multiple worker types to accommodate different
17174
+ # workload requirements:
16915
17175
  #
16916
- # * For the `G.2X` worker type, each worker maps to 2 DPU (8 vCPUs, 32
16917
- # GB of memory) with 138GB disk, and provides 1 executor per worker.
16918
- # We recommend this worker type for workloads such as data
16919
- # transforms, joins, and queries, to offers a scalable and cost
16920
- # effective way to run most jobs.
17176
+ # G Worker Types (General-purpose compute workers):
16921
17177
  #
16922
- # * For the `G.4X` worker type, each worker maps to 4 DPU (16 vCPUs,
16923
- # 64 GB of memory) with 256GB disk, and provides 1 executor per
16924
- # worker. We recommend this worker type for jobs whose workloads
16925
- # contain your most demanding transforms, aggregations, joins, and
16926
- # queries. This worker type is available only for Glue version 3.0
16927
- # or later Spark ETL jobs in the following Amazon Web Services
16928
- # Regions: US East (Ohio), US East (N. Virginia), US West (Oregon),
16929
- # Asia Pacific (Singapore), Asia Pacific (Sydney), Asia Pacific
16930
- # (Tokyo), Canada (Central), Europe (Frankfurt), Europe (Ireland),
16931
- # and Europe (Stockholm).
17178
+ # * G.1X: 1 DPU (4 vCPUs, 16 GB memory, 94GB disk)
16932
17179
  #
16933
- # * For the `G.8X` worker type, each worker maps to 8 DPU (32 vCPUs,
16934
- # 128 GB of memory) with 512GB disk, and provides 1 executor per
16935
- # worker. We recommend this worker type for jobs whose workloads
16936
- # contain your most demanding transforms, aggregations, joins, and
16937
- # queries. This worker type is available only for Glue version 3.0
16938
- # or later Spark ETL jobs, in the same Amazon Web Services Regions
16939
- # as supported for the `G.4X` worker type.
17180
+ # * G.2X: 2 DPU (8 vCPUs, 32 GB memory, 138GB disk)
16940
17181
  #
16941
- # * For the `G.025X` worker type, each worker maps to 0.25 DPU (2
16942
- # vCPUs, 4 GB of memory) with 84GB disk, and provides 1 executor per
16943
- # worker. We recommend this worker type for low volume streaming
16944
- # jobs. This worker type is only available for Glue version 3.0 or
16945
- # later streaming jobs.
17182
+ # * G.4X: 4 DPU (16 vCPUs, 64 GB memory, 256GB disk)
16946
17183
  #
16947
- # * For the `Z.2X` worker type, each worker maps to 2 M-DPU (8vCPUs,
16948
- # 64 GB of memory) with 128 GB disk, and provides up to 8 Ray
16949
- # workers based on the autoscaler.
17184
+ # * G.8X: 8 DPU (32 vCPUs, 128 GB memory, 512GB disk)
17185
+ #
17186
+ # * G.12X: 12 DPU (48 vCPUs, 192 GB memory, 768GB disk)
17187
+ #
17188
+ # * G.16X: 16 DPU (64 vCPUs, 256 GB memory, 1024GB disk)
17189
+ #
17190
+ # R Worker Types (Memory-optimized workers):
17191
+ #
17192
+ # * R.1X: 1 M-DPU (4 vCPUs, 32 GB memory)
17193
+ #
17194
+ # * R.2X: 2 M-DPU (8 vCPUs, 64 GB memory)
17195
+ #
17196
+ # * R.4X: 4 M-DPU (16 vCPUs, 128 GB memory)
17197
+ #
17198
+ # * R.8X: 8 M-DPU (32 vCPUs, 256 GB memory)
16950
17199
  # @return [String]
16951
17200
  #
16952
17201
  # @!attribute [rw] number_of_workers
@@ -18228,6 +18477,12 @@ module Aws::Glue
18228
18477
  # "2023-04-04T08:00:00+08:00").
18229
18478
  # @return [Time]
18230
18479
  #
18480
+ # @!attribute [rw] fanout_consumer_arn
18481
+ # The Amazon Resource Name (ARN) of the Kinesis Data Streams enhanced
18482
+ # fan-out consumer. When specified, enables enhanced fan-out for
18483
+ # dedicated throughput and lower latency data consumption.
18484
+ # @return [String]
18485
+ #
18231
18486
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/KinesisStreamingSourceOptions AWS API Documentation
18232
18487
  #
18233
18488
  class KinesisStreamingSourceOptions < Struct.new(
@@ -18251,7 +18506,8 @@ module Aws::Glue
18251
18506
  :role_session_name,
18252
18507
  :add_record_timestamp,
18253
18508
  :emit_consumer_lag_metrics,
18254
- :starting_timestamp)
18509
+ :starting_timestamp,
18510
+ :fanout_consumer_arn)
18255
18511
  SENSITIVE = []
18256
18512
  include Aws::Structure
18257
18513
  end
@@ -20711,6 +20967,43 @@ module Aws::Glue
20711
20967
  # Indicates the value that will replace the detected entity.
20712
20968
  # @return [String]
20713
20969
  #
20970
+ # @!attribute [rw] redact_text
20971
+ # Specifies whether to redact the detected PII text. When set to
20972
+ # `true`, PII content is replaced with redaction characters.
20973
+ # @return [String]
20974
+ #
20975
+ # @!attribute [rw] redact_char
20976
+ # The character used to replace detected PII content when redaction is
20977
+ # enabled. The default redaction character is `*`.
20978
+ # @return [String]
20979
+ #
20980
+ # @!attribute [rw] match_pattern
20981
+ # A regular expression pattern used to identify additional PII content
20982
+ # beyond the standard detection algorithms.
20983
+ # @return [String]
20984
+ #
20985
+ # @!attribute [rw] num_left_chars_to_exclude
20986
+ # The number of characters to exclude from redaction on the left side
20987
+ # of detected PII content. This allows preserving context around the
20988
+ # sensitive data.
20989
+ # @return [Integer]
20990
+ #
20991
+ # @!attribute [rw] num_right_chars_to_exclude
20992
+ # The number of characters to exclude from redaction on the right side
20993
+ # of detected PII content. This allows preserving context around the
20994
+ # sensitive data.
20995
+ # @return [Integer]
20996
+ #
20997
+ # @!attribute [rw] detection_parameters
20998
+ # Additional parameters for configuring PII detection behavior and
20999
+ # sensitivity settings.
21000
+ # @return [String]
21001
+ #
21002
+ # @!attribute [rw] detection_sensitivity
21003
+ # The sensitivity level for PII detection. Higher sensitivity levels
21004
+ # detect more potential PII but may result in more false positives.
21005
+ # @return [String]
21006
+ #
20714
21007
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/PIIDetection AWS API Documentation
20715
21008
  #
20716
21009
  class PIIDetection < Struct.new(
@@ -20721,7 +21014,14 @@ module Aws::Glue
20721
21014
  :output_column_name,
20722
21015
  :sample_fraction,
20723
21016
  :threshold_fraction,
20724
- :mask_value)
21017
+ :mask_value,
21018
+ :redact_text,
21019
+ :redact_char,
21020
+ :match_pattern,
21021
+ :num_left_chars_to_exclude,
21022
+ :num_right_chars_to_exclude,
21023
+ :detection_parameters,
21024
+ :detection_sensitivity)
20725
21025
  SENSITIVE = []
20726
21026
  include Aws::Structure
20727
21027
  end
@@ -22070,6 +22370,32 @@ module Aws::Glue
22070
22370
  include Aws::Structure
22071
22371
  end
22072
22372
 
22373
+ # Specifies a route node that directs data to different output paths
22374
+ # based on defined filtering conditions.
22375
+ #
22376
+ # @!attribute [rw] name
22377
+ # The name of the route node.
22378
+ # @return [String]
22379
+ #
22380
+ # @!attribute [rw] inputs
22381
+ # The input connection for the route node.
22382
+ # @return [Array<String>]
22383
+ #
22384
+ # @!attribute [rw] group_filters_list
22385
+ # A list of group filters that define the routing conditions and
22386
+ # criteria for directing data to different output paths.
22387
+ # @return [Array<Types::GroupFilters>]
22388
+ #
22389
+ # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/Route AWS API Documentation
22390
+ #
22391
+ class Route < Struct.new(
22392
+ :name,
22393
+ :inputs,
22394
+ :group_filters_list)
22395
+ SENSITIVE = []
22396
+ include Aws::Structure
22397
+ end
22398
+
22073
22399
  # A run identifier.
22074
22400
  #
22075
22401
  # @!attribute [rw] run_id
@@ -22226,6 +22552,41 @@ module Aws::Glue
22226
22552
  include Aws::Structure
22227
22553
  end
22228
22554
 
22555
+ # Specifies an Apache Iceberg data source that is registered in the Glue
22556
+ # Data Catalog. The Iceberg data source must be stored in Amazon S3.
22557
+ #
22558
+ # @!attribute [rw] name
22559
+ # The name of the Iceberg data source.
22560
+ # @return [String]
22561
+ #
22562
+ # @!attribute [rw] database
22563
+ # The name of the database to read from.
22564
+ # @return [String]
22565
+ #
22566
+ # @!attribute [rw] table
22567
+ # The name of the table in the database to read from.
22568
+ # @return [String]
22569
+ #
22570
+ # @!attribute [rw] additional_iceberg_options
22571
+ # Specifies additional connection options for the Iceberg data source.
22572
+ # @return [Hash<String,String>]
22573
+ #
22574
+ # @!attribute [rw] output_schemas
22575
+ # Specifies the data schema for the Iceberg source.
22576
+ # @return [Array<Types::GlueSchema>]
22577
+ #
22578
+ # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/S3CatalogIcebergSource AWS API Documentation
22579
+ #
22580
+ class S3CatalogIcebergSource < Struct.new(
22581
+ :name,
22582
+ :database,
22583
+ :table,
22584
+ :additional_iceberg_options,
22585
+ :output_schemas)
22586
+ SENSITIVE = []
22587
+ include Aws::Structure
22588
+ end
22589
+
22229
22590
  # Specifies an Amazon S3 data store in the Glue Data Catalog.
22230
22591
  #
22231
22592
  # @!attribute [rw] name
@@ -22289,6 +22650,12 @@ module Aws::Glue
22289
22650
  # A policy that specifies update behavior for the crawler.
22290
22651
  # @return [Types::CatalogSchemaChangePolicy]
22291
22652
  #
22653
+ # @!attribute [rw] auto_data_quality
22654
+ # Specifies the automatic data quality evaluation settings for the S3
22655
+ # catalog target. When `is_enabled` is `true`, data quality checks
22656
+ # are performed automatically during the write operation.
22657
+ # @return [Types::AutoDataQuality]
22658
+ #
22292
22659
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/S3CatalogTarget AWS API Documentation
22293
22660
  #
22294
22661
  class S3CatalogTarget < Struct.new(
@@ -22297,7 +22664,8 @@ module Aws::Glue
22297
22664
  :partition_keys,
22298
22665
  :table,
22299
22666
  :database,
22300
- :schema_change_policy)
22667
+ :schema_change_policy,
22668
+ :auto_data_quality)
22301
22669
  SENSITIVE = []
22302
22670
  include Aws::Structure
22303
22671
  end
@@ -22470,6 +22838,16 @@ module Aws::Glue
22470
22838
  # A policy that specifies update behavior for the crawler.
22471
22839
  # @return [Types::CatalogSchemaChangePolicy]
22472
22840
  #
22841
+ # @!attribute [rw] auto_data_quality
22842
+ # Specifies the automatic data quality evaluation settings for the S3
22843
+ # Delta catalog target. When `is_enabled` is `true`, data quality
22844
+ # checks are performed automatically during the write operation.
22845
+ # @return [Types::AutoDataQuality]
22846
+ #
22847
+ # @!attribute [rw] output_schemas
22848
+ # Specifies the data schema for the S3 Delta catalog target.
22849
+ # @return [Array<Types::GlueSchema>]
22850
+ #
22473
22851
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/S3DeltaCatalogTarget AWS API Documentation
22474
22852
  #
22475
22853
  class S3DeltaCatalogTarget < Struct.new(
@@ -22479,7 +22857,9 @@ module Aws::Glue
22479
22857
  :table,
22480
22858
  :database,
22481
22859
  :additional_options,
22482
- :schema_change_policy)
22860
+ :schema_change_policy,
22861
+ :auto_data_quality,
22862
+ :output_schemas)
22483
22863
  SENSITIVE = []
22484
22864
  include Aws::Structure
22485
22865
  end
@@ -22526,6 +22906,12 @@ module Aws::Glue
22526
22906
  # A policy that specifies update behavior for the crawler.
22527
22907
  # @return [Types::DirectSchemaChangePolicy]
22528
22908
  #
22909
+ # @!attribute [rw] auto_data_quality
22910
+ # Specifies the automatic data quality evaluation settings for the S3
22911
+ # Delta direct target. When `is_enabled` is `true`, data quality
22912
+ # checks are performed automatically during the write operation.
22913
+ # @return [Types::AutoDataQuality]
22914
+ #
22529
22915
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/S3DeltaDirectTarget AWS API Documentation
22530
22916
  #
22531
22917
  class S3DeltaDirectTarget < Struct.new(
@@ -22537,7 +22923,8 @@ module Aws::Glue
22537
22923
  :number_target_partitions,
22538
22924
  :format,
22539
22925
  :additional_options,
22540
- :schema_change_policy)
22926
+ :schema_change_policy,
22927
+ :auto_data_quality)
22541
22928
  SENSITIVE = []
22542
22929
  include Aws::Structure
22543
22930
  end
@@ -22644,6 +23031,16 @@ module Aws::Glue
22644
23031
  # A policy that specifies update behavior for the crawler.
22645
23032
  # @return [Types::DirectSchemaChangePolicy]
22646
23033
  #
23034
+ # @!attribute [rw] auto_data_quality
23035
+ # Specifies whether to automatically enable data quality evaluation
23036
+ # for the S3 direct target. When set to `true`, data quality checks
23037
+ # are performed automatically during the write operation.
23038
+ # @return [Types::AutoDataQuality]
23039
+ #
23040
+ # @!attribute [rw] output_schemas
23041
+ # Specifies the data schema for the S3 direct target.
23042
+ # @return [Array<Types::GlueSchema>]
23043
+ #
22647
23044
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/S3DirectTarget AWS API Documentation
22648
23045
  #
22649
23046
  class S3DirectTarget < Struct.new(
@@ -22654,7 +23051,9 @@ module Aws::Glue
22654
23051
  :compression,
22655
23052
  :number_target_partitions,
22656
23053
  :format,
22657
- :schema_change_policy)
23054
+ :schema_change_policy,
23055
+ :auto_data_quality,
23056
+ :output_schemas)
22658
23057
  SENSITIVE = []
22659
23058
  include Aws::Structure
22660
23059
  end
@@ -22731,7 +23130,7 @@ module Aws::Glue
22731
23130
  # @return [Integer]
22732
23131
  #
22733
23132
  # @!attribute [rw] output_schemas
22734
- # The AWS Glue schemas to apply to the processed data.
23133
+ # The Glue schemas to apply to the processed data.
22735
23134
  # @return [Array<Types::GlueSchema>]
22736
23135
  #
22737
23136
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/S3ExcelSource AWS API Documentation
@@ -22781,13 +23180,19 @@ module Aws::Glue
22781
23180
  #
22782
23181
  # @!attribute [rw] number_target_partitions
22783
23182
  # Specifies the number of target partitions for Parquet files when
22784
- # writing to Amazon S3 using AWS Glue.
23183
+ # writing to Amazon S3 using Glue.
22785
23184
  # @return [String]
22786
23185
  #
22787
23186
  # @!attribute [rw] schema_change_policy
22788
23187
  # A policy that specifies update behavior for the crawler.
22789
23188
  # @return [Types::DirectSchemaChangePolicy]
22790
23189
  #
23190
+ # @!attribute [rw] auto_data_quality
23191
+ # Specifies whether to automatically enable data quality evaluation
23192
+ # for the S3 Glue Parquet target. When set to `true`, data quality
23193
+ # checks are performed automatically during the write operation.
23194
+ # @return [Types::AutoDataQuality]
23195
+ #
22791
23196
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/S3GlueParquetTarget AWS API Documentation
22792
23197
  #
22793
23198
  class S3GlueParquetTarget < Struct.new(
@@ -22797,7 +23202,8 @@ module Aws::Glue
22797
23202
  :path,
22798
23203
  :compression,
22799
23204
  :number_target_partitions,
22800
- :schema_change_policy)
23205
+ :schema_change_policy,
23206
+ :auto_data_quality)
22801
23207
  SENSITIVE = []
22802
23208
  include Aws::Structure
22803
23209
  end
@@ -22833,6 +23239,16 @@ module Aws::Glue
22833
23239
  # A policy that specifies update behavior for the crawler.
22834
23240
  # @return [Types::CatalogSchemaChangePolicy]
22835
23241
  #
23242
+ # @!attribute [rw] auto_data_quality
23243
+ # Specifies whether to automatically enable data quality evaluation
23244
+ # for the S3 Hudi catalog target. When set to `true`, data quality
23245
+ # checks are performed automatically during the write operation.
23246
+ # @return [Types::AutoDataQuality]
23247
+ #
23248
+ # @!attribute [rw] output_schemas
23249
+ # Specifies the data schema for the S3 Hudi catalog target.
23250
+ # @return [Array<Types::GlueSchema>]
23251
+ #
22836
23252
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/S3HudiCatalogTarget AWS API Documentation
22837
23253
  #
22838
23254
  class S3HudiCatalogTarget < Struct.new(
@@ -22842,7 +23258,9 @@ module Aws::Glue
22842
23258
  :table,
22843
23259
  :database,
22844
23260
  :additional_options,
22845
- :schema_change_policy)
23261
+ :schema_change_policy,
23262
+ :auto_data_quality,
23263
+ :output_schemas)
22846
23264
  SENSITIVE = []
22847
23265
  include Aws::Structure
22848
23266
  end
@@ -22888,6 +23306,12 @@ module Aws::Glue
22888
23306
  # A policy that specifies update behavior for the crawler.
22889
23307
  # @return [Types::DirectSchemaChangePolicy]
22890
23308
  #
23309
+ # @!attribute [rw] auto_data_quality
23310
+ # Specifies whether to automatically enable data quality evaluation
23311
+ # for the S3 Hudi direct target. When set to `true`, data quality
23312
+ # checks are performed automatically during the write operation.
23313
+ # @return [Types::AutoDataQuality]
23314
+ #
22891
23315
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/S3HudiDirectTarget AWS API Documentation
22892
23316
  #
22893
23317
  class S3HudiDirectTarget < Struct.new(
@@ -22899,7 +23323,8 @@ module Aws::Glue
22899
23323
  :partition_keys,
22900
23324
  :format,
22901
23325
  :additional_options,
22902
- :schema_change_policy)
23326
+ :schema_change_policy,
23327
+ :auto_data_quality)
22903
23328
  SENSITIVE = []
22904
23329
  include Aws::Structure
22905
23330
  end
@@ -22948,6 +23373,10 @@ module Aws::Glue
22948
23373
  # Specifies the input source for the HyperDirect target.
22949
23374
  # @return [Array<String>]
22950
23375
  #
23376
+ # @!attribute [rw] format
23377
+ # Specifies the data output format for the HyperDirect target.
23378
+ # @return [String]
23379
+ #
22951
23380
  # @!attribute [rw] partition_keys
22952
23381
  # Defines the partitioning strategy for the output data.
22953
23382
  # @return [Array<Array<String>>]
@@ -22964,15 +23393,81 @@ module Aws::Glue
22964
23393
  # Defines how schema changes are handled during write operations.
22965
23394
  # @return [Types::DirectSchemaChangePolicy]
22966
23395
  #
23396
+ # @!attribute [rw] auto_data_quality
23397
+ # Specifies whether to automatically enable data quality evaluation
23398
+ # for the S3 Hyper direct target. When set to `true`, data quality
23399
+ # checks are performed automatically during the write operation.
23400
+ # @return [Types::AutoDataQuality]
23401
+ #
23402
+ # @!attribute [rw] output_schemas
23403
+ # Specifies the data schema for the S3 Hyper direct target.
23404
+ # @return [Array<Types::GlueSchema>]
23405
+ #
22967
23406
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/S3HyperDirectTarget AWS API Documentation
22968
23407
  #
22969
23408
  class S3HyperDirectTarget < Struct.new(
22970
23409
  :name,
22971
23410
  :inputs,
23411
+ :format,
22972
23412
  :partition_keys,
22973
23413
  :path,
22974
23414
  :compression,
22975
- :schema_change_policy)
23415
+ :schema_change_policy,
23416
+ :auto_data_quality,
23417
+ :output_schemas)
23418
+ SENSITIVE = []
23419
+ include Aws::Structure
23420
+ end
23421
+
23422
+ # Specifies an Apache Iceberg catalog target that writes data to Amazon
23423
+ # S3 and registers the table in the Glue Data Catalog.
23424
+ #
23425
+ # @!attribute [rw] name
23426
+ # The name of the Iceberg catalog target.
23427
+ # @return [String]
23428
+ #
23429
+ # @!attribute [rw] inputs
23430
+ # The input connection for the Iceberg catalog target.
23431
+ # @return [Array<String>]
23432
+ #
23433
+ # @!attribute [rw] partition_keys
23434
+ # A list of partition keys for the Iceberg table.
23435
+ # @return [Array<Array<String>>]
23436
+ #
23437
+ # @!attribute [rw] table
23438
+ # The name of the table to write to in the catalog.
23439
+ # @return [String]
23440
+ #
23441
+ # @!attribute [rw] database
23442
+ # The name of the database to write to.
23443
+ # @return [String]
23444
+ #
23445
+ # @!attribute [rw] additional_options
23446
+ # Specifies additional connection options for the Iceberg catalog
23447
+ # target.
23448
+ # @return [Hash<String,String>]
23449
+ #
23450
+ # @!attribute [rw] schema_change_policy
23451
+ # The policy for handling schema changes in the catalog target.
23452
+ # @return [Types::CatalogSchemaChangePolicy]
23453
+ #
23454
+ # @!attribute [rw] auto_data_quality
23455
+ # Specifies whether to automatically enable data quality evaluation
23456
+ # for the S3 Iceberg catalog target. When set to `true`, data quality
23457
+ # checks are performed automatically during the write operation.
23458
+ # @return [Types::AutoDataQuality]
23459
+ #
23460
+ # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/S3IcebergCatalogTarget AWS API Documentation
23461
+ #
23462
+ class S3IcebergCatalogTarget < Struct.new(
23463
+ :name,
23464
+ :inputs,
23465
+ :partition_keys,
23466
+ :table,
23467
+ :database,
23468
+ :additional_options,
23469
+ :schema_change_policy,
23470
+ :auto_data_quality)
22976
23471
  SENSITIVE = []
22977
23472
  include Aws::Structure
22978
23473
  end
@@ -23013,6 +23508,13 @@ module Aws::Glue
23013
23508
  # Iceberg table.
23014
23509
  # @return [Types::DirectSchemaChangePolicy]
23015
23510
  #
23511
+ # @!attribute [rw] auto_data_quality
23512
+ # Specifies configuration options for automatic data quality
23513
+ # evaluation in Glue jobs. This structure enables automated data
23514
+ # quality checks and monitoring during ETL operations, helping to
23515
+ # ensure data integrity and reliability without manual intervention.
23516
+ # @return [Types::AutoDataQuality]
23517
+ #
23016
23518
  # @!attribute [rw] compression
23017
23519
  # Specifies the compression codec used for Iceberg table files in S3.
23018
23520
  # @return [String]
@@ -23022,6 +23524,10 @@ module Aws::Glue
23022
23524
  # files across S3.
23023
23525
  # @return [String]
23024
23526
  #
23527
+ # @!attribute [rw] output_schemas
23528
+ # Specifies the data schema for the S3 Iceberg direct target.
23529
+ # @return [Array<Types::GlueSchema>]
23530
+ #
23025
23531
  # @see http://docs.aws.amazon.com/goto/WebAPI/glue-2017-03-31/S3IcebergDirectTarget AWS API Documentation
23026
23532
  #
23027
23533
  class S3IcebergDirectTarget < Struct.new(
@@ -23032,8 +23538,10 @@ module Aws::Glue
23032
23538
  :format,
23033
23539
  :additional_options,
23034
23540
  :schema_change_policy,
23541
+ :auto_data_quality,
23035
23542
  :compression,
23036
- :number_target_partitions)
23543
+ :number_target_partitions,
23544
+ :output_schemas)
23037
23545
  SENSITIVE = []
23038
23546
  include Aws::Structure
23039
23547
  end