aws-cdk-lib 2.220.0__py3-none-any.whl → 2.221.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aws-cdk-lib might be problematic.
- aws_cdk/__init__.py +21 -18
- aws_cdk/_jsii/__init__.py +1 -1
- aws_cdk/_jsii/{aws-cdk-lib@2.220.0.jsii.tgz → aws-cdk-lib@2.221.0.jsii.tgz} +0 -0
- aws_cdk/alexa_ask/__init__.py +3 -0
- aws_cdk/aws_accessanalyzer/__init__.py +3 -0
- aws_cdk/aws_acmpca/__init__.py +4 -1
- aws_cdk/aws_aiops/__init__.py +3 -0
- aws_cdk/aws_amazonmq/__init__.py +3 -29
- aws_cdk/aws_amplify/__init__.py +3 -0
- aws_cdk/aws_amplifyuibuilder/__init__.py +3 -0
- aws_cdk/aws_apigateway/__init__.py +3 -0
- aws_cdk/aws_apigatewayv2/__init__.py +3 -0
- aws_cdk/aws_appconfig/__init__.py +3 -29
- aws_cdk/aws_appflow/__init__.py +3 -0
- aws_cdk/aws_appintegrations/__init__.py +3 -0
- aws_cdk/aws_applicationautoscaling/__init__.py +3 -0
- aws_cdk/aws_applicationinsights/__init__.py +3 -0
- aws_cdk/aws_applicationsignals/__init__.py +4 -1
- aws_cdk/aws_appmesh/__init__.py +3 -0
- aws_cdk/aws_apprunner/__init__.py +3 -0
- aws_cdk/aws_appstream/__init__.py +3 -0
- aws_cdk/aws_appsync/__init__.py +3 -0
- aws_cdk/aws_apptest/__init__.py +3 -0
- aws_cdk/aws_aps/__init__.py +1168 -83
- aws_cdk/aws_arcregionswitch/__init__.py +3 -0
- aws_cdk/aws_arczonalshift/__init__.py +3 -0
- aws_cdk/aws_athena/__init__.py +3 -0
- aws_cdk/aws_auditmanager/__init__.py +3 -0
- aws_cdk/aws_autoscaling/__init__.py +3 -0
- aws_cdk/aws_autoscaling_common/__init__.py +3 -0
- aws_cdk/aws_autoscalingplans/__init__.py +3 -0
- aws_cdk/aws_b2bi/__init__.py +3 -0
- aws_cdk/aws_backup/__init__.py +3 -29
- aws_cdk/aws_backupgateway/__init__.py +3 -0
- aws_cdk/aws_batch/__init__.py +3 -0
- aws_cdk/aws_bcmdataexports/__init__.py +3 -0
- aws_cdk/aws_bedrock/__init__.py +9 -29
- aws_cdk/aws_bedrockagentcore/__init__.py +782 -169
- aws_cdk/aws_billingconductor/__init__.py +3 -0
- aws_cdk/aws_budgets/__init__.py +3 -0
- aws_cdk/aws_cassandra/__init__.py +3 -0
- aws_cdk/aws_ce/__init__.py +3 -0
- aws_cdk/aws_certificatemanager/__init__.py +3 -0
- aws_cdk/aws_chatbot/__init__.py +3 -0
- aws_cdk/aws_cleanrooms/__init__.py +3 -0
- aws_cdk/aws_cleanroomsml/__init__.py +3 -0
- aws_cdk/aws_cloud9/__init__.py +3 -0
- aws_cdk/aws_cloudformation/__init__.py +3 -0
- aws_cdk/aws_cloudfront/__init__.py +69 -3
- aws_cdk/aws_cloudtrail/__init__.py +3 -0
- aws_cdk/aws_cloudwatch/__init__.py +3 -0
- aws_cdk/aws_codeartifact/__init__.py +3 -0
- aws_cdk/aws_codebuild/__init__.py +3 -0
- aws_cdk/aws_codecommit/__init__.py +3 -0
- aws_cdk/aws_codeconnections/__init__.py +3 -0
- aws_cdk/aws_codedeploy/__init__.py +3 -0
- aws_cdk/aws_codeguruprofiler/__init__.py +3 -0
- aws_cdk/aws_codegurureviewer/__init__.py +3 -0
- aws_cdk/aws_codepipeline/__init__.py +3 -0
- aws_cdk/aws_codepipeline_actions/__init__.py +3 -0
- aws_cdk/aws_codestar/__init__.py +3 -0
- aws_cdk/aws_codestarconnections/__init__.py +3 -0
- aws_cdk/aws_codestarnotifications/__init__.py +3 -0
- aws_cdk/aws_cognito/__init__.py +3 -0
- aws_cdk/aws_cognito_identitypool/__init__.py +3 -0
- aws_cdk/aws_comprehend/__init__.py +3 -0
- aws_cdk/aws_config/__init__.py +3 -0
- aws_cdk/aws_connect/__init__.py +1232 -2
- aws_cdk/aws_connectcampaigns/__init__.py +3 -0
- aws_cdk/aws_connectcampaignsv2/__init__.py +3 -0
- aws_cdk/aws_controltower/__init__.py +3 -0
- aws_cdk/aws_cur/__init__.py +3 -0
- aws_cdk/aws_customerprofiles/__init__.py +3 -29
- aws_cdk/aws_databrew/__init__.py +3 -0
- aws_cdk/aws_datapipeline/__init__.py +3 -0
- aws_cdk/aws_datasync/__init__.py +3 -0
- aws_cdk/aws_datazone/__init__.py +17 -15
- aws_cdk/aws_dax/__init__.py +3 -0
- aws_cdk/aws_deadline/__init__.py +3 -0
- aws_cdk/aws_detective/__init__.py +3 -0
- aws_cdk/aws_devicefarm/__init__.py +3 -0
- aws_cdk/aws_devopsguru/__init__.py +3 -0
- aws_cdk/aws_directoryservice/__init__.py +3 -0
- aws_cdk/aws_dlm/__init__.py +3 -0
- aws_cdk/aws_dms/__init__.py +3 -0
- aws_cdk/aws_docdb/__init__.py +14 -3
- aws_cdk/aws_docdbelastic/__init__.py +3 -0
- aws_cdk/aws_dsql/__init__.py +3 -0
- aws_cdk/aws_dynamodb/__init__.py +3 -0
- aws_cdk/aws_ec2/__init__.py +106 -40
- aws_cdk/aws_ecr/__init__.py +156 -33
- aws_cdk/aws_ecs/__init__.py +87 -48
- aws_cdk/aws_efs/__init__.py +3 -0
- aws_cdk/aws_eks/__init__.py +3 -58
- aws_cdk/aws_elasticache/__init__.py +3 -0
- aws_cdk/aws_elasticbeanstalk/__init__.py +3 -0
- aws_cdk/aws_elasticloadbalancing/__init__.py +3 -0
- aws_cdk/aws_elasticloadbalancingv2/__init__.py +445 -36
- aws_cdk/aws_elasticsearch/__init__.py +3 -0
- aws_cdk/aws_emr/__init__.py +3 -0
- aws_cdk/aws_emrcontainers/__init__.py +3 -0
- aws_cdk/aws_emrserverless/__init__.py +12 -11
- aws_cdk/aws_entityresolution/__init__.py +3 -0
- aws_cdk/aws_events/__init__.py +73 -29
- aws_cdk/aws_events_targets/__init__.py +3 -0
- aws_cdk/aws_eventschemas/__init__.py +3 -0
- aws_cdk/aws_evidently/__init__.py +3 -0
- aws_cdk/aws_evs/__init__.py +3 -0
- aws_cdk/aws_finspace/__init__.py +3 -0
- aws_cdk/aws_fis/__init__.py +3 -0
- aws_cdk/aws_fms/__init__.py +3 -0
- aws_cdk/aws_forecast/__init__.py +3 -0
- aws_cdk/aws_frauddetector/__init__.py +3 -0
- aws_cdk/aws_fsx/__init__.py +3 -0
- aws_cdk/aws_gamelift/__init__.py +3 -0
- aws_cdk/aws_gameliftstreams/__init__.py +8 -5
- aws_cdk/aws_globalaccelerator/__init__.py +3 -0
- aws_cdk/aws_glue/__init__.py +11 -80
- aws_cdk/aws_grafana/__init__.py +3 -0
- aws_cdk/aws_greengrass/__init__.py +3 -0
- aws_cdk/aws_greengrassv2/__init__.py +3 -0
- aws_cdk/aws_groundstation/__init__.py +3 -0
- aws_cdk/aws_guardduty/__init__.py +3 -0
- aws_cdk/aws_healthimaging/__init__.py +3 -0
- aws_cdk/aws_healthlake/__init__.py +3 -0
- aws_cdk/aws_iam/__init__.py +30 -32
- aws_cdk/aws_identitystore/__init__.py +3 -0
- aws_cdk/aws_imagebuilder/__init__.py +499 -0
- aws_cdk/aws_inspector/__init__.py +3 -0
- aws_cdk/aws_inspectorv2/__init__.py +3 -0
- aws_cdk/aws_internetmonitor/__init__.py +3 -0
- aws_cdk/aws_invoicing/__init__.py +3 -0
- aws_cdk/aws_iot/__init__.py +3 -0
- aws_cdk/aws_iotanalytics/__init__.py +3 -0
- aws_cdk/aws_iotcoredeviceadvisor/__init__.py +3 -0
- aws_cdk/aws_iotevents/__init__.py +3 -0
- aws_cdk/aws_iotfleethub/__init__.py +3 -0
- aws_cdk/aws_iotfleetwise/__init__.py +3 -0
- aws_cdk/aws_iotsitewise/__init__.py +3 -0
- aws_cdk/aws_iotthingsgraph/__init__.py +3 -0
- aws_cdk/aws_iottwinmaker/__init__.py +3 -87
- aws_cdk/aws_iotwireless/__init__.py +61 -0
- aws_cdk/aws_ivs/__init__.py +3 -0
- aws_cdk/aws_ivschat/__init__.py +3 -0
- aws_cdk/aws_kafkaconnect/__init__.py +3 -0
- aws_cdk/aws_kendra/__init__.py +3 -58
- aws_cdk/aws_kendraranking/__init__.py +3 -0
- aws_cdk/aws_kinesis/__init__.py +3 -0
- aws_cdk/aws_kinesisanalytics/__init__.py +3 -0
- aws_cdk/aws_kinesisanalyticsv2/__init__.py +3 -0
- aws_cdk/aws_kinesisfirehose/__init__.py +1709 -10
- aws_cdk/aws_kinesisvideo/__init__.py +3 -29
- aws_cdk/aws_kms/__init__.py +3 -0
- aws_cdk/aws_lakeformation/__init__.py +3 -0
- aws_cdk/aws_lambda/__init__.py +14 -7
- aws_cdk/aws_lambda_nodejs/__init__.py +3 -0
- aws_cdk/aws_launchwizard/__init__.py +3 -0
- aws_cdk/aws_lex/__init__.py +3 -29
- aws_cdk/aws_licensemanager/__init__.py +3 -0
- aws_cdk/aws_lightsail/__init__.py +3 -0
- aws_cdk/aws_location/__init__.py +3 -0
- aws_cdk/aws_logs/__init__.py +226 -9
- aws_cdk/aws_lookoutequipment/__init__.py +3 -29
- aws_cdk/aws_lookoutmetrics/__init__.py +3 -0
- aws_cdk/aws_lookoutvision/__init__.py +3 -0
- aws_cdk/aws_m2/__init__.py +3 -0
- aws_cdk/aws_macie/__init__.py +3 -0
- aws_cdk/aws_managedblockchain/__init__.py +3 -0
- aws_cdk/aws_mediaconnect/__init__.py +3 -0
- aws_cdk/aws_mediaconvert/__init__.py +3 -0
- aws_cdk/aws_medialive/__init__.py +171 -33
- aws_cdk/aws_mediapackage/__init__.py +3 -0
- aws_cdk/aws_mediapackagev2/__init__.py +3 -0
- aws_cdk/aws_mediastore/__init__.py +3 -0
- aws_cdk/aws_mediatailor/__init__.py +3 -58
- aws_cdk/aws_memorydb/__init__.py +3 -0
- aws_cdk/aws_mpa/__init__.py +3 -0
- aws_cdk/aws_msk/__init__.py +3 -0
- aws_cdk/aws_mwaa/__init__.py +3 -0
- aws_cdk/aws_neptune/__init__.py +55 -0
- aws_cdk/aws_neptunegraph/__init__.py +3 -0
- aws_cdk/aws_networkfirewall/__init__.py +3 -0
- aws_cdk/aws_networkmanager/__init__.py +3 -0
- aws_cdk/aws_nimblestudio/__init__.py +3 -0
- aws_cdk/aws_notifications/__init__.py +3 -0
- aws_cdk/aws_notificationscontacts/__init__.py +3 -0
- aws_cdk/aws_oam/__init__.py +3 -0
- aws_cdk/aws_observabilityadmin/__init__.py +171 -137
- aws_cdk/aws_odb/__init__.py +4 -1
- aws_cdk/aws_omics/__init__.py +3 -0
- aws_cdk/aws_opensearchserverless/__init__.py +3 -0
- aws_cdk/aws_opensearchservice/__init__.py +3 -0
- aws_cdk/aws_opsworks/__init__.py +3 -0
- aws_cdk/aws_opsworkscm/__init__.py +3 -0
- aws_cdk/aws_organizations/__init__.py +3 -116
- aws_cdk/aws_osis/__init__.py +157 -0
- aws_cdk/aws_panorama/__init__.py +3 -0
- aws_cdk/aws_paymentcryptography/__init__.py +3 -0
- aws_cdk/aws_pcaconnectorad/__init__.py +3 -0
- aws_cdk/aws_pcaconnectorscep/__init__.py +3 -0
- aws_cdk/aws_pcs/__init__.py +3 -0
- aws_cdk/aws_personalize/__init__.py +3 -0
- aws_cdk/aws_pinpoint/__init__.py +3 -58
- aws_cdk/aws_pinpointemail/__init__.py +3 -0
- aws_cdk/aws_pipes/__init__.py +3 -0
- aws_cdk/aws_proton/__init__.py +3 -0
- aws_cdk/aws_qbusiness/__init__.py +3 -174
- aws_cdk/aws_qldb/__init__.py +3 -29
- aws_cdk/aws_quicksight/__init__.py +418 -411
- aws_cdk/aws_ram/__init__.py +3 -0
- aws_cdk/aws_rbin/__init__.py +3 -0
- aws_cdk/aws_rds/__init__.py +42 -19
- aws_cdk/aws_redshift/__init__.py +3 -0
- aws_cdk/aws_redshiftserverless/__init__.py +3 -0
- aws_cdk/aws_refactorspaces/__init__.py +3 -0
- aws_cdk/aws_rekognition/__init__.py +3 -29
- aws_cdk/aws_resiliencehub/__init__.py +3 -0
- aws_cdk/aws_resourceexplorer2/__init__.py +3 -0
- aws_cdk/aws_resourcegroups/__init__.py +3 -0
- aws_cdk/aws_robomaker/__init__.py +3 -0
- aws_cdk/aws_rolesanywhere/__init__.py +3 -0
- aws_cdk/aws_route53/__init__.py +18 -18
- aws_cdk/aws_route53_targets/__init__.py +3 -0
- aws_cdk/aws_route53profiles/__init__.py +3 -0
- aws_cdk/aws_route53recoverycontrol/__init__.py +3 -0
- aws_cdk/aws_route53recoveryreadiness/__init__.py +3 -0
- aws_cdk/aws_route53resolver/__init__.py +11 -4
- aws_cdk/aws_rum/__init__.py +3 -0
- aws_cdk/aws_s3/__init__.py +51 -41
- aws_cdk/aws_s3_deployment/__init__.py +3 -0
- aws_cdk/aws_s3express/__init__.py +3 -0
- aws_cdk/aws_s3objectlambda/__init__.py +3 -0
- aws_cdk/aws_s3outposts/__init__.py +3 -0
- aws_cdk/aws_s3tables/__init__.py +3 -0
- aws_cdk/aws_sagemaker/__init__.py +166 -87
- aws_cdk/aws_sam/__init__.py +3 -0
- aws_cdk/aws_scheduler/__init__.py +3 -29
- aws_cdk/aws_sdb/__init__.py +3 -0
- aws_cdk/aws_secretsmanager/__init__.py +3 -0
- aws_cdk/aws_securityhub/__init__.py +3 -0
- aws_cdk/aws_securitylake/__init__.py +3 -0
- aws_cdk/aws_servicecatalog/__init__.py +136 -124
- aws_cdk/aws_servicecatalogappregistry/__init__.py +3 -0
- aws_cdk/aws_servicediscovery/__init__.py +3 -0
- aws_cdk/aws_ses/__init__.py +3 -0
- aws_cdk/aws_shield/__init__.py +3 -0
- aws_cdk/aws_signer/__init__.py +3 -0
- aws_cdk/aws_simspaceweaver/__init__.py +3 -0
- aws_cdk/aws_smsvoice/__init__.py +3 -29
- aws_cdk/aws_sns/__init__.py +3 -0
- aws_cdk/aws_sqs/__init__.py +3 -0
- aws_cdk/aws_ssm/__init__.py +10 -3
- aws_cdk/aws_ssmcontacts/__init__.py +3 -0
- aws_cdk/aws_ssmguiconnect/__init__.py +3 -0
- aws_cdk/aws_ssmincidents/__init__.py +3 -0
- aws_cdk/aws_ssmquicksetup/__init__.py +467 -0
- aws_cdk/aws_sso/__init__.py +3 -0
- aws_cdk/aws_stepfunctions/__init__.py +23 -19
- aws_cdk/aws_stepfunctions_tasks/__init__.py +6 -3
- aws_cdk/aws_supportapp/__init__.py +3 -0
- aws_cdk/aws_synthetics/__init__.py +59 -26
- aws_cdk/aws_systemsmanagersap/__init__.py +3 -0
- aws_cdk/aws_timestream/__init__.py +3 -29
- aws_cdk/aws_transfer/__init__.py +20 -34
- aws_cdk/aws_verifiedpermissions/__init__.py +3 -0
- aws_cdk/aws_voiceid/__init__.py +3 -0
- aws_cdk/aws_vpclattice/__init__.py +7 -4
- aws_cdk/aws_waf/__init__.py +3 -0
- aws_cdk/aws_wafregional/__init__.py +3 -0
- aws_cdk/aws_wafv2/__init__.py +3 -0
- aws_cdk/aws_wisdom/__init__.py +3 -116
- aws_cdk/aws_workspaces/__init__.py +3 -0
- aws_cdk/aws_workspacesinstances/__init__.py +3 -0
- aws_cdk/aws_workspacesthinclient/__init__.py +3 -0
- aws_cdk/aws_workspacesweb/__init__.py +3 -0
- aws_cdk/aws_xray/__init__.py +3 -0
- aws_cdk/cloud_assembly_schema/__init__.py +3 -0
- aws_cdk/cx_api/__init__.py +34 -0
- aws_cdk/pipelines/__init__.py +3 -0
- aws_cdk/region_info/__init__.py +3 -0
- aws_cdk/triggers/__init__.py +3 -0
- {aws_cdk_lib-2.220.0.dist-info → aws_cdk_lib-2.221.0.dist-info}/METADATA +2 -2
- aws_cdk_lib-2.221.0.dist-info/RECORD +313 -0
- aws_cdk_lib-2.220.0.dist-info/RECORD +0 -313
- {aws_cdk_lib-2.220.0.dist-info → aws_cdk_lib-2.221.0.dist-info}/LICENSE +0 -0
- {aws_cdk_lib-2.220.0.dist-info → aws_cdk_lib-2.221.0.dist-info}/NOTICE +0 -0
- {aws_cdk_lib-2.220.0.dist-info → aws_cdk_lib-2.221.0.dist-info}/WHEEL +0 -0
- {aws_cdk_lib-2.220.0.dist-info → aws_cdk_lib-2.221.0.dist-info}/top_level.txt +0 -0
@@ -130,6 +130,103 @@ s3_destination = firehose.S3Bucket(bucket,
 )
 ```

+## Data Format Conversion
+
+Data format conversion allows automatic conversion of inputs from JSON to either Parquet or ORC.
+Converting JSON records to columnar formats like Parquet or ORC can help speed up analytical querying while also increasing compression efficiency.
+When data format conversion is specified, it automatically enables Snappy compression on the output.
+
+Only S3 Destinations support data format conversion.
+
+An example of defining an S3 destination configured with data format conversion:
+
+```python
+# bucket: s3.Bucket
+# schema_glue_table: glue.CfnTable
+
+s3_destination = firehose.S3Bucket(bucket,
+    data_format_conversion=firehose.DataFormatConversionProps(
+        schema_configuration=firehose.SchemaConfiguration.from_cfn_table(schema_glue_table),
+        input_format=firehose.InputFormat.OPENX_JSON,
+        output_format=firehose.OutputFormat.PARQUET
+    )
+)
+```
+
+When data format conversion is enabled, the Delivery Stream's buffering size must be at least 64 MiB.
+Additionally, the default buffering size is changed from 5 MiB to 128 MiB. This mirrors the Cloudformation behavior.
+
+You can only parse JSON and transform it into either Parquet or ORC:
+
+* to read JSON using OpenX parser, choose `InputFormat.OPENX_JSON`.
+* to read JSON using Hive parser, choose `InputFormat.HIVE_JSON`.
+* to transform into Parquet, choose `OutputFormat.PARQUET`.
+* to transform into ORC, choose `OutputFormat.ORC`.
+
+The following subsections explain how to specify advanced configuration options for each input and output format if the defaults are not desirable
+
+### Input Format: OpenX JSON
+
+Example creation of custom OpenX JSON InputFormat:
+
+```python
+input_format = firehose.OpenXJsonInputFormat(
+    lowercase_column_names=False,
+    column_to_json_key_mappings={"ts": "timestamp"},
+    convert_dots_in_json_keys_to_underscores=True
+)
+```
+
+### Input Format: Hive JSON
+
+Example creation of custom Hive JSON InputFormat:
+
+```python
+input_format = firehose.HiveJsonInputFormat(
+    timestamp_parsers=[
+        firehose.TimestampParser.from_format_string("yyyy-MM-dd"), firehose.TimestampParser.EPOCH_MILLIS
+    ]
+)
+```
+
+Hive JSON allows you to specify custom timestamp formats to parse. The syntax of the format string is Joda Time.
+
+To parse timestamps formatted as milliseconds since epoch, use the convenience constant `TimestampParser.EPOCH_MILLIS`.
+
+### Output Format: Parquet
+
+Example of a custom Parquet OutputFormat, with all values changed from the defaults.
+
+```python
+output_format = firehose.ParquetOutputFormat(
+    block_size=Size.mebibytes(512),
+    compression=firehose.ParquetCompression.UNCOMPRESSED,
+    enable_dictionary_compression=True,
+    max_padding=Size.bytes(10),
+    page_size=Size.mebibytes(2),
+    writer_version=firehose.ParquetWriterVersion.V2
+)
+```
+
+### Output Format: ORC
+
+Example creation of custom ORC OutputFormat, with all values changed from the defaults.
+
+```python
+output_format = firehose.OrcOutputFormat(
+    format_version=firehose.OrcFormatVersion.V0_11,
+    block_size=Size.mebibytes(256),
+    compression=firehose.OrcCompression.NONE,
+    bloom_filter_columns=["columnA"],
+    bloom_filter_false_positive_probability=0.1,
+    dictionary_key_threshold=0.7,
+    enable_padding=True,
+    padding_tolerance=0.2,
+    row_index_stride=9000,
+    stripe_size=Size.mebibytes(32)
+)
+```
+
 ## Server-side Encryption

 Enabling server-side encryption (SSE) requires Amazon Data Firehose to encrypt all data
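The added README text above fixes the buffering rules when conversion is on (minimum 64 MiB, default 128 MiB). A minimal sketch, not part of the package diff, of setting `buffering_size` explicitly next to the conversion props shown above; the `bucket` and `schema_glue_table` placeholders are assumed as in the README example:

```python
# Illustrative sketch only - assumes the 2.221.0 API surface shown in this diff.
# bucket: s3.Bucket
# schema_glue_table: glue.CfnTable
from aws_cdk import Size
from aws_cdk import aws_kinesisfirehose as firehose

s3_destination = firehose.S3Bucket(bucket,
    # With data format conversion enabled, buffering_size must be >= 64 MiB;
    # if left unset it defaults to 128 MiB instead of the usual 5 MiB.
    buffering_size=Size.mebibytes(64),
    data_format_conversion=firehose.DataFormatConversionProps(
        schema_configuration=firehose.SchemaConfiguration.from_cfn_table(schema_glue_table),
        input_format=firehose.InputFormat.OPENX_JSON,
        output_format=firehose.OutputFormat.PARQUET
    )
)
```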
@@ -704,6 +801,7 @@ from ..aws_cloudwatch import (
 from ..aws_ec2 import (
     Connections as _Connections_0f31fce8, IConnectable as _IConnectable_10015a05
 )
+from ..aws_glue import CfnTable as _CfnTable_63ae0183
 from ..aws_iam import (
     Grant as _Grant_a7ae64f8,
     IGrantable as _IGrantable_71c4f5de,
@@ -1313,7 +1411,7 @@ class CommonDestinationS3Props:
     '''Common properties for defining a backup, intermediary, or final S3 destination for a Amazon Data Firehose delivery stream.

     :param buffering_interval: The length of time that Firehose buffers incoming data before delivering it to the S3 bucket. Minimum: Duration.seconds(0) Maximum: Duration.seconds(900) Default: Duration.seconds(300)
-    :param buffering_size: The size of the buffer that Amazon Data Firehose uses for incoming data before delivering it to the S3 bucket. Minimum: Size.mebibytes(1) Maximum: Size.mebibytes(128) Default: Size.mebibytes(5)
+    :param buffering_size: The size of the buffer that Amazon Data Firehose uses for incoming data before delivering it to the S3 bucket. Minimum: Size.mebibytes(1) when record data format conversion is disabled, Size.mebibytes(64) when it is enabled Maximum: Size.mebibytes(128) Default: Size.mebibytes(5) when record data format conversion is disabled, Size.mebibytes(128) when it is enabled
     :param compression: The type of compression that Amazon Data Firehose uses to compress the data that it delivers to the Amazon S3 bucket. The compression formats SNAPPY or ZIP cannot be specified for Amazon Redshift destinations because they are not supported by the Amazon Redshift COPY operation that reads from the S3 bucket. Default: - UNCOMPRESSED
     :param data_output_prefix: A prefix that Amazon Data Firehose evaluates and adds to records before writing them to S3. This prefix appears immediately following the bucket name. Default: "YYYY/MM/DD/HH"
     :param encryption_key: The AWS KMS key used to encrypt the data that it delivers to your Amazon S3 bucket. Default: - Data is not encrypted.
@@ -1380,10 +1478,10 @@ class CommonDestinationS3Props:
     def buffering_size(self) -> typing.Optional[_Size_7b441c34]:
         '''The size of the buffer that Amazon Data Firehose uses for incoming data before delivering it to the S3 bucket.

-        Minimum: Size.mebibytes(1)
+        Minimum: Size.mebibytes(1) when record data format conversion is disabled, Size.mebibytes(64) when it is enabled
         Maximum: Size.mebibytes(128)

-        :default: Size.mebibytes(5)
+        :default: Size.mebibytes(5) when record data format conversion is disabled, Size.mebibytes(128) when it is enabled
         '''
         result = self._values.get("buffering_size")
         return typing.cast(typing.Optional[_Size_7b441c34], result)
@@ -1518,6 +1616,104 @@ class Compression(
         return typing.cast(builtins.str, jsii.get(self, "value"))


+@jsii.data_type(
+    jsii_type="aws-cdk-lib.aws_kinesisfirehose.DataFormatConversionProps",
+    jsii_struct_bases=[],
+    name_mapping={
+        "input_format": "inputFormat",
+        "output_format": "outputFormat",
+        "schema_configuration": "schemaConfiguration",
+        "enabled": "enabled",
+    },
+)
+class DataFormatConversionProps:
+    def __init__(
+        self,
+        *,
+        input_format: "IInputFormat",
+        output_format: "IOutputFormat",
+        schema_configuration: "SchemaConfiguration",
+        enabled: typing.Optional[builtins.bool] = None,
+    ) -> None:
+        '''Props for specifying data format conversion for Firehose.
+
+        :param input_format: The input format to convert from for record format conversion.
+        :param output_format: The output format to convert to for record format conversion.
+        :param schema_configuration: The schema configuration to use in converting the input format to output format.
+        :param enabled: Whether data format conversion is enabled or not. Default: ``true``
+
+        :see: https://docs.aws.amazon.com/firehose/latest/dev/record-format-conversion.html
+        :exampleMetadata: infused
+
+        Example::
+
+            # bucket: s3.Bucket
+            # schema_glue_table: glue.CfnTable
+
+            s3_destination = firehose.S3Bucket(bucket,
+                data_format_conversion=firehose.DataFormatConversionProps(
+                    schema_configuration=firehose.SchemaConfiguration.from_cfn_table(schema_glue_table),
+                    input_format=firehose.InputFormat.OPENX_JSON,
+                    output_format=firehose.OutputFormat.PARQUET
+                )
+            )
+        '''
+        if __debug__:
+            type_hints = typing.get_type_hints(_typecheckingstub__bff90bf1ac37687c050bd1dbbc7970543cf96f46bffc7e9b92aa180e16446a3e)
+            check_type(argname="argument input_format", value=input_format, expected_type=type_hints["input_format"])
+            check_type(argname="argument output_format", value=output_format, expected_type=type_hints["output_format"])
+            check_type(argname="argument schema_configuration", value=schema_configuration, expected_type=type_hints["schema_configuration"])
+            check_type(argname="argument enabled", value=enabled, expected_type=type_hints["enabled"])
+        self._values: typing.Dict[builtins.str, typing.Any] = {
+            "input_format": input_format,
+            "output_format": output_format,
+            "schema_configuration": schema_configuration,
+        }
+        if enabled is not None:
+            self._values["enabled"] = enabled
+
+    @builtins.property
+    def input_format(self) -> "IInputFormat":
+        '''The input format to convert from for record format conversion.'''
+        result = self._values.get("input_format")
+        assert result is not None, "Required property 'input_format' is missing"
+        return typing.cast("IInputFormat", result)
+
+    @builtins.property
+    def output_format(self) -> "IOutputFormat":
+        '''The output format to convert to for record format conversion.'''
+        result = self._values.get("output_format")
+        assert result is not None, "Required property 'output_format' is missing"
+        return typing.cast("IOutputFormat", result)
+
+    @builtins.property
+    def schema_configuration(self) -> "SchemaConfiguration":
+        '''The schema configuration to use in converting the input format to output format.'''
+        result = self._values.get("schema_configuration")
+        assert result is not None, "Required property 'schema_configuration' is missing"
+        return typing.cast("SchemaConfiguration", result)
+
+    @builtins.property
+    def enabled(self) -> typing.Optional[builtins.bool]:
+        '''Whether data format conversion is enabled or not.
+
+        :default: ``true``
+        '''
+        result = self._values.get("enabled")
+        return typing.cast(typing.Optional[builtins.bool], result)
+
+    def __eq__(self, rhs: typing.Any) -> builtins.bool:
+        return isinstance(rhs, self.__class__) and rhs._values == self._values
+
+    def __ne__(self, rhs: typing.Any) -> builtins.bool:
+        return not (rhs == self)
+
+    def __repr__(self) -> str:
+        return "DataFormatConversionProps(%s)" % ", ".join(
+            k + "=" + repr(v) for k, v in self._values.items()
+        )
+
+
 @jsii.data_type(
     jsii_type="aws-cdk-lib.aws_kinesisfirehose.DataProcessorBindOptions",
     jsii_struct_bases=[],
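`DataFormatConversionProps` above also exposes an optional `enabled` flag (default `true`). A minimal sketch, not part of the package diff, of declaring the conversion configuration while keeping it switched off; placeholders as in the README example:

```python
# Illustrative sketch only - assumes the 2.221.0 API surface shown in this diff.
# bucket: s3.Bucket
# schema_glue_table: glue.CfnTable
from aws_cdk import aws_kinesisfirehose as firehose

# The conversion configuration is declared up front but disabled via `enabled`,
# which defaults to True when omitted.
conversion = firehose.DataFormatConversionProps(
    schema_configuration=firehose.SchemaConfiguration.from_cfn_table(schema_glue_table),
    input_format=firehose.InputFormat.OPENX_JSON,
    output_format=firehose.OutputFormat.PARQUET,
    enabled=False
)

s3_destination = firehose.S3Bucket(bucket, data_format_conversion=conversion)
```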
@@ -2386,7 +2582,7 @@ class DestinationS3BackupProps(CommonDestinationS3Props):
     S3 backup is available for all destinations, regardless of whether the final destination is S3 or not.

     :param buffering_interval: The length of time that Firehose buffers incoming data before delivering it to the S3 bucket. Minimum: Duration.seconds(0) Maximum: Duration.seconds(900) Default: Duration.seconds(300)
-    :param buffering_size: The size of the buffer that Amazon Data Firehose uses for incoming data before delivering it to the S3 bucket. Minimum: Size.mebibytes(1) Maximum: Size.mebibytes(128) Default: Size.mebibytes(5)
+    :param buffering_size: The size of the buffer that Amazon Data Firehose uses for incoming data before delivering it to the S3 bucket. Minimum: Size.mebibytes(1) when record data format conversion is disabled, Size.mebibytes(64) when it is enabled Maximum: Size.mebibytes(128) Default: Size.mebibytes(5) when record data format conversion is disabled, Size.mebibytes(128) when it is enabled
     :param compression: The type of compression that Amazon Data Firehose uses to compress the data that it delivers to the Amazon S3 bucket. The compression formats SNAPPY or ZIP cannot be specified for Amazon Redshift destinations because they are not supported by the Amazon Redshift COPY operation that reads from the S3 bucket. Default: - UNCOMPRESSED
     :param data_output_prefix: A prefix that Amazon Data Firehose evaluates and adds to records before writing them to S3. This prefix appears immediately following the bucket name. Default: "YYYY/MM/DD/HH"
     :param encryption_key: The AWS KMS key used to encrypt the data that it delivers to your Amazon S3 bucket. Default: - Data is not encrypted.
@@ -2478,10 +2674,10 @@ class DestinationS3BackupProps(CommonDestinationS3Props):
     def buffering_size(self) -> typing.Optional[_Size_7b441c34]:
         '''The size of the buffer that Amazon Data Firehose uses for incoming data before delivering it to the S3 bucket.

-        Minimum: Size.mebibytes(1)
+        Minimum: Size.mebibytes(1) when record data format conversion is disabled, Size.mebibytes(64) when it is enabled
         Maximum: Size.mebibytes(128)

-        :default: Size.mebibytes(5)
+        :default: Size.mebibytes(5) when record data format conversion is disabled, Size.mebibytes(128) when it is enabled
         '''
         result = self._values.get("buffering_size")
         return typing.cast(typing.Optional[_Size_7b441c34], result)
@@ -2578,6 +2774,64 @@ class DestinationS3BackupProps(CommonDestinationS3Props):
         )


+@jsii.data_type(
+    jsii_type="aws-cdk-lib.aws_kinesisfirehose.HiveJsonInputFormatProps",
+    jsii_struct_bases=[],
+    name_mapping={"timestamp_parsers": "timestampParsers"},
+)
+class HiveJsonInputFormatProps:
+    def __init__(
+        self,
+        *,
+        timestamp_parsers: typing.Optional[typing.Sequence["TimestampParser"]] = None,
+    ) -> None:
+        '''Props for Hive JSON input format for data record format conversion.
+
+        :param timestamp_parsers: List of TimestampParsers. These are used to parse custom timestamp strings from input JSON into dates. Note: Specifying a parser will override the default timestamp parser. If the default timestamp parser is required, include ``TimestampParser.DEFAULT`` in the list of parsers along with the custom parser. Default: the default timestamp parser is used
+
+        :exampleMetadata: infused
+
+        Example::
+
+            input_format = firehose.HiveJsonInputFormat(
+                timestamp_parsers=[
+                    firehose.TimestampParser.from_format_string("yyyy-MM-dd"), firehose.TimestampParser.EPOCH_MILLIS
+                ]
+            )
+        '''
+        if __debug__:
+            type_hints = typing.get_type_hints(_typecheckingstub__0afd5b01612b3cc327b3c1600a9eb4aa5aaa6f3ee92bada98ae2a5d7e07bf664)
+            check_type(argname="argument timestamp_parsers", value=timestamp_parsers, expected_type=type_hints["timestamp_parsers"])
+        self._values: typing.Dict[builtins.str, typing.Any] = {}
+        if timestamp_parsers is not None:
+            self._values["timestamp_parsers"] = timestamp_parsers
+
+    @builtins.property
+    def timestamp_parsers(self) -> typing.Optional[typing.List["TimestampParser"]]:
+        '''List of TimestampParsers.
+
+        These are used to parse custom timestamp strings from input JSON into dates.
+
+        Note: Specifying a parser will override the default timestamp parser. If the default timestamp parser is required,
+        include ``TimestampParser.DEFAULT`` in the list of parsers along with the custom parser.
+
+        :default: the default timestamp parser is used
+        '''
+        result = self._values.get("timestamp_parsers")
+        return typing.cast(typing.Optional[typing.List["TimestampParser"]], result)
+
+    def __eq__(self, rhs: typing.Any) -> builtins.bool:
+        return isinstance(rhs, self.__class__) and rhs._values == self._values
+
+    def __ne__(self, rhs: typing.Any) -> builtins.bool:
+        return not (rhs == self)
+
+    def __repr__(self) -> str:
+        return "HiveJsonInputFormatProps(%s)" % ", ".join(
+            k + "=" + repr(v) for k, v in self._values.items()
+        )
+
+
 @jsii.interface(jsii_type="aws-cdk-lib.aws_kinesisfirehose.IDataProcessor")
 class IDataProcessor(typing_extensions.Protocol):
     '''A data processor that Amazon Data Firehose will call to transform records before delivering data.'''
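The `timestamp_parsers` note above says a custom parser replaces the default one unless `TimestampParser.DEFAULT` is listed as well. A minimal sketch, not part of the package diff, assuming the `TimestampParser` members named in that docstring and in the README:

```python
# Illustrative sketch only - assumes the 2.221.0 API surface shown in this diff.
from aws_cdk import aws_kinesisfirehose as firehose

# A custom parser overrides the default one, so TimestampParser.DEFAULT is
# listed explicitly to keep the default behaviour alongside the extra formats.
input_format = firehose.HiveJsonInputFormat(
    timestamp_parsers=[
        firehose.TimestampParser.DEFAULT,
        firehose.TimestampParser.from_format_string("dd/MM/yyyy"),
        firehose.TimestampParser.EPOCH_MILLIS
    ]
)
```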
@@ -3359,6 +3613,34 @@ class _IDestinationProxy:
 typing.cast(typing.Any, IDestination).__jsii_proxy_class__ = lambda : _IDestinationProxy


+@jsii.interface(jsii_type="aws-cdk-lib.aws_kinesisfirehose.IInputFormat")
+class IInputFormat(typing_extensions.Protocol):
+    '''An input format to be used in Firehose record format conversion.'''
+
+    @jsii.member(jsii_name="createInputFormatConfig")
+    def create_input_format_config(
+        self,
+    ) -> "CfnDeliveryStream.InputFormatConfigurationProperty":
+        '''Renders the cloudformation properties for the input format.'''
+        ...
+
+
+class _IInputFormatProxy:
+    '''An input format to be used in Firehose record format conversion.'''
+
+    __jsii_type__: typing.ClassVar[str] = "aws-cdk-lib.aws_kinesisfirehose.IInputFormat"
+
+    @jsii.member(jsii_name="createInputFormatConfig")
+    def create_input_format_config(
+        self,
+    ) -> "CfnDeliveryStream.InputFormatConfigurationProperty":
+        '''Renders the cloudformation properties for the input format.'''
+        return typing.cast("CfnDeliveryStream.InputFormatConfigurationProperty", jsii.invoke(self, "createInputFormatConfig", []))
+
+# Adding a "__jsii_proxy_class__(): typing.Type" function to the interface
+typing.cast(typing.Any, IInputFormat).__jsii_proxy_class__ = lambda : _IInputFormatProxy
+
+
 @jsii.interface(jsii_type="aws-cdk-lib.aws_kinesisfirehose.ILoggingConfig")
 class ILoggingConfig(typing_extensions.Protocol):
     '''Configuration interface for logging errors when data transformation or delivery fails.
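`IInputFormat` above is a plain jsii interface with a single `createInputFormatConfig` member, so a hand-rolled implementation is conceivable. A minimal sketch, not part of the package diff, assuming the usual `@jsii.implements` pattern for CDK interfaces in Python and the existing `CfnDeliveryStream` L1 property types; the class name is hypothetical:

```python
# Illustrative sketch only - a hand-rolled IInputFormat, assuming the usual
# jsii pattern for implementing CDK interfaces from Python.
import jsii
from aws_cdk import aws_kinesisfirehose as firehose


@jsii.implements(firehose.IInputFormat)
class CaseSensitiveOpenXJson:
    def create_input_format_config(
        self,
    ) -> firehose.CfnDeliveryStream.InputFormatConfigurationProperty:
        # Renders the L1 (CloudFormation) deserializer configuration directly.
        return firehose.CfnDeliveryStream.InputFormatConfigurationProperty(
            deserializer=firehose.CfnDeliveryStream.DeserializerProperty(
                open_x_json_ser_de=firehose.CfnDeliveryStream.OpenXJsonSerDeProperty(
                    case_insensitive=False
                )
            )
        )
```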
@@ -3417,6 +3699,34 @@ class _ILoggingConfigProxy:
 typing.cast(typing.Any, ILoggingConfig).__jsii_proxy_class__ = lambda : _ILoggingConfigProxy


+@jsii.interface(jsii_type="aws-cdk-lib.aws_kinesisfirehose.IOutputFormat")
+class IOutputFormat(typing_extensions.Protocol):
+    '''An output format to be used in Firehose record format conversion.'''
+
+    @jsii.member(jsii_name="createOutputFormatConfig")
+    def create_output_format_config(
+        self,
+    ) -> "CfnDeliveryStream.OutputFormatConfigurationProperty":
+        '''Renders the cloudformation properties for the output format.'''
+        ...
+
+
+class _IOutputFormatProxy:
+    '''An output format to be used in Firehose record format conversion.'''
+
+    __jsii_type__: typing.ClassVar[str] = "aws-cdk-lib.aws_kinesisfirehose.IOutputFormat"
+
+    @jsii.member(jsii_name="createOutputFormatConfig")
+    def create_output_format_config(
+        self,
+    ) -> "CfnDeliveryStream.OutputFormatConfigurationProperty":
+        '''Renders the cloudformation properties for the output format.'''
+        return typing.cast("CfnDeliveryStream.OutputFormatConfigurationProperty", jsii.invoke(self, "createOutputFormatConfig", []))
+
+# Adding a "__jsii_proxy_class__(): typing.Type" function to the interface
+typing.cast(typing.Any, IOutputFormat).__jsii_proxy_class__ = lambda : _IOutputFormatProxy
+
+
 @jsii.interface(jsii_type="aws-cdk-lib.aws_kinesisfirehose.ISource")
 class ISource(typing_extensions.Protocol):
     '''An interface for defining a source that can be used in an Amazon Data Firehose delivery stream.'''
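`IOutputFormat` mirrors the input-format interface, and the customised format objects from the README examples implement these interfaces, so they can be passed straight into `DataFormatConversionProps`. A minimal sketch, not part of the package diff; placeholders as in the README example:

```python
# Illustrative sketch only - assumes the 2.221.0 API surface shown in this diff.
# bucket: s3.Bucket
# schema_glue_table: glue.CfnTable
from aws_cdk import aws_kinesisfirehose as firehose

# Customised input/output format objects satisfy IInputFormat/IOutputFormat,
# so they drop into the same props as the InputFormat/OutputFormat constants.
s3_destination = firehose.S3Bucket(bucket,
    data_format_conversion=firehose.DataFormatConversionProps(
        schema_configuration=firehose.SchemaConfiguration.from_cfn_table(schema_glue_table),
        input_format=firehose.OpenXJsonInputFormat(lowercase_column_names=False),
        output_format=firehose.ParquetOutputFormat(
            compression=firehose.ParquetCompression.UNCOMPRESSED
        )
    )
)
```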
@@ -3456,6 +3766,44 @@ class _ISourceProxy:
 typing.cast(typing.Any, ISource).__jsii_proxy_class__ = lambda : _ISourceProxy


+class InputFormat(
+    metaclass=jsii.JSIIMeta,
+    jsii_type="aws-cdk-lib.aws_kinesisfirehose.InputFormat",
+):
+    '''Represents possible input formats when performing record data conversion.
+
+    :exampleMetadata: infused
+
+    Example::
+
+        # bucket: s3.Bucket
+        # schema_glue_table: glue.CfnTable
+
+        s3_destination = firehose.S3Bucket(bucket,
+            data_format_conversion=firehose.DataFormatConversionProps(
+                schema_configuration=firehose.SchemaConfiguration.from_cfn_table(schema_glue_table),
+                input_format=firehose.InputFormat.OPENX_JSON,
+                output_format=firehose.OutputFormat.PARQUET
+            )
+        )
+    '''
+
+    @jsii.python.classproperty
+    @jsii.member(jsii_name="HIVE_JSON")
+    def HIVE_JSON(cls) -> "HiveJsonInputFormat":
+        '''Parse input JSON with Hive JSON specification.'''
+        return typing.cast("HiveJsonInputFormat", jsii.sget(cls, "HIVE_JSON"))
+
+    @jsii.python.classproperty
+    @jsii.member(jsii_name="OPENX_JSON")
+    def OPENX_JSON(cls) -> "OpenXJsonInputFormat":
+        '''Parse input JSON with OpenX JSON specification.
+
+        This will typically suffice.
+        '''
+        return typing.cast("OpenXJsonInputFormat", jsii.sget(cls, "OPENX_JSON"))
+
+
 @jsii.implements(ISource)
 class KinesisStreamSource(
     metaclass=jsii.JSIIMeta,
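The `InputFormat` constants above cover the two supported JSON parsers. A minimal sketch, not part of the package diff, of the Hive JSON to ORC pairing described in the README bullet list; placeholders as in the README example:

```python
# Illustrative sketch only - assumes the 2.221.0 API surface shown in this diff.
# bucket: s3.Bucket
# schema_glue_table: glue.CfnTable
from aws_cdk import aws_kinesisfirehose as firehose

# Hive JSON parsing combined with ORC output, the other pairing allowed by the
# README above (OpenX JSON -> Parquet being the first).
s3_destination = firehose.S3Bucket(bucket,
    data_format_conversion=firehose.DataFormatConversionProps(
        schema_configuration=firehose.SchemaConfiguration.from_cfn_table(schema_glue_table),
        input_format=firehose.InputFormat.HIVE_JSON,
        output_format=firehose.OutputFormat.ORC
    )
)
```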
@@ -3586,6 +3934,919 @@ class LambdaFunctionProcessor(
|
|
|
3586
3934
|
return typing.cast(DataProcessorProps, jsii.get(self, "props"))
|
|
3587
3935
|
|
|
3588
3936
|
|
|
3937
|
+
@jsii.implements(IInputFormat)
|
|
3938
|
+
class OpenXJsonInputFormat(
|
|
3939
|
+
metaclass=jsii.JSIIMeta,
|
|
3940
|
+
jsii_type="aws-cdk-lib.aws_kinesisfirehose.OpenXJsonInputFormat",
|
|
3941
|
+
):
|
|
3942
|
+
'''This class specifies properties for OpenX JSON input format for record format conversion.
|
|
3943
|
+
|
|
3944
|
+
You should only need to specify an instance of this class if the default configuration does not suit your needs.
|
|
3945
|
+
|
|
3946
|
+
:exampleMetadata: infused
|
|
3947
|
+
|
|
3948
|
+
Example::
|
|
3949
|
+
|
|
3950
|
+
input_format = firehose.OpenXJsonInputFormat(
|
|
3951
|
+
lowercase_column_names=False,
|
|
3952
|
+
column_to_json_key_mappings={"ts": "timestamp"},
|
|
3953
|
+
convert_dots_in_json_keys_to_underscores=True
|
|
3954
|
+
)
|
|
3955
|
+
'''
|
|
3956
|
+
|
|
3957
|
+
def __init__(
|
|
3958
|
+
self,
|
|
3959
|
+
*,
|
|
3960
|
+
column_to_json_key_mappings: typing.Optional[typing.Mapping[builtins.str, builtins.str]] = None,
|
|
3961
|
+
convert_dots_in_json_keys_to_underscores: typing.Optional[builtins.bool] = None,
|
|
3962
|
+
lowercase_column_names: typing.Optional[builtins.bool] = None,
|
|
3963
|
+
) -> None:
|
|
3964
|
+
'''
|
|
3965
|
+
:param column_to_json_key_mappings: Maps column names to JSON keys that aren't identical to the column names. This is useful when the JSON contains keys that are Hive keywords. For example, ``timestamp`` is a Hive keyword. If you have a JSON key named ``timestamp``, set this parameter to ``{"ts": "timestamp"}`` to map this key to a column named ``ts`` Default: JSON keys are not renamed
|
|
3966
|
+
:param convert_dots_in_json_keys_to_underscores: When set to ``true``, specifies that the names of the keys include dots and that you want Firehose to replace them with underscores. This is useful because Apache Hive does not allow dots in column names. For example, if the JSON contains a key whose name is "a.b", you can define the column name to be "a_b" when using this option. Default: ``false``
|
|
3967
|
+
:param lowercase_column_names: Whether the JSON keys should be lowercased when written as column names. Default: ``true``
|
|
3968
|
+
'''
|
|
3969
|
+
props = OpenXJsonInputFormatProps(
|
|
3970
|
+
column_to_json_key_mappings=column_to_json_key_mappings,
|
|
3971
|
+
convert_dots_in_json_keys_to_underscores=convert_dots_in_json_keys_to_underscores,
|
|
3972
|
+
lowercase_column_names=lowercase_column_names,
|
|
3973
|
+
)
|
|
3974
|
+
|
|
3975
|
+
jsii.create(self.__class__, self, [props])
|
|
3976
|
+
|
|
3977
|
+
@jsii.member(jsii_name="createInputFormatConfig")
|
|
3978
|
+
def create_input_format_config(
|
|
3979
|
+
self,
|
|
3980
|
+
) -> "CfnDeliveryStream.InputFormatConfigurationProperty":
|
|
3981
|
+
'''Renders the cloudformation properties for the input format.'''
|
|
3982
|
+
return typing.cast("CfnDeliveryStream.InputFormatConfigurationProperty", jsii.invoke(self, "createInputFormatConfig", []))
|
|
3983
|
+
|
|
3984
|
+
@builtins.property
|
|
3985
|
+
@jsii.member(jsii_name="props")
|
|
3986
|
+
def props(self) -> typing.Optional["OpenXJsonInputFormatProps"]:
|
|
3987
|
+
'''Properties for OpenX JSON input format.'''
|
|
3988
|
+
return typing.cast(typing.Optional["OpenXJsonInputFormatProps"], jsii.get(self, "props"))
|
|
3989
|
+
|
|
3990
|
+
|
|
3991
|
+
@jsii.data_type(
|
|
3992
|
+
jsii_type="aws-cdk-lib.aws_kinesisfirehose.OpenXJsonInputFormatProps",
|
|
3993
|
+
jsii_struct_bases=[],
|
|
3994
|
+
name_mapping={
|
|
3995
|
+
"column_to_json_key_mappings": "columnToJsonKeyMappings",
|
|
3996
|
+
"convert_dots_in_json_keys_to_underscores": "convertDotsInJsonKeysToUnderscores",
|
|
3997
|
+
"lowercase_column_names": "lowercaseColumnNames",
|
|
3998
|
+
},
|
|
3999
|
+
)
|
|
4000
|
+
class OpenXJsonInputFormatProps:
|
|
4001
|
+
def __init__(
|
|
4002
|
+
self,
|
|
4003
|
+
*,
|
|
4004
|
+
column_to_json_key_mappings: typing.Optional[typing.Mapping[builtins.str, builtins.str]] = None,
|
|
4005
|
+
convert_dots_in_json_keys_to_underscores: typing.Optional[builtins.bool] = None,
|
|
4006
|
+
lowercase_column_names: typing.Optional[builtins.bool] = None,
|
|
4007
|
+
) -> None:
|
|
4008
|
+
'''Props for OpenX JSON input format for data record format conversion.
|
|
4009
|
+
|
|
4010
|
+
:param column_to_json_key_mappings: Maps column names to JSON keys that aren't identical to the column names. This is useful when the JSON contains keys that are Hive keywords. For example, ``timestamp`` is a Hive keyword. If you have a JSON key named ``timestamp``, set this parameter to ``{"ts": "timestamp"}`` to map this key to a column named ``ts`` Default: JSON keys are not renamed
|
|
4011
|
+
:param convert_dots_in_json_keys_to_underscores: When set to ``true``, specifies that the names of the keys include dots and that you want Firehose to replace them with underscores. This is useful because Apache Hive does not allow dots in column names. For example, if the JSON contains a key whose name is "a.b", you can define the column name to be "a_b" when using this option. Default: ``false``
|
|
4012
|
+
:param lowercase_column_names: Whether the JSON keys should be lowercased when written as column names. Default: ``true``
|
|
4013
|
+
|
|
4014
|
+
:exampleMetadata: infused
|
|
4015
|
+
|
|
4016
|
+
Example::
|
|
4017
|
+
|
|
4018
|
+
input_format = firehose.OpenXJsonInputFormat(
|
|
4019
|
+
lowercase_column_names=False,
|
|
4020
|
+
column_to_json_key_mappings={"ts": "timestamp"},
|
|
4021
|
+
convert_dots_in_json_keys_to_underscores=True
|
|
4022
|
+
)
|
|
4023
|
+
'''
|
|
4024
|
+
if __debug__:
|
|
4025
|
+
type_hints = typing.get_type_hints(_typecheckingstub__bf09507e4b7ba6abbfda17b454958c835099a4ff05786b47104813d50d0d5e6f)
|
|
4026
|
+
check_type(argname="argument column_to_json_key_mappings", value=column_to_json_key_mappings, expected_type=type_hints["column_to_json_key_mappings"])
|
|
4027
|
+
check_type(argname="argument convert_dots_in_json_keys_to_underscores", value=convert_dots_in_json_keys_to_underscores, expected_type=type_hints["convert_dots_in_json_keys_to_underscores"])
|
|
4028
|
+
check_type(argname="argument lowercase_column_names", value=lowercase_column_names, expected_type=type_hints["lowercase_column_names"])
|
|
4029
|
+
self._values: typing.Dict[builtins.str, typing.Any] = {}
|
|
4030
|
+
if column_to_json_key_mappings is not None:
|
|
4031
|
+
self._values["column_to_json_key_mappings"] = column_to_json_key_mappings
|
|
4032
|
+
if convert_dots_in_json_keys_to_underscores is not None:
|
|
4033
|
+
self._values["convert_dots_in_json_keys_to_underscores"] = convert_dots_in_json_keys_to_underscores
|
|
4034
|
+
if lowercase_column_names is not None:
|
|
4035
|
+
self._values["lowercase_column_names"] = lowercase_column_names
|
|
4036
|
+
|
|
4037
|
+
@builtins.property
|
|
4038
|
+
def column_to_json_key_mappings(
|
|
4039
|
+
self,
|
|
4040
|
+
) -> typing.Optional[typing.Mapping[builtins.str, builtins.str]]:
|
|
4041
|
+
'''Maps column names to JSON keys that aren't identical to the column names.
|
|
4042
|
+
|
|
4043
|
+
This is useful when the JSON contains keys that are Hive keywords.
|
|
4044
|
+
For example, ``timestamp`` is a Hive keyword. If you have a JSON key named ``timestamp``, set this parameter to ``{"ts": "timestamp"}`` to map this key to a column named ``ts``
|
|
4045
|
+
|
|
4046
|
+
:default: JSON keys are not renamed
|
|
4047
|
+
'''
|
|
4048
|
+
result = self._values.get("column_to_json_key_mappings")
|
|
4049
|
+
return typing.cast(typing.Optional[typing.Mapping[builtins.str, builtins.str]], result)
|
|
4050
|
+
|
|
4051
|
+
@builtins.property
|
|
4052
|
+
def convert_dots_in_json_keys_to_underscores(
|
|
4053
|
+
self,
|
|
4054
|
+
) -> typing.Optional[builtins.bool]:
|
|
4055
|
+
'''When set to ``true``, specifies that the names of the keys include dots and that you want Firehose to replace them with underscores.
|
|
4056
|
+
|
|
4057
|
+
This is useful because Apache Hive does not allow dots in column names.
|
|
4058
|
+
For example, if the JSON contains a key whose name is "a.b", you can define the column name to be "a_b" when using this option.
|
|
4059
|
+
|
|
4060
|
+
:default: ``false``
|
|
4061
|
+
'''
|
|
4062
|
+
result = self._values.get("convert_dots_in_json_keys_to_underscores")
|
|
4063
|
+
return typing.cast(typing.Optional[builtins.bool], result)
|
|
4064
|
+
|
|
4065
|
+
@builtins.property
|
|
4066
|
+
def lowercase_column_names(self) -> typing.Optional[builtins.bool]:
|
|
4067
|
+
'''Whether the JSON keys should be lowercased when written as column names.
|
|
4068
|
+
|
|
4069
|
+
:default: ``true``
|
|
4070
|
+
'''
|
|
4071
|
+
result = self._values.get("lowercase_column_names")
|
|
4072
|
+
return typing.cast(typing.Optional[builtins.bool], result)
|
|
4073
|
+
|
|
4074
|
+
def __eq__(self, rhs: typing.Any) -> builtins.bool:
|
|
4075
|
+
return isinstance(rhs, self.__class__) and rhs._values == self._values
|
|
4076
|
+
|
|
4077
|
+
def __ne__(self, rhs: typing.Any) -> builtins.bool:
|
|
4078
|
+
return not (rhs == self)
|
|
4079
|
+
|
|
4080
|
+
def __repr__(self) -> str:
|
|
4081
|
+
return "OpenXJsonInputFormatProps(%s)" % ", ".join(
|
|
4082
|
+
k + "=" + repr(v) for k, v in self._values.items()
|
|
4083
|
+
)
|
|
4084
|
+
|
|
4085
|
+
|
|
4086
|
+
class OrcCompression(
|
|
4087
|
+
metaclass=jsii.JSIIMeta,
|
|
4088
|
+
jsii_type="aws-cdk-lib.aws_kinesisfirehose.OrcCompression",
|
|
4089
|
+
):
|
|
4090
|
+
'''Possible compression options available for ORC OutputFormat.
|
|
4091
|
+
|
|
4092
|
+
:see: https://docs.aws.amazon.com/AWSCloudFormation/latest/TemplateReference/aws-properties-kinesisfirehose-deliverystream-orcserde.html#cfn-kinesisfirehose-deliverystream-orcserde-compression
|
|
4093
|
+
:exampleMetadata: infused
|
|
4094
|
+
|
|
4095
|
+
Example::
|
|
4096
|
+
|
|
4097
|
+
output_format = firehose.OrcOutputFormat(
|
|
4098
|
+
format_version=firehose.OrcFormatVersion.V0_11,
|
|
4099
|
+
block_size=Size.mebibytes(256),
|
|
4100
|
+
compression=firehose.OrcCompression.NONE,
|
|
4101
|
+
bloom_filter_columns=["columnA"],
|
|
4102
|
+
bloom_filter_false_positive_probability=0.1,
|
|
4103
|
+
dictionary_key_threshold=0.7,
|
|
4104
|
+
enable_padding=True,
|
|
4105
|
+
padding_tolerance=0.2,
|
|
4106
|
+
row_index_stride=9000,
|
|
4107
|
+
stripe_size=Size.mebibytes(32)
|
|
4108
|
+
)
|
|
4109
|
+
'''
|
|
4110
|
+
|
|
4111
|
+
@jsii.member(jsii_name="of")
|
|
4112
|
+
@builtins.classmethod
|
|
4113
|
+
def of(cls, value: builtins.str) -> "OrcCompression":
|
|
4114
|
+
'''Creates a new OrcCompression instance with a custom value.
|
|
4115
|
+
|
|
4116
|
+
:param value: -
|
|
4117
|
+
'''
|
|
4118
|
+
if __debug__:
|
|
4119
|
+
type_hints = typing.get_type_hints(_typecheckingstub__02948bebe4c2930eed4c6124d0d7f279623b5812d4fe6983e8d186c02a4b2f5c)
|
|
4120
|
+
check_type(argname="argument value", value=value, expected_type=type_hints["value"])
|
|
4121
|
+
return typing.cast("OrcCompression", jsii.sinvoke(cls, "of", [value]))
|
|
4122
|
+
|
|
4123
|
+
@jsii.python.classproperty
|
|
4124
|
+
@jsii.member(jsii_name="NONE")
|
|
4125
|
+
def NONE(cls) -> "OrcCompression":
|
|
4126
|
+
'''Uncompressed.'''
|
|
4127
|
+
return typing.cast("OrcCompression", jsii.sget(cls, "NONE"))
|
|
4128
|
+
|
|
4129
|
+
@jsii.python.classproperty
|
|
4130
|
+
@jsii.member(jsii_name="SNAPPY")
|
|
4131
|
+
def SNAPPY(cls) -> "OrcCompression":
|
|
4132
|
+
'''Snappy.'''
|
|
4133
|
+
return typing.cast("OrcCompression", jsii.sget(cls, "SNAPPY"))
|
|
4134
|
+
|
|
4135
|
+
@jsii.python.classproperty
|
|
4136
|
+
@jsii.member(jsii_name="ZLIB")
|
|
4137
|
+
def ZLIB(cls) -> "OrcCompression":
|
|
4138
|
+
'''Gzip.'''
|
|
4139
|
+
return typing.cast("OrcCompression", jsii.sget(cls, "ZLIB"))
|
|
4140
|
+
|
|
4141
|
+
@builtins.property
|
|
4142
|
+
@jsii.member(jsii_name="value")
|
|
4143
|
+
def value(self) -> builtins.str:
|
|
4144
|
+
'''the string value of the Serde Compression.'''
|
|
4145
|
+
return typing.cast(builtins.str, jsii.get(self, "value"))
|
|
4146
|
+
|
|
4147
|
+
|
|
4148
|
+
@jsii.enum(jsii_type="aws-cdk-lib.aws_kinesisfirehose.OrcFormatVersion")
|
|
4149
|
+
class OrcFormatVersion(enum.Enum):
|
|
4150
|
+
'''The available WriterVersions for ORC output format.
|
|
4151
|
+
|
|
4152
|
+
:exampleMetadata: infused
|
|
4153
|
+
|
|
4154
|
+
Example::
|
|
4155
|
+
|
|
4156
|
+
output_format = firehose.OrcOutputFormat(
|
|
4157
|
+
format_version=firehose.OrcFormatVersion.V0_11,
|
|
4158
|
+
block_size=Size.mebibytes(256),
|
|
4159
|
+
compression=firehose.OrcCompression.NONE,
|
|
4160
|
+
bloom_filter_columns=["columnA"],
|
|
4161
|
+
bloom_filter_false_positive_probability=0.1,
|
|
4162
|
+
dictionary_key_threshold=0.7,
|
|
4163
|
+
enable_padding=True,
|
|
4164
|
+
padding_tolerance=0.2,
|
|
4165
|
+
row_index_stride=9000,
|
|
4166
|
+
stripe_size=Size.mebibytes(32)
|
|
4167
|
+
)
|
|
4168
|
+
'''
|
|
4169
|
+
|
|
4170
|
+
V0_11 = "V0_11"
|
|
4171
|
+
'''Use V0_11 ORC writer version when writing the output of the record transformation.'''
|
|
4172
|
+
V0_12 = "V0_12"
|
|
4173
|
+
'''Use V0_12 ORC writer version when writing the output of the record transformation.'''
|
|
4174
|
+
|
|
4175
|
+
|
|
4176
|
+
@jsii.implements(IOutputFormat)
|
|
4177
|
+
class OrcOutputFormat(
|
|
4178
|
+
metaclass=jsii.JSIIMeta,
|
|
4179
|
+
jsii_type="aws-cdk-lib.aws_kinesisfirehose.OrcOutputFormat",
|
|
4180
|
+
):
|
|
4181
|
+
'''This class specifies properties for ORC output format for record format conversion.
|
|
4182
|
+
|
|
4183
|
+
You should only need to specify an instance of this class if the default configuration does not suit your needs.
|
|
4184
|
+
|
|
4185
|
+
:exampleMetadata: infused
|
|
4186
|
+
|
|
4187
|
+
Example::
|
|
4188
|
+
|
|
4189
|
+
output_format = firehose.OrcOutputFormat(
|
|
4190
|
+
format_version=firehose.OrcFormatVersion.V0_11,
|
|
4191
|
+
block_size=Size.mebibytes(256),
|
|
4192
|
+
compression=firehose.OrcCompression.NONE,
|
|
4193
|
+
bloom_filter_columns=["columnA"],
|
|
4194
|
+
bloom_filter_false_positive_probability=0.1,
|
|
4195
|
+
dictionary_key_threshold=0.7,
|
|
4196
|
+
enable_padding=True,
|
|
4197
|
+
padding_tolerance=0.2,
|
|
4198
|
+
row_index_stride=9000,
|
|
4199
|
+
stripe_size=Size.mebibytes(32)
|
|
4200
|
+
)
|
|
4201
|
+
'''
|
|
4202
|
+
|
|
4203
|
+
def __init__(
|
|
4204
|
+
self,
|
|
4205
|
+
*,
|
|
4206
|
+
block_size: typing.Optional[_Size_7b441c34] = None,
|
|
4207
|
+
bloom_filter_columns: typing.Optional[typing.Sequence[builtins.str]] = None,
|
|
4208
|
+
bloom_filter_false_positive_probability: typing.Optional[jsii.Number] = None,
|
|
4209
|
+
compression: typing.Optional[OrcCompression] = None,
|
|
4210
|
+
dictionary_key_threshold: typing.Optional[jsii.Number] = None,
|
|
4211
|
+
enable_padding: typing.Optional[builtins.bool] = None,
|
|
4212
|
+
format_version: typing.Optional[OrcFormatVersion] = None,
|
|
4213
|
+
padding_tolerance: typing.Optional[jsii.Number] = None,
|
|
4214
|
+
row_index_stride: typing.Optional[jsii.Number] = None,
|
|
4215
|
+
+        stripe_size: typing.Optional[_Size_7b441c34] = None,
+    ) -> None:
+        '''
+        :param block_size: The Hadoop Distributed File System (HDFS) block size. This is useful if you intend to copy the data from Amazon S3 to HDFS before querying. Firehose uses this value for padding calculations. Default: ``Size.mebibytes(256)``
+        :param bloom_filter_columns: The column names for which you want Firehose to create bloom filters. Default: no bloom filters are created
+        :param bloom_filter_false_positive_probability: The Bloom filter false positive probability (FPP). The lower the FPP, the bigger the bloom filter. Default: ``0.05``
+        :param compression: The compression code to use over data blocks. The possible values are ``NONE`` , ``SNAPPY`` , and ``ZLIB``. Use ``SNAPPY`` for higher decompression speed. Use ``GZIP`` if the compression ratio is more important than speed. Default: ``SNAPPY``
+        :param dictionary_key_threshold: Determines whether dictionary encoding should be applied to a column. If the number of distinct keys (unique values) in a column exceeds this fraction of the total non-null rows in that column, dictionary encoding will be turned off for that specific column. To turn off dictionary encoding, set this threshold to 0. To always use dictionary encoding, set this threshold to 1. Default: ``0.8``
+        :param enable_padding: Set this to ``true`` to indicate that you want stripes to be padded to the HDFS block boundaries. This is useful if you intend to copy the data from Amazon S3 to HDFS before querying. Default: ``false``
+        :param format_version: The version of the ORC format to write. The possible values are ``V0_11`` and ``V0_12``. Default: ``V0_12``
+        :param padding_tolerance: A number between 0 and 1 that defines the tolerance for block padding as a decimal fraction of stripe size. The default value is 0.05, which means 5 percent of stripe size. For the default values of 64 MiB ORC stripes and 256 MiB HDFS blocks, the default block padding tolerance of 5 percent reserves a maximum of 3.2 MiB for padding within the 256 MiB block. In such a case, if the available size within the block is more than 3.2 MiB, a new, smaller stripe is inserted to fit within that space. This ensures that no stripe crosses block boundaries and causes remote reads within a node-local task. Kinesis Data Firehose ignores this parameter when ``EnablePadding`` is ``false`` . Default: ``0.05`` if ``enablePadding`` is ``true``
+        :param row_index_stride: The number of rows between index entries. Default: 10000
+        :param stripe_size: The number of bytes in each stripe. The default is 64 MiB and the minimum is 8 MiB. Default: ``Size.mebibytes(64)``
+        '''
+        props = OrcOutputFormatProps(
+            block_size=block_size,
+            bloom_filter_columns=bloom_filter_columns,
+            bloom_filter_false_positive_probability=bloom_filter_false_positive_probability,
+            compression=compression,
+            dictionary_key_threshold=dictionary_key_threshold,
+            enable_padding=enable_padding,
+            format_version=format_version,
+            padding_tolerance=padding_tolerance,
+            row_index_stride=row_index_stride,
+            stripe_size=stripe_size,
+        )
+
+        jsii.create(self.__class__, self, [props])
+
+    @jsii.member(jsii_name="createOutputFormatConfig")
+    def create_output_format_config(
+        self,
+    ) -> "CfnDeliveryStream.OutputFormatConfigurationProperty":
+        '''Renders the cloudformation properties for the output format.'''
+        return typing.cast("CfnDeliveryStream.OutputFormatConfigurationProperty", jsii.invoke(self, "createOutputFormatConfig", []))
+
+    @builtins.property
+    @jsii.member(jsii_name="props")
+    def props(self) -> typing.Optional["OrcOutputFormatProps"]:
+        '''Properties for the ORC output format.'''
+        return typing.cast(typing.Optional["OrcOutputFormatProps"], jsii.get(self, "props"))
+
+
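The class above only captures ORC-specific tuning; the values are rendered into CloudFormation by ``createOutputFormatConfig``. A minimal usage sketch, assuming ``aws_kinesisfirehose`` is imported as ``firehose`` and using a placeholder column name (not part of the diff)::

    from aws_cdk import Size
    from aws_cdk import aws_kinesisfirehose as firehose

    # Tune ORC output for data record format conversion; unspecified
    # options keep the documented defaults (SNAPPY compression, 64 MiB stripes).
    orc_output = firehose.OrcOutputFormat(
        bloom_filter_columns=["ticket_id"],  # placeholder column name
        enable_padding=True,
        padding_tolerance=0.05,
        stripe_size=Size.mebibytes(64),
    )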
+@jsii.data_type(
+    jsii_type="aws-cdk-lib.aws_kinesisfirehose.OrcOutputFormatProps",
+    jsii_struct_bases=[],
+    name_mapping={
+        "block_size": "blockSize",
+        "bloom_filter_columns": "bloomFilterColumns",
+        "bloom_filter_false_positive_probability": "bloomFilterFalsePositiveProbability",
+        "compression": "compression",
+        "dictionary_key_threshold": "dictionaryKeyThreshold",
+        "enable_padding": "enablePadding",
+        "format_version": "formatVersion",
+        "padding_tolerance": "paddingTolerance",
+        "row_index_stride": "rowIndexStride",
+        "stripe_size": "stripeSize",
+    },
+)
+class OrcOutputFormatProps:
+    def __init__(
+        self,
+        *,
+        block_size: typing.Optional[_Size_7b441c34] = None,
+        bloom_filter_columns: typing.Optional[typing.Sequence[builtins.str]] = None,
+        bloom_filter_false_positive_probability: typing.Optional[jsii.Number] = None,
+        compression: typing.Optional[OrcCompression] = None,
+        dictionary_key_threshold: typing.Optional[jsii.Number] = None,
+        enable_padding: typing.Optional[builtins.bool] = None,
+        format_version: typing.Optional[OrcFormatVersion] = None,
+        padding_tolerance: typing.Optional[jsii.Number] = None,
+        row_index_stride: typing.Optional[jsii.Number] = None,
+        stripe_size: typing.Optional[_Size_7b441c34] = None,
+    ) -> None:
+        '''Props for ORC output format for data record format conversion.
+
+        :param block_size: The Hadoop Distributed File System (HDFS) block size. This is useful if you intend to copy the data from Amazon S3 to HDFS before querying. Firehose uses this value for padding calculations. Default: ``Size.mebibytes(256)``
+        :param bloom_filter_columns: The column names for which you want Firehose to create bloom filters. Default: no bloom filters are created
+        :param bloom_filter_false_positive_probability: The Bloom filter false positive probability (FPP). The lower the FPP, the bigger the bloom filter. Default: ``0.05``
+        :param compression: The compression code to use over data blocks. The possible values are ``NONE`` , ``SNAPPY`` , and ``ZLIB``. Use ``SNAPPY`` for higher decompression speed. Use ``GZIP`` if the compression ratio is more important than speed. Default: ``SNAPPY``
+        :param dictionary_key_threshold: Determines whether dictionary encoding should be applied to a column. If the number of distinct keys (unique values) in a column exceeds this fraction of the total non-null rows in that column, dictionary encoding will be turned off for that specific column. To turn off dictionary encoding, set this threshold to 0. To always use dictionary encoding, set this threshold to 1. Default: ``0.8``
+        :param enable_padding: Set this to ``true`` to indicate that you want stripes to be padded to the HDFS block boundaries. This is useful if you intend to copy the data from Amazon S3 to HDFS before querying. Default: ``false``
+        :param format_version: The version of the ORC format to write. The possible values are ``V0_11`` and ``V0_12``. Default: ``V0_12``
+        :param padding_tolerance: A number between 0 and 1 that defines the tolerance for block padding as a decimal fraction of stripe size. The default value is 0.05, which means 5 percent of stripe size. For the default values of 64 MiB ORC stripes and 256 MiB HDFS blocks, the default block padding tolerance of 5 percent reserves a maximum of 3.2 MiB for padding within the 256 MiB block. In such a case, if the available size within the block is more than 3.2 MiB, a new, smaller stripe is inserted to fit within that space. This ensures that no stripe crosses block boundaries and causes remote reads within a node-local task. Kinesis Data Firehose ignores this parameter when ``EnablePadding`` is ``false`` . Default: ``0.05`` if ``enablePadding`` is ``true``
+        :param row_index_stride: The number of rows between index entries. Default: 10000
+        :param stripe_size: The number of bytes in each stripe. The default is 64 MiB and the minimum is 8 MiB. Default: ``Size.mebibytes(64)``
+
+        :exampleMetadata: infused
+
+        Example::
+
+            output_format = firehose.OrcOutputFormat(
+                format_version=firehose.OrcFormatVersion.V0_11,
+                block_size=Size.mebibytes(256),
+                compression=firehose.OrcCompression.NONE,
+                bloom_filter_columns=["columnA"],
+                bloom_filter_false_positive_probability=0.1,
+                dictionary_key_threshold=0.7,
+                enable_padding=True,
+                padding_tolerance=0.2,
+                row_index_stride=9000,
+                stripe_size=Size.mebibytes(32)
+            )
+        '''
+        if __debug__:
+            type_hints = typing.get_type_hints(_typecheckingstub__23d7be2aebca47c4f726452fdac9d7e13c1d079ee9bbc0eb6bf735c5fa7d1ec6)
+            check_type(argname="argument block_size", value=block_size, expected_type=type_hints["block_size"])
+            check_type(argname="argument bloom_filter_columns", value=bloom_filter_columns, expected_type=type_hints["bloom_filter_columns"])
+            check_type(argname="argument bloom_filter_false_positive_probability", value=bloom_filter_false_positive_probability, expected_type=type_hints["bloom_filter_false_positive_probability"])
+            check_type(argname="argument compression", value=compression, expected_type=type_hints["compression"])
+            check_type(argname="argument dictionary_key_threshold", value=dictionary_key_threshold, expected_type=type_hints["dictionary_key_threshold"])
+            check_type(argname="argument enable_padding", value=enable_padding, expected_type=type_hints["enable_padding"])
+            check_type(argname="argument format_version", value=format_version, expected_type=type_hints["format_version"])
+            check_type(argname="argument padding_tolerance", value=padding_tolerance, expected_type=type_hints["padding_tolerance"])
+            check_type(argname="argument row_index_stride", value=row_index_stride, expected_type=type_hints["row_index_stride"])
+            check_type(argname="argument stripe_size", value=stripe_size, expected_type=type_hints["stripe_size"])
+        self._values: typing.Dict[builtins.str, typing.Any] = {}
+        if block_size is not None:
+            self._values["block_size"] = block_size
+        if bloom_filter_columns is not None:
+            self._values["bloom_filter_columns"] = bloom_filter_columns
+        if bloom_filter_false_positive_probability is not None:
+            self._values["bloom_filter_false_positive_probability"] = bloom_filter_false_positive_probability
+        if compression is not None:
+            self._values["compression"] = compression
+        if dictionary_key_threshold is not None:
+            self._values["dictionary_key_threshold"] = dictionary_key_threshold
+        if enable_padding is not None:
+            self._values["enable_padding"] = enable_padding
+        if format_version is not None:
+            self._values["format_version"] = format_version
+        if padding_tolerance is not None:
+            self._values["padding_tolerance"] = padding_tolerance
+        if row_index_stride is not None:
+            self._values["row_index_stride"] = row_index_stride
+        if stripe_size is not None:
+            self._values["stripe_size"] = stripe_size
+
+    @builtins.property
+    def block_size(self) -> typing.Optional[_Size_7b441c34]:
+        '''The Hadoop Distributed File System (HDFS) block size.
+
+        This is useful if you intend to copy the data from Amazon S3 to HDFS before querying.
+        Firehose uses this value for padding calculations.
+
+        :default: ``Size.mebibytes(256)``
+
+        :minimum: ``Size.mebibytes(64)``
+        '''
+        result = self._values.get("block_size")
+        return typing.cast(typing.Optional[_Size_7b441c34], result)
+
+    @builtins.property
+    def bloom_filter_columns(self) -> typing.Optional[typing.List[builtins.str]]:
+        '''The column names for which you want Firehose to create bloom filters.
+
+        :default: no bloom filters are created
+
+        :see: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-kinesisfirehose-deliverystream-orcserde.html#cfn-kinesisfirehose-deliverystream-orcserde-bloomfiltercolumns
+        '''
+        result = self._values.get("bloom_filter_columns")
+        return typing.cast(typing.Optional[typing.List[builtins.str]], result)
+
+    @builtins.property
+    def bloom_filter_false_positive_probability(self) -> typing.Optional[jsii.Number]:
+        '''The Bloom filter false positive probability (FPP).
+
+        The lower the FPP, the bigger the bloom filter.
+
+        :default: ``0.05``
+
+        :see: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-kinesisfirehose-deliverystream-orcserde.html#cfn-kinesisfirehose-deliverystream-orcserde-bloomfilterfalsepositiveprobability
+        :maximum: ``1``
+        :minimum: ``0``
+        '''
+        result = self._values.get("bloom_filter_false_positive_probability")
+        return typing.cast(typing.Optional[jsii.Number], result)
+
+    @builtins.property
+    def compression(self) -> typing.Optional[OrcCompression]:
+        '''The compression code to use over data blocks.
+
+        The possible values are ``NONE`` , ``SNAPPY`` , and ``ZLIB``.
+        Use ``SNAPPY`` for higher decompression speed.
+        Use ``GZIP`` if the compression ratio is more important than speed.
+
+        :default: ``SNAPPY``
+
+        :see: https://docs.aws.amazon.com/AWSCloudFormation/latest/TemplateReference/aws-properties-kinesisfirehose-deliverystream-orcserde.html#cfn-kinesisfirehose-deliverystream-orcserde-compression
+        '''
+        result = self._values.get("compression")
+        return typing.cast(typing.Optional[OrcCompression], result)
+
+    @builtins.property
+    def dictionary_key_threshold(self) -> typing.Optional[jsii.Number]:
+        '''Determines whether dictionary encoding should be applied to a column.
+
+        If the number of distinct keys (unique values) in a column exceeds this fraction of the total non-null rows in that column, dictionary encoding will be turned off for that specific column.
+
+        To turn off dictionary encoding, set this threshold to 0. To always use dictionary encoding, set this threshold to 1.
+
+        :default: ``0.8``
+
+        :see: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-kinesisfirehose-deliverystream-orcserde.html#cfn-kinesisfirehose-deliverystream-orcserde-dictionarykeythreshold
+        :maximum: ``1``
+        :minimum: ``0``
+        '''
+        result = self._values.get("dictionary_key_threshold")
+        return typing.cast(typing.Optional[jsii.Number], result)
+
+    @builtins.property
+    def enable_padding(self) -> typing.Optional[builtins.bool]:
+        '''Set this to ``true`` to indicate that you want stripes to be padded to the HDFS block boundaries.
+
+        This is useful if you intend to copy the data from Amazon S3 to HDFS before querying.
+
+        :default: ``false``
+
+        :see: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-kinesisfirehose-deliverystream-orcserde.html#cfn-kinesisfirehose-deliverystream-orcserde-enablepadding
+        '''
+        result = self._values.get("enable_padding")
+        return typing.cast(typing.Optional[builtins.bool], result)
+
+    @builtins.property
+    def format_version(self) -> typing.Optional[OrcFormatVersion]:
+        '''The version of the ORC format to write.
+
+        The possible values are ``V0_11`` and ``V0_12``.
+
+        :default: ``V0_12``
+
+        :see: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-kinesisfirehose-deliverystream-orcserde.html#cfn-kinesisfirehose-deliverystream-orcserde-formatversion
+        '''
+        result = self._values.get("format_version")
+        return typing.cast(typing.Optional[OrcFormatVersion], result)
+
+    @builtins.property
+    def padding_tolerance(self) -> typing.Optional[jsii.Number]:
+        '''A number between 0 and 1 that defines the tolerance for block padding as a decimal fraction of stripe size.
+
+        The default value is 0.05, which means 5 percent of stripe size.
+
+        For the default values of 64 MiB ORC stripes and 256 MiB HDFS blocks, the default block padding tolerance of 5 percent reserves a maximum of 3.2 MiB for padding within the 256 MiB block.
+        In such a case, if the available size within the block is more than 3.2 MiB, a new, smaller stripe is inserted to fit within that space.
+        This ensures that no stripe crosses block boundaries and causes remote reads within a node-local task.
+
+        Kinesis Data Firehose ignores this parameter when ``EnablePadding`` is ``false`` .
+
+        :default: ``0.05`` if ``enablePadding`` is ``true``
+
+        :see: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-kinesisfirehose-deliverystream-orcserde.html#cfn-kinesisfirehose-deliverystream-orcserde-paddingtolerance
+        '''
+        result = self._values.get("padding_tolerance")
+        return typing.cast(typing.Optional[jsii.Number], result)
+
+    @builtins.property
+    def row_index_stride(self) -> typing.Optional[jsii.Number]:
+        '''The number of rows between index entries.
+
+        :default: 10000
+
+        :see: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-kinesisfirehose-deliverystream-orcserde.html#cfn-kinesisfirehose-deliverystream-orcserde-rowindexstride
+        :minimum: 1000
+        '''
+        result = self._values.get("row_index_stride")
+        return typing.cast(typing.Optional[jsii.Number], result)
+
+    @builtins.property
+    def stripe_size(self) -> typing.Optional[_Size_7b441c34]:
+        '''The number of bytes in each stripe.
+
+        The default is 64 MiB and the minimum is 8 MiB.
+
+        :default: ``Size.mebibytes(64)``
+
+        :see: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-kinesisfirehose-deliverystream-orcserde.html#cfn-kinesisfirehose-deliverystream-orcserde-stripesizebytes
+        :minimum: ``Size.mebibytes(8)``
+        '''
+        result = self._values.get("stripe_size")
+        return typing.cast(typing.Optional[_Size_7b441c34], result)
+
+    def __eq__(self, rhs: typing.Any) -> builtins.bool:
+        return isinstance(rhs, self.__class__) and rhs._values == self._values
+
+    def __ne__(self, rhs: typing.Any) -> builtins.bool:
+        return not (rhs == self)
+
+    def __repr__(self) -> str:
+        return "OrcOutputFormatProps(%s)" % ", ".join(
+            k + "=" + repr(v) for k, v in self._values.items()
+        )
+
+
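``OrcOutputFormatProps`` is a plain jsii struct: only the keywords you pass end up in ``_values`` (everything else falls back to the documented defaults at render time), and equality is by value. A small sketch, with a placeholder column name::

    props = firehose.OrcOutputFormatProps(
        bloom_filter_columns=["ticket_id"],
        bloom_filter_false_positive_probability=0.01,
    )
    # Structs holding the same values compare equal, which is handy in unit tests.
    assert props == firehose.OrcOutputFormatProps(
        bloom_filter_columns=["ticket_id"],
        bloom_filter_false_positive_probability=0.01,
    )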
+class OutputFormat(
+    metaclass=jsii.JSIIMeta,
+    jsii_type="aws-cdk-lib.aws_kinesisfirehose.OutputFormat",
+):
+    '''Represents possible output formats when performing record data conversion.
+
+    :exampleMetadata: infused
+
+    Example::
+
+        # bucket: s3.Bucket
+        # schema_glue_table: glue.CfnTable
+
+        s3_destination = firehose.S3Bucket(bucket,
+            data_format_conversion=firehose.DataFormatConversionProps(
+                schema_configuration=firehose.SchemaConfiguration.from_cfn_table(schema_glue_table),
+                input_format=firehose.InputFormat.OPENX_JSON,
+                output_format=firehose.OutputFormat.PARQUET
+            )
+        )
+    '''
+
+    @jsii.python.classproperty
+    @jsii.member(jsii_name="ORC")
+    def ORC(cls) -> OrcOutputFormat:
+        '''Write output files in ORC.'''
+        return typing.cast(OrcOutputFormat, jsii.sget(cls, "ORC"))
+
+    @jsii.python.classproperty
+    @jsii.member(jsii_name="PARQUET")
+    def PARQUET(cls) -> "ParquetOutputFormat":
+        '''Write output files in Parquet.'''
+        return typing.cast("ParquetOutputFormat", jsii.sget(cls, "PARQUET"))
+
+
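``OutputFormat.ORC`` and ``OutputFormat.PARQUET`` are preconfigured instances; build an ``OrcOutputFormat`` or ``ParquetOutputFormat`` yourself only when the defaults need tuning. A sketch of the ORC variant of the example above (same placeholder bucket and Glue table)::

    s3_destination = firehose.S3Bucket(bucket,
        data_format_conversion=firehose.DataFormatConversionProps(
            schema_configuration=firehose.SchemaConfiguration.from_cfn_table(schema_glue_table),
            input_format=firehose.InputFormat.OPENX_JSON,
            output_format=firehose.OutputFormat.ORC
        )
    )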
+class ParquetCompression(
+    metaclass=jsii.JSIIMeta,
+    jsii_type="aws-cdk-lib.aws_kinesisfirehose.ParquetCompression",
+):
+    '''Possible compression options available for Parquet OutputFormat.
+
+    :see: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-kinesisfirehose-deliverystream-parquetserde.html#cfn-kinesisfirehose-deliverystream-parquetserde-compression
+    :exampleMetadata: infused
+
+    Example::
+
+        output_format = firehose.ParquetOutputFormat(
+            block_size=Size.mebibytes(512),
+            compression=firehose.ParquetCompression.UNCOMPRESSED,
+            enable_dictionary_compression=True,
+            max_padding=Size.bytes(10),
+            page_size=Size.mebibytes(2),
+            writer_version=firehose.ParquetWriterVersion.V2
+        )
+    '''
+
+    @jsii.member(jsii_name="of")
+    @builtins.classmethod
+    def of(cls, value: builtins.str) -> "ParquetCompression":
+        '''Creates a new ParquetCompression instance with a custom value.
+
+        :param value: -
+        '''
+        if __debug__:
+            type_hints = typing.get_type_hints(_typecheckingstub__61bd74ac3570328dbd418a538644f7198c553bd1d41a6ca4a6136f48d7cf4d50)
+            check_type(argname="argument value", value=value, expected_type=type_hints["value"])
+        return typing.cast("ParquetCompression", jsii.sinvoke(cls, "of", [value]))
+
+    @jsii.python.classproperty
+    @jsii.member(jsii_name="GZIP")
+    def GZIP(cls) -> "ParquetCompression":
+        '''Gzip.'''
+        return typing.cast("ParquetCompression", jsii.sget(cls, "GZIP"))
+
+    @jsii.python.classproperty
+    @jsii.member(jsii_name="SNAPPY")
+    def SNAPPY(cls) -> "ParquetCompression":
+        '''Snappy.'''
+        return typing.cast("ParquetCompression", jsii.sget(cls, "SNAPPY"))
+
+    @jsii.python.classproperty
+    @jsii.member(jsii_name="UNCOMPRESSED")
+    def UNCOMPRESSED(cls) -> "ParquetCompression":
+        '''Uncompressed.'''
+        return typing.cast("ParquetCompression", jsii.sget(cls, "UNCOMPRESSED"))
+
+    @builtins.property
+    @jsii.member(jsii_name="value")
+    def value(self) -> builtins.str:
+        '''the string value of the Serde Compression.'''
+        return typing.cast(builtins.str, jsii.get(self, "value"))
+
+
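``of()`` is an escape hatch for compression codes that the static properties do not model; the built-in properties already cover the values documented by the CloudFormation schema. A sketch, assuming ``of`` simply wraps the string it is given (``"GZIP"`` here only mirrors the existing property, for illustration)::

    # Presumably equivalent to firehose.ParquetCompression.GZIP
    custom = firehose.ParquetCompression.of("GZIP")
    assert custom.value == "GZIP"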
+@jsii.implements(IOutputFormat)
+class ParquetOutputFormat(
+    metaclass=jsii.JSIIMeta,
+    jsii_type="aws-cdk-lib.aws_kinesisfirehose.ParquetOutputFormat",
+):
+    '''This class specifies properties for Parquet output format for record format conversion.
+
+    You should only need to specify an instance of this class if the default configuration does not suit your needs.
+
+    :exampleMetadata: infused
+
+    Example::
+
+        output_format = firehose.ParquetOutputFormat(
+            block_size=Size.mebibytes(512),
+            compression=firehose.ParquetCompression.UNCOMPRESSED,
+            enable_dictionary_compression=True,
+            max_padding=Size.bytes(10),
+            page_size=Size.mebibytes(2),
+            writer_version=firehose.ParquetWriterVersion.V2
+        )
+    '''
+
+    def __init__(
+        self,
+        *,
+        block_size: typing.Optional[_Size_7b441c34] = None,
+        compression: typing.Optional[ParquetCompression] = None,
+        enable_dictionary_compression: typing.Optional[builtins.bool] = None,
+        max_padding: typing.Optional[_Size_7b441c34] = None,
+        page_size: typing.Optional[_Size_7b441c34] = None,
+        writer_version: typing.Optional["ParquetWriterVersion"] = None,
+    ) -> None:
+        '''
+        :param block_size: The Hadoop Distributed File System (HDFS) block size. This is useful if you intend to copy the data from Amazon S3 to HDFS before querying. Firehose uses this value for padding calculations. Default: ``Size.mebibytes(256)``
+        :param compression: The compression code to use over data blocks. The possible values are ``UNCOMPRESSED`` , ``SNAPPY`` , and ``GZIP``. Use ``SNAPPY`` for higher decompression speed. Use ``GZIP`` if the compression ratio is more important than speed. Default: ``SNAPPY``
+        :param enable_dictionary_compression: Indicates whether to enable dictionary compression. Default: ``false``
+        :param max_padding: The maximum amount of padding to apply. This is useful if you intend to copy the data from Amazon S3 to HDFS before querying. Default: no padding is applied
+        :param page_size: The Parquet page size. Column chunks are divided into pages. A page is conceptually an indivisible unit (in terms of compression and encoding). The minimum value is 64 KiB and the default is 1 MiB. Default: ``Size.mebibytes(1)``
+        :param writer_version: Indicates the version of Parquet to output. The possible values are ``V1`` and ``V2`` Default: ``V1``
+        '''
+        props = ParquetOutputFormatProps(
+            block_size=block_size,
+            compression=compression,
+            enable_dictionary_compression=enable_dictionary_compression,
+            max_padding=max_padding,
+            page_size=page_size,
+            writer_version=writer_version,
+        )
+
+        jsii.create(self.__class__, self, [props])
+
+    @jsii.member(jsii_name="createOutputFormatConfig")
+    def create_output_format_config(
+        self,
+    ) -> "CfnDeliveryStream.OutputFormatConfigurationProperty":
+        '''Renders the cloudformation properties for the output format.'''
+        return typing.cast("CfnDeliveryStream.OutputFormatConfigurationProperty", jsii.invoke(self, "createOutputFormatConfig", []))
+
+    @builtins.property
+    @jsii.member(jsii_name="props")
+    def props(self) -> typing.Optional["ParquetOutputFormatProps"]:
+        '''Properties for the Parquet output format.'''
+        return typing.cast(typing.Optional["ParquetOutputFormatProps"], jsii.get(self, "props"))
+
+
+@jsii.data_type(
+    jsii_type="aws-cdk-lib.aws_kinesisfirehose.ParquetOutputFormatProps",
+    jsii_struct_bases=[],
+    name_mapping={
+        "block_size": "blockSize",
+        "compression": "compression",
+        "enable_dictionary_compression": "enableDictionaryCompression",
+        "max_padding": "maxPadding",
+        "page_size": "pageSize",
+        "writer_version": "writerVersion",
+    },
+)
+class ParquetOutputFormatProps:
+    def __init__(
+        self,
+        *,
+        block_size: typing.Optional[_Size_7b441c34] = None,
+        compression: typing.Optional[ParquetCompression] = None,
+        enable_dictionary_compression: typing.Optional[builtins.bool] = None,
+        max_padding: typing.Optional[_Size_7b441c34] = None,
+        page_size: typing.Optional[_Size_7b441c34] = None,
+        writer_version: typing.Optional["ParquetWriterVersion"] = None,
+    ) -> None:
+        '''Props for Parquet output format for data record format conversion.
+
+        :param block_size: The Hadoop Distributed File System (HDFS) block size. This is useful if you intend to copy the data from Amazon S3 to HDFS before querying. Firehose uses this value for padding calculations. Default: ``Size.mebibytes(256)``
+        :param compression: The compression code to use over data blocks. The possible values are ``UNCOMPRESSED`` , ``SNAPPY`` , and ``GZIP``. Use ``SNAPPY`` for higher decompression speed. Use ``GZIP`` if the compression ratio is more important than speed. Default: ``SNAPPY``
+        :param enable_dictionary_compression: Indicates whether to enable dictionary compression. Default: ``false``
+        :param max_padding: The maximum amount of padding to apply. This is useful if you intend to copy the data from Amazon S3 to HDFS before querying. Default: no padding is applied
+        :param page_size: The Parquet page size. Column chunks are divided into pages. A page is conceptually an indivisible unit (in terms of compression and encoding). The minimum value is 64 KiB and the default is 1 MiB. Default: ``Size.mebibytes(1)``
+        :param writer_version: Indicates the version of Parquet to output. The possible values are ``V1`` and ``V2`` Default: ``V1``
+
+        :exampleMetadata: infused
+
+        Example::
+
+            output_format = firehose.ParquetOutputFormat(
+                block_size=Size.mebibytes(512),
+                compression=firehose.ParquetCompression.UNCOMPRESSED,
+                enable_dictionary_compression=True,
+                max_padding=Size.bytes(10),
+                page_size=Size.mebibytes(2),
+                writer_version=firehose.ParquetWriterVersion.V2
+            )
+        '''
+        if __debug__:
+            type_hints = typing.get_type_hints(_typecheckingstub__30f6620eefd956acc092d03fba63b6121a146d30b699581234817a52e1d9792b)
+            check_type(argname="argument block_size", value=block_size, expected_type=type_hints["block_size"])
+            check_type(argname="argument compression", value=compression, expected_type=type_hints["compression"])
+            check_type(argname="argument enable_dictionary_compression", value=enable_dictionary_compression, expected_type=type_hints["enable_dictionary_compression"])
+            check_type(argname="argument max_padding", value=max_padding, expected_type=type_hints["max_padding"])
+            check_type(argname="argument page_size", value=page_size, expected_type=type_hints["page_size"])
+            check_type(argname="argument writer_version", value=writer_version, expected_type=type_hints["writer_version"])
+        self._values: typing.Dict[builtins.str, typing.Any] = {}
+        if block_size is not None:
+            self._values["block_size"] = block_size
+        if compression is not None:
+            self._values["compression"] = compression
+        if enable_dictionary_compression is not None:
+            self._values["enable_dictionary_compression"] = enable_dictionary_compression
+        if max_padding is not None:
+            self._values["max_padding"] = max_padding
+        if page_size is not None:
+            self._values["page_size"] = page_size
+        if writer_version is not None:
+            self._values["writer_version"] = writer_version
+
+    @builtins.property
+    def block_size(self) -> typing.Optional[_Size_7b441c34]:
+        '''The Hadoop Distributed File System (HDFS) block size.
+
+        This is useful if you intend to copy the data from Amazon S3 to HDFS before querying.
+        Firehose uses this value for padding calculations.
+
+        :default: ``Size.mebibytes(256)``
+
+        :minimum: ``Size.mebibytes(64)``
+        '''
+        result = self._values.get("block_size")
+        return typing.cast(typing.Optional[_Size_7b441c34], result)
+
+    @builtins.property
+    def compression(self) -> typing.Optional[ParquetCompression]:
+        '''The compression code to use over data blocks.
+
+        The possible values are ``UNCOMPRESSED`` , ``SNAPPY`` , and ``GZIP``.
+        Use ``SNAPPY`` for higher decompression speed.
+        Use ``GZIP`` if the compression ratio is more important than speed.
+
+        :default: ``SNAPPY``
+
+        :see: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-kinesisfirehose-deliverystream-parquetserde.html#cfn-kinesisfirehose-deliverystream-parquetserde-compression
+        '''
+        result = self._values.get("compression")
+        return typing.cast(typing.Optional[ParquetCompression], result)
+
+    @builtins.property
+    def enable_dictionary_compression(self) -> typing.Optional[builtins.bool]:
+        '''Indicates whether to enable dictionary compression.
+
+        :default: ``false``
+
+        :see: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-kinesisfirehose-deliverystream-parquetserde.html#cfn-kinesisfirehose-deliverystream-parquetserde-enabledictionarycompression
+        '''
+        result = self._values.get("enable_dictionary_compression")
+        return typing.cast(typing.Optional[builtins.bool], result)
+
+    @builtins.property
+    def max_padding(self) -> typing.Optional[_Size_7b441c34]:
+        '''The maximum amount of padding to apply.
+
+        This is useful if you intend to copy the data from Amazon S3 to HDFS before querying.
+
+        :default: no padding is applied
+
+        :see: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-kinesisfirehose-deliverystream-parquetserde.html#cfn-kinesisfirehose-deliverystream-parquetserde-maxpaddingbytes
+        '''
+        result = self._values.get("max_padding")
+        return typing.cast(typing.Optional[_Size_7b441c34], result)
+
+    @builtins.property
+    def page_size(self) -> typing.Optional[_Size_7b441c34]:
+        '''The Parquet page size.
+
+        Column chunks are divided into pages. A page is conceptually an indivisible unit (in terms of compression and encoding). The minimum value is 64 KiB and the default is 1 MiB.
+
+        :default: ``Size.mebibytes(1)``
+
+        :see: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-kinesisfirehose-deliverystream-parquetserde.html#cfn-kinesisfirehose-deliverystream-parquetserde-pagesizebytes
+        :minimum: ``Size.kibibytes(64)``
+        '''
+        result = self._values.get("page_size")
+        return typing.cast(typing.Optional[_Size_7b441c34], result)
+
+    @builtins.property
+    def writer_version(self) -> typing.Optional["ParquetWriterVersion"]:
+        '''Indicates the version of Parquet to output.
+
+        The possible values are ``V1`` and ``V2``
+
+        :default: ``V1``
+
+        :see: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-kinesisfirehose-deliverystream-parquetserde.html#cfn-kinesisfirehose-deliverystream-parquetserde-writerversion
+        '''
+        result = self._values.get("writer_version")
+        return typing.cast(typing.Optional["ParquetWriterVersion"], result)
+
+    def __eq__(self, rhs: typing.Any) -> builtins.bool:
+        return isinstance(rhs, self.__class__) and rhs._values == self._values
+
+    def __ne__(self, rhs: typing.Any) -> builtins.bool:
+        return not (rhs == self)
+
+    def __repr__(self) -> str:
+        return "ParquetOutputFormatProps(%s)" % ", ".join(
+            k + "=" + repr(v) for k, v in self._values.items()
+        )
+
+
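A tuning sketch that stays inside the documented constraints (page size at least 64 KiB, block size defaulting to 256 MiB); the specific values are illustrative, not recommendations::

    parquet_output = firehose.ParquetOutputFormat(
        # page_size must be at least 64 KiB; block_size keeps its 256 MiB default here.
        page_size=Size.kibibytes(256),
        block_size=Size.mebibytes(256),
        compression=firehose.ParquetCompression.GZIP,
        writer_version=firehose.ParquetWriterVersion.V1,
    )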
+@jsii.enum(jsii_type="aws-cdk-lib.aws_kinesisfirehose.ParquetWriterVersion")
+class ParquetWriterVersion(enum.Enum):
+    '''The available WriterVersions for Parquet output format.
+
+    :exampleMetadata: infused
+
+    Example::
+
+        output_format = firehose.ParquetOutputFormat(
+            block_size=Size.mebibytes(512),
+            compression=firehose.ParquetCompression.UNCOMPRESSED,
+            enable_dictionary_compression=True,
+            max_padding=Size.bytes(10),
+            page_size=Size.mebibytes(2),
+            writer_version=firehose.ParquetWriterVersion.V2
+        )
+    '''
+
+    V1 = "V1"
+    '''Use V1 Parquet writer version when writing the output.'''
+    V2 = "V2"
+    '''Use V2 Parquet writer version when writing the output.'''
+
+
 @jsii.implements(IDestination)
 class S3Bucket(
     metaclass=jsii.JSIIMeta,
@@ -3620,6 +4881,7 @@ class S3Bucket(
         self,
         bucket: _IBucket_42e086fd,
         *,
+        data_format_conversion: typing.Optional[typing.Union[DataFormatConversionProps, typing.Dict[builtins.str, typing.Any]]] = None,
         file_extension: typing.Optional[builtins.str] = None,
         time_zone: typing.Optional[_TimeZone_cdd72ac9] = None,
         buffering_interval: typing.Optional[_Duration_4839e8c3] = None,
@@ -3635,10 +4897,11 @@ class S3Bucket(
     ) -> None:
         '''
        :param bucket: -
+        :param data_format_conversion: The input format, output format, and schema config for converting data from the JSON format to the Parquet or ORC format before writing to Amazon S3. Default: no data format conversion is done
        :param file_extension: Specify a file extension. It will override the default file extension appended by Data Format Conversion or S3 compression features such as ``.parquet`` or ``.gz``. File extension must start with a period (``.``) and can contain allowed characters: ``0-9a-z!-_.*'()``. Default: - The default file extension appended by Data Format Conversion or S3 compression features
        :param time_zone: The time zone you prefer. Default: - UTC
        :param buffering_interval: The length of time that Firehose buffers incoming data before delivering it to the S3 bucket. Minimum: Duration.seconds(0) Maximum: Duration.seconds(900) Default: Duration.seconds(300)
-        :param buffering_size: The size of the buffer that Amazon Data Firehose uses for incoming data before delivering it to the S3 bucket. Minimum: Size.mebibytes(1) Maximum: Size.mebibytes(128) Default: Size.mebibytes(5)
+        :param buffering_size: The size of the buffer that Amazon Data Firehose uses for incoming data before delivering it to the S3 bucket. Minimum: Size.mebibytes(1) when record data format conversion is disabled, Size.mebibytes(64) when it is enabled Maximum: Size.mebibytes(128) Default: Size.mebibytes(5) when record data format conversion is disabled, Size.mebibytes(128) when it is enabled
        :param compression: The type of compression that Amazon Data Firehose uses to compress the data that it delivers to the Amazon S3 bucket. The compression formats SNAPPY or ZIP cannot be specified for Amazon Redshift destinations because they are not supported by the Amazon Redshift COPY operation that reads from the S3 bucket. Default: - UNCOMPRESSED
        :param data_output_prefix: A prefix that Amazon Data Firehose evaluates and adds to records before writing them to S3. This prefix appears immediately following the bucket name. Default: "YYYY/MM/DD/HH"
        :param encryption_key: The AWS KMS key used to encrypt the data that it delivers to your Amazon S3 bucket. Default: - Data is not encrypted.
@@ -3652,6 +4915,7 @@ class S3Bucket(
             type_hints = typing.get_type_hints(_typecheckingstub__a2eaf455255fc260033aa24d456779f4b21172e8b4cf2c51f6355f415c9f3ccd)
             check_type(argname="argument bucket", value=bucket, expected_type=type_hints["bucket"])
         props = S3BucketProps(
+            data_format_conversion=data_format_conversion,
             file_extension=file_extension,
             time_zone=time_zone,
             buffering_interval=buffering_interval,
@@ -3698,6 +4962,7 @@ class S3Bucket(
         "processor": "processor",
         "role": "role",
         "s3_backup": "s3Backup",
+        "data_format_conversion": "dataFormatConversion",
         "file_extension": "fileExtension",
         "time_zone": "timeZone",
     },
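A usage sketch for the new ``data_format_conversion`` option on ``S3Bucket``, assuming the bucket, the Glue table and the rest of the delivery stream are defined elsewhere in the stack (names here are placeholders)::

    s3_destination = firehose.S3Bucket(bucket,
        data_format_conversion=firehose.DataFormatConversionProps(
            schema_configuration=firehose.SchemaConfiguration.from_cfn_table(schema_glue_table),
            input_format=firehose.InputFormat.OPENX_JSON,
            # A hand-built output format instead of the OutputFormat.PARQUET preset.
            output_format=firehose.ParquetOutputFormat(
                compression=firehose.ParquetCompression.GZIP,
            ),
        ),
        buffering_size=Size.mebibytes(128),
    )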
@@ -3716,13 +4981,14 @@ class S3BucketProps(CommonDestinationS3Props, CommonDestinationProps):
         processor: typing.Optional[IDataProcessor] = None,
         role: typing.Optional[_IRole_235f5d8e] = None,
         s3_backup: typing.Optional[typing.Union[DestinationS3BackupProps, typing.Dict[builtins.str, typing.Any]]] = None,
+        data_format_conversion: typing.Optional[typing.Union[DataFormatConversionProps, typing.Dict[builtins.str, typing.Any]]] = None,
         file_extension: typing.Optional[builtins.str] = None,
         time_zone: typing.Optional[_TimeZone_cdd72ac9] = None,
     ) -> None:
         '''Props for defining an S3 destination of an Amazon Data Firehose delivery stream.

         :param buffering_interval: The length of time that Firehose buffers incoming data before delivering it to the S3 bucket. Minimum: Duration.seconds(0) Maximum: Duration.seconds(900) Default: Duration.seconds(300)
-        :param buffering_size: The size of the buffer that Amazon Data Firehose uses for incoming data before delivering it to the S3 bucket. Minimum: Size.mebibytes(1) Maximum: Size.mebibytes(128) Default: Size.mebibytes(5)
+        :param buffering_size: The size of the buffer that Amazon Data Firehose uses for incoming data before delivering it to the S3 bucket. Minimum: Size.mebibytes(1) when record data format conversion is disabled, Size.mebibytes(64) when it is enabled Maximum: Size.mebibytes(128) Default: Size.mebibytes(5) when record data format conversion is disabled, Size.mebibytes(128) when it is enabled
         :param compression: The type of compression that Amazon Data Firehose uses to compress the data that it delivers to the Amazon S3 bucket. The compression formats SNAPPY or ZIP cannot be specified for Amazon Redshift destinations because they are not supported by the Amazon Redshift COPY operation that reads from the S3 bucket. Default: - UNCOMPRESSED
         :param data_output_prefix: A prefix that Amazon Data Firehose evaluates and adds to records before writing them to S3. This prefix appears immediately following the bucket name. Default: "YYYY/MM/DD/HH"
         :param encryption_key: The AWS KMS key used to encrypt the data that it delivers to your Amazon S3 bucket. Default: - Data is not encrypted.
@@ -3731,6 +4997,7 @@ class S3BucketProps(CommonDestinationS3Props, CommonDestinationProps):
         :param processor: The data transformation that should be performed on the data before writing to the destination. Default: - no data transformation will occur.
         :param role: The IAM role associated with this destination. Assumed by Amazon Data Firehose to invoke processors and write to destinations Default: - a role will be created with default permissions.
         :param s3_backup: The configuration for backing up source records to S3. Default: - source records will not be backed up to S3.
+        :param data_format_conversion: The input format, output format, and schema config for converting data from the JSON format to the Parquet or ORC format before writing to Amazon S3. Default: no data format conversion is done
         :param file_extension: Specify a file extension. It will override the default file extension appended by Data Format Conversion or S3 compression features such as ``.parquet`` or ``.gz``. File extension must start with a period (``.``) and can contain allowed characters: ``0-9a-z!-_.*'()``. Default: - The default file extension appended by Data Format Conversion or S3 compression features
         :param time_zone: The time zone you prefer. Default: - UTC

@@ -3760,6 +5027,8 @@ class S3BucketProps(CommonDestinationS3Props, CommonDestinationProps):
         '''
         if isinstance(s3_backup, dict):
             s3_backup = DestinationS3BackupProps(**s3_backup)
+        if isinstance(data_format_conversion, dict):
+            data_format_conversion = DataFormatConversionProps(**data_format_conversion)
         if __debug__:
             type_hints = typing.get_type_hints(_typecheckingstub__04b12dc503479d22af2396c4df8d38c37536719187eef6ddd01c18b529dcbfc9)
             check_type(argname="argument buffering_interval", value=buffering_interval, expected_type=type_hints["buffering_interval"])
@@ -3772,6 +5041,7 @@ class S3BucketProps(CommonDestinationS3Props, CommonDestinationProps):
             check_type(argname="argument processor", value=processor, expected_type=type_hints["processor"])
             check_type(argname="argument role", value=role, expected_type=type_hints["role"])
             check_type(argname="argument s3_backup", value=s3_backup, expected_type=type_hints["s3_backup"])
+            check_type(argname="argument data_format_conversion", value=data_format_conversion, expected_type=type_hints["data_format_conversion"])
             check_type(argname="argument file_extension", value=file_extension, expected_type=type_hints["file_extension"])
             check_type(argname="argument time_zone", value=time_zone, expected_type=type_hints["time_zone"])
         self._values: typing.Dict[builtins.str, typing.Any] = {}
@@ -3795,6 +5065,8 @@ class S3BucketProps(CommonDestinationS3Props, CommonDestinationProps):
             self._values["role"] = role
         if s3_backup is not None:
             self._values["s3_backup"] = s3_backup
+        if data_format_conversion is not None:
+            self._values["data_format_conversion"] = data_format_conversion
         if file_extension is not None:
             self._values["file_extension"] = file_extension
         if time_zone is not None:
@@ -3816,10 +5088,10 @@ class S3BucketProps(CommonDestinationS3Props, CommonDestinationProps):
     def buffering_size(self) -> typing.Optional[_Size_7b441c34]:
         '''The size of the buffer that Amazon Data Firehose uses for incoming data before delivering it to the S3 bucket.

-        Minimum: Size.mebibytes(1)
+        Minimum: Size.mebibytes(1) when record data format conversion is disabled, Size.mebibytes(64) when it is enabled
         Maximum: Size.mebibytes(128)

-        :default: Size.mebibytes(5)
+        :default: Size.mebibytes(5) when record data format conversion is disabled, Size.mebibytes(128) when it is enabled
         '''
         result = self._values.get("buffering_size")
         return typing.cast(typing.Optional[_Size_7b441c34], result)
@@ -3910,6 +5182,17 @@ class S3BucketProps(CommonDestinationS3Props, CommonDestinationProps):
         result = self._values.get("s3_backup")
         return typing.cast(typing.Optional[DestinationS3BackupProps], result)

+    @builtins.property
+    def data_format_conversion(self) -> typing.Optional[DataFormatConversionProps]:
+        '''The input format, output format, and schema config for converting data from the JSON format to the Parquet or ORC format before writing to Amazon S3.
+
+        :default: no data format conversion is done
+
+        :see: http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-kinesisfirehose-deliverystream-extendeds3destinationconfiguration.html#cfn-kinesisfirehose-deliverystream-extendeds3destinationconfiguration-dataformatconversionconfiguration
+        '''
+        result = self._values.get("data_format_conversion")
+        return typing.cast(typing.Optional[DataFormatConversionProps], result)
+
     @builtins.property
     def file_extension(self) -> typing.Optional[builtins.str]:
         '''Specify a file extension.
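The tightened ``buffering_size`` bounds matter once ``data_format_conversion`` is set: the docstrings above state a 64 MiB minimum (and a 128 MiB default) with conversion enabled. A sketch of the two regimes, with placeholder ``bucket`` and ``conversion_props`` objects::

    # Without conversion: 1 MiB <= buffering_size <= 128 MiB, default 5 MiB.
    plain = firehose.S3Bucket(bucket, buffering_size=Size.mebibytes(5))

    # With conversion: 64 MiB <= buffering_size <= 128 MiB, default 128 MiB.
    converted = firehose.S3Bucket(bucket,
        data_format_conversion=conversion_props,  # a DataFormatConversionProps instance
        buffering_size=Size.mebibytes(64),
    )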
@@ -3948,6 +5231,199 @@ class S3BucketProps(CommonDestinationS3Props, CommonDestinationProps):
         )


+class SchemaConfiguration(
+    metaclass=jsii.JSIIMeta,
+    jsii_type="aws-cdk-lib.aws_kinesisfirehose.SchemaConfiguration",
+):
+    '''Represents a schema configuration for Firehose S3 data record format conversion.
+
+    :see: http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-kinesisfirehose-deliverystream-dataformatconversionconfiguration.html#cfn-kinesisfirehose-deliverystream-dataformatconversionconfiguration-schemaconfiguration
+    :exampleMetadata: infused
+
+    Example::
+
+        # bucket: s3.Bucket
+        # schema_glue_table: glue.CfnTable
+
+        s3_destination = firehose.S3Bucket(bucket,
+            data_format_conversion=firehose.DataFormatConversionProps(
+                schema_configuration=firehose.SchemaConfiguration.from_cfn_table(schema_glue_table),
+                input_format=firehose.InputFormat.OPENX_JSON,
+                output_format=firehose.OutputFormat.PARQUET
+            )
+        )
+    '''
+
+    @jsii.member(jsii_name="fromCfnTable")
+    @builtins.classmethod
+    def from_cfn_table(
+        cls,
+        table: _CfnTable_63ae0183,
+        *,
+        region: typing.Optional[builtins.str] = None,
+        version_id: typing.Optional[builtins.str] = None,
+    ) -> "SchemaConfiguration":
+        '''Obtain schema configuration for data record format conversion from an ``aws_glue.CfnTable``.
+
+        :param table: -
+        :param region: The region of the database the table is in. Default: the region of the stack that contains the table reference is used
+        :param version_id: Specifies the table version for the output data schema. if set to ``LATEST``, Firehose uses the most recent table version. This means that any updates to the table are automatically picked up. Default: ``LATEST``
+        '''
+        if __debug__:
+            type_hints = typing.get_type_hints(_typecheckingstub__16698efebf7812a619f54735d92a199e9f2be81de7b9a45a6b47a846ad97bb22)
+            check_type(argname="argument table", value=table, expected_type=type_hints["table"])
+        props = SchemaConfigurationFromCfnTableProps(
+            region=region, version_id=version_id
+        )
+
+        return typing.cast("SchemaConfiguration", jsii.sinvoke(cls, "fromCfnTable", [table, props]))
+
+    @jsii.member(jsii_name="bind")
+    def bind(
+        self,
+        scope: _constructs_77d1e7e8.Construct,
+        *,
+        role: _IRole_235f5d8e,
+    ) -> "CfnDeliveryStream.SchemaConfigurationProperty":
+        '''Binds this Schema to the Destination, adding the necessary permissions to the Destination role.
+
+        :param scope: -
+        :param role: The IAM Role that will be used by the Delivery Stream for access to the Glue data catalog for record format conversion.
+        '''
+        if __debug__:
+            type_hints = typing.get_type_hints(_typecheckingstub__fa302f4f5dcb045545aee457a21bea52383c93a0b3a83d889ecd270cb21edc8d)
+            check_type(argname="argument scope", value=scope, expected_type=type_hints["scope"])
+        options = SchemaConfigurationBindOptions(role=role)
+
+        return typing.cast("CfnDeliveryStream.SchemaConfigurationProperty", jsii.invoke(self, "bind", [scope, options]))
+
+
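Pinning the schema source explicitly, as a sketch directly from the ``from_cfn_table`` signature above (the table object and region string are placeholders; by default the stack's region and the ``LATEST`` table version are used)::

    schema = firehose.SchemaConfiguration.from_cfn_table(schema_glue_table,
        region="eu-west-1",
        version_id="LATEST",
    )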
+@jsii.data_type(
+    jsii_type="aws-cdk-lib.aws_kinesisfirehose.SchemaConfigurationBindOptions",
+    jsii_struct_bases=[],
+    name_mapping={"role": "role"},
+)
+class SchemaConfigurationBindOptions:
+    def __init__(self, *, role: _IRole_235f5d8e) -> None:
+        '''Options when binding a SchemaConfig to a Destination.
+
+        :param role: The IAM Role that will be used by the Delivery Stream for access to the Glue data catalog for record format conversion.
+
+        :exampleMetadata: fixture=_generated
+
+        Example::
+
+            # The code below shows an example of how to instantiate this type.
+            # The values are placeholders you should change.
+            from aws_cdk import aws_iam as iam
+            from aws_cdk import aws_kinesisfirehose as kinesisfirehose
+
+            # role: iam.Role
+
+            schema_configuration_bind_options = kinesisfirehose.SchemaConfigurationBindOptions(
+                role=role
+            )
+        '''
+        if __debug__:
+            type_hints = typing.get_type_hints(_typecheckingstub__be38cc765d422319285e857e984a2a96aeac0bf84fc8ba50ca36f24ae4a656a6)
+            check_type(argname="argument role", value=role, expected_type=type_hints["role"])
+        self._values: typing.Dict[builtins.str, typing.Any] = {
+            "role": role,
+        }
+
+    @builtins.property
+    def role(self) -> _IRole_235f5d8e:
+        '''The IAM Role that will be used by the Delivery Stream for access to the Glue data catalog for record format conversion.'''
+        result = self._values.get("role")
+        assert result is not None, "Required property 'role' is missing"
+        return typing.cast(_IRole_235f5d8e, result)
+
+    def __eq__(self, rhs: typing.Any) -> builtins.bool:
+        return isinstance(rhs, self.__class__) and rhs._values == self._values
+
+    def __ne__(self, rhs: typing.Any) -> builtins.bool:
+        return not (rhs == self)
+
+    def __repr__(self) -> str:
+        return "SchemaConfigurationBindOptions(%s)" % ", ".join(
+            k + "=" + repr(v) for k, v in self._values.items()
+        )
+
+
+@jsii.data_type(
+    jsii_type="aws-cdk-lib.aws_kinesisfirehose.SchemaConfigurationFromCfnTableProps",
+    jsii_struct_bases=[],
+    name_mapping={"region": "region", "version_id": "versionId"},
+)
+class SchemaConfigurationFromCfnTableProps:
+    def __init__(
+        self,
+        *,
+        region: typing.Optional[builtins.str] = None,
+        version_id: typing.Optional[builtins.str] = None,
+    ) -> None:
+        '''Options for creating a Schema for record format conversion from a ``glue.CfnTable``.
+
+        :param region: The region of the database the table is in. Default: the region of the stack that contains the table reference is used
+        :param version_id: Specifies the table version for the output data schema. if set to ``LATEST``, Firehose uses the most recent table version. This means that any updates to the table are automatically picked up. Default: ``LATEST``
+
+        :exampleMetadata: fixture=_generated
+
+        Example::
+
+            # The code below shows an example of how to instantiate this type.
+            # The values are placeholders you should change.
+            from aws_cdk import aws_kinesisfirehose as kinesisfirehose
+
+            schema_configuration_from_cfn_table_props = kinesisfirehose.SchemaConfigurationFromCfnTableProps(
+                region="region",
+                version_id="versionId"
+            )
+        '''
+        if __debug__:
+            type_hints = typing.get_type_hints(_typecheckingstub__d59e8faea792bc8275a33e7b7ca4b7d0096136ba71d39758a60dc5f61140e8dd)
+            check_type(argname="argument region", value=region, expected_type=type_hints["region"])
+            check_type(argname="argument version_id", value=version_id, expected_type=type_hints["version_id"])
+        self._values: typing.Dict[builtins.str, typing.Any] = {}
+        if region is not None:
+            self._values["region"] = region
+        if version_id is not None:
+            self._values["version_id"] = version_id
+
+    @builtins.property
+    def region(self) -> typing.Optional[builtins.str]:
+        '''The region of the database the table is in.
+
+        :default: the region of the stack that contains the table reference is used
+        '''
+        result = self._values.get("region")
+        return typing.cast(typing.Optional[builtins.str], result)
+
+    @builtins.property
+    def version_id(self) -> typing.Optional[builtins.str]:
+        '''Specifies the table version for the output data schema.
+
+        if set to ``LATEST``, Firehose uses the most recent table version. This means that any updates to the table are automatically picked up.
+
+        :default: ``LATEST``
+
+        :see: http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-kinesisfirehose-deliverystream-schemaconfiguration.html#cfn-kinesisfirehose-deliverystream-schemaconfiguration-versionid
+        '''
+        result = self._values.get("version_id")
+        return typing.cast(typing.Optional[builtins.str], result)
+
+    def __eq__(self, rhs: typing.Any) -> builtins.bool:
+        return isinstance(rhs, self.__class__) and rhs._values == self._values
+
+    def __ne__(self, rhs: typing.Any) -> builtins.bool:
+        return not (rhs == self)
+
+    def __repr__(self) -> str:
+        return "SchemaConfigurationFromCfnTableProps(%s)" % ", ".join(
+            k + "=" + repr(v) for k, v in self._values.items()
+        )
+
+
|
|
3951
5427
|
class StreamEncryption(
|
|
3952
5428
|
metaclass=jsii.JSIIAbstractClass,
|
|
3953
5429
|
jsii_type="aws-cdk-lib.aws_kinesisfirehose.StreamEncryption",
|
|
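`SchemaConfigurationFromCfnTableProps` carries the optional `region` and `version_id` overrides used when a record-format-conversion schema is derived from an existing `glue.CfnTable`. A minimal sketch of how these props would be passed is below; the `SchemaConfiguration.from_cfn_table` factory name is an assumption inferred from the props class name and the new type-checking stub that accepts a `CfnTable` together with these keywords, and `table` is assumed to be defined elsewhere in the stack.

```python
# Hedged sketch: deriving a record-format-conversion schema from a
# glue.CfnTable. The `from_cfn_table` factory name is an ASSUMPTION; only
# the region/version_id props and their defaults are confirmed by this diff.
from aws_cdk import aws_kinesisfirehose as firehose

schema = firehose.SchemaConfiguration.from_cfn_table(  # assumed factory name
    table,                  # an existing glue.CfnTable defined elsewhere
    region="us-east-1",     # default: the region of the table's stack
    version_id="LATEST",    # default: LATEST
)
```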
@@ -4038,6 +5514,53 @@ class StreamEncryptionType(enum.Enum):
     '''Data in the stream is stored encrypted by a KMS key owned by AWS and managed for use in multiple AWS accounts.'''


+class TimestampParser(
+    metaclass=jsii.JSIIMeta,
+    jsii_type="aws-cdk-lib.aws_kinesisfirehose.TimestampParser",
+):
+    '''Value class that wraps a Joda Time format string.
+
+    Use this with the Hive JSON input format for data record format conversion to parse custom timestamp formats.
+
+    :exampleMetadata: infused
+
+    Example::
+
+        input_format = firehose.HiveJsonInputFormat(
+            timestamp_parsers=[
+                firehose.TimestampParser.from_format_string("yyyy-MM-dd"), firehose.TimestampParser.EPOCH_MILLIS
+            ]
+        )
+    '''
+
+    @jsii.member(jsii_name="fromFormatString")
+    @builtins.classmethod
+    def from_format_string(cls, format: builtins.str) -> "TimestampParser":
+        '''Creates a TimestampParser from the given format string.
+
+        The format string should be a valid Joda Time pattern string.
+        See `Class DateTimeFormat <https://docs.aws.amazon.com/https://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html>`_ for more details
+
+        :param format: the Joda Time format string.
+        '''
+        if __debug__:
+            type_hints = typing.get_type_hints(_typecheckingstub__f9355b4b9cb75f1433155f9d39e32472e4f0342bd652e191a412203a56a7a082)
+            check_type(argname="argument format", value=format, expected_type=type_hints["format"])
+        return typing.cast("TimestampParser", jsii.sinvoke(cls, "fromFormatString", [format]))
+
+    @jsii.python.classproperty
+    @jsii.member(jsii_name="EPOCH_MILLIS")
+    def EPOCH_MILLIS(cls) -> "TimestampParser":
+        '''Parses timestamps formatted in milliseconds since epoch.'''
+        return typing.cast("TimestampParser", jsii.sget(cls, "EPOCH_MILLIS"))
+
+    @builtins.property
+    @jsii.member(jsii_name="format")
+    def format(self) -> builtins.str:
+        '''The format string to use in Hive JSON input format configuration.'''
+        return typing.cast(builtins.str, jsii.get(self, "format"))
+
+
 @jsii.implements(_IInspectable_c2943556, IDeliveryStreamRef, _ITaggable_36806126)
 class CfnDeliveryStream(
     _CfnResource_9df397a6,
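The new `TimestampParser` value class feeds the Hive JSON input format when record format conversion needs to parse non-default timestamp layouts. A short sketch, using only the APIs added in this diff (the pattern string itself is illustrative):

```python
# Sketch: combine a custom Joda-style pattern with the built-in
# epoch-milliseconds parser, then render the L1 property that
# CfnDeliveryStream consumes for record format conversion.
from aws_cdk import aws_kinesisfirehose as firehose

input_format = firehose.HiveJsonInputFormat(
    timestamp_parsers=[
        firehose.TimestampParser.from_format_string("yyyy-MM-dd'T'HH:mm:ss"),
        firehose.TimestampParser.EPOCH_MILLIS,
    ]
)

# Presumably invoked by the S3 destination when data format conversion is
# configured; called here only to show what the class renders.
input_format_config = input_format.create_input_format_config()
```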
@@ -13275,6 +14798,52 @@ class EnableLogging(
         return typing.cast(typing.Optional[_ILogGroup_3c4fa718], jsii.get(self, "logGroup"))


+@jsii.implements(IInputFormat)
+class HiveJsonInputFormat(
+    metaclass=jsii.JSIIMeta,
+    jsii_type="aws-cdk-lib.aws_kinesisfirehose.HiveJsonInputFormat",
+):
+    '''This class specifies properties for Hive JSON input format for record format conversion.
+
+    You should only need to specify an instance of this class if the default configuration does not suit your needs.
+
+    :exampleMetadata: infused
+
+    Example::
+
+        input_format = firehose.HiveJsonInputFormat(
+            timestamp_parsers=[
+                firehose.TimestampParser.from_format_string("yyyy-MM-dd"), firehose.TimestampParser.EPOCH_MILLIS
+            ]
+        )
+    '''
+
+    def __init__(
+        self,
+        *,
+        timestamp_parsers: typing.Optional[typing.Sequence[TimestampParser]] = None,
+    ) -> None:
+        '''
+        :param timestamp_parsers: List of TimestampParsers. These are used to parse custom timestamp strings from input JSON into dates. Note: Specifying a parser will override the default timestamp parser. If the default timestamp parser is required, include ``TimestampParser.DEFAULT`` in the list of parsers along with the custom parser. Default: the default timestamp parser is used
+        '''
+        props = HiveJsonInputFormatProps(timestamp_parsers=timestamp_parsers)
+
+        jsii.create(self.__class__, self, [props])
+
+    @jsii.member(jsii_name="createInputFormatConfig")
+    def create_input_format_config(
+        self,
+    ) -> CfnDeliveryStream.InputFormatConfigurationProperty:
+        '''Renders the cloudformation properties for the input format.'''
+        return typing.cast(CfnDeliveryStream.InputFormatConfigurationProperty, jsii.invoke(self, "createInputFormatConfig", []))
+
+    @builtins.property
+    @jsii.member(jsii_name="props")
+    def props(self) -> typing.Optional[HiveJsonInputFormatProps]:
+        '''Properties for Hive JSON input format.'''
+        return typing.cast(typing.Optional[HiveJsonInputFormatProps], jsii.get(self, "props"))
+
+
 __all__ = [
     "BackupMode",
     "CfnDeliveryStream",
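`HiveJsonInputFormat` is one of the `IInputFormat` implementations that this release pairs with an `IOutputFormat` and a `SchemaConfiguration` through the new `data_format_conversion` option on the S3 destination (see the `S3Bucket` type-checking stub further down in this diff). A sketch of the wiring, assuming `bucket` and `schema` are defined elsewhere and that `ParquetOutputFormat` accepts the all-optional props listed in its stub:

```python
# Sketch: JSON -> Parquet record format conversion on an S3 destination.
# Only the DataFormatConversionProps fields (input_format, output_format,
# schema_configuration, enabled) are confirmed by this diff; `bucket` is an
# s3.IBucket and `schema` a SchemaConfiguration built elsewhere.
from aws_cdk import aws_kinesisfirehose as firehose

destination = firehose.S3Bucket(
    bucket,
    data_format_conversion=firehose.DataFormatConversionProps(
        input_format=firehose.HiveJsonInputFormat(),      # default parsers
        output_format=firehose.ParquetOutputFormat(),     # assumed default props
        schema_configuration=schema,
        enabled=True,
    ),
)
```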
@@ -13282,6 +14851,7 @@ __all__ = [
     "CommonDestinationProps",
     "CommonDestinationS3Props",
     "Compression",
+    "DataFormatConversionProps",
     "DataProcessorBindOptions",
     "DataProcessorConfig",
     "DataProcessorIdentifier",
@@ -13295,18 +14865,38 @@ __all__ = [
     "DestinationS3BackupProps",
     "DisableLogging",
     "EnableLogging",
+    "HiveJsonInputFormat",
+    "HiveJsonInputFormatProps",
     "IDataProcessor",
     "IDeliveryStream",
     "IDeliveryStreamRef",
     "IDestination",
+    "IInputFormat",
     "ILoggingConfig",
+    "IOutputFormat",
     "ISource",
+    "InputFormat",
     "KinesisStreamSource",
     "LambdaFunctionProcessor",
+    "OpenXJsonInputFormat",
+    "OpenXJsonInputFormatProps",
+    "OrcCompression",
+    "OrcFormatVersion",
+    "OrcOutputFormat",
+    "OrcOutputFormatProps",
+    "OutputFormat",
+    "ParquetCompression",
+    "ParquetOutputFormat",
+    "ParquetOutputFormatProps",
+    "ParquetWriterVersion",
     "S3Bucket",
     "S3BucketProps",
+    "SchemaConfiguration",
+    "SchemaConfigurationBindOptions",
+    "SchemaConfigurationFromCfnTableProps",
     "StreamEncryption",
     "StreamEncryptionType",
+    "TimestampParser",
 ]

 publication.publish()
@@ -13363,6 +14953,16 @@ def _typecheckingstub__4e41ad5beb7c57e7d6a51a6e7b54af84f87429433140b71bcff2768d4
     """Type checking stubs"""
     pass

+def _typecheckingstub__bff90bf1ac37687c050bd1dbbc7970543cf96f46bffc7e9b92aa180e16446a3e(
+    *,
+    input_format: IInputFormat,
+    output_format: IOutputFormat,
+    schema_configuration: SchemaConfiguration,
+    enabled: typing.Optional[builtins.bool] = None,
+) -> None:
+    """Type checking stubs"""
+    pass
+
 def _typecheckingstub__19eda2faa3921fd664688bb9d58a7766cede4c60f2944654651ac8a298dad52e(
     *,
     role: _IRole_235f5d8e,
@@ -13446,6 +15046,13 @@ def _typecheckingstub__14700eb876e8e0f20f42a3b1362e4b8cd4eb596f1fbaecf0e207a387e
     """Type checking stubs"""
     pass

+def _typecheckingstub__0afd5b01612b3cc327b3c1600a9eb4aa5aaa6f3ee92bada98ae2a5d7e07bf664(
+    *,
+    timestamp_parsers: typing.Optional[typing.Sequence[TimestampParser]] = None,
+) -> None:
+    """Type checking stubs"""
+    pass
+
 def _typecheckingstub__4720a6b97c475eae9ec0d65aca8250b00f57d45f0efb2368b8df6d486162c508(
     scope: _constructs_77d1e7e8.Construct,
     *,
@@ -13528,9 +15135,59 @@ def _typecheckingstub__393c41d8ae2fe5acab13fd70fff9f4778e727adfd78b86d20820f0670
     """Type checking stubs"""
     pass

+def _typecheckingstub__bf09507e4b7ba6abbfda17b454958c835099a4ff05786b47104813d50d0d5e6f(
+    *,
+    column_to_json_key_mappings: typing.Optional[typing.Mapping[builtins.str, builtins.str]] = None,
+    convert_dots_in_json_keys_to_underscores: typing.Optional[builtins.bool] = None,
+    lowercase_column_names: typing.Optional[builtins.bool] = None,
+) -> None:
+    """Type checking stubs"""
+    pass
+
+def _typecheckingstub__02948bebe4c2930eed4c6124d0d7f279623b5812d4fe6983e8d186c02a4b2f5c(
+    value: builtins.str,
+) -> None:
+    """Type checking stubs"""
+    pass
+
+def _typecheckingstub__23d7be2aebca47c4f726452fdac9d7e13c1d079ee9bbc0eb6bf735c5fa7d1ec6(
+    *,
+    block_size: typing.Optional[_Size_7b441c34] = None,
+    bloom_filter_columns: typing.Optional[typing.Sequence[builtins.str]] = None,
+    bloom_filter_false_positive_probability: typing.Optional[jsii.Number] = None,
+    compression: typing.Optional[OrcCompression] = None,
+    dictionary_key_threshold: typing.Optional[jsii.Number] = None,
+    enable_padding: typing.Optional[builtins.bool] = None,
+    format_version: typing.Optional[OrcFormatVersion] = None,
+    padding_tolerance: typing.Optional[jsii.Number] = None,
+    row_index_stride: typing.Optional[jsii.Number] = None,
+    stripe_size: typing.Optional[_Size_7b441c34] = None,
+) -> None:
+    """Type checking stubs"""
+    pass
+
+def _typecheckingstub__61bd74ac3570328dbd418a538644f7198c553bd1d41a6ca4a6136f48d7cf4d50(
+    value: builtins.str,
+) -> None:
+    """Type checking stubs"""
+    pass
+
+def _typecheckingstub__30f6620eefd956acc092d03fba63b6121a146d30b699581234817a52e1d9792b(
+    *,
+    block_size: typing.Optional[_Size_7b441c34] = None,
+    compression: typing.Optional[ParquetCompression] = None,
+    enable_dictionary_compression: typing.Optional[builtins.bool] = None,
+    max_padding: typing.Optional[_Size_7b441c34] = None,
+    page_size: typing.Optional[_Size_7b441c34] = None,
+    writer_version: typing.Optional[ParquetWriterVersion] = None,
+) -> None:
+    """Type checking stubs"""
+    pass
+
 def _typecheckingstub__a2eaf455255fc260033aa24d456779f4b21172e8b4cf2c51f6355f415c9f3ccd(
     bucket: _IBucket_42e086fd,
     *,
+    data_format_conversion: typing.Optional[typing.Union[DataFormatConversionProps, typing.Dict[builtins.str, typing.Any]]] = None,
     file_extension: typing.Optional[builtins.str] = None,
     time_zone: typing.Optional[_TimeZone_cdd72ac9] = None,
     buffering_interval: typing.Optional[_Duration_4839e8c3] = None,
@@ -13565,18 +15222,57 @@ def _typecheckingstub__04b12dc503479d22af2396c4df8d38c37536719187eef6ddd01c18b52
     processor: typing.Optional[IDataProcessor] = None,
     role: typing.Optional[_IRole_235f5d8e] = None,
     s3_backup: typing.Optional[typing.Union[DestinationS3BackupProps, typing.Dict[builtins.str, typing.Any]]] = None,
+    data_format_conversion: typing.Optional[typing.Union[DataFormatConversionProps, typing.Dict[builtins.str, typing.Any]]] = None,
     file_extension: typing.Optional[builtins.str] = None,
     time_zone: typing.Optional[_TimeZone_cdd72ac9] = None,
 ) -> None:
     """Type checking stubs"""
     pass

+def _typecheckingstub__16698efebf7812a619f54735d92a199e9f2be81de7b9a45a6b47a846ad97bb22(
+    table: _CfnTable_63ae0183,
+    *,
+    region: typing.Optional[builtins.str] = None,
+    version_id: typing.Optional[builtins.str] = None,
+) -> None:
+    """Type checking stubs"""
+    pass
+
+def _typecheckingstub__fa302f4f5dcb045545aee457a21bea52383c93a0b3a83d889ecd270cb21edc8d(
+    scope: _constructs_77d1e7e8.Construct,
+    *,
+    role: _IRole_235f5d8e,
+) -> None:
+    """Type checking stubs"""
+    pass
+
+def _typecheckingstub__be38cc765d422319285e857e984a2a96aeac0bf84fc8ba50ca36f24ae4a656a6(
+    *,
+    role: _IRole_235f5d8e,
+) -> None:
+    """Type checking stubs"""
+    pass
+
+def _typecheckingstub__d59e8faea792bc8275a33e7b7ca4b7d0096136ba71d39758a60dc5f61140e8dd(
+    *,
+    region: typing.Optional[builtins.str] = None,
+    version_id: typing.Optional[builtins.str] = None,
+) -> None:
+    """Type checking stubs"""
+    pass
+
 def _typecheckingstub__efb44f4c68ce5ed338b1cadc1095db8f6b1ea6c2478ee68c07bb0fa95cecdf47(
     encryption_key: typing.Optional[_IKey_5f11635f] = None,
 ) -> None:
     """Type checking stubs"""
     pass

+def _typecheckingstub__f9355b4b9cb75f1433155f9d39e32472e4f0342bd652e191a412203a56a7a082(
+    format: builtins.str,
+) -> None:
+    """Type checking stubs"""
+    pass
+
 def _typecheckingstub__b3cd824a2680c7d043cac684bd1be9ca77e94201f1ba00785d60a50ff43c2288(
     scope: _constructs_77d1e7e8.Construct,
     id: builtins.str,
@@ -14470,3 +16166,6 @@ def _typecheckingstub__ba11d69a3d91c8a6ba63c6ed55a7bbd149c317325863da3c41ebf373c
 ) -> None:
     """Type checking stubs"""
     pass
+
+for cls in [IDataProcessor, IDeliveryStream, IDeliveryStreamRef, IDestination, IInputFormat, ILoggingConfig, IOutputFormat, ISource]:
+    typing.cast(typing.Any, cls).__protocol_attrs__ = typing.cast(typing.Any, cls).__protocol_attrs__ - set(['__jsii_proxy_class__', '__jsii_type__'])
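The trailing loop strips the jsii bookkeeping attributes (`__jsii_proxy_class__`, `__jsii_type__`) from each interface's `typing.Protocol` attribute set, so runtime `isinstance()` checks against these protocols do not demand jsii-internal attributes. A minimal sketch of the mechanism, assuming Python 3.12+ (where `__protocol_attrs__` is exposed); `IExample` and `Impl` are illustrative names, not part of aws-cdk-lib:

```python
# Why the generated module trims __protocol_attrs__ (Python 3.12+ sketch).
from typing import Protocol, runtime_checkable


@runtime_checkable
class IExample(Protocol):
    __jsii_type__: str  # bookkeeping attribute injected by the code generator

    def handle(self) -> None: ...


class Impl:  # provides the protocol's real surface, but lacks __jsii_type__
    def handle(self) -> None:
        pass


# Without trimming, isinstance() would also require __jsii_type__ on Impl.
IExample.__protocol_attrs__ = IExample.__protocol_attrs__ - {"__jsii_type__"}
assert isinstance(Impl(), IExample)
```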