@aws-mdaa/datawarehouse 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,191 @@
1
+ # Data Warehouse
2
+
3
+ The Data Warehouse CDK application is used to configure and deploy resources required for a secure Redshift-based Data Warehouse on AWS.
4
+
5
+ ***
6
+
7
+ ## Deployed Resources and Compliance Details
8
+
9
+ ![datawarehouse](../../../constructs/L3/analytics/datawarehouse-l3-construct/docs/datawarehouse.png)
10
+
11
+ **Warehouse KMS Key** - Will be used to encrypt all warehouse data at rest (Warehouse bucket, Redshift Cluster).
12
+
13
+ **Warehouse Bucket** - An S3 Bucket which can be used for warehouse utility/maintenance operations.
14
+
15
+ **Warehouse Logging Bucket** - If 'enableAuditLoggingToS3' is specified in config, an S3 Bucket specific to Redshift user activity logging will be created.
16
+
17
+ * **Note** - This bucket will be configured with SSE-S3 encryption due to Redshift logging requirements (KMS not supported).
18
+
19
+ **Execution Roles** - List of externally managed execution roles required to be associated with the Redshift Cluster. Access to these roles may be granted to Redshift users in order to interact with other AWS services through Redshift queries and commands.
20
+
21
+ **Warehouse Security Group** - Will control who can connect to the cluster according to the app config (CIDR ranges, other Security Groups).
22
+
23
+ * All egress permitted by default
24
+ * No ingress (to cluster) permitted by default
25
+
26
+ **Warehouse Subnet Group** - Controls which subnets the cluster will be deployed on.
27
+
28
+ **Warehouse Parameter Group** - Contains cluster config parameters required to control cluster behaviour and ensure secure operation.
29
+
30
+ * Enforces use of SSL on client connections
31
+
32
+ **Warehouse Cluster** - A Redshift cluster conforming to the specified configuration and security controls.
33
+
34
+ * All data encrypted at rest using warehouse KMS key
35
+ * SSL enforced on all client connections
36
+ * Network access controlled by security group.
37
+
38
+ **Warehouse Cluster Scheduled Actions** - Scheduled actions to automatically pause and resume the Redshift cluster.
39
+
40
+ **Warehouse Federation Roles** - Roles which are used via IAM SAML Identity Providers to federate access to the cluster
41
+
42
+ * Establishes assume role trust (with SAML) with IAM Identity Provider
43
+ * Grants ability to dynamically generate cluster user and credentials, and join groups provided in the SAML claim by the identity provider
44
+ * Groups must pre-exist in cluster, otherwise federation will fail
45
+
46
+ **Warehouse Users** - Generates Redshift user credentials inside the cluster and stores them in a Secret
47
+
48
+ * Automated secret rotation can be triggered on a configurable cycle (by days)
49
+
50
+ ***
51
+
52
+ ## Configuration
53
+
54
+ ### MDAA Config
55
+
56
+ Add the following snippet to your mdaa.yaml under the `modules:` section of a domain/env in order to use this module:
57
+
58
+ ```yaml
59
+ datawarehouse: # Module Name can be customized
60
+ module_path: "@aws-mdaa/datawarehouse" # Must match module NPM package name
61
+ module_configs:
62
+ - ./datawarehouse.yaml # Filename/path can be customized
63
+ ```
64
+
65
+ ### Module Config (./datawarehouse.yaml)
66
+
67
+ [Config Schema Docs](SCHEMA.md)
68
+
69
+ ```yaml
70
+ # Specify the admin username to be created on the cluster. A secret will
71
+ # be automatically generated containing the admin password.
72
+ adminUsername: admin
73
+
74
+ # The admin password will be automatically rotated after this many days
75
+ adminPasswordRotationDays: 30
76
+
77
+ # The number of days that automated snapshots are retained (1-35 days)
78
+ # Set to 0 to disable automated snapshots.
79
+ # Default - 1
80
+ automatedSnapshotRetentionDays: 3
81
+
82
+ # An optional list of arns for keys which may be used to write data to the cluster bucket.
83
+ # This may be useful to allow a Glue job to write data to the cluster bucket in order to load into the cluster.
84
+ additionalBucketKmsKeyArns:
85
+ - arn:{{partition}}:kms:{{region}}:{{account}}:key/abcd-123123-abcd-12312421
86
+
87
+ #Used to configure SAML federations
88
+ federations:
89
+ - federationName: "test" # Should be descriptive and unique
90
+ # This is the arn of the IAM Identity Provider
91
+ providerArn: arn:{{partition}}:iam::{{account}}:saml-provider/sample-saml-identity-provider
92
+
93
+ # This is a set of Role/Principal Arns which will be granted access to the Warehouse S3 bucket
94
+ dataAdminRoles:
95
+ - arn: arn:{{partition}}:iam::{{account}}:role/Admin
96
+
97
+ # A list of roles which will be provided read/write access to the warehouse bucket
98
+ warehouseBucketUserRoles:
99
+ - name: User
100
+ - name: team2-ex-role
101
+
102
+ # Set of execution roles required to be associated to the cluster
103
+ # If execution role requires read/write access to warehouse bucket, explicitly add that role to 'warehouseBucketUserRoles' property
104
+ executionRoles:
105
+ - arn: arn:{{partition}}:iam::{{account}}:role/team1-ex-role
106
+ - name: team2-ex-role
107
+
108
+ # The VPC and subnets on which the cluster will be deployed. If automatic cluster relocation is required,
109
+ # at least one subnet per AZ should be specified.
110
+ vpcId: vpc-12321421412
111
+ subnetIds:
112
+ - subnet-12312312421
113
+ - subnet-12312321412
114
+
115
+ #A preferred maintenance window day/time range. Should be specified as a range ddd:hh24:mi-ddd:hh24:mi (24H Clock UTC).
116
+ #Example: 'Sun:23:45-Mon:00:15'
117
+ preferredMaintenanceWindow: Sun:23:45-Mon:00:15
118
+
119
+ # Port the cluster will listen on (defaults to 5439)
120
+ clusterPort: 54390
121
+
122
+ # Ingress rules to be added to the cluster security group.
123
+ # All other traffic will be blocked
124
+ # Can reference other security groups (prefix sg:) or ipv4 CIDR sources (prefix ipv4:)
125
+ securityGroupIngress:
126
+ ipv4:
127
+ - 172.31.0.0/16
128
+ sg:
129
+ - ssm:/path/to/ssm
130
+ # The node type and initial number of nodes
131
+ nodeType: RA3_4XLARGE
132
+ numberOfNodes: 2
133
+
134
+ # Controls whether or not the cluster logs user audit activity to S3
135
+ # Note that enabling this will result in a new S3 bucket being created
136
+ # specifically for user audit logs. Due to Redshift limitations, this
137
+ # S3 bucket will use S3/AES-256 encryption instead of KMS CMK.
138
+ enableAuditLoggingToS3: true
139
+
140
+ databaseUsers:
141
+ - userName: "serviceuserGlue"
142
+ dbName: "default_db"
143
+ secretRotationDays: 90
144
+ secretAccessRoles:
145
+ - name: "test-arn"
146
+ - userName: "serviceuserQuicksight"
147
+ dbName: "default_db"
148
+ secretRotationDays: 90
149
+
150
+ # The list of scheduled actions to pause and/or resume cluster
151
+ scheduledActions:
152
+ # Pause cluster every Friday at 6pm ET starting Dec 31, 2023 until Dec 31, 2099
153
+ - name: pause-cluster
154
+ enable: True
155
+ # Target Action must be one of: "pauseCluster" or "resumeCluster". "resizeCluster" is not supported yet.
156
+ targetAction: pauseCluster
157
+ # Specify the action schedule in cron format cron(Minutes Hours Day-of-month Month Day-of-week Year).
158
+ schedule: cron(0 22 ? * FRI *)
159
+ # Start Date and Time in UTC format when the schedule becomes active. This must be a future date-time.
160
+ startTime: "2023-12-31T00:00:00Z"
161
+ # End Date and Time in UTC format after which the schedule is no longer active. This must be a future date-time later than start date.
162
+ endTime: "2099-12-31T00:00:00Z"
163
+
164
+ - name: resume-cluster
165
+ # Resume cluster every Monday at 7am ET starting Dec 31, 2023 until Dec 31, 2099
166
+ enable: True
167
+ # Target Action must be one of: "pauseCluster" or "resumeCluster". "resizeCluster" is not supported yet.
168
+ targetAction: resumeCluster
169
+ # Specify the action schedule in cron format cron(Minutes Hours Day-of-month Month Day-of-week Year).
170
+ schedule: cron(0 12 ? * MON *)
171
+ # Start Date and Time in UTC format when the schedule becomes active. This must be a future date-time.
172
+ startTime: "2023-12-31T00:00:00Z"
173
+ # End Date and Time in UTC format after which the schedule is no longer active. This must be a future date-time later than start date.
174
+ endTime: "2099-12-31T00:00:00Z"
175
+
176
+ # Cluster and Scheduled Action event notification configs
177
+ eventNotifications:
178
+ # List of emails to which email notifications will be sent
179
+ # If not specified, an SNS topic is still created and
180
+ # other types of subscriptions can be directly added.
181
+ email:
182
+ - example@example.com
183
+ # Event severity level
184
+ # "ERROR" | "INFO"
185
+ severity: INFO
186
+ # Event categories to be included
187
+ # "configuration" | "management" | "monitoring" | "security" | "pending"
188
+ eventCategories:
189
+ - management
190
+ - security
191
+ ```