agentic-team-templates 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +280 -0
- package/bin/cli.js +5 -0
- package/package.json +47 -0
- package/src/index.js +521 -0
- package/templates/_shared/code-quality.md +162 -0
- package/templates/_shared/communication.md +114 -0
- package/templates/_shared/core-principles.md +62 -0
- package/templates/_shared/git-workflow.md +165 -0
- package/templates/_shared/security-fundamentals.md +173 -0
- package/templates/blockchain/.cursorrules/defi-patterns.md +520 -0
- package/templates/blockchain/.cursorrules/gas-optimization.md +339 -0
- package/templates/blockchain/.cursorrules/overview.md +130 -0
- package/templates/blockchain/.cursorrules/security.md +318 -0
- package/templates/blockchain/.cursorrules/smart-contracts.md +364 -0
- package/templates/blockchain/.cursorrules/testing.md +415 -0
- package/templates/blockchain/.cursorrules/web3-integration.md +538 -0
- package/templates/blockchain/CLAUDE.md +389 -0
- package/templates/cli-tools/.cursorrules/architecture.md +412 -0
- package/templates/cli-tools/.cursorrules/arguments.md +406 -0
- package/templates/cli-tools/.cursorrules/distribution.md +546 -0
- package/templates/cli-tools/.cursorrules/error-handling.md +455 -0
- package/templates/cli-tools/.cursorrules/overview.md +136 -0
- package/templates/cli-tools/.cursorrules/testing.md +537 -0
- package/templates/cli-tools/.cursorrules/user-experience.md +545 -0
- package/templates/cli-tools/CLAUDE.md +356 -0
- package/templates/data-engineering/.cursorrules/data-modeling.md +367 -0
- package/templates/data-engineering/.cursorrules/data-quality.md +455 -0
- package/templates/data-engineering/.cursorrules/overview.md +85 -0
- package/templates/data-engineering/.cursorrules/performance.md +339 -0
- package/templates/data-engineering/.cursorrules/pipeline-design.md +280 -0
- package/templates/data-engineering/.cursorrules/security.md +460 -0
- package/templates/data-engineering/.cursorrules/testing.md +452 -0
- package/templates/data-engineering/CLAUDE.md +974 -0
- package/templates/devops-sre/.cursorrules/capacity-planning.md +653 -0
- package/templates/devops-sre/.cursorrules/change-management.md +584 -0
- package/templates/devops-sre/.cursorrules/chaos-engineering.md +651 -0
- package/templates/devops-sre/.cursorrules/disaster-recovery.md +641 -0
- package/templates/devops-sre/.cursorrules/incident-management.md +565 -0
- package/templates/devops-sre/.cursorrules/observability.md +714 -0
- package/templates/devops-sre/.cursorrules/overview.md +230 -0
- package/templates/devops-sre/.cursorrules/postmortems.md +588 -0
- package/templates/devops-sre/.cursorrules/runbooks.md +760 -0
- package/templates/devops-sre/.cursorrules/slo-sli.md +617 -0
- package/templates/devops-sre/.cursorrules/toil-reduction.md +567 -0
- package/templates/devops-sre/CLAUDE.md +1007 -0
- package/templates/documentation/.cursorrules/adr.md +277 -0
- package/templates/documentation/.cursorrules/api-documentation.md +411 -0
- package/templates/documentation/.cursorrules/code-comments.md +253 -0
- package/templates/documentation/.cursorrules/maintenance.md +260 -0
- package/templates/documentation/.cursorrules/overview.md +82 -0
- package/templates/documentation/.cursorrules/readme-standards.md +306 -0
- package/templates/documentation/CLAUDE.md +120 -0
- package/templates/fullstack/.cursorrules/api-contracts.md +331 -0
- package/templates/fullstack/.cursorrules/architecture.md +298 -0
- package/templates/fullstack/.cursorrules/overview.md +109 -0
- package/templates/fullstack/.cursorrules/shared-types.md +348 -0
- package/templates/fullstack/.cursorrules/testing.md +386 -0
- package/templates/fullstack/CLAUDE.md +349 -0
- package/templates/ml-ai/.cursorrules/data-engineering.md +483 -0
- package/templates/ml-ai/.cursorrules/deployment.md +601 -0
- package/templates/ml-ai/.cursorrules/model-development.md +538 -0
- package/templates/ml-ai/.cursorrules/monitoring.md +658 -0
- package/templates/ml-ai/.cursorrules/overview.md +131 -0
- package/templates/ml-ai/.cursorrules/security.md +637 -0
- package/templates/ml-ai/.cursorrules/testing.md +678 -0
- package/templates/ml-ai/CLAUDE.md +1136 -0
- package/templates/mobile/.cursorrules/navigation.md +246 -0
- package/templates/mobile/.cursorrules/offline-first.md +302 -0
- package/templates/mobile/.cursorrules/overview.md +71 -0
- package/templates/mobile/.cursorrules/performance.md +345 -0
- package/templates/mobile/.cursorrules/testing.md +339 -0
- package/templates/mobile/CLAUDE.md +233 -0
- package/templates/platform-engineering/.cursorrules/ci-cd.md +778 -0
- package/templates/platform-engineering/.cursorrules/developer-experience.md +632 -0
- package/templates/platform-engineering/.cursorrules/infrastructure-as-code.md +600 -0
- package/templates/platform-engineering/.cursorrules/kubernetes.md +710 -0
- package/templates/platform-engineering/.cursorrules/observability.md +747 -0
- package/templates/platform-engineering/.cursorrules/overview.md +215 -0
- package/templates/platform-engineering/.cursorrules/security.md +855 -0
- package/templates/platform-engineering/.cursorrules/testing.md +878 -0
- package/templates/platform-engineering/CLAUDE.md +850 -0
- package/templates/utility-agent/.cursorrules/action-control.md +284 -0
- package/templates/utility-agent/.cursorrules/context-management.md +186 -0
- package/templates/utility-agent/.cursorrules/hallucination-prevention.md +253 -0
- package/templates/utility-agent/.cursorrules/overview.md +78 -0
- package/templates/utility-agent/.cursorrules/token-optimization.md +369 -0
- package/templates/utility-agent/CLAUDE.md +513 -0
- package/templates/web-backend/.cursorrules/api-design.md +255 -0
- package/templates/web-backend/.cursorrules/authentication.md +309 -0
- package/templates/web-backend/.cursorrules/database-patterns.md +298 -0
- package/templates/web-backend/.cursorrules/error-handling.md +366 -0
- package/templates/web-backend/.cursorrules/overview.md +69 -0
- package/templates/web-backend/.cursorrules/security.md +358 -0
- package/templates/web-backend/.cursorrules/testing.md +395 -0
- package/templates/web-backend/CLAUDE.md +366 -0
- package/templates/web-frontend/.cursorrules/accessibility.md +296 -0
- package/templates/web-frontend/.cursorrules/component-patterns.md +204 -0
- package/templates/web-frontend/.cursorrules/overview.md +72 -0
- package/templates/web-frontend/.cursorrules/performance.md +325 -0
- package/templates/web-frontend/.cursorrules/state-management.md +227 -0
- package/templates/web-frontend/.cursorrules/styling.md +271 -0
- package/templates/web-frontend/.cursorrules/testing.md +311 -0
- package/templates/web-frontend/CLAUDE.md +399 -0
|
@@ -0,0 +1,460 @@
|
|
|
1
|
+
# Security & Governance
|
|
2
|
+
|
|
3
|
+
Patterns for securing data and maintaining compliance.
|
|
4
|
+
|
|
5
|
+
## Data Classification
|
|
6
|
+
|
|
7
|
+
### Classification Levels
|
|
8
|
+
|
|
9
|
+
| Level | Description | Examples | Requirements |
|
|
10
|
+
|-------|-------------|----------|--------------|
|
|
11
|
+
| **Public** | Can share externally | Product catalog, public stats | None |
|
|
12
|
+
| **Internal** | Business data, internal use | Sales reports, metrics | Authentication |
|
|
13
|
+
| **Confidential** | Sensitive business data | Financial data, contracts | Encryption, access control |
|
|
14
|
+
| **Restricted** | Regulated/PII data | SSN, health records, payment card data (PCI) | Encryption, masking, audit logging |
|
|
15
|
+
|
|
16
|
+
### Apply Classifications
|
|
17
|
+
|
|
18
|
+
```python
|
|
19
|
+
# Tag tables with classification
spark.sql("""
    ALTER TABLE curated.customers
    SET TBLPROPERTIES (
        'data_classification' = 'restricted',
        'contains_pii' = 'true',
        'data_owner' = 'customer-team',
        'retention_days' = '365'
    )
""")

# Query classification metadata.
# Table properties are not a column of information_schema.tables, so they are
# read back per table with SHOW TBLPROPERTIES (one key/value row per property).
pii_tables = []
for tbl in spark.catalog.listTables("curated"):
    if tbl.isTemporary:
        # Temporary views are not qualified by the database name.
        continue
    props = {
        row["key"]: row["value"]
        for row in spark.sql(f"SHOW TBLPROPERTIES curated.{tbl.name}").collect()
    }
    if props.get("contains_pii") == "true":
        pii_tables.append((tbl.name, props.get("data_classification")))
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## PII Handling
|
|
39
|
+
|
|
40
|
+
### Identify PII Columns
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
# Column names known to hold PII, mapped to the classification level each requires.
PII_COLUMNS = dict(
    # Direct identifiers
    ssn="restricted",
    social_security_number="restricted",
    national_id="restricted",
    passport_number="restricted",
    driver_license="restricted",
    # Contact information
    email="confidential",
    phone="confidential",
    phone_number="confidential",
    address="confidential",
    # Financial
    credit_card="restricted",
    bank_account="restricted",
    salary="confidential",
    # Health
    medical_record="restricted",
    diagnosis="restricted",
)
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### PII Masking Strategies
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
from pyspark.sql import DataFrame, functions as F
from cryptography.fernet import Fernet

# Masking methods understood by mask_pii.
_MASK_METHODS = frozenset({"hash", "encrypt", "mask", "redact"})


def mask_pii(df: DataFrame, strategy: dict[str, str]) -> DataFrame:
    """
    Apply PII masking based on strategy.

    Strategies:
    - hash: One-way SHA-256 hash (for matching)
    - encrypt: Reversible encryption (for authorized access)
    - mask: Partial masking (for display)
    - redact: Complete removal

    Args:
        df: Input DataFrame.
        strategy: Mapping of column name -> masking method. Columns that are
            not present in ``df`` are skipped.

    Returns:
        A DataFrame in which every listed column that exists has been replaced
        by its masked form.

    Raises:
        ValueError: If any method in ``strategy`` is not one of the four
            supported methods. A misspelled method would otherwise leave the
            column unmasked without any signal.
    """
    unknown = {col: how for col, how in strategy.items() if how not in _MASK_METHODS}
    if unknown:
        raise ValueError(f"Unknown PII masking method(s): {unknown}")

    for column, method in strategy.items():
        if column not in df.columns:
            continue

        value = F.col(column)

        if method == "hash":
            # NOTE(review): an unsalted hash of a low-entropy value (e.g. an SSN)
            # can be brute-forced; consider a keyed hash if that matters here.
            df = df.withColumn(column, F.sha2(value, 256))

        elif method == "encrypt":
            # encrypt_udf is the UDF registered in the "Encryption at Rest" example.
            df = df.withColumn(column, encrypt_udf(value))

        elif method == "mask":
            # Keep first/last two chars, mask the middle. Values of four
            # characters or fewer would be fully exposed by that rule (the two
            # substrings cover the whole string), so they are masked entirely.
            # NULL stays NULL.
            df = df.withColumn(
                column,
                F.when(value.isNull(), F.lit(None).cast("string"))
                .when(
                    F.length(value) > 4,
                    F.concat(
                        F.substring(value, 1, 2),
                        F.lit("****"),
                        F.substring(value, -2, 2),
                    ),
                )
                .otherwise(F.lit("****")),
            )

        elif method == "redact":
            df = df.withColumn(column, F.lit("[REDACTED]"))

    return df


# Usage
masked_df = mask_pii(customers_df, {
    "ssn": "hash",
    "email": "encrypt",
    "phone": "mask",
    "credit_card": "redact",
})
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### Encryption at Rest
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
from cryptography.fernet import Fernet
|
|
122
|
+
|
|
123
|
+
class DataEncryption:
    """Encrypt/decrypt sensitive data fields with a Fernet key."""

    def __init__(self, key: "str | bytes | None" = None):
        """
        Args:
            key: Fernet key as ``str`` or ``bytes``. When omitted (or empty),
                the key is read from the ``ENCRYPTION_KEY`` environment variable.

        Raises:
            KeyError: If no key is passed and ``ENCRYPTION_KEY`` is not set.
        """
        # Imported here because this example's import list does not include os.
        import os

        self.key = key or os.environ["ENCRYPTION_KEY"]
        # Accept keys that are already bytes instead of failing on .encode().
        key_bytes = self.key.encode() if isinstance(self.key, str) else self.key
        self.cipher = Fernet(key_bytes)

    def encrypt(self, value: str) -> "str | None":
        """Return the encrypted token for ``value`` as text; ``None`` passes through."""
        if value is None:
            return None
        return self.cipher.encrypt(value.encode()).decode()

    def decrypt(self, encrypted: str) -> "str | None":
        """Return the plaintext for an encrypted token; ``None`` passes through."""
        if encrypted is None:
            return None
        return self.cipher.decrypt(encrypted.encode()).decode()
|
|
139
|
+
|
|
140
|
+
# Register as UDF
# F and StringType are imported here so the example runs on its own.
from pyspark.sql import functions as F
from pyspark.sql.types import StringType

encryption = DataEncryption()
encrypt_udf = F.udf(encryption.encrypt, StringType())
decrypt_udf = F.udf(encryption.decrypt, StringType())
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## Access Control
|
|
147
|
+
|
|
148
|
+
### Role-Based Access Control (RBAC)
|
|
149
|
+
|
|
150
|
+
```sql
|
|
151
|
+
-- Create roles
CREATE ROLE data_analyst;
CREATE ROLE data_engineer;
CREATE ROLE data_admin;

-- Grant permissions
GRANT SELECT, INSERT, UPDATE ON DATABASE curated TO data_engineer;
GRANT ALL PRIVILEGES ON DATABASE curated TO data_admin;

-- Restrict sensitive tables
-- Analysts get no database-wide SELECT: a privilege granted on the database is
-- inherited by every table in it, and a table-level REVOKE does not remove an
-- inherited privilege, so curated.pii_customers would stay readable.
-- Grant only the objects analysts are approved to read instead.
-- NOTE(review): depending on the platform, analysts may also need USAGE on the
-- database to resolve these objects -- confirm.
GRANT SELECT ON TABLE curated.orders TO data_analyst;
GRANT SELECT ON TABLE curated.products TO data_analyst;
GRANT SELECT ON TABLE curated.customers_masked TO data_analyst;
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### Column-Level Security
|
|
167
|
+
|
|
168
|
+
```sql
|
|
169
|
+
-- Create view with masked columns for analysts
|
|
170
|
+
CREATE VIEW curated.customers_masked AS
|
|
171
|
+
SELECT
|
|
172
|
+
customer_id,
|
|
173
|
+
-- Mask PII for non-privileged users
|
|
174
|
+
CASE WHEN IS_ACCOUNT_GROUP_MEMBER('pii_access')
|
|
175
|
+
THEN email
|
|
176
|
+
ELSE CONCAT(LEFT(email, 2), '****@', SPLIT(email, '@')[1])
|
|
177
|
+
END AS email,
|
|
178
|
+
CASE WHEN IS_ACCOUNT_GROUP_MEMBER('pii_access')
|
|
179
|
+
THEN phone
|
|
180
|
+
ELSE CONCAT('***-***-', RIGHT(phone, 4))
|
|
181
|
+
END AS phone,
|
|
182
|
+
segment,
|
|
183
|
+
region
|
|
184
|
+
FROM curated.customers;
|
|
185
|
+
|
|
186
|
+
-- Analysts use masked view
|
|
187
|
+
GRANT SELECT ON curated.customers_masked TO data_analyst;
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
### Row-Level Security
|
|
191
|
+
|
|
192
|
+
```sql
|
|
193
|
+
-- Filter rows based on user's region
-- The user's region is looked up from a mapping table keyed on the login name.
-- NOTE(review): security.user_regions (user_name, region) is a placeholder for
-- wherever user-to-region assignments are maintained -- adjust to your catalog.
CREATE VIEW curated.regional_orders AS
SELECT *
FROM curated.orders
WHERE
    -- Admins see all
    IS_ACCOUNT_GROUP_MEMBER('data_admin')
    OR
    -- Regional managers see their region
    region IN (
        SELECT region
        FROM security.user_regions
        WHERE user_name = CURRENT_USER()
    );
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
## Audit Logging
|
|
206
|
+
|
|
207
|
+
### Log Data Access
|
|
208
|
+
|
|
209
|
+
```python
|
|
210
|
+
def log_data_access(
    user: str,
    table: str,
    operation: str,
    row_count: int,
    query: str = None,
) -> None:
    """Log all data access for compliance.

    Appends one row to ``audit.data_access_log``.

    Args:
        user: Identity that performed the access.
        table: Name of the table that was accessed.
        operation: Kind of access performed.
        row_count: Number of rows touched.
        query: Optional query text; only its SHA-256 hex digest is stored.
    """
    # Imported here so the example is self-contained.
    from pyspark.sql.types import LongType, StringType, StructField, StructType

    # An explicit schema is required: with a single record, Spark cannot infer
    # a type for a field whose only value is None (query_hash when no query is
    # passed) and createDataFrame raises.
    # assumes get_client_ip() / get_session_id() return str or None -- TODO confirm
    audit_schema = StructType([
        StructField("timestamp", StringType()),
        StructField("user", StringType()),
        StructField("table", StringType()),
        StructField("operation", StringType()),
        StructField("row_count", LongType()),
        StructField("query_hash", StringType()),
        StructField("client_ip", StringType()),
        StructField("session_id", StringType()),
    ])

    audit_record = {
        "timestamp": datetime.utcnow().isoformat(),
        "user": user,
        "table": table,
        "operation": operation,
        "row_count": row_count,
        "query_hash": hashlib.sha256(query.encode()).hexdigest() if query else None,
        "client_ip": get_client_ip(),
        "session_id": get_session_id(),
    }

    # Write to audit log (append-only, immutable)
    (
        spark.createDataFrame([audit_record], schema=audit_schema)
        .write.mode("append")
        .saveAsTable("audit.data_access_log")
    )
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
### Log Schema Changes
|
|
234
|
+
|
|
235
|
+
```python
|
|
236
|
+
def log_schema_change(
    table: str,
    change_type: str,  # "column_added", "column_removed", "type_changed"
    before_schema: StructType,
    after_schema: StructType,
    user: str,
) -> None:
    """Record a schema change by appending one row to ``audit.schema_changes``.

    Both schemas are stored in their JSON form.
    """
    entry = dict(
        timestamp=datetime.utcnow().isoformat(),
        table=table,
        change_type=change_type,
        before_schema=before_schema.json(),
        after_schema=after_schema.json(),
        user=user,
    )

    change_frame = spark.createDataFrame([entry])
    change_frame.write.mode("append").saveAsTable("audit.schema_changes")
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
### Query Audit Logs
|
|
257
|
+
|
|
258
|
+
```sql
|
|
259
|
+
-- Column names that collide with SQL keywords (user, table, timestamp) are
-- back-quoted: under ANSI mode an unquoted `user` resolves to the session
-- user rather than the column.

-- Who accessed PII tables?
SELECT `user`, `table`, operation, row_count, `timestamp`
FROM audit.data_access_log
WHERE `table` IN ('curated.customers', 'curated.payments')
  AND `timestamp` >= CURRENT_DATE - 30
ORDER BY `timestamp` DESC;

-- Detect unusual access patterns
SELECT
    `user`,
    COUNT(*) as access_count,
    SUM(row_count) as total_rows
FROM audit.data_access_log
WHERE `timestamp` >= CURRENT_DATE - 1
GROUP BY `user`
HAVING access_count > 100 OR total_rows > 1000000
ORDER BY total_rows DESC;
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
## Data Retention
|
|
279
|
+
|
|
280
|
+
### Retention Policies
|
|
281
|
+
|
|
282
|
+
```python
|
|
283
|
+
# Retention per data layer: how many days to keep rows and whether rows are
# archived before deletion.
RETENTION_POLICIES = {
    layer: {"days": keep_days, "archive": archive_first}
    for layer, keep_days, archive_first in (
        ("raw", 90, True),
        ("curated", 365 * 3, True),
        ("marts", 365 * 7, False),
        ("pii_tables", 365, False),  # Regulatory requirement
        ("audit_logs", 365 * 7, True),  # Keep for compliance
    )
}
|
|
290
|
+
|
|
291
|
+
def apply_retention_policy(table: str, policy: dict) -> None:
    """Delete data older than retention period.

    Args:
        table: Table name, optionally qualified (``schema.table``). It is
            interpolated into SQL, so only plain dotted identifiers are accepted.
        policy: Mapping with ``"days"`` (retention period in days) and an
            optional ``"archive"`` flag; when truthy, expiring rows are copied
            to ``archive.<table>`` before they are deleted.

    Raises:
        ValueError: If ``table`` is not a plain dotted identifier or
            ``policy["days"]`` is negative.
        KeyError: If ``policy`` has no ``"days"`` entry.
    """
    # Imported here so the example is self-contained.
    import re
    from datetime import date, timedelta

    # The table name goes straight into DELETE/INSERT statements; reject
    # anything that is not a plain identifier before building SQL.
    if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*", table):
        raise ValueError(f"Invalid table name: {table!r}")

    retention_days = policy["days"]
    if retention_days < 0:
        # A negative period puts the cutoff in the future and would delete everything.
        raise ValueError(f"Retention days must be non-negative, got {retention_days}")

    cutoff_date = date.today() - timedelta(days=retention_days)

    if policy.get("archive"):
        # Archive before delete
        spark.sql(f"""
            INSERT INTO archive.{table}
            SELECT * FROM {table}
            WHERE _loaded_at < '{cutoff_date}'
        """)

    # Delete old data
    spark.sql(f"""
        DELETE FROM {table}
        WHERE _loaded_at < '{cutoff_date}'
    """)

    # Log retention action
    logger.info(f"Applied retention policy to {table}: deleted records before {cutoff_date}")
|
|
311
|
+
```
|
|
312
|
+
|
|
313
|
+
### Right to Deletion (GDPR)
|
|
314
|
+
|
|
315
|
+
```python
|
|
316
|
+
def delete_customer_data(customer_id: str, tables: list[str]) -> dict[str, int]:
    """
    Delete all data for a customer (GDPR right to erasure).

    Args:
        customer_id: Identifier of the customer whose rows are removed. It is
            bound as a query parameter, never interpolated into the SQL text.
        tables: Tables to purge; each must have a ``customer_id`` column.

    Returns:
        Mapping of table name -> number of matching rows counted immediately
        before the delete.

    Raises:
        ValueError: If any table name is not a plain dotted identifier. All
            names are checked before the first delete so a bad entry cannot
            leave the erasure half done.
    """
    import re

    identifier = re.compile(r"[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*")
    invalid = [name for name in tables if not identifier.fullmatch(name)]
    if invalid:
        raise ValueError(f"Invalid table name(s): {invalid}")

    # Named parameter markers (spark.sql(..., args=...)) need Spark 3.4+.
    params = {"customer_id": customer_id}
    deleted_records = {}

    for table in tables:
        # Count before delete
        count_before = spark.sql(
            f"SELECT COUNT(*) FROM {table} WHERE customer_id = :customer_id",
            args=params,
        ).collect()[0][0]

        # Delete
        spark.sql(
            f"DELETE FROM {table} WHERE customer_id = :customer_id",
            args=params,
        )

        deleted_records[table] = count_before

    # Log deletion for compliance
    log_gdpr_deletion(customer_id, deleted_records)

    return deleted_records
|
|
339
|
+
```
|
|
340
|
+
|
|
341
|
+
## Secrets Management
|
|
342
|
+
|
|
343
|
+
### Never Hardcode Secrets
|
|
344
|
+
|
|
345
|
+
```python
|
|
346
|
+
# Bad: Hardcoded credentials
|
|
347
|
+
connection_string = "postgresql://user:password123@host:5432/db"
|
|
348
|
+
|
|
349
|
+
# Good: Environment variables
# Credentials are percent-encoded so characters such as '@', ':' or '/' in a
# password cannot change how the URL is parsed.
import os
from urllib.parse import quote

connection_string = (
    f"postgresql://{quote(os.environ['DB_USER'], safe='')}"
    f":{quote(os.environ['DB_PASSWORD'], safe='')}"
    f"@{os.environ['DB_HOST']}:5432/{os.environ['DB_NAME']}"
)
|
|
351
|
+
|
|
352
|
+
# Better: Secrets manager
|
|
353
|
+
from databricks.sdk.runtime import dbutils
|
|
354
|
+
connection_string = dbutils.secrets.get(scope="production", key="db_connection_string")
|
|
355
|
+
```
|
|
356
|
+
|
|
357
|
+
### Rotate Credentials
|
|
358
|
+
|
|
359
|
+
```python
|
|
360
|
+
def rotate_encryption_key(old_key: str, new_key: str, tables: list[str]) -> None:
    """
    Rotate encryption key for encrypted columns.
    Re-encrypt all data with new key.

    Rotation uses ``MultiFernet.rotate``, which accepts tokens written under
    either key and re-issues them under ``new_key``. The function can
    therefore be re-run after a partial failure: values already rotated are
    not rejected for failing to decrypt with ``old_key``.

    Args:
        old_key: Fernet key the data is currently encrypted with.
        new_key: Fernet key to re-encrypt with.
        tables: Tables whose encrypted columns are rewritten.
    """
    from cryptography.fernet import MultiFernet

    # The first key encrypts; every key in the list is tried for decryption.
    rotator = MultiFernet([Fernet(new_key.encode()), Fernet(old_key.encode())])

    @F.udf(returnType=StringType())
    def reencrypt(value: str) -> str:
        if value is None:
            return None
        return rotator.rotate(value.encode()).decode()

    for table in tables:
        encrypted_cols = get_encrypted_columns(table)

        df = spark.table(table)
        for column_name in encrypted_cols:
            df = df.withColumn(column_name, reencrypt(F.col(column_name)))

        # NOTE(review): overwriting a table that the same job reads from is
        # only safe on formats with snapshot isolation (e.g. Delta) -- confirm
        # the table format before running this.
        df.write.mode("overwrite").saveAsTable(table)

        logger.info(f"Rotated encryption key for {table}")
|
|
385
|
+
```
|
|
386
|
+
|
|
387
|
+
## Compliance Checklist
|
|
388
|
+
|
|
389
|
+
### General Data Protection
|
|
390
|
+
|
|
391
|
+
- [ ] Data classified by sensitivity
|
|
392
|
+
- [ ] PII identified and catalogued
|
|
393
|
+
- [ ] Encryption at rest for sensitive data
|
|
394
|
+
- [ ] Encryption in transit (TLS)
|
|
395
|
+
- [ ] Access controls implemented
|
|
396
|
+
- [ ] Audit logging enabled
|
|
397
|
+
|
|
398
|
+
### GDPR Compliance
|
|
399
|
+
|
|
400
|
+
- [ ] Data inventory documented
|
|
401
|
+
- [ ] Legal basis for processing defined
|
|
402
|
+
- [ ] Right to access implemented
|
|
403
|
+
- [ ] Right to deletion implemented
|
|
404
|
+
- [ ] Data portability supported
|
|
405
|
+
- [ ] Breach notification process defined
|
|
406
|
+
|
|
407
|
+
### SOC 2 / HIPAA / PCI
|
|
408
|
+
|
|
409
|
+
- [ ] Access reviews conducted regularly
|
|
410
|
+
- [ ] Logs retained per requirements
|
|
411
|
+
- [ ] Change management documented
|
|
412
|
+
- [ ] Incident response plan exists
|
|
413
|
+
- [ ] Vendor assessments completed
|
|
414
|
+
- [ ] Training completed by team
|
|
415
|
+
|
|
416
|
+
## Security Best Practices
|
|
417
|
+
|
|
418
|
+
### Principle of Least Privilege
|
|
419
|
+
|
|
420
|
+
```sql
|
|
421
|
+
-- Bad: Overly permissive
|
|
422
|
+
GRANT ALL PRIVILEGES ON DATABASE curated TO data_team;
|
|
423
|
+
|
|
424
|
+
-- Good: Minimal necessary permissions
|
|
425
|
+
GRANT SELECT ON curated.orders TO sales_analyst;
|
|
426
|
+
GRANT SELECT ON curated.products TO sales_analyst;
|
|
427
|
+
-- No access to customer PII
|
|
428
|
+
```
|
|
429
|
+
|
|
430
|
+
### Defense in Depth
|
|
431
|
+
|
|
432
|
+
```
|
|
433
|
+
┌─────────────────────────────────────────┐
|
|
434
|
+
│ Network: VPC, Firewall, Private Link │
|
|
435
|
+
├─────────────────────────────────────────┤
|
|
436
|
+
│ Authentication: SSO, MFA │
|
|
437
|
+
├─────────────────────────────────────────┤
|
|
438
|
+
│ Authorization: RBAC, Row/Column Security│
|
|
439
|
+
├─────────────────────────────────────────┤
|
|
440
|
+
│ Encryption: At rest, In transit │
|
|
441
|
+
├─────────────────────────────────────────┤
|
|
442
|
+
│ Monitoring: Audit logs, Alerts │
|
|
443
|
+
└─────────────────────────────────────────┘
|
|
444
|
+
```
|
|
445
|
+
|
|
446
|
+
### Secure Defaults
|
|
447
|
+
|
|
448
|
+
```python
|
|
449
|
+
# Baseline table properties: non-public classification, access gated by approval
DEFAULT_TABLE_PROPERTIES = dict(
    data_classification="internal",
    access_requires_approval="true",
)

# Baseline write options: encryption on, zstd compression
DEFAULT_WRITE_OPTIONS = dict(
    encryption="true",
    compression="zstd",
)
|
|
460
|
+
```
|