nui-python-shared-utils 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,172 @@
1
+ """
2
+ Utilities for extracting CloudWatch logs from Kinesis stream records.
3
+
4
+ Provides standardized Kinesis log extraction, decompression, and index naming
5
+ for Lambda functions that stream CloudWatch logs to Elasticsearch.
6
+ """
7
+
8
+ import base64
9
+ import json
10
+ import logging
11
+ import zlib
12
+ from datetime import datetime
13
+ from typing import Any, Callable, Dict, Iterator, List, Optional, TypedDict
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class CloudWatchLogEvent(TypedDict):
    """One log event as delivered in a CloudWatch Logs subscription payload."""

    id: str
    # Event time as a Unix epoch timestamp in milliseconds.
    timestamp: int
    message: str
24
+
25
+
26
class CloudWatchLogsData(TypedDict):
    """Payload obtained by decompressing a CloudWatch Logs subscription record."""

    # Either "DATA_MESSAGE" (carries real log events) or "CONTROL_MESSAGE"
    # (subscription heartbeat/validation; carries none worth indexing).
    messageType: str
    owner: str
    logGroup: str
    logStream: str
    subscriptionFilters: List[str]
    logEvents: List[CloudWatchLogEvent]
35
+
36
+
37
def extract_cloudwatch_logs_from_kinesis(
    records: List[Dict[str, Any]],
    process_fn: Callable[[str, str, List[Dict]], Iterator[Dict]],
    on_error: Optional[Callable[[Exception, Dict], None]] = None,
) -> Iterator[Dict[str, Any]]:
    """
    Extract CloudWatch logs from Kinesis stream records.

    Handles base64 decoding, gzip decompression, JSON parsing, and
    CONTROL_MESSAGE filtering. Yields documents from the process_fn callback.

    Args:
        records: Kinesis event records (event["Records"])
        process_fn: Callback to process log events. Signature:
            process_fn(log_group: str, log_stream: str, log_events: List[Dict]) -> Iterator[Dict]
            Should yield dicts with at minimum: {"_index": str, "_source": dict}
        on_error: Optional error handler. If None, exceptions are raised.
            Signature: on_error(exception: Exception, record_data: Dict) -> None

    Yields:
        Dict documents ready for Elasticsearch streaming_bulk()

    Raises:
        KeyError / TypeError / ValueError / zlib.error: when a record is
        malformed and no ``on_error`` handler was supplied.

    Example:
        from elasticsearch.helpers import streaming_bulk

        def my_processor(log_group, log_stream, events):
            for event in events:
                yield {
                    "_index": f"log-{log_group.split('/')[-1]}-2025-01",
                    "_id": event["id"],
                    "_source": {"message": event["message"]},
                }

        for ok, response in streaming_bulk(
            client=es,
            actions=extract_cloudwatch_logs_from_kinesis(
                event["Records"], process_fn=my_processor
            ),
        ):
            if not ok:
                logger.error("Failed: %s", response)
    """
    # Resolve the module logger locally so the function is self-contained;
    # logging.getLogger(__name__) returns the same logger instance as the
    # module-level one.
    log = logging.getLogger(__name__)
    log_counts: List[int] = []

    for row in records:
        try:
            raw_data = row["kinesis"]["data"]
        except (KeyError, TypeError) as e:
            log.exception("Kinesis record missing 'kinesis.data' key")
            if on_error:
                on_error(e, {"row": row})
                continue
            raise

        try:
            # 16 + MAX_WBITS tells zlib to expect a gzip (not raw zlib) header,
            # which is what CloudWatch Logs subscriptions emit.
            decompressed = zlib.decompress(
                base64.b64decode(raw_data), 16 + zlib.MAX_WBITS
            ).decode("utf-8")
            data = json.loads(decompressed)
        except Exception as e:
            log.exception("Failed to decode/decompress Kinesis record")
            if on_error:
                # Only a prefix of the (potentially large) payload for context.
                on_error(e, {"raw_data": raw_data[:100]})
                continue
            raise

        try:
            message_type = data["messageType"]
            log_group = data["logGroup"]
            log_stream = data["logStream"]
            log_events = data["logEvents"]
        except KeyError as e:
            log.exception("Malformed CloudWatch logs payload missing key: %s", e)
            if on_error:
                on_error(e, data)
                continue
            raise

        # CONTROL_MESSAGEs are subscription heartbeats — nothing to index.
        if message_type == "CONTROL_MESSAGE":
            log.debug("Skipping CONTROL_MESSAGE")
            continue

        log_counts.append(len(log_events))

        try:
            yield from process_fn(log_group, log_stream, log_events)
        except Exception as e:
            # Lazy %-style args keep formatting off the hot path when the
            # level is disabled.
            log.exception("Failed to process log events from %s", log_group)
            if on_error:
                on_error(e, data)
                continue
            raise

    log.debug(
        "Processed %d log events from %d Kinesis records",
        sum(log_counts),
        len(records),
    )
134
+
135
+
136
def derive_index_name(
    log_group: str,
    timestamp: datetime,
    prefix: str = "log",
    date_format: str = "%Y-m%m",
    target_override: Optional[str] = None,
) -> str:
    """
    Derive Elasticsearch index name from log group and timestamp.

    Default pattern: log-{service}-{YYYY}-m{MM}

    Args:
        log_group: CloudWatch log group name (e.g., "/aws/lambda/my-function")
        timestamp: Event timestamp for date-based index suffix
        prefix: Index name prefix (default: "log")
        date_format: strftime format for date suffix (default: "%Y-m%m")
        target_override: If provided, use this as service name instead of deriving from log_group

    Returns:
        Index name string (e.g., "log-my-function-2025-m01")

    Example:
        >>> derive_index_name("/aws/lambda/order-processor", datetime(2025, 1, 15))
        'log-order-processor-2025-m01'

        >>> derive_index_name("/ecs/my-service", datetime(2025, 1, 15), target_override="custom")
        'log-custom-2025-m01'
    """
    # The last path segment of the log group names the service unless the
    # caller overrides it explicitly.
    service = target_override if target_override else log_group.split("/")[-1]
    suffix = timestamp.strftime(date_format)
    # Elasticsearch index names must be lowercase.
    return "-".join((prefix, service, suffix)).lower()
@@ -0,0 +1,263 @@
1
+ """
2
+ AWS Powertools integration utilities for Lambda functions.
3
+
4
+ Provides standardized logging, metrics, and error handling patterns using AWS Lambda Powertools.
5
+ """
6
+
7
+ import functools
8
+ import logging
9
+ import os
10
+ from typing import Any, Callable, Dict, Optional, Union
11
+
12
+ # Optional imports with graceful degradation
13
+ try:
14
+ from aws_lambda_powertools import Logger, Metrics
15
+
16
+ POWERTOOLS_AVAILABLE = True
17
+ except ImportError:
18
+ POWERTOOLS_AVAILABLE = False
19
+ Logger = None # type: ignore
20
+ Metrics = None # type: ignore
21
+
22
+ try:
23
+ import coloredlogs
24
+
25
+ COLOREDLOGS_AVAILABLE = True
26
+ except ImportError:
27
+ COLOREDLOGS_AVAILABLE = False
28
+
29
+ try:
30
+ from .slack_client import SlackClient
31
+
32
+ SLACK_CLIENT_AVAILABLE = True
33
+ except ImportError:
34
+ SLACK_CLIENT_AVAILABLE = False
35
+ SlackClient = None # type: ignore
36
+
37
+ from .lambda_helpers import get_lambda_environment_info
38
+
39
+
40
+ __all__ = ["get_powertools_logger", "powertools_handler"]
41
+
42
+
43
def get_powertools_logger(
    service_name: str,
    level: str = "INFO",
    local_dev_colors: bool = True,
) -> Union[Logger, logging.Logger]:
    """
    Create an AWS Powertools Logger with Elasticsearch-compatible formatting.

    Picks the logging backend based on where the code is running:
    - Lambda environment: AWS Powertools Logger with JSON structured logging.
    - Local / SAM-local: standard Python logger, with coloredlogs if installed.

    Timestamps use the Elasticsearch-friendly ISO-8601 format
    (%Y-%m-%dT%H:%M:%SZ) in UTC for consistency with log aggregation.

    Args:
        service_name: Service identifier (e.g., "nui-tender-analyser")
        level: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL). Default: INFO
        local_dev_colors: Enable coloredlogs for local development. Default: True

    Returns:
        A logger exposing an ``inject_lambda_context`` decorator:
        - In Lambda: AWS Powertools Logger with JSON formatting.
        - Locally: stdlib Logger with a no-op ``inject_lambda_context``.

    Raises:
        ImportError: If aws-lambda-powertools is missing in a Lambda environment.

    Example:
        >>> logger = get_powertools_logger("my-service", level="INFO")
        >>> @logger.inject_lambda_context
        ... def handler(event, context):
        ...     logger.info("Processing event")
        ...     return {"statusCode": 200}
    """
    env_info = get_lambda_environment_info()
    running_locally = env_info["is_local"] or os.getenv("AWS_SAM_LOCAL") is not None

    if not running_locally:
        # Real Lambda environment: Powertools is mandatory for JSON logs.
        if not POWERTOOLS_AVAILABLE:
            raise ImportError(
                "aws-lambda-powertools is required for Lambda environment. "
                "Install with: pip install nui-python-shared-utils[powertools]"
            )
        # Powertools' default datefmt ('2025-01-18 04:39:27,788+0000') is not
        # what Elasticsearch expects; use ISO 8601 in UTC instead. %f is not
        # supported by time.strftime(), which logging uses internally.
        return Logger(
            service=service_name,
            level=level,
            sampling_rate=1,
            datefmt="%Y-%m-%dT%H:%M:%SZ",
            utc=True,
        )

    # Local development (or SAM local, for dev-friendly output).
    logging.captureWarnings(True)

    if local_dev_colors and COLOREDLOGS_AVAILABLE:
        # Drop any pre-existing root handlers so coloredlogs doesn't duplicate
        # every record.
        logging.getLogger().handlers = []
        coloredlogs.install(level=level, isatty=True)

    local_logger = logging.getLogger(service_name)
    local_logger.setLevel(level)

    # No-op stand-in so code written against the Powertools API still runs
    # locally. Supports both @inject_lambda_context and
    # @inject_lambda_context(log_event=False) usage.
    def _noop_inject_lambda_context(func=None, **_kwargs):
        if func is not None:
            return func
        return lambda f: f

    local_logger.inject_lambda_context = _noop_inject_lambda_context  # type: ignore

    return local_logger
126
+
127
+
128
def powertools_handler(
    service_name: str,
    metrics_namespace: Optional[str] = None,
    slack_alert_channel: Optional[str] = None,
    slack_account_names: Optional[Dict[str, str]] = None,
    slack_account_names_config: Optional[str] = None,
):
    """
    Decorator for Lambda handlers with logging, metrics, and error handling.

    Combines AWS Powertools Logger and Metrics decorators with automatic exception
    handling and optional Slack alerting. Provides consistent error responses and
    structured logging for Lambda functions.

    Features:
        - Automatic logger.inject_lambda_context integration
        - Optional metrics.log_metrics integration (if metrics_namespace provided)
        - Structured exception logging with traceback
        - Optional Slack alerts on failures (if slack_alert_channel provided)
        - Graceful degradation if Slack client unavailable
        - Proper Lambda error response formatting

    Args:
        service_name: Service identifier for logging and metrics dimensions
        metrics_namespace: CloudWatch namespace for metrics (e.g., "NUI/TenderAnalyser").
            If None, metrics publishing is disabled.
        slack_alert_channel: Slack channel for error alerts (e.g., "#alerts", "#errors").
            If None, Slack alerting is disabled.
        slack_account_names: Dict mapping AWS account IDs to display names
        slack_account_names_config: Path to YAML file with account_names mapping.
            The file must be bundled in the Lambda deployment package/zip.
            Values loaded from this file are used as defaults, but any keys
            provided in ``slack_account_names`` take precedence and override
            the YAML-loaded values.

    Returns:
        Decorator function for Lambda handlers

    Example:
        >>> @powertools_handler(
        ...     service_name="my-lambda",
        ...     metrics_namespace="MyApp/Lambda",
        ...     slack_alert_channel="#errors"
        ... )
        ... def handler(event, context):
        ...     logger.info("Processing event")
        ...     return {"statusCode": 200, "body": "Success"}

    Example (minimal):
        >>> @powertools_handler(service_name="simple-lambda")
        ... def handler(event, context):
        ...     return {"statusCode": 200}

    Note:
        The decorated handler must return a dict with statusCode and optional body.
        On exception, returns: {"statusCode": 500, "body": "Internal Server Error"}
    """

    def decorator(func: Callable) -> Callable:
        # Everything below runs once, at decoration time — logger, metrics and
        # Slack client are shared across all invocations of the wrapped handler.

        # Create logger
        logger = get_powertools_logger(service_name)

        # Create metrics publisher if namespace provided; metrics stay disabled
        # when Powertools is not importable, even if a namespace was requested.
        metrics = None
        if metrics_namespace and POWERTOOLS_AVAILABLE:
            metrics = Metrics(namespace=metrics_namespace, service=service_name)

        # Create Slack client if channel provided. A failed initialization
        # only disables alerting — it must never break the handler itself.
        slack_client = None
        if slack_alert_channel and SLACK_CLIENT_AVAILABLE:
            try:
                slack_client = SlackClient(
                    account_names=slack_account_names,
                    account_names_config=slack_account_names_config,
                )
            except Exception as e:
                logger.warning("Failed to initialize Slack client: %s", e)

        @functools.wraps(func)
        def wrapper(event: dict, context: Any) -> dict:
            # Populate SlackClient account info from Lambda context ARN
            if slack_client:
                slack_client.set_handler_context(context)

            try:
                # Apply logger context injection.
                # Note: inject_lambda_context is added dynamically to
                # logging.Logger by get_powertools_logger for local runs, and
                # is native to the Powertools Logger in Lambda. The type
                # checker can't verify this union.
                handler_with_logging = logger.inject_lambda_context(func)  # type: ignore[union-attr, attr-defined]

                # Apply metrics if configured
                if metrics:
                    handler_with_metrics = metrics.log_metrics(handler_with_logging)
                    result = handler_with_metrics(event, context)
                else:
                    result = handler_with_logging(event, context)

                return result

            except Exception as e:
                # Log exception with full context
                logger.exception(
                    "Lambda handler failed: %s",
                    str(e),
                    extra={
                        "error_type": type(e).__name__,
                        "error_message": str(e),
                        "service": service_name,
                    },
                )

                # Send Slack alert if configured. Slack delivery failures are
                # logged and swallowed so they cannot mask the original error.
                if slack_client and slack_alert_channel:
                    try:
                        error_message = f"*Lambda Error: {service_name}*\n\n"
                        error_message += f"Error: `{type(e).__name__}: {str(e)}`\n"
                        error_message += (
                            f"Function: `{context.function_name if hasattr(context, 'function_name') else 'unknown'}`"
                        )

                        slack_client.send_message(
                            channel=slack_alert_channel,
                            text=error_message,
                        )
                    except Exception as slack_error:
                        logger.warning("Failed to send Slack alert: %s", slack_error)

                # Return proper Lambda error response
                return {
                    "statusCode": 500,
                    "body": "Internal Server Error",
                }

        return wrapper

    return decorator
@@ -0,0 +1,187 @@
1
+ """
2
+ AWS Secrets Manager helper for retrieving credentials.
3
+ Shared across all AWS Lambda functions.
4
+ """
5
+
6
+ import os
7
+ import json
8
+ import logging
9
+ from typing import Dict, Optional
10
+ import boto3
11
+ from botocore.exceptions import ClientError
12
+
13
+ from .config import get_config
14
+
15
+ log = logging.getLogger(__name__)
16
+
17
+ # Cache for secrets to avoid repeated API calls
18
+ _secrets_cache = {}
19
+
20
+
21
def get_secret(secret_name: str) -> Dict:
    """
    Retrieve secret from AWS Secrets Manager.

    Results are cached in the module-level ``_secrets_cache`` so repeated
    calls within one Lambda execution environment avoid extra API calls.

    Args:
        secret_name: Name of the secret in Secrets Manager

    Returns:
        Dict containing the secret values

    Raises:
        Exception: if the secret cannot be retrieved or parsed. The original
            botocore/JSON error is preserved as ``__cause__`` via chaining.
    """
    # Check cache first
    if secret_name in _secrets_cache:
        return _secrets_cache[secret_name]

    # Create a Secrets Manager client; fall back to ap-southeast-2 when the
    # session has no region configured (e.g. local runs).
    session = boto3.session.Session()
    client = session.client(
        service_name="secretsmanager",
        region_name=session.region_name or "ap-southeast-2",
    )

    try:
        response = client.get_secret_value(SecretId=secret_name)

        # Secrets Manager stores either a string or binary
        if "SecretString" in response:
            secret = json.loads(response["SecretString"])
        else:
            # Binary secret (not typically used for credentials)
            secret = json.loads(response["SecretBinary"].decode("utf-8"))

        # Cache the secret
        _secrets_cache[secret_name] = secret

        log.info("Successfully retrieved secret: %s", secret_name)
        return secret

    except ClientError as e:
        error_code = e.response["Error"]["Code"]

        # Map known error codes to stable, user-facing messages; anything
        # else falls through to the generic message with the raw code.
        known_messages = {
            "DecryptionFailureException": f"Cannot decrypt secret {secret_name}",
            "InternalServiceErrorException": f"Internal service error retrieving {secret_name}",
            "InvalidParameterException": f"Invalid parameter for {secret_name}",
            "InvalidRequestException": f"Invalid request for {secret_name}",
            "ResourceNotFoundException": f"Secret {secret_name} not found",
        }
        if error_code in known_messages:
            message = known_messages[error_code]
            log.error("%s: %s", message, e)
        else:
            message = f"Error retrieving secret {secret_name}: {error_code}"
            log.error("Unknown error retrieving %s: %s", secret_name, e)
        # Chain the ClientError so callers/logs keep the root cause.
        raise Exception(message) from e
    except Exception as e:
        # Covers JSON decoding failures and any other unexpected error.
        log.error("Unexpected error retrieving %s: %s", secret_name, e)
        raise Exception(
            f"Unexpected error retrieving secret {secret_name}: {str(e)}"
        ) from e
82
+
83
+
84
def get_database_credentials(secret_name: Optional[str] = None) -> Dict:
    """
    Get database credentials with standardized field names.

    Args:
        secret_name: Override default from configuration or environment

    Returns:
        Dict with host, port, username, password, database
    """
    config = get_config()
    # Resolution order: explicit argument, environment variable, config.
    resolved_name = (
        secret_name
        or os.environ.get("DB_CREDENTIALS_SECRET")
        or config.db_credentials_secret
    )
    if not resolved_name:
        raise ValueError("No database secret name provided")

    creds = get_secret(resolved_name)

    # Different secrets use different field names; normalize them here.
    host = creds.get("host", creds.get("endpoint", creds.get("hostname")))
    username = creds.get("username", creds.get("user"))
    database = creds.get("database", creds.get("dbname", "app"))

    return {
        "host": host,
        "port": int(creds.get("port", 3306)),
        "username": username,
        "password": creds.get("password"),
        "database": database,
    }
109
+
110
+
111
def get_elasticsearch_credentials(secret_name: Optional[str] = None) -> Dict:
    """
    Get Elasticsearch credentials.

    Args:
        secret_name: Override default from configuration or environment

    Returns:
        Dict with host, username, password
    """
    config = get_config()
    # Resolution order: explicit argument, environment variable, config.
    resolved_name = (
        secret_name
        or os.environ.get("ES_CREDENTIALS_SECRET")
        or config.es_credentials_secret
    )
    if not resolved_name:
        raise ValueError("No Elasticsearch secret name provided")

    creds = get_secret(resolved_name)

    # Host resolution order: env var, secret payload, configured default.
    host = os.environ.get("ES_HOST") or creds.get("host") or config.es_host
    # Append the default port when the host carries neither a port nor a
    # URL scheme.
    if ":" not in host and not host.startswith("http"):
        host = f"{host}:9200"

    return {
        "host": host,
        "username": creds.get("username", "elastic"),
        "password": creds.get("password"),
    }
139
+
140
+
141
def get_slack_credentials(secret_name: Optional[str] = None) -> Dict:
    """
    Get Slack bot credentials.

    Args:
        secret_name: Override default from configuration or environment

    Returns:
        Dict with bot_token and optional webhook_url
    """
    config = get_config()
    # Resolution order: explicit argument, environment variable, config.
    resolved_name = (
        secret_name
        or os.environ.get("SLACK_CREDENTIALS_SECRET")
        or config.slack_credentials_secret
    )
    if not resolved_name:
        raise ValueError("No Slack secret name provided")

    creds = get_secret(resolved_name)

    # Some secrets store the bot token under "token" instead of "bot_token".
    return {
        "bot_token": creds.get("bot_token", creds.get("token")),
        "webhook_url": creds.get("webhook_url"),  # Optional
    }
162
+
163
+
164
def get_api_key(secret_name: str, key_field: str = "api_key") -> str:
    """
    Get a simple API key from secrets.

    Args:
        secret_name: Name of the secret
        key_field: Field name containing the key (default: 'api_key')

    Returns:
        The API key string

    Raises:
        KeyError: if ``key_field`` is not present in the secret.
    """
    secret = get_secret(secret_name)

    # EAFP: attempt the lookup and translate a miss into a clearer error.
    try:
        return secret[key_field]
    except KeyError:
        raise KeyError(
            f"Field '{key_field}' not found in secret {secret_name}"
        ) from None
181
+
182
+
183
def clear_cache() -> None:
    """Clear the secrets cache. Useful for long-running Lambdas."""
    # .clear() mutates the dict in place, so no `global` declaration is
    # needed (that is only required when rebinding the name).
    _secrets_cache.clear()
    log.info("Cleared secrets cache")