@cdklabs/cdk-appmod-catalog-blueprints 1.4.1 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.jsii +2579 -194
- package/lib/document-processing/adapter/adapter.d.ts +4 -2
- package/lib/document-processing/adapter/adapter.js +1 -1
- package/lib/document-processing/adapter/queued-s3-adapter.d.ts +9 -2
- package/lib/document-processing/adapter/queued-s3-adapter.js +29 -15
- package/lib/document-processing/agentic-document-processing.d.ts +4 -0
- package/lib/document-processing/agentic-document-processing.js +20 -10
- package/lib/document-processing/base-document-processing.d.ts +54 -2
- package/lib/document-processing/base-document-processing.js +136 -82
- package/lib/document-processing/bedrock-document-processing.d.ts +202 -2
- package/lib/document-processing/bedrock-document-processing.js +717 -77
- package/lib/document-processing/chunking-config.d.ts +614 -0
- package/lib/document-processing/chunking-config.js +5 -0
- package/lib/document-processing/default-document-processing-config.js +1 -1
- package/lib/document-processing/index.d.ts +1 -0
- package/lib/document-processing/index.js +2 -1
- package/lib/document-processing/resources/aggregation/handler.py +567 -0
- package/lib/document-processing/resources/aggregation/requirements.txt +7 -0
- package/lib/document-processing/resources/aggregation/test_handler.py +362 -0
- package/lib/document-processing/resources/cleanup/handler.py +276 -0
- package/lib/document-processing/resources/cleanup/requirements.txt +5 -0
- package/lib/document-processing/resources/cleanup/test_handler.py +436 -0
- package/lib/document-processing/resources/default-bedrock-invoke/index.py +85 -3
- package/lib/document-processing/resources/default-bedrock-invoke/test_index.py +622 -0
- package/lib/document-processing/resources/pdf-chunking/README.md +313 -0
- package/lib/document-processing/resources/pdf-chunking/chunking_strategies.py +460 -0
- package/lib/document-processing/resources/pdf-chunking/error_handling.py +491 -0
- package/lib/document-processing/resources/pdf-chunking/handler.py +958 -0
- package/lib/document-processing/resources/pdf-chunking/metrics.py +435 -0
- package/lib/document-processing/resources/pdf-chunking/requirements.txt +3 -0
- package/lib/document-processing/resources/pdf-chunking/strategy_selection.py +420 -0
- package/lib/document-processing/resources/pdf-chunking/structured_logging.py +457 -0
- package/lib/document-processing/resources/pdf-chunking/test_chunking_strategies.py +353 -0
- package/lib/document-processing/resources/pdf-chunking/test_error_handling.py +487 -0
- package/lib/document-processing/resources/pdf-chunking/test_handler.py +609 -0
- package/lib/document-processing/resources/pdf-chunking/test_integration.py +694 -0
- package/lib/document-processing/resources/pdf-chunking/test_metrics.py +532 -0
- package/lib/document-processing/resources/pdf-chunking/test_strategy_selection.py +471 -0
- package/lib/document-processing/resources/pdf-chunking/test_structured_logging.py +449 -0
- package/lib/document-processing/resources/pdf-chunking/test_token_estimation.py +374 -0
- package/lib/document-processing/resources/pdf-chunking/token_estimation.py +189 -0
- package/lib/document-processing/tests/agentic-document-processing-nag.test.js +4 -3
- package/lib/document-processing/tests/agentic-document-processing.test.js +488 -4
- package/lib/document-processing/tests/base-document-processing-nag.test.js +9 -2
- package/lib/document-processing/tests/base-document-processing-schema.test.d.ts +1 -0
- package/lib/document-processing/tests/base-document-processing-schema.test.js +337 -0
- package/lib/document-processing/tests/base-document-processing.test.js +114 -8
- package/lib/document-processing/tests/bedrock-document-processing-chunking-nag.test.d.ts +1 -0
- package/lib/document-processing/tests/bedrock-document-processing-chunking-nag.test.js +382 -0
- package/lib/document-processing/tests/bedrock-document-processing-nag.test.js +4 -3
- package/lib/document-processing/tests/bedrock-document-processing-security.test.d.ts +1 -0
- package/lib/document-processing/tests/bedrock-document-processing-security.test.js +389 -0
- package/lib/document-processing/tests/bedrock-document-processing.test.js +808 -8
- package/lib/document-processing/tests/chunking-config.test.d.ts +1 -0
- package/lib/document-processing/tests/chunking-config.test.js +238 -0
- package/lib/document-processing/tests/queued-s3-adapter-nag.test.js +9 -2
- package/lib/document-processing/tests/queued-s3-adapter.test.js +17 -6
- package/lib/framework/agents/base-agent.js +1 -1
- package/lib/framework/agents/batch-agent.js +1 -1
- package/lib/framework/agents/default-agent-config.js +1 -1
- package/lib/framework/bedrock/bedrock.js +1 -1
- package/lib/framework/custom-resource/default-runtimes.js +1 -1
- package/lib/framework/foundation/access-log.js +1 -1
- package/lib/framework/foundation/eventbridge-broker.js +1 -1
- package/lib/framework/foundation/network.d.ts +4 -2
- package/lib/framework/foundation/network.js +52 -41
- package/lib/framework/tests/access-log.test.js +5 -2
- package/lib/framework/tests/batch-agent.test.js +5 -2
- package/lib/framework/tests/bedrock.test.js +5 -2
- package/lib/framework/tests/eventbridge-broker.test.js +5 -2
- package/lib/framework/tests/framework-nag.test.js +26 -7
- package/lib/framework/tests/network.test.js +30 -2
- package/lib/tsconfig.tsbuildinfo +1 -1
- package/lib/utilities/data-loader.js +1 -1
- package/lib/utilities/lambda-iam-utils.js +1 -1
- package/lib/utilities/observability/cloudfront-distribution-observability-property-injector.js +1 -1
- package/lib/utilities/observability/default-observability-config.js +1 -1
- package/lib/utilities/observability/lambda-observability-property-injector.js +1 -1
- package/lib/utilities/observability/log-group-data-protection-utils.js +1 -1
- package/lib/utilities/observability/powertools-config.d.ts +10 -1
- package/lib/utilities/observability/powertools-config.js +19 -3
- package/lib/utilities/observability/state-machine-observability-property-injector.js +1 -1
- package/lib/utilities/test-utils.d.ts +43 -0
- package/lib/utilities/test-utils.js +56 -0
- package/lib/utilities/tests/data-loader-nag.test.js +3 -2
- package/lib/utilities/tests/data-loader.test.js +3 -2
- package/lib/webapp/frontend-construct.js +1 -1
- package/lib/webapp/tests/frontend-construct-nag.test.js +3 -2
- package/lib/webapp/tests/frontend-construct.test.js +3 -2
- package/package.json +6 -5
- package/lib/document-processing/resources/default-error-handler/index.js +0 -46
- package/lib/document-processing/resources/default-pdf-processor/index.js +0 -46
- package/lib/document-processing/resources/default-pdf-validator/index.js +0 -36
package/lib/document-processing/resources/pdf-chunking/metrics.py (new file)
@@ -0,0 +1,435 @@
+"""
+CloudWatch Metrics Module for PDF Chunking.
+
+This module provides functions to emit CloudWatch metrics for PDF chunking operations
+using AWS Lambda Powertools for efficient EMF (Embedded Metric Format) logging.
+
+Metrics are only emitted when observability is enabled via the ENABLE_METRICS
+environment variable (set to 'true'). This is controlled by the enableObservability
+prop in the CDK construct.
+
+Requirements: 7.4
+"""
+
+import os
+import time
+import logging
+from typing import Optional, List
+from functools import wraps
+
+from aws_lambda_powertools import Metrics
+from aws_lambda_powertools.metrics import MetricUnit
+
+# Configure logging
+logger = logging.getLogger(__name__)
+
+# Check if metrics are enabled via environment variable
+# This is set by the CDK construct when enableObservability is true
+METRICS_ENABLED = os.environ.get('ENABLE_METRICS', 'false').lower() == 'true'
+
+# Initialize Powertools Metrics
+# Namespace and service are configured via environment variables:
+# - POWERTOOLS_METRICS_NAMESPACE
+# - POWERTOOLS_SERVICE_NAME
+metrics = Metrics()
+
+
+def _is_metrics_enabled() -> bool:
+    """
+    Check if metrics emission is enabled.
+
+    Metrics are enabled when the ENABLE_METRICS environment variable is set to 'true'.
+    This is controlled by the enableObservability prop in the CDK construct.
+
+    Returns:
+        True if metrics should be emitted, False otherwise
+    """
+    return METRICS_ENABLED
+
+
+def emit_chunking_operation(
+    strategy: str,
+    requires_chunking: bool,
+    document_id: Optional[str] = None
+) -> None:
+    """
+    Emit ChunkingOperations metric.
+
+    Emits a count metric for each chunking operation with dimension for strategy.
+    Only emits when observability is enabled (ENABLE_METRICS=true).
+
+    Args:
+        strategy: Chunking strategy used (fixed-pages, token-based, hybrid)
+        requires_chunking: Whether chunking was required
+        document_id: Optional document ID for logging
+
+    Requirements: 7.4
+    """
+    if not _is_metrics_enabled():
+        return
+
+    try:
+        metrics.add_dimension(name="Strategy", value=strategy)
+        metrics.add_dimension(name="RequiresChunking", value=str(requires_chunking).lower())
+        metrics.add_metric(name="ChunkingOperations", unit=MetricUnit.Count, value=1)
+
+        logger.debug(
+            f"Emitted ChunkingOperations metric: strategy={strategy}, "
+            f"requires_chunking={requires_chunking}",
+            extra={
+                'documentId': document_id,
+                'strategy': strategy,
+                'requiresChunking': requires_chunking
+            }
+        )
+
+    except Exception as e:
+        logger.warning(
+            f"Failed to emit ChunkingOperations metric: {str(e)}",
+            extra={'documentId': document_id, 'error': str(e)}
+        )
+
+
+def emit_chunk_count(
+    chunk_count: int,
+    strategy: str,
+    document_id: Optional[str] = None
+) -> None:
+    """
+    Emit ChunkCount metric.
+
+    Emits the number of chunks created for a document.
+    Only emits when observability is enabled (ENABLE_METRICS=true).
+
+    Args:
+        chunk_count: Number of chunks created
+        strategy: Chunking strategy used
+        document_id: Optional document ID for logging
+
+    Requirements: 7.4
+    """
+    if not _is_metrics_enabled():
+        return
+
+    try:
+        metrics.add_dimension(name="Strategy", value=strategy)
+        metrics.add_metric(name="ChunkCount", unit=MetricUnit.Count, value=chunk_count)
+
+        logger.debug(
+            f"Emitted ChunkCount metric: count={chunk_count}, strategy={strategy}",
+            extra={
+                'documentId': document_id,
+                'chunkCount': chunk_count,
+                'strategy': strategy
+            }
+        )
+
+    except Exception as e:
+        logger.warning(
+            f"Failed to emit ChunkCount metric: {str(e)}",
+            extra={'documentId': document_id, 'error': str(e)}
+        )
+
+
+def emit_tokens_per_chunk(
+    tokens_per_chunk: List[int],
+    strategy: str,
+    document_id: Optional[str] = None
+) -> None:
+    """
+    Emit TokensPerChunk metrics.
+
+    Emits average and p99 tokens per chunk.
+    Only emits when observability is enabled (ENABLE_METRICS=true).
+
+    Args:
+        tokens_per_chunk: List of token counts for each chunk
+        strategy: Chunking strategy used
+        document_id: Optional document ID for logging
+
+    Requirements: 7.4
+    """
+    if not _is_metrics_enabled():
+        return
+
+    if not tokens_per_chunk:
+        return
+
+    try:
+        avg_tokens = sum(tokens_per_chunk) / len(tokens_per_chunk)
+        sorted_tokens = sorted(tokens_per_chunk)
+        p99_index = int(len(sorted_tokens) * 0.99)
+        p99_tokens = sorted_tokens[min(p99_index, len(sorted_tokens) - 1)]
+        max_tokens = max(tokens_per_chunk)
+
+        metrics.add_dimension(name="Strategy", value=strategy)
+        metrics.add_metric(name="TokensPerChunkAvg", unit=MetricUnit.Count, value=avg_tokens)
+        metrics.add_metric(name="TokensPerChunkP99", unit=MetricUnit.Count, value=p99_tokens)
+        metrics.add_metric(name="TokensPerChunkMax", unit=MetricUnit.Count, value=max_tokens)
+
+        logger.debug(
+            f"Emitted TokensPerChunk metrics: avg={avg_tokens:.0f}, p99={p99_tokens}, max={max_tokens}",
+            extra={
+                'documentId': document_id,
+                'avgTokens': avg_tokens,
+                'p99Tokens': p99_tokens,
+                'maxTokens': max_tokens,
+                'strategy': strategy
+            }
+        )
+
+    except Exception as e:
+        logger.warning(
+            f"Failed to emit TokensPerChunk metric: {str(e)}",
+            extra={'documentId': document_id, 'error': str(e)}
+        )
+
+
+def emit_chunk_processing_time(
+    processing_time_ms: float,
+    processing_mode: str,
+    document_id: Optional[str] = None
+) -> None:
+    """
+    Emit ChunkProcessingTime metric.
+
+    Emits processing time for chunking operation.
+    Only emits when observability is enabled (ENABLE_METRICS=true).
+
+    Args:
+        processing_time_ms: Processing time in milliseconds
+        processing_mode: Processing mode (sequential, parallel)
+        document_id: Optional document ID for logging
+
+    Requirements: 7.4
+    """
+    if not _is_metrics_enabled():
+        return
+
+    try:
+        metrics.add_dimension(name="ProcessingMode", value=processing_mode)
+        metrics.add_metric(name="ChunkProcessingTime", unit=MetricUnit.Milliseconds, value=processing_time_ms)
+
+        logger.debug(
+            f"Emitted ChunkProcessingTime metric: time={processing_time_ms:.2f}ms, "
+            f"mode={processing_mode}",
+            extra={
+                'documentId': document_id,
+                'processingTimeMs': processing_time_ms,
+                'processingMode': processing_mode
+            }
+        )
+
+    except Exception as e:
+        logger.warning(
+            f"Failed to emit ChunkProcessingTime metric: {str(e)}",
+            extra={'documentId': document_id, 'error': str(e)}
+        )
+
+
+def emit_chunk_failure_rate(
+    total_chunks: int,
+    failed_chunks: int,
+    document_id: Optional[str] = None
+) -> None:
+    """
+    Emit ChunkFailureRate metric.
+
+    Calculates and emits the percentage of failed chunks.
+    Only emits when observability is enabled (ENABLE_METRICS=true).
+
+    Args:
+        total_chunks: Total number of chunks
+        failed_chunks: Number of failed chunks
+        document_id: Optional document ID for logging
+
+    Requirements: 7.4
+    """
+    if not _is_metrics_enabled():
+        return
+
+    if total_chunks == 0:
+        return
+
+    try:
+        failure_rate = (failed_chunks / total_chunks) * 100
+
+        metrics.add_metric(name="ChunkFailureRate", unit=MetricUnit.Percent, value=failure_rate)
+        metrics.add_metric(name="FailedChunks", unit=MetricUnit.Count, value=failed_chunks)
+        metrics.add_metric(name="TotalChunks", unit=MetricUnit.Count, value=total_chunks)
+
+        logger.debug(
+            f"Emitted ChunkFailureRate metric: rate={failure_rate:.2f}%",
+            extra={
+                'documentId': document_id,
+                'failureRate': failure_rate,
+                'totalChunks': total_chunks,
+                'failedChunks': failed_chunks
+            }
+        )
+
+    except Exception as e:
+        logger.warning(
+            f"Failed to emit ChunkFailureRate metric: {str(e)}",
+            extra={'documentId': document_id, 'error': str(e)}
+        )
+
+
+def emit_aggregation_time(
+    aggregation_time_ms: float,
+    document_id: Optional[str] = None
+) -> None:
+    """
+    Emit AggregationTime metric.
+
+    Emits the time taken to aggregate chunk results.
+    Only emits when observability is enabled (ENABLE_METRICS=true).
+
+    Args:
+        aggregation_time_ms: Aggregation time in milliseconds
+        document_id: Optional document ID for logging
+
+    Requirements: 7.4
+    """
+    if not _is_metrics_enabled():
+        return
+
+    try:
+        metrics.add_metric(name="AggregationTime", unit=MetricUnit.Milliseconds, value=aggregation_time_ms)
+
+        logger.debug(
+            f"Emitted AggregationTime metric: time={aggregation_time_ms:.2f}ms",
+            extra={
+                'documentId': document_id,
+                'aggregationTimeMs': aggregation_time_ms
+            }
+        )
+
+    except Exception as e:
+        logger.warning(
+            f"Failed to emit AggregationTime metric: {str(e)}",
+            extra={'documentId': document_id, 'error': str(e)}
+        )
+
+
+def emit_strategy_usage(
+    strategy: str,
+    document_id: Optional[str] = None
+) -> None:
+    """
+    Emit StrategyUsage metric.
+
+    Emits a count metric for strategy usage tracking.
+    Only emits when observability is enabled (ENABLE_METRICS=true).
+
+    Args:
+        strategy: Chunking strategy used (fixed-pages, token-based, hybrid)
+        document_id: Optional document ID for logging
+
+    Requirements: 7.4
+    """
+    if not _is_metrics_enabled():
+        return
+
+    try:
+        metrics.add_dimension(name="Strategy", value=strategy)
+        metrics.add_metric(name="StrategyUsage", unit=MetricUnit.Count, value=1)
+
+        logger.debug(
+            f"Emitted StrategyUsage metric: strategy={strategy}",
+            extra={
+                'documentId': document_id,
+                'strategy': strategy
+            }
+        )
+
+    except Exception as e:
+        logger.warning(
+            f"Failed to emit StrategyUsage metric: {str(e)}",
+            extra={'documentId': document_id, 'error': str(e)}
+        )
+
+
+def emit_chunking_metrics(
+    document_id: str,
+    strategy: str,
+    requires_chunking: bool,
+    chunk_count: int = 0,
+    tokens_per_chunk: Optional[List[int]] = None,
+    processing_time_ms: float = 0,
+    processing_mode: str = 'parallel'
+) -> None:
+    """
+    Convenience function to emit all chunking-related metrics.
+
+    Args:
+        document_id: Document identifier
+        strategy: Chunking strategy used
+        requires_chunking: Whether chunking was required
+        chunk_count: Number of chunks created (if chunking was required)
+        tokens_per_chunk: List of token counts per chunk
+        processing_time_ms: Total processing time in milliseconds
+        processing_mode: Processing mode (sequential, parallel)
+
+    Requirements: 7.4
+    """
+    # Always emit operation and strategy usage metrics
+    emit_chunking_operation(strategy, requires_chunking, document_id)
+    emit_strategy_usage(strategy, document_id)
+
+    # Emit chunk-specific metrics only if chunking was performed
+    if requires_chunking and chunk_count > 0:
+        emit_chunk_count(chunk_count, strategy, document_id)
+
+        if tokens_per_chunk:
+            emit_tokens_per_chunk(tokens_per_chunk, strategy, document_id)
+
+    if processing_time_ms > 0:
+        emit_chunk_processing_time(
+            processing_time_ms,
+            processing_mode,
+            document_id
+        )
+
+
+def timed_operation(metric_name: str = 'OperationTime'):
+    """
+    Decorator to measure and emit operation timing.
+
+    Only emits metrics when observability is enabled (ENABLE_METRICS=true).
+
+    Args:
+        metric_name: Name of the metric to emit
+
+    Returns:
+        Decorated function
+    """
+    def decorator(func):
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            start_time = time.time()
+            try:
+                result = func(*args, **kwargs)
+                return result
+            finally:
+                if _is_metrics_enabled():
+                    elapsed_ms = (time.time() - start_time) * 1000
+                    try:
+                        metrics.add_metric(name=metric_name, unit=MetricUnit.Milliseconds, value=elapsed_ms)
+                    except Exception as e:
+                        logger.warning(f"Failed to emit timing metric: {str(e)}")
+        return wrapper
+    return decorator
+
+
+# Export the metrics instance for use with @metrics.log_metrics decorator
+def get_metrics() -> Metrics:
+    """
+    Get the Powertools Metrics instance.
+
+    Use this to access the metrics instance for the @metrics.log_metrics decorator.
+
+    Returns:
+        Metrics instance
+    """
+    return metrics
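
For orientation, the sketch below shows one hypothetical way a chunking Lambda handler could wire these helpers together; it is not part of the package. The handler name, event shape, and metric values are illustrative assumptions, and it presumes ENABLE_METRICS, POWERTOOLS_METRICS_NAMESPACE, and POWERTOOLS_SERVICE_NAME are set on the function, as the module docstring describes.

# Hypothetical usage sketch (not from the package): flush EMF metrics via the
# exported Powertools Metrics instance and record chunking metrics with the
# convenience helper. All values below are placeholders.
from metrics import get_metrics, emit_chunking_metrics

metrics = get_metrics()

@metrics.log_metrics  # flush buffered EMF metrics when the invocation ends
def handler(event, context):
    # ... chunk the PDF here; the numbers below are illustrative ...
    emit_chunking_metrics(
        document_id=event.get("documentId", "unknown"),
        strategy="hybrid",
        requires_chunking=True,
        chunk_count=4,
        tokens_per_chunk=[1800, 2100, 1950, 900],
        processing_time_ms=2350.0,
        processing_mode="parallel",
    )
    return {"status": "chunked"}

Because every emit_* helper returns early when ENABLE_METRICS is not 'true' and swallows emission errors with a warning, the same handler code runs unchanged whether or not observability is enabled on the construct.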