@corbat-tech/coding-standards-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +371 -0
- package/assets/demo.gif +0 -0
- package/dist/agent.d.ts +53 -0
- package/dist/agent.d.ts.map +1 -0
- package/dist/agent.js +629 -0
- package/dist/agent.js.map +1 -0
- package/dist/cli/init.d.ts +3 -0
- package/dist/cli/init.d.ts.map +1 -0
- package/dist/cli/init.js +651 -0
- package/dist/cli/init.js.map +1 -0
- package/dist/config.d.ts +73 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +105 -0
- package/dist/config.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +73 -0
- package/dist/index.js.map +1 -0
- package/dist/profiles.d.ts +39 -0
- package/dist/profiles.d.ts.map +1 -0
- package/dist/profiles.js +526 -0
- package/dist/profiles.js.map +1 -0
- package/dist/prompts-legacy.d.ts +25 -0
- package/dist/prompts-legacy.d.ts.map +1 -0
- package/dist/prompts-legacy.js +600 -0
- package/dist/prompts-legacy.js.map +1 -0
- package/dist/prompts-v2.d.ts +30 -0
- package/dist/prompts-v2.d.ts.map +1 -0
- package/dist/prompts-v2.js +310 -0
- package/dist/prompts-v2.js.map +1 -0
- package/dist/prompts.d.ts +30 -0
- package/dist/prompts.d.ts.map +1 -0
- package/dist/prompts.js +310 -0
- package/dist/prompts.js.map +1 -0
- package/dist/resources.d.ts +18 -0
- package/dist/resources.d.ts.map +1 -0
- package/dist/resources.js +95 -0
- package/dist/resources.js.map +1 -0
- package/dist/tools-legacy.d.ts +196 -0
- package/dist/tools-legacy.d.ts.map +1 -0
- package/dist/tools-legacy.js +1230 -0
- package/dist/tools-legacy.js.map +1 -0
- package/dist/tools-v2.d.ts +92 -0
- package/dist/tools-v2.d.ts.map +1 -0
- package/dist/tools-v2.js +410 -0
- package/dist/tools-v2.js.map +1 -0
- package/dist/tools.d.ts +92 -0
- package/dist/tools.d.ts.map +1 -0
- package/dist/tools.js +410 -0
- package/dist/tools.js.map +1 -0
- package/dist/types.d.ts +3054 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +515 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/index.d.ts +6 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +5 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/retry.d.ts +44 -0
- package/dist/utils/retry.d.ts.map +1 -0
- package/dist/utils/retry.js +74 -0
- package/dist/utils/retry.js.map +1 -0
- package/package.json +79 -0
- package/profiles/README.md +199 -0
- package/profiles/custom/.gitkeep +2 -0
- package/profiles/templates/_template.yaml +159 -0
- package/profiles/templates/angular.yaml +494 -0
- package/profiles/templates/java-spring-backend.yaml +512 -0
- package/profiles/templates/minimal.yaml +102 -0
- package/profiles/templates/nodejs.yaml +338 -0
- package/profiles/templates/python.yaml +340 -0
- package/profiles/templates/react.yaml +331 -0
- package/profiles/templates/vue.yaml +598 -0
- package/standards/architecture/ddd.md +173 -0
- package/standards/architecture/hexagonal.md +97 -0
- package/standards/cicd/github-actions.md +567 -0
- package/standards/clean-code/naming.md +175 -0
- package/standards/clean-code/principles.md +179 -0
- package/standards/containerization/dockerfile.md +419 -0
- package/standards/database/selection-guide.md +443 -0
- package/standards/documentation/guidelines.md +189 -0
- package/standards/event-driven/domain-events.md +527 -0
- package/standards/kubernetes/deployment.md +518 -0
- package/standards/observability/guidelines.md +665 -0
- package/standards/project-setup/initialization-checklist.md +650 -0
- package/standards/spring-boot/best-practices.md +598 -0
- package/standards/testing/guidelines.md +559 -0
- package/standards/workflow/llm-development-workflow.md +542 -0
|
@@ -0,0 +1,665 @@
|
|
|
1
|
+
# Observability Guidelines
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
Observability is the ability to understand the internal state of a system by examining its external outputs. The three pillars of observability are **Logs**, **Metrics**, and **Traces**.
|
|
6
|
+
|
|
7
|
+
## The Three Pillars
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
┌─────────────────────────────────────────────────────────────────┐
|
|
11
|
+
│ OBSERVABILITY │
|
|
12
|
+
├──────────────────┬──────────────────┬──────────────────────────┤
|
|
13
|
+
│ LOGS │ METRICS │ TRACES │
|
|
14
|
+
│ │ │ │
|
|
15
|
+
│ What happened? │ How much? │ Where did it go? │
|
|
16
|
+
│ │ How many? │ How long did it take? │
|
|
17
|
+
│ │ How fast? │ │
|
|
18
|
+
├──────────────────┼──────────────────┼──────────────────────────┤
|
|
19
|
+
│ • Errors │ • Counters │ • Request flow │
|
|
20
|
+
│ • Events │ • Gauges │ • Service dependencies │
|
|
21
|
+
│ • Debug info │ • Histograms │ • Latency breakdown │
|
|
22
|
+
│ • Audit trail │ • Timers │ • Error propagation │
|
|
23
|
+
└──────────────────┴──────────────────┴──────────────────────────┘
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Logging
|
|
27
|
+
|
|
28
|
+
### Framework: SLF4J + Logback
|
|
29
|
+
|
|
30
|
+
Always use SLF4J as the logging facade:
|
|
31
|
+
|
|
32
|
+
```java
|
|
33
|
+
import org.slf4j.Logger;
|
|
34
|
+
import org.slf4j.LoggerFactory;
|
|
35
|
+
|
|
36
|
+
@Service
|
|
37
|
+
@RequiredArgsConstructor
|
|
38
|
+
public class OrderService {
|
|
39
|
+
private static final Logger log = LoggerFactory.getLogger(OrderService.class);
|
|
40
|
+
|
|
41
|
+
// Or with Lombok
|
|
42
|
+
// @Slf4j on the class
|
|
43
|
+
}
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### Structured Logging (JSON Format)
|
|
47
|
+
|
|
48
|
+
Configure Logback for JSON output in production:
|
|
49
|
+
|
|
50
|
+
```xml
|
|
51
|
+
<!-- logback-spring.xml -->
|
|
52
|
+
<configuration>
|
|
53
|
+
<springProfile name="production">
|
|
54
|
+
<appender name="JSON" class="ch.qos.logback.core.ConsoleAppender">
|
|
55
|
+
<encoder class="net.logstash.logback.encoder.LogstashEncoder">
|
|
56
|
+
<includeMdcKeyName>traceId</includeMdcKeyName>
|
|
57
|
+
<includeMdcKeyName>spanId</includeMdcKeyName>
|
|
58
|
+
<includeMdcKeyName>userId</includeMdcKeyName>
|
|
59
|
+
<includeMdcKeyName>requestId</includeMdcKeyName>
|
|
60
|
+
</encoder>
|
|
61
|
+
</appender>
|
|
62
|
+
<root level="INFO">
|
|
63
|
+
<appender-ref ref="JSON"/>
|
|
64
|
+
</root>
|
|
65
|
+
</springProfile>
|
|
66
|
+
|
|
67
|
+
<springProfile name="!production">
|
|
68
|
+
<appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
|
|
69
|
+
<encoder>
|
|
70
|
+
<pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
|
|
71
|
+
</encoder>
|
|
72
|
+
</appender>
|
|
73
|
+
<root level="DEBUG">
|
|
74
|
+
<appender-ref ref="CONSOLE"/>
|
|
75
|
+
</root>
|
|
76
|
+
</springProfile>
|
|
77
|
+
</configuration>
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Log Levels
|
|
81
|
+
|
|
82
|
+
| Level | Usage | Example |
|
|
83
|
+
|-------|-------|---------|
|
|
84
|
+
| ERROR | Unexpected failures requiring attention | Database connection failed, unhandled exception |
|
|
85
|
+
| WARN | Potential issues, recoverable errors | Retry attempt, deprecated API usage |
|
|
86
|
+
| INFO | Business events, state changes | Order created, payment received |
|
|
87
|
+
| DEBUG | Detailed flow information | Method entry/exit, variable values |
|
|
88
|
+
| TRACE | Very detailed debugging | Loop iterations, SQL queries |
|
|
89
|
+
|
|
90
|
+
### Logging Best Practices
|
|
91
|
+
|
|
92
|
+
```java
|
|
93
|
+
@Service
|
|
94
|
+
@Slf4j
|
|
95
|
+
public class PlaceOrderUseCase {
|
|
96
|
+
|
|
97
|
+
public OrderId execute(PlaceOrderCommand command) {
|
|
98
|
+
// Good: Include relevant context
|
|
99
|
+
log.info("Creating order for customer {}", command.customerId());
|
|
100
|
+
|
|
101
|
+
try {
|
|
102
|
+
Order order = Order.create(command);
|
|
103
|
+
orderRepository.save(order);
|
|
104
|
+
|
|
105
|
+
// Good: Log business events at INFO level
|
|
106
|
+
log.info("Order {} created successfully with {} items",
|
|
107
|
+
order.getId(), order.getLines().size());
|
|
108
|
+
|
|
109
|
+
return order.getId();
|
|
110
|
+
} catch (InsufficientStockException e) {
|
|
111
|
+
// Good: Log exception with context
|
|
112
|
+
log.warn("Failed to create order: insufficient stock for product {}",
|
|
113
|
+
e.getProductId());
|
|
114
|
+
throw e;
|
|
115
|
+
} catch (Exception e) {
|
|
116
|
+
// Good: Log unexpected errors at ERROR level
|
|
117
|
+
log.error("Unexpected error creating order for customer {}",
|
|
118
|
+
command.customerId(), e);
|
|
119
|
+
throw e;
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### What NOT to Log
|
|
126
|
+
|
|
127
|
+
```java
|
|
128
|
+
// BAD - Never log sensitive data
|
|
129
|
+
log.info("User logged in with password: {}", password);
|
|
130
|
+
log.debug("Credit card number: {}", cardNumber);
|
|
131
|
+
log.info("API key used: {}", apiKey);
|
|
132
|
+
|
|
133
|
+
// BAD - Don't use System.out
|
|
134
|
+
System.out.println("Order created");
|
|
135
|
+
|
|
136
|
+
// BAD - Don't use printStackTrace
|
|
137
|
+
catch (Exception e) {
|
|
138
|
+
e.printStackTrace(); // Use log.error instead
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
// BAD - Don't log in tight loops
|
|
142
|
+
for (Item item : items) {
|
|
143
|
+
log.debug("Processing item {}", item.getId()); // Too verbose
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
// GOOD - Log summary instead
|
|
147
|
+
log.debug("Processing {} items", items.size());
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
### MDC (Mapped Diagnostic Context)
|
|
151
|
+
|
|
152
|
+
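Use MDC to add context to all log messages. Note that the JSON encoder configuration shown earlier only emits the MDC keys listed in `<includeMdcKeyName>`, so add `correlationId` there if it should appear in production logs: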
|
|
153
|
+
|
|
154
|
+
```java
|
|
155
|
+
@Component
|
|
156
|
+
public class CorrelationIdFilter implements Filter {
|
|
157
|
+
|
|
158
|
+
@Override
|
|
159
|
+
public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain)
|
|
160
|
+
throws IOException, ServletException {
|
|
161
|
+
|
|
162
|
+
String correlationId = ((HttpServletRequest) request)
|
|
163
|
+
.getHeader("X-Correlation-ID");
|
|
164
|
+
|
|
165
|
+
if (correlationId == null) {
|
|
166
|
+
correlationId = UUID.randomUUID().toString();
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
try {
|
|
170
|
+
MDC.put("correlationId", correlationId);
|
|
171
|
+
MDC.put("requestId", UUID.randomUUID().toString());
|
|
172
|
+
chain.doFilter(request, response);
|
|
173
|
+
} finally {
|
|
174
|
+
MDC.clear();
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
## Metrics
|
|
181
|
+
|
|
182
|
+
### Framework: Micrometer
|
|
183
|
+
|
|
184
|
+
Micrometer provides a vendor-neutral metrics facade.
|
|
185
|
+
|
|
186
|
+
### Dependencies
|
|
187
|
+
|
|
188
|
+
```xml
|
|
189
|
+
<dependency>
|
|
190
|
+
<groupId>org.springframework.boot</groupId>
|
|
191
|
+
<artifactId>spring-boot-starter-actuator</artifactId>
|
|
192
|
+
</dependency>
|
|
193
|
+
<dependency>
|
|
194
|
+
<groupId>io.micrometer</groupId>
|
|
195
|
+
<artifactId>micrometer-registry-prometheus</artifactId>
|
|
196
|
+
</dependency>
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
### Metric Types
|
|
200
|
+
|
|
201
|
+
| Type | Purpose | Example |
|
|
202
|
+
|------|---------|---------|
|
|
203
|
+
| Counter | Counts occurrences | `orders.created.total` |
|
|
204
|
+
| Gauge | Current value | `orders.pending.count` |
|
|
205
|
+
| Timer | Measures duration | `order.processing.time` |
|
|
206
|
+
| Distribution Summary | Measures distribution | `order.total.amount` |
|
|
207
|
+
|
|
208
|
+
### Custom Metrics
|
|
209
|
+
|
|
210
|
+
```java
|
|
211
|
+
@Service
|
|
212
|
+
@RequiredArgsConstructor
|
|
213
|
+
public class OrderMetrics {
|
|
214
|
+
|
|
215
|
+
private final MeterRegistry meterRegistry;
|
|
216
|
+
|
|
217
|
+
private Counter ordersCreated;
|
|
218
|
+
private Counter ordersFailed;
|
|
219
|
+
private Timer orderProcessingTime;
|
|
220
|
+
private AtomicInteger pendingOrders;
|
|
221
|
+
|
|
222
|
+
@PostConstruct
|
|
223
|
+
public void init() {
|
|
224
|
+
ordersCreated = Counter.builder("orders.created.total")
|
|
225
|
+
.description("Total number of orders created")
|
|
226
|
+
.tag("service", "order-service")
|
|
227
|
+
.register(meterRegistry);
|
|
228
|
+
|
|
229
|
+
ordersFailed = Counter.builder("orders.failed.total")
|
|
230
|
+
.description("Total number of failed orders")
|
|
231
|
+
.tag("service", "order-service")
|
|
232
|
+
.register(meterRegistry);
|
|
233
|
+
|
|
234
|
+
orderProcessingTime = Timer.builder("order.processing.time")
|
|
235
|
+
.description("Time to process an order")
|
|
236
|
+
.publishPercentiles(0.5, 0.95, 0.99)
|
|
237
|
+
.register(meterRegistry);
|
|
238
|
+
|
|
239
|
+
pendingOrders = meterRegistry.gauge("orders.pending.count",
|
|
240
|
+
new AtomicInteger(0));
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
public void recordOrderCreated() {
|
|
244
|
+
ordersCreated.increment();
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
public void recordOrderFailed(String reason) {
|
|
248
|
+
ordersFailed.increment();
|
|
249
|
+
Counter.builder("orders.failed.total")
|
|
250
|
+
.tag("reason", reason)
|
|
251
|
+
.register(meterRegistry)
|
|
252
|
+
.increment();
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
public Timer.Sample startTimer() {
|
|
256
|
+
return Timer.start(meterRegistry);
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
public void stopTimer(Timer.Sample sample) {
|
|
260
|
+
sample.stop(orderProcessingTime);
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
public void setPendingOrdersCount(int count) {
|
|
264
|
+
pendingOrders.set(count);
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
### Using Metrics in Services
|
|
270
|
+
|
|
271
|
+
```java
|
|
272
|
+
@Service
|
|
273
|
+
@RequiredArgsConstructor
|
|
274
|
+
@Slf4j
|
|
275
|
+
public class PlaceOrderUseCase {
|
|
276
|
+
|
|
277
|
+
private final OrderRepository orderRepository;
|
|
278
|
+
private final OrderMetrics orderMetrics;
|
|
279
|
+
|
|
280
|
+
public OrderId execute(PlaceOrderCommand command) {
|
|
281
|
+
Timer.Sample timer = orderMetrics.startTimer();
|
|
282
|
+
|
|
283
|
+
try {
|
|
284
|
+
Order order = Order.create(command);
|
|
285
|
+
orderRepository.save(order);
|
|
286
|
+
|
|
287
|
+
orderMetrics.recordOrderCreated();
|
|
288
|
+
log.info("Order {} created", order.getId());
|
|
289
|
+
|
|
290
|
+
return order.getId();
|
|
291
|
+
} catch (Exception e) {
|
|
292
|
+
orderMetrics.recordOrderFailed(e.getClass().getSimpleName());
|
|
293
|
+
throw e;
|
|
294
|
+
} finally {
|
|
295
|
+
orderMetrics.stopTimer(timer);
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
### Metrics with Annotations
|
|
302
|
+
|
|
303
|
+
```java
|
|
304
|
+
@Service
|
|
305
|
+
public class PaymentService {
|
|
306
|
+
|
|
307
|
+
@Timed(value = "payment.processing.time", description = "Time to process payment")
|
|
308
|
+
@Counted(value = "payment.attempts.total", description = "Payment attempts")
|
|
309
|
+
public PaymentResult processPayment(PaymentRequest request) {
|
|
310
|
+
// ...
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
### Prometheus Endpoint
|
|
316
|
+
|
|
317
|
+
Configure the Prometheus endpoint:
|
|
318
|
+
|
|
319
|
+
```yaml
|
|
320
|
+
management:
|
|
321
|
+
endpoints:
|
|
322
|
+
web:
|
|
323
|
+
exposure:
|
|
324
|
+
include: health,info,metrics,prometheus
|
|
325
|
+
metrics:
|
|
326
|
+
export:
|
|
327
|
+
prometheus:
|
|
328
|
+
enabled: true
|
|
329
|
+
tags:
|
|
330
|
+
application: ${spring.application.name}
|
|
331
|
+
environment: ${ENVIRONMENT:local}
|
|
332
|
+
```
|
|
333
|
+
|
|
334
|
+
### Common Metrics to Track
|
|
335
|
+
|
|
336
|
+
```text
|
|
337
|
+
// Business metrics
|
|
338
|
+
- orders.created.total
|
|
339
|
+
- orders.completed.total
|
|
340
|
+
- orders.cancelled.total
|
|
341
|
+
- orders.amount.total (sum)
|
|
342
|
+
- payments.received.total
|
|
343
|
+
- payments.failed.total
|
|
344
|
+
|
|
345
|
+
// Technical metrics
|
|
346
|
+
- http.server.requests (auto by Spring)
|
|
347
|
+
- jvm.memory.used
|
|
348
|
+
- jvm.gc.pause
|
|
349
|
+
- db.pool.active.connections
|
|
350
|
+
- kafka.consumer.lag
|
|
351
|
+
|
|
352
|
+
// SLI (Service Level Indicators)
|
|
353
|
+
- request.latency.p99
|
|
354
|
+
- error.rate
|
|
355
|
+
- availability
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
## Distributed Tracing
|
|
359
|
+
|
|
360
|
+
### Framework: Micrometer Tracing (formerly Spring Cloud Sleuth)
|
|
361
|
+
|
|
362
|
+
### Dependencies
|
|
363
|
+
|
|
364
|
+
```xml
|
|
365
|
+
<dependency>
|
|
366
|
+
<groupId>io.micrometer</groupId>
|
|
367
|
+
<artifactId>micrometer-tracing-bridge-brave</artifactId>
|
|
368
|
+
</dependency>
|
|
369
|
+
<dependency>
|
|
370
|
+
<groupId>io.zipkin.reporter2</groupId>
|
|
371
|
+
<artifactId>zipkin-reporter-brave</artifactId>
|
|
372
|
+
</dependency>
|
|
373
|
+
```
|
|
374
|
+
|
|
375
|
+
### Configuration
|
|
376
|
+
|
|
377
|
+
```yaml
|
|
378
|
+
management:
|
|
379
|
+
tracing:
|
|
380
|
+
sampling:
|
|
381
|
+
probability: 1.0 # 100% sampling in dev, reduce in production
|
|
382
|
+
zipkin:
|
|
383
|
+
tracing:
|
|
384
|
+
endpoint: ${ZIPKIN_ENDPOINT:http://localhost:9411/api/v2/spans}
|
|
385
|
+
```
|
|
386
|
+
|
|
387
|
+
### Trace Propagation
|
|
388
|
+
|
|
389
|
+
W3C Trace Context is the default propagation format:
|
|
390
|
+
|
|
391
|
+
```
|
|
392
|
+
traceparent: 00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01
|
|
393
|
+
```
|
|
394
|
+
|
|
395
|
+
### Custom Spans
|
|
396
|
+
|
|
397
|
+
```java
|
|
398
|
+
@Service
|
|
399
|
+
@RequiredArgsConstructor
|
|
400
|
+
public class OrderService {
|
|
401
|
+
|
|
402
|
+
private final Tracer tracer;
|
|
403
|
+
|
|
404
|
+
public Order processOrder(OrderRequest request) {
|
|
405
|
+
// Create a custom span
|
|
406
|
+
Span span = tracer.nextSpan().name("process-order");
|
|
407
|
+
|
|
408
|
+
try (Tracer.SpanInScope ws = tracer.withSpan(span.start())) {
|
|
409
|
+
// Add attributes to the span
|
|
410
|
+
span.tag("orderId", request.orderId());
|
|
411
|
+
span.tag("customerId", request.customerId());
|
|
412
|
+
|
|
413
|
+
Order order = validateOrder(request);
|
|
414
|
+
processPayment(order);
|
|
415
|
+
notifyWarehouse(order);
|
|
416
|
+
|
|
417
|
+
span.event("order-processed");
|
|
418
|
+
return order;
|
|
419
|
+
} catch (Exception e) {
|
|
420
|
+
span.error(e);
|
|
421
|
+
throw e;
|
|
422
|
+
} finally {
|
|
423
|
+
span.end();
|
|
424
|
+
}
|
|
425
|
+
}
|
|
426
|
+
}
|
|
427
|
+
```
|
|
428
|
+
|
|
429
|
+
### Observation API (Spring 6+)
|
|
430
|
+
|
|
431
|
+
```java
|
|
432
|
+
@Service
|
|
433
|
+
@RequiredArgsConstructor
|
|
434
|
+
public class OrderService {
|
|
435
|
+
|
|
436
|
+
private final ObservationRegistry observationRegistry;
|
|
437
|
+
|
|
438
|
+
public Order processOrder(OrderRequest request) {
|
|
439
|
+
return Observation.createNotStarted("order.processing", observationRegistry)
|
|
440
|
+
.lowCardinalityKeyValue("orderType", request.type())
|
|
441
|
+
.highCardinalityKeyValue("orderId", request.orderId())
|
|
442
|
+
.observe(() -> {
|
|
443
|
+
// Business logic here
|
|
444
|
+
return doProcessOrder(request);
|
|
445
|
+
});
|
|
446
|
+
}
|
|
447
|
+
}
|
|
448
|
+
```
|
|
449
|
+
|
|
450
|
+
### Trace Context in Logs
|
|
451
|
+
|
|
452
|
+
Enable trace context in logs:
|
|
453
|
+
|
|
454
|
+
```xml
|
|
455
|
+
<!-- logback-spring.xml -->
|
|
456
|
+
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%X{traceId:-},%X{spanId:-}] %-5level %logger{36} - %msg%n</pattern>
|
|
457
|
+
```
|
|
458
|
+
|
|
459
|
+
## Health Checks
|
|
460
|
+
|
|
461
|
+
### Actuator Health Endpoints
|
|
462
|
+
|
|
463
|
+
```yaml
|
|
464
|
+
management:
|
|
465
|
+
endpoint:
|
|
466
|
+
health:
|
|
467
|
+
show-details: when_authorized
|
|
468
|
+
probes:
|
|
469
|
+
enabled: true # Kubernetes probes
|
|
470
|
+
health:
|
|
471
|
+
livenessState:
|
|
472
|
+
enabled: true
|
|
473
|
+
readinessState:
|
|
474
|
+
enabled: true
|
|
475
|
+
```
|
|
476
|
+
|
|
477
|
+
### Health Endpoints
|
|
478
|
+
|
|
479
|
+
| Endpoint | Purpose | Use Case |
|
|
480
|
+
|----------|---------|----------|
|
|
481
|
+
| `/actuator/health` | Overall health | General health status |
|
|
482
|
+
| `/actuator/health/liveness` | Is the app running? | Kubernetes liveness probe |
|
|
483
|
+
| `/actuator/health/readiness` | Can the app handle traffic? | Kubernetes readiness probe |
|
|
484
|
+
|
|
485
|
+
### Custom Health Indicators
|
|
486
|
+
|
|
487
|
+
```java
|
|
488
|
+
@Component
|
|
489
|
+
public class DatabaseHealthIndicator implements HealthIndicator {
|
|
490
|
+
|
|
491
|
+
private final DataSource dataSource;
|
|
492
|
+
|
|
493
|
+
public DatabaseHealthIndicator(DataSource dataSource) {
|
|
494
|
+
this.dataSource = dataSource;
|
|
495
|
+
}
|
|
496
|
+
|
|
497
|
+
@Override
|
|
498
|
+
public Health health() {
|
|
499
|
+
try (Connection connection = dataSource.getConnection()) {
|
|
500
|
+
if (connection.isValid(1)) {
|
|
501
|
+
return Health.up()
|
|
502
|
+
.withDetail("database", "PostgreSQL")
|
|
503
|
+
.withDetail("connection", "valid")
|
|
504
|
+
.build();
|
|
505
|
+
}
|
|
506
|
+
} catch (SQLException e) {
|
|
507
|
+
return Health.down()
|
|
508
|
+
.withException(e)
|
|
509
|
+
.build();
|
|
510
|
+
}
|
|
511
|
+
return Health.down().build();
|
|
512
|
+
}
|
|
513
|
+
}
|
|
514
|
+
|
|
515
|
+
@Component
|
|
516
|
+
public class KafkaHealthIndicator implements HealthIndicator {
|
|
517
|
+
|
|
518
|
+
private final KafkaTemplate<?, ?> kafkaTemplate;
|
|
519
|
+
|
|
520
|
+
@Override
|
|
521
|
+
public Health health() {
|
|
522
|
+
try {
|
|
523
|
+
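kafkaTemplate.getDefaultTopic(); // NOTE: only reads local configuration and does not contact the broker; replace with a real broker call (e.g. AdminClient.describeCluster()) to verify connectivity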
|
|
524
|
+
return Health.up()
|
|
525
|
+
.withDetail("kafka", "connected")
|
|
526
|
+
.build();
|
|
527
|
+
} catch (Exception e) {
|
|
528
|
+
return Health.down()
|
|
529
|
+
.withException(e)
|
|
530
|
+
.build();
|
|
531
|
+
}
|
|
532
|
+
}
|
|
533
|
+
}
|
|
534
|
+
|
|
535
|
+
@Component
|
|
536
|
+
public class ExternalApiHealthIndicator implements HealthIndicator {
|
|
537
|
+
|
|
538
|
+
private final RestClient restClient;
|
|
539
|
+
|
|
540
|
+
@Override
|
|
541
|
+
public Health health() {
|
|
542
|
+
try {
|
|
543
|
+
restClient.get()
|
|
544
|
+
.uri("/health")
|
|
545
|
+
.retrieve()
|
|
546
|
+
.toBodilessEntity();
|
|
547
|
+
return Health.up().build();
|
|
548
|
+
} catch (Exception e) {
|
|
549
|
+
return Health.down()
|
|
550
|
+
.withDetail("api", "external-service")
|
|
551
|
+
.withException(e)
|
|
552
|
+
.build();
|
|
553
|
+
}
|
|
554
|
+
}
|
|
555
|
+
}
|
|
556
|
+
```
|
|
557
|
+
|
|
558
|
+
## Application Configuration
|
|
559
|
+
|
|
560
|
+
### Complete Observability Configuration
|
|
561
|
+
|
|
562
|
+
```yaml
|
|
563
|
+
spring:
|
|
564
|
+
application:
|
|
565
|
+
name: order-service
|
|
566
|
+
|
|
567
|
+
management:
|
|
568
|
+
endpoints:
|
|
569
|
+
web:
|
|
570
|
+
exposure:
|
|
571
|
+
include: health,info,metrics,prometheus,loggers
|
|
572
|
+
endpoint:
|
|
573
|
+
health:
|
|
574
|
+
show-details: when_authorized
|
|
575
|
+
probes:
|
|
576
|
+
enabled: true
|
|
577
|
+
metrics:
|
|
578
|
+
distribution:
|
|
579
|
+
percentiles-histogram:
|
|
580
|
+
http.server.requests: true
|
|
581
|
+
percentiles:
|
|
582
|
+
http.server.requests: 0.5, 0.95, 0.99
|
|
583
|
+
tags:
|
|
584
|
+
application: ${spring.application.name}
|
|
585
|
+
environment: ${ENVIRONMENT:local}
|
|
586
|
+
tracing:
|
|
587
|
+
sampling:
|
|
588
|
+
probability: ${TRACING_SAMPLING_PROBABILITY:1.0}
|
|
589
|
+
zipkin:
|
|
590
|
+
tracing:
|
|
591
|
+
endpoint: ${ZIPKIN_ENDPOINT:http://localhost:9411/api/v2/spans}
|
|
592
|
+
|
|
593
|
+
logging:
|
|
594
|
+
level:
|
|
595
|
+
root: INFO
|
|
596
|
+
com.example: DEBUG
|
|
597
|
+
pattern:
|
|
598
|
+
console: "%d{yyyy-MM-dd HH:mm:ss.SSS} [%X{traceId:-},%X{spanId:-}] %-5level %logger{36} - %msg%n"
|
|
599
|
+
```
|
|
600
|
+
|
|
601
|
+
## Dashboards and Alerting
|
|
602
|
+
|
|
603
|
+
### Prometheus Alert Rules
|
|
604
|
+
|
|
605
|
+
```yaml
|
|
606
|
+
groups:
|
|
607
|
+
- name: order-service
|
|
608
|
+
rules:
|
|
609
|
+
- alert: HighErrorRate
|
|
610
|
+
expr: rate(http_server_requests_seconds_count{status=~"5.."}[5m]) > 0.1
|
|
611
|
+
for: 5m
|
|
612
|
+
labels:
|
|
613
|
+
severity: critical
|
|
614
|
+
annotations:
|
|
615
|
+
summary: High error rate detected
|
|
616
|
+
|
|
617
|
+
- alert: HighLatency
|
|
618
|
+
expr: histogram_quantile(0.99, rate(http_server_requests_seconds_bucket[5m])) > 1
|
|
619
|
+
for: 5m
|
|
620
|
+
labels:
|
|
621
|
+
severity: warning
|
|
622
|
+
annotations:
|
|
623
|
+
summary: High latency detected (p99 > 1s)
|
|
624
|
+
|
|
625
|
+
- alert: LowOrderRate
|
|
626
|
+
expr: rate(orders_created_total[1h]) < 10
|
|
627
|
+
for: 30m
|
|
628
|
+
labels:
|
|
629
|
+
severity: warning
|
|
630
|
+
annotations:
|
|
631
|
+
summary: Low order creation rate
|
|
632
|
+
```
|
|
633
|
+
|
|
634
|
+
### Grafana Dashboard Queries
|
|
635
|
+
|
|
636
|
+
```promql
|
|
637
|
+
# Request rate
|
|
638
|
+
rate(http_server_requests_seconds_count[5m])
|
|
639
|
+
|
|
640
|
+
# Error rate
|
|
641
|
+
sum(rate(http_server_requests_seconds_count{status=~"5.."}[5m]))
|
|
642
|
+
/ sum(rate(http_server_requests_seconds_count[5m]))
|
|
643
|
+
|
|
644
|
+
# Latency p99
|
|
645
|
+
histogram_quantile(0.99, rate(http_server_requests_seconds_bucket[5m]))
|
|
646
|
+
|
|
647
|
+
# Orders created per minute
|
|
648
|
+
rate(orders_created_total[1m]) * 60
|
|
649
|
+
|
|
650
|
+
# Active database connections
|
|
651
|
+
hikaricp_connections_active
|
|
652
|
+
```
|
|
653
|
+
|
|
654
|
+
## Best Practices Summary
|
|
655
|
+
|
|
656
|
+
1. **Logs**: Use structured logging (JSON) with MDC context
|
|
657
|
+
2. **Metrics**: Track business and technical metrics with Micrometer
|
|
658
|
+
3. **Traces**: Enable distributed tracing for request flow visibility
|
|
659
|
+
4. **Correlation**: Include traceId/spanId in all logs
|
|
660
|
+
5. **Health**: Implement custom health indicators for dependencies
|
|
661
|
+
6. **Alerts**: Set up alerts for SLOs (error rate, latency, availability)
|
|
662
|
+
7. **Dashboards**: Create dashboards for key business and technical metrics
|
|
663
|
+
8. **Sampling**: Use appropriate sampling rates in production
|
|
664
|
+
9. **Avoid noise**: Don't log or trace too verbosely
|
|
665
|
+
10. **Security**: Never log sensitive data (passwords, tokens, PII)
|