omgkit 2.0.6 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. package/package.json +6 -3
  2. package/plugin/agents/architect.md +357 -43
  3. package/plugin/agents/code-reviewer.md +481 -22
  4. package/plugin/agents/debugger.md +397 -30
  5. package/plugin/agents/docs-manager.md +431 -23
  6. package/plugin/agents/fullstack-developer.md +395 -34
  7. package/plugin/agents/git-manager.md +438 -20
  8. package/plugin/agents/oracle.md +329 -53
  9. package/plugin/agents/planner.md +275 -32
  10. package/plugin/agents/researcher.md +343 -21
  11. package/plugin/agents/scout.md +423 -18
  12. package/plugin/agents/sprint-master.md +418 -48
  13. package/plugin/agents/tester.md +551 -26
  14. package/plugin/skills/backend/api-architecture/SKILL.md +857 -0
  15. package/plugin/skills/backend/caching-strategies/SKILL.md +755 -0
  16. package/plugin/skills/backend/event-driven-architecture/SKILL.md +753 -0
  17. package/plugin/skills/backend/real-time-systems/SKILL.md +635 -0
  18. package/plugin/skills/databases/database-optimization/SKILL.md +571 -0
  19. package/plugin/skills/devops/monorepo-management/SKILL.md +595 -0
  20. package/plugin/skills/devops/observability/SKILL.md +622 -0
  21. package/plugin/skills/devops/performance-profiling/SKILL.md +905 -0
  22. package/plugin/skills/frontend/advanced-ui-design/SKILL.md +426 -0
  23. package/plugin/skills/integrations/ai-integration/SKILL.md +730 -0
  24. package/plugin/skills/integrations/payment-integration/SKILL.md +735 -0
  25. package/plugin/skills/methodology/problem-solving/SKILL.md +355 -0
  26. package/plugin/skills/methodology/research-validation/SKILL.md +668 -0
  27. package/plugin/skills/methodology/sequential-thinking/SKILL.md +260 -0
  28. package/plugin/skills/mobile/mobile-development/SKILL.md +756 -0
  29. package/plugin/skills/security/security-hardening/SKILL.md +633 -0
  30. package/plugin/skills/tools/document-processing/SKILL.md +916 -0
  31. package/plugin/skills/tools/image-processing/SKILL.md +748 -0
  32. package/plugin/skills/tools/mcp-development/SKILL.md +883 -0
  33. package/plugin/skills/tools/media-processing/SKILL.md +831 -0
@@ -0,0 +1,622 @@
1
+ ---
2
+ name: observability
3
+ description: Production observability with structured logging, metrics collection, distributed tracing, and alerting
4
+ category: devops
5
+ triggers:
6
+ - observability
7
+ - logging
8
+ - monitoring
9
+ - distributed tracing
10
+ - metrics
11
+ - prometheus
12
+ - opentelemetry
13
+ - alerting
14
+ ---
15
+
16
+ # Observability
17
+
18
+ Implement **production observability** with structured logging, metrics, distributed tracing, and alerting. This skill covers the three pillars of observability (logs, metrics, and traces) for production systems, plus the alerting and health checks built on top of them.
19
+
20
+ ## Purpose
21
+
22
+ Understand and debug production systems:
23
+
24
+ - Implement structured, searchable logging
25
+ - Collect and visualize application metrics
26
+ - Trace requests across distributed services
27
+ - Set up meaningful alerts
28
+ - Create actionable dashboards
29
+ - Debug production issues efficiently
30
+
31
+ ## Features
32
+
33
+ ### 1. Structured Logging
34
+
35
+ ```typescript
36
+ import pino from 'pino';
37
+ import { v4 as uuid } from 'uuid';
38
+
39
+ // Logger configuration
40
/**
 * Application-wide pino logger.
 *
 * - `level` is read from LOG_LEVEL and falls back to 'info'.
 * - `base` attaches service, environment and version to every record. A custom
 *   `base` replaces pino's default bindings, so nothing extra is needed to keep
 *   pid/hostname out of the output.
 * - `timestamp` emits an ISO-8601 string under the `timestamp` key.
 * - `redact` lists the key paths whose values must not be written to the log.
 *
 * A `bindings` formatter returning `{}` is deliberately NOT configured: pino
 * runs that formatter over the base fields and over the bindings of every
 * `child()` logger, so an empty result would silently drop service/environment/
 * version as well as per-request context such as requestId.
 */
const logger = pino({
  level: process.env.LOG_LEVEL || 'info',
  formatters: {
    // Emit the level as its label (e.g. "info") instead of the numeric value.
    level: (label) => ({ level: label }),
  },
  timestamp: () => `,"timestamp":"${new Date().toISOString()}"`,
  base: {
    service: process.env.SERVICE_NAME,
    environment: process.env.NODE_ENV,
    version: process.env.APP_VERSION,
  },
  redact: ['password', 'token', 'authorization', 'cookie', '*.password'],
});
54
+
55
+ // Child logger with context
56
/**
 * Builds a child logger bound to a single HTTP request.
 *
 * The request id is taken from the `x-request-id` header when present,
 * otherwise a fresh UUID is generated. The user id, path and method of the
 * request are bound alongside it.
 */
function createRequestLogger(req: Request) {
  const requestId = req.headers['x-request-id'] || uuid();
  const requestContext = {
    requestId,
    userId: req.user?.id,
    path: req.path,
    method: req.method,
  };
  return logger.child(requestContext);
}
64
+
65
+ // Logging middleware
66
/**
 * Express middleware that logs every request and its response.
 *
 * Attaches a request-scoped logger to `req.log`, logs the incoming request
 * immediately, and logs the outcome once the response emits `finish`:
 * `error` for 5xx, `warn` for 4xx, `info` otherwise.
 */
function loggingMiddleware(req: Request, res: Response, next: NextFunction) {
  const requestLog = createRequestLogger(req);
  req.log = requestLog;

  const startedAt = Date.now();
  requestLog.info({ type: 'request' }, 'Incoming request');

  res.on('finish', () => {
    const elapsedMs = Date.now() - startedAt;
    const summary = {
      type: 'response',
      statusCode: res.statusCode,
      duration: elapsedMs,
      contentLength: res.get('content-length'),
    };

    // Pick the level and message from the status class.
    const [level, message] =
      res.statusCode >= 500
        ? (['error', 'Request failed'] as const)
        : res.statusCode >= 400
          ? (['warn', 'Request error'] as const)
          : (['info', 'Request completed'] as const);

    requestLog[level](summary, message);
  });

  next();
}
96
+
97
+ // Structured error logging
98
/**
 * Logs an error as a structured record at `error` level.
 *
 * @param error   The error to log; its message, name and stack are always recorded.
 * @param context Optional extra fields merged into the top level of the record.
 *
 * If the error carries an object-valued `details` property, its fields are
 * merged into the `error` object. The core fields (message, name, stack) are
 * written last so `details` cannot overwrite them, and `context` is spread
 * first so a stray `context.error` cannot replace the structured error.
 */
function logError(error: Error, context?: Record<string, any>) {
  const details: unknown = (error as { details?: unknown }).details;
  // Only spread plain object details; spreading a string would emit one key per character.
  const extraFields =
    typeof details === 'object' && details !== null && !Array.isArray(details)
      ? (details as Record<string, unknown>)
      : {};

  logger.error({
    ...context,
    error: {
      ...extraFields,
      message: error.message,
      name: error.name,
      stack: error.stack,
    },
  }, 'Error occurred');
}
109
+
110
+ // Business event logging
111
/** Shape of a domain-level event written to the log stream. */
interface BusinessEvent {
  /** Event name, e.g. 'order.completed'. */
  event: string;
  /** Id of the user the event relates to, when there is one. */
  userId?: string;
  /** Event payload. */
  data: Record<string, any>;
  /** Optional free-form tags. */
  tags?: string[];
}

/**
 * Writes a business event at `info` level, marked with `type: 'business_event'`
 * and a message of the form `Business event: <name>`.
 */
function logBusinessEvent(event: BusinessEvent) {
  const { event: eventName, userId, data, tags } = event;
  const record = {
    type: 'business_event',
    event: eventName,
    userId,
    data,
    tags,
  };
  logger.info(record, `Business event: ${eventName}`);
}
127
+
128
+ // Usage
129
+ logBusinessEvent({
130
+ event: 'order.completed',
131
+ userId: 'user_123',
132
+ data: {
133
+ orderId: 'order_456',
134
+ total: 99.99,
135
+ items: 3,
136
+ },
137
+ tags: ['checkout', 'revenue'],
138
+ });
139
+ ```
140
+
141
+ ### 2. Metrics with Prometheus
142
+
143
+ ```typescript
144
+ import { Registry, Counter, Histogram, Gauge, collectDefaultMetrics } from 'prom-client';
145
+
146
+ // Create registry
147
// Registry that every metric in this module registers with.
const register = new Registry();

// Register prom-client's default metrics on that registry.
collectDefaultMetrics({ register });

// Label set shared by both HTTP metrics.
const httpLabelNames = ['method', 'route', 'status_code'] as const;

/** Request latency histogram, in seconds. */
const httpRequestDuration = new Histogram({
  name: 'http_request_duration_seconds',
  help: 'Duration of HTTP requests in seconds',
  labelNames: [...httpLabelNames],
  buckets: [0.01, 0.05, 0.1, 0.5, 1, 2, 5],
  registers: [register],
});

/** Request counter. */
const httpRequestTotal = new Counter({
  name: 'http_requests_total',
  help: 'Total number of HTTP requests',
  labelNames: [...httpLabelNames],
  registers: [register],
});

/** Gauge for the number of active connections. */
const activeConnections = new Gauge({
  name: 'active_connections',
  help: 'Number of active connections',
  registers: [register],
});
173
+
174
/**
 * Business-level metrics, all registered on `register`.
 *
 * `apiLatency` and `cacheHitRate` are defined here because the
 * "Performance Monitoring" use case records to them.
 */
const businessMetrics = {
  /** Orders counted by outcome and payment method. */
  ordersTotal: new Counter({
    name: 'orders_total',
    help: 'Total number of orders',
    labelNames: ['status', 'payment_method'],
    registers: [register],
  }),
  /** Distribution of order values in dollars. */
  orderValue: new Histogram({
    name: 'order_value_dollars',
    help: 'Value of orders in dollars',
    buckets: [10, 50, 100, 250, 500, 1000],
    registers: [register],
  }),
  /** Number of active users. */
  activeUsers: new Gauge({
    name: 'active_users',
    help: 'Number of active users',
    registers: [register],
  }),
  /**
   * API call latency.
   * NOTE(review): buckets assume callers observe seconds, matching
   * http_request_duration_seconds; confirm the unit at the call site.
   */
  apiLatency: new Histogram({
    name: 'api_latency_seconds',
    help: 'Latency of API calls in seconds',
    buckets: [0.01, 0.05, 0.1, 0.5, 1, 2, 5],
    registers: [register],
  }),
  /** Cache hit ratio; callers set it to hits / total, i.e. a value in [0, 1]. */
  cacheHitRate: new Gauge({
    name: 'cache_hit_ratio',
    help: 'Ratio of cache hits to total cache lookups',
    registers: [register],
  }),
};
193
+
194
+ // Metrics middleware
195
/**
 * Express middleware that records the duration and count of every request.
 *
 * Once the response emits `finish`, it observes the elapsed time (seconds) on
 * `httpRequestDuration` and increments `httpRequestTotal`, both labelled with
 * method, route and status code.
 *
 * The `route` label is the matched route pattern (e.g. `/users/:id`). Requests
 * that matched no route are labelled `unmatched` rather than with their raw
 * path: raw paths contain ids and arbitrary probe URLs, which would create one
 * time series per distinct URL.
 */
function metricsMiddleware(req: Request, res: Response, next: NextFunction) {
  const start = Date.now();

  res.on('finish', () => {
    const duration = (Date.now() - start) / 1000;
    const route = req.route?.path || 'unmatched';
    const statusCode = res.statusCode.toString();

    httpRequestDuration
      .labels(req.method, route, statusCode)
      .observe(duration);

    httpRequestTotal
      .labels(req.method, route, statusCode)
      .inc();
  });

  next();
}
213
+
214
+ // Expose metrics endpoint
215
// Scrape endpoint: serves everything registered on `register` using the
// registry's own content type.
app.get('/metrics', async (_req, res) => {
  res.set('Content-Type', register.contentType);
  const payload = await register.metrics();
  res.end(payload);
});
219
+
220
+ // Usage in business logic
221
/**
 * Persists the order, then records it in the business metrics: one
 * `completed` order for its payment method, and its total in the order-value
 * histogram. Metrics are only touched after `saveOrder` has resolved.
 */
async function completeOrder(order: Order) {
  await saveOrder(order);

  const { ordersTotal, orderValue } = businessMetrics;
  ordersTotal.labels('completed', order.paymentMethod).inc();
  orderValue.observe(order.total);
}
230
+ ```
231
+
232
+ ### 3. Distributed Tracing with OpenTelemetry
233
+
234
+ ```typescript
235
import { NodeSDK } from '@opentelemetry/sdk-node';
import { getNodeAutoInstrumentations } from '@opentelemetry/auto-instrumentations-node';
import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-http';
import { Resource } from '@opentelemetry/resources';
import { SemanticResourceAttributes } from '@opentelemetry/semantic-conventions';
import { trace, SpanStatusCode, context, propagation } from '@opentelemetry/api';
241
+
242
+ // Initialize OpenTelemetry
243
// Resource attributes identifying this service, read from the environment.
const serviceResource = new Resource({
  [SemanticResourceAttributes.SERVICE_NAME]: process.env.SERVICE_NAME,
  [SemanticResourceAttributes.SERVICE_VERSION]: process.env.APP_VERSION,
  [SemanticResourceAttributes.DEPLOYMENT_ENVIRONMENT]: process.env.NODE_ENV,
});

// OTLP/HTTP exporter pointed at OTEL_EXPORTER_OTLP_ENDPOINT.
const otlpTraceExporter = new OTLPTraceExporter({
  url: process.env.OTEL_EXPORTER_OTLP_ENDPOINT,
});

// Auto-instrumentation for HTTP, Express, pg and Redis; incoming requests to
// /health are excluded from HTTP instrumentation.
const autoInstrumentations = getNodeAutoInstrumentations({
  '@opentelemetry/instrumentation-http': {
    ignoreIncomingRequestHook: (req) => req.url === '/health',
  },
  '@opentelemetry/instrumentation-express': {},
  '@opentelemetry/instrumentation-pg': {},
  '@opentelemetry/instrumentation-redis': {},
});

const sdk = new NodeSDK({
  resource: serviceResource,
  traceExporter: otlpTraceExporter,
  instrumentations: [autoInstrumentations],
});

sdk.start();
265
+
266
+ // Custom spans
267
+ const tracer = trace.getTracer('my-service');
268
+
269
/**
 * Processes an order inside a `processOrder` span, with child spans for
 * validation and payment.
 *
 * Every span is ended in a `finally` block, so a throwing `validateOrder` or
 * `chargeCard` cannot leave a child span open. On failure the parent span is
 * marked ERROR, the exception is recorded on it, and the original error is
 * rethrown unchanged.
 *
 * @param orderId Id of the order to process.
 * @throws Whatever `validateOrder` or `chargeCard` throws.
 */
async function processOrder(orderId: string): Promise<void> {
  return tracer.startActiveSpan('processOrder', async (span) => {
    try {
      span.setAttributes({
        'order.id': orderId,
      });

      // Child span for validation
      await tracer.startActiveSpan('validateOrder', async (validationSpan) => {
        try {
          const isValid = await validateOrder(orderId);
          // NOTE(review): the result is only recorded as an attribute; payment
          // still runs when it is false. Confirm that is intended.
          validationSpan.setAttributes({ 'order.valid': isValid });
        } finally {
          validationSpan.end();
        }
      });

      // Child span for payment
      await tracer.startActiveSpan('processPayment', async (paymentSpan) => {
        try {
          const payment = await chargeCard(orderId);
          paymentSpan.setAttributes({
            'payment.id': payment.id,
            'payment.amount': payment.amount,
          });
        } finally {
          paymentSpan.end();
        }
      });

      span.setStatus({ code: SpanStatusCode.OK });
    } catch (error: unknown) {
      // Catch variables are `unknown` under strict mode; normalise before reading `.message`.
      const failure = error instanceof Error ? error : new Error(String(error));
      span.setStatus({
        code: SpanStatusCode.ERROR,
        message: failure.message,
      });
      span.recordException(failure);
      throw error;
    } finally {
      span.end();
    }
  });
}
306
+
307
+ // Propagate context across services
308
/**
 * POSTs `data` as JSON to `endpoint` inside an `externalServiceCall` span and
 * propagates the current trace context to the downstream service.
 *
 * The active context is injected into the outgoing headers with the globally
 * configured propagator, so the receiving service can continue the same trace.
 * The span is always ended; on failure it is marked ERROR and the exception is
 * recorded before the error is rethrown.
 *
 * @param endpoint URL of the downstream service.
 * @param data     JSON-serialisable request body.
 * @returns The `fetch` response; non-2xx statuses are not treated as errors here.
 * @throws Whatever `fetch` throws.
 */
async function callExternalService(endpoint: string, data: any) {
  return tracer.startActiveSpan('externalServiceCall', async (span) => {
    try {
      span.setAttributes({
        'http.url': endpoint,
        'http.method': 'POST',
      });

      // Inject trace context into headers
      const headers: Record<string, string> = {};
      propagation.inject(context.active(), headers);

      const response = await fetch(endpoint, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          ...headers,
        },
        body: JSON.stringify(data),
      });

      span.setAttributes({
        'http.status_code': response.status,
      });

      return response;
    } catch (error: unknown) {
      const failure = error instanceof Error ? error : new Error(String(error));
      span.setStatus({
        code: SpanStatusCode.ERROR,
        message: failure.message,
      });
      span.recordException(failure);
      throw error;
    } finally {
      span.end();
    }
  });
}
336
+ ```
337
+
338
+ ### 4. Error Tracking
339
+
340
+ ```typescript
341
+ import * as Sentry from '@sentry/node';
342
+ import { ProfilingIntegration } from '@sentry/profiling-node';
343
+
344
+ // Initialize Sentry
345
// Trace 10% of transactions in production and all of them elsewhere.
const tracesSampleRate = process.env.NODE_ENV === 'production' ? 0.1 : 1.0;

Sentry.init({
  dsn: process.env.SENTRY_DSN,
  environment: process.env.NODE_ENV,
  release: process.env.APP_VERSION,
  integrations: [new ProfilingIntegration()],
  tracesSampleRate,
  profilesSampleRate: 0.1,
  /**
   * Drops events whose original exception message mentions ECONNRESET;
   * every other event is passed through unchanged.
   */
  beforeSend(event, hint) {
    const original = hint.originalException as Error;
    const isConnectionReset = original?.message?.includes('ECONNRESET');
    if (isConnectionReset) {
      return null;
    }
    return event;
  },
});
363
+
364
+ // Error handler middleware
365
// Sentry error-handling middleware.
app.use(Sentry.Handlers.errorHandler({
  /**
   * Report server-side failures only.
   *
   * An error with no HTTP status (an ordinary thrown `Error`) is reported:
   * it has no client-error status to exclude it, and comparing `undefined`
   * against 500 would otherwise filter it out. Errors that carry a status are
   * reported only when it is 5xx.
   */
  shouldHandleError(error) {
    const status = error.status ?? error.statusCode;
    return status == null || Number(status) >= 500;
  },
}));
371
+
372
+ // Custom error reporting
373
/**
 * Sends an error to Sentry and writes it to the structured log.
 *
 * Inside a temporary Sentry scope, `context` (when given) is attached as
 * extras and a `component` tag is set from `context.component`, defaulting to
 * 'unknown'. The same error and context are then passed to `logError`.
 */
function reportError(error: Error, context?: Record<string, any>) {
  const component = context?.component || 'unknown';

  Sentry.withScope((scope) => {
    if (context) {
      scope.setExtras(context);
    }
    scope.setTags({ component });
    Sentry.captureException(error);
  });

  // Also log
  logError(error, context);
}
387
+
388
+ // Capture user context
389
// When the request carries a user, hand its id and email to Sentry.
app.use((req, _res, next) => {
  const { user } = req;
  if (user) {
    Sentry.setUser({ id: user.id, email: user.email });
  }
  next();
});
398
+ ```
399
+
400
+ ### 5. Alerting Configuration
401
+
402
+ ```yaml
403
# prometheus/alerts.yml
groups:
  - name: application
    rules:
      # High error rate: more than 5% of requests answered with 5xx over 5 minutes.
      - alert: HighErrorRate
        expr: |
          sum(rate(http_requests_total{status_code=~"5.."}[5m]))
          /
          sum(rate(http_requests_total[5m]))
          > 0.05
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: High error rate detected
          description: Error rate is {{ $value | humanizePercentage }}

      # Slow response time: p95 latency above 2 seconds.
      - alert: SlowResponseTime
        expr: |
          histogram_quantile(0.95,
            sum(rate(http_request_duration_seconds_bucket[5m])) by (le)
          ) > 2
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: Slow response times detected
          description: 95th percentile latency is {{ $value }}s

      # High memory usage: resident memory above 2 GiB.
      - alert: HighMemoryUsage
        expr: |
          process_resident_memory_bytes / 1024 / 1024 / 1024 > 2
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: High memory usage
          description: Memory usage is {{ $value | humanize }}GB

      # Service down
      - alert: ServiceDown
        expr: up == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: Service is down
          description: "{{ $labels.instance }} has been down for more than 1 minute"

  - name: business
    rules:
      # Low order volume: fewer than 10 completed orders in the last hour.
      # increase() yields the count over the window; rate() would yield a
      # per-second value, which does not match a per-hour threshold.
      - alert: LowOrderVolume
        expr: |
          sum(increase(orders_total{status="completed"}[1h])) < 10
        for: 30m
        labels:
          severity: warning
        annotations:
          summary: Low order volume
          description: Order rate is {{ $value }} orders/hour

      # Payment failures: more than 10% of orders failing payment over 15 minutes.
      - alert: HighPaymentFailureRate
        expr: |
          sum(rate(orders_total{status="payment_failed"}[15m]))
          /
          sum(rate(orders_total[15m]))
          > 0.1
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: High payment failure rate
          description: Payment failure rate is {{ $value | humanizePercentage }}
481
+ ```
482
+
483
+ ### 6. Health Checks
484
+
485
+ ```typescript
486
+ import { HealthCheck, HealthCheckResult, HttpHealthIndicator, DiskHealthIndicator, MemoryHealthIndicator } from '@nestjs/terminus';
487
+
488
/** Outcome of one dependency probe. */
interface CheckResult {
  /** 'pass', 'warn' or 'fail'. */
  status: 'pass' | 'fail' | 'warn';
  /** Time the probe took, in milliseconds. */
  duration: number;
  /** Optional detail, e.g. the error message of a failed probe. */
  message?: string;
}

/** Aggregated health report returned by the health endpoints. */
interface HealthStatus {
  /** Overall verdict derived from the individual checks. */
  status: 'healthy' | 'degraded' | 'unhealthy';
  /** Individual probe results keyed by check name. */
  checks: Record<string, CheckResult>;
  /** ISO-8601 time at which the report was produced. */
  timestamp: string;
  /** Application version, or 'unknown' when APP_VERSION is unset. */
  version: string;
}
500
+
501
/**
 * Runs all dependency probes and folds them into one health report.
 *
 * - Database and Redis are critical: a `fail` from either makes the service
 *   `unhealthy`.
 * - External API and memory are non-critical: any non-`pass` result (`warn`
 *   or `fail`) makes the service `degraded`, never `unhealthy`. A non-critical
 *   `fail` must not be reported as `healthy`.
 *
 * The asynchronous probes are awaited together so the latency of the report is
 * bounded by the slowest probe rather than by their sum.
 * NOTE(review): this assumes the probes are independent of one another and
 * resolve to a CheckResult instead of rejecting (as `checkDatabase` does);
 * confirm for `checkRedis` and `checkExternalApi`.
 *
 * @returns The aggregated status, per-check results, timestamp and version.
 */
async function healthCheck(): Promise<HealthStatus> {
  const [database, redis, externalApi] = await Promise.all([
    checkDatabase(),
    checkRedis(),
    checkExternalApi(),
  ]);
  const memory = checkMemory();

  const checks: Record<string, CheckResult> = {
    database,
    redis,
    externalApi,
    memory,
  };

  const criticalFailed = [database, redis].some((check) => check.status === 'fail');
  const nonCriticalImpaired = [externalApi, memory].some((check) => check.status !== 'pass');

  let overallStatus: HealthStatus['status'] = 'healthy';
  if (criticalFailed) {
    overallStatus = 'unhealthy';
  } else if (nonCriticalImpaired) {
    overallStatus = 'degraded';
  }

  return {
    status: overallStatus,
    checks,
    timestamp: new Date().toISOString(),
    version: process.env.APP_VERSION || 'unknown',
  };
}
532
+
533
/**
 * Probes the database with `SELECT 1`.
 *
 * Never rejects: a failing query is reported as a `fail` result carrying the
 * error message, so the caller can always build a full health report.
 *
 * @returns `pass` or `fail`, with the probe duration in milliseconds.
 */
async function checkDatabase(): Promise<CheckResult> {
  const start = Date.now();
  try {
    await db.$queryRaw`SELECT 1`;
    return {
      status: 'pass',
      duration: Date.now() - start,
    };
  } catch (error: unknown) {
    // Catch variables are `unknown` under strict mode; narrow before reading `.message`.
    const message = error instanceof Error ? error.message : String(error);
    return {
      status: 'fail',
      duration: Date.now() - start,
      message,
    };
  }
}
549
+
550
+ // Endpoints
551
// Full health report: 503 only when the service is unhealthy, 200 when it is
// healthy or degraded.
app.get('/health', async (_req, res) => {
  const report = await healthCheck();
  const httpStatus = report.status === 'unhealthy' ? 503 : 200;
  res.status(httpStatus).json(report);
});

// Liveness: answers 200 without probing any dependency.
app.get('/health/live', (_req, res) => {
  res.status(200).json({ status: 'alive' });
});

// Readiness: same report and status mapping as /health.
app.get('/health/ready', async (_req, res) => {
  const report = await healthCheck();
  const httpStatus = report.status === 'unhealthy' ? 503 : 200;
  res.status(httpStatus).json(report);
});
566
+ ```
567
+
568
+ ## Use Cases
569
+
570
+ ### 1. Debugging Production Issues
571
+
572
+ ```typescript
573
+ // Correlation ID for request tracing
574
+ const correlationId = req.headers['x-correlation-id'] || uuid();
575
+ req.log = logger.child({ correlationId });
576
+
577
+ // Log at decision points
578
+ req.log.info({ userId, action: 'checkout.started' });
579
+ // ... process ...
580
+ req.log.info({ orderId, action: 'order.created' });
581
+ ```
582
+
583
+ ### 2. Performance Monitoring
584
+
585
+ ```typescript
586
+ // Track key business metrics
587
+ businessMetrics.apiLatency.observe(duration);
588
+ businessMetrics.cacheHitRate.set(hits / total);
589
+ ```
590
+
591
+ ## Best Practices
592
+
593
+ ### Do's
594
+
595
+ - **Use correlation IDs** - Trace requests across services
596
+ - **Log at appropriate levels** - Don't log everything as error
597
+ - **Set meaningful alerts** - Alert on symptoms, not causes
598
+ - **Create actionable dashboards** - Show what matters
599
+ - **Implement log rotation** - Prevent disk exhaustion
600
+ - **Sample high-volume traces** - Balance detail vs cost
601
+
602
+ ### Don'ts
603
+
604
+ - Don't log sensitive data
605
+ - Don't ignore alert fatigue
606
+ - Don't skip structured logging
607
+ - Don't forget log levels
608
+ - Don't alert on every error
609
+ - Don't neglect log retention policies
610
+
611
+ ## Related Skills
612
+
613
+ - **kubernetes** - Container orchestration
614
+ - **backend-development** - Application code
615
+ - **performance-profiling** - Performance analysis
616
+
617
+ ## Reference Resources
618
+
619
+ - [OpenTelemetry Documentation](https://opentelemetry.io/docs/)
620
+ - [Prometheus Documentation](https://prometheus.io/docs/)
621
+ - [Grafana Documentation](https://grafana.com/docs/)
622
+ - [Pino Logger](https://getpino.io/)