omgkit 2.22.11 → 2.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,852 @@
1
+ ---
2
+ name: DORA Metrics and DevOps Performance
3
+ description: The agent implements DORA metrics tracking for measuring and improving software delivery performance. Use when establishing engineering metrics, benchmarking teams, or driving DevOps transformation.
4
+ category: devops
5
+ ---
6
+
7
+ # DORA Metrics and DevOps Performance
8
+
9
+ ## Purpose
10
+
11
+ DORA (DevOps Research and Assessment) metrics are the industry standard for measuring software delivery performance. Google's research across thousands of organizations identified four key metrics that predict:
12
+
13
+ - **Organizational performance** (profitability, market share)
14
+ - **Non-commercial performance** (quality, customer satisfaction)
15
+ - **Team well-being** and reduced burnout
16
+
17
+ Elite performers who meet reliability targets are **2.3x more likely** to use trunk-based development and continuous delivery practices.
18
+
19
+ ## Features
20
+
21
+ | Metric | What It Measures | Elite Benchmark |
22
+ |--------|------------------|-----------------|
23
+ | Deployment Frequency | How often code reaches production | Multiple times per day |
24
+ | Lead Time for Changes | Time from commit to production | Less than 1 hour |
25
+ | Change Failure Rate | Percentage of deployments causing failures | 0-15% |
26
+ | Time to Restore Service | Recovery time from incidents | Less than 1 hour |
27
+
28
+ ## The Four Key Metrics
29
+
30
+ ### 1. Deployment Frequency
31
+
32
+ **Definition:** How often your organization deploys code to production.
33
+
34
+ ```typescript
35
/** One recorded deployment of a service to an environment. */
interface DeploymentData {
  timestamp: Date;
  environment: string;
  service: string;
  success: boolean;
}

/**
 * Classifies how often code reaches production.
 *
 * Only deployments that succeeded and targeted the `'production'`
 * environment are counted.
 *
 * @param deployments - Deployment records observed during the period.
 * @param periodDays - Length of the observation period in days (default 30).
 * @returns The average number of production deployments per day and the
 *   matching tier: `'elite'` (>= 1/day), `'high'` (>= 1/week),
 *   `'medium'` (>= 1/month) or `'low'`.
 */
function calculateDeploymentFrequency(
  deployments: DeploymentData[],
  periodDays: number = 30
): { frequency: string; deploymentsPerDay: number } {
  let shipped = 0;
  for (const { environment, success } of deployments) {
    if (success && environment === 'production') {
      shipped += 1;
    }
  }
  const deploymentsPerDay = shipped / periodDays;

  // Tiers ordered from most to least demanding; the first one met wins.
  const tiers: Array<[number, string]> = [
    [1, 'elite'],       // Multiple times per day or daily
    [1 / 7, 'high'],    // Weekly to daily
    [1 / 30, 'medium']  // Monthly to weekly
  ];
  const reached = tiers.find(([minimumPerDay]) => deploymentsPerDay >= minimumPerDay);
  const frequency = reached ? reached[1] : 'low'; // Less than monthly

  return { frequency, deploymentsPerDay };
}
66
+ ```
67
+
68
+ ### 2. Lead Time for Changes
69
+
70
+ **Definition:** Time from code commit to code running in production.
71
+
72
+ ```typescript
73
/** A single change traced from its commit to its production deployment. */
interface ChangeData {
  commitTimestamp: Date;
  deployTimestamp: Date;
  commitSha: string;
  prNumber?: number;
}

/**
 * Computes lead time for changes (commit -> running in production).
 *
 * @param changes - Changes deployed during the period.
 * @returns The median and 90th-percentile lead time in hours, plus the tier
 *   derived from the median: `'elite'` (< 1 hour), `'high'` (< 1 day),
 *   `'medium'` (< 1 week) or `'low'`. With no changes, both durations are 0
 *   and the tier is `'low'`, because nothing was delivered to measure.
 */
function calculateLeadTime(changes: ChangeData[]): {
  medianHours: number;
  p90Hours: number;
  performance: string;
} {
  // Without data the array lookups below would yield `undefined`, which
  // violates the declared numeric return type.
  if (changes.length === 0) {
    return { medianHours: 0, p90Hours: 0, performance: 'low' };
  }

  const msPerHour = 1000 * 60 * 60;
  const leadTimes = changes
    .map(c => (c.deployTimestamp.getTime() - c.commitTimestamp.getTime()) / msPerHour)
    .sort((a, b) => a - b);

  // True median: average the two middle values when the count is even.
  const mid = Math.floor(leadTimes.length / 2);
  const median =
    leadTimes.length % 2 === 1
      ? leadTimes[mid]
      : (leadTimes[mid - 1] + leadTimes[mid]) / 2;

  // Nearest-rank percentile. Integer arithmetic avoids 0.9 * n rounding up
  // past the intended rank; the -1 converts the 1-based rank to an index.
  const p90 = leadTimes[Math.ceil((leadTimes.length * 9) / 10) - 1];

  let performance: string;
  if (median < 1) {
    performance = 'elite'; // Less than 1 hour
  } else if (median < 24) {
    performance = 'high'; // Less than 1 day
  } else if (median < 168) {
    performance = 'medium'; // Less than 1 week
  } else {
    performance = 'low'; // More than 1 week
  }

  return { medianHours: median, p90Hours: p90, performance };
}
108
+ ```
109
+
110
+ ### 3. Change Failure Rate
111
+
112
+ **Definition:** Percentage of deployments that result in degraded service requiring remediation.
113
+
114
+ ```typescript
115
/** Outcome of one deployment, including any remediation it required. */
interface DeploymentOutcome {
  deploymentId: string;
  timestamp: Date;
  success: boolean;
  causedIncident: boolean;
  requiredRollback: boolean;
  requiredHotfix: boolean;
}

/**
 * Computes the change failure rate: the percentage of deployments that
 * caused an incident or needed a rollback or hotfix.
 *
 * @param deployments - Deployment outcomes observed during the period.
 * @returns The failure rate as a percentage (0-100) and its tier:
 *   `'elite'` (<= 15%), `'high'` (<= 30%), `'medium'` (<= 45%) or `'low'`.
 *   With no deployments the rate is 0 and the tier is `'low'`, because
 *   there is no delivery activity to rate.
 */
function calculateChangeFailureRate(deployments: DeploymentOutcome[]): {
  rate: number;
  performance: string;
} {
  const total = deployments.length;

  // Guard the division: 0 / 0 would otherwise surface as `rate: NaN`.
  if (total === 0) {
    return { rate: 0, performance: 'low' };
  }

  const failures = deployments.filter(d =>
    d.causedIncident || d.requiredRollback || d.requiredHotfix
  ).length;

  const rate = (failures / total) * 100;

  let performance: string;
  if (rate <= 15) {
    performance = 'elite'; // 0-15%
  } else if (rate <= 30) {
    performance = 'high'; // 16-30%
  } else if (rate <= 45) {
    performance = 'medium'; // 31-45%
  } else {
    performance = 'low'; // 46%+
  }

  return { rate, performance };
}
149
+ ```
150
+
151
+ ### 4. Time to Restore Service (MTTR)
152
+
153
+ **Definition:** How long it takes to restore service when an incident occurs.
154
+
155
+ ```typescript
156
/** A production incident with its start and resolution times. */
interface Incident {
  id: string;
  startTime: Date;
  resolvedTime: Date;
  severity: 'critical' | 'major' | 'minor';
  service: string;
}

/**
 * Computes time to restore service, overall and per service.
 *
 * @param incidents - Resolved incidents observed during the period.
 * @returns The median restoration time in hours, the tier derived from it
 *   (`'elite'` < 1 hour, `'high'` < 1 day, `'medium'` < 1 week, else
 *   `'low'`), and the median restoration time of each service. With no
 *   incidents the median is 0, the tier is `'low'` and `byService` is empty.
 */
function calculateMTTR(incidents: Incident[]): {
  medianHours: number;
  performance: string;
  byService: Record<string, number>;
} {
  if (incidents.length === 0) {
    // Indexing an empty list would return `undefined` as the median.
    return { medianHours: 0, performance: 'low', byService: {} };
  }

  const msPerHour = 1000 * 60 * 60;

  // True median of a non-empty list (averages the two middle values when
  // the count is even); sorts a copy so callers' arrays stay untouched.
  const medianOf = (values: number[]): number => {
    const sorted = [...values].sort((a, b) => a - b);
    const mid = Math.floor(sorted.length / 2);
    return sorted.length % 2 === 1 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2;
  };

  // Compute each duration once and bucket it by service. A Map is used for
  // grouping so service names such as "constructor" cannot collide with
  // properties inherited from Object.prototype.
  const allDurations: number[] = [];
  const durationsByService = new Map<string, number[]>();
  for (const incident of incidents) {
    const hours = (incident.resolvedTime.getTime() - incident.startTime.getTime()) / msPerHour;
    allDurations.push(hours);

    const bucket = durationsByService.get(incident.service);
    if (bucket) {
      bucket.push(hours);
    } else {
      durationsByService.set(incident.service, [hours]);
    }
  }

  const median = medianOf(allDurations);

  let performance: string;
  if (median < 1) {
    performance = 'elite'; // Less than 1 hour
  } else if (median < 24) {
    performance = 'high'; // Less than 1 day
  } else if (median < 168) {
    performance = 'medium'; // Less than 1 week
  } else {
    performance = 'low'; // More than 1 week
  }

  const byService: Record<string, number> = {};
  durationsByService.forEach((durations, service) => {
    byService[service] = medianOf(durations);
  });

  return { medianHours: median, performance, byService };
}
204
+ ```
205
+
206
+ ## Performance Levels (2024 Benchmarks)
207
+
208
+ | Level | Deploy Freq | Lead Time | Change Failure | MTTR |
209
+ |-------|-------------|-----------|----------------|------|
210
+ | **Elite** | Multiple/day | < 1 hour | 0-15% | < 1 hour |
211
+ | **High** | Daily-Weekly | 1 day - 1 week | 16-30% | < 1 day |
212
+ | **Medium** | Weekly-Monthly | 1 week - 1 month | 16-30% | < 1 day |
213
+ | **Low** | Monthly+ | 1-6 months | 16-30% | < 1 week |
214
+
215
+ **Key Insight (2024 DORA Report):** Elite performers are **2.3x more likely** to meet reliability targets when using trunk-based development.
216
+
217
+ ## Measurement Implementation
218
+
219
+ ### GitHub Actions DORA Workflow
220
+
221
+ ```yaml
222
# .github/workflows/dora-metrics.yml
# Collects deployment frequency and PR lead time for the last 30 days and
# uploads a Markdown summary as a build artifact.
name: DORA Metrics Collection

on:
  schedule:
    - cron: '0 0 * * 0' # Weekly on Sunday
  workflow_dispatch:

# The job only reads workflow runs and pull requests.
permissions:
  actions: read
  contents: read
  pull-requests: read

jobs:
  collect-metrics:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Collect Deployment Data
        id: deployments
        uses: actions/github-script@v7
        with:
          script: |
            const periodDays = 30;
            const thirtyDaysAgo = new Date();
            thirtyDaysAgo.setDate(thirtyDaysAgo.getDate() - periodDays);

            // Get workflow runs (deployments). Paginate: a single request
            // returns one page only (30 runs by default), which would cap
            // the measured frequency at 1.00/day.
            const runs = await github.paginate(github.rest.actions.listWorkflowRuns, {
              owner: context.repo.owner,
              repo: context.repo.repo,
              workflow_id: 'deploy.yml',
              created: `>=${thirtyDaysAgo.toISOString()}`,
              status: 'completed',
              per_page: 100
            });

            const deployments = runs.filter(r => r.conclusion === 'success');

            // Calculate deployment frequency
            const deploymentsPerDay = deployments.length / periodDays;

            return {
              count: deployments.length,
              perDay: deploymentsPerDay.toFixed(2),
              frequency: deploymentsPerDay >= 1 ? 'elite' :
                         deploymentsPerDay >= 1 / 7 ? 'high' :
                         deploymentsPerDay >= 1 / 30 ? 'medium' : 'low'
            };

      - name: Collect Lead Time Data
        id: lead-time
        uses: actions/github-script@v7
        with:
          script: |
            const thirtyDaysAgo = new Date();
            thirtyDaysAgo.setDate(thirtyDaysAgo.getDate() - 30);

            // Get merged PRs. Only the 100 most recently updated closed PRs
            // are inspected, and lead time is approximated as PR creation
            // to merge.
            const { data: prs } = await github.rest.pulls.list({
              owner: context.repo.owner,
              repo: context.repo.repo,
              state: 'closed',
              sort: 'updated',
              direction: 'desc',
              per_page: 100
            });

            const mergedPRs = prs.filter(pr =>
              pr.merged_at &&
              new Date(pr.merged_at) > thirtyDaysAgo
            );

            const leadTimes = mergedPRs.map(pr => {
              const created = new Date(pr.created_at);
              const merged = new Date(pr.merged_at);
              return (merged - created) / (1000 * 60 * 60); // hours
            });

            leadTimes.sort((a, b) => a - b);
            const median = leadTimes[Math.floor(leadTimes.length / 2)] || 0;

            return {
              medianHours: median.toFixed(1),
              performance: median < 1 ? 'elite' :
                           median < 24 ? 'high' :
                           median < 168 ? 'medium' : 'low'
            };

      - name: Generate Report
        # Step outputs are passed through the environment instead of being
        # spliced into the script text, so their content is never parsed as
        # shell code.
        env:
          DEPLOY_PER_DAY: ${{ fromJson(steps.deployments.outputs.result).perDay }}
          DEPLOY_FREQUENCY: ${{ fromJson(steps.deployments.outputs.result).frequency }}
          LEAD_TIME_HOURS: ${{ fromJson(steps.lead-time.outputs.result).medianHours }}
          LEAD_TIME_PERFORMANCE: ${{ fromJson(steps.lead-time.outputs.result).performance }}
        run: |
          cat << EOF > dora-report.md
          # DORA Metrics Report
          **Period:** Last 30 days
          **Generated:** $(date -u +"%Y-%m-%d %H:%M:%S UTC")

          ## Metrics Summary

          | Metric | Value | Performance |
          |--------|-------|-------------|
          | Deployment Frequency | ${DEPLOY_PER_DAY}/day | ${DEPLOY_FREQUENCY} |
          | Lead Time for Changes | ${LEAD_TIME_HOURS} hours | ${LEAD_TIME_PERFORMANCE} |

          ## Recommendations
          $(if [ "${DEPLOY_FREQUENCY}" != "elite" ]; then echo "- Increase deployment frequency through smaller, more frequent releases"; fi)
          $(if [ "${LEAD_TIME_PERFORMANCE}" != "elite" ]; then echo "- Reduce lead time by automating more of the review process"; fi)
          EOF

      - name: Upload Report
        uses: actions/upload-artifact@v4
        with:
          name: dora-metrics-report
          path: dora-report.md
331
+ ```
332
+
333
+ ### Custom Metrics Collection Script
334
+
335
+ ```typescript
336
+ // scripts/collect-dora-metrics.ts
337
+ import { Octokit } from '@octokit/rest';
338
+
339
/** Performance tier shared by every DORA metric. */
type PerformanceLevel = 'elite' | 'high' | 'medium' | 'low';

/** Snapshot of the four DORA metrics for one repository over one period. */
interface DORAMetrics {
  period: { start: Date; end: Date };
  deploymentFrequency: {
    count: number;
    perDay: number;
    performance: PerformanceLevel;
  };
  leadTime: {
    medianHours: number;
    p90Hours: number;
    performance: PerformanceLevel;
  };
  changeFailureRate: {
    total: number;
    failures: number;
    rate: number;
    performance: PerformanceLevel;
  };
  mttr: {
    medianHours: number;
    incidentCount: number;
    performance: PerformanceLevel;
  };
  overallPerformance: PerformanceLevel;
}

/**
 * Collects DORA metrics for a single GitHub repository.
 *
 * Data sources: runs of the `deploy.yml` workflow (deployments), merged pull
 * requests (lead time, approximated as PR creation to merge) and closed
 * issues labelled `incident` (failures and restoration time).
 */
class DORAMetricsCollector {
  private readonly octokit: Octokit;
  private readonly owner: string;
  private readonly repo: string;

  /**
   * @param token - GitHub token used to authenticate API requests.
   * @param owner - Repository owner (user or organization).
   * @param repo - Repository name.
   */
  constructor(token: string, owner: string, repo: string) {
    this.octokit = new Octokit({ auth: token });
    this.owner = owner;
    this.repo = repo;
  }

  /**
   * Gathers all four metrics for the period ending now.
   *
   * @param periodDays - Length of the look-back window in days (default 30).
   * @returns Each metric with its tier, plus the averaged overall tier.
   */
  async collect(periodDays: number = 30): Promise<DORAMetrics> {
    const end = new Date();
    const start = new Date();
    start.setDate(start.getDate() - periodDays);

    // The three sources are independent, so fetch them concurrently.
    const [deployments, prs, incidents] = await Promise.all([
      this.getDeployments(start, end),
      this.getMergedPRs(start, end),
      this.getIncidents(start, end)
    ]);

    // Calculate each metric
    const deploymentFrequency = this.calcDeploymentFrequency(deployments, periodDays);
    const leadTime = this.calcLeadTime(prs);
    const changeFailureRate = this.calcChangeFailureRate(deployments, incidents);
    const mttr = this.calcMTTR(incidents);

    // Determine overall performance
    const overallPerformance = this.getOverallPerformance([
      deploymentFrequency.performance,
      leadTime.performance,
      changeFailureRate.performance,
      mttr.performance
    ]);

    return {
      period: { start, end },
      deploymentFrequency,
      leadTime,
      changeFailureRate,
      mttr,
      overallPerformance
    };
  }

  /** Lists every `deploy.yml` workflow run created inside the window. */
  private async getDeployments(start: Date, end: Date) {
    // Paginate: one request returns a single page (30 runs by default),
    // which would cap the measured frequency at 1 deployment per day for a
    // 30-day window.
    return this.octokit.paginate(this.octokit.actions.listWorkflowRuns, {
      owner: this.owner,
      repo: this.repo,
      workflow_id: 'deploy.yml',
      created: `${start.toISOString()}..${end.toISOString()}`,
      per_page: 100
    });
  }

  /** Lists pull requests merged inside the window. */
  private async getMergedPRs(start: Date, end: Date) {
    const closed = await this.octokit.paginate(
      this.octokit.pulls.list,
      {
        owner: this.owner,
        repo: this.repo,
        state: 'closed',
        sort: 'updated',
        // Must be explicit: with `sort: 'updated'` the API defaults to
        // ascending order, i.e. the least recently updated PRs first.
        direction: 'desc',
        per_page: 100
      },
      (response, done) => {
        // Results are newest-updated first and a merge always updates the
        // PR, so once a page reaches PRs last updated before the window no
        // later page can contain a merge inside it.
        if (response.data.some(pr => new Date(pr.updated_at) < start)) {
          done();
        }
        return response.data;
      }
    );

    return closed.filter(pr =>
      pr.merged_at &&
      new Date(pr.merged_at) >= start &&
      new Date(pr.merged_at) <= end
    );
  }

  /** Lists closed `incident` issues that were resolved inside the window. */
  private async getIncidents(start: Date, end: Date) {
    // This would typically come from PagerDuty, OpsGenie, or GitHub Issues
    // Placeholder implementation
    const issues = await this.octokit.paginate(this.octokit.issues.listForRepo, {
      owner: this.owner,
      repo: this.repo,
      labels: 'incident',
      state: 'closed',
      // `since` filters on last update, which is at or after the close
      // time, so it never drops an incident closed inside the window.
      since: start.toISOString(),
      per_page: 100
    });

    return issues.filter(issue => {
      // The issues endpoint also returns pull requests; they are not incidents.
      if (issue.pull_request || !issue.closed_at) {
        return false;
      }
      const closedAt = new Date(issue.closed_at);
      return closedAt >= start && closedAt <= end;
    });
  }

  /** Counts successful deployments and converts them to a per-day rate. */
  private calcDeploymentFrequency(
    deployments: ReadonlyArray<{ conclusion: string | null }>,
    periodDays: number
  ) {
    const successful = deployments.filter(d => d.conclusion === 'success');
    const perDay = successful.length / periodDays;

    return {
      count: successful.length,
      perDay,
      performance: this.getFrequencyPerformance(perDay)
    };
  }

  /**
   * Median and p90 of PR creation-to-merge time, in hours.
   * Both are 0 (and therefore rated `'elite'`) when no PR was merged.
   */
  private calcLeadTime(prs: ReadonlyArray<{ created_at: string; merged_at: string | null }>) {
    const times = prs
      .flatMap(pr =>
        pr.merged_at ? [DORAMetricsCollector.hoursBetween(pr.created_at, pr.merged_at)] : []
      )
      .sort((a, b) => a - b);

    const median = DORAMetricsCollector.medianOfSorted(times);
    // Nearest-rank p90; integer arithmetic avoids 0.9 * n rounding errors.
    const p90 = times.length > 0 ? times[Math.ceil((times.length * 9) / 10) - 1] : 0;

    return {
      medianHours: median,
      p90Hours: p90,
      performance: this.getLeadTimePerformance(median)
    };
  }

  /** Incidents per successful deployment, as a percentage. */
  private calcChangeFailureRate(
    deployments: ReadonlyArray<{ conclusion: string | null }>,
    incidents: ReadonlyArray<unknown>
  ) {
    const total = deployments.filter(d => d.conclusion === 'success').length;
    const failures = incidents.length; // Simplified: incidents are not linked to a specific deployment

    const rate = total > 0 ? (failures / total) * 100 : 0;

    return {
      total,
      failures,
      rate,
      performance: this.getFailureRatePerformance(rate)
    };
  }

  /**
   * Median open-to-close time of incidents, in hours.
   * The median is 0 (and therefore rated `'elite'`) when there were none.
   */
  private calcMTTR(incidents: ReadonlyArray<{ created_at: string; closed_at: string | null }>) {
    const times = incidents
      .flatMap(i =>
        i.closed_at ? [DORAMetricsCollector.hoursBetween(i.created_at, i.closed_at)] : []
      )
      .sort((a, b) => a - b);

    const median = DORAMetricsCollector.medianOfSorted(times);

    return {
      medianHours: median,
      incidentCount: incidents.length,
      performance: this.getMTTRPerformance(median)
    };
  }

  /** Elapsed hours between two ISO-8601 timestamps. */
  private static hoursBetween(fromIso: string, toIso: string): number {
    return (new Date(toIso).getTime() - new Date(fromIso).getTime()) / (1000 * 60 * 60);
  }

  /**
   * Median of an ascending-sorted list; averages the two middle values when
   * the count is even and returns 0 for an empty list.
   */
  private static medianOfSorted(sortedAsc: readonly number[]): number {
    if (sortedAsc.length === 0) {
      return 0;
    }
    const mid = Math.floor(sortedAsc.length / 2);
    return sortedAsc.length % 2 === 1
      ? sortedAsc[mid]
      : (sortedAsc[mid - 1] + sortedAsc[mid]) / 2;
  }

  /** Tier for a deployments-per-day rate (daily / weekly / monthly cut-offs). */
  private getFrequencyPerformance(perDay: number): PerformanceLevel {
    if (perDay >= 1) return 'elite';
    if (perDay >= 1 / 7) return 'high';
    if (perDay >= 1 / 30) return 'medium';
    return 'low';
  }

  /** Tier for a lead time in hours (1 hour / 1 day / 1 week cut-offs). */
  private getLeadTimePerformance(hours: number): PerformanceLevel {
    if (hours < 1) return 'elite';
    if (hours < 24) return 'high';
    if (hours < 168) return 'medium';
    return 'low';
  }

  /** Tier for a failure rate in percent (15 / 30 / 45 cut-offs). */
  private getFailureRatePerformance(rate: number): PerformanceLevel {
    if (rate <= 15) return 'elite';
    if (rate <= 30) return 'high';
    if (rate <= 45) return 'medium';
    return 'low';
  }

  /** Tier for a restoration time in hours (1 hour / 1 day / 1 week cut-offs). */
  private getMTTRPerformance(hours: number): PerformanceLevel {
    if (hours < 1) return 'elite';
    if (hours < 24) return 'high';
    if (hours < 168) return 'medium';
    return 'low';
  }

  /** Averages the per-metric tiers (elite = 4 ... low = 1) into one tier. */
  private getOverallPerformance(performances: PerformanceLevel[]): PerformanceLevel {
    const scores: Record<PerformanceLevel, number> = { elite: 4, high: 3, medium: 2, low: 1 };
    const avg = performances.reduce((sum, p) => sum + scores[p], 0) / performances.length;

    if (avg >= 3.5) return 'elite';
    if (avg >= 2.5) return 'high';
    if (avg >= 1.5) return 'medium';
    return 'low';
  }
}
551
+
552
// Usage
// Fail fast with a clear message when the token is missing, rather than
// passing `undefined` along and getting an authentication error later.
const githubToken = process.env.GITHUB_TOKEN;
if (!githubToken) {
  throw new Error('GITHUB_TOKEN environment variable is required');
}

const collector = new DORAMetricsCollector(githubToken, 'myorg', 'myrepo');

const metrics = await collector.collect(30);
console.log(JSON.stringify(metrics, null, 2));
561
+ ```
562
+
563
+ ### Grafana Dashboard Configuration
564
+
565
+ ```json
566
+ {
567
+ "dashboard": {
568
+ "title": "DORA Metrics Dashboard",
569
+ "panels": [
570
+ {
571
+ "title": "Deployment Frequency",
572
+ "type": "stat",
573
+ "targets": [
574
+ {
575
+ "expr": "sum(increase(deployments_total{environment=\"production\"}[30d])) / 30",
576
+ "legendFormat": "Deploys/day"
577
+ }
578
+ ],
579
+ "fieldConfig": {
580
+ "defaults": {
581
+ "thresholds": {
582
+ "steps": [
583
+ { "value": 0, "color": "red" },
584
+ { "value": 0.03, "color": "orange" },
585
+ { "value": 0.14, "color": "yellow" },
586
+ { "value": 1, "color": "green" }
587
+ ]
588
+ }
589
+ }
590
+ }
591
+ },
592
+ {
593
+ "title": "Lead Time for Changes",
594
+ "type": "stat",
595
+ "targets": [
596
+ {
597
+ "expr": "histogram_quantile(0.5, sum(rate(lead_time_hours_bucket[30d])) by (le))",
598
+ "legendFormat": "Median (hours)"
599
+ }
600
+ ],
601
+ "fieldConfig": {
602
+ "defaults": {
603
+ "unit": "h",
604
+ "thresholds": {
605
+ "steps": [
606
+ { "value": 0, "color": "green" },
607
+ { "value": 1, "color": "yellow" },
608
+ { "value": 24, "color": "orange" },
609
+ { "value": 168, "color": "red" }
610
+ ]
611
+ }
612
+ }
613
+ }
614
+ },
615
+ {
616
+ "title": "Change Failure Rate",
617
+ "type": "gauge",
618
+ "targets": [
619
+ {
620
+ "expr": "sum(increase(deployments_failed_total[30d])) / sum(increase(deployments_total[30d])) * 100",
621
+ "legendFormat": "Failure Rate %"
622
+ }
623
+ ],
624
+ "fieldConfig": {
625
+ "defaults": {
626
+ "unit": "percent",
627
+ "min": 0,
628
+ "max": 100,
629
+ "thresholds": {
630
+ "steps": [
631
+ { "value": 0, "color": "green" },
632
+ { "value": 15, "color": "yellow" },
633
+ { "value": 30, "color": "orange" },
634
+ { "value": 45, "color": "red" }
635
+ ]
636
+ }
637
+ }
638
+ }
639
+ },
640
+ {
641
+ "title": "Time to Restore (MTTR)",
642
+ "type": "stat",
643
+ "targets": [
644
+ {
645
+ "expr": "histogram_quantile(0.5, sum(rate(incident_resolution_hours_bucket[30d])) by (le))",
646
+ "legendFormat": "Median (hours)"
647
+ }
648
+ ],
649
+ "fieldConfig": {
650
+ "defaults": {
651
+ "unit": "h",
652
+ "thresholds": {
653
+ "steps": [
654
+ { "value": 0, "color": "green" },
655
+ { "value": 1, "color": "yellow" },
656
+ { "value": 24, "color": "orange" },
657
+ { "value": 168, "color": "red" }
658
+ ]
659
+ }
660
+ }
661
+ }
662
+ }
663
+ ]
664
+ }
665
+ }
666
+ ```
667
+
668
+ ## Tools and Platforms
669
+
670
+ | Tool | Type | Features |
671
+ |------|------|----------|
672
+ | **Four Keys** (Google) | Open Source | GitHub/GitLab integration, BigQuery |
673
+ | **LinearB** | Commercial | Git analytics, workflow metrics |
674
+ | **Sleuth** | Commercial | Deploy tracking, change intelligence |
675
+ | **Faros AI** | Commercial | Multi-source aggregation |
676
+ | **Propelo** | Commercial | SDLC insights |
677
+ | **Jellyfish** | Commercial | Engineering management |
678
+
679
+ ### Four Keys Setup (Google)
680
+
681
+ ```bash
682
+ # Deploy Four Keys to GCP
683
+ git clone https://github.com/dora-team/fourkeys.git
684
+ cd fourkeys
685
+
686
+ # Configure
687
+ export PROJECT_ID="my-project"
688
+ export REGION="us-central1"
689
+
690
+ # Deploy
691
+ ./setup/setup.sh
692
+
693
+ # Configure webhook for GitHub events
694
+ # Add to GitHub repo settings: https://<REGION>-<PROJECT_ID>.cloudfunctions.net/github-parser
695
+ ```
696
+
697
+ ## Improvement Strategies
698
+
699
+ ### Improving Deployment Frequency
700
+
701
+ | Current | Target | Strategy |
702
+ |---------|--------|----------|
703
+ | Monthly | Weekly | Automate deployments, reduce batch size |
704
+ | Weekly | Daily | Feature flags, trunk-based development |
705
+ | Daily | Multiple/day | Continuous deployment, small PRs |
706
+
707
+ ### Improving Lead Time
708
+
709
+ | Bottleneck | Solution |
710
+ |------------|----------|
711
+ | Long code reviews | Smaller PRs, async reviews, automation |
712
+ | Manual testing | Automated tests, shift-left |
713
+ | Manual deployments | CI/CD automation |
714
+ | Environment issues | Infrastructure as code |
715
+
716
+ ### Reducing Change Failure Rate
717
+
718
+ | Problem | Solution |
719
+ |---------|----------|
720
+ | Insufficient testing | Increase coverage, add integration tests |
721
+ | Big bang releases | Feature flags, canary releases |
722
+ | Lack of review | Automated checks, required reviews |
723
+ | Poor monitoring | Better observability, alerting |
724
+
725
+ ### Reducing MTTR
726
+
727
+ | Improvement | Impact |
728
+ |-------------|--------|
729
+ | Runbooks | Faster diagnosis |
730
+ | Feature flags | Instant rollback |
731
+ | Observability | Faster root cause |
732
+ | Chaos engineering | Proactive resilience |
733
+
734
+ ## Best Practices
735
+
736
+ ### 1. Measure Consistently
737
+
738
+ ```typescript
739
/**
 * Standardized metric definitions.
 *
 * Records, for each DORA metric, where its data comes from and how it is
 * filtered or measured and then aggregated, so that every report uses the
 * same definition.
 */
const METRIC_DEFINITIONS = {
  // How often code reaches production.
  deploymentFrequency: {
    source: 'GitHub Actions',
    filter: 'workflow=deploy.yml, conclusion=success',
    aggregation: 'count per day'
  },

  // How long a change takes to get merged.
  leadTime: {
    source: 'GitHub PRs',
    measurement: 'created_at to merged_at',
    aggregation: 'median'
  },

  // Share of deployments followed by an incident.
  changeFailureRate: {
    source: 'GitHub Issues + Deployments',
    filter: 'label=incident, within 24h of deployment',
    aggregation: 'incidents / deployments * 100'
  },

  // How long incidents take to resolve.
  mttr: {
    source: 'PagerDuty',
    measurement: 'triggered_at to resolved_at',
    aggregation: 'median'
  }
};
762
+ ```
763
+
764
+ ### 2. Set Realistic Goals
765
+
766
+ ```yaml
767
+ # Quarterly improvement targets
768
+ q1_2024:
769
+ deployment_frequency:
770
+ current: 0.5/day
771
+ target: 1.0/day
772
+ improvement: 100%
773
+ lead_time:
774
+ current: 48h
775
+ target: 24h
776
+ improvement: 50%
777
+ change_failure_rate:
778
+ current: 25%
779
+ target: 20%
780
+ improvement: 20%
781
+ mttr:
782
+ current: 4h
783
+ target: 2h
784
+ improvement: 50%
785
+ ```
786
+
787
+ ### 3. Avoid Gaming Metrics
788
+
789
+ | Gaming Behavior | Why It's Bad | Better Approach |
790
+ |-----------------|--------------|-----------------|
791
+ | Deploying empty commits | Fake frequency | Track meaningful changes |
792
+ | Not labeling incidents | Hide failures | Blameless culture |
793
+ | Splitting PRs artificially | Fake lead time | Focus on value |
794
+ | Rushing fixes | Lower quality | Fix root cause |
795
+
796
+ ## Use Cases
797
+
798
+ ### 1. Team Performance Review
799
+
800
+ ```typescript
801
/**
 * Quarterly DORA review.
 *
 * Collects the last 90 days of metrics for a team and assembles a summary,
 * a comparison against the previous quarter and industry benchmarks, and a
 * list of recommendations.
 *
 * @param team - Identifier of the team under review.
 */
async function quarterlyReview(team: string) {
  const metrics = await collectMetrics({ team, period: '90d' });

  const summary = {
    overallPerformance: metrics.overallPerformance,
    strongestMetric: findStrongest(metrics),
    improvementArea: findWeakest(metrics)
  };

  const comparison = {
    vsLastQuarter: await compareToLastQuarter(team, metrics),
    vsIndustry: compareToIndustryBenchmarks(metrics)
  };

  const recommendations = generateRecommendations(metrics);

  return { summary, comparison, recommendations };
}
818
+ ```
819
+
820
+ ### 2. DevOps Transformation Tracking
821
+
822
+ ```typescript
823
/**
 * Track transformation progress.
 *
 * Target values for each phase of a DevOps transformation; later phases
 * tighten earlier targets and add further metrics.
 */
const transformationGoals = {
  // Phase 1 - Foundation
  phase1: {
    deploymentFrequency: 'weekly',
    leadTime: '< 1 week'
  },

  // Phase 2 - Acceleration
  phase2: {
    deploymentFrequency: 'daily',
    leadTime: '< 1 day',
    changeFailureRate: '< 30%'
  },

  // Phase 3 - Excellence
  phase3: {
    deploymentFrequency: 'multiple/day',
    leadTime: '< 1 hour',
    changeFailureRate: '< 15%',
    mttr: '< 1 hour'
  }
};
841
+ ```
842
+
843
+ ## Related Skills
844
+
845
+ - `devops/github-actions` - CI/CD automation
846
+ - `devops/observability` - Monitoring and metrics
847
+ - `testing/comprehensive-testing` - Quality gates
848
+ - `devops/feature-flags` - Progressive delivery
849
+
850
+ ---
851
+
852
+ *Think Omega. Build Omega. Be Omega.*