create-interview-cockpit 0.28.0 → 0.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,6 +12,7 @@ import type {
12
12
  GithubLabRulesetRules,
13
13
  } from "./types";
14
14
  import { rulesetFromLegacyProtection } from "./codeowners";
15
+ import { AWS_GOVERNANCE_IAM_FILES } from "./awsGovernanceIamLab";
15
16
 
16
17
  // ─── Default GitHub Lab "org" roster ────────────────────────────────────
17
18
  //
@@ -269,6 +270,29 @@ require("fs").readdirSync(".").forEach((f) => console.log(" -", f));
269
270
 
270
271
  # Docs are reviewed by the docs writer.
271
272
  *.md @acme/docs @carol
273
+ `,
274
+
275
+ // PR template — github.com auto-fills the PR description from this
276
+ // file. Put extra *.md files inside .github/PULL_REQUEST_TEMPLATE/ and
277
+ // select one per PR with the ?template=<file>.md query parameter.
278
+ ".github/pull_request_template.md": `## Summary
279
+
280
+ <!-- Brief description of what this PR does and why. -->
281
+
282
+ ## Changes
283
+
284
+ -
285
+
286
+ ## Testing
287
+
288
+ - [ ] Ran \`act -j build\` locally
289
+ - [ ] Updated tests
290
+ - [ ] Updated docs
291
+
292
+ ## Checklist
293
+
294
+ - [ ] PR title follows conventional-commit style
295
+ - [ ] Linked the related issue (Closes #...)
272
296
  `,
273
297
  };
274
298
 
@@ -489,6 +513,1907 @@ li {
489
513
  `,
490
514
  };
491
515
 
516
+ // ─── Platform Governance Template ────────────────────────────────────────
517
+ //
518
+ // Mirrors a real-world "PLF-governance" mono-repo: one repo that owns
519
+ // Azure PIM/RBAC, Azure Policy, AWS IAM, and user offboarding — all as
520
+ // code, gated by CODEOWNERS + PR template + automated pipelines.
521
+ //
522
+ // In the lab the focus is the .github/ governance plumbing (CODEOWNERS,
523
+ // PR template, validation workflow, deploy workflows). The cloud config
524
+ // files (Terraform, policy JSON) are included so learners can read them,
525
+ // but the workflows are what the lab actually runs.
526
+
527
+ const GOVERNANCE_FILES: Record<string, string> = {
528
+ ".github/CODEOWNERS": `# CODEOWNERS for platform-governance.
529
+ #
530
+ # WHAT THIS FILE DOES
531
+ # -------------------
532
+ # GitHub auto-requests review from these owners whenever a matching path
533
+ # changes. Combined with branch protection ("require review from Code
534
+ # Owners"), it means governance changes CANNOT be merged without the
535
+ # right team approving.
536
+ #
537
+ # WHY IT MATTERS
538
+ # --------------
539
+ # Governance is the repo that controls access to everything else. If
540
+ # anyone could merge a PR here, they could grant themselves admin
541
+ # everywhere. CODEOWNERS turns that into a hard, audit-friendly rule.
542
+ #
543
+ # SYNTAX
544
+ # ------
545
+ # <pattern> <owner1> <owner2> ...
546
+ # Last matching pattern wins.
547
+
548
+ # Default: platform team owns everything in this repo.
549
+ * @acme/platform-team
550
+
551
+ # Azure governance — PIM/RBAC and Azure Policy require platform + sec review.
552
+ /azure-pim-solution/ @acme/platform-team @acme/security
553
+ /azure-policy-solution/ @acme/platform-team @acme/security
554
+
555
+ # AWS governance — same idea, plus AWS-specific reviewers.
556
+ /aws-governance/ @acme/platform-team @acme/aws-admins
557
+
558
+ # User lifecycle — IT/IAM team co-owns offboarding logic.
559
+ /user-management/ @acme/platform-team @acme/iam
560
+
561
+ # .github changes (workflows, this file, PR template) are highest-trust.
562
+ /.github/ @acme/platform-leads
563
+ `,
564
+ ".github/pull_request_template.md": `<!--
565
+ PR TEMPLATE
566
+ ===========
567
+ This template loads automatically when a contributor opens a PR.
568
+ It forces them to declare WHAT changes, WHERE it deploys, and WHETHER
569
+ it has been validated — so reviewers (and auditors later) have the
570
+ context required to approve a governance change.
571
+
572
+ Senior signal: the repo *governs the process of changing governance*.
573
+ This is what separates a real platform team from a script collection.
574
+ -->
575
+
576
+ ## ⚠️ Heads-up
577
+
578
+ Merging to \`main\` may **automatically deploy to production** via the
579
+ workflows in \`.github/workflows/\`. Re-read your diff before requesting
580
+ review.
581
+
582
+ ---
583
+
584
+ ## Type of change
585
+
586
+ <!-- Pick one. Helps reviewers know what risk to expect. -->
587
+
588
+ - [ ] Azure PIM / RBAC assignment (user, group, or SPN)
589
+ - [ ] Azure Policy / initiative
590
+ - [ ] AWS IAM role / permission set / policy
591
+ - [ ] User offboarding configuration
592
+ - [ ] CI / workflow / repo governance
593
+ - [ ] Documentation only
594
+
595
+ ## Environments affected
596
+
597
+ - [ ] Test
598
+ - [ ] Staging
599
+ - [ ] Production
600
+ - [ ] FedRAMP Test
601
+ - [ ] FedRAMP Prod
602
+
603
+ ## Pre-merge checklist
604
+
605
+ - [ ] Object IDs (user/group/SPN) verified to exist in the target tenant
606
+ - [ ] Policy / role tested in **Test** before promotion
607
+ - [ ] No secrets committed (use OIDC / federated identity, not keys)
608
+ - [ ] Platform team notified in \`#platform-governance\` if scope is large
609
+ - [ ] Terraform \`plan\` reviewed in CI artifacts
610
+
611
+ ## What does this change do?
612
+
613
+ <!-- Plain English, 2–4 sentences. Pretend the reviewer is on call. -->
614
+
615
+ ## How was it tested?
616
+
617
+ <!-- e.g. "applied to test subscription, confirmed role assignment in
618
+ Azure portal, ran \`terraform plan\` against staging — no drift." -->
619
+
620
+ ## Rollback plan
621
+
622
+ <!-- How do we undo this if it breaks production after merge? -->
623
+ `,
624
+ ".github/workflows/aws-governance-deploy.yml": `# Deploy AWS IAM roles, permission sets, and policies via Terraform.
625
+ #
626
+ # Notice the structural twin to azure-pim-deploy.yml — *that consistency
627
+ # is itself a governance signal*. Multi-cloud governance is much easier
628
+ # to audit when every cloud's pipeline looks the same.
629
+
630
+ name: AWS governance deploy
631
+
632
+ on:
633
+ push:
634
+ branches: [main]
635
+ paths: ["aws-governance/**"]
636
+ workflow_dispatch:
637
+
638
+ permissions:
639
+ contents: read
640
+ id-token: write
641
+
642
+ jobs:
643
+ deploy:
644
+ strategy:
645
+ fail-fast: true
646
+ max-parallel: 1
647
+ matrix:
648
+ environment: [test, staging, prod]
649
+ runs-on: ubuntu-latest
650
+ environment: \${{ matrix.environment }}
651
+ concurrency:
652
+ group: aws-governance-\${{ matrix.environment }}
653
+ steps:
654
+ - uses: actions/checkout@v4
655
+
656
+ # OIDC -> AWS. The role's trust policy restricts which repo and
657
+ # which branch can assume it (see aws-governance/iam/github-oidc-role.tf
658
+ # in a real repo). No static AWS keys anywhere.
659
+ - uses: aws-actions/configure-aws-credentials@v4
660
+ with:
661
+ role-to-assume: \${{ vars.AWS_DEPLOY_ROLE_ARN }}
662
+ aws-region: us-east-1
663
+
664
+ - uses: hashicorp/setup-terraform@v3
665
+ - working-directory: aws-governance
666
+ run: |
667
+ terraform init -backend-config=envs/\${{ matrix.environment }}.backend.hcl
668
+ terraform apply -auto-approve -var-file=envs/\${{ matrix.environment }}.tfvars
669
+ `,
670
+ ".github/workflows/azure-pim-deploy.yml": `# Deploy Azure PIM + RBAC assignments.
671
+ #
672
+ # TRIGGERED BY
673
+ # ------------
674
+ # - Push to main (after PR review) -> deploy to test, then staging, then prod.
675
+ # - Manual dispatch -> targeted env, useful for re-runs / drift correction.
676
+ # - Schedule -> nightly drift check (no apply, just plan).
677
+ #
678
+ # DESIGN NOTES
679
+ # ------------
680
+ # - Environments use GitHub "Environment" protection rules so prod
681
+ # requires a separate approver beyond the PR review.
682
+ # - Concurrency group prevents two deploys racing on the same env state.
683
+ # - OIDC federation, no long-lived service principal secrets.
684
+
685
+ name: Azure PIM deploy
686
+
687
+ on:
688
+ push:
689
+ branches: [main]
690
+ paths: ["azure-pim-solution/**"]
691
+ workflow_dispatch:
692
+ inputs:
693
+ environment:
694
+ type: choice
695
+ options: [test, staging, prod, fedramp-test, fedramp-prod]
696
+ default: test
697
+ schedule:
698
+ # Nightly drift check at 02:00 UTC. Plan only, no apply.
699
+ - cron: "0 2 * * *"
700
+
701
+ permissions:
702
+ contents: read
703
+ id-token: write
704
+
705
+ jobs:
706
+ plan-and-apply:
707
+ strategy:
708
+ # Promote sequentially: test -> staging -> prod. If test fails,
709
+ # later envs never run. fail-fast=true is the safer default here.
710
+ fail-fast: true
711
+ max-parallel: 1
712
+ matrix:
713
+ environment:
714
+ - test
715
+ - staging
716
+ - prod
717
+ runs-on: ubuntu-latest
718
+ environment: \${{ matrix.environment }}
719
+ concurrency:
720
+ # One deploy per env at a time. Avoids racing Terraform state.
721
+ group: azure-pim-\${{ matrix.environment }}
722
+ cancel-in-progress: false
723
+ steps:
724
+ - uses: actions/checkout@v4
725
+
726
+ - uses: azure/login@v2
727
+ with:
728
+ client-id: \${{ vars.AZURE_DEPLOY_CLIENT_ID }}
729
+ tenant-id: \${{ vars.AZURE_TENANT_ID }}
730
+ subscription-id: \${{ vars.AZURE_SUBSCRIPTION_ID }}
731
+
732
+ - uses: hashicorp/setup-terraform@v3
733
+
734
+ - name: Terraform init
735
+ working-directory: azure-pim-solution
736
+ run: terraform init -backend-config=envs/\${{ matrix.environment }}.backend.hcl
737
+
738
+ - name: Terraform plan
739
+ working-directory: azure-pim-solution
740
+ run: terraform plan -var-file=envs/\${{ matrix.environment }}.tfvars -out=tfplan
741
+
742
+ # Drift-only mode: scheduled nightly run stops here.
743
+ - name: Skip apply on schedule
744
+ if: github.event_name == 'schedule'
745
+ run: echo "Drift check complete; not applying."
746
+
747
+ - name: Terraform apply
748
+ if: github.event_name != 'schedule'
749
+ working-directory: azure-pim-solution
750
+ run: terraform apply -auto-approve tfplan
751
+ `,
752
+ ".github/workflows/azure-policy-deploy.yml": `# Deploy Azure Policy definitions + assignments.
753
+ #
754
+ # Azure Policy is the rule engine that audits or blocks resources at
755
+ # create/update time (e.g. "deny storage accounts without TLS 1.2").
756
+ #
757
+ # Same promotion model as PIM: test -> staging -> prod.
758
+
759
+ name: Azure Policy deploy
760
+
761
+ on:
762
+ push:
763
+ branches: [main]
764
+ paths: ["azure-policy-solution/**"]
765
+ workflow_dispatch:
766
+
767
+ permissions:
768
+ contents: read
769
+ id-token: write
770
+
771
+ jobs:
772
+ deploy:
773
+ strategy:
774
+ fail-fast: true
775
+ max-parallel: 1
776
+ matrix:
777
+ environment: [test, staging, prod]
778
+ runs-on: ubuntu-latest
779
+ environment: \${{ matrix.environment }}
780
+ concurrency:
781
+ group: azure-policy-\${{ matrix.environment }}
782
+ steps:
783
+ - uses: actions/checkout@v4
784
+
785
+ - uses: azure/login@v2
786
+ with:
787
+ client-id: \${{ vars.AZURE_DEPLOY_CLIENT_ID }}
788
+ tenant-id: \${{ vars.AZURE_TENANT_ID }}
789
+ subscription-id: \${{ vars.AZURE_SUBSCRIPTION_ID }}
790
+
791
+ # Step 1: register / update every policy DEFINITION (the "rule").
792
+ - name: Upsert policy definitions
793
+ run: |
794
+ for f in azure-policy-solution/policies/*.json; do
795
+ name=$(jq -r '.name' "$f")
796
+ az policy definition create \\
797
+ --name "$name" \\
798
+ --rules "$(jq -c '.properties.policyRule' "$f")" \\
799
+ --params "$(jq -c '.properties.parameters // {}' "$f")" \\
800
+ --mode All \\
801
+ --description "$(jq -r '.properties.description' "$f")"
802
+ done
803
+
804
+ # Step 2: register / update INITIATIVES (a bundle of policies).
805
+ - name: Upsert initiatives
806
+ run: |
807
+ for f in azure-policy-solution/initiatives/*.json; do
808
+ az policy set-definition create \\
809
+ --name "$(jq -r '.name' "$f")" \\
810
+ --definitions "$(jq -c '.properties.policyDefinitions' "$f")"
811
+ done
812
+
813
+ # Step 3: ASSIGN initiatives to scopes (subscriptions / mgmt groups)
814
+ # via Terraform, so the assignments are tracked in state and can drift-correct.
815
+ - uses: hashicorp/setup-terraform@v3
816
+ - working-directory: azure-policy-solution/assignments
817
+ run: |
818
+ terraform init -backend-config=envs/\${{ matrix.environment }}.backend.hcl
819
+ terraform apply -auto-approve -var-file=envs/\${{ matrix.environment }}.tfvars
820
+ `,
821
+ ".github/workflows/pr_validations.yml": `# PR validation workflow.
822
+ #
823
+ # WHAT THIS DOES
824
+ # --------------
825
+ # Runs on every PR to enforce baseline hygiene before reviewers spend
826
+ # time on it: PR title format, Terraform fmt/validate, JSON schema for
827
+ # Azure Policy files, and object ID validation for any RBAC assignments.
828
+ #
829
+ # WHY IT MATTERS
830
+ # --------------
831
+ # - Catches "fat-finger" object IDs *before* a deploy fails noisily in prod.
832
+ # - Forces conventional PR titles so the changelog is readable.
833
+ # - Makes "did the author actually run terraform fmt?" not a review topic.
834
+
835
+ name: PR validations
836
+
837
+ on:
838
+ pull_request:
839
+ branches: [main, "release/*"]
840
+
841
+ # Least-privilege token. Workflows should never use the default
842
+ # read-write token unless they truly need it.
843
+ permissions:
844
+ contents: read
845
+ pull-requests: read
846
+ id-token: write # for OIDC federation to Azure / AWS for read-only checks
847
+
848
+ jobs:
849
+ pr-title:
850
+ name: Conventional PR title
851
+ runs-on: ubuntu-latest
852
+ steps:
853
+ - uses: amannn/action-semantic-pull-request@v5
854
+ env:
855
+ GITHUB_TOKEN: \${{ secrets.GITHUB_TOKEN }}
856
+ with:
857
+ # e.g. "feat(azure-pim): add platform-operator role assignment"
858
+ types: |
859
+ feat
860
+ fix
861
+ chore
862
+ docs
863
+ refactor
864
+ ci
865
+ requireScope: true
866
+
867
+ terraform:
868
+ name: Terraform fmt + validate
869
+ runs-on: ubuntu-latest
870
+ strategy:
871
+ matrix:
872
+ # Each governance area has its own Terraform root module.
873
+ dir:
874
+ - azure-pim-solution
875
+ - azure-policy-solution/assignments
876
+ - aws-governance
877
+ steps:
878
+ - uses: actions/checkout@v4
879
+ - uses: hashicorp/setup-terraform@v3
880
+ - run: terraform -chdir=\${{ matrix.dir }} fmt -check -recursive
881
+ - run: terraform -chdir=\${{ matrix.dir }} init -backend=false
882
+ - run: terraform -chdir=\${{ matrix.dir }} validate
883
+
884
+ azure-policy-schema:
885
+ name: Validate Azure Policy JSON
886
+ runs-on: ubuntu-latest
887
+ steps:
888
+ - uses: actions/checkout@v4
889
+ - name: Validate every policy definition has required fields
890
+ # Tiny shell check — real repos use a proper JSON schema, but this
891
+ # makes the *intent* obvious in a learning template.
892
+ run: |
893
+ set -euo pipefail
894
+ for f in azure-policy-solution/policies/*.json; do
895
+ jq -e '.properties.policyType and .properties.policyRule' "$f" >/dev/null \\
896
+ || { echo "::error file=$f::missing properties.policyType or .policyRule"; exit 1; }
897
+ done
898
+
899
+ object-id-validation:
900
+ name: Validate Azure object IDs exist
901
+ runs-on: ubuntu-latest
902
+ steps:
903
+ - uses: actions/checkout@v4
904
+ # OIDC = OpenID Connect. Lets GitHub Actions assume an Azure
905
+ # identity WITHOUT storing a long-lived secret. Big security win.
906
+ - uses: azure/login@v2
907
+ with:
908
+ client-id: \${{ vars.AZURE_RO_CLIENT_ID }}
909
+ tenant-id: \${{ vars.AZURE_TENANT_ID }}
910
+ subscription-id: \${{ vars.AZURE_SUBSCRIPTION_ID }}
911
+ - name: Check every principalId in PIM configs
912
+ run: ./scripts/validate-object-ids.sh azure-pim-solution
913
+ `,
914
+ ".github/workflows/user-offboarding.yml": `# Scheduled user offboarding.
915
+ #
916
+ # WHAT THIS DOES
917
+ # --------------
918
+ # Compares the source-of-truth tenant (where HR-controlled identities
919
+ # live) against R+D and SaaS tenants. Anyone present in R+D / SaaS but
920
+ # missing from the source-of-truth gets removed.
921
+ #
922
+ # WHY IT'S SCHEDULED
923
+ # ------------------
924
+ # Offboarding is a *continuous* governance concern. People leave between
925
+ # deploys. Running on a cron means stale access shrinks toward zero
926
+ # without a human having to remember.
927
+ #
928
+ # SAFETY RAIL
929
+ # -----------
930
+ # If the diff would remove > N users (default 10), the workflow fails
931
+ # loudly and refuses to proceed. Real platform teams burn themselves
932
+ # *exactly once* on a runaway cleanup script before they add this rail.
933
+
934
+ name: User offboarding
935
+
936
+ on:
937
+ schedule:
938
+ - cron: "0 6 * * *" # daily at 06:00 UTC
939
+ workflow_dispatch:
940
+ inputs:
941
+ dry_run:
942
+ type: boolean
943
+ default: true
944
+
945
+ permissions:
946
+ contents: read
947
+ id-token: write
948
+
949
+ jobs:
950
+ offboard:
951
+ runs-on: ubuntu-latest
952
+ environment: prod # environment protection rules (e.g. required reviewers) gate every run, scheduled or manual
953
+ steps:
954
+ - uses: actions/checkout@v4
955
+
956
+ - uses: azure/login@v2
957
+ with:
958
+ client-id: \${{ vars.AZURE_OFFBOARD_CLIENT_ID }}
959
+ tenant-id: \${{ vars.AZURE_TENANT_ID }}
960
+
961
+ - name: Run offboarding
962
+ env:
963
+ MAX_DELETIONS: "10"
964
+ SENDGRID_API_KEY: \${{ secrets.SENDGRID_API_KEY }}
965
+ DRY_RUN: \${{ inputs.dry_run || 'false' }}
966
+ run: pwsh -File ./user-management/scripts/offboard-users.ps1
967
+ `,
968
+ "README.md": `# Platform Governance Template
969
+
970
+ > A learning-friendly clone of a real **platform governance mono-repo**
971
+ > (the kind a Cloud Platform / Platform Engineering team owns at a large
972
+ > company). Use it to study, demo in interviews, or push to GitHub as a
973
+ > real template repo.
974
+
975
+ ---
976
+
977
+ ## What this repo is
978
+
979
+ This is the **central control repo** for defining and automatically
980
+ applying the rules, permissions, policies, and identity setup used
981
+ across a multi-cloud platform.
982
+
983
+ In one sentence:
984
+
985
+ > **Governance as code** — version-controlled config + automated
986
+ > pipelines that keep cloud access, security policies, and user lifecycle
987
+ > consistent across Azure, AWS, and multiple environments.
988
+
989
+ It is **not** an end-user product. Think of it as the **control room**
990
+ behind the scenes.
991
+
992
+ ---
993
+
994
+ ## What “governance” means here
995
+
996
+ The platform team uses this repo to make sure cloud environments stay:
997
+
998
+ 1. **secure** — least-privilege access, no rogue admins
999
+ 2. **consistent** — same standards in test, staging, prod, and FedRAMP
1000
+ 3. **auditable** — every change is a reviewed pull request
1001
+ 4. **automated** — pipelines apply changes, not human clicks
1002
+ 5. **harder to misconfigure** — policies block bad resources at create time
1003
+
1004
+ > **FedRAMP** = Federal Risk and Authorization Management Program — a
1005
+ > U.S. government cloud security/compliance baseline. If a repo
1006
+ > mentions "FedRAMP Test/Prod", it usually means stricter controls and
1007
+ > a separate isolated tenant.
1008
+
1009
+ ---
1010
+
1011
+ ## The four governance areas in this mono-repo
1012
+
1013
+ | Folder | Area | Cloud | What it controls |
1014
+ |---|---|---|---|
1015
+ | [\`azure-pim-solution/\`](./azure-pim-solution/) | PIM + RBAC | Azure | Who can elevate to admin, and through which roles |
1016
+ | [\`azure-policy-solution/\`](./azure-policy-solution/) | Policy as Code | Azure | Required tags, TLS, naming, diagnostics |
1017
+ | [\`aws-governance/\`](./aws-governance/) | IAM + permission sets | AWS | IAM roles, SSO permission sets, deny-policies |
1018
+ | [\`user-management/\`](./user-management/) | Offboarding automation | Cross-cloud | Removes stale users from tenants + SaaS |
1019
+
1020
+ > **Mono-repo** = one repository containing several related solutions.
1021
+ > Each subfolder could be its own repo, but keeping them together makes
1022
+ > review, ownership, and cross-cutting changes easier.
1023
+
1024
+ ---
1025
+
1026
+ ## How a change actually flows
1027
+
1028
+ \`\`\`
1029
+ ┌────────────┐ ┌──────────┐ ┌─────────┐ ┌───────────┐ ┌──────────┐
1030
+ │ Edit a │ -> │ Open PR │ -> │ Reviews │ -> │ Merge to │ -> │ Pipeline │
1031
+ │ config │ │ │ │ + CI │ │ main │ │ deploys │
1032
+ └────────────┘ └──────────┘ └─────────┘ └───────────┘ └──────────┘
1033
+ │ │ │ │ │
1034
+ │ PR template CODEOWNERS Branch ruleset GitHub Actions
1035
+ │ (.github/) (.github/) (GitHub UI) (workflows/)
1036
+
1037
+ You edit .tf / .json files describing
1038
+ the DESIRED STATE of the cloud.
1039
+ \`\`\`
1040
+
1041
+ The repo never asks *"what is currently in Azure?"* — it asks
1042
+ *"what **should** be there?"* and lets automation reconcile reality.
1043
+
1044
+ See [docs/CHANGE-FLOW.md](./docs/CHANGE-FLOW.md) for the full walkthrough.
1045
+
1046
+ ---
1047
+
1048
+ ## How to use this template
1049
+
1050
+ 1. Click **Use this template** on GitHub (after pushing it).
1051
+ 2. Replace placeholder Azure tenant IDs, AWS account IDs, and team
1052
+ handles with real ones.
1053
+ 3. Wire the workflows to your cloud credentials (OIDC recommended; see
1054
+ each \`workflows/*.yml\` for the federated identity stub).
1055
+ 4. Read [\`docs/GLOSSARY.md\`](./docs/GLOSSARY.md) first if PIM, RBAC,
1056
+ IAM, Terraform, and Azure Policy are new to you.
1057
+
1058
+ ---
1059
+
1060
+ ## Strong governance signals to look for
1061
+
1062
+ If you’re reviewing a real repo like this in an interview or audit,
1063
+ these are the high-value signals:
1064
+
1065
+ - **CODEOWNERS** restricts who can approve governance changes
1066
+ - **PR template** forces declaration of impact + environments
1067
+ - **PR validation workflow** standardizes change format
1068
+ - **Object ID validation** before deploy (no fake principals slip through)
1069
+ - **Terraform state** tracks managed resources and detects drift
1070
+ - **Scheduled runs** continuously reconcile cloud reality with repo
1071
+ - **Backstage \`catalog-info.yaml\`** registers governance as a real
1072
+ internal platform service with an owner and lifecycle
1073
+
1074
+ All of those are present in this template.
1075
+
1076
+ ---
1077
+
1078
+ ## One-line interview answer
1079
+
1080
+ > A platform governance mono-repo that manages cloud access, policy
1081
+ > standards, and user lifecycle as code across Azure and AWS — with
1082
+ > mandatory reviews, PR validations, and automated pipelines that deploy
1083
+ > approved changes consistently across test, staging, prod, and FedRAMP.
1084
+ `,
1085
+ "aws-governance/README.md": `# AWS governance
1086
+
1087
+ > AWS uses different primitives than Azure but the *governance pattern*
1088
+ > is identical: define identity + permissions as code, gate changes
1089
+ > through PR review, deploy via Terraform pipeline.
1090
+ >
1091
+ > Notice how the folder layout mirrors \`azure-pim-solution/\`. That
1092
+ > consistency is deliberate — auditors and new engineers can reason
1093
+ > about both clouds with the same mental model.
1094
+
1095
+ ---
1096
+
1097
+ ## Key AWS concepts
1098
+
1099
+ | AWS term | Plain English | Azure equivalent |
1100
+ |---|---|---|
1101
+ | **IAM User** | Long-lived human/app credential. Avoid these. | Azure AD user |
1102
+ | **IAM Role** | A set of permissions that *something* can assume temporarily. | Azure RBAC role assignment (closer to PIM-eligible) |
1103
+ | **IAM Policy** | The actual JSON list of \`Allow\`/\`Deny\` statements. | Azure role definition |
1104
+ | **Permission Set** (in AWS Identity Center / SSO) | A pre-baked role users get when they log in via SSO. | Entra group → role assignment |
1105
+ | **SCP** (Service Control Policy) | Org-wide deny rule applied to whole accounts. | Azure Policy at management group |
1106
+ | **OIDC trust policy** | Lets GitHub Actions (or another IdP) assume a role without secrets. | Azure federated credential |
1107
+
1108
+ ---
1109
+
1110
+ ## Folder layout
1111
+
1112
+ \`\`\`
1113
+ aws-governance/
1114
+ ├── iam/
1115
+ │ ├── platform-admin-role.tf # role for platform team SSO users
1116
+ │ └── developer-permission-set.tf # SSO permission set for app teams
1117
+ └── policies/
1118
+ └── deny-root-actions.json # SCP: nobody uses the root account
1119
+ \`\`\`
1120
+
1121
+ ---
1122
+
1123
+ ## How a change lands
1124
+
1125
+ Same flow as Azure: PR → CODEOWNERS review → merge → \`aws-governance-deploy.yml\`
1126
+ runs Terraform per environment using OIDC-federated credentials.
1127
+
1128
+ The \`role-to-assume\` in CI has a **trust policy** that restricts the
1129
+ role to *this exact repo on the main branch*. That's how you safely
1130
+ let CI hold cloud admin without giving developers the keys.
1131
+ `,
1132
+ "aws-governance/iam/developer-permission-set.tf": `# Permission Set assigned to developers via AWS Identity Center (SSO).
1133
+ #
1134
+ # A "permission set" in AWS SSO = the bundle that becomes an IAM Role
1135
+ # in each member account when a user is granted it. So defining it once
1136
+ # here gives developers consistent access across every AWS account in
1137
+ # the organisation.
1138
+
1139
+ resource "aws_ssoadmin_permission_set" "developer_readonly" {
1140
+ name = "DeveloperReadOnly"
1141
+ description = "Read-only access for developers. Includes CloudWatch Logs read so they can debug their apps."
1142
+ instance_arn = var.sso_instance_arn
1143
+ session_duration = "PT4H" # 4 hours — short sessions reduce token theft blast radius
1144
+ }
1145
+
1146
+ # AWS-managed policy gives broad read-only.
1147
+ resource "aws_ssoadmin_managed_policy_attachment" "developer_readonly" {
1148
+ instance_arn = var.sso_instance_arn
1149
+ managed_policy_arn = "arn:aws:iam::aws:policy/ReadOnlyAccess"
1150
+ permission_set_arn = aws_ssoadmin_permission_set.developer_readonly.arn
1151
+ }
1152
+
1153
+ # Inline policy: extra grants we cannot get from a managed policy.
1154
+ # Keeping these tiny and named is much easier to review than one giant
1155
+ # 200-line custom policy.
1156
+ resource "aws_ssoadmin_permission_set_inline_policy" "developer_readonly_logs" {
1157
+ instance_arn = var.sso_instance_arn
1158
+ permission_set_arn = aws_ssoadmin_permission_set.developer_readonly.arn
1159
+ inline_policy = jsonencode({
1160
+ Version = "2012-10-17"
1161
+ Statement = [{
1162
+ Effect = "Allow"
1163
+ Action = [
1164
+ "logs:GetLogEvents",
1165
+ "logs:FilterLogEvents",
1166
+ "logs:StartQuery",
1167
+ "logs:StopQuery",
1168
+ "logs:GetQueryResults"
1169
+ ]
1170
+ Resource = "*"
1171
+ }]
1172
+ })
1173
+ }
1174
+
1175
+ variable "sso_instance_arn" { type = string }
1176
+ `,
1177
+ "aws-governance/iam/platform-admin-role.tf": `# IAM Role used by the platform team via AWS SSO (Identity Center).
1178
+ #
1179
+ # The role's *trust policy* controls WHO can assume it.
1180
+ # The attached *managed policies* control WHAT they can do once assumed.
1181
+ # Splitting the two is what makes IAM least-privilege thinking work.
1182
+
1183
+ resource "aws_iam_role" "platform_admin" {
1184
+ name = "platform-admin"
1185
+ description = "Assumed by platform team members via AWS SSO. Tightly scoped — does NOT include billing or org-level write."
1186
+
1187
+ # Trust policy: only the SSO-managed identity provider can let users
1188
+ # assume this role, and only from sessions tagged with the platform
1189
+ # group. No long-lived keys, no other accounts.
1190
+ assume_role_policy = jsonencode({
1191
+ Version = "2012-10-17"
1192
+ Statement = [{
1193
+ Effect = "Allow"
1194
+ Principal = {
1195
+ Federated = "arn:aws:iam::\${var.account_id}:saml-provider/AWSSSO"
1196
+ }
1197
+ Action = "sts:AssumeRoleWithSAML"
1198
+ Condition = {
1199
+ StringEquals = {
1200
+ "SAML:aud" = "https://signin.aws.amazon.com/saml"
1201
+ }
1202
+ }
1203
+ }]
1204
+ })
1205
+
1206
+ # Permissions boundary = a hard ceiling. Even if someone attaches a
1207
+ # broader policy by mistake, the boundary still wins. Senior signal.
1208
+ permissions_boundary = aws_iam_policy.platform_admin_boundary.arn
1209
+
1210
+ tags = {
1211
+ "owner" = "platform-team"
1212
+ "managed-by" = "platform-governance-repo"
1213
+ "environment" = var.environment
1214
+ }
1215
+ }
1216
+
1217
+ # The boundary explicitly denies dangerous actions that platform admins
1218
+ # should never need (org root, billing, deleting CloudTrail).
1219
+ resource "aws_iam_policy" "platform_admin_boundary" {
1220
+ name = "platform-admin-boundary"
1221
+ description = "Permissions boundary for the platform-admin role."
1222
+ policy = jsonencode({
1223
+ Version = "2012-10-17"
1224
+ Statement = [
1225
+ {
1226
+ Effect = "Allow"
1227
+ Action = "*"
1228
+ Resource = "*"
1229
+ },
1230
+ {
1231
+ Effect = "Deny"
1232
+ Action = [
1233
+ "organizations:*",
1234
+ "account:*",
1235
+ "aws-portal:*",
1236
+ "cloudtrail:DeleteTrail",
1237
+ "cloudtrail:StopLogging"
1238
+ ]
1239
+ Resource = "*"
1240
+ }
1241
+ ]
1242
+ })
1243
+ }
1244
+
1245
+ variable "account_id" { type = string }
1246
+ variable "environment" { type = string }
1247
+ `,
1248
+ "aws-governance/policies/deny-root-actions.json": `{
1249
+ "Version": "2012-10-17",
1250
+ "Statement": [
1251
+ {
1252
+ "Sid": "DenyAllRootUserActions",
1253
+ "Effect": "Deny",
1254
+ "Action": "*",
1255
+ "Resource": "*",
1256
+ "Condition": {
1257
+ "StringLike": {
1258
+ "aws:PrincipalArn": "arn:aws:iam::*:root"
1259
+ }
1260
+ }
1261
+ },
1262
+ {
1263
+ "Sid": "DenyDisablingSecurityServices",
1264
+ "Effect": "Deny",
1265
+ "Action": [
1266
+ "guardduty:DeleteDetector",
1267
+ "guardduty:StopMonitoringMembers",
1268
+ "config:DeleteConfigurationRecorder",
1269
+ "config:StopConfigurationRecorder",
1270
+ "cloudtrail:StopLogging",
1271
+ "cloudtrail:DeleteTrail"
1272
+ ],
1273
+ "Resource": "*"
1274
+ }
1275
+ ]
1276
+ }
1277
+ `,
1278
+ "azure-pim-solution/README.md": `# Azure PIM + RBAC as code
1279
+
1280
+ > **PIM** = Privileged Identity Management — Microsoft's *just-in-time*
1281
+ > elevation system. Instead of being a permanent admin, you "activate"
1282
+ > the role for a few hours after MFA + (optionally) approval.
1283
+ >
1284
+ > **RBAC** = Role-Based Access Control — permissions are granted via
1285
+ > roles (Reader, Contributor, custom roles), assigned at a scope
1286
+ > (management group, subscription, resource group, resource).
1287
+
1288
+ This solution stores the *desired state* of who can do what in Azure,
1289
+ and a Terraform pipeline reconciles Azure to match.
1290
+
1291
+ ---
1292
+
1293
+ ## Folder layout
1294
+
1295
+ \`\`\`
1296
+ azure-pim-solution/
1297
+ ├── main.tf # wires modules + providers
1298
+ ├── variables.tf # env-level inputs (subscription IDs, etc.)
1299
+ ├── envs/ # per-environment tfvars + backend (gitignored secrets)
1300
+ ├── roles/ # custom Azure role DEFINITIONS (JSON)
1301
+ ├── users/ # human identity ASSIGNMENTS (one .tf per user)
1302
+ └── spns/ # Service Principal (non-human) ASSIGNMENTS
1303
+ \`\`\`
1304
+
1305
+ ### Why split \`users/\` and \`spns/\`?
1306
+
1307
+ > **SPN** = Service Principal — a non-human identity used by apps,
1308
+ > pipelines, automation. They have very different review requirements
1309
+ > from human users (no MFA, no PIM activation), so keeping them in a
1310
+ > separate folder lets CODEOWNERS demand stricter approval on
1311
+ > \`spns/\` if needed.
1312
+
1313
+ ---
1314
+
1315
+ ## How a new role assignment lands in Azure
1316
+
1317
+ 1. Engineer adds a \`.tf\` file under \`users/\` or \`spns/\`.
1318
+ 2. PR opens — \`pr_validations.yml\` checks the principal's object ID exists.
1319
+ 3. CODEOWNERS forces platform-team + security review.
1320
+ 4. Merge to \`main\` triggers \`azure-pim-deploy.yml\`:
1321
+ \`test\` → \`staging\` → \`prod\`.
1322
+ 5. Nightly cron does a \`terraform plan\` (no apply) to detect drift —
1323
+ if someone changed or deleted a managed assignment in the portal,
1324
+ the next deploy will *restore* it to match what's in code.
1325
+
1326
+ ---
1327
+
1328
+ ## Key idea: drift correction
1329
+
1330
+ Terraform state remembers what *this repo* manages. So if a panicked
1331
+ on-call edits or deletes a managed assignment by hand, the nightly drift
1332
+ plan flags it and the next apply restores it — self-healing rather than
1333
+ one-shot. Grants created outside Terraform are not in state; audit those separately.
1334
+ `,
1335
+ "azure-pim-solution/main.tf": `# Root module for Azure PIM + RBAC.
1336
+ #
1337
+ # Tiny on purpose: each user/SPN file in users/ and spns/ is a self-
1338
+ # contained resource block, so reviewers see a complete diff per
1339
+ # principal in one PR file instead of hunting through a giant module.
1340
+
1341
+ terraform {
1342
+ required_version = ">= 1.6.0"
1343
+
1344
+ required_providers {
1345
+ azurerm = {
1346
+ source = "hashicorp/azurerm"
1347
+ version = "~> 3.110"
1348
+ }
1349
+ azuread = {
1350
+ source = "hashicorp/azuread"
1351
+ version = "~> 2.50"
1352
+ }
1353
+ }
1354
+
1355
+ # Backend config is supplied per-env via -backend-config in CI.
1356
+ # Keeps test/staging/prod state files isolated and limits blast radius.
1357
+ backend "azurerm" {}
1358
+ }
1359
+
1360
+ provider "azurerm" {
1361
+ features {}
1362
+ subscription_id = var.subscription_id
1363
+ }
1364
+
1365
+ provider "azuread" {
1366
+ tenant_id = var.tenant_id
1367
+ }
1368
+
1369
+ # Custom role definitions live in roles/*.json. Loop over them so adding
1370
+ # a new custom role is just dropping a file in the folder.
1371
+ locals {
1372
+ custom_roles = {
1373
+ for f in fileset("\${path.module}/roles", "*.json") :
1374
+ trimsuffix(f, ".json") => jsondecode(file("\${path.module}/roles/\${f}"))
1375
+ }
1376
+ }
1377
+
1378
+ resource "azurerm_role_definition" "custom" {
1379
+ for_each = local.custom_roles
1380
+ name = each.value.Name
1381
+ scope = "/subscriptions/\${var.subscription_id}"
1382
+ description = each.value.Description
1383
+ permissions {
1384
+ actions = each.value.Actions
1385
+ not_actions = each.value.NotActions
1386
+ data_actions = lookup(each.value, "DataActions", [])
1387
+ not_data_actions = lookup(each.value, "NotDataActions", [])
1388
+ }
1389
+ assignable_scopes = each.value.AssignableScopes
1390
+ }
1391
+ `,
1392
+ "azure-pim-solution/roles/platform-operator.json": `{
1393
+ "Name": "Platform Operator",
1394
+ "Description": "Day-to-day platform ops: read everything, restart resources, rotate keys. NOT permitted to grant access or delete resources — those require activating the Owner-level break-glass role through PIM.",
1395
+ "Actions": [
1396
+ "*/read",
1397
+ "Microsoft.Compute/virtualMachines/restart/action",
1398
+ "Microsoft.Web/sites/restart/action",
1399
+ "Microsoft.KeyVault/vaults/keys/rotate/action",
1400
+ "Microsoft.Insights/diagnosticSettings/*"
1401
+ ],
1402
+ "NotActions": [
1403
+ "Microsoft.Authorization/*/Write",
1404
+ "Microsoft.Authorization/*/Delete"
1405
+ ],
1406
+ "AssignableScopes": [
1407
+ "/subscriptions/00000000-0000-0000-0000-000000000000"
1408
+ ]
1409
+ }
1410
+ `,
1411
+ "azure-pim-solution/spns/platform-deploy-spn.tf": `# SPN (Service Principal) assignment — non-human identity used by a
1412
+ # deployment pipeline. SPNs do NOT use PIM (no human to MFA/activate),
1413
+ # so they get a permanent assignment scoped as narrowly as possible.
1414
+ #
1415
+ # Senior signal: the *scope* here is a single resource group, not the
1416
+ # whole subscription. Every extra scope level is blast radius.
1417
+
1418
+ data "azuread_service_principal" "platform_deploy" {
1419
+ display_name = "platform-deploy-spn"
1420
+ }
1421
+
1422
+ resource "azurerm_role_assignment" "platform_deploy_contributor" {
1423
+ scope = "/subscriptions/\${var.subscription_id}/resourceGroups/rg-platform-\${var.environment}"
1424
+ role_definition_name = "Contributor"
1425
+ principal_id = data.azuread_service_principal.platform_deploy.object_id
1426
+ description = "Used by GitHub Actions to deploy platform infra. See PLAT-987."
1427
+ }
1428
+ `,
1429
+ "azure-pim-solution/users/octocat.tf": `# PIM-eligible role assignment for a human user.
1430
+ #
1431
+ # \`azurerm_role_assignment\` would be a *permanent* (active) grant.
1432
+ # \`azurerm_pim_eligible_role_assignment\` makes the user ELIGIBLE — they
1433
+ # must "activate" the role via PIM (MFA + optional approval) for a
1434
+ # limited time window. This is the least-privilege default for humans.
1435
+
1436
+ # Look up the user by UPN so we never hard-code object IDs in the file
1437
+ # the human reads. The data source fails the plan if the user does not
1438
+ # exist — that's the "object ID validation" governance signal.
1439
+ data "azuread_user" "octocat" {
1440
+ user_principal_name = "octocat@acme.example"
1441
+ }
1442
+
1443
+ resource "azurerm_pim_eligible_role_assignment" "octocat_platform_operator" {
1444
+ scope = "/subscriptions/\${var.subscription_id}"
1445
+ role_definition_id = azurerm_role_definition.custom["platform-operator"].role_definition_resource_id
1446
+ principal_id = data.azuread_user.octocat.object_id
1447
+
1448
+ schedule {
1449
+ start_date_time = "2026-01-01T00:00:00Z"
1450
+ expiration {
1451
+ # Hard cap on how long the *eligibility* lasts. After this,
1452
+ # the user must be re-assigned via a fresh PR. Forces periodic
1453
+ # access review — a SOC2 / ISO 27001 friendly pattern.
1454
+ end_date_time = "2026-12-31T23:59:59Z"
1455
+ }
1456
+ }
1457
+
1458
+ justification = "Day-to-day platform ops; ticketed in PLAT-1234."
1459
+ }
1460
+ `,
1461
+ "azure-pim-solution/variables.tf": `variable "subscription_id" {
1462
+ type = string
1463
+ description = "Target Azure subscription for RBAC assignments."
1464
+ }
1465
+
1466
+ variable "tenant_id" {
1467
+ type = string
1468
+ description = "Azure AD / Entra tenant the principals live in."
1469
+ }
1470
+
1471
+ variable "environment" {
1472
+ type = string
1473
+ description = "test | staging | prod | fedramp-test | fedramp-prod"
1474
+ validation {
1475
+ condition = contains(["test", "staging", "prod", "fedramp-test", "fedramp-prod"], var.environment)
1476
+ error_message = "environment must be one of test, staging, prod, fedramp-test, fedramp-prod."
1477
+ }
1478
+ }
1479
+ `,
1480
+ "azure-policy-solution/README.md": `# Azure Policy solution
1481
+
1482
+ > **Azure Policy** = Azure's built-in rule engine. Each policy is a
1483
+ > JSON object with two halves:
1484
+ >
1485
+ > - **\`if\`** — which resources does this rule care about?
1486
+ > - **\`then\`** — what should happen? (\`audit\`, \`deny\`, \`append\`, \`modify\`,
1487
+ > \`deployIfNotExists\`)
1488
+ >
1489
+ > Policies enforce things like "deny storage accounts without TLS 1.2"
1490
+ > or "audit any resource missing a \`cost-center\` tag" — at create AND
1491
+ > update time, before the change is persisted.
1492
+
1493
+ ---
1494
+
1495
+ ## Folder layout
1496
+
1497
+ \`\`\`
1498
+ azure-policy-solution/
1499
+ ├── policies/ # individual policy DEFINITIONS (the rules)
1500
+ ├── initiatives/ # bundles of policies (a.k.a. policySets)
1501
+ └── assignments/ # Terraform that ASSIGNS initiatives to scopes
1502
+ \`\`\`
1503
+
1504
+ ### Why three folders?
1505
+
1506
+ This is the **define → bundle → assign** model that scales:
1507
+
1508
+ 1. **Define** a small focused rule once (e.g. "min TLS 1.2").
1509
+ 2. **Bundle** related rules into an initiative (e.g. "Platform Baseline").
1510
+ 3. **Assign** the initiative to a management group / subscription, with
1511
+ parameters per environment.
1512
+
1513
+ Without this split you end up with copy-pasted policy JSON sprinkled
1514
+ across subscriptions and no one knows what the truth is.
1515
+
1516
+ ---
1517
+
1518
+ ## How a new policy lands
1519
+
1520
+ 1. PR adds a \`.json\` to \`policies/\` (and optionally adds it to an
1521
+ initiative in \`initiatives/platform-baseline.json\`).
1522
+ 2. \`pr_validations.yml\` checks the JSON has \`policyType\` and \`policyRule\`.
1523
+ 3. CODEOWNERS forces platform + security review.
1524
+ 4. Merge → \`azure-policy-deploy.yml\`:
1525
+ - upserts every policy definition (\`az policy definition create\`)
1526
+ - upserts every initiative (\`az policy set-definition create\`)
1527
+ - runs Terraform in \`assignments/\` to bind initiatives to scopes
1528
+ 5. The next time anyone creates / updates a resource, Azure evaluates
1529
+ the policy. \`audit\` mode reports it; \`deny\` mode blocks it.
1530
+
1531
+ ---
1532
+
1533
+ ## \`audit\` first, \`deny\` later
1534
+
1535
+ Rolling out \`deny\` straight to prod breaks people. The mature pattern:
1536
+
1537
+ 1. Ship in \`audit\` mode in test → see how many resources are non-compliant.
1538
+ 2. Communicate, give teams a fix window.
1539
+ 3. Flip the parameter to \`deny\` in test → staging → prod.
1540
+
1541
+ The policies in this template expose \`effect\` as a parameter so the
1542
+ assignment in each environment can choose \`audit\` or \`deny\` without
1543
+ touching the policy definition.
1544
+ `,
1545
+ "azure-policy-solution/assignments/production.tf": `# Assign the "platform-baseline" initiative to a subscription.
1546
+ #
1547
+ # This is where the *audit vs deny* decision is made per environment.
1548
+ # - test: effect = "audit" (so devs see warnings but aren't blocked)
1549
+ # - staging: effect = "audit" (one last chance to fix)
1550
+ # - prod: effect = "deny" (real enforcement)
1551
+
1552
+ terraform {
1553
+ required_version = ">= 1.6.0"
1554
+ required_providers {
1555
+ azurerm = { source = "hashicorp/azurerm", version = "~> 3.110" }
1556
+ }
1557
+ backend "azurerm" {}
1558
+ }
1559
+
1560
+ provider "azurerm" {
1561
+ features {}
1562
+ subscription_id = var.subscription_id
1563
+ }
1564
+
1565
+ variable "subscription_id" { type = string }
1566
+ variable "environment" { type = string }
1567
+
1568
+ # Effect chosen per environment. tfvars files in envs/ override this.
1569
+ variable "baseline_effect" {
1570
+ type = string
1571
+ description = "audit | deny | disabled"
1572
+ default = "audit"
1573
+ }
1574
+
1575
+ resource "azurerm_subscription_policy_assignment" "platform_baseline" {
1576
+ name = "platform-baseline-\${var.environment}"
1577
+ display_name = "Platform baseline (\${var.environment})"
1578
+ subscription_id = "/subscriptions/\${var.subscription_id}"
1579
+ policy_definition_id = "/subscriptions/\${var.subscription_id}/providers/Microsoft.Authorization/policySetDefinitions/platform-baseline"
1580
+
1581
+ parameters = jsonencode({
1582
+ effect = { value = var.baseline_effect }
1583
+ })
1584
+
1585
+ # Identity required so deployIfNotExists / modify policies can act
1586
+ # even though this assignment uses simpler audit/deny effects today —
1587
+ # adding the identity now means future policies don't require a
1588
+ # breaking-change re-assignment.
1589
+ identity {
1590
+ type = "SystemAssigned"
1591
+ }
1592
+ location = "eastus"
1593
+ }
1594
+ `,
1595
+ "azure-policy-solution/initiatives/platform-baseline.json": `{
1596
+ "name": "platform-baseline",
1597
+ "properties": {
1598
+ "displayName": "Platform baseline initiative",
1599
+ "description": "Bundle of policies every subscription must comply with. Adding a new platform-wide rule = adding a line here, then re-assigning the initiative.",
1600
+ "policyType": "Custom",
1601
+ "metadata": {
1602
+ "category": "Platform",
1603
+ "version": "1.0.0"
1604
+ },
1605
+ "parameters": {
1606
+ "effect": {
1607
+ "type": "String",
1608
+ "allowedValues": ["audit", "deny", "disabled"],
1609
+ "defaultValue": "audit"
1610
+ }
1611
+ },
1612
+ "policyDefinitions": [
1613
+ {
1614
+ "policyDefinitionReferenceId": "require-cost-center-tag",
1615
+ "policyDefinitionId": "/subscriptions/{subscriptionId}/providers/Microsoft.Authorization/policyDefinitions/require-cost-center-tag",
1616
+ "parameters": { "effect": { "value": "[parameters('effect')]" } }
1617
+ },
1618
+ {
1619
+ "policyDefinitionReferenceId": "storage-min-tls-1-2",
1620
+ "policyDefinitionId": "/subscriptions/{subscriptionId}/providers/Microsoft.Authorization/policyDefinitions/storage-min-tls-1-2",
1621
+ "parameters": { "effect": { "value": "[parameters('effect')]" } }
1622
+ },
1623
+ {
1624
+ "policyDefinitionReferenceId": "storage-naming-convention",
1625
+ "policyDefinitionId": "/subscriptions/{subscriptionId}/providers/Microsoft.Authorization/policyDefinitions/storage-naming-convention",
1626
+ "parameters": { "effect": { "value": "[parameters('effect')]" } }
1627
+ }
1628
+ ]
1629
+ }
1630
+ }
1631
+ `,
1632
+ "azure-policy-solution/policies/min-tls-version.json": `{
1633
+ "name": "storage-min-tls-1-2",
1634
+ "properties": {
1635
+ "displayName": "Storage accounts must use TLS 1.2 or higher",
1636
+ "description": "Block (or audit) any storage account whose minimumTlsVersion is below TLS1_2. Default-deny in prod; audit in test.",
1637
+ "policyType": "Custom",
1638
+ "mode": "All",
1639
+ "metadata": {
1640
+ "category": "Storage",
1641
+ "version": "1.0.0"
1642
+ },
1643
+ "parameters": {
1644
+ "effect": {
1645
+ "type": "String",
1646
+ "metadata": { "displayName": "Effect" },
1647
+ "allowedValues": ["audit", "deny", "disabled"],
1648
+ "defaultValue": "audit"
1649
+ }
1650
+ },
1651
+ "policyRule": {
1652
+ "if": {
1653
+ "allOf": [
1654
+ { "field": "type", "equals": "Microsoft.Storage/storageAccounts" },
1655
+ {
1656
+ "anyOf": [
1657
+ { "field": "Microsoft.Storage/storageAccounts/minimumTlsVersion", "exists": "false" },
1658
+ { "field": "Microsoft.Storage/storageAccounts/minimumTlsVersion", "notEquals": "TLS1_2" }
1659
+ ]
1660
+ }
1661
+ ]
1662
+ },
1663
+ "then": {
1664
+ "effect": "[parameters('effect')]"
1665
+ }
1666
+ }
1667
+ }
1668
+ }
1669
+ `,
1670
+ "azure-policy-solution/policies/require-tags.json": `{
1671
+ "name": "require-cost-center-tag",
1672
+ "properties": {
1673
+ "displayName": "Require cost-center tag on resources",
1674
+ "description": "All resources MUST carry a 'cost-center' tag so finance can chargeback. Effect is parameterised so we can audit first, then deny.",
1675
+ "policyType": "Custom",
1676
+ "mode": "Indexed",
1677
+ "metadata": {
1678
+ "category": "Tags",
1679
+ "version": "1.0.0"
1680
+ },
1681
+ "parameters": {
1682
+ "effect": {
1683
+ "type": "String",
1684
+ "metadata": { "displayName": "Effect" },
1685
+ "allowedValues": ["audit", "deny", "disabled"],
1686
+ "defaultValue": "audit"
1687
+ }
1688
+ },
1689
+ "policyRule": {
1690
+ "if": {
1691
+ "field": "tags['cost-center']",
1692
+ "exists": "false"
1693
+ },
1694
+ "then": {
1695
+ "effect": "[parameters('effect')]"
1696
+ }
1697
+ }
1698
+ }
1699
+ }
1700
+ `,
1701
+ "azure-policy-solution/policies/storage-naming-convention.json": `{
1702
+ "name": "storage-naming-convention",
1703
+ "properties": {
1704
+ "displayName": "Storage account names must follow corp naming standard",
1705
+ "description": "Enforce the corp naming standard: 'st<env><app><region>###'. Example: stprodpaymentseastus001. Helps cost reporting + ownership lookup.",
1706
+ "policyType": "Custom",
1707
+ "mode": "All",
1708
+ "metadata": {
1709
+ "category": "Naming",
1710
+ "version": "1.0.0"
1711
+ },
1712
+ "parameters": {
1713
+ "effect": {
1714
+ "type": "String",
1715
+ "metadata": { "displayName": "Effect" },
1716
+ "allowedValues": ["audit", "deny", "disabled"],
1717
+ "defaultValue": "audit"
1718
+ }
1719
+ },
1720
+ "policyRule": {
1721
+ "if": {
1722
+ "allOf": [
1723
+ { "field": "type", "equals": "Microsoft.Storage/storageAccounts" },
1724
+ { "field": "name", "notMatch": "st[a-z]{4,}[a-z]{3,}[0-9]{3}" }
1725
+ ]
1726
+ },
1727
+ "then": {
1728
+ "effect": "[parameters('effect')]"
1729
+ }
1730
+ }
1731
+ }
1732
+ }
1733
+ `,
1734
+ "catalog-info.yaml": `# Backstage catalog metadata.
1735
+ #
1736
+ # Backstage is a developer portal (open-sourced by Spotify). This file
1737
+ # registers governance as a real "internal platform service" with an
1738
+ # owner, lifecycle, and discoverable docs — so other engineers can find it.
1739
+ #
1740
+ # Senior signal: governance is treated as a *product*, not a script dump.
1741
+
1742
+ apiVersion: backstage.io/v1alpha1
1743
+ kind: System
1744
+ metadata:
1745
+ name: platform-governance
1746
+ description: Cross-cloud governance as code (Azure + AWS + user lifecycle)
1747
+ annotations:
1748
+ backstage.io/techdocs-ref: dir:.
1749
+ spec:
1750
+ owner: group:platform-team
1751
+ domain: platform
1752
+
1753
+ ---
1754
+ apiVersion: backstage.io/v1alpha1
1755
+ kind: Component
1756
+ metadata:
1757
+ name: azure-pim-rbac
1758
+ description: Azure PIM + RBAC assignments deployed via Terraform
1759
+ spec:
1760
+ type: service
1761
+ lifecycle: production
1762
+ owner: group:platform-team
1763
+ system: platform-governance
1764
+
1765
+ ---
1766
+ apiVersion: backstage.io/v1alpha1
1767
+ kind: Component
1768
+ metadata:
1769
+ name: azure-policy-solution
1770
+ description: Azure Policy definitions + initiative assignments
1771
+ spec:
1772
+ type: service
1773
+ lifecycle: production
1774
+ owner: group:platform-team
1775
+ system: platform-governance
1776
+
1777
+ ---
1778
+ apiVersion: backstage.io/v1alpha1
1779
+ kind: Component
1780
+ metadata:
1781
+ name: aws-governance
1782
+ description: AWS IAM roles, permission sets, and deny-policies
1783
+ spec:
1784
+ type: service
1785
+ lifecycle: production
1786
+ owner: group:platform-team
1787
+ system: platform-governance
1788
+
1789
+ ---
1790
+ apiVersion: backstage.io/v1alpha1
1791
+ kind: Component
1792
+ metadata:
1793
+ name: user-offboarding
1794
+ description: Scheduled cross-tenant user offboarding automation
1795
+ spec:
1796
+ type: service
1797
+ lifecycle: production
1798
+ owner: group:platform-team
1799
+ system: platform-governance
1800
+ `,
1801
+ "docs/CHANGE-FLOW.md": `# Change flow: from edit to enforcement
1802
+
1803
+ > Walks through what actually happens when an engineer edits a file in
1804
+ > this repo. Use this as your interview answer to the question
1805
+ > *"how does governance-as-code actually work day to day?"*.
1806
+
1807
+ ---
1808
+
1809
+ ## Scenario
1810
+
1811
+ Alice (a platform engineer) needs to give Bob the \`Platform Operator\`
1812
+ custom role in the **production** Azure subscription, eligible for
1813
+ one year, activated via PIM.
1814
+
1815
+ ---
1816
+
1817
+ ## Step 1 — Edit a config file
1818
+
1819
+ Alice creates \`azure-pim-solution/users/bob.tf\` (mirroring
1820
+ [\`octocat.tf\`](../azure-pim-solution/users/octocat.tf)) with Bob's UPN
1821
+ and an end date.
1822
+
1823
+ She runs \`terraform fmt\` locally. No Azure changes happen yet — the
1824
+ repo is still just files.
1825
+
1826
+ ---
1827
+
1828
+ ## Step 2 — Open a pull request
1829
+
1830
+ Alice pushes a branch and opens a PR. Several things happen automatically:
1831
+
1832
+ | Trigger | Outcome |
1833
+ |---|---|
1834
+ | \`pull_request_template.md\` loads | Forces Alice to declare type / env / pre-merge checks |
1835
+ | \`pr_validations.yml\` runs | PR title check + \`terraform fmt -check\` + \`terraform validate\` + **object ID validation** (does Bob exist in the tenant?) |
1836
+ | \`CODEOWNERS\` matches \`azure-pim-solution/\` | Auto-requests \`@acme/platform-team\` and \`@acme/security\` |
1837
+ | Branch ruleset on \`main\` | Blocks merge until 1+ approval and all required checks pass |
1838
+
1839
+ If Bob's object ID is wrong, the PR fails *here*, not at deploy time.
1840
+ That's the **shift-left** governance signal.
1841
+
1842
+ ---
1843
+
1844
+ ## Step 3 — Review
1845
+
1846
+ Reviewers see:
1847
+ - the PR description (forced by the template)
1848
+ - a focused diff (one new file, one new principal)
1849
+ - green CI showing object IDs validated and Terraform plan output
1850
+
1851
+ They approve. Alice merges.
1852
+
1853
+ ---
1854
+
1855
+ ## Step 4 — Pipeline deploys
1856
+
1857
+ \`azure-pim-deploy.yml\` triggers on push to \`main\` and:
1858
+
1859
+ 1. Authenticates to Azure via **OIDC** — no long-lived secret in the repo.
1860
+ 2. Runs \`terraform init\` against the **test** backend.
1861
+ 3. Runs \`terraform plan\` and \`apply\` for **test**.
1862
+ 4. If green, advances to **staging**, then **prod**. Each env is a
1863
+ GitHub Environment with its own approver (separate from the PR
1864
+ reviewer — segregation of duties).
1865
+ 5. The \`concurrency\` group ensures no two deploys race on the same
1866
+ state file.
1867
+
1868
+ After this, Bob is *eligible* for \`Platform Operator\` in production.
1869
+
1870
+ ---
1871
+
1872
+ ## Step 5 — Bob activates
1873
+
1874
+ Bob goes to the Azure portal → PIM → My Roles → activates \`Platform
1875
+ Operator\` for, say, 4 hours, with a justification ("PLAT-1234,
1876
+ investigating storage latency"). Azure logs the activation. After 4
1877
+ hours the access expires automatically.
1878
+
1879
+ ---
1880
+
1881
+ ## Step 6 — Drift detection
1882
+
1883
+ That night, the scheduled run of \`azure-pim-deploy.yml\` does a plan-only
1884
+ pass. If someone changed or deleted a managed assignment in the portal,
1885
+ the plan surfaces the drift and the next real deploy *restores* it.
1886
+
1887
+ Governance becomes self-healing.
1888
+
1889
+ ---
1890
+
1891
+ ## Step 7 — Audit time
1892
+
1893
+ Six months later, an auditor asks "who approved Bob's prod access?"
1894
+ Alice opens the PR link. The PR shows:
1895
+ - the diff (the actual config that was applied)
1896
+ - the reviewer (CODEOWNERS-enforced)
1897
+ - the CI logs (object ID validation result)
1898
+ - the merge commit
1899
+ - the deploy run (linked from the merge)
1900
+
1901
+ That entire chain is the audit trail. No spreadsheets, no screenshots.
1902
+
1903
+ ---
1904
+
1905
+ ## Why this matters
1906
+
1907
+ Compare to the *without-this-repo* version:
1908
+ 1. Alice messages a senior engineer in Slack.
1909
+ 2. Senior engineer clicks around in the portal.
1910
+ 3. Maybe they forget. Maybe they grant Owner instead of Platform Operator.
1911
+ 4. There's no record six months later beyond Slack scrollback.
1912
+
1913
+ Governance-as-code converts that ad-hoc, lossy process into a
1914
+ reviewable, repeatable, auditable workflow.
1915
+ `,
1916
+ "docs/GLOSSARY.md": `# Glossary
1917
+
1918
+ > Quick definitions of every acronym in this template, written for
1919
+ > someone seeing them for the first time. Read this before the
1920
+ > sub-folder READMEs and they'll click much faster.
1921
+
1922
+ ---
1923
+
1924
+ ## Identity & access
1925
+
1926
+ **RBAC — Role-Based Access Control**
1927
+ Permissions are bundled into *roles* (Reader, Contributor, custom),
1928
+ and you assign roles to identities at a *scope*. Both Azure and AWS
1929
+ use this model.
1930
+
1931
+ **PIM — Privileged Identity Management** *(Azure)*
1932
+ Just-in-time elevation. You're *eligible* for a role; you have to
1933
+ *activate* it (with MFA / approval) for a limited window. Reduces
1934
+ standing admin access dramatically.
1935
+
1936
+ **IAM — Identity and Access Management** *(AWS)*
1937
+ The umbrella term for AWS users, roles, policies, and SSO permission
1938
+ sets.
1939
+
1940
+ **SPN — Service Principal** *(Azure)*
1941
+ A non-human identity (apps, pipelines). Cannot use PIM (no human to
1942
+ MFA), so it gets permanent narrowly-scoped grants.
1943
+
1944
+ **SSO — Single Sign-On**
1945
+ Users log into one identity provider and that gives them access to
1946
+ many systems without re-authenticating. AWS Identity Center and Entra
1947
+ both implement this.
1948
+
1949
+ **OIDC — OpenID Connect**
1950
+ A federation standard. Lets a workload (e.g. GitHub Actions) prove its
1951
+ identity to a cloud and assume a role *without* storing a long-lived
1952
+ secret. The biggest practical security win in modern CI/CD.
1953
+
1954
+ **MFA — Multi-Factor Authentication**
1955
+ Two or more independent factors: something you know, something you have, something you are.
1956
+
1957
+ **Object ID**
1958
+ A unique GUID Azure assigns to each user/group/service principal. The
1959
+ *displayName* can collide; the object ID cannot.
1960
+
1961
+ ---
1962
+
1963
+ ## Policy & compliance
1964
+
1965
+ **Policy as Code**
1966
+ Compliance rules expressed as version-controlled config files, not
1967
+ checklists in a Word doc.
1968
+
1969
+ **Initiative** *(Azure Policy)*
1970
+ A bundle of policies. Easier to assign one initiative to a scope than
1971
+ 20 individual policies.
1972
+
1973
+ **SCP — Service Control Policy** *(AWS)*
1974
+ Org-wide deny rules attached to an account or organizational unit. SCPs
1975
+ *only restrict*; they cannot grant.
1976
+
1977
+ **Permissions Boundary** *(AWS)*
1978
+ A hard ceiling on what a role can do, even if a more permissive policy
1979
+ is attached. "You can never do more than this, no matter what."
1980
+
1981
+ **FedRAMP — Federal Risk and Authorization Management Program**
1982
+ US government cloud security/compliance baseline. FedRAMP
1983
+ environments usually live in isolated tenants with stricter controls.
1984
+
1985
+ **Drift**
1986
+ When real cloud state no longer matches the code's desired state.
1987
+ Detected by \`terraform plan\`; corrected by \`terraform apply\`.
1988
+
1989
+ ---
1990
+
1991
+ ## Tooling
1992
+
1993
+ **Terraform / OpenTofu**
1994
+ The dominant Infrastructure-as-Code tool. You declare *desired state*;
1995
+ Terraform calls cloud APIs to make reality match. State is recorded so
1996
+ it knows what to change next time.
1997
+
1998
+ **ARM / Bicep**
1999
+ Microsoft-native IaC for Azure. Bicep is the friendlier syntax that
2000
+ compiles down to ARM JSON.
2001
+
2002
+ **Backstage**
2003
+ Open-source developer portal originally from Spotify. \`catalog-info.yaml\`
2004
+ registers a service so engineers can discover ownership, docs, and
2005
+ lifecycle in one place.
2006
+
2007
+ **CODEOWNERS**
2008
+ A GitHub-native file that auto-requests review from owners when matching
2009
+ paths change. Combined with branch protection it becomes a hard gate.
2010
+
2011
+ ---
2012
+
2013
+ ## Process
2014
+
2015
+ **Change as Code**
2016
+ Even *how changes are made* is governed by config: PR templates,
2017
+ required reviewers, status checks. The repo governs its own change process.
2018
+
2019
+ **Source of Truth**
2020
+ The one trusted location that defines the correct state. In a
2021
+ governance-as-code repo, that's the repo itself.
2022
+
2023
+ **Blast Radius**
2024
+ How much can a single mistake or compromised credential affect? Lower
2025
+ is always better. Splitting roles, scoping narrowly, separating tenants
2026
+ all shrink blast radius.
2027
+ `,
2028
+ "user-management/README.md": `# User management
2029
+
2030
+ > Governance is **not** only about granting access. It's also about
2031
+ > *removing* access when someone leaves, changes roles, or shouldn't
2032
+ > have been there in the first place. Stale access is one of the most
2033
+ > common findings in real security audits.
2034
+
2035
+ This solution is the *continuous offboarding* engine.
2036
+
2037
+ ---
2038
+
2039
+ ## How it works
2040
+
2041
+ 1. The HR-controlled tenant (call it the **source of truth**) has a
2042
+ list of currently-employed identities.
2043
+ 2. R+D and SaaS tenants (e.g. SendGrid) have their own user lists that
2044
+ tend to drift — people get added, rarely removed.
2045
+ 3. A nightly GitHub Actions cron (\`.github/workflows/user-offboarding.yml\`)
2046
+ runs the script in \`scripts/offboard-users.ps1\`:
2047
+ - pulls users from each non-source tenant
2048
+ - diffs against the source of truth
2049
+ - removes anyone present in the side tenant but missing in the source
2050
+ 4. **Safety rail**: if the diff would delete more than \`MAX_DELETIONS\`
2051
+ users in any single tenant, the script aborts loudly. Forces a human to
2052
+ investigate before mass deletion.
2053
+ 5. **Dry-run mode**: manual dispatch defaults to \`dry_run=true\`, so
2054
+ you can review what *would* be deleted before doing it for real.
2055
+
2056
+ ---
2057
+
2058
+ ## Why it's part of governance, not just IT ops
2059
+
2060
+ - Provable, version-controlled deletion logic (auditors love this).
2061
+ - Tenants stay aligned without anyone having to remember.
2062
+ - Same review/CODEOWNERS gate as access *grants* — symmetry matters.
2063
+ `,
2064
+ "user-management/config/tenants.json": `{
2065
+ "sourceOfTruthTenant": {
2066
+ "name": "corp",
2067
+ "tenantId": "00000000-0000-0000-0000-000000000001",
2068
+ "description": "HR-controlled. Authoritative list of current employees."
2069
+ },
2070
+ "managedTenants": [
2071
+ {
2072
+ "name": "rd-test",
2073
+ "tenantId": "00000000-0000-0000-0000-000000000002",
2074
+ "removeIfMissingFromSource": true
2075
+ },
2076
+ {
2077
+ "name": "rd-prod",
2078
+ "tenantId": "00000000-0000-0000-0000-000000000003",
2079
+ "removeIfMissingFromSource": true
2080
+ }
2081
+ ],
2082
+ "saasTargets": [
2083
+ {
2084
+ "name": "sendgrid",
2085
+ "kind": "sendgrid",
2086
+ "removeIfMissingFromSource": true
2087
+ }
2088
+ ]
2089
+ }
2090
+ `,
2091
+ "user-management/scripts/offboard-users.ps1": `# Cross-tenant offboarding script.
2092
+ #
2093
+ # Reads config/tenants.json, diffs each managed tenant against the
2094
+ # source-of-truth tenant, and removes users who are missing from source.
2095
+ #
2096
+ # Designed to be safe by default:
2097
+ # - DRY_RUN=true -> log only, no deletes
2098
+ # - MAX_DELETIONS guard rail -> aborts if diff is too large
2099
+ # - Fail-fast on error -> a tripped guard rail aborts the run; later tenants are NOT processed
2100
+
2101
+ #Requires -Version 7.2
2102
+ [CmdletBinding()]
2103
+ param(
2104
+ [string]$ConfigPath = "$PSScriptRoot/../config/tenants.json",
2105
+ [int] $MaxDeletions = [int]($env:MAX_DELETIONS ?? 10),
2106
+ [bool] $DryRun = [bool]::Parse(($env:DRY_RUN ?? "true"))
2107
+ )
2108
+
2109
+ $ErrorActionPreference = "Stop"
2110
+ Set-StrictMode -Version Latest
2111
+
2112
+ # Real implementations would import Microsoft.Graph and a SendGrid SDK.
2113
+ # We stub the calls so this file is readable as a learning artifact.
2114
+ function Get-TenantUsers([string]$TenantId) {
2115
+ Write-Host " [stub] Get-TenantUsers $TenantId"
2116
+ return @() # array of @{ upn = '...'; objectId = '...' }
2117
+ }
2118
+
2119
+ function Remove-TenantUser([string]$TenantId, [string]$ObjectId) {
2120
+ Write-Host " [stub] Remove-TenantUser $TenantId $ObjectId"
2121
+ }
2122
+
2123
+ function Remove-SendGridUser([string]$Email) {
2124
+ Write-Host " [stub] Remove-SendGridUser $Email"
2125
+ }
2126
+
2127
+ # ── 1. Load config ────────────────────────────────────────────────────
2128
+ $config = Get-Content $ConfigPath -Raw | ConvertFrom-Json
2129
+ $source = Get-TenantUsers -TenantId $config.sourceOfTruthTenant.tenantId
2130
+ $sourceSet = @{}
2131
+ foreach ($u in $source) { $sourceSet[$u.upn.ToLower()] = $true }
2132
+
2133
+ Write-Host "Source-of-truth tenant has $($source.Count) users."
2134
+
2135
+ # ── 2. Diff each managed tenant ───────────────────────────────────────
2136
+ foreach ($tenant in $config.managedTenants) {
2137
+ Write-Host "\`n=== Tenant: $($tenant.name) ($($tenant.tenantId)) ==="
2138
+ if (-not $tenant.removeIfMissingFromSource) {
2139
+ Write-Host " Skipped (removeIfMissingFromSource=false)"
2140
+ continue
2141
+ }
2142
+
2143
+ $managed = Get-TenantUsers -TenantId $tenant.tenantId
2144
+ $toRemove = @($managed | Where-Object { -not $sourceSet.ContainsKey($_.upn.ToLower()) })
2145
+ Write-Host " Would remove: $($toRemove.Count) user(s)."
2146
+
2147
+ if ($toRemove.Count -gt $MaxDeletions) {
2148
+ throw "ABORT: $($toRemove.Count) deletions exceed MaxDeletions=$MaxDeletions for tenant $($tenant.name). Investigate before re-running."
2149
+ }
2150
+
2151
+ foreach ($u in $toRemove) {
2152
+ if ($DryRun) {
2153
+ Write-Host " [dry-run] would remove $($u.upn)"
2154
+ } else {
2155
+ Remove-TenantUser -TenantId $tenant.tenantId -ObjectId $u.objectId
2156
+ Write-Host " removed $($u.upn)"
2157
+ }
2158
+ }
2159
+ }
2160
+
2161
+ # ── 3. SaaS targets (e.g. SendGrid) ───────────────────────────────────
2162
+ foreach ($saas in $config.saasTargets) {
2163
+ if ($saas.kind -eq "sendgrid" -and -not $DryRun) {
2164
+ # Real impl: list SendGrid users, diff against $sourceSet, call DELETE.
2165
+ Write-Host "\`nSendGrid offboarding stub — implement Get/Delete via SendGrid API."
2166
+ }
2167
+ }
2168
+
2169
+ Write-Host "\`nDone. DryRun=$DryRun"
2170
+ `,
2171
+ };
2172
+
2173
+ // ─── AWS Governance IAM via GitHub Actions ───────────────────────────────
2174
+ //
2175
+ // Combines the Infra Lab's `governance-iam` Terraform module with a
2176
+ // GitHub-Actions-driven deploy flow that mirrors the real PLF setup, but
2177
+ // runs entirely locally via `act` + LocalStack:
2178
+ //
2179
+ // - PR opens / updates → `terraform-pr.yml` runs `tofu plan`
2180
+ // - merge to main → `terraform-ci.yml` runs `tofu apply`
2181
+ // - shared steps live in a composite action `./.github/actions/run-tofu-action`
2182
+ //
2183
+ // LocalStack is reachable from the act runner container at
2184
+ // `host.docker.internal:4566` on macOS / Windows, and via the
2185
+ // `--network host` runner override (or the host's literal IP address) on Linux.
2186
+ const AWS_IAM_GHA_FILES: Record<string, string> = {
2187
+ "README.md": `# AWS Governance IAM — GitHub Actions Lab
2188
+
2189
+ End-to-end mimic of the real PLF \`governance-iam\` deploy flow, but driven
2190
+ by **GitHub Actions** (not Azure DevOps) and pointed at **LocalStack**
2191
+ instead of real AWS.
2192
+
2193
+ ## What this lab demonstrates
2194
+
2195
+ 1. \`PR opens\` → workflow runs \`tofu plan\` (read-only preview)
2196
+ 2. \`merge to main\` → workflow runs \`tofu apply\` (deploy)
2197
+ 3. **Composite action** \`./.github/actions/run-tofu-action\` factors out the
2198
+ init/plan/apply boilerplate, exactly like the real repo's reusable action.
2199
+ 4. **Auth** is faked with static \`test\` credentials and a LocalStack
2200
+ endpoint — the *shape* matches \`provider.tf\`'s assume-role pattern.
2201
+
2202
+ ## How to run it
2203
+
2204
+ In the right pane, pick an event + workflow and click **Run**. Useful combos:
2205
+
2206
+ - \`pull_request\` + \`terraform-pr.yml\` → simulates a plan run on a PR
2207
+ - \`push\` + \`terraform-ci.yml\` → simulates an apply on main
2208
+ - \`workflow_dispatch\` + either workflow → manual trigger
2209
+
2210
+ You also need LocalStack running on \`localhost:4566\` on your host. The
2211
+ runner container reaches it via \`host.docker.internal:4566\`.
2212
+
2213
+ ## How this maps to the real PLF setup
2214
+
2215
+ | Real PLF | This lab |
2216
+ | ------------------------------------- | ----------------------------------------------- |
2217
+ | Azure DevOps pipeline (\`pr.yml\`) | GitHub Actions workflow \`terraform-pr.yml\` |
2218
+ | Azure DevOps pipeline (\`ci.yml\`) | GitHub Actions workflow \`terraform-ci.yml\` |
2219
+ | Shared template \`deploy-aws.yml\` | Composite action \`run-tofu-action\` |
2220
+ | AWS service connection | Static \`test\` creds + LocalStack endpoint |
2221
+ | Azure OIDC for state backend | Local state file (no remote backend) |
2222
+ | \`assume_role\` to target accounts | Single LocalStack account, dual provider alias |
2223
+
2224
+ ## File map
2225
+
2226
+ - \`.github/workflows/terraform-pr.yml\` — plan on PR
2227
+ - \`.github/workflows/terraform-ci.yml\` — apply on push to main
2228
+ - \`.github/actions/run-tofu-action/\` — reusable plan/apply steps
2229
+ - \`.github/CODEOWNERS\` — PR review routing
2230
+ - \`terraform/\` — full governance-iam module
2231
+ `,
2232
+
2233
+ ".github/CODEOWNERS": `# Reviewers auto-requested when terraform/ changes.
2234
+ /terraform/ @acme/platform
2235
+ /.github/ @acme/platform
2236
+ *.md @acme/docs
2237
+ `,
2238
+
2239
+ ".github/pull_request_template.md": `## Summary
2240
+
2241
+ <!-- What does this change to the IAM module? -->
2242
+
2243
+ ## Plan output
2244
+
2245
+ \`\`\`
2246
+ <!-- Paste the relevant section of \`tofu plan\` here -->
2247
+ \`\`\`
2248
+
2249
+ ## Checklist
2250
+
2251
+ - [ ] PR title follows conventional-commit style
2252
+ - [ ] \`terraform-pr.yml\` shows a clean plan
2253
+ - [ ] No unintended role/policy deletions
2254
+ `,
2255
+
2256
+ ".github/workflows/terraform-pr.yml": `name: terraform-pr
2257
+
2258
+ # PR-only workflow. Mirrors the real PLF \`pr.yml\`: run a plan, never apply.
2259
+ on:
2260
+ pull_request:
2261
+ paths:
2262
+ - "terraform/**"
2263
+ - ".github/workflows/terraform-*.yml"
2264
+ - ".github/actions/run-tofu-action/**"
2265
+ workflow_dispatch:
2266
+
2267
+ permissions:
2268
+ contents: read
2269
+ pull-requests: write
2270
+
2271
+ jobs:
2272
+ plan:
2273
+ runs-on: ubuntu-latest
2274
+ steps:
2275
+ - uses: actions/checkout@v4
2276
+
2277
+ - name: Plan IAM module
2278
+ uses: ./.github/actions/run-tofu-action
2279
+ with:
2280
+ working-directory: terraform
2281
+ mode: plan
2282
+ `,
2283
+
2284
+ ".github/workflows/terraform-ci.yml": `name: terraform-ci
2285
+
2286
+ # Main-branch workflow. Mirrors the real PLF \`ci.yml\`: apply after merge.
2287
+ on:
2288
+ push:
2289
+ branches: [main]
2290
+ paths:
2291
+ - "terraform/**"
2292
+ - ".github/workflows/terraform-*.yml"
2293
+ - ".github/actions/run-tofu-action/**"
2294
+ workflow_dispatch:
2295
+
2296
+ permissions:
2297
+ contents: read
2298
+
2299
+ jobs:
2300
+ apply:
2301
+ runs-on: ubuntu-latest
2302
+ steps:
2303
+ - uses: actions/checkout@v4
2304
+
2305
+ - name: Apply IAM module
2306
+ uses: ./.github/actions/run-tofu-action
2307
+ with:
2308
+ working-directory: terraform
2309
+ mode: apply
2310
+ `,
2311
+
2312
+ ".github/actions/run-tofu-action/action.yml": `# Reusable composite action — same idea as the real repo's run-tofu-action.
2313
+ # Hides the OpenTofu install + init + plan/apply behind a single \`uses:\`.
2314
+
2315
+ name: "Run OpenTofu"
2316
+ description: "Install OpenTofu, init, then plan or apply against LocalStack."
2317
+
2318
+ inputs:
2319
+ working-directory:
2320
+ description: "Folder containing the Terraform/OpenTofu config."
2321
+ required: true
2322
+ mode:
2323
+ description: "Either 'plan' or 'apply'."
2324
+ required: true
2325
+ default: "plan"
2326
+
2327
+ runs:
2328
+ using: "composite"
2329
+ steps:
2330
+ - name: Install OpenTofu
2331
+ shell: bash
2332
+ run: |
2333
+ curl -fsSL https://get.opentofu.org/install-opentofu.sh -o /tmp/install-opentofu.sh
2334
+ chmod +x /tmp/install-opentofu.sh
2335
+ /tmp/install-opentofu.sh --install-method standalone --skip-verify
2336
+ tofu --version
2337
+
2338
+ - name: Tofu init
2339
+ shell: bash
2340
+ working-directory: \${{ inputs.working-directory }}
2341
+ env:
2342
+ AWS_ACCESS_KEY_ID: test
2343
+ AWS_SECRET_ACCESS_KEY: test
2344
+ AWS_DEFAULT_REGION: us-east-1
2345
+ run: tofu init -input=false
2346
+
2347
+ - name: Tofu plan
2348
+ if: \${{ inputs.mode == 'plan' }}
2349
+ shell: bash
2350
+ working-directory: \${{ inputs.working-directory }}
2351
+ env:
2352
+ AWS_ACCESS_KEY_ID: test
2353
+ AWS_SECRET_ACCESS_KEY: test
2354
+ AWS_DEFAULT_REGION: us-east-1
2355
+ run: tofu plan -input=false -no-color
2356
+
2357
+ - name: Tofu apply
2358
+ if: \${{ inputs.mode == 'apply' }}
2359
+ shell: bash
2360
+ working-directory: \${{ inputs.working-directory }}
2361
+ env:
2362
+ AWS_ACCESS_KEY_ID: test
2363
+ AWS_SECRET_ACCESS_KEY: test
2364
+ AWS_DEFAULT_REGION: us-east-1
2365
+ run: tofu apply -input=false -auto-approve -no-color
2366
+ `,
2367
+
2368
+ ".actrc": `# Pin the runner image so installs are reproducible across machines.
2369
+ -P ubuntu-latest=catthehacker/ubuntu:act-latest
2370
+ --container-architecture linux/amd64
2371
+ `,
2372
+
2373
+ // Inline the full governance-iam Terraform module under `terraform/` so
2374
+ // the workflows have something real to plan/apply against.
2375
+ ...Object.fromEntries(
2376
+ Object.entries(AWS_GOVERNANCE_IAM_FILES).map(([path, body]) => {
2377
+ // The IAM module assumes provider endpoints point at `localhost:4566`,
2378
+ // but the act runner is a separate container and can't reach
2379
+ // `localhost` on the host. Rewrite to host.docker.internal so the
2380
+ // workflow run actually succeeds.
2381
+ const rewritten =
2382
+ path === "provider.tf"
2383
+ ? body.replace(
2384
+ /http:\/\/localhost:4566/g,
2385
+ "http://host.docker.internal:4566",
2386
+ )
2387
+ : body;
2388
+ return [`terraform/${path}`, rewritten];
2389
+ }),
2390
+ ),
2391
+ };
2392
+
2393
+ export const AWS_GOVERNANCE_GHA_LAB: GithubActionsLabWorkspace = {
2394
+ version: 1,
2395
+ label: "AWS Governance IAM via GitHub Actions",
2396
+ activeFile: ".github/workflows/terraform-pr.yml",
2397
+ defaultEvent: "pull_request",
2398
+ defaultWorkflow: ".github/workflows/terraform-pr.yml",
2399
+ files: AWS_IAM_GHA_FILES,
2400
+ ghOrg: DEFAULT_GH_LAB_ORG,
2401
+ rulesets: DEFAULT_GH_LAB_RULESETS,
2402
+ pullRequest: DEFAULT_GH_LAB_PULL_REQUEST,
2403
+ };
2404
+
2405
+ export const GOVERNANCE_GHA_LAB: GithubActionsLabWorkspace = {
2406
+ version: 1,
2407
+ label: "Platform Governance Template",
2408
+ activeFile: ".github/CODEOWNERS",
2409
+ defaultEvent: "pull_request",
2410
+ defaultWorkflow: ".github/workflows/pr_validations.yml",
2411
+ files: GOVERNANCE_FILES,
2412
+ ghOrg: DEFAULT_GH_LAB_ORG,
2413
+ rulesets: DEFAULT_GH_LAB_RULESETS,
2414
+ pullRequest: DEFAULT_GH_LAB_PULL_REQUEST,
2415
+ };
2416
+
492
2417
  export const DEFAULT_GHA_LAB: GithubActionsLabWorkspace = {
493
2418
  version: 1,
494
2419
  label: "GitHub Lab Playground",
@@ -910,6 +2835,7 @@ function cloneGhLabPullRequest(
910
2835
  reviews,
911
2836
  ...(lastCheckRun ? { lastCheckRun } : {}),
912
2837
  ...(typeof pr.title === "string" && pr.title ? { title: pr.title } : {}),
2838
+ ...(typeof pr.body === "string" ? { body: pr.body } : {}),
913
2839
  };
914
2840
  }
915
2841