cpflow 4.2.0 → 5.0.0.rc.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. checksums.yaml +4 -4
  2. data/.claude/commands/update-changelog.md +367 -0
  3. data/.github/workflows/claude.yml +5 -0
  4. data/.overcommit.yml +43 -3
  5. data/.rubocop.yml +3 -3
  6. data/CHANGELOG.md +28 -4
  7. data/CONTRIBUTING.md +6 -0
  8. data/Gemfile +8 -7
  9. data/Gemfile.lock +92 -72
  10. data/README.md +43 -15
  11. data/cpflow.gemspec +5 -5
  12. data/docs/ai-github-flow-prompt.md +61 -0
  13. data/docs/ci-automation.md +335 -28
  14. data/docs/commands.md +65 -4
  15. data/docs/releasing.md +153 -0
  16. data/lib/command/ai_github_flow_prompt.rb +47 -0
  17. data/lib/command/base.rb +14 -0
  18. data/lib/command/cleanup_images.rb +1 -1
  19. data/lib/command/cleanup_stale_apps.rb +1 -1
  20. data/lib/command/copy_image_from_upstream.rb +14 -3
  21. data/lib/command/exists.rb +13 -2
  22. data/lib/command/generate.rb +153 -4
  23. data/lib/command/generate_github_actions.rb +170 -0
  24. data/lib/command/generator_helpers.rb +31 -0
  25. data/lib/command/github_flow_readiness.rb +37 -0
  26. data/lib/command/run.rb +1 -1
  27. data/lib/command/terraform/generate.rb +1 -0
  28. data/lib/command/version.rb +1 -0
  29. data/lib/constants/exit_code.rb +1 -0
  30. data/lib/core/controlplane.rb +9 -7
  31. data/lib/core/controlplane_api_direct.rb +3 -3
  32. data/lib/core/github_flow_readiness/checks.rb +143 -0
  33. data/lib/core/github_flow_readiness_service.rb +453 -0
  34. data/lib/core/repo_introspection.rb +118 -0
  35. data/lib/core/terraform_config/dsl.rb +1 -1
  36. data/lib/core/terraform_config/local_variable.rb +1 -1
  37. data/lib/cpflow/version.rb +1 -1
  38. data/lib/cpflow.rb +65 -3
  39. data/lib/generator_templates/Dockerfile +59 -3
  40. data/lib/generator_templates/controlplane.yml +27 -39
  41. data/lib/generator_templates/entrypoint.sh +1 -1
  42. data/lib/generator_templates/release_script.sh +23 -0
  43. data/lib/generator_templates/templates/app.yml +5 -8
  44. data/lib/generator_templates/templates/rails.yml +2 -11
  45. data/lib/generator_templates_sqlite/controlplane.yml +46 -0
  46. data/lib/generator_templates_sqlite/release_script.sh +25 -0
  47. data/lib/generator_templates_sqlite/templates/app.yml +15 -0
  48. data/lib/generator_templates_sqlite/templates/db.yml +6 -0
  49. data/lib/generator_templates_sqlite/templates/rails.yml +32 -0
  50. data/lib/generator_templates_sqlite/templates/storage.yml +6 -0
  51. data/lib/github_flow_templates/.github/actions/cpflow-build-docker-image/action.yml +131 -0
  52. data/lib/github_flow_templates/.github/actions/cpflow-delete-control-plane-app/action.yml +24 -0
  53. data/lib/github_flow_templates/.github/actions/cpflow-delete-control-plane-app/delete-app.sh +50 -0
  54. data/lib/github_flow_templates/.github/actions/cpflow-detect-release-phase/action.yml +62 -0
  55. data/lib/github_flow_templates/.github/actions/cpflow-setup-environment/action.yml +98 -0
  56. data/lib/github_flow_templates/.github/actions/cpflow-validate-config/action.yml +85 -0
  57. data/lib/github_flow_templates/.github/actions/cpflow-wait-for-health/action.yml +92 -0
  58. data/lib/github_flow_templates/.github/cpflow-help.md +47 -0
  59. data/lib/github_flow_templates/.github/workflows/cpflow-cleanup-stale-review-apps.yml +56 -0
  60. data/lib/github_flow_templates/.github/workflows/cpflow-delete-review-app.yml +142 -0
  61. data/lib/github_flow_templates/.github/workflows/cpflow-deploy-review-app.yml +445 -0
  62. data/lib/github_flow_templates/.github/workflows/cpflow-deploy-staging.yml +140 -0
  63. data/lib/github_flow_templates/.github/workflows/cpflow-help-command.yml +53 -0
  64. data/lib/github_flow_templates/.github/workflows/cpflow-promote-staging-to-production.yml +490 -0
  65. data/lib/github_flow_templates/.github/workflows/cpflow-review-app-help.yml +46 -0
  66. data/rakelib/create_release.rake +662 -37
  67. data/script/check_command_docs +4 -2
  68. data/script/check_cpln_links +25 -11
  69. data/script/precommit/check_command_docs +22 -0
  70. data/script/precommit/check_cpln_links +21 -0
  71. data/script/precommit/check_trailing_newlines +68 -0
  72. data/script/precommit/get_changed_files +49 -0
  73. data/script/precommit/ruby_autofix +52 -0
  74. data/script/precommit/ruby_lint +33 -0
  75. metadata +52 -14
@@ -0,0 +1,140 @@
1
+ name: Deploy Staging to Control Plane
2
+
3
+ run-name: Deploy Control Plane staging app
4
+
5
+ on:
6
+ push:
7
+ # GitHub does not allow repository vars in branch filters. Default to the common
8
+ # deploy branches unless `cpflow generate-github-actions --staging-branch BRANCH`
9
+ # was used. If STAGING_APP_BRANCH is later changed in repository variables, keep
10
+ # this list in sync so pushes to that branch actually trigger the workflow.
11
+ branches: [__STAGING_BRANCH_FILTER__]
12
+ workflow_dispatch:
13
+
14
+ permissions:
15
+ contents: read
16
+
17
+ env:
18
+ APP_NAME: ${{ vars.STAGING_APP_NAME }}
19
+ CPLN_ORG: ${{ vars.CPLN_ORG_STAGING }}
20
+ STAGING_APP_BRANCH: __STAGING_APP_BRANCH_EXPRESSION__
21
+
22
+ concurrency:
23
+ group: cpflow-deploy-staging-${{ github.ref_name }}
24
+ # Match the review-app and delete workflows: a cancelled `cpflow deploy-image` mid-rollout
25
+ # can leave the staging GVC in a partially-deployed state (some workloads on the new image,
26
+ # others on the old). Let an in-flight deploy finish before the next push starts a new run.
27
+ cancel-in-progress: false
28
+
29
+ jobs:
30
+ validate-branch:
31
+ runs-on: ubuntu-latest
32
+ timeout-minutes: 5
33
+ outputs:
34
+ is_deployable: ${{ steps.check-branch.outputs.is_deployable }}
35
+ steps:
# Decides whether the ref that triggered this run is the staging deploy branch and
# publishes the verdict as the `is_deployable` step output ("true" or "false").
- name: Check whether this branch should deploy staging
  id: check-branch
  shell: bash
  run: |
    set -euo pipefail

    # Start from "not deployable" and flip only on a positive match, so the
    # output is written exactly once at the end of the script.
    deployable=false

    if [[ -z "${STAGING_APP_BRANCH}" ]]; then
      # No explicit staging branch configured: fall back to the conventional defaults.
      case "${GITHUB_REF_NAME}" in
        main|master)
          deployable=true
          ;;
        *)
          echo "Branch '${GITHUB_REF_NAME}' is not main/master and no STAGING_APP_BRANCH is configured"
          ;;
      esac
    elif [[ "${GITHUB_REF_NAME}" == "${STAGING_APP_BRANCH}" ]]; then
      deployable=true
    else
      echo "Branch '${GITHUB_REF_NAME}' does not match STAGING_APP_BRANCH='${STAGING_APP_BRANCH}'"
    fi

    echo "is_deployable=${deployable}" >> "$GITHUB_OUTPUT"
55
+
# Check out the repository only when the branch is deployable; the next step needs the
# local composite action under ./.github/actions.
- name: Checkout repository
  if: steps.check-branch.outputs.is_deployable == 'true'
  uses: actions/checkout@v4
  with:
    # This job only reads the working tree and never pushes, so do not leave the
    # GITHUB_TOKEN credential helper in .git/config (matches the other checkouts
    # in these workflows).
    persist-credentials: false
59
+
60
+ - name: Validate required secrets and variables
61
+ if: steps.check-branch.outputs.is_deployable == 'true'
62
+ uses: ./.github/actions/cpflow-validate-config
63
+ env:
64
+ CPLN_TOKEN_STAGING: ${{ secrets.CPLN_TOKEN_STAGING }}
65
+ CPLN_ORG_STAGING: ${{ vars.CPLN_ORG_STAGING }}
66
+ STAGING_APP_NAME: ${{ vars.STAGING_APP_NAME }}
67
+ with:
68
+ required: |
69
+ secret:CPLN_TOKEN_STAGING
70
+ variable:CPLN_ORG_STAGING
71
+ variable:STAGING_APP_NAME
72
+
73
+ build:
74
+ needs: validate-branch
75
+ if: needs.validate-branch.outputs.is_deployable == 'true'
76
+ runs-on: ubuntu-latest
77
+ timeout-minutes: 30
78
+ steps:
79
+ - name: Checkout repository
80
+ uses: actions/checkout@v4
81
+ with:
82
+ persist-credentials: false
83
+
84
+ - name: Setup environment
85
+ uses: ./.github/actions/cpflow-setup-environment
86
+ with:
87
+ token: ${{ secrets.CPLN_TOKEN_STAGING }}
88
+ org: ${{ vars.CPLN_ORG_STAGING }}
89
+ cpln_cli_version: ${{ vars.CPLN_CLI_VERSION }}
90
+ cpflow_version: ${{ vars.CPFLOW_VERSION }}
91
+
92
+ - name: Build Docker image
93
+ uses: ./.github/actions/cpflow-build-docker-image
94
+ with:
95
+ app_name: ${{ env.APP_NAME }}
96
+ org: ${{ vars.CPLN_ORG_STAGING }}
97
+ commit: ${{ github.sha }}
98
+ docker_build_extra_args: ${{ vars.DOCKER_BUILD_EXTRA_ARGS }}
99
+ docker_build_ssh_key: ${{ secrets.DOCKER_BUILD_SSH_KEY }}
100
+ docker_build_ssh_known_hosts: ${{ vars.DOCKER_BUILD_SSH_KNOWN_HOSTS }}
101
+
102
+ deploy:
103
+ needs: [validate-branch, build]
104
+ if: needs.validate-branch.outputs.is_deployable == 'true'
105
+ runs-on: ubuntu-latest
106
+ timeout-minutes: 30
107
+ steps:
108
+ - name: Checkout repository
109
+ uses: actions/checkout@v4
110
+ with:
111
+ persist-credentials: false
112
+
113
+ - name: Setup environment
114
+ uses: ./.github/actions/cpflow-setup-environment
115
+ with:
116
+ token: ${{ secrets.CPLN_TOKEN_STAGING }}
117
+ org: ${{ vars.CPLN_ORG_STAGING }}
118
+ cpln_cli_version: ${{ vars.CPLN_CLI_VERSION }}
119
+ cpflow_version: ${{ vars.CPFLOW_VERSION }}
120
+
121
+ - name: Detect release phase support
122
+ id: release-phase
123
+ uses: ./.github/actions/cpflow-detect-release-phase
124
+ with:
125
+ app_name: ${{ env.APP_NAME }}
126
+
127
+ - name: Deploy staging image
128
+ env:
129
+ RELEASE_PHASE_FLAG: ${{ steps.release-phase.outputs.flag }}
130
+ shell: bash
131
+ run: |
132
+ set -euo pipefail
133
+
134
+ deploy_args=(-a "${APP_NAME}")
135
+ if [[ -n "${RELEASE_PHASE_FLAG}" ]]; then
136
+ deploy_args+=("${RELEASE_PHASE_FLAG}")
137
+ fi
138
+ deploy_args+=(--org "${CPLN_ORG}" --verbose)
139
+
140
+ cpflow deploy-image "${deploy_args[@]}"
@@ -0,0 +1,53 @@
1
+ name: Review App Help Command
2
+
3
+ on:
4
+ issue_comment:
5
+ types: [created]
6
+ workflow_dispatch:
7
+ inputs:
8
+ pr_number:
9
+ description: Pull request number to post help on
10
+ required: true
11
+ type: number
12
+
13
+ permissions:
14
+ contents: read
15
+ issues: write
16
+ pull-requests: write
17
+
18
+ jobs:
19
+ help:
20
+ if: |
21
+ (github.event_name == 'issue_comment' &&
22
+ github.event.issue.pull_request &&
23
+ github.event.comment.body == '/help' &&
24
+ contains(fromJson('["OWNER","MEMBER","COLLABORATOR"]'), github.event.comment.author_association)) ||
25
+ github.event_name == 'workflow_dispatch'
26
+ runs-on: ubuntu-latest
27
+ timeout-minutes: 5
28
+
29
+ steps:
30
+ - name: Checkout repository
31
+ uses: actions/checkout@v4
32
+ with:
33
+ # Help only reads `.github/cpflow-help.md`; no git push happens, so drop the
34
+ # GITHUB_TOKEN credential helper to keep the token out of .git/config.
35
+ persist-credentials: false
36
+
# Posts the contents of `.github/cpflow-help.md` as a comment on the target pull request.
- name: Post help message
  uses: actions/github-script@v7
  with:
    script: |
      const { readFileSync } = require("fs");

      // Manual runs carry the PR number as a workflow input; comment-triggered
      // runs take it from the issue the comment was posted on.
      const isManualRun = context.eventName === "workflow_dispatch";
      const issueNumber = isManualRun
        ? Number(context.payload.inputs.pr_number)
        : context.issue.number;

      const { owner, repo } = context.repo;
      const body = readFileSync(".github/cpflow-help.md", "utf8");

      await github.rest.issues.createComment({
        owner,
        repo,
        issue_number: issueNumber,
        body
      });
@@ -0,0 +1,490 @@
1
+ name: Promote Staging to Production
2
+
3
+ on:
4
+ workflow_dispatch:
5
+ inputs:
6
+ confirm_promotion:
7
+ description: Type "promote" to confirm promotion of staging to production
8
+ required: true
9
+ type: string
10
+
11
+ permissions:
12
+ contents: read
13
+
env:
  # Polling knobs for the post-deploy health check and the rollback readiness wait.
  # Each one reads the repository variable of the same name and falls back to the
  # default shown here, so it can be tuned either by editing this file or from the
  # repository's Actions variables.
  #
  # Worst-case wall time per health-check attempt is HEALTH_CHECK_INTERVAL plus the
  # curl --max-time used by the health check (10s), so the defaults give a ~10 minute
  # window (24 × (15 + 10) = 600s) — enough for most Rails cold boots
  # (asset precompile + db:migrate + workload readiness).
  HEALTH_CHECK_RETRIES: ${{ vars.HEALTH_CHECK_RETRIES || '24' }}
  HEALTH_CHECK_INTERVAL: ${{ vars.HEALTH_CHECK_INTERVAL || '15' }}
  # Space-separated list of HTTP statuses considered healthy. The default accepts 301/302
  # because `curl` is invoked without `-L`, so a root `/` that redirects to a login page
  # (common for Rails apps that auth-gate `/`) would otherwise be reported as unhealthy
  # despite the workload itself being up.
  #
  # Strongly recommended: expose a dedicated `/health` endpoint that returns `200` and set
  # HEALTH_CHECK_ACCEPTED_STATUSES to `"200"` in repository variables. The 301/302 default
  # trades correctness for ergonomics — a maintenance-mode redirect or an auth-gate redirect
  # to a login page can pass this check even when the underlying app is broken. Override
  # via the HEALTH_CHECK_ACCEPTED_STATUSES repo variable to tighten this for apps that
  # expose a dedicated health endpoint (e.g. "200" for a plain /health, or "200 401 403"
  # for apps that auth-gate / without redirecting).
  HEALTH_CHECK_ACCEPTED_STATUSES: ${{ vars.HEALTH_CHECK_ACCEPTED_STATUSES || '200 301 302' }}
  # Attempts and seconds between attempts when polling workloads after a rollback.
  ROLLBACK_READINESS_RETRIES: ${{ vars.ROLLBACK_READINESS_RETRIES || '24' }}
  ROLLBACK_READINESS_INTERVAL: ${{ vars.ROLLBACK_READINESS_INTERVAL || '15' }}
  # Optional. When set, names the workload whose image/version is reported and whose
  # health is checked; steps fall back to their own defaults when it is empty.
  PRIMARY_WORKLOAD: ${{ vars.PRIMARY_WORKLOAD }}
37
+
38
+ concurrency:
39
+ # Single global group: only one production promotion may run at a time across the
40
+ # whole repo. Independent of staging deploys and review-app workflows (different
41
+ # GVCs / different concurrency keys), so those can still run in parallel.
42
+ group: cpflow-promote-staging-to-production
43
+ # Don't cancel an in-flight promotion: a half-finished `cpflow deploy-image` plus a
44
+ # rollback can leave production in a worse state than letting the first run finish.
45
+ cancel-in-progress: false
46
+
47
+ jobs:
48
+ promote-to-production:
49
+ if: github.event.inputs.confirm_promotion == 'promote'
50
+ runs-on: ubuntu-latest
51
+ timeout-minutes: 45
52
+
53
+ steps:
54
+ - name: Checkout repository
55
+ uses: actions/checkout@v4
56
+ with:
57
+ persist-credentials: false
58
+
59
+ - name: Validate required secrets and variables
60
+ uses: ./.github/actions/cpflow-validate-config
61
+ # Pass secrets via env so the composite action checks indirect shell
62
+ # variables instead of interpolating secret values into a run script.
63
+ env:
64
+ CPLN_TOKEN_STAGING: ${{ secrets.CPLN_TOKEN_STAGING }}
65
+ CPLN_TOKEN_PRODUCTION: ${{ secrets.CPLN_TOKEN_PRODUCTION }}
66
+ CPLN_ORG_STAGING: ${{ vars.CPLN_ORG_STAGING }}
67
+ CPLN_ORG_PRODUCTION: ${{ vars.CPLN_ORG_PRODUCTION }}
68
+ STAGING_APP_NAME: ${{ vars.STAGING_APP_NAME }}
69
+ PRODUCTION_APP_NAME: ${{ vars.PRODUCTION_APP_NAME }}
70
+ with:
71
+ required: |
72
+ secret:CPLN_TOKEN_STAGING
73
+ secret:CPLN_TOKEN_PRODUCTION
74
+ variable:CPLN_ORG_STAGING
75
+ variable:CPLN_ORG_PRODUCTION
76
+ variable:STAGING_APP_NAME
77
+ variable:PRODUCTION_APP_NAME
78
+
79
+ - name: Setup production environment
80
+ uses: ./.github/actions/cpflow-setup-environment
81
+ with:
82
+ token: ${{ secrets.CPLN_TOKEN_PRODUCTION }}
83
+ org: ${{ vars.CPLN_ORG_PRODUCTION }}
84
+ cpln_cli_version: ${{ vars.CPLN_CLI_VERSION }}
85
+ cpflow_version: ${{ vars.CPFLOW_VERSION }}
86
+
# Runs after "Setup production environment" so the pinned Ruby (>= 3.1) is on PATH.
# Reads `.controlplane/controlplane.yml` with YAML.safe_load(..., aliases: true) (aliases
# are permitted in the config) and exposes the app's `app_workloads` as a comma-separated
# `names` output, defaulting to "rails" when the app declares none.
- name: Resolve production app workloads
  id: workloads
  env:
    PRODUCTION_APP_NAME: ${{ vars.PRODUCTION_APP_NAME }}
  shell: bash
  run: |
    set -euo pipefail

    workloads="$(ruby - "${PRODUCTION_APP_NAME}" <<'RUBY'
    require "yaml"

    app = ARGV.fetch(0)
    config_path = ".controlplane/controlplane.yml"

    # Fail with an actionable message instead of an Errno::ENOENT backtrace.
    unless File.file?(config_path)
      warn "Error: `#{config_path}` was not found in the checked-out repository."
      exit 1
    end

    # An empty file parses to nil/false and a malformed one may not be a mapping;
    # treat both as "no apps defined" so the error below explains what to fix.
    data = YAML.safe_load(File.read(config_path), aliases: true)
    apps = data.is_a?(Hash) && data["apps"].is_a?(Hash) ? data["apps"] : {}
    app_config = apps[app]

    unless app_config
      warn "Error: app '#{app}' is not defined under `apps:` in `.controlplane/controlplane.yml`."
      warn "       Fix the PRODUCTION_APP_NAME repository variable or add the app to controlplane.yml."
      exit 1
    end

    workloads = Array(app_config["app_workloads"])
    workloads = ["rails"] if workloads.empty?
    puts workloads.join(",")
    RUBY
    )"

    echo "names=${workloads}" >> "$GITHUB_OUTPUT"
118
+
119
+ - name: Detect release phase support
120
+ id: release-phase
121
+ uses: ./.github/actions/cpflow-detect-release-phase
122
+ with:
123
+ app_name: ${{ vars.PRODUCTION_APP_NAME }}
124
+
125
+ - name: Verify production environment variables
126
+ env:
127
+ CPLN_TOKEN_STAGING: ${{ secrets.CPLN_TOKEN_STAGING }}
128
+ CPLN_TOKEN_PRODUCTION: ${{ secrets.CPLN_TOKEN_PRODUCTION }}
129
+ STAGING_APP_NAME: ${{ vars.STAGING_APP_NAME }}
130
+ PRODUCTION_APP_NAME: ${{ vars.PRODUCTION_APP_NAME }}
131
+ CPLN_ORG_STAGING: ${{ vars.CPLN_ORG_STAGING }}
132
+ CPLN_ORG_PRODUCTION: ${{ vars.CPLN_ORG_PRODUCTION }}
133
+ shell: bash
134
+ run: |
135
+ set -euo pipefail
136
+
137
+ staging_vars="$(CPLN_TOKEN="${CPLN_TOKEN_STAGING}" cpln gvc get "${STAGING_APP_NAME}" --org "${CPLN_ORG_STAGING}" -o json | jq -r '.spec.env // [] | .[].name' | sort)"
138
+ production_vars="$(CPLN_TOKEN="${CPLN_TOKEN_PRODUCTION}" cpln gvc get "${PRODUCTION_APP_NAME}" --org "${CPLN_ORG_PRODUCTION}" -o json | jq -r '.spec.env // [] | .[].name' | sort)"
139
+
140
+ if [[ -z "${staging_vars}" ]]; then
141
+ echo "Staging GVC exposes no environment variables; skipping parity check."
142
+ exit 0
143
+ fi
144
+
145
+ # Treat staging as the promotion source of truth: fail when a variable
146
+ # present in staging is missing in production. Production-only variables
147
+ # are allowed, but surface them so teams can spot drift.
148
+ missing_vars="$(comm -23 <(printf '%s\n' "${staging_vars}") <(printf '%s\n' "${production_vars}"))"
149
+ production_only_vars="$(comm -13 <(printf '%s\n' "${staging_vars}") <(printf '%s\n' "${production_vars}"))"
150
+
151
+ if [[ -n "${production_only_vars}" ]]; then
152
+ echo "::warning::Production has environment variables that are not present in staging:"
153
+ echo "${production_only_vars}"
154
+ fi
155
+
156
+ if [[ -n "${missing_vars}" ]]; then
157
+ echo "::error::Production is missing environment variables that exist in staging"
158
+ echo "${missing_vars}"
159
+ exit 1
160
+ fi
161
+
162
+ - name: Capture current production image
163
+ id: capture-current
164
+ env:
165
+ PRODUCTION_APP_NAME: ${{ vars.PRODUCTION_APP_NAME }}
166
+ CPLN_ORG_PRODUCTION: ${{ vars.CPLN_ORG_PRODUCTION }}
167
+ WORKLOAD_NAMES: ${{ steps.workloads.outputs.names }}
168
+ shell: bash
169
+ run: |
170
+ set -euo pipefail
171
+
172
+ selected_workload="${PRIMARY_WORKLOAD:-}"
173
+ selected_image=""
174
+ selected_version=""
175
+ first_image=""
176
+ first_version=""
177
+ rollback_state='{}'
178
+
179
+ while IFS= read -r workload_name; do
180
+ [[ -n "${workload_name}" ]] || continue
181
+
182
+ workload_json="$(cpln workload get "${workload_name}" --gvc "${PRODUCTION_APP_NAME}" --org "${CPLN_ORG_PRODUCTION}" -o json)"
183
+ workload_image="$(echo "${workload_json}" | jq -r '.spec.containers[0].image')"
184
+ workload_containers="$(echo "${workload_json}" | jq -c '.spec.containers | map({name, image})')"
185
+ workload_version="$(echo "${workload_json}" | jq -r '.version')"
186
+
187
+ if [[ -z "${first_image}" ]]; then
188
+ first_image="${workload_image}"
189
+ first_version="${workload_version}"
190
+ fi
191
+
192
+ if [[ -n "${selected_workload}" && "${workload_name}" == "${selected_workload}" ]]; then
193
+ selected_image="${workload_image}"
194
+ selected_version="${workload_version}"
195
+ fi
196
+
197
+ rollback_state="$(
198
+ jq -c \
199
+ --arg workload "${workload_name}" \
200
+ --arg image "${workload_image}" \
201
+ --arg version "${workload_version}" \
202
+ --argjson containers "${workload_containers}" \
203
+ '. + {($workload): {image: $image, version: $version, containers: $containers}}' \
204
+ <<< "${rollback_state}"
205
+ )"
206
+ done < <(tr ',' '\n' <<< "${WORKLOAD_NAMES}")
207
+
208
+ current_image="${selected_image:-${first_image}}"
209
+ current_version="${selected_version:-${first_version}}"
210
+
211
+ echo "current_image=${current_image}" >> "$GITHUB_OUTPUT"
212
+ echo "current_version=${current_version}" >> "$GITHUB_OUTPUT"
213
+ # Randomize the heredoc delimiter so a stray "EOF" line inside rollback_state can't terminate it early.
214
+ delim="EOF_$(openssl rand -hex 8)"
215
+ {
216
+ echo "rollback_state<<${delim}"
217
+ echo "${rollback_state}"
218
+ echo "${delim}"
219
+ } >> "$GITHUB_OUTPUT"
220
+
221
+ - name: Capture deployed staging image
222
+ id: staging-image
223
+ env:
224
+ CPLN_TOKEN_STAGING: ${{ secrets.CPLN_TOKEN_STAGING }}
225
+ STAGING_APP_NAME: ${{ vars.STAGING_APP_NAME }}
226
+ CPLN_ORG_STAGING: ${{ vars.CPLN_ORG_STAGING }}
227
+ WORKLOAD_NAMES: ${{ steps.workloads.outputs.names }}
228
+ shell: bash
229
+ run: |
230
+ set -euo pipefail
231
+
232
+ selected_workload="${PRIMARY_WORKLOAD:-}"
233
+ selected_image=""
234
+ first_image=""
235
+
236
+ while IFS= read -r workload_name; do
237
+ [[ -n "${workload_name}" ]] || continue
238
+
239
+ workload_json="$(CPLN_TOKEN="${CPLN_TOKEN_STAGING}" cpln workload get "${workload_name}" --gvc "${STAGING_APP_NAME}" --org "${CPLN_ORG_STAGING}" -o json)"
240
+ workload_image="$(echo "${workload_json}" | jq -r '.spec.containers[0].image // empty')"
241
+
242
+ if [[ -z "${workload_image}" ]]; then
243
+ echo "::error::Could not find an image on staging workload '${workload_name}'." >&2
244
+ exit 1
245
+ fi
246
+
247
+ if [[ -z "${first_image}" ]]; then
248
+ first_image="${workload_image}"
249
+ fi
250
+
251
+ if [[ -n "${selected_workload}" && "${workload_name}" == "${selected_workload}" ]]; then
252
+ selected_image="${workload_image}"
253
+ fi
254
+ done < <(tr ',' '\n' <<< "${WORKLOAD_NAMES}")
255
+
256
+ staging_image_ref="${selected_image:-${first_image}}"
257
+ if [[ -z "${staging_image_ref}" ]]; then
258
+ echo "::error::Could not determine the deployed staging image." >&2
259
+ exit 1
260
+ fi
261
+
262
+ if [[ "${staging_image_ref}" == /org/*/image/* ]]; then
263
+ staging_image="${staging_image_ref##*/image/}"
264
+ elif [[ "${staging_image_ref}" == *.registry.cpln.io/* ]]; then
265
+ staging_image="${staging_image_ref#*.registry.cpln.io/}"
266
+ else
267
+ staging_image="${staging_image_ref}"
268
+ fi
269
+
270
+ echo "image=${staging_image}" >> "$GITHUB_OUTPUT"
271
+
272
+ - name: Copy image from staging
273
+ env:
274
+ # Pass the upstream token via env rather than `-t` so it doesn't appear in /proc/<pid>/cmdline.
275
+ CPLN_UPSTREAM_TOKEN: ${{ secrets.CPLN_TOKEN_STAGING }}
276
+ PRODUCTION_APP_NAME: ${{ vars.PRODUCTION_APP_NAME }}
277
+ CPLN_ORG_PRODUCTION: ${{ vars.CPLN_ORG_PRODUCTION }}
278
+ STAGING_IMAGE: ${{ steps.staging-image.outputs.image }}
279
+ shell: bash
280
+ run: |
281
+ set -euo pipefail
282
+ cpflow copy-image-from-upstream -a "${PRODUCTION_APP_NAME}" --org "${CPLN_ORG_PRODUCTION}" --image "${STAGING_IMAGE}"
283
+
284
+ - name: Deploy image to production
285
+ env:
286
+ PRODUCTION_APP_NAME: ${{ vars.PRODUCTION_APP_NAME }}
287
+ CPLN_ORG_PRODUCTION: ${{ vars.CPLN_ORG_PRODUCTION }}
288
+ RELEASE_PHASE_FLAG: ${{ steps.release-phase.outputs.flag }}
289
+ shell: bash
290
+ run: |
291
+ set -euo pipefail
292
+
293
+ deploy_args=(-a "${PRODUCTION_APP_NAME}")
294
+ if [[ -n "${RELEASE_PHASE_FLAG}" ]]; then
295
+ deploy_args+=("${RELEASE_PHASE_FLAG}")
296
+ fi
297
+ deploy_args+=(--org "${CPLN_ORG_PRODUCTION}" --verbose)
298
+
299
+ cpflow deploy-image "${deploy_args[@]}"
300
+
301
+ - name: Wait for deployment health
302
+ id: health-check
303
+ uses: ./.github/actions/cpflow-wait-for-health
304
+ with:
305
+ workload_name: ${{ env.PRIMARY_WORKLOAD || 'rails' }}
306
+ app_name: ${{ vars.PRODUCTION_APP_NAME }}
307
+ org: ${{ vars.CPLN_ORG_PRODUCTION }}
308
+ max_retries: ${{ env.HEALTH_CHECK_RETRIES }}
309
+ interval_seconds: ${{ env.HEALTH_CHECK_INTERVAL }}
310
+ accepted_statuses: ${{ env.HEALTH_CHECK_ACCEPTED_STATUSES }}
311
+
312
+ - name: Roll back on failure
313
+ if: failure() && steps.capture-current.outputs.rollback_state != '' && steps.capture-current.outputs.rollback_state != '{}'
314
+ env:
315
+ ROLLBACK_STATE: ${{ steps.capture-current.outputs.rollback_state }}
316
+ PRODUCTION_APP_NAME: ${{ vars.PRODUCTION_APP_NAME }}
317
+ CPLN_ORG_PRODUCTION: ${{ vars.CPLN_ORG_PRODUCTION }}
318
+ shell: bash
319
+ run: |
320
+ # Best-effort rollback: try every workload, aggregate failures, exit non-zero at the end
321
+ # if any failed. A single cpln hiccup shouldn't leave other workloads mid-promotion.
322
+ set -uo pipefail
323
+
324
+ rollback_failures=0
325
+ if ! rollback_entries="$(echo "${ROLLBACK_STATE}" | jq -r 'to_entries[] | "\(.key)\t\(.value.containers | @json)"')"; then
326
+ echo "::error::Could not parse rollback state; manual recovery may be required." >&2
327
+ exit 1
328
+ fi
329
+
330
+ while IFS=$'\t' read -r workload_name previous_containers; do
331
+ rollback_args=()
332
+ if ! current_names="$(cpln workload get "${workload_name}" --gvc "${PRODUCTION_APP_NAME}" --org "${CPLN_ORG_PRODUCTION}" -o json | jq -c '.spec.containers | map(.name)')"; then
333
+ echo "::warning::Could not retrieve current containers for workload '${workload_name}'; skipping rollback for this workload." >&2
334
+ rollback_failures=$((rollback_failures + 1))
335
+ continue
336
+ fi
337
+ if ! previous_names="$(echo "${previous_containers}" | jq -c 'map(.name)')"; then
338
+ echo "::warning::Could not parse captured containers for workload '${workload_name}'; skipping rollback for this workload." >&2
339
+ rollback_failures=$((rollback_failures + 1))
340
+ continue
341
+ fi
342
+
343
+ if [[ "$(echo "${current_names}" | jq -c 'sort')" != "$(echo "${previous_names}" | jq -c 'sort')" ]]; then
344
+ echo "::error::Container set changed for workload '${workload_name}'; refusing rollback." >&2
345
+ rollback_failures=$((rollback_failures + 1))
346
+ continue
347
+ fi
348
+
349
+ if ! rollback_container_entries="$(
350
+ jq -r \
351
+ --argjson current_names "${current_names}" \
352
+ '.[] as $container | ($current_names | index($container.name)) as $index | "\($index)\t\($container.image)"' \
353
+ <<< "${previous_containers}"
354
+ )"; then
355
+ echo "::warning::Could not build rollback image list for workload '${workload_name}'; skipping rollback for this workload." >&2
356
+ rollback_failures=$((rollback_failures + 1))
357
+ continue
358
+ fi
359
+
360
+ while IFS=$'\t' read -r index image; do
361
+ rollback_args+=(--set "spec.containers[${index}].image=${image}")
362
+ done <<< "${rollback_container_entries}"
363
+
364
+ if ! cpln workload update "${workload_name}" \
365
+ --gvc "${PRODUCTION_APP_NAME}" \
366
+ --org "${CPLN_ORG_PRODUCTION}" \
367
+ "${rollback_args[@]}"; then
368
+ echo "::warning::Rollback failed for workload '${workload_name}'; continuing with remaining workloads." >&2
369
+ rollback_failures=$((rollback_failures + 1))
370
+ fi
371
+ done <<< "${rollback_entries}"
372
+
373
+ if [[ "${rollback_failures}" -gt 0 ]]; then
374
+ echo "::error::${rollback_failures} workload(s) failed to roll back; inspect the logs above." >&2
375
+ exit 1
376
+ fi
377
+
378
+ - name: Wait for rollback readiness
379
+ if: failure() && steps.capture-current.outputs.rollback_state != '' && steps.capture-current.outputs.rollback_state != '{}'
380
+ env:
381
+ ROLLBACK_STATE: ${{ steps.capture-current.outputs.rollback_state }}
382
+ PRODUCTION_APP_NAME: ${{ vars.PRODUCTION_APP_NAME }}
383
+ CPLN_ORG_PRODUCTION: ${{ vars.CPLN_ORG_PRODUCTION }}
384
+ shell: bash
385
+ run: |
386
+ set -euo pipefail
387
+
388
+ mapfile -t workloads < <(echo "${ROLLBACK_STATE}" | jq -r 'keys[]')
389
+
390
+ # Poll workloads in parallel so the worst-case wall time during a
391
+ # production incident is `retries × interval` rather than scaling
392
+ # linearly with the number of workloads. Each per-workload retry
393
+ # loop runs in a backgrounded subshell that writes its final state
394
+ # to a status file; the parent waits for all of them before
395
+ # aggregating warnings, keeping output ordered and deterministic.
396
+ status_dir="$(mktemp -d)"
397
+ trap 'rm -rf "${status_dir}"' EXIT
398
+
399
+ pids=()
400
+ for workload_name in "${workloads[@]}"; do
401
+ [[ -n "${workload_name}" ]] || continue
402
+
403
+ echo "Polling rollback readiness for workload '${workload_name}'..."
404
+ (
405
+ set -euo pipefail
406
+ ready=false
407
+ for attempt in $(seq 1 "${ROLLBACK_READINESS_RETRIES}"); do
408
+ deployment_ready="$(cpln workload get "${workload_name}" --gvc "${PRODUCTION_APP_NAME}" --org "${CPLN_ORG_PRODUCTION}" -o json | jq -r '.status.ready // false')"
409
+ if [[ "${deployment_ready}" == "true" ]]; then
410
+ ready=true
411
+ break
412
+ fi
413
+
414
+ if [[ "${attempt}" -lt "${ROLLBACK_READINESS_RETRIES}" ]]; then
415
+ sleep "${ROLLBACK_READINESS_INTERVAL}"
416
+ fi
417
+ done
418
+
419
+ if [[ "${ready}" == "true" ]]; then
420
+ printf 'ready\n' > "${status_dir}/${workload_name}"
421
+ else
422
+ printf 'not_ready\n' > "${status_dir}/${workload_name}"
423
+ fi
424
+ ) &
425
+ pids+=("$!")
426
+ done
427
+
428
+ # `|| true` so a single workload that fails to poll (e.g. transient
429
+ # cpln API error) doesn't abort the parent before the others finish.
430
+ # Missing or non-`ready` status files are surfaced in the aggregation
431
+ # loop below, so the failure is still visible to operators.
432
+ for pid in "${pids[@]}"; do
433
+ wait "${pid}" || true
434
+ done
435
+
436
+ for workload_name in "${workloads[@]}"; do
437
+ [[ -n "${workload_name}" ]] || continue
438
+ status_file="${status_dir}/${workload_name}"
439
+ if [[ ! -f "${status_file}" ]] || [[ "$(<"${status_file}")" != "ready" ]]; then
440
+ echo "::warning::Workload '${workload_name}' did not report ready after rollback."
441
+ fi
442
+ done
443
+
444
+ - name: Promotion summary
445
+ if: always()
446
+ env:
447
+ HEALTHY: ${{ steps.health-check.outputs.healthy }}
448
+ PREVIOUS_IMAGE: ${{ steps.capture-current.outputs.current_image }}
449
+ PREVIOUS_VERSION: ${{ steps.capture-current.outputs.current_version }}
450
+ shell: bash
451
+ run: |
452
+ {
453
+ echo "## Promotion Summary"
454
+ echo
455
+ if [[ "${HEALTHY}" == "true" ]]; then
456
+ echo "✅ Status: deployment successful"
457
+ else
458
+ echo "❌ Status: deployment failed"
459
+ fi
460
+ echo
461
+ echo "Previous image: \`${PREVIOUS_IMAGE}\`"
462
+ echo "Previous version: ${PREVIOUS_VERSION}"
463
+ } >> "$GITHUB_STEP_SUMMARY"
464
+
# Tags a GitHub release after a successful promotion. Runs only when the
# promote-to-production job finished with result "success".
create-github-release:
  needs: promote-to-production
  if: needs.promote-to-production.result == 'success'
  runs-on: ubuntu-latest
  # Bound the job like every other job in these workflows; it makes a single `gh` call.
  timeout-minutes: 5
  permissions:
    contents: write

  steps:
    - name: Create GitHub release
      env:
        # No checkout happens in this job, so `gh` is pointed at the repository via GH_REPO.
        GH_REPO: ${{ github.repository }}
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        GITHUB_RUN_ID: ${{ github.run_id }}
        STAGING_APP_NAME: ${{ vars.STAGING_APP_NAME }}
        PRODUCTION_APP_NAME: ${{ vars.PRODUCTION_APP_NAME }}
      shell: bash
      run: |
        set -euo pipefail

        # Read the clock once so the date and time in the tag, title and notes always
        # describe the same instant (two separate `date` calls can straddle midnight).
        now="$(date '+%Y-%m-%d %H%M%S')"
        release_date="${now% *}"
        timestamp="${now#* }"
        release_tag="production-${release_date}-${timestamp}-${GITHUB_RUN_ID}"

        gh release create "${release_tag}" \
          --title "Production Release ${release_date} ${timestamp}" \
          --notes "Promoted ${STAGING_APP_NAME} to ${PRODUCTION_APP_NAME} on ${release_date} at ${timestamp}."