cpflow 5.0.0.rc.1 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. checksums.yaml +4 -4
  2. data/{lib/github_flow_templates/.github → .github}/actions/cpflow-delete-control-plane-app/action.yml +5 -0
  3. data/{lib/github_flow_templates/.github → .github}/actions/cpflow-detect-release-phase/action.yml +7 -0
  4. data/.github/actions/cpflow-setup-environment/action.yml +161 -0
  5. data/.github/workflows/cpflow-cleanup-stale-review-apps.yml +69 -0
  6. data/.github/workflows/cpflow-delete-review-app.yml +182 -0
  7. data/.github/workflows/cpflow-deploy-review-app.yml +507 -0
  8. data/.github/workflows/cpflow-deploy-staging.yml +168 -0
  9. data/.github/workflows/cpflow-help-command.yml +78 -0
  10. data/.github/workflows/cpflow-promote-staging-to-production.yml +510 -0
  11. data/.github/workflows/cpflow-review-app-help.yml +51 -0
  12. data/.github/workflows/rspec-shared.yml +3 -0
  13. data/.github/workflows/trigger-docs-site.yml +90 -0
  14. data/.rubocop.yml +14 -1
  15. data/CHANGELOG.md +43 -1
  16. data/CONTRIBUTING.md +27 -0
  17. data/Gemfile.lock +2 -2
  18. data/README.md +7 -3
  19. data/cpflow.gemspec +1 -1
  20. data/docs/ai-github-flow-prompt.md +1 -1
  21. data/docs/assets/cpflow-deploying.svg +46 -0
  22. data/docs/ci-automation.md +111 -8
  23. data/docs/commands.md +11 -5
  24. data/docs/thruster.md +149 -0
  25. data/docs/troubleshooting.md +8 -0
  26. data/lib/command/apply_template.rb +6 -2
  27. data/lib/command/base.rb +1 -0
  28. data/lib/command/cleanup_stale_apps.rb +53 -14
  29. data/lib/command/delete.rb +3 -1
  30. data/lib/command/deploy_image.rb +5 -2
  31. data/lib/command/generate.rb +7 -3
  32. data/lib/command/generate_github_actions.rb +21 -9
  33. data/lib/command/generator_helpers.rb +5 -1
  34. data/lib/command/info.rb +3 -1
  35. data/lib/command/run.rb +16 -1
  36. data/lib/command/test.rb +1 -3
  37. data/lib/core/controlplane.rb +17 -6
  38. data/lib/core/controlplane_api.rb +3 -1
  39. data/lib/core/controlplane_api_direct.rb +50 -27
  40. data/lib/core/doctor_service.rb +2 -2
  41. data/lib/core/github_flow_readiness_service.rb +26 -2
  42. data/lib/core/repo_introspection.rb +41 -3
  43. data/lib/core/shell.rb +3 -1
  44. data/lib/core/terraform_config/policy.rb +1 -1
  45. data/lib/cpflow/version.rb +1 -1
  46. data/lib/cpflow.rb +27 -13
  47. data/lib/generator_templates/templates/rails.yml +4 -0
  48. data/lib/generator_templates_sqlite/templates/rails.yml +4 -0
  49. data/lib/github_flow_templates/.github/cpflow-help.md +30 -1
  50. data/lib/github_flow_templates/.github/workflows/cpflow-cleanup-stale-review-apps.yml +10 -44
  51. data/lib/github_flow_templates/.github/workflows/cpflow-delete-review-app.yml +15 -114
  52. data/lib/github_flow_templates/.github/workflows/cpflow-deploy-review-app.yml +10 -413
  53. data/lib/github_flow_templates/.github/workflows/cpflow-deploy-staging.yml +12 -123
  54. data/lib/github_flow_templates/.github/workflows/cpflow-help-command.yml +10 -33
  55. data/lib/github_flow_templates/.github/workflows/cpflow-promote-staging-to-production.yml +13 -475
  56. data/lib/github_flow_templates/.github/workflows/cpflow-review-app-help.yml +12 -30
  57. data/lib/github_flow_templates/bin/pin-cpflow-github-ref +72 -0
  58. data/lib/github_flow_templates/bin/test-cpflow-github-flow +89 -0
  59. data/rakelib/create_release.rake +4 -4
  60. metadata +26 -17
  61. data/lib/github_flow_templates/.github/actions/cpflow-setup-environment/action.yml +0 -98
  62. /data/{lib/github_flow_templates/.github → .github}/actions/cpflow-build-docker-image/action.yml +0 -0
  63. /data/{lib/github_flow_templates/.github → .github}/actions/cpflow-delete-control-plane-app/delete-app.sh +0 -0
  64. /data/{lib/github_flow_templates/.github → .github}/actions/cpflow-validate-config/action.yml +0 -0
  65. /data/{lib/github_flow_templates/.github → .github}/actions/cpflow-wait-for-health/action.yml +0 -0
@@ -0,0 +1,78 @@
# Reusable workflow: posts the contents of `.github/cpflow-help.md` as a PR comment.
# Runs for `+review-app-help` PR comments from trusted authors and for manual
# (workflow_dispatch) runs of the calling workflow.
name: Review App Help Command

on:
  workflow_call:
    inputs:
      control_plane_flow_ref:
        description: Accepted for generated wrapper consistency; unused because this workflow checks out caller content only.
        required: false
        type: string
        default: main

permissions:
  contents: read
  issues: write
  pull-requests: write

jobs:
  help:
    # Comment-triggered runs are gated on author_association so only repo
    # owners/members/collaborators can invoke them. workflow_dispatch is
    # intentionally not gated here: GitHub already restricts manual dispatches
    # to users with `actions: write` (write access to the repo), which is a
    # stricter bar than COLLABORATOR.
    if: |
      (github.event_name == 'issue_comment' &&
       github.event.issue.pull_request &&
       contains(fromJson('["+review-app-help","+review-app-help\n","+review-app-help\r\n"]'), github.event.comment.body) &&
       contains(fromJson('["OWNER","MEMBER","COLLABORATOR"]'), github.event.comment.author_association)) ||
      github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-latest
    timeout-minutes: 5

    steps:
      # Acknowledge the command with an "eyes" reaction. Purely cosmetic, so a
      # failure here must never block the help message itself.
      - name: React to help command
        if: github.event_name == 'issue_comment'
        continue-on-error: true
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd
        with:
          script: |
            try {
              await github.rest.reactions.createForIssueComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: context.payload.comment.id,
                content: "eyes"
              });
            } catch (error) {
              if (error.status === 422) {
                core.info("Help command reaction already exists.");
              } else {
                throw error;
              }
            }

      - name: Checkout repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
        with:
          # Help only reads `.github/cpflow-help.md`; no git push happens, so drop the
          # GITHUB_TOKEN credential helper to keep the token out of .git/config.
          persist-credentials: false

      - name: Post help message
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd
        with:
          script: |
            const fs = require("fs");
            const helpText = fs.readFileSync(".github/cpflow-help.md", "utf8");

            let prNumber;
            if (context.eventName === "workflow_dispatch") {
              // Manual runs have no issue context, so the PR number must come from the
              // dispatch inputs. Validate it up front: a missing `inputs` object or a
              // missing/non-numeric `pr_number` would otherwise surface as a TypeError
              // or as `NaN` being sent to the comments API.
              prNumber = Number(context.payload.inputs?.pr_number);
              if (!Number.isInteger(prNumber) || prNumber <= 0) {
                core.setFailed(
                  "workflow_dispatch runs require a numeric `pr_number` input identifying the pull request to comment on."
                );
                return;
              }
            } else {
              prNumber = context.issue.number;
            }

            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: prNumber,
              body: helpText
            });
@@ -0,0 +1,510 @@
# Reusable workflow: promotes the image currently deployed on staging to production.
# Flow: validate config -> resolve workloads -> check env-var parity -> snapshot the
# current production images -> copy + deploy the staging image -> health check ->
# roll back to the snapshot on failure -> (separate job) tag a GitHub release.
name: Promote Staging to Production

on:
  workflow_call:
    inputs:
      control_plane_flow_ref:
        description: Git ref used to load shared cpflow composite actions.
        required: false
        type: string
        default: main

permissions:
  contents: read

env:
  # Override these by setting the matching repository variable (or by editing this file).
  # Worst-case wall time per attempt is HEALTH_CHECK_INTERVAL plus the curl --max-time below
  # (10s), so the defaults give a ~10 minute window (24 × (15 + 10) = 600s) — enough for
  # most Rails cold boots (asset precompile + db:migrate + workload readiness).
  HEALTH_CHECK_RETRIES: ${{ vars.HEALTH_CHECK_RETRIES || '24' }}
  HEALTH_CHECK_INTERVAL: ${{ vars.HEALTH_CHECK_INTERVAL || '15' }}
  # Space-separated list of HTTP statuses considered healthy. The default accepts 301/302
  # because `curl` is invoked without `-L`, so a root `/` that redirects to a login page
  # (common for Rails apps that auth-gate `/`) would otherwise be reported as unhealthy
  # despite the workload itself being up.
  #
  # Strongly recommended: expose a dedicated `/health` endpoint that returns `200` and set
  # HEALTH_CHECK_ACCEPTED_STATUSES to `"200"` in repository variables. The 301/302 default
  # trades correctness for ergonomics — a maintenance-mode redirect or an auth-gate redirect
  # to a login page can pass this check even when the underlying app is broken. Override
  # via the HEALTH_CHECK_ACCEPTED_STATUSES repo variable to tighten this for apps that
  # expose a dedicated health endpoint (e.g. "200" for a plain /health, or "200 401 403"
  # for apps that auth-gate / without redirecting).
  HEALTH_CHECK_ACCEPTED_STATUSES: ${{ vars.HEALTH_CHECK_ACCEPTED_STATUSES || '200 301 302' }}
  # Polling budget for the post-rollback readiness check (attempts × seconds between attempts).
  ROLLBACK_READINESS_RETRIES: ${{ vars.ROLLBACK_READINESS_RETRIES || '24' }}
  ROLLBACK_READINESS_INTERVAL: ${{ vars.ROLLBACK_READINESS_INTERVAL || '15' }}

concurrency:
  # Single global group: only one production promotion may run at a time across the
  # whole repo. Independent of staging deploys and review-app workflows (different
  # GVCs / different concurrency keys), so those can still run in parallel.
  group: cpflow-promote-staging-to-production
  # Don't cancel an in-flight promotion: a half-finished `cpflow deploy-image` plus a
  # rollback can leave production in a worse state than letting the first run finish.
  cancel-in-progress: false

jobs:
  promote-to-production:
    # Requires the dispatcher to type the literal confirmation word.
    if: github.event.inputs.confirm_promotion == 'promote'
    runs-on: ubuntu-latest
    timeout-minutes: 45

    steps:
      - name: Checkout repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
        with:
          persist-credentials: false

      - name: Checkout control-plane-flow actions
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
        with:
          repository: shakacode/control-plane-flow
          ref: ${{ inputs.control_plane_flow_ref }}
          path: .cpflow
          persist-credentials: false

      - name: Validate required secrets and variables
        uses: ./.cpflow/.github/actions/cpflow-validate-config
        # Pass secrets via env so the composite action checks indirect shell
        # variables instead of interpolating secret values into a run script.
        env:
          CPLN_TOKEN_STAGING: ${{ secrets.CPLN_TOKEN_STAGING }}
          CPLN_TOKEN_PRODUCTION: ${{ secrets.CPLN_TOKEN_PRODUCTION }}
          CPLN_ORG_STAGING: ${{ vars.CPLN_ORG_STAGING }}
          CPLN_ORG_PRODUCTION: ${{ vars.CPLN_ORG_PRODUCTION }}
          STAGING_APP_NAME: ${{ vars.STAGING_APP_NAME }}
          PRODUCTION_APP_NAME: ${{ vars.PRODUCTION_APP_NAME }}
        with:
          required: |
            secret:CPLN_TOKEN_STAGING
            secret:CPLN_TOKEN_PRODUCTION
            variable:CPLN_ORG_STAGING
            variable:CPLN_ORG_PRODUCTION
            variable:STAGING_APP_NAME
            variable:PRODUCTION_APP_NAME

      - name: Setup production environment
        uses: ./.cpflow/.github/actions/cpflow-setup-environment
        with:
          token: ${{ secrets.CPLN_TOKEN_PRODUCTION }}
          org: ${{ vars.CPLN_ORG_PRODUCTION }}
          working_directory: .cpflow
          cpln_cli_version: ${{ vars.CPLN_CLI_VERSION }}
          cpflow_version: ${{ vars.CPFLOW_VERSION }}

      # Runs after Setup production environment so the pinned Ruby (>= 3.1) is on PATH.
      # YAML.load_file(..., aliases: true) is not supported on Ruby 3.0 (system Ruby on ubuntu-22.04).
      #
      # Outputs:
      #   names   - comma-separated workload names for the production app
      #   primary - the workload used for image capture and the health check
      - name: Resolve production app workloads
        id: workloads
        env:
          PRODUCTION_APP_NAME: ${{ vars.PRODUCTION_APP_NAME }}
          PRIMARY_WORKLOAD: ${{ vars.PRIMARY_WORKLOAD }}
        shell: bash
        run: |
          set -euo pipefail

          ruby - "${PRODUCTION_APP_NAME}" "${PRIMARY_WORKLOAD}" >> "$GITHUB_OUTPUT" <<'RUBY'
          require "yaml"

          app = ARGV.fetch(0)
          requested_primary = ARGV.fetch(1, "").to_s.strip
          data = YAML.safe_load(File.read(".controlplane/controlplane.yml"), aliases: true)
          apps = data["apps"] || {}
          app_config = apps[app]

          # All failure paths emit the `::error::` workflow command so they show up as
          # annotations, matching the other error messages in this script.
          unless app_config
            warn "::error::app '#{app}' is not defined under `apps:` in `.controlplane/controlplane.yml`."
            warn " Fix the PRODUCTION_APP_NAME repository variable or add the app to controlplane.yml."
            exit 1
          end

          # Apps that do not list `app_workloads` fall back to a single "rails" workload.
          workloads = Array(app_config["app_workloads"]).map(&:to_s).reject(&:empty?)
          workloads = ["rails"] if workloads.empty?

          # Primary workload: explicit PRIMARY_WORKLOAD wins; otherwise the only workload,
          # otherwise "rails" when present; anything else is ambiguous and fails.
          primary =
            if requested_primary.empty?
              if workloads.length == 1
                workloads.first
              elsif workloads.include?("rails")
                "rails"
              else
                warn "::error::PRIMARY_WORKLOAD is not configured and app '#{app}' has multiple workloads: #{workloads.join(', ')}."
                warn " Set the PRIMARY_WORKLOAD repository variable to one of these workloads."
                exit 1
              end
            elsif workloads.include?(requested_primary)
              requested_primary
            else
              warn "::error::PRIMARY_WORKLOAD '#{requested_primary}' is not one of: #{workloads.join(', ')}."
              exit 1
            end

          puts "names=#{workloads.join(',')}"
          puts "primary=#{primary}"
          RUBY

      - name: Detect release phase support
        id: release-phase
        uses: ./.cpflow/.github/actions/cpflow-detect-release-phase
        with:
          app_name: ${{ vars.PRODUCTION_APP_NAME }}

      - name: Verify production environment variables
        env:
          CPLN_TOKEN_STAGING: ${{ secrets.CPLN_TOKEN_STAGING }}
          CPLN_TOKEN_PRODUCTION: ${{ secrets.CPLN_TOKEN_PRODUCTION }}
          STAGING_APP_NAME: ${{ vars.STAGING_APP_NAME }}
          PRODUCTION_APP_NAME: ${{ vars.PRODUCTION_APP_NAME }}
          CPLN_ORG_STAGING: ${{ vars.CPLN_ORG_STAGING }}
          CPLN_ORG_PRODUCTION: ${{ vars.CPLN_ORG_PRODUCTION }}
        shell: bash
        run: |
          set -euo pipefail

          # Sorted GVC-level env var names for each side (`comm` below needs sorted input).
          staging_vars="$(CPLN_TOKEN="${CPLN_TOKEN_STAGING}" cpln gvc get "${STAGING_APP_NAME}" --org "${CPLN_ORG_STAGING}" -o json | jq -r '.spec.env // [] | .[].name' | sort)"
          production_vars="$(CPLN_TOKEN="${CPLN_TOKEN_PRODUCTION}" cpln gvc get "${PRODUCTION_APP_NAME}" --org "${CPLN_ORG_PRODUCTION}" -o json | jq -r '.spec.env // [] | .[].name' | sort)"

          if [[ -z "${staging_vars}" ]]; then
            echo "Staging GVC exposes no environment variables; skipping parity check."
            exit 0
          fi

          # Treat staging as the promotion source of truth: fail when a variable
          # present in staging is missing in production. Production-only variables
          # are allowed, but surface them so teams can spot drift.
          missing_vars="$(comm -23 <(printf '%s\n' "${staging_vars}") <(printf '%s\n' "${production_vars}"))"
          production_only_vars="$(comm -13 <(printf '%s\n' "${staging_vars}") <(printf '%s\n' "${production_vars}"))"

          if [[ -n "${production_only_vars}" ]]; then
            echo "::warning::Production has environment variables that are not present in staging:"
            echo "${production_only_vars}"
          fi

          if [[ -n "${missing_vars}" ]]; then
            echo "::error::Production is missing environment variables that exist in staging"
            echo "${missing_vars}"
            exit 1
          fi

      # Snapshot every production workload before deploying so a failed promotion can be
      # rolled back.
      #
      # Outputs:
      #   current_image / current_version - first container image and version of the primary workload
      #   rollback_state - JSON object keyed by workload name: {image, version, containers: [{name, image}]}
      - name: Capture current production image
        id: capture-current
        env:
          PRODUCTION_APP_NAME: ${{ vars.PRODUCTION_APP_NAME }}
          CPLN_ORG_PRODUCTION: ${{ vars.CPLN_ORG_PRODUCTION }}
          WORKLOAD_NAMES: ${{ steps.workloads.outputs.names }}
          PRIMARY_WORKLOAD: ${{ steps.workloads.outputs.primary }}
        shell: bash
        run: |
          set -euo pipefail

          selected_workload="${PRIMARY_WORKLOAD}"
          selected_image=""
          selected_version=""
          rollback_state='{}'

          while IFS= read -r workload_name; do
            [[ -n "${workload_name}" ]] || continue

            workload_json="$(cpln workload get "${workload_name}" --gvc "${PRODUCTION_APP_NAME}" --org "${CPLN_ORG_PRODUCTION}" -o json)"
            # `// empty` turns a missing field into an empty string instead of the literal
            # "null", so the `-z` guard after the loop actually catches a missing
            # image/version (same approach as the staging capture step).
            workload_image="$(echo "${workload_json}" | jq -r '.spec.containers[0].image // empty')"
            workload_containers="$(echo "${workload_json}" | jq -c '.spec.containers | map({name, image})')"
            workload_version="$(echo "${workload_json}" | jq -r '.version // empty')"

            if [[ "${workload_name}" == "${selected_workload}" ]]; then
              selected_image="${workload_image}"
              selected_version="${workload_version}"
            fi

            rollback_state="$(
              jq -c \
                --arg workload "${workload_name}" \
                --arg image "${workload_image}" \
                --arg version "${workload_version}" \
                --argjson containers "${workload_containers}" \
                '. + {($workload): {image: $image, version: $version, containers: $containers}}' \
                <<< "${rollback_state}"
            )"
          done < <(tr ',' '\n' <<< "${WORKLOAD_NAMES}")

          if [[ -z "${selected_image}" || -z "${selected_version}" ]]; then
            echo "::error::Could not capture current image/version for primary workload '${selected_workload}'." >&2
            exit 1
          fi

          echo "current_image=${selected_image}" >> "$GITHUB_OUTPUT"
          echo "current_version=${selected_version}" >> "$GITHUB_OUTPUT"
          # Randomize the heredoc delimiter so a stray "EOF" line inside rollback_state can't terminate it early.
          delim="EOF_$(openssl rand -hex 8)"
          {
            echo "rollback_state<<${delim}"
            echo "${rollback_state}"
            echo "${delim}"
          } >> "$GITHUB_OUTPUT"

      # Output:
      #   image - the primary staging workload's image with any `/org/<org>/image/` or
      #           `<org>.registry.cpln.io/` prefix stripped
      - name: Capture deployed staging image
        id: staging-image
        env:
          CPLN_TOKEN_STAGING: ${{ secrets.CPLN_TOKEN_STAGING }}
          STAGING_APP_NAME: ${{ vars.STAGING_APP_NAME }}
          CPLN_ORG_STAGING: ${{ vars.CPLN_ORG_STAGING }}
          WORKLOAD_NAMES: ${{ steps.workloads.outputs.names }}
          PRIMARY_WORKLOAD: ${{ steps.workloads.outputs.primary }}
        shell: bash
        run: |
          set -euo pipefail

          selected_workload="${PRIMARY_WORKLOAD}"
          selected_image=""

          # Every listed workload must exist on staging with an image; only the primary
          # workload's image is promoted.
          while IFS= read -r workload_name; do
            [[ -n "${workload_name}" ]] || continue

            workload_json="$(CPLN_TOKEN="${CPLN_TOKEN_STAGING}" cpln workload get "${workload_name}" --gvc "${STAGING_APP_NAME}" --org "${CPLN_ORG_STAGING}" -o json)"
            workload_image="$(echo "${workload_json}" | jq -r '.spec.containers[0].image // empty')"

            if [[ -z "${workload_image}" ]]; then
              echo "::error::Could not find an image on staging workload '${workload_name}'." >&2
              exit 1
            fi

            if [[ "${workload_name}" == "${selected_workload}" ]]; then
              selected_image="${workload_image}"
            fi
          done < <(tr ',' '\n' <<< "${WORKLOAD_NAMES}")

          staging_image_ref="${selected_image}"
          if [[ -z "${staging_image_ref}" ]]; then
            echo "::error::Could not determine the deployed staging image for primary workload '${selected_workload}'." >&2
            exit 1
          fi

          # Reduce the reference to the bare image name by dropping a leading
          # `/org/<org>/image/` link or `<org>.registry.cpln.io/` registry host.
          if [[ "${staging_image_ref}" == /org/*/image/* ]]; then
            staging_image="${staging_image_ref##*/image/}"
          elif [[ "${staging_image_ref}" == *.registry.cpln.io/* ]]; then
            staging_image="${staging_image_ref#*.registry.cpln.io/}"
          else
            staging_image="${staging_image_ref}"
          fi

          echo "image=${staging_image}" >> "$GITHUB_OUTPUT"

      - name: Copy image from staging
        env:
          # Pass the upstream token via env rather than `-t` so it doesn't appear in /proc/<pid>/cmdline.
          CPLN_UPSTREAM_TOKEN: ${{ secrets.CPLN_TOKEN_STAGING }}
          PRODUCTION_APP_NAME: ${{ vars.PRODUCTION_APP_NAME }}
          CPLN_ORG_PRODUCTION: ${{ vars.CPLN_ORG_PRODUCTION }}
          STAGING_IMAGE: ${{ steps.staging-image.outputs.image }}
        shell: bash
        run: |
          set -euo pipefail
          cpflow copy-image-from-upstream -a "${PRODUCTION_APP_NAME}" --org "${CPLN_ORG_PRODUCTION}" --image "${STAGING_IMAGE}"

      - name: Deploy image to production
        env:
          PRODUCTION_APP_NAME: ${{ vars.PRODUCTION_APP_NAME }}
          CPLN_ORG_PRODUCTION: ${{ vars.CPLN_ORG_PRODUCTION }}
          RELEASE_PHASE_FLAG: ${{ steps.release-phase.outputs.flag }}
        shell: bash
        run: |
          set -euo pipefail

          # Build the argument list as an array so an empty release-phase flag is
          # omitted entirely rather than passed as an empty argument.
          deploy_args=(-a "${PRODUCTION_APP_NAME}")
          if [[ -n "${RELEASE_PHASE_FLAG}" ]]; then
            deploy_args+=("${RELEASE_PHASE_FLAG}")
          fi
          deploy_args+=(--org "${CPLN_ORG_PRODUCTION}" --verbose)

          cpflow deploy-image "${deploy_args[@]}"

      - name: Wait for deployment health
        id: health-check
        uses: ./.cpflow/.github/actions/cpflow-wait-for-health
        with:
          workload_name: ${{ steps.workloads.outputs.primary }}
          app_name: ${{ vars.PRODUCTION_APP_NAME }}
          org: ${{ vars.CPLN_ORG_PRODUCTION }}
          max_retries: ${{ env.HEALTH_CHECK_RETRIES }}
          interval_seconds: ${{ env.HEALTH_CHECK_INTERVAL }}
          accepted_statuses: ${{ env.HEALTH_CHECK_ACCEPTED_STATUSES }}

      # Only runs when an earlier step failed AND a non-empty snapshot was captured.
      - name: Roll back on failure
        if: failure() && steps.capture-current.outputs.rollback_state != '' && steps.capture-current.outputs.rollback_state != '{}'
        env:
          ROLLBACK_STATE: ${{ steps.capture-current.outputs.rollback_state }}
          PRODUCTION_APP_NAME: ${{ vars.PRODUCTION_APP_NAME }}
          CPLN_ORG_PRODUCTION: ${{ vars.CPLN_ORG_PRODUCTION }}
        shell: bash
        run: |
          # Best-effort rollback: try every workload, aggregate failures, exit non-zero at the end
          # if any failed. A single cpln hiccup shouldn't leave other workloads mid-promotion.
          # (Deliberately no `-e`: individual failures are handled explicitly below.)
          set -uo pipefail

          rollback_failures=0
          # One line per workload: "<workload>\t<JSON array of captured {name, image}>".
          if ! rollback_entries="$(echo "${ROLLBACK_STATE}" | jq -r 'to_entries[] | "\(.key)\t\(.value.containers | @json)"')"; then
            echo "::error::Could not parse rollback state; manual recovery may be required." >&2
            exit 1
          fi

          while IFS=$'\t' read -r workload_name previous_containers; do
            rollback_args=()
            if ! current_names="$(cpln workload get "${workload_name}" --gvc "${PRODUCTION_APP_NAME}" --org "${CPLN_ORG_PRODUCTION}" -o json | jq -c '.spec.containers | map(.name)')"; then
              echo "::warning::Could not retrieve current containers for workload '${workload_name}'; skipping rollback for this workload." >&2
              rollback_failures=$((rollback_failures + 1))
              continue
            fi
            if ! previous_names="$(echo "${previous_containers}" | jq -c 'map(.name)')"; then
              echo "::warning::Could not parse captured containers for workload '${workload_name}'; skipping rollback for this workload." >&2
              rollback_failures=$((rollback_failures + 1))
              continue
            fi

            # Images are restored by container index, so refuse to touch a workload whose
            # set of container names no longer matches the snapshot.
            if [[ "$(echo "${current_names}" | jq -c 'sort')" != "$(echo "${previous_names}" | jq -c 'sort')" ]]; then
              echo "::error::Container set changed for workload '${workload_name}'; refusing rollback." >&2
              rollback_failures=$((rollback_failures + 1))
              continue
            fi

            # Map each captured container to its CURRENT index: "<index>\t<previous image>".
            if ! rollback_container_entries="$(
              jq -r \
                --argjson current_names "${current_names}" \
                '.[] as $container | ($current_names | index($container.name)) as $index | "\($index)\t\($container.image)"' \
                <<< "${previous_containers}"
            )"; then
              echo "::warning::Could not build rollback image list for workload '${workload_name}'; skipping rollback for this workload." >&2
              rollback_failures=$((rollback_failures + 1))
              continue
            fi

            while IFS=$'\t' read -r index image; do
              rollback_args+=(--set "spec.containers[${index}].image=${image}")
            done <<< "${rollback_container_entries}"

            if ! cpln workload update "${workload_name}" \
              --gvc "${PRODUCTION_APP_NAME}" \
              --org "${CPLN_ORG_PRODUCTION}" \
              "${rollback_args[@]}"; then
              echo "::warning::Rollback failed for workload '${workload_name}'; continuing with remaining workloads." >&2
              rollback_failures=$((rollback_failures + 1))
            fi
          done <<< "${rollback_entries}"

          if [[ "${rollback_failures}" -gt 0 ]]; then
            echo "::error::${rollback_failures} workload(s) failed to roll back; inspect the logs above." >&2
            exit 1
          fi

      - name: Wait for rollback readiness
        if: failure() && steps.capture-current.outputs.rollback_state != '' && steps.capture-current.outputs.rollback_state != '{}'
        env:
          ROLLBACK_STATE: ${{ steps.capture-current.outputs.rollback_state }}
          PRODUCTION_APP_NAME: ${{ vars.PRODUCTION_APP_NAME }}
          CPLN_ORG_PRODUCTION: ${{ vars.CPLN_ORG_PRODUCTION }}
        shell: bash
        run: |
          set -euo pipefail

          mapfile -t workloads < <(echo "${ROLLBACK_STATE}" | jq -r 'keys[]')

          # Poll workloads in parallel so the worst-case wall time during a
          # production incident is `retries × interval` rather than scaling
          # linearly with the number of workloads. Each per-workload retry
          # loop runs in a backgrounded subshell that writes its final state
          # to a status file; the parent waits for all of them before
          # aggregating warnings, keeping output ordered and deterministic.
          status_dir="$(mktemp -d)"
          trap 'rm -rf "${status_dir}"' EXIT

          pids=()
          for workload_name in "${workloads[@]}"; do
            [[ -n "${workload_name}" ]] || continue

            echo "Polling rollback readiness for workload '${workload_name}'..."
            (
              set -euo pipefail
              ready=false
              for attempt in $(seq 1 "${ROLLBACK_READINESS_RETRIES}"); do
                deployment_ready="$(cpln workload get "${workload_name}" --gvc "${PRODUCTION_APP_NAME}" --org "${CPLN_ORG_PRODUCTION}" -o json | jq -r '.status.ready // false')"
                if [[ "${deployment_ready}" == "true" ]]; then
                  ready=true
                  break
                fi

                # No point sleeping after the final attempt.
                if [[ "${attempt}" -lt "${ROLLBACK_READINESS_RETRIES}" ]]; then
                  sleep "${ROLLBACK_READINESS_INTERVAL}"
                fi
              done

              if [[ "${ready}" == "true" ]]; then
                printf 'ready\n' > "${status_dir}/${workload_name}"
              else
                printf 'not_ready\n' > "${status_dir}/${workload_name}"
              fi
            ) &
            pids+=("$!")
          done

          # `|| true` so a single workload that fails to poll (e.g. transient
          # cpln API error) doesn't abort the parent before the others finish.
          # Missing or non-`ready` status files are surfaced in the aggregation
          # loop below, so the failure is still visible to operators.
          for pid in "${pids[@]}"; do
            wait "${pid}" || true
          done

          for workload_name in "${workloads[@]}"; do
            [[ -n "${workload_name}" ]] || continue
            status_file="${status_dir}/${workload_name}"
            if [[ ! -f "${status_file}" ]] || [[ "$(<"${status_file}")" != "ready" ]]; then
              echo "::warning::Workload '${workload_name}' did not report ready after rollback."
            fi
          done

      # Always written, so failed runs still record what production was running before.
      - name: Promotion summary
        if: always()
        env:
          HEALTHY: ${{ steps.health-check.outputs.healthy }}
          PREVIOUS_IMAGE: ${{ steps.capture-current.outputs.current_image }}
          PREVIOUS_VERSION: ${{ steps.capture-current.outputs.current_version }}
        shell: bash
        run: |
          {
            echo "## Promotion Summary"
            echo
            if [[ "${HEALTHY}" == "true" ]]; then
              echo "✅ Status: deployment successful"
            else
              echo "❌ Status: deployment failed"
            fi
            echo
            echo "Previous image: \`${PREVIOUS_IMAGE}\`"
            echo "Previous version: ${PREVIOUS_VERSION}"
          } >> "$GITHUB_STEP_SUMMARY"

  # Tags the promotion as a GitHub release; needs write access to repository contents,
  # which the promotion job itself deliberately does not have.
  create-github-release:
    needs: promote-to-production
    if: needs.promote-to-production.result == 'success'
    runs-on: ubuntu-latest
    permissions:
      contents: write

    steps:
      - name: Create GitHub release
        env:
          GH_REPO: ${{ github.repository }}
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          STAGING_APP_NAME: ${{ vars.STAGING_APP_NAME }}
          PRODUCTION_APP_NAME: ${{ vars.PRODUCTION_APP_NAME }}
        shell: bash
        run: |
          set -euo pipefail

          # Sample the clock once so the date and time parts always describe the same
          # instant; two separate `date` calls could straddle midnight.
          now="$(date '+%Y-%m-%d %H%M%S')"
          release_date="${now% *}"
          timestamp="${now#* }"
          release_tag="production-${release_date}-${timestamp}-${GITHUB_RUN_ID}"

          gh release create "${release_tag}" \
            --title "Production Release ${release_date} ${timestamp}" \
            --notes "Promoted ${STAGING_APP_NAME} to ${PRODUCTION_APP_NAME} on ${release_date} at ${timestamp}."
@@ -0,0 +1,51 @@
# Reusable workflow: drops a short cheat sheet of the `+review-app-*` comment
# commands onto a pull request.
name: Show Review App Commands on PR Open

on:
  workflow_call:
    inputs:
      control_plane_flow_ref:
        description: Accepted for generated wrapper consistency; unused because this workflow does not check out shared actions.
        required: false
        type: string
        default: main

# Only needs to create a comment on the PR.
permissions:
  issues: write
  pull-requests: write

jobs:
  show-help:
    # Stay silent until the repository opts in to the cpflow review app flow by
    # defining `vars.REVIEW_APP_PREFIX`; otherwise every contributor PR would get
    # a comment about commands that are not wired up yet.
    if: vars.REVIEW_APP_PREFIX != ''
    runs-on: ubuntu-latest
    timeout-minutes: 5
    steps:
      - name: Post quick reference
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd
        with:
          script: |
            // Markdown lines of the cheat sheet, joined with newlines below.
            const referenceLines = [
              "# 🚀 Quick Review App Commands",
              "",
              "Welcome! Here are the commands you can use in this PR:",
              "",
              "### `+review-app-deploy`",
              "Deploy your PR branch for testing.",
              "",
              "### `+review-app-delete`",
              "Remove the review app when done.",
              "",
              "### `+review-app-help`",
              "Show detailed instructions, environment setup, and configuration options.",
              "",
              "_Comment `+review-app-help` for full setup details._"
            ];
            const quickReference = referenceLines.join("\n");

            const { owner, repo } = context.repo;
            await github.rest.issues.createComment({
              owner,
              repo,
              issue_number: context.issue.number,
              body: quickReference
            });
@@ -19,6 +19,9 @@ on:
19
19
  jobs:
20
20
  rspec:
21
21
  runs-on: ${{ inputs.os_version }}
22
+ concurrency:
23
+ group: cpln-shared-org-${{ vars.CPLN_ORG || github.run_id }}
24
+ cancel-in-progress: false
22
25
  env:
23
26
  RAILS_ENV: test
24
27
  # We have to add "_CI" to the end, otherwise it messes with tests where we switch profiles,