cpflow 5.0.4 → 5.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. checksums.yaml +4 -4
  2. data/.github/actions/cpflow-wait-for-health/action.yml +11 -4
  3. data/.github/workflows/cpflow-promote-staging-to-production.yml +269 -43
  4. data/.github/workflows/rspec-shared.yml +8 -1
  5. data/CHANGELOG.md +28 -1
  6. data/Gemfile.lock +1 -1
  7. data/README.md +36 -11
  8. data/docs/ai-github-flow-prompt.md +1 -1
  9. data/docs/assets/logo/favicon.ico +0 -0
  10. data/docs/assets/logo/icon-1024.png +0 -0
  11. data/docs/assets/logo/icon-128.png +0 -0
  12. data/docs/assets/logo/icon-16.png +0 -0
  13. data/docs/assets/logo/icon-192.png +0 -0
  14. data/docs/assets/logo/icon-24.png +0 -0
  15. data/docs/assets/logo/icon-32.png +0 -0
  16. data/docs/assets/logo/icon-48.png +0 -0
  17. data/docs/assets/logo/icon-512.png +0 -0
  18. data/docs/assets/logo/icon-64.png +0 -0
  19. data/docs/assets/logo/icon-tile.svg +17 -0
  20. data/docs/assets/logo/mark-transparent.svg +16 -0
  21. data/docs/ci-automation.md +137 -47
  22. data/docs/commands.md +13 -3
  23. data/docs/postgres.md +6 -0
  24. data/docs/rds-private-networking.md +649 -0
  25. data/docs/secrets-and-env-values.md +49 -0
  26. data/docs/tips.md +256 -10
  27. data/examples/controlplane.yml +8 -0
  28. data/lib/command/ai_github_flow_prompt.rb +1 -1
  29. data/lib/command/apply_template.rb +3 -0
  30. data/lib/command/base.rb +69 -0
  31. data/lib/command/cleanup_stale_apps.rb +1 -1
  32. data/lib/command/delete.rb +85 -10
  33. data/lib/command/deploy_image.rb +30 -8
  34. data/lib/command/generate_github_actions.rb +6 -0
  35. data/lib/command/maintenance_off.rb +1 -0
  36. data/lib/command/maintenance_on.rb +1 -0
  37. data/lib/command/run.rb +25 -5
  38. data/lib/command/setup_app.rb +11 -2
  39. data/lib/core/config.rb +81 -0
  40. data/lib/core/controlplane.rb +15 -5
  41. data/lib/core/maintenance_mode.rb +93 -6
  42. data/lib/core/template_parser.rb +4 -0
  43. data/lib/cpflow/version.rb +1 -1
  44. data/lib/generator_templates/controlplane.yml +7 -0
  45. data/lib/generator_templates_sqlite/controlplane.yml +7 -0
  46. data/lib/github_flow_templates/.github/cpflow-help.md +48 -13
  47. data/lib/github_flow_templates/.github/workflows/cpflow-promote-staging-to-production.yml +768 -15
  48. data/lib/github_flow_templates/bin/pin-cpflow-github-ref +17 -3
  49. data/lib/github_flow_templates/bin/test-cpflow-github-flow +61 -9
  50. metadata +15 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 51e0566b72525c5e975b384c12930f95ad5f13faaf1691ccbcca27b0c50a13b4
4
- data.tar.gz: 91560ebafb43692488b8996ef5e05799a5621bc6066329a71636b3677d1c63e5
3
+ metadata.gz: 183da85ac156c39e59af60c42727a8144e9b23bcc44fcacb3eb6a0498a3ab831
4
+ data.tar.gz: c801e2e1c97114fbd405494600ad9c637b757256331dd59a4069cc56e59b3934
5
5
  SHA512:
6
- metadata.gz: 89ec31dcc8d5b6b53ee62246ec8b514513101466c2f3751297583a540fad6ac99b4579ff6d13dbd332080313e3c8fb25df527b55d69a3e1037ce35a0625f98a4
7
- data.tar.gz: 2f3c85b15e050142703328aa6ef5b24bfd5c6ca32d386c6c2918dee0d1ee4b1a34092980781195f65ca59bacc095d57df24a85a26a5be5e719fd79d6cb205748
6
+ metadata.gz: d9a96ff2bafc56fa5d735780295c2b100f43bacc39c5726b9171e2f4390799168ed6d08286147d3fda284484488687dd98869d69b3eb1de41b87ca8e211c48ec
7
+ data.tar.gz: affc08be1954d87d78a3284ba44c44a66617bdd8ddf3d1987f661fbb2751d7897b53feb82c8a1bfa4085e649f6f8dd5222d04f9142ed73b65f1c4b8af3b1eb2d
data/.github/actions/cpflow-wait-for-health/action.yml CHANGED
@@ -1,8 +1,9 @@
1
1
  name: Wait for Control Plane workload health
2
2
  description: >-
3
- Polls the workload's status endpoint with curl and exits success when the
4
- HTTP response status is in the accepted list. Fails non-zero (and reports
5
- `healthy=false`) once retries are exhausted.
3
+ Polls Control Plane until the latest workload version is ready, then checks
4
+ the workload endpoint with curl. Exits success when the HTTP response status
5
+ is in the accepted list. Fails non-zero (and reports `healthy=false`) once
6
+ retries are exhausted.
6
7
 
7
8
  inputs:
8
9
  workload_name:
@@ -68,8 +69,14 @@ runs:
68
69
  exit 1
69
70
  fi
70
71
 
72
+ workload_ready="$(echo "${workload_json}" | jq -r '.status.ready // false')"
73
+ latest_ready="$(echo "${workload_json}" | jq -r '.status.readyLatest // false')"
74
+ readiness_status="$(echo "${workload_json}" | jq -r '.health.readiness // "unknown"')"
71
75
  endpoint="$(echo "${workload_json}" | jq -r '.status.endpoint // empty')"
72
- if [[ -n "${endpoint}" ]]; then
76
+
77
+ if [[ "${workload_ready}" != "true" || "${latest_ready}" != "true" ]]; then
78
+ echo "Workload status: ready=${workload_ready}, readyLatest=${latest_ready}, readiness=${readiness_status}; waiting for latest deployment."
79
+ elif [[ -n "${endpoint}" ]]; then
73
80
  http_status="$(curl -s -o /dev/null -w '%{http_code}' --max-time "${CPFLOW_CURL_MAX_TIME}" "${endpoint}" 2>/dev/null || echo 000)"
74
81
  echo "Endpoint: ${endpoint}, HTTP status: ${http_status}"
75
82
 
data/.github/workflows/cpflow-promote-staging-to-production.yml CHANGED
@@ -39,6 +39,8 @@ env:
39
39
  # expose a dedicated health endpoint (e.g. "200" for a plain /health, or "200 401 403"
40
40
  # for apps that auth-gate / without redirecting).
41
41
  HEALTH_CHECK_ACCEPTED_STATUSES: ${{ vars.HEALTH_CHECK_ACCEPTED_STATUSES || '200 301 302' }}
42
+ COPY_IMAGE_RETRIES: ${{ vars.COPY_IMAGE_RETRIES || '3' }}
43
+ COPY_IMAGE_RETRY_INTERVAL: ${{ vars.COPY_IMAGE_RETRY_INTERVAL || '20' }}
42
44
  ROLLBACK_READINESS_RETRIES: 24
43
45
  ROLLBACK_READINESS_INTERVAL: 15
44
46
 
@@ -108,11 +110,58 @@ jobs:
108
110
  variable:STAGING_APP_NAME
109
111
  variable:PRODUCTION_APP_NAME
110
112
 
113
+ - name: Normalize Control Plane org names
114
+ id: cpln-orgs
115
+ env:
116
+ CPLN_ORG_STAGING: ${{ vars.CPLN_ORG_STAGING }}
117
+ CPLN_ORG_PRODUCTION: ${{ vars.CPLN_ORG_PRODUCTION }}
118
+ shell: bash
119
+ run: |
120
+ set -euo pipefail
121
+
122
+ sanitize_control_plane_name() {
123
+ local label="$1"
124
+ local value="$2"
125
+
126
+ value="${value#"${value%%[![:space:]]*}"}"
127
+ value="${value%"${value##*[![:space:]]}"}"
128
+
129
+ if [[ "${value}" == *$'\r'* || "${value}" == *$'\n'* ]]; then
130
+ echo "::error::${label} contains embedded line endings; remove them from the repository variable instead of relying on normalization." >&2
131
+ exit 1
132
+ fi
133
+
134
+ printf '%s' "${value}"
135
+ }
136
+
137
+ validate_control_plane_org() {
138
+ local label="$1"
139
+ local value="$2"
140
+
141
+ if ! [[ "${value}" =~ ^[a-z0-9]([a-z0-9-]*[a-z0-9])?$ ]]; then
142
+ local display_value
143
+ display_value="$(printf '%q' "${value}")"
144
+ echo "::error::${label} (${display_value}) must be a valid Control Plane org name; use lowercase alphanumeric characters and hyphens only, with no leading or trailing hyphen." >&2
145
+ exit 1
146
+ fi
147
+ }
148
+
149
+ staging_org="$(sanitize_control_plane_name "CPLN_ORG_STAGING" "${CPLN_ORG_STAGING}")"
150
+ production_org="$(sanitize_control_plane_name "CPLN_ORG_PRODUCTION" "${CPLN_ORG_PRODUCTION}")"
151
+
152
+ validate_control_plane_org "CPLN_ORG_STAGING" "${staging_org}"
153
+ validate_control_plane_org "CPLN_ORG_PRODUCTION" "${production_org}"
154
+
155
+ {
156
+ echo "staging=${staging_org}"
157
+ echo "production=${production_org}"
158
+ } >> "$GITHUB_OUTPUT"
159
+
111
160
  - name: Setup production environment
112
161
  uses: ./.cpflow/.github/actions/cpflow-setup-environment
113
162
  with:
114
163
  token: ${{ secrets.CPLN_TOKEN_PRODUCTION }}
115
- org: ${{ vars.CPLN_ORG_PRODUCTION }}
164
+ org: ${{ steps.cpln-orgs.outputs.production }}
116
165
  working_directory: .cpflow
117
166
  cpln_cli_version: ${{ vars.CPLN_CLI_VERSION }}
118
167
  cpflow_version: ${{ vars.CPFLOW_VERSION }}
@@ -181,42 +230,100 @@ jobs:
181
230
  CPLN_TOKEN_PRODUCTION: ${{ secrets.CPLN_TOKEN_PRODUCTION }}
182
231
  STAGING_APP_NAME: ${{ vars.STAGING_APP_NAME }}
183
232
  PRODUCTION_APP_NAME: ${{ vars.PRODUCTION_APP_NAME }}
184
- CPLN_ORG_STAGING: ${{ vars.CPLN_ORG_STAGING }}
185
- CPLN_ORG_PRODUCTION: ${{ vars.CPLN_ORG_PRODUCTION }}
233
+ CPLN_ORG_STAGING: ${{ steps.cpln-orgs.outputs.staging }}
234
+ CPLN_ORG_PRODUCTION: ${{ steps.cpln-orgs.outputs.production }}
235
+ WORKLOAD_NAMES: ${{ steps.workloads.outputs.names }}
186
236
  shell: bash
187
237
  run: |
188
238
  set -euo pipefail
189
239
 
190
- staging_vars="$(CPLN_TOKEN="${CPLN_TOKEN_STAGING}" cpln gvc get "${STAGING_APP_NAME}" --org "${CPLN_ORG_STAGING}" -o json | jq -r '.spec.env // [] | .[].name' | sort)"
191
- production_vars="$(CPLN_TOKEN="${CPLN_TOKEN_PRODUCTION}" cpln gvc get "${PRODUCTION_APP_NAME}" --org "${CPLN_ORG_PRODUCTION}" -o json | jq -r '.spec.env // [] | .[].name' | sort)"
240
+ list_gvc_env_names() {
241
+ local token="$1"
242
+ local org="$2"
243
+ local app="$3"
244
+
245
+ CPLN_TOKEN="${token}" cpln gvc get "${app}" --org "${org}" -o json |
246
+ jq -r '.spec.env // [] | .[] | .name // empty' |
247
+ sort -u
248
+ }
249
+
250
+ list_workload_env_names() {
251
+ local token="$1"
252
+ local org="$2"
253
+ local app="$3"
254
+ local workload="$4"
255
+
256
+ CPLN_TOKEN="${token}" cpln workload get "${workload}" --gvc "${app}" --org "${org}" -o json |
257
+ jq -r '.spec.containers // [] | .[] | (.env // [])[]? | .name // empty' |
258
+ sort -u
259
+ }
260
+
261
+ check_required_vars() {
262
+ local staging_scope="$1"
263
+ local production_scope="$2"
264
+ local missing_message="$3"
265
+ local staging_vars="$4"
266
+ local production_vars="$5"
267
+ local missing_vars
268
+ local production_only_vars
269
+
270
+ if [[ -z "${staging_vars}" ]]; then
271
+ echo "Staging ${staging_scope} exposes no environment variables; skipping parity check."
272
+ return
273
+ fi
192
274
 
193
- if [[ -z "${staging_vars}" ]]; then
194
- echo "Staging GVC exposes no environment variables; skipping parity check."
195
- exit 0
196
- fi
275
+ # Treat staging as the promotion source of truth: fail when a variable
276
+ # present in staging is missing in production. Production-only variables
277
+ # are allowed, but surface them so teams can spot drift.
278
+ missing_vars="$(comm -23 <(printf '%s\n' "${staging_vars}") <(printf '%s\n' "${production_vars}"))"
279
+ production_only_vars="$(comm -13 <(printf '%s\n' "${staging_vars}") <(printf '%s\n' "${production_vars}"))"
197
280
 
198
- # Treat staging as the promotion source of truth: fail when a variable
199
- # present in staging is missing in production. Production-only variables
200
- # are allowed, but surface them so teams can spot drift.
201
- missing_vars="$(comm -23 <(printf '%s\n' "${staging_vars}") <(printf '%s\n' "${production_vars}"))"
202
- production_only_vars="$(comm -13 <(printf '%s\n' "${staging_vars}") <(printf '%s\n' "${production_vars}"))"
281
+ if [[ -n "${production_only_vars}" ]]; then
282
+ echo "::warning::Production ${production_scope} has environment variables that are not present in staging:"
283
+ echo "${production_only_vars}"
284
+ fi
203
285
 
204
- if [[ -n "${production_only_vars}" ]]; then
205
- echo "::warning::Production has environment variables that are not present in staging:"
206
- echo "${production_only_vars}"
207
- fi
286
+ if [[ -n "${missing_vars}" ]]; then
287
+ echo "::error::${missing_message}"
288
+ echo "${missing_vars}"
289
+ env_check_failed=1
290
+ fi
291
+ }
292
+
293
+ # check_required_vars intentionally mutates env_check_failed in this
294
+ # shell; keep calls outside subshells so failures aggregate before the
295
+ # final exit.
296
+ env_check_failed=0
297
+
298
+ staging_vars="$(list_gvc_env_names "${CPLN_TOKEN_STAGING}" "${CPLN_ORG_STAGING}" "${STAGING_APP_NAME}")"
299
+ production_vars="$(list_gvc_env_names "${CPLN_TOKEN_PRODUCTION}" "${CPLN_ORG_PRODUCTION}" "${PRODUCTION_APP_NAME}")"
300
+ check_required_vars \
301
+ "GVC '${STAGING_APP_NAME}'" \
302
+ "GVC '${PRODUCTION_APP_NAME}'" \
303
+ "Production GVC '${PRODUCTION_APP_NAME}' is missing environment variables that exist in staging" \
304
+ "${staging_vars}" \
305
+ "${production_vars}"
208
306
 
209
- if [[ -n "${missing_vars}" ]]; then
210
- echo "::error::Production is missing environment variables that exist in staging"
211
- echo "${missing_vars}"
212
- exit 1
213
- fi
307
+ while IFS= read -r workload_name; do
308
+ [[ -n "${workload_name}" ]] || continue
309
+
310
+ staging_workload_vars="$(list_workload_env_names "${CPLN_TOKEN_STAGING}" "${CPLN_ORG_STAGING}" "${STAGING_APP_NAME}" "${workload_name}")"
311
+ production_workload_vars="$(list_workload_env_names "${CPLN_TOKEN_PRODUCTION}" "${CPLN_ORG_PRODUCTION}" "${PRODUCTION_APP_NAME}" "${workload_name}")"
312
+ check_required_vars \
313
+ "workload '${workload_name}'" \
314
+ "workload '${workload_name}'" \
315
+ "Production workload '${workload_name}' is missing environment variables that exist in staging" \
316
+ "${staging_workload_vars}" \
317
+ "${production_workload_vars}"
318
+ done < <(tr ',' '\n' <<< "${WORKLOAD_NAMES}")
319
+
320
+ exit "${env_check_failed}"
214
321
 
215
322
  - name: Capture current production image
216
323
  id: capture-current
217
324
  env:
218
325
  PRODUCTION_APP_NAME: ${{ vars.PRODUCTION_APP_NAME }}
219
- CPLN_ORG_PRODUCTION: ${{ vars.CPLN_ORG_PRODUCTION }}
326
+ CPLN_ORG_PRODUCTION: ${{ steps.cpln-orgs.outputs.production }}
220
327
  WORKLOAD_NAMES: ${{ steps.workloads.outputs.names }}
221
328
  PRIMARY_WORKLOAD: ${{ steps.workloads.outputs.primary }}
222
329
  shell: bash
@@ -272,7 +379,7 @@ jobs:
272
379
  env:
273
380
  CPLN_TOKEN_STAGING: ${{ secrets.CPLN_TOKEN_STAGING }}
274
381
  STAGING_APP_NAME: ${{ vars.STAGING_APP_NAME }}
275
- CPLN_ORG_STAGING: ${{ vars.CPLN_ORG_STAGING }}
382
+ CPLN_ORG_STAGING: ${{ steps.cpln-orgs.outputs.staging }}
276
383
  WORKLOAD_NAMES: ${{ steps.workloads.outputs.names }}
277
384
  PRIMARY_WORKLOAD: ${{ steps.workloads.outputs.primary }}
278
385
  shell: bash
@@ -314,22 +421,137 @@ jobs:
314
421
 
315
422
  echo "image=${staging_image}" >> "$GITHUB_OUTPUT"
316
423
 
424
+ - name: Set up Docker Buildx
425
+ uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5
426
+
317
427
  - name: Copy image from staging
428
+ id: copy-image
318
429
  env:
319
- # Pass the upstream token via env rather than `-t` so it doesn't appear in /proc/<pid>/cmdline.
320
- CPLN_UPSTREAM_TOKEN: ${{ secrets.CPLN_TOKEN_STAGING }}
430
+ CPLN_TOKEN_STAGING: ${{ secrets.CPLN_TOKEN_STAGING }}
431
+ CPLN_TOKEN_PRODUCTION: ${{ secrets.CPLN_TOKEN_PRODUCTION }}
321
432
  PRODUCTION_APP_NAME: ${{ vars.PRODUCTION_APP_NAME }}
322
- CPLN_ORG_PRODUCTION: ${{ vars.CPLN_ORG_PRODUCTION }}
433
+ CPLN_ORG_STAGING: ${{ steps.cpln-orgs.outputs.staging }}
434
+ CPLN_ORG_PRODUCTION: ${{ steps.cpln-orgs.outputs.production }}
323
435
  STAGING_IMAGE: ${{ steps.staging-image.outputs.image }}
324
436
  shell: bash
325
437
  run: |
326
438
  set -euo pipefail
327
- cpflow copy-image-from-upstream -a "${PRODUCTION_APP_NAME}" --org "${CPLN_ORG_PRODUCTION}" --image "${STAGING_IMAGE}"
439
+
440
+ if ! [[ "${COPY_IMAGE_RETRIES}" =~ ^[0-9]+$ ]]; then
441
+ echo "::error::COPY_IMAGE_RETRIES must be a non-negative integer."
442
+ exit 1
443
+ fi
444
+
445
+ if ! [[ "${COPY_IMAGE_RETRY_INTERVAL}" =~ ^[0-9]+$ ]]; then
446
+ echo "::error::COPY_IMAGE_RETRY_INTERVAL must be a non-negative integer."
447
+ exit 1
448
+ fi
449
+
450
+ copy_image_retries=$((10#${COPY_IMAGE_RETRIES}))
451
+ copy_image_attempts=$((copy_image_retries + 1))
452
+ copy_image_retry_interval=$((10#${COPY_IMAGE_RETRY_INTERVAL}))
453
+
454
+ staging_image="${STAGING_IMAGE}"
455
+ if [[ -z "${staging_image}" ]]; then
456
+ echo "::error::STAGING_IMAGE is not set or is empty."
457
+ exit 1
458
+ fi
459
+
460
+ if ! CPLN_TOKEN="${CPLN_TOKEN_STAGING}" cpln image get "${staging_image}" --org "${CPLN_ORG_STAGING}" -o json >/dev/null; then
461
+ echo "::error::Staging image '${STAGING_IMAGE}' was not found in org '${CPLN_ORG_STAGING}'; aborting promotion."
462
+ exit 1
463
+ fi
464
+
465
+ staging_tag=""
466
+ if [[ "${staging_image}" == *@* ]]; then
467
+ staging_tag="${staging_image##*@}"
468
+ elif [[ "${staging_image}" == *:* ]]; then
469
+ staging_tag="${staging_image##*:}"
470
+ fi
471
+ staging_commit=""
472
+ if [[ "${staging_tag}" == *_* ]]; then
473
+ staging_commit="${staging_tag##*_}"
474
+ else
475
+ echo "::warning::Staging image '${staging_image}' did not include a '_<commit>' suffix; production image tag will omit the commit suffix."
476
+ fi
477
+
478
+ # The workflow-level concurrency group serializes this sequence so two
479
+ # production promotions cannot derive and publish the same next tag.
480
+ # See the top-level concurrency group: cpflow-promote-staging-to-production.
481
+ latest_number="$(
482
+ cpln image query --org "${CPLN_ORG_PRODUCTION}" --prop "name~${PRODUCTION_APP_NAME}:" --max 0 -o json |
483
+ jq -r --arg prefix "${PRODUCTION_APP_NAME}:" \
484
+ '[.items[].name | select(startswith($prefix)) | (try capture("^[^:]+:(?<number>[0-9]+)") catch empty) | .number | tonumber] | max // 0'
485
+ )"
486
+ if ! [[ "${latest_number}" =~ ^[0-9]+$ ]]; then
487
+ echo "::error::Could not determine the next production image number for app '${PRODUCTION_APP_NAME}' in org '${CPLN_ORG_PRODUCTION}'."
488
+ exit 1
489
+ fi
490
+
491
+ production_image="${PRODUCTION_APP_NAME}:$((latest_number + 1))"
492
+ if [[ -n "${staging_commit}" ]]; then
493
+ production_image="${production_image}_${staging_commit}"
494
+ fi
495
+
496
+ staging_registry="${CPLN_ORG_STAGING}.registry.cpln.io"
497
+ production_registry="${CPLN_ORG_PRODUCTION}.registry.cpln.io"
498
+ source_image_ref="${staging_registry}/${STAGING_IMAGE}"
499
+ production_image_ref="${production_registry}/${production_image}"
500
+
501
+ docker_config_dir="$(mktemp -d)"
502
+ cleanup_copy_credentials() {
503
+ rm -rf "${docker_config_dir}"
504
+ }
505
+ trap cleanup_copy_credentials EXIT
506
+
507
+ export DOCKER_CONFIG="${docker_config_dir}"
508
+
509
+ if ! printf '%s' "${CPLN_TOKEN_STAGING}" |
510
+ docker login "${staging_registry}" -u '<token>' --password-stdin >/dev/null; then
511
+ echo "::error::Failed to authenticate to staging registry '${staging_registry}'."
512
+ exit 1
513
+ fi
514
+
515
+ if ! printf '%s' "${CPLN_TOKEN_PRODUCTION}" |
516
+ docker login "${production_registry}" -u '<token>' --password-stdin >/dev/null; then
517
+ echo "::error::Failed to authenticate to production registry '${production_registry}'."
518
+ exit 1
519
+ fi
520
+
521
+ if docker buildx imagetools inspect "${production_image_ref}" >/dev/null 2>&1; then
522
+ echo "::error::Production image '${production_image}' already exists in org '${CPLN_ORG_PRODUCTION}'; aborting to avoid overwriting it."
523
+ exit 1
524
+ fi
525
+
526
+ copy_status=1
527
+ for attempt in $(seq 1 "${copy_image_attempts}"); do
528
+ if docker buildx imagetools inspect "${source_image_ref}" >/dev/null &&
529
+ docker buildx imagetools create --prefer-index=false --tag "${production_image_ref}" "${source_image_ref}"; then
530
+ copy_status=0
531
+ break
532
+ else
533
+ copy_status=$?
534
+ fi
535
+
536
+ if [[ "${attempt}" -lt "${copy_image_attempts}" ]]; then
537
+ echo "::warning::Image copy attempt ${attempt}/${copy_image_attempts} failed with exit ${copy_status}; retrying in ${copy_image_retry_interval}s."
538
+ sleep "${copy_image_retry_interval}"
539
+ else
540
+ echo "::warning::Image copy attempt ${attempt}/${copy_image_attempts} failed with exit ${copy_status}; no attempts remain."
541
+ fi
542
+ done
543
+
544
+ if [[ "${copy_status}" -ne 0 ]]; then
545
+ echo "::error::Could not copy staging image '${STAGING_IMAGE}' from '${CPLN_ORG_STAGING}' to '${CPLN_ORG_PRODUCTION}' after ${copy_image_attempts} attempt(s)."
546
+ exit "${copy_status}"
547
+ fi
548
+
549
+ echo "image=${production_image}" >> "$GITHUB_OUTPUT"
328
550
 
329
551
  - name: Deploy image to production
330
552
  env:
331
553
  PRODUCTION_APP_NAME: ${{ vars.PRODUCTION_APP_NAME }}
332
- CPLN_ORG_PRODUCTION: ${{ vars.CPLN_ORG_PRODUCTION }}
554
+ CPLN_ORG_PRODUCTION: ${{ steps.cpln-orgs.outputs.production }}
333
555
  RELEASE_PHASE_FLAG: ${{ steps.release-phase.outputs.flag }}
334
556
  shell: bash
335
557
  run: |
@@ -339,6 +561,9 @@ jobs:
339
561
  if [[ -n "${RELEASE_PHASE_FLAG}" ]]; then
340
562
  deploy_args+=("${RELEASE_PHASE_FLAG}")
341
563
  fi
564
+ # `cpflow deploy-image` deploys the latest image for the app. The
565
+ # workflow-level concurrency group keeps production promotion copy and
566
+ # deploy steps coupled across workflow runs.
342
567
  deploy_args+=(--org "${CPLN_ORG_PRODUCTION}" --verbose)
343
568
 
344
569
  cpflow deploy-image "${deploy_args[@]}"
@@ -349,7 +574,7 @@ jobs:
349
574
  with:
350
575
  workload_name: ${{ steps.workloads.outputs.primary }}
351
576
  app_name: ${{ vars.PRODUCTION_APP_NAME }}
352
- org: ${{ vars.CPLN_ORG_PRODUCTION }}
577
+ org: ${{ steps.cpln-orgs.outputs.production }}
353
578
  max_retries: ${{ env.HEALTH_CHECK_RETRIES }}
354
579
  interval_seconds: ${{ env.HEALTH_CHECK_INTERVAL }}
355
580
  accepted_statuses: ${{ env.HEALTH_CHECK_ACCEPTED_STATUSES }}
@@ -359,7 +584,7 @@ jobs:
359
584
  env:
360
585
  ROLLBACK_STATE: ${{ steps.capture-current.outputs.rollback_state }}
361
586
  PRODUCTION_APP_NAME: ${{ vars.PRODUCTION_APP_NAME }}
362
- CPLN_ORG_PRODUCTION: ${{ vars.CPLN_ORG_PRODUCTION }}
587
+ CPLN_ORG_PRODUCTION: ${{ steps.cpln-orgs.outputs.production }}
363
588
  shell: bash
364
589
  run: |
365
590
  # Best-effort rollback: try every workload, aggregate failures, exit non-zero at the end
@@ -391,19 +616,14 @@ jobs:
391
616
  continue
392
617
  fi
393
618
 
394
- if ! rollback_container_entries="$(
395
- jq -r \
396
- --argjson current_names "${current_names}" \
397
- '.[] as $container | ($current_names | index($container.name)) as $index | "\($index)\t\($container.image)"' \
398
- <<< "${previous_containers}"
399
- )"; then
619
+ if ! rollback_container_entries="$(jq -r '.[] | "\(.name)\t\(.image)"' <<< "${previous_containers}")"; then
400
620
  echo "::warning::Could not build rollback image list for workload '${workload_name}'; skipping rollback for this workload." >&2
401
621
  rollback_failures=$((rollback_failures + 1))
402
622
  continue
403
623
  fi
404
624
 
405
- while IFS=$'\t' read -r index image; do
406
- rollback_args+=(--set "spec.containers[${index}].image=${image}")
625
+ while IFS=$'\t' read -r container_name image; do
626
+ rollback_args+=(--set "spec.containers.${container_name}.image=${image}")
407
627
  done <<< "${rollback_container_entries}"
408
628
 
409
629
  if ! cpln workload update "${workload_name}" \
@@ -425,7 +645,7 @@ jobs:
425
645
  env:
426
646
  ROLLBACK_STATE: ${{ steps.capture-current.outputs.rollback_state }}
427
647
  PRODUCTION_APP_NAME: ${{ vars.PRODUCTION_APP_NAME }}
428
- CPLN_ORG_PRODUCTION: ${{ vars.CPLN_ORG_PRODUCTION }}
648
+ CPLN_ORG_PRODUCTION: ${{ steps.cpln-orgs.outputs.production }}
429
649
  shell: bash
430
650
  run: |
431
651
  set -euo pipefail
@@ -450,8 +670,10 @@ jobs:
450
670
  set -euo pipefail
451
671
  ready=false
452
672
  for attempt in $(seq 1 "${ROLLBACK_READINESS_RETRIES}"); do
453
- deployment_ready="$(cpln workload get "${workload_name}" --gvc "${PRODUCTION_APP_NAME}" --org "${CPLN_ORG_PRODUCTION}" -o json | jq -r '.status.ready // false')"
454
- if [[ "${deployment_ready}" == "true" ]]; then
673
+ workload_status="$(cpln workload get "${workload_name}" --gvc "${PRODUCTION_APP_NAME}" --org "${CPLN_ORG_PRODUCTION}" -o json)"
674
+ deployment_ready="$(echo "${workload_status}" | jq -r '.status.ready // false')"
675
+ latest_ready="$(echo "${workload_status}" | jq -r '.status.readyLatest // false')"
676
+ if [[ "${deployment_ready}" == "true" && "${latest_ready}" == "true" ]]; then
455
677
  ready=true
456
678
  break
457
679
  fi
@@ -492,6 +714,7 @@ jobs:
492
714
  HEALTHY: ${{ steps.health-check.outputs.healthy }}
493
715
  PREVIOUS_IMAGE: ${{ steps.capture-current.outputs.current_image }}
494
716
  PREVIOUS_VERSION: ${{ steps.capture-current.outputs.current_version }}
717
+ COPIED_IMAGE: ${{ steps.copy-image.outputs.image }}
495
718
  shell: bash
496
719
  run: |
497
720
  {
@@ -499,12 +722,15 @@ jobs:
499
722
  echo
500
723
  if [[ "${HEALTHY}" == "true" ]]; then
501
724
  echo "✅ Status: deployment successful"
725
+ deployed_image="${COPIED_IMAGE}"
502
726
  else
503
727
  echo "❌ Status: deployment failed"
728
+ deployed_image="${PREVIOUS_IMAGE}"
504
729
  fi
505
730
  echo
506
731
  echo "Previous image: \`${PREVIOUS_IMAGE}\`"
507
732
  echo "Previous version: ${PREVIOUS_VERSION}"
733
+ echo "Deployed image: \`${deployed_image}\`"
508
734
  } >> "$GITHUB_STEP_SUMMARY"
509
735
 
510
736
  create-github-release:
data/.github/workflows/rspec-shared.yml CHANGED
@@ -19,8 +19,15 @@ on:
19
19
  jobs:
20
20
  rspec:
21
21
  runs-on: ${{ inputs.os_version }}
22
+ # Scope the live Control Plane org queue per PR (or ref) instead of globally:
23
+ # each run uses its own random app suffix (SecureRandom.hex(2)) on a fresh
24
+ # runner, so concurrent PRs don't collide on app names or CLI profiles. PRs
25
+ # run only the fast (~slow) suite, which doesn't switch the shared domain's
26
+ # route; domain-mutating specs are :slow and dispatched manually, keyed by
27
+ # github.ref so same-ref dispatches still serialize. cancel-in-progress is
28
+ # false, so queued runs wait their turn rather than being cancelled.
22
29
  concurrency:
23
- group: cpln-shared-org-${{ vars.CPLN_ORG || github.run_id }}
30
+ group: cpln-shared-org-${{ vars.CPLN_ORG || github.run_id }}-${{ github.event.pull_request.number || github.ref }}
24
31
  cancel-in-progress: false
25
32
  env:
26
33
  RAILS_ENV: test
data/CHANGELOG.md CHANGED
@@ -12,6 +12,31 @@ In addition to the standard keepachangelog.com categories, this project uses a l
12
12
 
13
13
  ## [Unreleased]
14
14
 
15
+ ## [5.1.1] - 2026-06-03
16
+
17
+ ### Changed
18
+
19
+ - **Changed `cpflow maintenance:on` and `cpflow maintenance:off` to confirm the domain route has switched by polling the Control Plane API (bounded retry, 30 attempts, 1 second apart) instead of sleeping a fixed 30 seconds.** [PR 337](https://github.com/shakacode/control-plane-flow/pull/337) by [Justin Gordon](https://github.com/justin808). Fixes [issue 157](https://github.com/shakacode/control-plane-flow/issues/157). If the route never updates within the poll window, the command aborts before stopping workloads so traffic stays on the current workload, and transient API errors during polling are retried rather than aborting the switch. Because the route switch and the workload stop run as separate steps, re-running the command also finishes a switch whose poll timed out after the route had already updated.
20
+ - **Reworked generated production-promotion image copy to authenticate directly to the staging and production Docker registries and copy via `docker buildx imagetools create`, handling digest-pinned, plain numeric, commit-suffixed, and multi-arch image refs.** [PR 356](https://github.com/shakacode/control-plane-flow/pull/356) by [Justin Gordon](https://github.com/justin808). Promotion now normalizes Control Plane org variables before each step, preflights environment-variable parity between staging and production at the GVC and app-workload container level (failing before the copy when production is missing names that exist in staging), and requires both `status.ready` and `status.readyLatest` before endpoint health checks and rollback polling so a stale ready replica cannot mask a failed latest revision.
21
+ - **Generated production promotion now emits a workflow warning when a staging image tag lacks a `_<commit>` suffix**, so production tags without commit traceability are visible in logs, and documents the `cpflow-promote-staging-to-production` concurrency group in the copy step. [PR 360](https://github.com/shakacode/control-plane-flow/pull/360) by [Justin Gordon](https://github.com/justin808).
22
+ - **Restored review-app security guidance in generated `.github/cpflow-help.md`** (public-repo staging-token scoping, fork-PR deploy limits, secret exposure via `cpln://secret/...`, and read-only deploy keys for `DOCKER_BUILD_SSH_KEY`), and simplified the promotion workflow's staging image assignment while preserving digest refs. [PR 359](https://github.com/shakacode/control-plane-flow/pull/359) by [Justin Gordon](https://github.com/justin808).
23
+
24
+ ### Fixed
25
+
26
+ - **Fixed `cpflow run` so short non-interactive runner jobs no longer hang when the Control Plane cron job finishes before a runner replica is visible.** [PR 361](https://github.com/shakacode/control-plane-flow/pull/361) by [Justin Gordon](https://github.com/justin808). This prevents generated deploy workflows with release-phase commands from waiting until the GitHub Actions job timeout even though the release job already completed successfully.
27
+
28
+ ## [5.1.0] - 2026-06-02
29
+
30
+ ### Added
31
+
32
+ - **Added `shared_secret_grants` configuration so apps can reference org-level Control Plane secrets by name instead of hardcoding them in templates.** [PR 354](https://github.com/shakacode/control-plane-flow/pull/354) by [Justin Gordon](https://github.com/justin808). Each grant validates a unique placeholder, a safe Control Plane resource name, and a secret policy that targets exactly that secret; templates gain `{{SHARED_SECRET_<NAME>}}` substitution, and the shared-policy lifecycle is wired through `setup-app`, `deploy-image`, `delete`, and `cleanup-stale-apps`. Enables the shared staging-database pattern for cheaper review apps.
33
+
34
+ ### Fixed
35
+
36
+ - **Fixed `cpflow generate-github-actions` so the generated `.github/cpflow-help.md` version-locking example derives a `CPFLOW_VERSION=<major>.<minor>.x` placeholder from the installed gem version instead of a hardcoded release that goes stale against the `@v<version>` wrapper refs in the same file.** [PR 343](https://github.com/shakacode/control-plane-flow/pull/343) by [Justin Gordon](https://github.com/justin808). Fixes [issue 341](https://github.com/shakacode/control-plane-flow/issues/341).
37
+ - **Fixed generated production promotion so `cpflow-promote-staging-to-production.yml` runs as a caller-owned job with `environment: production`, letting GitHub inject the `CPLN_TOKEN_PRODUCTION` environment secret after the protected gate instead of failing because a cross-repo reusable workflow cannot receive caller environment secrets.** [PR 353](https://github.com/shakacode/control-plane-flow/pull/353) by [Justin Gordon](https://github.com/justin808). The job checks out the pinned `control-plane-flow` ref into `.cpflow`, and generated help plus `docs/ci-automation.md` now explain why a same-named repository or organization secret can mask a missing environment secret.
38
+ - **Hardened generated production promotion image copy to preflight the staging image, retry the copy via configurable `COPY_IMAGE_RETRIES` and `COPY_IMAGE_RETRY_INTERVAL` repo vars, and roll back failed deploys using `spec.containers.<name>.image` paths instead of unsupported array-index paths.** [PR 355](https://github.com/shakacode/control-plane-flow/pull/355) by [Justin Gordon](https://github.com/justin808).
39
+
15
40
  ## [5.0.4] - 2026-05-27
16
41
 
17
42
  ### Fixed
@@ -398,7 +423,9 @@ Deprecated `cpl` gem. New gem is `cpflow`.
398
423
 
399
424
  First release.
400
425
 
401
- [Unreleased]: https://github.com/shakacode/control-plane-flow/compare/v5.0.4...HEAD
426
+ [Unreleased]: https://github.com/shakacode/control-plane-flow/compare/v5.1.1...HEAD
427
+ [5.1.1]: https://github.com/shakacode/control-plane-flow/compare/v5.1.0...v5.1.1
428
+ [5.1.0]: https://github.com/shakacode/control-plane-flow/compare/v5.0.4...v5.1.0
402
429
  [5.0.4]: https://github.com/shakacode/control-plane-flow/compare/v5.0.3...v5.0.4
403
430
  [5.0.3]: https://github.com/shakacode/control-plane-flow/compare/v5.0.2...v5.0.3
404
431
  [5.0.2]: https://github.com/shakacode/control-plane-flow/compare/v5.0.1...v5.0.2
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- cpflow (5.0.4)
4
+ cpflow (5.1.1)
5
5
  dotenv (~> 3.1)
6
6
  jwt (~> 3.1)
7
7
  psych (~> 5.2)
data/README.md CHANGED
@@ -1,3 +1,7 @@
1
+ <p align="center">
2
+ <img src="./docs/assets/logo/icon-tile.svg" alt="Control Plane Flow (cpflow) logo" width="160" height="160" />
3
+ </p>
4
+
1
5
  # The power of Kubernetes with the ease of Heroku!
2
6
 
3
7
  <meta name="author" content="Justin Gordon and Sergey Tarasov" />
@@ -241,6 +245,18 @@ aliases:
241
245
  # it would be 'my-app-review-secrets-policy'
242
246
  secrets_policy_name: my-secrets-policy
243
247
 
248
+ # Optional: grant each app identity access to shared org-level secrets
249
+ # without hardcoding shared secret names in workload templates.
250
+ #
251
+ # This is useful for review apps that share one staging database secret
252
+ # instead of provisioning a database per PR. Create the shared secret and
253
+ # policy once, then reference the secret in templates with
254
+ # {{SHARED_SECRET_DATABASE}}.
255
+ # shared_secret_grants:
256
+ # - name: database
257
+ # secret_name: my-shared-database-secrets
258
+ # policy_name: my-shared-database-secrets-policy
259
+
244
260
  # Configure the workload name used as a template for one-off scripts, like a Heroku one-off dyno.
245
261
  one_off_workload: rails
246
262
 
@@ -500,6 +516,11 @@ aws-rds-single-pg-instance
500
516
  mydb-review-333
501
517
  ```
502
518
 
519
+ For production, you'll typically want RDS or Aurora in private subnets, reached from your Control Plane workloads
520
+ over a private network path rather than the public internet. See
521
+ [Connecting Control Plane workloads to a private AWS RDS/Aurora database](./docs/rds-private-networking.md) for the
522
+ full Cloud Wormhole + Agent setup.
523
+
503
524
  If you want to run PostgreSQL on Control Plane instead of keeping a Heroku add-on or moving to RDS, review the
504
525
  [Control Plane PostgreSQL Template Catalog page](https://shakadocs.controlplane.com/template-catalog/templates/postgres). It includes
505
526
  persistent storage and optional scheduled backups. Additionally, we provide a default `postgres` template in this
@@ -565,17 +586,21 @@ cpflow --help
565
586
 
566
587
  ## Mapping of Heroku Commands to `cpflow` and `cpln`
567
588
 
568
- | Heroku Command | `cpflow` or `cpln` |
569
- | -------------------------------------------------------------------------------------------------------------- | ------------------------------- |
570
- | [heroku ps](https://devcenter.heroku.com/articles/heroku-cli-commands#heroku-ps-type-type) | `cpflow ps` |
571
- | [heroku config](https://devcenter.heroku.com/articles/heroku-cli-commands#heroku-config) | ? |
572
- | [heroku maintenance](https://devcenter.heroku.com/articles/heroku-cli-commands#heroku-maintenance) | `cpflow maintenance` |
573
- | [heroku logs](https://devcenter.heroku.com/articles/heroku-cli-commands#heroku-logs) | `cpflow logs` |
574
- | [heroku pg](https://devcenter.heroku.com/articles/heroku-cli-commands#heroku-pg-database) | ? |
575
- | [heroku pipelines:promote](https://devcenter.heroku.com/articles/heroku-cli-commands#heroku-pipelines-promote) | `cpflow promote-app-from-upstream` |
576
- | [heroku psql](https://devcenter.heroku.com/articles/heroku-cli-commands#heroku-psql-database) | ? |
577
- | [heroku redis](https://devcenter.heroku.com/articles/heroku-cli-commands#heroku-redis-database) | ? |
578
- | [heroku releases](https://devcenter.heroku.com/articles/heroku-cli-commands#heroku-releases) | ? |
589
+ | Heroku Command | `cpflow` or `cpln` |
590
+ | -------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
591
+ | [heroku ps](https://devcenter.heroku.com/articles/heroku-cli-commands#heroku-ps-type-type) | `cpflow ps` |
592
+ | [heroku config](https://devcenter.heroku.com/articles/heroku-cli-commands#heroku-config) | `cpflow env -a APP_NAME` displays Control Plane app environment variables; `cpflow config -a APP_NAME` displays local `.controlplane/controlplane.yml` settings |
593
+ | [heroku maintenance](https://devcenter.heroku.com/articles/heroku-cli-commands#heroku-maintenance) | `cpflow maintenance`, `cpflow maintenance:on`, `cpflow maintenance:off`, and `cpflow maintenance:set-page` |
594
+ | [heroku logs](https://devcenter.heroku.com/articles/heroku-cli-commands#heroku-logs) | `cpflow logs -a APP_NAME`; add `-w WORKLOAD_NAME` to filter by workload, or `-w WORKLOAD_NAME -r REPLICA_NAME` to narrow to a specific replica |
595
+ | [heroku pg](https://devcenter.heroku.com/articles/heroku-cli-commands#heroku-pg-database) | No direct `cpflow` add-on wrapper. Use an external Postgres provider, the Control Plane Template Catalog, or project templates such as `.controlplane/templates/postgres.yml`. |
596
+ | [heroku pipelines:promote](https://devcenter.heroku.com/articles/heroku-cli-commands#heroku-pipelines-promote) | `cpflow promote-app-from-upstream` |
597
+ | [heroku psql](https://devcenter.heroku.com/articles/heroku-cli-commands#heroku-psql-database) | No direct `cpflow` equivalent. Connect with your provider's `psql` flow, or run `cpflow run -a APP_NAME -- psql "$DATABASE_URL"` when `psql` is available in the application image. |
598
+ | [heroku redis](https://devcenter.heroku.com/articles/heroku-cli-commands#heroku-redis-database) | No direct `cpflow` add-on wrapper. Use an external Redis provider, the Control Plane Template Catalog, or project templates such as `.controlplane/templates/redis.yml`. |
599
+ | [heroku releases](https://devcenter.heroku.com/articles/heroku-cli-commands#heroku-releases) | `cpflow latest-image -a APP_NAME` shows the latest image tag; `cpflow deploy-image -a APP_NAME` deploys that image. There is no `cpflow` equivalent for browsing full release history. |
600
+
601
+ Unlike Heroku add-ons, Control Plane database and cache services are usually managed as provider resources or workload
602
+ templates. `cpflow` focuses on the application deployment flow and leaves provider-specific database administration to
603
+ the provider tooling, Control Plane templates, or direct `cpln` operations.
579
604
 
580
605
  ## Examples
581
606