@layr-labs/ecloud-sdk 1.0.0-dev.3 → 1.0.0-devep2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/VERSION +2 -2
- package/dist/billing.cjs.map +1 -1
- package/dist/billing.js.map +1 -1
- package/dist/browser.cjs +1 -1
- package/dist/browser.cjs.map +1 -1
- package/dist/browser.js +1 -1
- package/dist/browser.js.map +1 -1
- package/dist/compute.cjs +324 -52
- package/dist/compute.cjs.map +1 -1
- package/dist/compute.js +324 -52
- package/dist/compute.js.map +1 -1
- package/dist/index.cjs +324 -52
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +324 -52
- package/dist/index.js.map +1 -1
- package/package.json +5 -4
- package/tools/ecloud-drain-watcher-linux-amd64 +0 -0
- package/tools/tls-keygen-linux-amd64 +0 -0
package/dist/compute.cjs
CHANGED
|
@@ -183,6 +183,7 @@ var ENV_SOURCE_SCRIPT_NAME = "compute-source-env.sh";
|
|
|
183
183
|
var KMS_CLIENT_BINARY_NAME = "kms-client";
|
|
184
184
|
var KMS_SIGNING_KEY_NAME = "kms-signing-public-key.pem";
|
|
185
185
|
var TLS_KEYGEN_BINARY_NAME = "tls-keygen";
|
|
186
|
+
var DRAIN_WATCHER_BINARY_NAME = "ecloud-drain-watcher";
|
|
186
187
|
var CADDYFILE_NAME = "Caddyfile";
|
|
187
188
|
var LAYERED_BUILD_DIR_PREFIX = "ecloud-layered-build";
|
|
188
189
|
|
|
@@ -456,7 +457,7 @@ var PushPermissionError = class extends Error {
|
|
|
456
457
|
var import_handlebars = __toESM(require("handlebars"), 1);
|
|
457
458
|
|
|
458
459
|
// src/client/common/templates/Dockerfile.layered.tmpl
|
|
459
|
-
var Dockerfile_layered_default = '{{#if includeTLS}}\n# Get Caddy from official image\nFROM caddy:2.10.2-alpine AS caddy\n{{/if}}\n\nFROM {{baseImage}}\n\n{{#if originalUser}}\n# Switch to root to perform setup (base image has non-root USER: {{originalUser}})\nUSER root\n{{/if}}\n\n# Copy core TEE components\nCOPY compute-source-env.sh /usr/local/bin/\nCOPY kms-client /usr/local/bin/\nCOPY kms-signing-public-key.pem /usr/local/bin/\n\n{{#if includeTLS}}\n# Copy Caddy from official image\nCOPY --from=caddy /usr/bin/caddy /usr/local/bin/caddy\n\n# Copy TLS components\nCOPY tls-keygen /usr/local/bin/\nCOPY Caddyfile /etc/caddy/\n{{/if}}\n\n{{#if originalUser}}\n# Make binaries executable (755 for executables, 644 for keys)\nRUN chmod 755 /usr/local/bin/compute-source-env.sh \\\n && chmod 755 /usr/local/bin/kms-client{{#if includeTLS}} \\\n && chmod 755 /usr/local/bin/tls-keygen \\\n && chmod 755 /usr/local/bin/caddy{{/if}} \\\n && chmod 644 /usr/local/bin/kms-signing-public-key.pem\n\n# Store original user - entrypoint will drop privileges to this user after TEE setup\nENV __ECLOUD_ORIGINAL_USER={{originalUser}}\n{{else}}\n# Make binaries executable (preserve existing permissions, just add execute)\nRUN chmod +x /usr/local/bin/compute-source-env.sh \\\n && chmod +x /usr/local/bin/kms-client{{#if includeTLS}} \\\n && chmod +x /usr/local/bin/tls-keygen{{/if}}\n{{/if}}\n\n{{#if logRedirect}}\n\nLABEL tee.launch_policy.log_redirect={{logRedirect}}\n{{/if}}\n{{#if resourceUsageAllow}}\n\nLABEL tee.launch_policy.monitoring_memory_allow={{resourceUsageAllow}}\n{{/if}}\n\nLABEL eigenx_cli_version={{ecloudCLIVersion}}\nLABEL eigenx_vm_image=eigen\n\n{{#if includeTLS}}\n# Expose both HTTP and HTTPS ports for Caddy\nEXPOSE 80 443\n{{/if}}\n\nENTRYPOINT ["/usr/local/bin/compute-source-env.sh"]\nCMD {{{originalCmd}}}\n';
|
|
460
|
+
var Dockerfile_layered_default = '{{#if includeTLS}}\n# Get Caddy from official image\nFROM caddy:2.10.2-alpine AS caddy\n{{/if}}\n\nFROM {{baseImage}}\n\n{{#if originalUser}}\n# Switch to root to perform setup (base image has non-root USER: {{originalUser}})\nUSER root\n{{/if}}\n\n# Copy core TEE components\nCOPY compute-source-env.sh /usr/local/bin/\nCOPY kms-client /usr/local/bin/\nCOPY kms-signing-public-key.pem /usr/local/bin/\n{{#if includeDrainWatcher}}\nCOPY ecloud-drain-watcher /usr/local/bin/\n{{/if}}\n\n{{#if includeTLS}}\n# Copy Caddy from official image\nCOPY --from=caddy /usr/bin/caddy /usr/local/bin/caddy\n\n# Copy TLS components\nCOPY tls-keygen /usr/local/bin/\nCOPY Caddyfile /etc/caddy/\n{{/if}}\n\n{{#if originalUser}}\n# Make binaries executable (755 for executables, 644 for keys)\nRUN chmod 755 /usr/local/bin/compute-source-env.sh \\\n && chmod 755 /usr/local/bin/kms-client{{#if includeDrainWatcher}} \\\n && chmod 755 /usr/local/bin/ecloud-drain-watcher{{/if}}{{#if includeTLS}} \\\n && chmod 755 /usr/local/bin/tls-keygen \\\n && chmod 755 /usr/local/bin/caddy{{/if}} \\\n && chmod 644 /usr/local/bin/kms-signing-public-key.pem\n\n# Store original user - entrypoint will drop privileges to this user after TEE setup\nENV __ECLOUD_ORIGINAL_USER={{originalUser}}\n{{else}}\n# Make binaries executable (preserve existing permissions, just add execute)\nRUN chmod +x /usr/local/bin/compute-source-env.sh \\\n && chmod +x /usr/local/bin/kms-client{{#if includeDrainWatcher}} \\\n && chmod +x /usr/local/bin/ecloud-drain-watcher{{/if}}{{#if includeTLS}} \\\n && chmod +x /usr/local/bin/tls-keygen{{/if}}\n{{/if}}\n\n{{#if logRedirect}}\n\nLABEL tee.launch_policy.log_redirect={{logRedirect}}\n{{/if}}\n{{#if resourceUsageAllow}}\n\nLABEL tee.launch_policy.monitoring_memory_allow={{resourceUsageAllow}}\n{{/if}}\n\n# Allow-list the envvars the ecloud-platform sets via GCE `tee-env-*`\n# metadata. Without this label, Confidential Space\'s launcher rejects\n# any `tee-env-*` override at container-start with\n# "env var {...} is not allowed to be overridden on this image" and\n# exits with code 1 \u2014 which terminates the VM before the entrypoint\n# ever runs. ECLOUD_PD_EXPECTED is set on PD-backed apps so the\n# entrypoint (compute-source-env.sh) knows to wait for the persistent\n# disk before exec\'ing the user workload. User-supplied env vars\n# flow through KMS (not tee-env-*) and don\'t need to be listed here.\nLABEL tee.launch_policy.allow_env_override=ECLOUD_PD_EXPECTED\n\nLABEL eigenx_cli_version={{ecloudCLIVersion}}\nLABEL eigenx_vm_image=eigen\nLABEL eigenx_container_contract=v1\n\n{{#if includeTLS}}\n# Expose both HTTP and HTTPS ports for Caddy\nEXPOSE 80 443\n{{/if}}\n\nENTRYPOINT ["/usr/local/bin/compute-source-env.sh"]\nCMD {{{originalCmd}}}\n';
|
|
460
461
|
|
|
461
462
|
// src/client/common/templates/dockerfileTemplate.ts
|
|
462
463
|
function processDockerfileTemplate(data) {
|
|
@@ -469,6 +470,49 @@ var import_handlebars2 = __toESM(require("handlebars"), 1);
|
|
|
469
470
|
|
|
470
471
|
// src/client/common/templates/compute-source-env.sh.tmpl
|
|
471
472
|
var compute_source_env_sh_default = `#!/bin/sh
|
|
473
|
+
# EigenCompute container entrypoint script
|
|
474
|
+
# This script handles KMS secret fetching, TLS setup, and privilege dropping
|
|
475
|
+
# before executing the user's application.
|
|
476
|
+
#
|
|
477
|
+
# Handlebars template variables (replaced at build time by the CLI):
|
|
478
|
+
# kmsServerURL - URL of the KMS server
|
|
479
|
+
# userAPIURL - URL of the user API (ecloud-platform)
|
|
480
|
+
# The KMS signing public key is copied into the image as
|
|
481
|
+
# /usr/local/bin/kms-signing-public-key.pem at layer-build time by the CLI.
|
|
482
|
+
#
|
|
483
|
+
# ecloud-platform divergence from compute-tee:
|
|
484
|
+
# This script emits ECLOUD_READY / ECLOUD_FAIL / ECLOUD_AWAITING_USERDATA /
|
|
485
|
+
# ECLOUD_DETACHED markers to stdout at key lifecycle points. The GCP
|
|
486
|
+
# provisioner's serial-console watcher in ecloud-platform
|
|
487
|
+
# (pkg/services/infraService/providers/gcp/compute.go) parses those
|
|
488
|
+
# markers to gate "VM ready" and to coordinate the prewarm-detach
|
|
489
|
+
# upgrade flow. Without the markers, the platform's waitForStartupReady
|
|
490
|
+
# times out at ~10 minutes per deploy, rollback fires, and the VM is
|
|
491
|
+
# deleted \u2014 seen in dev on 2026-05-04 with an older copy of this
|
|
492
|
+
# template that lacked the markers.
|
|
493
|
+
#
|
|
494
|
+
# Prewarm-detach contract:
|
|
495
|
+
# - If ECLOUD_PD_EXPECTED=1 and /mnt/disks/userdata is not present at boot,
|
|
496
|
+
# emit ECLOUD_AWAITING_USERDATA and wait until the disk is attached.
|
|
497
|
+
# - On SIGTERM (drain-requested), forward to child, wait for exit, sync
|
|
498
|
+
# + unmount /mnt/disks/userdata, emit ECLOUD_DETACHED, exit.
|
|
499
|
+
# - ECLOUD_READY is emitted once runtime is bootstrapped (same as before).
|
|
500
|
+
# - ECLOUD_FAIL is emitted on any unrecoverable setup error.
|
|
501
|
+
# Keep the markers on any line that resolves a lifecycle outcome.
|
|
502
|
+
#
|
|
503
|
+
# This file is kept in lockstep with
|
|
504
|
+
# ecloud-platform/pkg/services/buildService/assets/compute-source-env.sh.tmpl
|
|
505
|
+
# \u2014 if you change one, change the other. Differences vs the platform copy
|
|
506
|
+
# are intentionally minimal:
|
|
507
|
+
# - Handlebars placeholders use the CLI's naming (kmsServerURL,
|
|
508
|
+
# userAPIURL) rather than the platform's (KMS_SERVER_URL,
|
|
509
|
+
# USER_API_URL). (See top of file for real placeholder syntax \u2014
|
|
510
|
+
# not repeated here so Handlebars doesn't expand it in this comment.)
|
|
511
|
+
# - KMS signing key is read from a file the CLI copies into the image,
|
|
512
|
+
# not heredoc-embedded in the script, because the CLI's image
|
|
513
|
+
# layering writes it as a separate file (kms-signing-public-key.pem).
|
|
514
|
+
# - TLS binary is \`tls-keygen\` (CLI-bundled) not \`tls-client\`.
|
|
515
|
+
|
|
472
516
|
echo "compute-source-env.sh: Running setup script..."
|
|
473
517
|
|
|
474
518
|
# Fetch and source environment variables from KMS
|
|
@@ -484,92 +528,93 @@ if /usr/local/bin/kms-client \\
|
|
|
484
528
|
else
|
|
485
529
|
echo "compute-source-env.sh: ERROR - Failed to fetch environment variables from KMS"
|
|
486
530
|
echo "compute-source-env.sh: Exiting - cannot start user workload without KMS secrets"
|
|
531
|
+
echo "ECLOUD_FAIL kms_bootstrap"
|
|
487
532
|
exit 1
|
|
488
533
|
fi
|
|
489
534
|
|
|
490
|
-
# Setup TLS if tls-keygen is present
|
|
535
|
+
# Setup TLS if tls-keygen is present and DOMAIN is configured
|
|
491
536
|
setup_tls() {
|
|
492
537
|
# If tls-keygen isn't present, TLS wasn't configured during build
|
|
493
538
|
if [ ! -x /usr/local/bin/tls-keygen ]; then
|
|
494
539
|
echo "compute-source-env.sh: TLS not configured (no tls-keygen binary)"
|
|
495
540
|
return 0
|
|
496
541
|
fi
|
|
497
|
-
|
|
542
|
+
|
|
498
543
|
local domain="\${DOMAIN:-}"
|
|
499
544
|
local mnemonic="\${MNEMONIC:-}"
|
|
500
|
-
|
|
501
|
-
#
|
|
545
|
+
|
|
546
|
+
# If DOMAIN is not set or is localhost, skip TLS setup
|
|
502
547
|
if [ -z "$domain" ] || [ "$domain" = "localhost" ]; then
|
|
503
|
-
echo "compute-source-env.sh:
|
|
504
|
-
|
|
505
|
-
exit 1
|
|
548
|
+
echo "compute-source-env.sh: TLS skipped (DOMAIN not set or is localhost)"
|
|
549
|
+
return 0
|
|
506
550
|
fi
|
|
507
|
-
|
|
551
|
+
|
|
508
552
|
if [ -z "$mnemonic" ]; then
|
|
509
|
-
echo "compute-source-env.sh: ERROR - TLS
|
|
553
|
+
echo "compute-source-env.sh: ERROR - TLS requested but MNEMONIC not available"
|
|
510
554
|
echo "compute-source-env.sh: Cannot obtain TLS certificate without mnemonic"
|
|
555
|
+
echo "ECLOUD_FAIL tls_mnemonic_missing"
|
|
511
556
|
exit 1
|
|
512
557
|
fi
|
|
513
|
-
|
|
558
|
+
|
|
514
559
|
if [ ! -x /usr/local/bin/caddy ]; then
|
|
515
|
-
echo "compute-source-env.sh: ERROR - TLS
|
|
560
|
+
echo "compute-source-env.sh: ERROR - TLS requested but Caddy not found"
|
|
561
|
+
echo "ECLOUD_FAIL tls_caddy_missing"
|
|
516
562
|
exit 1
|
|
517
563
|
fi
|
|
518
|
-
|
|
564
|
+
|
|
519
565
|
echo "compute-source-env.sh: Setting up TLS for domain: $domain"
|
|
520
|
-
|
|
566
|
+
|
|
521
567
|
# Obtain TLS certificate using ACME
|
|
522
|
-
# Default to http-01, but allow override via ACME_CHALLENGE env var
|
|
523
568
|
local challenge="\${ACME_CHALLENGE:-http-01}"
|
|
524
|
-
|
|
569
|
+
|
|
525
570
|
# Check if we should use staging (for testing)
|
|
526
571
|
local staging_flag=""
|
|
527
572
|
if [ "\${ACME_STAGING:-false}" = "true" ]; then
|
|
528
573
|
staging_flag="-staging"
|
|
529
|
-
echo "compute-source-env.sh: Using Let's Encrypt STAGING environment
|
|
574
|
+
echo "compute-source-env.sh: Using Let's Encrypt STAGING environment"
|
|
530
575
|
fi
|
|
531
|
-
|
|
576
|
+
|
|
532
577
|
echo "compute-source-env.sh: Obtaining TLS certificate using $challenge challenge..."
|
|
533
|
-
# Pass the API URL for certificate persistence
|
|
534
578
|
if ! MNEMONIC="$mnemonic" DOMAIN="$domain" API_URL="{{userAPIURL}}" /usr/local/bin/tls-keygen \\
|
|
535
579
|
-challenge "$challenge" \\
|
|
536
580
|
$staging_flag; then
|
|
537
581
|
echo "compute-source-env.sh: ERROR - Failed to obtain TLS certificate"
|
|
538
|
-
echo "
|
|
582
|
+
echo "ECLOUD_FAIL tls_setup"
|
|
539
583
|
exit 1
|
|
540
584
|
fi
|
|
541
|
-
|
|
585
|
+
|
|
542
586
|
echo "compute-source-env.sh: TLS certificate obtained successfully"
|
|
543
|
-
|
|
587
|
+
|
|
544
588
|
# Validate Caddyfile before starting
|
|
545
|
-
if
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
fi
|
|
550
|
-
|
|
551
|
-
# Start Caddy in background
|
|
552
|
-
echo "compute-source-env.sh: Starting Caddy reverse proxy..."
|
|
553
|
-
|
|
554
|
-
# Check if Caddy logs should be enabled
|
|
555
|
-
if [ "\${ENABLE_CADDY_LOGS:-false}" = "true" ]; then
|
|
556
|
-
if ! /usr/local/bin/caddy start --config /etc/caddy/Caddyfile --adapter caddyfile 2>&1; then
|
|
557
|
-
echo "compute-source-env.sh: ERROR - Failed to start Caddy"
|
|
558
|
-
echo "compute-source-env.sh: TLS was requested (DOMAIN=$domain) but setup failed"
|
|
589
|
+
if [ -f /etc/caddy/Caddyfile ]; then
|
|
590
|
+
if ! /usr/local/bin/caddy validate --config /etc/caddy/Caddyfile --adapter caddyfile 2>/dev/null; then
|
|
591
|
+
echo "compute-source-env.sh: ERROR - Invalid Caddyfile"
|
|
592
|
+
echo "ECLOUD_FAIL tls_invalid_caddyfile"
|
|
559
593
|
exit 1
|
|
560
594
|
fi
|
|
561
|
-
|
|
562
|
-
#
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
595
|
+
|
|
596
|
+
# Start Caddy in background
|
|
597
|
+
echo "compute-source-env.sh: Starting Caddy reverse proxy..."
|
|
598
|
+
if [ "\${ENABLE_CADDY_LOGS:-false}" = "true" ]; then
|
|
599
|
+
if ! /usr/local/bin/caddy start --config /etc/caddy/Caddyfile --adapter caddyfile 2>&1; then
|
|
600
|
+
echo "compute-source-env.sh: ERROR - Failed to start Caddy"
|
|
601
|
+
echo "ECLOUD_FAIL tls_caddy_start"
|
|
602
|
+
exit 1
|
|
603
|
+
fi
|
|
604
|
+
else
|
|
605
|
+
if ! /usr/local/bin/caddy start --config /etc/caddy/Caddyfile --adapter caddyfile >/dev/null 2>&1; then
|
|
606
|
+
echo "compute-source-env.sh: ERROR - Failed to start Caddy"
|
|
607
|
+
echo "ECLOUD_FAIL tls_caddy_start"
|
|
608
|
+
exit 1
|
|
609
|
+
fi
|
|
567
610
|
fi
|
|
611
|
+
|
|
612
|
+
sleep 2
|
|
613
|
+
echo "compute-source-env.sh: Caddy started successfully"
|
|
614
|
+
else
|
|
615
|
+
echo "compute-source-env.sh: No Caddyfile found, skipping Caddy"
|
|
568
616
|
fi
|
|
569
|
-
|
|
570
|
-
# Give Caddy a moment to fully initialize
|
|
571
|
-
sleep 2
|
|
572
|
-
echo "compute-source-env.sh: Caddy started successfully"
|
|
617
|
+
|
|
573
618
|
return 0
|
|
574
619
|
}
|
|
575
620
|
|
|
@@ -580,15 +625,233 @@ setup_tls
|
|
|
580
625
|
export KMS_SERVER_URL="{{kmsServerURL}}"
|
|
581
626
|
export KMS_PUBLIC_KEY="$(cat /usr/local/bin/kms-signing-public-key.pem)"
|
|
582
627
|
|
|
628
|
+
# \u2500\u2500 Prewarm-detach: wait for PD if expected \u2500\u2500
|
|
629
|
+
# Orchestrator sets ECLOUD_PD_EXPECTED=1 on apps using StorageBackend=pd.
|
|
630
|
+
# When the prewarm path is used, the new VM boots WITHOUT the disk; we
|
|
631
|
+
# signal awaiting-userdata and poll until the disk is attached.
|
|
632
|
+
USERDATA_MOUNT="/mnt/disks/userdata"
|
|
633
|
+
USERDATA_DEV="/dev/disk/by-id/google-persistent_storage_1"
|
|
634
|
+
|
|
635
|
+
wait_for_userdata() {
|
|
636
|
+
if [ "\${ECLOUD_PD_EXPECTED:-0}" != "1" ]; then
|
|
637
|
+
return 0
|
|
638
|
+
fi
|
|
639
|
+
if mountpoint -q "$USERDATA_MOUNT" 2>/dev/null; then
|
|
640
|
+
echo "compute-source-env.sh: userdata already mounted at $USERDATA_MOUNT"
|
|
641
|
+
return 0
|
|
642
|
+
fi
|
|
643
|
+
# Refuse to proceed if the tools we need for safe first-attach
|
|
644
|
+
# detection are missing. Without blkid we cannot tell an empty new
|
|
645
|
+
# disk from an already-formatted one \u2014 running mkfs.ext4 on the
|
|
646
|
+
# latter would destroy data.
|
|
647
|
+
if ! command -v blkid >/dev/null 2>&1; then
|
|
648
|
+
echo "ECLOUD_FAIL pd_tools_missing"
|
|
649
|
+
exit 1
|
|
650
|
+
fi
|
|
651
|
+
echo "ECLOUD_AWAITING_USERDATA"
|
|
652
|
+
echo "compute-source-env.sh: waiting for PD at $USERDATA_DEV..."
|
|
653
|
+
# Poll for up to 10 minutes (120 * 5s). The orchestrator's overall
|
|
654
|
+
# attach timeout is shorter; the ceiling here just bounds the wait
|
|
655
|
+
# for manual / diagnostic scenarios.
|
|
656
|
+
local i=0
|
|
657
|
+
local mount_failures=0
|
|
658
|
+
while [ "$i" -lt 120 ]; do
|
|
659
|
+
if [ -e "$USERDATA_DEV" ]; then
|
|
660
|
+
mkdir -p "$USERDATA_MOUNT"
|
|
661
|
+
if mount -o noatime "$USERDATA_DEV" "$USERDATA_MOUNT" 2>/dev/null; then
|
|
662
|
+
echo "compute-source-env.sh: PD mounted at $USERDATA_MOUNT"
|
|
663
|
+
return 0
|
|
664
|
+
fi
|
|
665
|
+
# Disk present but mount failed. Check whether it has a
|
|
666
|
+
# recognized filesystem. \`blkid -s TYPE -o value\` prints the
|
|
667
|
+
# FS type (empty if none). We only mkfs when there is
|
|
668
|
+
# demonstrably NO filesystem \u2014 never on the basis of blkid
|
|
669
|
+
# returning non-zero alone, which could mean "blkid missing"
|
|
670
|
+
# or "device busy".
|
|
671
|
+
local fstype
|
|
672
|
+
fstype=$(blkid -s TYPE -o value "$USERDATA_DEV" 2>/dev/null)
|
|
673
|
+
if [ -z "$fstype" ]; then
|
|
674
|
+
echo "compute-source-env.sh: formatting $USERDATA_DEV (first attach)"
|
|
675
|
+
mkfs.ext4 -F -L eclouddata "$USERDATA_DEV" >/dev/null 2>&1 || {
|
|
676
|
+
echo "ECLOUD_FAIL pd_mkfs_failed"
|
|
677
|
+
exit 1
|
|
678
|
+
}
|
|
679
|
+
mount -o noatime "$USERDATA_DEV" "$USERDATA_MOUNT" || {
|
|
680
|
+
echo "ECLOUD_FAIL pd_mount_after_format_failed"
|
|
681
|
+
exit 1
|
|
682
|
+
}
|
|
683
|
+
return 0
|
|
684
|
+
fi
|
|
685
|
+
# Disk has a filesystem but mount still failed. Give it a
|
|
686
|
+
# few retries to cover transient cases (device busy, udev
|
|
687
|
+
# still settling), but don't pretend this is an attach
|
|
688
|
+
# timeout if it persists.
|
|
689
|
+
mount_failures=$((mount_failures + 1))
|
|
690
|
+
if [ "$mount_failures" -ge 6 ]; then
|
|
691
|
+
echo "ECLOUD_FAIL pd_mount_failed"
|
|
692
|
+
exit 1
|
|
693
|
+
fi
|
|
694
|
+
else
|
|
695
|
+
# Device disappeared (e.g. udev re-enumeration between
|
|
696
|
+
# attach and mount). Reset the consecutive-failure counter
|
|
697
|
+
# so only true back-to-back mount failures trip
|
|
698
|
+
# pd_mount_failed; a device blip should not steal retries.
|
|
699
|
+
mount_failures=0
|
|
700
|
+
fi
|
|
701
|
+
i=$((i + 1))
|
|
702
|
+
sleep 5
|
|
703
|
+
done
|
|
704
|
+
echo "ECLOUD_FAIL pd_attach_timeout"
|
|
705
|
+
exit 1
|
|
706
|
+
}
|
|
707
|
+
|
|
708
|
+
wait_for_userdata
|
|
709
|
+
|
|
710
|
+
# \u2500\u2500 Prewarm-detach: install SIGTERM handler for graceful drain \u2500\u2500
|
|
711
|
+
# Orchestrator signals drain by setting the instance metadata key
|
|
712
|
+
# ECLOUD_DRAIN_REQUESTED=1, which a host-level agent translates into
|
|
713
|
+
# SIGTERM on PID 1. On SIGTERM we:
|
|
714
|
+
# 1. Forward to the child (wakes the user's app for graceful exit)
|
|
715
|
+
# 2. Wait for child exit
|
|
716
|
+
# 3. Sync + unmount the PD
|
|
717
|
+
# 4. Emit ECLOUD_DETACHED so the orchestrator can proceed to detach
|
|
718
|
+
CHILD_PID=""
|
|
719
|
+
_DRAIN_IN_PROGRESS=0
|
|
720
|
+
|
|
721
|
+
drain_handler() {
|
|
722
|
+
# Guard against re-entry if SIGTERM arrives twice (e.g. both the
|
|
723
|
+
# drain_watcher and an external signal fire in quick succession).
|
|
724
|
+
if [ "$_DRAIN_IN_PROGRESS" = "1" ]; then
|
|
725
|
+
return 0
|
|
726
|
+
fi
|
|
727
|
+
_DRAIN_IN_PROGRESS=1
|
|
728
|
+
echo "compute-source-env.sh: received drain signal, forwarding to child pgid=$CHILD_PID"
|
|
729
|
+
if [ -n "$CHILD_PID" ]; then
|
|
730
|
+
# Send to the process group so intermediate wrappers (su, sh -c,
|
|
731
|
+
# etc.) don't swallow the signal. The leading \`-\` targets the
|
|
732
|
+
# pgid, which equals the direct child's pid for a shell-backgrounded
|
|
733
|
+
# process. Fall back to the pid alone if pgid signaling fails
|
|
734
|
+
# (e.g. kernel older than 3.9 or PID namespace edge cases).
|
|
735
|
+
kill -TERM -"$CHILD_PID" 2>/dev/null || kill -TERM "$CHILD_PID" 2>/dev/null || true
|
|
736
|
+
# Give the app up to 30s to exit cleanly.
|
|
737
|
+
local i=0
|
|
738
|
+
while [ "$i" -lt 30 ] && kill -0 "$CHILD_PID" 2>/dev/null; do
|
|
739
|
+
i=$((i + 1))
|
|
740
|
+
sleep 1
|
|
741
|
+
done
|
|
742
|
+
if kill -0 "$CHILD_PID" 2>/dev/null; then
|
|
743
|
+
echo "compute-source-env.sh: child did not exit in 30s, sending SIGKILL"
|
|
744
|
+
kill -KILL -"$CHILD_PID" 2>/dev/null || kill -KILL "$CHILD_PID" 2>/dev/null || true
|
|
745
|
+
# Reap the process so its in-flight I/O is flushed to the
|
|
746
|
+
# filesystem before we sync + unmount. SIGKILL schedules
|
|
747
|
+
# death; wait guarantees it's complete.
|
|
748
|
+
wait "$CHILD_PID" 2>/dev/null || true
|
|
749
|
+
fi
|
|
750
|
+
fi
|
|
751
|
+
if [ "\${ECLOUD_PD_EXPECTED:-0}" = "1" ] && mountpoint -q "$USERDATA_MOUNT" 2>/dev/null; then
|
|
752
|
+
sync
|
|
753
|
+
if umount "$USERDATA_MOUNT" 2>/dev/null; then
|
|
754
|
+
echo "compute-source-env.sh: unmounted $USERDATA_MOUNT cleanly"
|
|
755
|
+
else
|
|
756
|
+
# Force lazy unmount as last resort \u2014 orchestrator still needs
|
|
757
|
+
# the DETACHED signal to proceed.
|
|
758
|
+
umount -l "$USERDATA_MOUNT" 2>/dev/null || true
|
|
759
|
+
echo "compute-source-env.sh: WARNING - used lazy unmount on $USERDATA_MOUNT"
|
|
760
|
+
fi
|
|
761
|
+
# ECLOUD_DETACHED is strictly a PD-lifecycle signal. Only emit
|
|
762
|
+
# it when we actually had a PD mount in play, so serial-log
|
|
763
|
+
# parsers and alerting for non-PD apps don't see spurious
|
|
764
|
+
# lifecycle markers on routine container SIGTERM.
|
|
765
|
+
echo "ECLOUD_DETACHED"
|
|
766
|
+
fi
|
|
767
|
+
# Always exit 0: drain is a managed shutdown and the orchestrator
|
|
768
|
+
# waits on ECLOUD_DETACHED, not the container exit code. Forwarding
|
|
769
|
+
# the child's exit status here would make a crash-during-drain look
|
|
770
|
+
# like a drain failure to whatever reads the container exit code.
|
|
771
|
+
exit 0
|
|
772
|
+
}
|
|
773
|
+
trap drain_handler TERM
|
|
774
|
+
|
|
775
|
+
# \u2500\u2500 Prewarm-detach: background drain watcher \u2500\u2500
|
|
776
|
+
# Container metadata delivery in Confidential Space is limited, so we
|
|
777
|
+
# poll the instance metadata server for ECLOUD_DRAIN_REQUESTED and
|
|
778
|
+
# raise SIGTERM on ourselves when it flips to "1".
|
|
779
|
+
#
|
|
780
|
+
# Try wget first (present in most Alpine bases), fall back to curl.
|
|
781
|
+
# If neither is present, drain watcher is disabled \u2014 the orchestrator
|
|
782
|
+
# will hit its drain timeout and fail the upgrade explicitly, which is
|
|
783
|
+
# the correct behavior (we cannot silently ignore a drain request).
|
|
784
|
+
_fetch_drain_flag() {
|
|
785
|
+
local url="http://metadata.google.internal/computeMetadata/v1/instance/attributes/ECLOUD_DRAIN_REQUESTED"
|
|
786
|
+
if command -v wget >/dev/null 2>&1; then
|
|
787
|
+
wget -q --tries=1 --timeout=2 --header='Metadata-Flavor: Google' -O - "$url" 2>/dev/null
|
|
788
|
+
elif command -v curl >/dev/null 2>&1; then
|
|
789
|
+
curl -sf --max-time 2 -H 'Metadata-Flavor: Google' "$url" 2>/dev/null
|
|
790
|
+
else
|
|
791
|
+
return 2
|
|
792
|
+
fi
|
|
793
|
+
}
|
|
794
|
+
|
|
795
|
+
drain_watcher() {
|
|
796
|
+
# Preflight: confirm we have an HTTP client
|
|
797
|
+
if ! _fetch_drain_flag >/dev/null 2>&1; then
|
|
798
|
+
# Either no http client available OR metadata server not
|
|
799
|
+
# responding yet. If no client, give up and log; otherwise the
|
|
800
|
+
# loop below will retry.
|
|
801
|
+
if ! command -v wget >/dev/null 2>&1 && ! command -v curl >/dev/null 2>&1; then
|
|
802
|
+
echo "compute-source-env.sh: WARNING - no wget/curl; drain_watcher disabled"
|
|
803
|
+
return 0
|
|
804
|
+
fi
|
|
805
|
+
fi
|
|
806
|
+
while true; do
|
|
807
|
+
local v
|
|
808
|
+
v=$(_fetch_drain_flag || true)
|
|
809
|
+
if [ "$v" = "1" ]; then
|
|
810
|
+
echo "compute-source-env.sh: drain_watcher saw ECLOUD_DRAIN_REQUESTED=1, signaling PID 1"
|
|
811
|
+
# The CS launcher runs this script directly as PID 1, so
|
|
812
|
+
# kill -TERM 1 delivers SIGTERM to the shell that installed
|
|
813
|
+
# the drain_handler trap. If the launch mechanism ever
|
|
814
|
+
# wraps this script in another process, this assumption
|
|
815
|
+
# breaks and drain will silently no-op \u2014 audit here.
|
|
816
|
+
kill -TERM 1 2>/dev/null || true
|
|
817
|
+
return 0
|
|
818
|
+
fi
|
|
819
|
+
sleep 2
|
|
820
|
+
done
|
|
821
|
+
}
|
|
822
|
+
|
|
823
|
+
if [ "\${ECLOUD_PD_EXPECTED:-0}" = "1" ]; then
|
|
824
|
+
# Assumption: the orchestrator only flips ECLOUD_DRAIN_REQUESTED=1
|
|
825
|
+
# after observing ECLOUD_AWAITING_USERDATA (old VM) or
|
|
826
|
+
# ECLOUD_READY (new VM), so CHILD_PID is always set by the time
|
|
827
|
+
# drain_handler fires. If drain somehow arrived in the tiny window
|
|
828
|
+
# between this watcher spawn and CHILD_PID assignment below,
|
|
829
|
+
# drain_handler would skip the child-kill branch and still emit
|
|
830
|
+
# ECLOUD_DETACHED \u2014 harmless because there's nothing to drain yet.
|
|
831
|
+
if [ -x /usr/local/bin/ecloud-drain-watcher ]; then
|
|
832
|
+
/usr/local/bin/ecloud-drain-watcher &
|
|
833
|
+
else
|
|
834
|
+
drain_watcher &
|
|
835
|
+
fi
|
|
836
|
+
fi
|
|
837
|
+
|
|
583
838
|
echo "compute-source-env.sh: Environment sourced."
|
|
839
|
+
echo "ECLOUD_READY runtime_bootstrapped"
|
|
584
840
|
|
|
585
841
|
# Drop privileges to original user for the application command
|
|
586
842
|
if [ -n "$__ECLOUD_ORIGINAL_USER" ] && [ "$(id -u)" = "0" ]; then
|
|
587
843
|
echo "compute-source-env.sh: Dropping privileges to user: $__ECLOUD_ORIGINAL_USER"
|
|
588
|
-
|
|
844
|
+
# Must background the child so our trap can fire; exec replaces PID 1.
|
|
845
|
+
su -s /bin/sh "$__ECLOUD_ORIGINAL_USER" -c 'exec "$@"' -- sh "$@" &
|
|
846
|
+
CHILD_PID=$!
|
|
847
|
+
wait "$CHILD_PID"
|
|
848
|
+
exit $?
|
|
589
849
|
fi
|
|
590
850
|
|
|
591
|
-
|
|
851
|
+
"$@" &
|
|
852
|
+
CHILD_PID=$!
|
|
853
|
+
wait "$CHILD_PID"
|
|
854
|
+
exit $?
|
|
592
855
|
`;
|
|
593
856
|
|
|
594
857
|
// src/client/common/templates/scriptTemplate.ts
|
|
@@ -781,6 +1044,8 @@ async function layerLocalImage(options, logger) {
|
|
|
781
1044
|
logger.debug(`Found DOMAIN=${domainMatch[1]} in ${envFilePath}, including TLS components`);
|
|
782
1045
|
}
|
|
783
1046
|
}
|
|
1047
|
+
const drainWatcherSource = findBinary("ecloud-drain-watcher-linux-amd64");
|
|
1048
|
+
const includeDrainWatcher = fs.existsSync(drainWatcherSource);
|
|
784
1049
|
const layeredDockerfileContent = processDockerfileTemplate({
|
|
785
1050
|
baseImage: sourceImageRef,
|
|
786
1051
|
originalCmd: JSON.stringify(originalCmd),
|
|
@@ -788,8 +1053,9 @@ async function layerLocalImage(options, logger) {
|
|
|
788
1053
|
logRedirect,
|
|
789
1054
|
resourceUsageAllow,
|
|
790
1055
|
includeTLS,
|
|
791
|
-
ecloudCLIVersion: "0.1.0"
|
|
1056
|
+
ecloudCLIVersion: "0.1.0",
|
|
792
1057
|
// TODO: Get from package.json
|
|
1058
|
+
includeDrainWatcher
|
|
793
1059
|
});
|
|
794
1060
|
const scriptContent = processScriptTemplate({
|
|
795
1061
|
kmsServerURL: environmentConfig.kmsServerURL,
|
|
@@ -799,7 +1065,8 @@ async function layerLocalImage(options, logger) {
|
|
|
799
1065
|
environmentConfig,
|
|
800
1066
|
layeredDockerfileContent,
|
|
801
1067
|
scriptContent,
|
|
802
|
-
includeTLS
|
|
1068
|
+
includeTLS,
|
|
1069
|
+
includeDrainWatcher ? drainWatcherSource : void 0
|
|
803
1070
|
// logger
|
|
804
1071
|
);
|
|
805
1072
|
try {
|
|
@@ -814,7 +1081,7 @@ async function layerLocalImage(options, logger) {
|
|
|
814
1081
|
fs.rmSync(tempDir, { recursive: true, force: true });
|
|
815
1082
|
}
|
|
816
1083
|
}
|
|
817
|
-
async function setupLayeredBuildDirectory(environmentConfig, layeredDockerfileContent, scriptContent, includeTLS) {
|
|
1084
|
+
async function setupLayeredBuildDirectory(environmentConfig, layeredDockerfileContent, scriptContent, includeTLS, drainWatcherSource) {
|
|
818
1085
|
const tempDir = fs.mkdtempSync(path2.join(os.tmpdir(), LAYERED_BUILD_DIR_PREFIX));
|
|
819
1086
|
try {
|
|
820
1087
|
const layeredDockerfilePath = path2.join(tempDir, LAYERED_DOCKERFILE_NAME);
|
|
@@ -838,6 +1105,11 @@ async function setupLayeredBuildDirectory(environmentConfig, layeredDockerfileCo
|
|
|
838
1105
|
}
|
|
839
1106
|
fs.copyFileSync(kmsClientSource, kmsClientPath);
|
|
840
1107
|
fs.chmodSync(kmsClientPath, 493);
|
|
1108
|
+
if (drainWatcherSource && fs.existsSync(drainWatcherSource)) {
|
|
1109
|
+
const drainWatcherPath = path2.join(tempDir, DRAIN_WATCHER_BINARY_NAME);
|
|
1110
|
+
fs.copyFileSync(drainWatcherSource, drainWatcherPath);
|
|
1111
|
+
fs.chmodSync(drainWatcherPath, 493);
|
|
1112
|
+
}
|
|
841
1113
|
if (includeTLS) {
|
|
842
1114
|
const tlsKeygenPath = path2.join(tempDir, TLS_KEYGEN_BINARY_NAME);
|
|
843
1115
|
const tlsKeygenSource = findBinary("tls-keygen-linux-amd64");
|
|
@@ -4729,7 +5001,7 @@ var CanViewAppLogsPermission = "0x2fd3f2fe";
|
|
|
4729
5001
|
var CanViewSensitiveAppInfoPermission = "0x0e67b22f";
|
|
4730
5002
|
var CanUpdateAppProfilePermission = "0x036fef61";
|
|
4731
5003
|
function getDefaultClientId() {
|
|
4732
|
-
const version = true ? "1.0.0-
|
|
5004
|
+
const version = true ? "1.0.0-devep2" : "0.0.0";
|
|
4733
5005
|
return `ecloud-sdk/v${version}`;
|
|
4734
5006
|
}
|
|
4735
5007
|
var UserApiClient = class {
|