@layr-labs/ecloud-sdk 1.0.0-dev.2 → 1.0.0-devep1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -300,6 +300,7 @@ var ENV_SOURCE_SCRIPT_NAME = "compute-source-env.sh";
300
300
  var KMS_CLIENT_BINARY_NAME = "kms-client";
301
301
  var KMS_SIGNING_KEY_NAME = "kms-signing-public-key.pem";
302
302
  var TLS_KEYGEN_BINARY_NAME = "tls-keygen";
303
+ var DRAIN_WATCHER_BINARY_NAME = "ecloud-drain-watcher";
303
304
  var CADDYFILE_NAME = "Caddyfile";
304
305
  var LAYERED_BUILD_DIR_PREFIX = "ecloud-layered-build";
305
306
 
@@ -573,7 +574,7 @@ var PushPermissionError = class extends Error {
573
574
  var import_handlebars = __toESM(require("handlebars"), 1);
574
575
 
575
576
  // src/client/common/templates/Dockerfile.layered.tmpl
576
- var Dockerfile_layered_default = '{{#if includeTLS}}\n# Get Caddy from official image\nFROM caddy:2.10.2-alpine AS caddy\n{{/if}}\n\nFROM {{baseImage}}\n\n{{#if originalUser}}\n# Switch to root to perform setup (base image has non-root USER: {{originalUser}})\nUSER root\n{{/if}}\n\n# Copy core TEE components\nCOPY compute-source-env.sh /usr/local/bin/\nCOPY kms-client /usr/local/bin/\nCOPY kms-signing-public-key.pem /usr/local/bin/\n\n{{#if includeTLS}}\n# Copy Caddy from official image\nCOPY --from=caddy /usr/bin/caddy /usr/local/bin/caddy\n\n# Copy TLS components\nCOPY tls-keygen /usr/local/bin/\nCOPY Caddyfile /etc/caddy/\n{{/if}}\n\n{{#if originalUser}}\n# Make binaries executable (755 for executables, 644 for keys)\nRUN chmod 755 /usr/local/bin/compute-source-env.sh \\\n && chmod 755 /usr/local/bin/kms-client{{#if includeTLS}} \\\n && chmod 755 /usr/local/bin/tls-keygen \\\n && chmod 755 /usr/local/bin/caddy{{/if}} \\\n && chmod 644 /usr/local/bin/kms-signing-public-key.pem\n\n# Store original user - entrypoint will drop privileges to this user after TEE setup\nENV __ECLOUD_ORIGINAL_USER={{originalUser}}\n{{else}}\n# Make binaries executable (preserve existing permissions, just add execute)\nRUN chmod +x /usr/local/bin/compute-source-env.sh \\\n && chmod +x /usr/local/bin/kms-client{{#if includeTLS}} \\\n && chmod +x /usr/local/bin/tls-keygen{{/if}}\n{{/if}}\n\n{{#if logRedirect}}\n\nLABEL tee.launch_policy.log_redirect={{logRedirect}}\n{{/if}}\n{{#if resourceUsageAllow}}\n\nLABEL tee.launch_policy.monitoring_memory_allow={{resourceUsageAllow}}\n{{/if}}\n\nLABEL eigenx_cli_version={{ecloudCLIVersion}}\nLABEL eigenx_vm_image=eigen\n\n{{#if includeTLS}}\n# Expose both HTTP and HTTPS ports for Caddy\nEXPOSE 80 443\n{{/if}}\n\nENTRYPOINT ["/usr/local/bin/compute-source-env.sh"]\nCMD {{{originalCmd}}}\n';
577
+ var Dockerfile_layered_default = '{{#if includeTLS}}\n# Get Caddy from official image\nFROM caddy:2.10.2-alpine AS caddy\n{{/if}}\n\nFROM {{baseImage}}\n\n{{#if originalUser}}\n# Switch to root to perform setup (base image has non-root USER: {{originalUser}})\nUSER root\n{{/if}}\n\n# Copy core TEE components\nCOPY compute-source-env.sh /usr/local/bin/\nCOPY kms-client /usr/local/bin/\nCOPY kms-signing-public-key.pem /usr/local/bin/\n{{#if includeDrainWatcher}}\nCOPY ecloud-drain-watcher /usr/local/bin/\n{{/if}}\n\n{{#if includeTLS}}\n# Copy Caddy from official image\nCOPY --from=caddy /usr/bin/caddy /usr/local/bin/caddy\n\n# Copy TLS components\nCOPY tls-keygen /usr/local/bin/\nCOPY Caddyfile /etc/caddy/\n{{/if}}\n\n{{#if originalUser}}\n# Make binaries executable (755 for executables, 644 for keys)\nRUN chmod 755 /usr/local/bin/compute-source-env.sh \\\n && chmod 755 /usr/local/bin/kms-client{{#if includeDrainWatcher}} \\\n && chmod 755 /usr/local/bin/ecloud-drain-watcher{{/if}}{{#if includeTLS}} \\\n && chmod 755 /usr/local/bin/tls-keygen \\\n && chmod 755 /usr/local/bin/caddy{{/if}} \\\n && chmod 644 /usr/local/bin/kms-signing-public-key.pem\n\n# Store original user - entrypoint will drop privileges to this user after TEE setup\nENV __ECLOUD_ORIGINAL_USER={{originalUser}}\n{{else}}\n# Make binaries executable (preserve existing permissions, just add execute)\nRUN chmod +x /usr/local/bin/compute-source-env.sh \\\n && chmod +x /usr/local/bin/kms-client{{#if includeDrainWatcher}} \\\n && chmod +x /usr/local/bin/ecloud-drain-watcher{{/if}}{{#if includeTLS}} \\\n && chmod +x /usr/local/bin/tls-keygen{{/if}}\n{{/if}}\n\n{{#if logRedirect}}\n\nLABEL tee.launch_policy.log_redirect={{logRedirect}}\n{{/if}}\n{{#if resourceUsageAllow}}\n\nLABEL tee.launch_policy.monitoring_memory_allow={{resourceUsageAllow}}\n{{/if}}\n\n# Allow-list the envvars the ecloud-platform sets via GCE `tee-env-*`\n# metadata. 
Without this label, Confidential Space\'s launcher rejects\n# any `tee-env-*` override at container-start with\n# "env var {...} is not allowed to be overridden on this image" and\n# exits with code 1 \u2014 which terminates the VM before the entrypoint\n# ever runs. ECLOUD_PD_EXPECTED is set on PD-backed apps so the\n# entrypoint (compute-source-env.sh) knows to wait for the persistent\n# disk before exec\'ing the user workload. User-supplied env vars\n# flow through KMS (not tee-env-*) and don\'t need to be listed here.\nLABEL tee.launch_policy.allow_env_override=ECLOUD_PD_EXPECTED\n\nLABEL eigenx_cli_version={{ecloudCLIVersion}}\nLABEL eigenx_vm_image=eigen\nLABEL eigenx_container_contract=v1\n\n{{#if includeTLS}}\n# Expose both HTTP and HTTPS ports for Caddy\nEXPOSE 80 443\n{{/if}}\n\nENTRYPOINT ["/usr/local/bin/compute-source-env.sh"]\nCMD {{{originalCmd}}}\n';
577
578
 
578
579
  // src/client/common/templates/dockerfileTemplate.ts
579
580
  function processDockerfileTemplate(data) {
@@ -586,6 +587,49 @@ var import_handlebars2 = __toESM(require("handlebars"), 1);
586
587
 
587
588
  // src/client/common/templates/compute-source-env.sh.tmpl
588
589
  var compute_source_env_sh_default = `#!/bin/sh
590
+ # EigenCompute container entrypoint script
591
+ # This script handles KMS secret fetching, TLS setup, and privilege dropping
592
+ # before executing the user's application.
593
+ #
594
+ # Handlebars template variables (replaced at build time by the CLI):
595
+ # kmsServerURL - URL of the KMS server
596
+ # userAPIURL - URL of the user API (ecloud-platform)
597
+ # The KMS signing public key is copied into the image as
598
+ # /usr/local/bin/kms-signing-public-key.pem at layer-build time by the CLI.
599
+ #
600
+ # ecloud-platform divergence from compute-tee:
601
+ # This script emits ECLOUD_READY / ECLOUD_FAIL / ECLOUD_AWAITING_USERDATA /
602
+ # ECLOUD_DETACHED markers to stdout at key lifecycle points. The GCP
603
+ # provisioner's serial-console watcher in ecloud-platform
604
+ # (pkg/services/infraService/providers/gcp/compute.go) parses those
605
+ # markers to gate "VM ready" and to coordinate the prewarm-detach
606
+ # upgrade flow. Without the markers, the platform's waitForStartupReady
607
+ # times out at ~10 minutes per deploy, rollback fires, and the VM is
608
+ # deleted \u2014 seen in dev on 2026-05-04 with an older copy of this
609
+ # template that lacked the markers.
610
+ #
611
+ # Prewarm-detach contract:
612
+ # - If ECLOUD_PD_EXPECTED=1 and /mnt/disks/userdata is not present at boot,
613
+ # emit ECLOUD_AWAITING_USERDATA and wait until the disk is attached.
614
+ # - On SIGTERM (drain-requested), forward to child, wait for exit, sync
615
+ # + unmount /mnt/disks/userdata, emit ECLOUD_DETACHED, exit.
616
+ # - ECLOUD_READY is emitted once runtime is bootstrapped (same as before).
617
+ # - ECLOUD_FAIL is emitted on any unrecoverable setup error.
618
+ # Keep the markers on any line that resolves a lifecycle outcome.
619
+ #
620
+ # This file is kept in lockstep with
621
+ # ecloud-platform/pkg/services/buildService/assets/compute-source-env.sh.tmpl
622
+ # \u2014 if you change one, change the other. Differences vs the platform copy
623
+ # are intentionally minimal:
624
+ # - Handlebars placeholders use the CLI's naming (kmsServerURL,
625
+ # userAPIURL) rather than the platform's (KMS_SERVER_URL,
626
+ # USER_API_URL). (See top of file for real placeholder syntax \u2014
627
+ # not repeated here so Handlebars doesn't expand it in this comment.)
628
+ # - KMS signing key is read from a file the CLI copies into the image,
629
+ # not heredoc-embedded in the script, because the CLI's image
630
+ # layering writes it as a separate file (kms-signing-public-key.pem).
631
+ # - TLS binary is \`tls-keygen\` (CLI-bundled) not \`tls-client\`.
632
+
589
633
  echo "compute-source-env.sh: Running setup script..."
590
634
 
591
635
  # Fetch and source environment variables from KMS
@@ -601,92 +645,93 @@ if /usr/local/bin/kms-client \\
601
645
  else
602
646
  echo "compute-source-env.sh: ERROR - Failed to fetch environment variables from KMS"
603
647
  echo "compute-source-env.sh: Exiting - cannot start user workload without KMS secrets"
648
+ echo "ECLOUD_FAIL kms_bootstrap"
604
649
  exit 1
605
650
  fi
606
651
 
607
- # Setup TLS if tls-keygen is present (which means TLS was configured at build time)
652
+ # Setup TLS if tls-keygen is present and DOMAIN is configured
608
653
  setup_tls() {
609
654
  # If tls-keygen isn't present, TLS wasn't configured during build
610
655
  if [ ! -x /usr/local/bin/tls-keygen ]; then
611
656
  echo "compute-source-env.sh: TLS not configured (no tls-keygen binary)"
612
657
  return 0
613
658
  fi
614
-
659
+
615
660
  local domain="\${DOMAIN:-}"
616
661
  local mnemonic="\${MNEMONIC:-}"
617
-
618
- # Since tls-keygen is present, TLS is expected - validate requirements
662
+
663
+ # If DOMAIN is not set or is localhost, skip TLS setup
619
664
  if [ -z "$domain" ] || [ "$domain" = "localhost" ]; then
620
- echo "compute-source-env.sh: ERROR - TLS binary present but DOMAIN not configured or is localhost"
621
- echo "compute-source-env.sh: Set DOMAIN environment variable to a valid domain"
622
- exit 1
665
+ echo "compute-source-env.sh: TLS skipped (DOMAIN not set or is localhost)"
666
+ return 0
623
667
  fi
624
-
668
+
625
669
  if [ -z "$mnemonic" ]; then
626
- echo "compute-source-env.sh: ERROR - TLS binary present but MNEMONIC not available"
670
+ echo "compute-source-env.sh: ERROR - TLS requested but MNEMONIC not available"
627
671
  echo "compute-source-env.sh: Cannot obtain TLS certificate without mnemonic"
672
+ echo "ECLOUD_FAIL tls_mnemonic_missing"
628
673
  exit 1
629
674
  fi
630
-
675
+
631
676
  if [ ! -x /usr/local/bin/caddy ]; then
632
- echo "compute-source-env.sh: ERROR - TLS binary present but Caddy not found"
677
+ echo "compute-source-env.sh: ERROR - TLS requested but Caddy not found"
678
+ echo "ECLOUD_FAIL tls_caddy_missing"
633
679
  exit 1
634
680
  fi
635
-
681
+
636
682
  echo "compute-source-env.sh: Setting up TLS for domain: $domain"
637
-
683
+
638
684
  # Obtain TLS certificate using ACME
639
- # Default to http-01, but allow override via ACME_CHALLENGE env var
640
685
  local challenge="\${ACME_CHALLENGE:-http-01}"
641
-
686
+
642
687
  # Check if we should use staging (for testing)
643
688
  local staging_flag=""
644
689
  if [ "\${ACME_STAGING:-false}" = "true" ]; then
645
690
  staging_flag="-staging"
646
- echo "compute-source-env.sh: Using Let's Encrypt STAGING environment (certificates won't be trusted)"
691
+ echo "compute-source-env.sh: Using Let's Encrypt STAGING environment"
647
692
  fi
648
-
693
+
649
694
  echo "compute-source-env.sh: Obtaining TLS certificate using $challenge challenge..."
650
- # Pass the API URL for certificate persistence
651
695
  if ! MNEMONIC="$mnemonic" DOMAIN="$domain" API_URL="{{userAPIURL}}" /usr/local/bin/tls-keygen \\
652
696
  -challenge "$challenge" \\
653
697
  $staging_flag; then
654
698
  echo "compute-source-env.sh: ERROR - Failed to obtain TLS certificate"
655
- echo "compute-source-env.sh: Certificate issuance failed for $domain"
699
+ echo "ECLOUD_FAIL tls_setup"
656
700
  exit 1
657
701
  fi
658
-
702
+
659
703
  echo "compute-source-env.sh: TLS certificate obtained successfully"
660
-
704
+
661
705
  # Validate Caddyfile before starting
662
- if ! /usr/local/bin/caddy validate --config /etc/caddy/Caddyfile --adapter caddyfile 2>/dev/null; then
663
- echo "compute-source-env.sh: ERROR - Invalid Caddyfile"
664
- echo "compute-source-env.sh: TLS was requested (DOMAIN=$domain) but setup failed"
665
- exit 1
666
- fi
667
-
668
- # Start Caddy in background
669
- echo "compute-source-env.sh: Starting Caddy reverse proxy..."
670
-
671
- # Check if Caddy logs should be enabled
672
- if [ "\${ENABLE_CADDY_LOGS:-false}" = "true" ]; then
673
- if ! /usr/local/bin/caddy start --config /etc/caddy/Caddyfile --adapter caddyfile 2>&1; then
674
- echo "compute-source-env.sh: ERROR - Failed to start Caddy"
675
- echo "compute-source-env.sh: TLS was requested (DOMAIN=$domain) but setup failed"
706
+ if [ -f /etc/caddy/Caddyfile ]; then
707
+ if ! /usr/local/bin/caddy validate --config /etc/caddy/Caddyfile --adapter caddyfile 2>/dev/null; then
708
+ echo "compute-source-env.sh: ERROR - Invalid Caddyfile"
709
+ echo "ECLOUD_FAIL tls_invalid_caddyfile"
676
710
  exit 1
677
711
  fi
678
- else
679
- # Redirect Caddy output to /dev/null to silence logs
680
- if ! /usr/local/bin/caddy start --config /etc/caddy/Caddyfile --adapter caddyfile >/dev/null 2>&1; then
681
- echo "compute-source-env.sh: ERROR - Failed to start Caddy"
682
- echo "compute-source-env.sh: TLS was requested (DOMAIN=$domain) but setup failed"
683
- exit 1
712
+
713
+ # Start Caddy in background
714
+ echo "compute-source-env.sh: Starting Caddy reverse proxy..."
715
+ if [ "\${ENABLE_CADDY_LOGS:-false}" = "true" ]; then
716
+ if ! /usr/local/bin/caddy start --config /etc/caddy/Caddyfile --adapter caddyfile 2>&1; then
717
+ echo "compute-source-env.sh: ERROR - Failed to start Caddy"
718
+ echo "ECLOUD_FAIL tls_caddy_start"
719
+ exit 1
720
+ fi
721
+ else
722
+ if ! /usr/local/bin/caddy start --config /etc/caddy/Caddyfile --adapter caddyfile >/dev/null 2>&1; then
723
+ echo "compute-source-env.sh: ERROR - Failed to start Caddy"
724
+ echo "ECLOUD_FAIL tls_caddy_start"
725
+ exit 1
726
+ fi
684
727
  fi
728
+
729
+ sleep 2
730
+ echo "compute-source-env.sh: Caddy started successfully"
731
+ else
732
+ echo "compute-source-env.sh: No Caddyfile found, skipping Caddy"
685
733
  fi
686
-
687
- # Give Caddy a moment to fully initialize
688
- sleep 2
689
- echo "compute-source-env.sh: Caddy started successfully"
734
+
690
735
  return 0
691
736
  }
692
737
 
@@ -697,15 +742,233 @@ setup_tls
697
742
  export KMS_SERVER_URL="{{kmsServerURL}}"
698
743
  export KMS_PUBLIC_KEY="$(cat /usr/local/bin/kms-signing-public-key.pem)"
699
744
 
745
+ # \u2500\u2500 Prewarm-detach: wait for PD if expected \u2500\u2500
746
+ # Orchestrator sets ECLOUD_PD_EXPECTED=1 on apps using StorageBackend=pd.
747
+ # When the prewarm path is used, the new VM boots WITHOUT the disk; we
748
+ # signal awaiting-userdata and poll until the disk is attached.
749
+ USERDATA_MOUNT="/mnt/disks/userdata"
750
+ USERDATA_DEV="/dev/disk/by-id/google-persistent_storage_1"
751
+
752
+ wait_for_userdata() {
753
+ if [ "\${ECLOUD_PD_EXPECTED:-0}" != "1" ]; then
754
+ return 0
755
+ fi
756
+ if mountpoint -q "$USERDATA_MOUNT" 2>/dev/null; then
757
+ echo "compute-source-env.sh: userdata already mounted at $USERDATA_MOUNT"
758
+ return 0
759
+ fi
760
+ # Refuse to proceed if the tools we need for safe first-attach
761
+ # detection are missing. Without blkid we cannot tell an empty new
762
+ # disk from an already-formatted one \u2014 running mkfs.ext4 on the
763
+ # latter would destroy data.
764
+ if ! command -v blkid >/dev/null 2>&1; then
765
+ echo "ECLOUD_FAIL pd_tools_missing"
766
+ exit 1
767
+ fi
768
+ echo "ECLOUD_AWAITING_USERDATA"
769
+ echo "compute-source-env.sh: waiting for PD at $USERDATA_DEV..."
770
+ # Poll for up to 10 minutes (120 * 5s). The orchestrator's overall
771
+ # attach timeout is shorter; the ceiling here just bounds the wait
772
+ # for manual / diagnostic scenarios.
773
+ local i=0
774
+ local mount_failures=0
775
+ while [ "$i" -lt 120 ]; do
776
+ if [ -e "$USERDATA_DEV" ]; then
777
+ mkdir -p "$USERDATA_MOUNT"
778
+ if mount -o noatime "$USERDATA_DEV" "$USERDATA_MOUNT" 2>/dev/null; then
779
+ echo "compute-source-env.sh: PD mounted at $USERDATA_MOUNT"
780
+ return 0
781
+ fi
782
+ # Disk present but mount failed. Check whether it has a
783
+ # recognized filesystem. \`blkid -s TYPE -o value\` prints the
784
+ # FS type (empty if none). We only mkfs when there is
785
+ # demonstrably NO filesystem \u2014 never on the basis of blkid
786
+ # returning non-zero alone, which could mean "blkid missing"
787
+ # or "device busy".
788
+ local fstype
789
+ fstype=$(blkid -s TYPE -o value "$USERDATA_DEV" 2>/dev/null)
790
+ if [ -z "$fstype" ]; then
791
+ echo "compute-source-env.sh: formatting $USERDATA_DEV (first attach)"
792
+ mkfs.ext4 -F -L eclouddata "$USERDATA_DEV" >/dev/null 2>&1 || {
793
+ echo "ECLOUD_FAIL pd_mkfs_failed"
794
+ exit 1
795
+ }
796
+ mount -o noatime "$USERDATA_DEV" "$USERDATA_MOUNT" || {
797
+ echo "ECLOUD_FAIL pd_mount_after_format_failed"
798
+ exit 1
799
+ }
800
+ return 0
801
+ fi
802
+ # Disk has a filesystem but mount still failed. Give it a
803
+ # few retries to cover transient cases (device busy, udev
804
+ # still settling), but don't pretend this is an attach
805
+ # timeout if it persists.
806
+ mount_failures=$((mount_failures + 1))
807
+ if [ "$mount_failures" -ge 6 ]; then
808
+ echo "ECLOUD_FAIL pd_mount_failed"
809
+ exit 1
810
+ fi
811
+ else
812
+ # Device disappeared (e.g. udev re-enumeration between
813
+ # attach and mount). Reset the consecutive-failure counter
814
+ # so only true back-to-back mount failures trip
815
+ # pd_mount_failed; a device blip should not steal retries.
816
+ mount_failures=0
817
+ fi
818
+ i=$((i + 1))
819
+ sleep 5
820
+ done
821
+ echo "ECLOUD_FAIL pd_attach_timeout"
822
+ exit 1
823
+ }
824
+
825
+ wait_for_userdata
826
+
827
+ # \u2500\u2500 Prewarm-detach: install SIGTERM handler for graceful drain \u2500\u2500
828
+ # Orchestrator signals drain by setting the instance metadata key
829
+ # ECLOUD_DRAIN_REQUESTED=1, which a drain watcher (see below) turns into
830
+ # SIGTERM on PID 1. On SIGTERM we:
831
+ # 1. Forward to the child (wakes the user's app for graceful exit)
832
+ # 2. Wait for child exit
833
+ # 3. Sync + unmount the PD
834
+ # 4. Emit ECLOUD_DETACHED so the orchestrator can proceed to detach
835
+ CHILD_PID=""
836
+ _DRAIN_IN_PROGRESS=0
837
+
838
+ drain_handler() {
839
+ # Guard against re-entry if SIGTERM arrives twice (e.g. both the
840
+ # drain_watcher and an external signal fire in quick succession).
841
+ if [ "$_DRAIN_IN_PROGRESS" = "1" ]; then
842
+ return 0
843
+ fi
844
+ _DRAIN_IN_PROGRESS=1
845
+ echo "compute-source-env.sh: received drain signal, forwarding to child pgid=$CHILD_PID"
846
+ if [ -n "$CHILD_PID" ]; then
847
+ # Try the process group first so intermediate wrappers (su, sh -c,
848
+ # etc.) don't swallow the signal. The leading \`-\` targets the
849
+ # pgid, which equals the child's pid only when the shell runs with
850
+ # job control (set -m). Without it the child normally shares our
851
+ # process group, the pgid kill fails, and we fall back to the pid.
852
+ kill -TERM -"$CHILD_PID" 2>/dev/null || kill -TERM "$CHILD_PID" 2>/dev/null || true
853
+ # Give the app up to 30s to exit cleanly.
854
+ local i=0
855
+ while [ "$i" -lt 30 ] && kill -0 "$CHILD_PID" 2>/dev/null; do
856
+ i=$((i + 1))
857
+ sleep 1
858
+ done
859
+ if kill -0 "$CHILD_PID" 2>/dev/null; then
860
+ echo "compute-source-env.sh: child did not exit in 30s, sending SIGKILL"
861
+ kill -KILL -"$CHILD_PID" 2>/dev/null || kill -KILL "$CHILD_PID" 2>/dev/null || true
862
+ # Reap the process so its in-flight I/O is flushed to the
863
+ # filesystem before we sync + unmount. SIGKILL schedules
864
+ # death; wait guarantees it's complete.
865
+ wait "$CHILD_PID" 2>/dev/null || true
866
+ fi
867
+ fi
868
+ if [ "\${ECLOUD_PD_EXPECTED:-0}" = "1" ] && mountpoint -q "$USERDATA_MOUNT" 2>/dev/null; then
869
+ sync
870
+ if umount "$USERDATA_MOUNT" 2>/dev/null; then
871
+ echo "compute-source-env.sh: unmounted $USERDATA_MOUNT cleanly"
872
+ else
873
+ # Force lazy unmount as last resort \u2014 orchestrator still needs
874
+ # the DETACHED signal to proceed.
875
+ umount -l "$USERDATA_MOUNT" 2>/dev/null || true
876
+ echo "compute-source-env.sh: WARNING - used lazy unmount on $USERDATA_MOUNT"
877
+ fi
878
+ # ECLOUD_DETACHED is strictly a PD-lifecycle signal. Only emit
879
+ # it when we actually had a PD mount in play, so serial-log
880
+ # parsers and alerting for non-PD apps don't see spurious
881
+ # lifecycle markers on routine container SIGTERM.
882
+ echo "ECLOUD_DETACHED"
883
+ fi
884
+ # Always exit 0: drain is a managed shutdown and the orchestrator
885
+ # waits on ECLOUD_DETACHED, not the container exit code. Forwarding
886
+ # the child's exit status here would make a crash-during-drain look
887
+ # like a drain failure to whatever reads the container exit code.
888
+ exit 0
889
+ }
890
+ trap drain_handler TERM
891
+
892
+ # \u2500\u2500 Prewarm-detach: background drain watcher \u2500\u2500
893
+ # Container metadata delivery in Confidential Space is limited, so we
894
+ # poll the instance metadata server for ECLOUD_DRAIN_REQUESTED and
895
+ # raise SIGTERM on ourselves when it flips to "1".
896
+ #
897
+ # Try wget first (present in most Alpine bases), fall back to curl.
898
+ # If neither is present, drain watcher is disabled \u2014 the orchestrator
899
+ # will hit its drain timeout and fail the upgrade explicitly, which is
900
+ # the correct behavior (we cannot silently ignore a drain request).
901
+ _fetch_drain_flag() {
902
+ local url="http://metadata.google.internal/computeMetadata/v1/instance/attributes/ECLOUD_DRAIN_REQUESTED"
903
+ if command -v wget >/dev/null 2>&1; then
904
+ wget -q --tries=1 --timeout=2 --header='Metadata-Flavor: Google' -O - "$url" 2>/dev/null
905
+ elif command -v curl >/dev/null 2>&1; then
906
+ curl -sf --max-time 2 -H 'Metadata-Flavor: Google' "$url" 2>/dev/null
907
+ else
908
+ return 2
909
+ fi
910
+ }
911
+
912
+ drain_watcher() {
913
+ # Preflight: confirm we have an HTTP client
914
+ if ! _fetch_drain_flag >/dev/null 2>&1; then
915
+ # Either no http client available OR metadata server not
916
+ # responding yet. If no client, give up and log; otherwise the
917
+ # loop below will retry.
918
+ if ! command -v wget >/dev/null 2>&1 && ! command -v curl >/dev/null 2>&1; then
919
+ echo "compute-source-env.sh: WARNING - no wget/curl; drain_watcher disabled"
920
+ return 0
921
+ fi
922
+ fi
923
+ while true; do
924
+ local v
925
+ v=$(_fetch_drain_flag || true)
926
+ if [ "$v" = "1" ]; then
927
+ echo "compute-source-env.sh: drain_watcher saw ECLOUD_DRAIN_REQUESTED=1, signaling PID 1"
928
+ # The CS launcher runs this script directly as PID 1, so
929
+ # kill -TERM 1 delivers SIGTERM to the shell that installed
930
+ # the drain_handler trap. If the launch mechanism ever
931
+ # wraps this script in another process, this assumption
932
+ # breaks and drain will silently no-op \u2014 audit here.
933
+ kill -TERM 1 2>/dev/null || true
934
+ return 0
935
+ fi
936
+ sleep 2
937
+ done
938
+ }
939
+
940
+ if [ "\${ECLOUD_PD_EXPECTED:-0}" = "1" ]; then
941
+ # Assumption: the orchestrator only flips ECLOUD_DRAIN_REQUESTED=1
942
+ # after observing ECLOUD_AWAITING_USERDATA (old VM) or
943
+ # ECLOUD_READY (new VM), so CHILD_PID is always set by the time
944
+ # drain_handler fires. If drain somehow arrived in the tiny window
945
+ # between this watcher spawn and CHILD_PID assignment below,
946
+ # drain_handler would skip the child-kill branch and still emit
947
+ # ECLOUD_DETACHED \u2014 harmless because there's nothing to drain yet.
948
+ if [ -x /usr/local/bin/ecloud-drain-watcher ]; then
949
+ /usr/local/bin/ecloud-drain-watcher &
950
+ else
951
+ drain_watcher &
952
+ fi
953
+ fi
954
+
700
955
  echo "compute-source-env.sh: Environment sourced."
956
+ echo "ECLOUD_READY runtime_bootstrapped"
701
957
 
702
958
  # Drop privileges to original user for the application command
703
959
  if [ -n "$__ECLOUD_ORIGINAL_USER" ] && [ "$(id -u)" = "0" ]; then
704
960
  echo "compute-source-env.sh: Dropping privileges to user: $__ECLOUD_ORIGINAL_USER"
705
- exec su -s /bin/sh "$__ECLOUD_ORIGINAL_USER" -c 'exec "$@"' -- sh "$@"
961
+ # Must background the child so our trap can fire; exec replaces PID 1.
962
+ su -s /bin/sh "$__ECLOUD_ORIGINAL_USER" -c 'exec "$@"' -- sh "$@" &
963
+ CHILD_PID=$!
964
+ wait "$CHILD_PID"
965
+ exit $?
706
966
  fi
707
967
 
708
- exec "$@"
968
+ "$@" &
969
+ CHILD_PID=$!
970
+ wait "$CHILD_PID"
971
+ exit $?
709
972
  `;
710
973
 
711
974
  // src/client/common/templates/scriptTemplate.ts
@@ -898,6 +1161,8 @@ async function layerLocalImage(options, logger) {
898
1161
  logger.debug(`Found DOMAIN=${domainMatch[1]} in ${envFilePath}, including TLS components`);
899
1162
  }
900
1163
  }
1164
+ const drainWatcherSource = findBinary("ecloud-drain-watcher-linux-amd64");
1165
+ const includeDrainWatcher = fs.existsSync(drainWatcherSource);
901
1166
  const layeredDockerfileContent = processDockerfileTemplate({
902
1167
  baseImage: sourceImageRef,
903
1168
  originalCmd: JSON.stringify(originalCmd),
@@ -905,8 +1170,9 @@ async function layerLocalImage(options, logger) {
905
1170
  logRedirect,
906
1171
  resourceUsageAllow,
907
1172
  includeTLS,
908
- ecloudCLIVersion: "0.1.0"
1173
+ ecloudCLIVersion: "0.1.0",
909
1174
  // TODO: Get from package.json
1175
+ includeDrainWatcher
910
1176
  });
911
1177
  const scriptContent = processScriptTemplate({
912
1178
  kmsServerURL: environmentConfig.kmsServerURL,
@@ -916,7 +1182,8 @@ async function layerLocalImage(options, logger) {
916
1182
  environmentConfig,
917
1183
  layeredDockerfileContent,
918
1184
  scriptContent,
919
- includeTLS
1185
+ includeTLS,
1186
+ includeDrainWatcher ? drainWatcherSource : void 0
920
1187
  // logger
921
1188
  );
922
1189
  try {
@@ -931,7 +1198,7 @@ async function layerLocalImage(options, logger) {
931
1198
  fs.rmSync(tempDir, { recursive: true, force: true });
932
1199
  }
933
1200
  }
934
- async function setupLayeredBuildDirectory(environmentConfig, layeredDockerfileContent, scriptContent, includeTLS) {
1201
+ async function setupLayeredBuildDirectory(environmentConfig, layeredDockerfileContent, scriptContent, includeTLS, drainWatcherSource) {
935
1202
  const tempDir = fs.mkdtempSync(path2.join(os.tmpdir(), LAYERED_BUILD_DIR_PREFIX));
936
1203
  try {
937
1204
  const layeredDockerfilePath = path2.join(tempDir, LAYERED_DOCKERFILE_NAME);
@@ -955,6 +1222,11 @@ async function setupLayeredBuildDirectory(environmentConfig, layeredDockerfileCo
955
1222
  }
956
1223
  fs.copyFileSync(kmsClientSource, kmsClientPath);
957
1224
  fs.chmodSync(kmsClientPath, 493);
1225
+ if (drainWatcherSource && fs.existsSync(drainWatcherSource)) {
1226
+ const drainWatcherPath = path2.join(tempDir, DRAIN_WATCHER_BINARY_NAME);
1227
+ fs.copyFileSync(drainWatcherSource, drainWatcherPath);
1228
+ fs.chmodSync(drainWatcherPath, 493);
1229
+ }
958
1230
  if (includeTLS) {
959
1231
  const tlsKeygenPath = path2.join(tempDir, TLS_KEYGEN_BINARY_NAME);
960
1232
  const tlsKeygenSource = findBinary("tls-keygen-linux-amd64");
@@ -4986,7 +5258,7 @@ var CanViewAppLogsPermission = "0x2fd3f2fe";
4986
5258
  var CanViewSensitiveAppInfoPermission = "0x0e67b22f";
4987
5259
  var CanUpdateAppProfilePermission = "0x036fef61";
4988
5260
  function getDefaultClientId() {
4989
- const version = true ? "1.0.0-dev.2" : "0.0.0";
5261
+ const version = true ? "1.0.0-devep1" : "0.0.0";
4990
5262
  return `ecloud-sdk/v${version}`;
4991
5263
  }
4992
5264
  var UserApiClient = class {