@arcote.tech/arc-cli 0.7.0 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -36,6 +36,13 @@ interface PlatformDeployOptions {
36
36
  * Format: bare content hash (e.g. `abc123def456`) or full ref.
37
37
  */
38
38
  imageTag?: string;
39
+ /**
40
+ * Force the Ansible host-bootstrap step to run even when the marker says
41
+ * the host is already configured. Default behavior skips Ansible whenever
42
+ * the server is reachable and has Docker — use this after editing the
43
+ * embedded playbook or to recover from a corrupted host config.
44
+ */
45
+ forceBootstrap?: boolean;
39
46
  }
40
47
 
41
48
  // ---------------------------------------------------------------------------
@@ -78,7 +85,7 @@ export async function platformDeploy(
78
85
  : Object.keys(cfg.envs);
79
86
 
80
87
  // 2. Ensure local build (unless --image-tag rollback skips build+push entirely)
81
- const manifestPath = join(ws.modulesDir, "manifest.json");
88
+ const manifestPath = join(ws.arcDir, "manifest.json");
82
89
  if (!options.imageTag) {
83
90
  const needBuild = options.rebuild || !existsSync(manifestPath);
84
91
  if (needBuild && !options.skipBuild) {
@@ -91,9 +98,8 @@ export async function platformDeploy(
91
98
  }
92
99
  }
93
100
 
94
- // 3. Resolve the full image ref. Two paths:
95
- // a) --image-tag <hash> rollback / pin. No build, no push.
96
- // b) Default — buildImage locally, push to private registry.
101
+ // 3. Build the image locally. Push happens AFTER bootstrap so the registry
102
+ // container exists when we try to push to it (chicken-and-egg otherwise).
97
103
  const imageName = sanitizeImageName(ws.rootPkg.name ?? ws.appName);
98
104
  let fullRef: string;
99
105
  let contentHash: string;
@@ -119,31 +125,34 @@ export async function platformDeploy(
119
125
  log(`contentHash: ${contentHash}`);
120
126
  return;
121
127
  }
122
-
123
- // 4. Push to the private registry. dockerLogin reads password from the
124
- // env var named in cfg.registry.passwordEnv.
125
- log(`Logging in to ${cfg.registry.domain}...`);
126
- await dockerLogin(cfg.registry);
127
- log(`Pushing ${fullRef}...`);
128
- await dockerPush(fullRef);
129
- ok("Image pushed");
130
128
  }
131
129
 
132
- // 5. Detect remote state, bootstrap if needed
130
+ // 4. Detect remote state + bootstrap. This brings up caddy + registry on
131
+ // first deploy (and regenerates the stack if config changed). MUST run
132
+ // before dockerLogin/dockerPush — without registry container + Caddy vhost
133
+ // for it, dockerLogin would TLS-fail.
133
134
  log("Inspecting remote server...");
134
135
  const state = await detectRemoteState(cfg);
135
136
  log(`Remote state: ${state.kind}`);
136
137
 
137
138
  const cliVersion = readCliVersion();
138
139
  const configHash = await hashDeployConfig(ws.rootDir);
139
- if (state.kind !== "ready") {
140
- await bootstrap({
141
- cfg,
142
- rootDir: ws.rootDir,
143
- state,
144
- cliVersion,
145
- configHash,
146
- });
140
+ await bootstrap({
141
+ cfg,
142
+ rootDir: ws.rootDir,
143
+ state,
144
+ cliVersion,
145
+ configHash,
146
+ forceAnsible: options.forceBootstrap,
147
+ });
148
+
149
+ // 5. Push the image to the now-running registry.
150
+ if (!options.imageTag) {
151
+ log(`Logging in to ${cfg.registry.domain}...`);
152
+ await dockerLogin(cfg.registry);
153
+ log(`Pushing ${fullRef}...`);
154
+ await dockerPush(fullRef);
155
+ ok("Image pushed");
147
156
  }
148
157
 
149
158
  // 6. Update each env — atomic /opt/arc/.env line + pull + up + health
@@ -1,10 +1,24 @@
1
1
  import { spawn as nodeSpawn } from "node:child_process";
2
- import { mkdirSync, writeFileSync } from "fs";
3
- import { tmpdir } from "os";
2
+ import { existsSync, mkdirSync, writeFileSync } from "fs";
3
+ import { homedir, tmpdir } from "os";
4
4
  import { join } from "path";
5
5
  import { ASSETS, materializeAssets } from "./assets";
6
6
  import type { DeployProvisionAnsible, DeployTarget } from "./config";
7
7
 
8
+ function pickSshKeyForAnsible(configured?: string): string | null {
9
+ if (configured) {
10
+ const expanded = configured.startsWith("~")
11
+ ? join(homedir(), configured.slice(1))
12
+ : configured;
13
+ return existsSync(expanded) ? expanded : null;
14
+ }
15
+ for (const name of ["id_ed25519", "id_ecdsa", "id_rsa"]) {
16
+ const path = join(homedir(), ".ssh", name);
17
+ if (existsSync(path)) return path;
18
+ }
19
+ return null;
20
+ }
21
+
8
22
  // ---------------------------------------------------------------------------
9
23
  // Runs Ansible from embedded assets. Inventory is generated on the fly,
10
24
  // targeting the single host described by DeployTarget. Runs as root on the
@@ -27,12 +41,18 @@ export async function runAnsible(inputs: AnsibleInputs): Promise<void> {
27
41
  const user = inputs.asRoot ? "root" : inputs.target.user;
28
42
  const port = inputs.ansible?.sshPort ?? inputs.target.port;
29
43
 
44
+ // IdentitiesOnly=yes + explicit -i prevent ssh from walking every key in
45
+ // ssh-agent (the server's MaxAuthTries=3 trips when the agent holds >3 keys
46
+ // and ours isn't first). PreferredAuthentications=publickey skips gssapi
47
+ // prompts that also count against MaxAuthTries.
48
+ const sshKey = pickSshKeyForAnsible(inputs.target.sshKey);
49
+ const sshKeyArg = sshKey ? ` -o IdentitiesOnly=yes -i ${sshKey}` : "";
30
50
  const inventory = [
31
51
  "[arc]",
32
52
  `${inputs.target.host} ansible_user=${user} ansible_port=${port}`,
33
53
  "",
34
54
  "[arc:vars]",
35
- "ansible_ssh_common_args='-o StrictHostKeyChecking=accept-new -o BatchMode=yes'",
55
+ `ansible_ssh_common_args='-o StrictHostKeyChecking=accept-new -o BatchMode=yes -o PreferredAuthentications=publickey${sshKeyArg}'`,
36
56
  "ansible_python_interpreter=/usr/bin/python3",
37
57
  "",
38
58
  ].join("\n");
@@ -115,7 +115,22 @@
115
115
  - { policy: deny, dir: incoming }
116
116
  - { policy: allow, dir: outgoing }
117
117
 
118
- - name: Open firewall ports
118
+ - name: Remove legacy ufw limit rule on SSH (replaced by plain allow)
119
+ # If a prior bootstrap installed `ufw limit 22/tcp`, drop it — otherwise
120
+ # the limit rule shadows the allow rule and rate-throttles deploy flows.
121
+ ufw:
122
+ rule: limit
123
+ port: "{{ ssh_port }}"
124
+ proto: tcp
125
+ delete: true
126
+ ignore_errors: true
127
+
128
+ - name: Open firewall ports (SSH key-only auth, no brute-force surface)
129
+ # SSH on port 22: PasswordAuthentication=no + key-only means brute force
130
+ # is impossible without the operator's private key. Rate-limiting (ufw
131
+ # limit / fail2ban sshd jail) breaks legitimate deploy flows that open
132
+ # many short SSH connections in sequence (canSsh → sshExec → scp → ...).
133
+ # 80/443: Caddy ACME + app traffic, never rate-limited.
119
134
  ufw:
120
135
  rule: allow
121
136
  port: "{{ item }}"
@@ -129,17 +144,18 @@
129
144
  ufw:
130
145
  state: enabled
131
146
 
132
- - name: Configure fail2ban for sshd
147
+ - name: Disable fail2ban sshd jail
148
+ # Key-only SSH (with no ufw rate-limit either) makes fail2ban for sshd redundant and
149
+ # actively harmful when the operator's IP roams. Keep fail2ban installed
150
+ # for future jails (web/db) but turn off the sshd jail explicitly.
133
151
  copy:
134
152
  dest: /etc/fail2ban/jail.local
135
153
  content: |
136
154
  [sshd]
137
- enabled = true
138
- port = {{ ssh_port }}
139
- maxretry = 5
140
- findtime = 600
141
- bantime = 3600
155
+ enabled = false
156
+ {% if extra_allowed_ips %}
142
157
  ignoreip = 127.0.0.1/8 ::1 {{ extra_allowed_ips | join(' ') }}
158
+ {% endif %}
143
159
  mode: "0644"
144
160
  notify: restart fail2ban
145
161
 
@@ -201,7 +201,22 @@ const ANSIBLE_SITE_YML = `---
201
201
  - { policy: deny, dir: incoming }
202
202
  - { policy: allow, dir: outgoing }
203
203
 
204
- - name: Open firewall ports
204
+ - name: Remove legacy ufw limit rule on SSH (replaced by plain allow)
205
+ # If a prior bootstrap installed \`ufw limit 22/tcp\`, drop it — otherwise
206
+ # the limit rule shadows the allow rule and rate-throttles deploy flows.
207
+ ufw:
208
+ rule: limit
209
+ port: "{{ ssh_port }}"
210
+ proto: tcp
211
+ delete: true
212
+ ignore_errors: true
213
+
214
+ - name: Open firewall ports (SSH key-only auth, no brute-force surface)
215
+ # SSH on port 22: PasswordAuthentication=no + key-only means brute force
216
+ # is impossible without the operator's private key. Rate-limiting (ufw
217
+ # limit / fail2ban sshd jail) breaks legitimate deploy flows that open
218
+ # many short SSH connections in sequence (canSsh -> sshExec -> scp -> ...).
219
+ # 80/443: Caddy ACME + app traffic, never rate-limited.
205
220
  ufw:
206
221
  rule: allow
207
222
  port: "{{ item }}"
@@ -215,17 +230,18 @@ const ANSIBLE_SITE_YML = `---
215
230
  ufw:
216
231
  state: enabled
217
232
 
218
- - name: Configure fail2ban for sshd
233
+ - name: Disable fail2ban sshd jail
234
+ # Key-only SSH (with no ufw rate-limit either) makes fail2ban for sshd redundant and
235
+ # actively harmful when the operator's IP roams. Keep fail2ban installed
236
+ # for future jails (web/db) but turn off the sshd jail explicitly.
219
237
  copy:
220
238
  dest: /etc/fail2ban/jail.local
221
239
  content: |
222
240
  [sshd]
223
- enabled = true
224
- port = {{ ssh_port }}
225
- maxretry = 5
226
- findtime = 600
227
- bantime = 3600
241
+ enabled = false
242
+ {% if extra_allowed_ips %}
228
243
  ignoreip = 127.0.0.1/8 ::1 {{ extra_allowed_ips | join(' ') }}
244
+ {% endif %}
229
245
  mode: "0644"
230
246
  notify: restart fail2ban
231
247
 
@@ -13,6 +13,32 @@ import { ok, log, err } from "../platform/shared";
13
13
  import { writeStateMarker, STATE_MARKER_PATH } from "./remote-state";
14
14
  import type { RemoteState } from "./remote-state";
15
15
  import { assertExec, baseSshArgs, canSsh, scpUpload, sshExec, waitForSsh } from "./ssh";
16
+ import type { DeployTarget } from "./config";
17
+
18
+ /**
19
+ * Wait until *any* of the supplied SSH targets accepts a connection. Polls
20
+ * all targets in parallel each round; the first one that succeeds wins.
21
+ * Useful when we don't know whether the host is on its first ansible-less
22
+ * boot (root only) or already hardened (deploy user only).
23
+ */
24
+ async function waitForAnySsh(
25
+ targets: DeployTarget[],
26
+ opts: { timeoutMs?: number; intervalMs?: number } = {},
27
+ ): Promise<void> {
28
+ const timeout = opts.timeoutMs ?? 300_000;
29
+ const interval = opts.intervalMs ?? 5_000;
30
+ const start = Date.now();
31
+ while (Date.now() - start < timeout) {
32
+ const results = await Promise.all(targets.map((t) => canSsh(t)));
33
+ if (results.some(Boolean)) return;
34
+ await Bun.sleep(interval);
35
+ }
36
+ throw new Error(
37
+ `Timed out waiting for SSH on ${targets
38
+ .map((t) => `${t.user}@${t.host}`)
39
+ .join(" or ")}`,
40
+ );
41
+ }
16
42
 
17
43
  // ---------------------------------------------------------------------------
18
44
  // Bootstrap orchestrator.
@@ -35,6 +61,8 @@ export interface BootstrapInputs {
35
61
  cliVersion: string;
36
62
  /** sha256 of deploy.arc.json — used for the remote state marker. */
37
63
  configHash: string;
64
+ /** Force the ansible run even when the host is already bootstrapped. */
65
+ forceAnsible?: boolean;
38
66
  }
39
67
 
40
68
  export async function bootstrap(inputs: BootstrapInputs): Promise<void> {
@@ -66,16 +94,31 @@ export async function bootstrap(inputs: BootstrapInputs): Promise<void> {
66
94
  saveDeployConfig(rootDir, cfg);
67
95
 
68
96
  log("Waiting for SSH to come up...");
69
- await waitForSsh({ ...cfg.target, user: "root" });
97
+ // On a brand-new VM only root exists; on a re-applied (no-op) terraform
98
+ // the deploy user already exists and root login is disabled by ansible
99
+ // hardening. Probe both — succeed on whichever lands first.
100
+ await waitForAnySsh([
101
+ { ...cfg.target, user: "root" },
102
+ { ...cfg.target, user: cfg.target.user },
103
+ ]);
70
104
  ok("SSH reachable");
71
105
  }
72
106
 
73
- if (state.kind === "unreachable" || state.kind === "no-docker") {
107
+ // Ansible only runs on fresh hosts (unreachable / no-docker) by default —
108
+ // it's idempotent but slow (~30–60s) and the host config rarely drifts.
109
+ // `--force-bootstrap` re-runs it on demand (after editing the embedded
110
+ // playbook, or to recover from manual host edits).
111
+ const needAnsible =
112
+ state.kind === "unreachable" ||
113
+ state.kind === "no-docker" ||
114
+ inputs.forceAnsible === true;
115
+
116
+ if (needAnsible) {
74
117
  log("Running Ansible bootstrap (Docker + firewall + SSH hardening)...");
75
118
  // Run as root whenever the configured user can't SSH (covers both freshly
76
119
  // provisioned VMs and second-attempt deploys after ansible failure).
77
120
  const deployUserWorks =
78
- state.kind === "no-docker" && (await canSsh(cfg.target));
121
+ state.kind !== "unreachable" && (await canSsh(cfg.target));
79
122
  const asRoot = !deployUserWorks;
80
123
  await runAnsible({
81
124
  target: cfg.target,
@@ -85,7 +128,21 @@ export async function bootstrap(inputs: BootstrapInputs): Promise<void> {
85
128
  ok("Host bootstrapped");
86
129
  }
87
130
 
88
- if (state.kind !== "ready") {
131
+ // Force upStack whenever:
132
+ // - stack isn't fully ready, OR
133
+ // - marker is missing (legacy v0.5 deploy with no .arc-state.json), OR
134
+ // - configHash differs from last bootstrap (deploy.arc.json changed), OR
135
+ // - registry container isn't running (e.g. legacy stack predates v0.7)
136
+ // Without this, an old v0.5 stack (no registry container) is classified as
137
+ // "ready" and bootstrap is skipped — then `docker login` on the next step
138
+ // hits a vhost that doesn't exist and fails with a TLS error.
139
+ const needUpStack =
140
+ state.kind !== "ready" ||
141
+ state.marker === null ||
142
+ state.marker.configHash !== inputs.configHash ||
143
+ !(await isRegistryRunning(cfg));
144
+
145
+ if (needUpStack) {
89
146
  await upStack(inputs);
90
147
  ok("Docker stack up");
91
148
  }
@@ -98,6 +155,23 @@ export async function bootstrap(inputs: BootstrapInputs): Promise<void> {
98
155
  });
99
156
  }
100
157
 
158
+ /**
159
+ * Returns true iff the `registry` service is running in the compose project under cfg.target.remoteDir.
160
+ * Used by bootstrap to detect legacy v0.5 stacks that have no registry
161
+ * container and need a fresh stack write + restart.
162
+ */
163
+ async function isRegistryRunning(cfg: DeployConfig): Promise<boolean> {
164
+ const res = await sshExec(
165
+ cfg.target,
166
+ `cd ${cfg.target.remoteDir} && docker compose ps --status running --format '{{.Service}}' 2>/dev/null || true`,
167
+ { quiet: true },
168
+ );
169
+ return res.stdout
170
+ .split("\n")
171
+ .map((s) => s.trim())
172
+ .includes("registry");
173
+ }
174
+
101
175
  async function upStack(inputs: BootstrapInputs): Promise<void> {
102
176
  const { cfg } = inputs;
103
177
  const workDir = join(tmpdir(), "arc-deploy", `stack-${Date.now()}`);
@@ -206,19 +280,26 @@ async function sshDockerLogin(cfg: DeployConfig): Promise<void> {
206
280
  `Registry password env var ${cfg.registry.passwordEnv} is not set on the deploy host (CLI machine).`,
207
281
  );
208
282
  }
209
- // Pipe via stdin — keeps password off the command line and shell history.
210
- const cmd = `echo "$ARC_REGISTRY_PASSWORD_FORWARDED" | docker login ${cfg.registry.domain} -u ${cfg.registry.username} --password-stdin`;
283
+ // Stream password over SSH stdin — it never reaches the command line (no shell
284
+ // history, no `ps`, no double-shell-escape bugs). The remote shell pipes
285
+ // its own stdin straight into `docker login --password-stdin`.
286
+ const cmd = `docker login ${cfg.registry.domain} -u ${cfg.registry.username} --password-stdin`;
211
287
  const proc = spawn({
212
288
  cmd: [
213
289
  "ssh",
214
290
  ...baseSshArgs(cfg.target),
215
291
  `${cfg.target.user}@${cfg.target.host}`,
216
292
  "--",
217
- `ARC_REGISTRY_PASSWORD_FORWARDED='${password.replace(/'/g, "'\\''")}' bash -c ${JSON.stringify(cmd)}`,
293
+ cmd,
218
294
  ],
295
+ stdin: "pipe",
219
296
  stdout: "pipe",
220
297
  stderr: "pipe",
221
298
  });
299
+ if (proc.stdin) {
300
+ await (proc.stdin as any).write(new TextEncoder().encode(password));
301
+ await (proc.stdin as any).end?.();
302
+ }
222
303
  const exit = await proc.exited;
223
304
  if (exit !== 0) {
224
305
  const stderr = await new Response(proc.stderr).text();
@@ -256,33 +337,61 @@ async function listConfiguredEnvs(cfg: DeployConfig): Promise<string[]> {
256
337
  * fail repeatedly. Fail fast with an actionable hint instead.
257
338
  */
258
339
  async function assertRegistryDnsResolves(cfg: DeployConfig): Promise<void> {
340
+ // Source of truth for "is the DNS update live?" is the authoritative NS for
341
+ // the apex domain — public resolvers (8.8.8.8 / 1.1.1.1) cache for minutes
342
+ // after a record change and disagree among themselves during propagation.
343
+ // Let's Encrypt ACME validates against the authoritative NS too, so this
344
+ // matches what Caddy will see when it tries to issue the cert.
345
+ const apex = apexDomain(cfg.registry.domain);
346
+ let nameservers = await digQuery("8.8.8.8", "NS", apex);
347
+ nameservers = nameservers.map((s) => s.replace(/\.$/, ""));
348
+
349
+ // Sources to query, in order: authoritative NS, then public resolvers.
350
+ // Accept the first source where any answer matches target.host.
351
+ const sources = [...nameservers, "1.1.1.1", "8.8.8.8"];
352
+ let lastAnswers: string[] = [];
353
+
354
+ for (const source of sources) {
355
+ const answers = await digQuery(source, "A", cfg.registry.domain);
356
+ if (answers.length === 0) continue;
357
+ lastAnswers = answers;
358
+ if (answers.includes(cfg.target.host)) return;
359
+ }
360
+
361
+ if (lastAnswers.length === 0) {
362
+ throw new Error(
363
+ `Registry DNS not configured: ${cfg.registry.domain} doesn't resolve. ` +
364
+ `Add an A record pointing to ${cfg.target.host} and re-run deploy.`,
365
+ );
366
+ }
367
+ throw new Error(
368
+ `Registry DNS mismatch: ${cfg.registry.domain} resolves to [${lastAnswers.join(", ")}], ` +
369
+ `but target host is ${cfg.target.host}. Update the A record before continuing.`,
370
+ );
371
+ }
372
+
373
+ function apexDomain(host: string): string {
374
+ // Naive eTLD+1 extraction: last 2 labels. Works for `.pl`, `.com`, etc.
375
+ // For `.co.uk` style TLDs this yields the public suffix (`co.uk`), whose NS
376
+ // have no A answer for the registry host; the caller then falls back to public resolvers.
377
+ const parts = host.split(".");
378
+ return parts.slice(-2).join(".");
379
+ }
380
+
381
+ async function digQuery(
382
+ server: string,
383
+ type: "A" | "NS",
384
+ name: string,
385
+ ): Promise<string[]> {
259
386
  const proc = spawn({
260
- cmd: ["dig", "+short", "+time=3", "+tries=1", cfg.registry.domain],
387
+ cmd: ["dig", `@${server}`, "+short", "+time=3", "+tries=1", type, name],
261
388
  stdout: "pipe",
262
389
  stderr: "ignore",
263
390
  });
264
391
  const exit = await proc.exited;
265
- if (exit !== 0) {
266
- err(
267
- `\`dig\` is not available — skipping DNS pre-flight for ${cfg.registry.domain}.`,
268
- );
269
- return;
270
- }
271
- const resolved = (await new Response(proc.stdout).text())
392
+ if (exit !== 0) return [];
393
+ return (await new Response(proc.stdout).text())
272
394
  .split("\n")
273
395
  .map((s) => s.trim())
274
396
  .filter(Boolean);
275
-
276
- if (resolved.length === 0) {
277
- throw new Error(
278
- `Registry DNS not configured: ${cfg.registry.domain} doesn't resolve. ` +
279
- `Add an A record pointing to ${cfg.target.host} and re-run deploy.`,
280
- );
281
- }
282
- if (!resolved.includes(cfg.target.host)) {
283
- throw new Error(
284
- `Registry DNS mismatch: ${cfg.registry.domain} resolves to [${resolved.join(", ")}], ` +
285
- `but target host is ${cfg.target.host}. Update the A record before continuing.`,
286
- );
287
- }
288
397
  }
@@ -61,10 +61,11 @@ export function generateCompose({ cfg }: ComposeOptions): string {
61
61
  const upperName = name.toUpperCase().replace(/-/g, "_");
62
62
  lines.push(` arc-${name}:`);
63
63
  // Image ref comes from /opt/arc/.env, written per-deploy with the content
64
- // hash of the latest build. The `:?` fallback fails compose with a clear
65
- // error if the env var isn't set that means "deploy hasn't run yet".
64
+ // hash of the latest build. Default to a placeholder so `docker compose
65
+ // pull caddy registry` doesn't fail with `:?` interpolation errors on this
66
+ // service before the first deploy ever sets ARC_IMAGE_<ENV>.
66
67
  lines.push(
67
- ` image: \${ARC_IMAGE_${upperName}:?Run \\\`arc platform deploy ${name}\\\` to publish an image first}`,
68
+ ` image: \${ARC_IMAGE_${upperName}:-arc-${name}:not-deployed}`,
68
69
  );
69
70
  lines.push(` container_name: arc-${name}`);
70
71
  lines.push(" restart: unless-stopped");
@@ -1,5 +1,6 @@
1
1
  import { existsSync, readFileSync, writeFileSync } from "fs";
2
2
  import { join } from "path";
3
+ import { applyDeployGlobals, loadDeployEnvFiles } from "./env-file";
3
4
 
4
5
  // ---------------------------------------------------------------------------
5
6
  // deploy.arc.json — single source of truth for deployment configuration.
@@ -86,8 +87,17 @@ export function deployConfigExists(rootDir: string): boolean {
86
87
  }
87
88
 
88
89
  /**
89
- * Load deploy.arc.json, expand `${VAR}` references against process.env,
90
- * and validate shape. Throws with a precise error on any issue.
90
+ * Load deploy.arc.json + side-car env files (deploy.arc.env for globals,
91
+ * deploy.arc.<env>.env for per-env secrets), expand `${VAR}` references
92
+ * against process.env, and validate shape.
93
+ *
94
+ * Resolution order (last wins):
95
+ * 1. deploy.arc.<name>.env (sidecar file, gitignored)
96
+ * 2. deploy.arc.json `envs.<name>.envVars` (declared in config)
97
+ * 3. existing process.env values (CI/CD secret store)
98
+ *
99
+ * Globals from `deploy.arc.env` populate process.env (without overriding
100
+ * existing values), so terraform/dockerLogin/ansible see them naturally.
91
101
  */
92
102
  export function loadDeployConfig(rootDir: string): DeployConfig {
93
103
  const path = deployConfigPath(rootDir);
@@ -101,8 +111,33 @@ export function loadDeployConfig(rootDir: string): DeployConfig {
101
111
  } catch (e) {
102
112
  throw new Error(`Invalid JSON in ${DEPLOY_CONFIG_FILE}: ${(e as Error).message}`);
103
113
  }
114
+
115
+ // Read env names from raw JSON (pre-validation) to know which sidecar
116
+ // files to look for. Validation runs next.
117
+ const envNames = isObject(parsed) && isObject(parsed.envs)
118
+ ? Object.keys(parsed.envs)
119
+ : [];
120
+ const envFiles = loadDeployEnvFiles(rootDir, envNames);
121
+
122
+ // Globals → process.env (without overriding) so downstream code can read
123
+ // HCLOUD_TOKEN, ARC_REGISTRY_PASSWORD etc. as if they were exported in shell.
124
+ applyDeployGlobals(envFiles.globals);
125
+
104
126
  const expanded = expandEnvVars(parsed, process.env);
105
- return validateDeployConfig(expanded);
127
+ const validated = validateDeployConfig(expanded);
128
+
129
+ // Merge sidecar per-env vars into cfg.envs[name].envVars.
130
+ // Existing keys (declared in deploy.arc.json) win over sidecar — config is
131
+ // the source of truth for variable NAMES, sidecar provides VALUES for
132
+ // anything not pinned otherwise.
133
+ for (const [envName, vars] of Object.entries(envFiles.perEnv)) {
134
+ if (!(envName in validated.envs)) continue;
135
+ const env = validated.envs[envName];
136
+ const merged: Record<string, string> = { ...vars, ...(env.envVars ?? {}) };
137
+ validated.envs[envName] = { ...env, envVars: merged };
138
+ }
139
+
140
+ return validated;
106
141
  }
107
142
 
108
143
  export function saveDeployConfig(rootDir: string, cfg: DeployConfig): void {
@@ -51,7 +51,7 @@ export async function updateEnvDeployment(
51
51
  const envPath = `${cfg.target.remoteDir}/.env`;
52
52
  const escapedRef = fullRef.replace(/"/g, '\\"');
53
53
  const updateScript = [
54
- `touch ${envPath}`,
54
+ `touch ${envPath} && `,
55
55
  `awk -v line="${envVarName}=${escapedRef}" -v key="${envVarName}=" '`,
56
56
  ` BEGIN { replaced=0 } `,
57
57
  ` $0 ~ "^"key { print line; replaced=1; next } `,
@@ -0,0 +1,103 @@
1
+ import { existsSync, readFileSync } from "fs";
2
+ import { join } from "path";
3
+
4
+ // ---------------------------------------------------------------------------
5
+ // Per-env secret files for `arc platform deploy`.
6
+ //
7
+ // Layout next to deploy.arc.json:
8
+ // deploy.arc.env — globals (HCLOUD_TOKEN, ARC_REGISTRY_PASSWORD, ...)
9
+ // deploy.arc.<env>.env — per-env user-app secrets (one file per env name
10
+ // declared in deploy.arc.json)
11
+ //
12
+ // Both files are optional. `process.env` ALWAYS wins — CI/CD pipelines that
13
+ // set secrets through the runner's secret store don't need a file on disk.
14
+ //
15
+ // Format: KEY=VALUE per line. `#` starts a comment. Surrounding single or
16
+ // double quotes are stripped. Values may NOT span multiple lines (keep it
17
+ // boring, no escape sequences).
18
+ // ---------------------------------------------------------------------------
19
+
20
+ export interface DeployEnvFiles {
21
+ /** Variables from `deploy.arc.env`. Applied to process.env unless already set. */
22
+ globals: Record<string, string>;
23
+ /** envName → variables from `deploy.arc.<envName>.env`. Merged into cfg.envs[name].envVars. */
24
+ perEnv: Record<string, Record<string, string>>;
25
+ }
26
+
27
+ export function loadDeployEnvFiles(
28
+ rootDir: string,
29
+ envNames: readonly string[],
30
+ ): DeployEnvFiles {
31
+ const globalsPath = join(rootDir, "deploy.arc.env");
32
+ const globals = existsSync(globalsPath)
33
+ ? parseEnvFile(readFileSync(globalsPath, "utf-8"), globalsPath)
34
+ : {};
35
+
36
+ const perEnv: Record<string, Record<string, string>> = {};
37
+ for (const name of envNames) {
38
+ const envPath = join(rootDir, `deploy.arc.${name}.env`);
39
+ if (existsSync(envPath)) {
40
+ perEnv[name] = parseEnvFile(readFileSync(envPath, "utf-8"), envPath);
41
+ }
42
+ }
43
+
44
+ return { globals, perEnv };
45
+ }
46
+
47
+ /**
48
+ * Apply globals to process.env. Existing values win — env vars set in the
49
+ * shell or by a CI runner take precedence over the file. This lets pipelines
50
+ * inject secrets via their native secret store without rewriting the file.
51
+ */
52
+ export function applyDeployGlobals(globals: Record<string, string>): void {
53
+ for (const [k, v] of Object.entries(globals)) {
54
+ if (process.env[k] === undefined) {
55
+ process.env[k] = v;
56
+ }
57
+ }
58
+ }
59
+
60
+ // ---------------------------------------------------------------------------
61
+ // Parser
62
+ // ---------------------------------------------------------------------------
63
+
64
+ function parseEnvFile(
65
+ content: string,
66
+ pathForErrors: string,
67
+ ): Record<string, string> {
68
+ const out: Record<string, string> = {};
69
+ const lines = content.split(/\r?\n/);
70
+
71
+ for (let i = 0; i < lines.length; i++) {
72
+ const raw = lines[i];
73
+ const line = raw.trim();
74
+ if (!line || line.startsWith("#")) continue;
75
+
76
+ const eq = line.indexOf("=");
77
+ if (eq <= 0) {
78
+ throw new Error(
79
+ `${pathForErrors}:${i + 1}: malformed line (expected KEY=VALUE): ${raw}`,
80
+ );
81
+ }
82
+
83
+ const key = line.slice(0, eq).trim();
84
+ if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(key)) {
85
+ throw new Error(
86
+ `${pathForErrors}:${i + 1}: invalid variable name "${key}"`,
87
+ );
88
+ }
89
+
90
+ let value = line.slice(eq + 1).trim();
91
+ // Strip surrounding quotes (single or double) — leave inner content alone.
92
+ if (
93
+ (value.startsWith('"') && value.endsWith('"')) ||
94
+ (value.startsWith("'") && value.endsWith("'"))
95
+ ) {
96
+ value = value.slice(1, -1);
97
+ }
98
+
99
+ out[key] = value;
100
+ }
101
+
102
+ return out;
103
+ }