@arcote.tech/arc-cli 0.7.5 → 0.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,293 @@
1
+ import type { DeployConfig, DeployObservability } from "./config";
2
+
3
+ // ---------------------------------------------------------------------------
4
+ // Observability stack config templates.
5
+ //
6
+ // All strings are deterministic for the inputs (cfg + retention) — no random
7
+ // IDs, no timestamps — so re-running deploy with unchanged config is a no-op
8
+ // at the file-write level. Bootstrap diffs filesystem before bouncing
9
+ // services, so this matters.
10
+ //
11
+ // Defaults:
12
+ // - traces: 7d retention (Tempo block storage on local disk)
13
+ // - logs: 7d retention (Loki chunks on local disk)
14
+ // - metrics: 30d retention (Prometheus TSDB on local disk)
15
+ //
16
+ // Tail sampling: every error + every trace >500ms + 10% random. Decided in
17
+ // the collector so per-service SDKs can be left at always-on without
18
+ // flooding the backend.
19
+ // ---------------------------------------------------------------------------
20
+
/** Fallback retention windows, used for any signal the deploy config leaves unset. */
const DEFAULT_RETENTION = {
  traces: "168h", // 7d
  logs: "168h", // 7d
  metrics: "30d",
} as const;

/**
 * Returns `value` with surrounding whitespace removed, or `fallback` when the
 * value is missing, not a string, or blank.
 *
 * A blank retention would otherwise be interpolated into the generated YAML
 * as an empty value (e.g. `block_retention: `).
 */
function retentionOrDefault(
  value: string | null | undefined,
  fallback: string,
): string {
  if (typeof value !== "string") return fallback;
  const trimmed = value.trim();
  return trimmed === "" ? fallback : trimmed;
}

/**
 * Resolves the effective retention window for each signal.
 *
 * @param o Observability section of the deploy config; may be undefined.
 * @returns `traces`, `logs` and `metrics` retention strings, each taken from
 *   `o.retention` when set to a non-blank value and from `DEFAULT_RETENTION`
 *   otherwise.
 */
function pickRetention(o: DeployObservability | undefined): {
  traces: string;
  logs: string;
  metrics: string;
} {
  const configured = o?.retention;
  return {
    traces: retentionOrDefault(configured?.traces, DEFAULT_RETENTION.traces),
    logs: retentionOrDefault(configured?.logs, DEFAULT_RETENTION.logs),
    metrics: retentionOrDefault(configured?.metrics, DEFAULT_RETENTION.metrics),
  };
}
34
+
35
+ /** OpenTelemetry Collector — receives OTLP from app containers + browser,
36
+ * applies tail sampling, fans out to Tempo (traces), Loki (logs),
37
+ * Prometheus remote-write (metrics).
+ *
+ * Only `cfg.envs` is read: each env's `domain` becomes one
+ * `https://<domain>` entry in the OTLP/HTTP receiver's CORS allow-list. The
+ * domain is interpolated as-is into a double-quoted YAML string.
+ *
+ * Pipelines as emitted: traces run tail_sampling → attributes → batch, logs
+ * run attributes → batch, metrics run batch only. The tail-sampling
+ * policies are: status code ERROR, latency threshold 500 ms, and a 10%
+ * probabilistic policy. The attributes processor deletes the
+ * `http.request.header.authorization` and `http.request.header.cookie` keys.
+ *
+ * NOTE(review): with zero envs the `allowed_origins:` key is emitted with no
+ * list entries — confirm the collector accepts that.
+ *
+ * @param cfg Deploy config; only `cfg.envs[*].domain` is used here.
+ * @returns The collector configuration as YAML text. */
38
+ export function generateOtelCollectorConfig(cfg: DeployConfig): string {
39
+ const envNames = Object.keys(cfg.envs); // one CORS origin is emitted per env key
40
+ return `# Generated by \`arc platform deploy\` — do not edit by hand.
41
+ receivers:
42
+ otlp:
43
+ protocols:
44
+ grpc:
45
+ endpoint: 0.0.0.0:4317
46
+ http:
47
+ endpoint: 0.0.0.0:4318
48
+ cors:
49
+ allowed_origins:
50
+ ${envNames.map((name) => ` - "https://${cfg.envs[name]!.domain}"`).join("\n")}
51
+ allowed_headers:
52
+ - traceparent
53
+ - tracestate
54
+ - content-type
55
+
56
+ processors:
57
+ batch:
58
+ timeout: 5s
59
+ send_batch_size: 512
60
+ send_batch_max_size: 1024
61
+
62
+ # Tail-based sampling — applied after a full trace has been assembled.
63
+ # Errors and slow traces are kept 100%, everything else at 10%.
64
+ tail_sampling:
65
+ decision_wait: 10s
66
+ num_traces: 50000
67
+ expected_new_traces_per_sec: 100
68
+ policies:
69
+ - name: errors
70
+ type: status_code
71
+ status_code: { status_codes: [ERROR] }
72
+ - name: slow
73
+ type: latency
74
+ latency: { threshold_ms: 500 }
75
+ - name: random_10pct
76
+ type: probabilistic
77
+ probabilistic: { sampling_percentage: 10 }
78
+
79
+ # Drop high-cardinality / PII attributes that might slip past app-side
80
+ # sanitization. Belt-and-suspenders before they hit long-term storage.
81
+ attributes:
82
+ actions:
83
+ - key: http.request.header.authorization
84
+ action: delete
85
+ - key: http.request.header.cookie
86
+ action: delete
87
+
88
+ exporters:
89
+ otlp/tempo:
90
+ endpoint: tempo:4317
91
+ tls:
92
+ insecure: true
93
+
94
+ otlphttp/loki:
95
+ endpoint: http://loki:3100/otlp
96
+ tls:
97
+ insecure: true
98
+
99
+ prometheusremotewrite:
100
+ endpoint: http://prometheus:9090/api/v1/write
101
+ tls:
102
+ insecure: true
103
+
104
+ extensions:
105
+ health_check: {}
106
+ zpages: {}
107
+
108
+ service:
109
+ extensions: [health_check, zpages]
110
+ pipelines:
111
+ traces:
112
+ receivers: [otlp]
113
+ processors: [tail_sampling, attributes, batch]
114
+ exporters: [otlp/tempo]
115
+ logs:
116
+ receivers: [otlp]
117
+ processors: [attributes, batch]
118
+ exporters: [otlphttp/loki]
119
+ metrics:
120
+ receivers: [otlp]
121
+ processors: [batch]
122
+ exporters: [prometheusremotewrite]
123
+ `;
124
+ }
125
+
126
+ /** Grafana Tempo — single-binary mode with local block storage.
+ *
+ * The only dynamic value is `compactor.compaction.block_retention`, taken
+ * from `cfg.observability.retention.traces` via `pickRetention` (which
+ * supplies the default when it is unset). Everything else is static: HTTP on
+ * 3200, gRPC on 9095, OTLP receivers on 4317 (gRPC) / 4318 (HTTP), data
+ * under /var/tempo, and a metrics generator (service-graphs + span-metrics)
+ * that remote-writes to http://prometheus:9090/api/v1/write with exemplars.
+ *
+ * @param cfg Deploy config; only `cfg.observability` is read.
+ * @returns The Tempo configuration as YAML text. */
127
+ export function generateTempoConfig(cfg: DeployConfig): string {
128
+ const retention = pickRetention(cfg.observability); // only .traces is used here
129
+ return `# Generated by \`arc platform deploy\` — do not edit by hand.
130
+ server:
131
+ http_listen_port: 3200
132
+ grpc_listen_port: 9095
133
+
134
+ distributor:
135
+ receivers:
136
+ otlp:
137
+ protocols:
138
+ grpc:
139
+ endpoint: 0.0.0.0:4317
140
+ http:
141
+ endpoint: 0.0.0.0:4318
142
+
143
+ ingester:
144
+ trace_idle_period: 10s
145
+ max_block_bytes: 1048576
146
+ max_block_duration: 5m
147
+
148
+ compactor:
149
+ compaction:
150
+ block_retention: ${retention.traces}
151
+
152
+ storage:
153
+ trace:
154
+ backend: local
155
+ local:
156
+ path: /var/tempo/blocks
157
+ wal:
158
+ path: /var/tempo/wal
159
+
160
+ metrics_generator:
161
+ registry:
162
+ external_labels:
163
+ source: tempo
164
+ storage:
165
+ path: /var/tempo/generator/wal
166
+ remote_write:
167
+ - url: http://prometheus:9090/api/v1/write
168
+ send_exemplars: true
169
+
170
+ overrides:
171
+ defaults:
172
+ metrics_generator:
173
+ processors: [service-graphs, span-metrics]
174
+ `;
175
+ }
176
+
177
+ /** Loki — single-binary mode, filesystem chunks.
+ *
+ * The only dynamic value is `limits_config.retention_period`, taken from
+ * `cfg.observability.retention.logs` via `pickRetention` (which supplies the
+ * default when it is unset); the compactor section is emitted with
+ * `retention_enabled: true`. The rest is static: auth disabled, HTTP on
+ * 3100, all state under /loki, in-memory ring kvstore with replication
+ * factor 1, and a TSDB index (schema v13, 24h period) effective from
+ * 2024-01-01.
+ *
+ * @param cfg Deploy config; only `cfg.observability` is read.
+ * @returns The Loki configuration as YAML text. */
178
+ export function generateLokiConfig(cfg: DeployConfig): string {
179
+ const retention = pickRetention(cfg.observability); // only .logs is used here
180
+ return `# Generated by \`arc platform deploy\` — do not edit by hand.
181
+ auth_enabled: false
182
+
183
+ server:
184
+ http_listen_port: 3100
185
+
186
+ common:
187
+ instance_addr: 127.0.0.1
188
+ path_prefix: /loki
189
+ storage:
190
+ filesystem:
191
+ chunks_directory: /loki/chunks
192
+ rules_directory: /loki/rules
193
+ replication_factor: 1
194
+ ring:
195
+ kvstore:
196
+ store: inmemory
197
+
198
+ schema_config:
199
+ configs:
200
+ - from: 2024-01-01
201
+ store: tsdb
202
+ object_store: filesystem
203
+ schema: v13
204
+ index:
205
+ prefix: index_
206
+ period: 24h
207
+
208
+ limits_config:
209
+ retention_period: ${retention.logs}
210
+ allow_structured_metadata: true
211
+
212
+ compactor:
213
+ working_directory: /loki/compactor
214
+ retention_enabled: true
215
+ delete_request_store: filesystem
216
+ `;
217
+ }
218
+
219
+ /** Prometheus — scrape and storage configuration.
+ *
+ * Emits two static scrape jobs on a 15s interval: `prometheus`
+ * (localhost:9090) and `otel-collector` (otel-collector:8888). The only
+ * dynamic value is the metrics retention, taken from
+ * `cfg.observability.retention.metrics` via `pickRetention` (which supplies
+ * the default when it is unset). Accepting inbound remote-write is not
+ * configured here; per the note in the generated file it is enabled by a
+ * command-line flag in docker-compose.
+ *
+ * NOTE(review): retention is written as a dotted `retention.time` key under
+ * `storage.tsdb`. Verify that the deployed Prometheus version accepts this
+ * key in its config file; retention is commonly set with the
+ * `--storage.tsdb.retention.time` flag instead.
+ *
+ * @param cfg Deploy config; only `cfg.observability` is read.
+ * @returns The Prometheus configuration as YAML text. */
220
+ export function generatePrometheusConfig(cfg: DeployConfig): string {
221
+ const retention = pickRetention(cfg.observability); // only .metrics is used here
222
+ return `# Generated by \`arc platform deploy\` — do not edit by hand.
223
+ global:
224
+ scrape_interval: 15s
225
+ evaluation_interval: 15s
226
+
227
+ scrape_configs:
228
+ - job_name: prometheus
229
+ static_configs:
230
+ - targets: [localhost:9090]
231
+ - job_name: otel-collector
232
+ static_configs:
233
+ - targets: [otel-collector:8888]
234
+
235
+ storage:
236
+ tsdb:
237
+ retention.time: ${retention.metrics}
238
+
239
+ # Note: remote-write inbound is enabled via the --web.enable-remote-write-receiver
240
+ # command-line flag (set in docker-compose), not here.
241
+ `;
242
+ }
243
+
244
+ /** Grafana datasource provisioning — Tempo + Loki + Prometheus, all pre-wired.
+ *
+ * Takes no input; the returned text is constant. Each datasource uses
+ * `access: proxy` and a fixed uid (`tempo`, `loki`, `prometheus`) so the
+ * cross-links can reference each other:
+ * - Tempo: trace-to-logs points at `loki` with a -5m/+5m time shift, and
+ *   the service map points at `prometheus`.
+ * - Loki: a `TraceID` derived field matches `trace_id=(\w+)` and links to
+ *   `tempo`.
+ * - Prometheus: marked as the default datasource.
+ *
+ * Escaping: the four backslashes in the template produce two in the file,
+ * which the double-quoted YAML string reads as a single `\w`. The
+ * `$\${...}` sequence produces a literal `$${__value.raw}` in the file —
+ * presumably so Grafana's provisioning variable expansion leaves
+ * `${__value.raw}` intact; confirm against Grafana's docs.
+ *
+ * @returns The datasource provisioning file as YAML text. */
245
+ export function generateGrafanaDatasources(): string {
246
+ return `# Generated by \`arc platform deploy\` — do not edit by hand.
247
+ apiVersion: 1
248
+ datasources:
249
+ - name: Tempo
250
+ type: tempo
251
+ access: proxy
252
+ url: http://tempo:3200
253
+ uid: tempo
254
+ jsonData:
255
+ tracesToLogsV2:
256
+ datasourceUid: loki
257
+ spanStartTimeShift: -5m
258
+ spanEndTimeShift: 5m
259
+ serviceMap:
260
+ datasourceUid: prometheus
261
+ - name: Loki
262
+ type: loki
263
+ access: proxy
264
+ url: http://loki:3100
265
+ uid: loki
266
+ jsonData:
267
+ derivedFields:
268
+ - datasourceUid: tempo
269
+ matcherRegex: "trace_id=(\\\\w+)"
270
+ name: TraceID
271
+ url: $\${__value.raw}
272
+ - name: Prometheus
273
+ type: prometheus
274
+ access: proxy
275
+ url: http://prometheus:9090
276
+ uid: prometheus
277
+ isDefault: true
278
+ `;
279
+ }
280
+
/**
 * Renders every observability config file the host needs.
 *
 * @param cfg Deploy config passed through to the individual generators.
 * @returns Map of relative path → file contents, so bootstrap can write and
 *   upload all files in a single pass.
 */
export function generateObservabilityConfigs(
  cfg: DeployConfig,
): Record<string, string> {
  const files: Record<string, string> = {};
  // Insertion order is kept the same as the original literal's key order.
  files["observability/otel-collector-config.yaml"] = generateOtelCollectorConfig(cfg);
  files["observability/tempo.yaml"] = generateTempoConfig(cfg);
  files["observability/loki-config.yaml"] = generateLokiConfig(cfg);
  files["observability/prometheus.yml"] = generatePrometheusConfig(cfg);
  files["observability/grafana-datasources.yaml"] = generateGrafanaDatasources();
  return files;
}
@@ -11,7 +11,7 @@ import {
11
11
  import { existsSync, mkdirSync } from "fs";
12
12
  import { join } from "path";
13
13
  import { readTranslationsConfig } from "../i18n";
14
- import type { BuildManifest, WorkspaceInfo } from "./shared";
14
+ import { err, ok, type BuildManifest, type WorkspaceInfo } from "./shared";
15
15
  import type { BuildManifestGroup, ModuleAccess } from "@arcote.tech/platform";
16
16
 
17
17
  // ---------------------------------------------------------------------------
@@ -97,6 +97,21 @@ export function generateShellHtml(
97
97
  initial?: { file: string; hash: string },
98
98
  stylesHash?: string,
99
99
  ): string {
100
+ // OpenTelemetry config — injected as a global so the browser SDK chunk
101
+ // (lazy-loaded by start-app.ts) can pick it up without a fetch. Endpoint
102
+ // is same-origin so Caddy can apply CORS + auth uniformly.
103
+ const otelConfig = process.env.ARC_OTEL_ENABLED === "true"
104
+ ? {
105
+ enabled: true,
106
+ endpoint: "/otel",
107
+ serviceName: process.env.OTEL_SERVICE_NAME ?? `${appName}-browser`,
108
+ environment: process.env.NODE_ENV === "production" ? "production" : "development",
109
+ sampleRate: Number(process.env.ARC_OTEL_BROWSER_SAMPLE_RATE ?? "0.1"),
110
+ }
111
+ : null;
112
+ const otelTag = otelConfig
113
+ ? `\n <script>window.__ARC_OTEL_CONFIG=${JSON.stringify(otelConfig)};</script>`
114
+ : "";
100
115
  // Initial bundle carries framework, public modules, and PlatformApp re-export.
101
116
  // No importmap — single Bun.build with splitting:true inlines + dedups everything
102
117
  // across initial and per-token group bundles via auto-emitted chunk-<hash>.js.
@@ -117,7 +132,7 @@ export function generateShellHtml(
117
132
  <title>${manifest?.title ?? appName}</title>${manifest?.favicon ? `\n <link rel="icon" href="${manifest.favicon}">` : ""}${manifest ? `\n <link rel="manifest" href="/manifest.json">` : ""}
118
133
  <link rel="stylesheet" href="/styles.css${stylesQs}" />
119
134
  <link rel="stylesheet" href="/theme.css${stylesQs}" />
120
- <link rel="modulepreload" href="${initialUrl}" />
135
+ <link rel="modulepreload" href="${initialUrl}" />${otelTag}
121
136
  </head>
122
137
  <body>
123
138
  <div id="root"></div>
@@ -480,6 +495,30 @@ export async function startPlatformServer(
480
495
  ): Promise<PlatformServer> {
481
496
  const { ws, port, devMode, context } = opts;
482
497
  ensureModuleSigSecret(ws, !!devMode);
498
+
499
+ // OpenTelemetry — only when explicitly enabled (deploy injects the env
500
+ // when `observability.enabled` is set in deploy.arc.json). Dynamic import
501
+ // keeps the OTel SDK out of bundles that don't use it.
502
+ let telemetry: import("@arcote.tech/arc-otel").ArcTelemetry | undefined;
503
+ let telemetryShutdown: (() => Promise<void>) | undefined;
504
+ if (process.env.ARC_OTEL_ENABLED === "true") {
505
+ try {
506
+ const { initServerTelemetry } = await import("@arcote.tech/arc-otel/server");
507
+ const init = initServerTelemetry({
508
+ serviceName: process.env.OTEL_SERVICE_NAME ?? ws.appName,
509
+ environment: "server",
510
+ endpoint: process.env.OTEL_EXPORTER_OTLP_ENDPOINT,
511
+ mode: devMode ? "development" : "production",
512
+ sampleRate: devMode ? 1.0 : 1.0, // head-based 100%, collector tail-samples
513
+ });
514
+ telemetry = init.telemetry;
515
+ telemetryShutdown = init.shutdown;
516
+ ok("Telemetry enabled — exporting to " + process.env.OTEL_EXPORTER_OTLP_ENDPOINT);
517
+ } catch (e) {
518
+ err(`Failed to init telemetry: ${(e as Error).message}`);
519
+ }
520
+ }
521
+
483
522
  const moduleAccessMap = opts.moduleAccess ?? new Map();
484
523
  let manifest = opts.manifest;
485
524
  const getManifest = () => manifest;
@@ -575,6 +614,7 @@ export async function startPlatformServer(
575
614
  spaFallbackHandler(getShellHtml),
576
615
  ],
577
616
  onWsClose: (clientId) => cleanupClientSubs(clientId),
617
+ telemetry,
578
618
  });
579
619
 
580
620
  return {
@@ -583,6 +623,9 @@ export async function startPlatformServer(
583
623
  connectionManager: arcServer.connectionManager,
584
624
  setManifest,
585
625
  notifyReload,
586
- stop: () => arcServer.stop(),
626
+ stop: () => {
627
+ arcServer.stop();
628
+ void telemetryShutdown?.();
629
+ },
587
630
  };
588
631
  }