@arcote.tech/arc-cli 0.7.19 → 0.7.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +767 -160
- package/package.json +9 -9
- package/src/builder/access-extractor.ts +11 -15
- package/src/builder/module-builder.ts +210 -35
- package/src/deploy/bootstrap.ts +8 -3
- package/src/deploy/caddyfile.ts +43 -8
- package/src/deploy/compose.ts +73 -0
- package/src/deploy/config.ts +15 -0
- package/src/deploy/observability-configs.ts +688 -48
- package/src/platform/server.ts +3 -0
- package/src/platform/shared.ts +34 -73
- package/src/platform/startup.ts +2 -2
package/dist/index.js
CHANGED
|
@@ -853,7 +853,7 @@ Expecting one of '${allowedValues.join("', '")}'`);
|
|
|
853
853
|
this._exitCallback = (err) => {
|
|
854
854
|
if (err.code !== "commander.executeSubCommandAsync") {
|
|
855
855
|
throw err;
|
|
856
|
-
}
|
|
856
|
+
}
|
|
857
857
|
};
|
|
858
858
|
}
|
|
859
859
|
return this;
|
|
@@ -25870,6 +25870,7 @@ import {
|
|
|
25870
25870
|
ATTR_SERVICE_NAME,
|
|
25871
25871
|
ATTR_SERVICE_VERSION
|
|
25872
25872
|
} from "@opentelemetry/semantic-conventions/incubating";
|
|
25873
|
+
import { format as format2 } from "util";
|
|
25873
25874
|
import {
|
|
25874
25875
|
context,
|
|
25875
25876
|
propagation,
|
|
@@ -25880,6 +25881,41 @@ import {
|
|
|
25880
25881
|
logs,
|
|
25881
25882
|
SeverityNumber
|
|
25882
25883
|
} from "@opentelemetry/api-logs";
|
|
25884
|
+
/**
 * Mirrors console output into telemetry logs.
 *
 * Every console method listed in METHODS is replaced by a wrapper that first
 * forwards the call to the real console method and then emits the formatted
 * message through `telemetry.log` at the mapped level. If one of the
 * arguments is an Error, its name, message and stack are attached as
 * `exception.*` attributes.
 *
 * Only one patch can be installed at a time (module-level `patched` flag);
 * a call made while a patch is installed, or with inactive telemetry,
 * changes nothing and returns a no-op.
 *
 * @param {{active: boolean, log: Function}} telemetry - Telemetry sink; only
 *   `active` and `log(level, body, attrs)` are used here.
 * @returns {() => void} Restore function. It puts the exact original console
 *   functions back and is safe to call more than once.
 */
function patchConsole(telemetry) {
  if (patched || !telemetry.active)
    return () => {};
  patched = true;
  const originals = [];
  for (const [method, level] of METHODS) {
    // Keep the untouched function (not a bound copy) so that restoring hands
    // back the very same reference that was installed before patching.
    const original = console[method];
    originals.push([method, original]);
    console[method] = (...args) => {
      original.apply(console, args);
      // Re-entrancy guard: anything logged while we are emitting is printed
      // but not forwarded again, which prevents infinite recursion.
      if (emitting)
        return;
      emitting = true;
      try {
        const body = format2(...args);
        // Messages carrying the "[arc-otel]" prefix are never forwarded.
        if (body.startsWith("[arc-otel]"))
          return;
        const error = args.find((arg) => arg instanceof Error);
        telemetry.log(level, body, error ? {
          "exception.type": error.name,
          "exception.message": error.message,
          "exception.stacktrace": error.stack ?? ""
        } : {});
      } catch {
        // Deliberately best-effort: a telemetry failure must never break
        // the caller's console output.
      } finally {
        emitting = false;
      }
    };
  }
  let restored = false;
  return () => {
    // Idempotent: a stale second call must not clear `patched` or undo a
    // patch that was installed after this one had been restored.
    if (restored)
      return;
    restored = true;
    for (const [method, original] of originals) {
      console[method] = original;
    }
    patched = false;
  };
}
|
|
25883
25919
|
function sanitizeAttrs(input, opts = {}) {
|
|
25884
25920
|
if (!input)
|
|
25885
25921
|
return {};
|
|
@@ -25939,6 +25975,7 @@ class ArcTelemetry {
|
|
|
25939
25975
|
meter = null;
|
|
25940
25976
|
histograms = new Map;
|
|
25941
25977
|
counters = new Map;
|
|
25978
|
+
upDownCounters = new Map;
|
|
25942
25979
|
constructor(config) {
|
|
25943
25980
|
const mode = config.mode ?? "development";
|
|
25944
25981
|
const enabled = config.enabled ?? mode !== "disabled";
|
|
@@ -26050,6 +26087,18 @@ class ArcTelemetry {
|
|
|
26050
26087
|
counter.add(value, attrs);
|
|
26051
26088
|
} catch {}
|
|
26052
26089
|
}
|
|
26090
|
+
addUpDown(name, delta, attrs = {}) {
|
|
26091
|
+
if (!this.active || !this.meter)
|
|
26092
|
+
return;
|
|
26093
|
+
let counter = this.upDownCounters.get(name);
|
|
26094
|
+
if (!counter) {
|
|
26095
|
+
counter = this.meter.createUpDownCounter(name);
|
|
26096
|
+
this.upDownCounters.set(name, counter);
|
|
26097
|
+
}
|
|
26098
|
+
try {
|
|
26099
|
+
counter.add(delta, attrs);
|
|
26100
|
+
} catch {}
|
|
26101
|
+
}
|
|
26053
26102
|
recordHistogram(name, value, attrs = {}) {
|
|
26054
26103
|
if (!this.active || !this.meter)
|
|
26055
26104
|
return;
|
|
@@ -26097,53 +26146,31 @@ function noopSpan() {
|
|
|
26097
26146
|
function wrapDbAdapter(adapter, telemetry, dbSystem) {
|
|
26098
26147
|
if (!telemetry || !telemetry.active)
|
|
26099
26148
|
return adapter;
|
|
26149
|
+
const dbAttrs = (operation, store) => ({
|
|
26150
|
+
"db.system": dbSystem,
|
|
26151
|
+
"db.operation.name": operation,
|
|
26152
|
+
...store ? { "db.collection.name": store } : {}
|
|
26153
|
+
});
|
|
26154
|
+
const measureOp = async (operation, store, fn) => {
|
|
26155
|
+
const start = Date.now();
|
|
26156
|
+
try {
|
|
26157
|
+
return await fn();
|
|
26158
|
+
} finally {
|
|
26159
|
+
telemetry.measureSince("arc.db.operation.duration", start, dbAttrs(operation, store));
|
|
26160
|
+
}
|
|
26161
|
+
};
|
|
26100
26162
|
const wrapRead = (tx) => ({
|
|
26101
|
-
find: async (store, options) => telemetry.startSpan(`db.find ${store}`, async (span) => {
|
|
26102
|
-
const
|
|
26103
|
-
|
|
26104
|
-
|
|
26105
|
-
|
|
26106
|
-
return rows;
|
|
26107
|
-
} finally {
|
|
26108
|
-
telemetry.measureSince("arc.db.find_ms", start, {
|
|
26109
|
-
"db.system": dbSystem,
|
|
26110
|
-
"db.collection.name": store
|
|
26111
|
-
});
|
|
26112
|
-
}
|
|
26113
|
-
}, {
|
|
26114
|
-
kind: 3,
|
|
26115
|
-
attributes: {
|
|
26116
|
-
"db.system": dbSystem,
|
|
26117
|
-
"db.operation.name": "find",
|
|
26118
|
-
"db.collection.name": store
|
|
26119
|
-
}
|
|
26120
|
-
})
|
|
26163
|
+
find: async (store, options) => telemetry.startSpan(`db.find ${store}`, async (span) => measureOp("find", store, async () => {
|
|
26164
|
+
const rows = await tx.find(store, options);
|
|
26165
|
+
span.setAttribute("db.response.row_count", rows.length);
|
|
26166
|
+
return rows;
|
|
26167
|
+
}), { kind: 3, attributes: dbAttrs("find", store) })
|
|
26121
26168
|
});
|
|
26122
26169
|
const wrapReadWrite = (tx) => ({
|
|
26123
26170
|
...wrapRead(tx),
|
|
26124
|
-
set: async (store, data) => telemetry.startSpan(`db.set ${store}`, () => tx.set(store, data), {
|
|
26125
|
-
|
|
26126
|
-
|
|
26127
|
-
"db.system": dbSystem,
|
|
26128
|
-
"db.operation.name": "set",
|
|
26129
|
-
"db.collection.name": store
|
|
26130
|
-
}
|
|
26131
|
-
}),
|
|
26132
|
-
remove: async (store, id3) => telemetry.startSpan(`db.remove ${store}`, () => tx.remove(store, id3), {
|
|
26133
|
-
kind: 3,
|
|
26134
|
-
attributes: {
|
|
26135
|
-
"db.system": dbSystem,
|
|
26136
|
-
"db.operation.name": "remove",
|
|
26137
|
-
"db.collection.name": store
|
|
26138
|
-
}
|
|
26139
|
-
}),
|
|
26140
|
-
commit: async () => telemetry.startSpan("db.commit", () => tx.commit(), {
|
|
26141
|
-
kind: 3,
|
|
26142
|
-
attributes: {
|
|
26143
|
-
"db.system": dbSystem,
|
|
26144
|
-
"db.operation.name": "commit"
|
|
26145
|
-
}
|
|
26146
|
-
})
|
|
26171
|
+
set: async (store, data) => telemetry.startSpan(`db.set ${store}`, () => measureOp("set", store, () => tx.set(store, data)), { kind: 3, attributes: dbAttrs("set", store) }),
|
|
26172
|
+
remove: async (store, id3) => telemetry.startSpan(`db.remove ${store}`, () => measureOp("remove", store, () => tx.remove(store, id3)), { kind: 3, attributes: dbAttrs("remove", store) }),
|
|
26173
|
+
commit: async () => telemetry.startSpan("db.commit", () => measureOp("commit", undefined, () => tx.commit()), { kind: 3, attributes: dbAttrs("commit") })
|
|
26147
26174
|
});
|
|
26148
26175
|
return new Proxy(adapter, {
|
|
26149
26176
|
get(target, prop) {
|
|
@@ -26206,6 +26233,7 @@ function initServerTelemetry(config) {
|
|
|
26206
26233
|
logger: loggerProvider.getLogger(config.serviceName),
|
|
26207
26234
|
meter: meterProvider.getMeter(config.serviceName)
|
|
26208
26235
|
});
|
|
26236
|
+
const restoreConsole = config.patchConsole !== false ? patchConsole(telemetry) : () => {};
|
|
26209
26237
|
if (telemetry.config.debug) {
|
|
26210
26238
|
console.log("[arc-otel] server init", {
|
|
26211
26239
|
serviceName: config.serviceName,
|
|
@@ -26216,6 +26244,7 @@ function initServerTelemetry(config) {
|
|
|
26216
26244
|
});
|
|
26217
26245
|
}
|
|
26218
26246
|
const shutdown = async () => {
|
|
26247
|
+
restoreConsole();
|
|
26219
26248
|
try {
|
|
26220
26249
|
await Promise.all([
|
|
26221
26250
|
tracerProvider.shutdown(),
|
|
@@ -26228,8 +26257,15 @@ function initServerTelemetry(config) {
|
|
|
26228
26257
|
};
|
|
26229
26258
|
return { telemetry, shutdown };
|
|
26230
26259
|
}
|
|
26231
|
-
var DEFAULT_REDACT_KEY_PATTERN, DEFAULT_MAX_STRING_LEN = 2048, DEFAULT_MAX_JSON_LEN = 4096;
|
|
26260
|
+
// Module-level state for the server telemetry code. These stay `var`
// declarations: the functions that read them are defined earlier in the
// bundle, and some values are only assigned when the init callback runs.

// [console method, telemetry level] pairs; assigned in init_init_server.
var METHODS;
// True while a console patch is installed.
var patched = false;
// True while a console call is being forwarded to telemetry.
var emitting = false;
// Assigned in init_init_server.
var DEFAULT_REDACT_KEY_PATTERN;
var DEFAULT_MAX_STRING_LEN = 2048;
var DEFAULT_MAX_JSON_LEN = 4096;

var init_init_server = __esm(() => {
  // console.log and console.info both map to the "info" level.
  METHODS = [
    ["debug", "debug"],
    ["log", "info"],
    ["info", "info"],
    ["warn", "warn"],
    ["error", "error"]
  ];
  DEFAULT_REDACT_KEY_PATTERN = /(password|passwd|token|secret|authorization|jwt|api[_-]?key|cookie|email|credit[_-]?card|ssn)/i;
});
|
|
26235
26271
|
|
|
@@ -34687,6 +34723,17 @@ function serverExternalsPlugin() {
|
|
|
34687
34723
|
}
|
|
34688
34724
|
};
|
|
34689
34725
|
}
|
|
34726
|
+
/**
 * Build plugin that redirects imports of known packages to their source
 * entrypoints.
 *
 * Only specifiers that do not start with "." or "/" are inspected. A
 * specifier found in `srcByName` resolves to the mapped path; anything else
 * yields `null`, leaving resolution to the next resolver.
 *
 * @param {Map<string, string>} srcByName - Package name to entrypoint path.
 * @returns {{name: string, setup: Function}} Plugin object.
 */
function workspaceSourcePlugin(srcByName) {
  const bareSpecifier = /^[^./]/;
  const resolveToSource = (args) => {
    const sourcePath = srcByName.get(args.path);
    if (!sourcePath) {
      return null;
    }
    return { path: sourcePath, sideEffects: true };
  };
  return {
    name: "workspace-source",
    setup(build2) {
      build2.onResolve({ filter: bareSpecifier }, resolveToSource);
    }
  };
}
|
|
34690
34737
|
function jsxDevShimPlugin() {
|
|
34691
34738
|
return {
|
|
34692
34739
|
name: "jsx-dev-runtime-shim",
|
|
@@ -34707,9 +34754,10 @@ export { Fragment };
|
|
|
34707
34754
|
};
|
|
34708
34755
|
}
|
|
34709
34756
|
var CONTEXT_CLIENTS = [
|
|
34710
|
-
{ name: "server", target: "bun", defines: { ONLY_SERVER: "true", ONLY_BROWSER: "false", ONLY_CLIENT: "false" } },
|
|
34711
34757
|
{ name: "browser", target: "browser", defines: { ONLY_SERVER: "false", ONLY_BROWSER: "true", ONLY_CLIENT: "true" } }
|
|
34712
34758
|
];
|
|
34759
|
+
var SERVER_DEFINES = { ONLY_SERVER: "true", ONLY_BROWSER: "false", ONLY_CLIENT: "false" };
|
|
34760
|
+
var SERVER_ENTRY_FILE = "_server.js";
|
|
34713
34761
|
function discoverPackages(rootDir) {
|
|
34714
34762
|
const rootPkg = JSON.parse(readFileSync7(join8(rootDir, "package.json"), "utf-8"));
|
|
34715
34763
|
const workspaceGlobs = rootPkg.workspaces ?? [];
|
|
@@ -34802,9 +34850,7 @@ async function buildContextClient(pkg, rootDir, client, cache, noCache) {
|
|
|
34802
34850
|
console.log(` building: ${pkg.name} (${client.name})`);
|
|
34803
34851
|
const peerDeps = Object.keys(pkg.packageJson.peerDependencies ?? {});
|
|
34804
34852
|
const allDeps = pkg.packageJson.dependencies ?? {};
|
|
34805
|
-
const
|
|
34806
|
-
const workspaceDeps = isBrowser2 ? Object.keys(allDeps) : Object.entries(allDeps).filter(([, spec]) => !spec.startsWith("workspace:")).map(([name]) => name);
|
|
34807
|
-
const externals = [...peerDeps, ...workspaceDeps];
|
|
34853
|
+
const externals = [...peerDeps, ...Object.keys(allDeps)];
|
|
34808
34854
|
const result = await Bun.build({
|
|
34809
34855
|
entrypoints: [pkg.entrypoint],
|
|
34810
34856
|
outdir: join8(outDir, "main"),
|
|
@@ -34812,7 +34858,7 @@ async function buildContextClient(pkg, rootDir, client, cache, noCache) {
|
|
|
34812
34858
|
format: "esm",
|
|
34813
34859
|
naming: "index.[ext]",
|
|
34814
34860
|
external: externals,
|
|
34815
|
-
plugins:
|
|
34861
|
+
plugins: [jsxDevShimPlugin()],
|
|
34816
34862
|
define: client.defines
|
|
34817
34863
|
});
|
|
34818
34864
|
if (!result.success) {
|
|
@@ -34875,6 +34921,80 @@ async function buildContextPackages(rootDir, packages, cache, noCache) {
|
|
|
34875
34921
|
}
|
|
34876
34922
|
return { declarationErrors };
|
|
34877
34923
|
}
|
|
34924
|
+
/**
 * Bundles all context packages into a single server app inside `serverDir`.
 *
 * A throw-away entry file importing every context package is written to
 * `<serverDir>/_entries`, bundled with `Bun.build` (target "bun", ESM, code
 * splitting on) and the scratch directory is removed again. Imports of
 * packages in `packages` are routed to their source entrypoints through
 * `workspaceSourcePlugin`; framework peers, peer dependencies and
 * non-`workspace:` dependencies are marked external.
 *
 * The build is skipped when `noCache` is false and `isCacheHit` reports a
 * hit for the computed input hash and the expected entry file.
 *
 * @param {string} rootDir - Workspace root (currently unused by this function).
 * @param {string} serverDir - Output directory for the server bundle.
 * @param {Array<object>} packages - Packages; `name`, `entrypoint` and
 *   `packageJson` are read here.
 * @param {object} cache - Build cache passed to `isCacheHit` / `updateCache`.
 * @param {boolean} noCache - When true the cache lookup is bypassed.
 * @returns {Promise<{entryFile: string, cached: boolean}>}
 * @throws {Error} When the build fails, produces no entry point, or names
 *   the entry point differently from SERVER_ENTRY_FILE.
 */
async function buildServerApp(rootDir, serverDir, packages, cache, noCache) {
  const contexts = packages.filter((p) => isContextPackage(p.packageJson));
  mkdirSync6(serverDir, { recursive: true });
  const srcByName = new Map(packages.map((p) => [p.name, p.entrypoint]));

  // Externals: framework peers, every peer dependency, and every dependency
  // that is not a workspace link (workspace links are bundled from source).
  const externalSet = new Set(FRAMEWORK_PEERS);
  for (const p of packages) {
    for (const name of Object.keys(p.packageJson.peerDependencies ?? {})) {
      externalSet.add(name);
    }
    for (const [name, spec] of Object.entries(p.packageJson.dependencies ?? {})) {
      if (!spec.startsWith("workspace:"))
        externalSet.add(name);
    }
  }
  const external = [...externalSet];

  const unitId = "server-app";
  // Lists are sorted so the hash does not depend on discovery order.
  const inputHash = sha256OfJson({
    members: packages.map((p) => ({ name: p.name, src: pkgSourceHash(p) })).sort((a, b) => a.name.localeCompare(b.name)),
    contexts: contexts.map((p) => p.name).sort(),
    external: [...external].sort(),
    defines: SERVER_DEFINES
  });
  const entryFileAbs = join8(serverDir, SERVER_ENTRY_FILE);
  if (!noCache && isCacheHit(cache, unitId, inputHash, [entryFileAbs])) {
    console.log(` \u2713 cached: ${unitId}`);
    return { entryFile: SERVER_ENTRY_FILE, cached: true };
  }
  console.log(` building: ${unitId} (${contexts.length} server modules)`);

  // Drop the previous build's .js files so stale chunks do not linger.
  for (const f of readdirSync4(serverDir)) {
    if (f.endsWith(".js"))
      rmSync(join8(serverDir, f), { force: true });
  }

  const tmpDir = join8(serverDir, "_entries");
  let result;
  try {
    // Creating and writing the scratch entry happens inside the try so the
    // directory is cleaned up even when the write itself fails.
    mkdirSync6(tmpDir, { recursive: true });
    const entrySrc = join8(tmpDir, SERVER_ENTRY_FILE.replace(/\.js$/, ".ts"));
    writeFileSync6(entrySrc, contexts.map((p) => `import "${p.name}";`).join("\n") + "\n");
    result = await Bun.build({
      entrypoints: [entrySrc],
      outdir: serverDir,
      target: "bun",
      format: "esm",
      splitting: true,
      naming: "[name].[ext]",
      external,
      plugins: [
        jsxDevShimPlugin(),
        serverExternalsPlugin(),
        workspaceSourcePlugin(srcByName)
      ],
      define: SERVER_DEFINES
    });
  } finally {
    rmSync(tmpDir, { recursive: true, force: true });
  }

  if (!result.success) {
    for (const log2 of result.logs)
      console.error(log2);
    throw new Error("Server app build failed");
  }
  const entryOut = result.outputs.find((o) => o.kind === "entry-point");
  if (!entryOut) {
    throw new Error("Server app build: entry not found in outputs");
  }
  if (basename2(entryOut.path) !== SERVER_ENTRY_FILE) {
    throw new Error(`Server app build: unexpected entry name ${basename2(entryOut.path)} (wanted ${SERVER_ENTRY_FILE})`);
  }
  const outputHash = sha256OfDir(serverDir);
  updateCache(cache, unitId, inputHash, { outputHash });
  return { entryFile: SERVER_ENTRY_FILE, cached: false };
}
|
|
34878
34998
|
async function buildBrowserApp(rootDir, outDir, plan, cache, noCache, i18nCollector) {
|
|
34879
34999
|
mkdirSync6(outDir, { recursive: true });
|
|
34880
35000
|
const publicMembers = plan.groups.get("public") ?? [];
|
|
@@ -35180,11 +35300,8 @@ import {
|
|
|
35180
35300
|
writeFileSync as writeFileSync7
|
|
35181
35301
|
} from "fs";
|
|
35182
35302
|
import { join as join9 } from "path";
|
|
35183
|
-
async function extractAccessMap(rootDir,
|
|
35184
|
-
const serverBundles =
|
|
35185
|
-
name: p.name,
|
|
35186
|
-
path: join9(p.path, "dist", "server", "main", "index.js")
|
|
35187
|
-
})).filter((b) => existsSync8(b.path));
|
|
35303
|
+
async function extractAccessMap(rootDir, serverBundlePath) {
|
|
35304
|
+
const serverBundles = existsSync8(serverBundlePath) ? [{ name: "server", path: serverBundlePath }] : [];
|
|
35188
35305
|
const workerDir = join9(rootDir, ".arc", ".tmp");
|
|
35189
35306
|
mkdirSync7(workerDir, { recursive: true });
|
|
35190
35307
|
const workerPath = join9(workerDir, `access-extractor-${Date.now()}.mjs`);
|
|
@@ -35513,8 +35630,9 @@ async function buildAll(ws, opts = {}) {
|
|
|
35513
35630
|
log2(`Building (concurrency parallel${noCache ? ", no-cache" : ""})...`);
|
|
35514
35631
|
assertOneModulePerPackage(ws.packages);
|
|
35515
35632
|
await buildContextPackages(ws.rootDir, ws.packages, cache, noCache);
|
|
35516
|
-
|
|
35517
|
-
const
|
|
35633
|
+
const serverDir = join12(ws.arcDir, "server");
|
|
35634
|
+
const { entryFile: serverEntry } = await buildServerApp(ws.rootDir, serverDir, ws.packages, cache, noCache);
|
|
35635
|
+
const accessMap = await extractAccessMap(ws.rootDir, join12(serverDir, serverEntry));
|
|
35518
35636
|
mkdirSync9(ws.arcDir, { recursive: true });
|
|
35519
35637
|
writeFileSync9(join12(ws.arcDir, "access.json"), JSON.stringify(accessMap, null, 2) + `
|
|
35520
35638
|
`);
|
|
@@ -35545,22 +35663,6 @@ function assembleManifest(ws, browser, cache) {
|
|
|
35545
35663
|
buildTime: new Date().toISOString()
|
|
35546
35664
|
};
|
|
35547
35665
|
}
|
|
35548
|
-
function copyContextServerBundles(ws) {
|
|
35549
|
-
const outDir = join12(ws.arcDir, "server");
|
|
35550
|
-
mkdirSync9(outDir, { recursive: true });
|
|
35551
|
-
for (const pkg of ws.packages) {
|
|
35552
|
-
if (!isContextPackage(pkg.packageJson))
|
|
35553
|
-
continue;
|
|
35554
|
-
const src = join12(pkg.path, "dist", "server", "main", "index.js");
|
|
35555
|
-
if (!existsSync10(src)) {
|
|
35556
|
-
err(`Server bundle missing for ${pkg.name}: ${src}`);
|
|
35557
|
-
continue;
|
|
35558
|
-
}
|
|
35559
|
-
const safeName = pkg.path.split("/").pop();
|
|
35560
|
-
const dst = join12(outDir, `${safeName}.js`);
|
|
35561
|
-
copyFileSync(src, dst);
|
|
35562
|
-
}
|
|
35563
|
-
}
|
|
35564
35666
|
function resolveAssetSource(from, pkgDir, rootDir) {
|
|
35565
35667
|
if (from.startsWith("./") || from.startsWith("../")) {
|
|
35566
35668
|
const resolved = join12(pkgDir, from);
|
|
@@ -35656,34 +35758,15 @@ async function loadServerContext(ws) {
|
|
|
35656
35758
|
const platformPkg = JSON.parse(readFileSync11(join12(platformDir, "package.json"), "utf-8"));
|
|
35657
35759
|
const platformEntry = join12(platformDir, platformPkg.main ?? "src/index.ts");
|
|
35658
35760
|
await import(platformEntry);
|
|
35659
|
-
const
|
|
35660
|
-
|
|
35661
|
-
if (bundles.length > 0) {
|
|
35662
|
-
for (const file of bundles) {
|
|
35663
|
-
const bundlePath = join12(serverDir, file);
|
|
35664
|
-
try {
|
|
35665
|
-
await import(bundlePath);
|
|
35666
|
-
} catch (e) {
|
|
35667
|
-
err(`Failed to load server bundle ${file}: ${e}`);
|
|
35668
|
-
}
|
|
35669
|
-
}
|
|
35670
|
-
} else if (ws.packages.length > 0) {
|
|
35671
|
-
const ctxPackages = ws.packages.filter((p) => isContextPackage(p.packageJson));
|
|
35672
|
-
for (const ctx of ctxPackages) {
|
|
35673
|
-
const serverDist = join12(ctx.path, "dist", "server", "main", "index.js");
|
|
35674
|
-
if (!existsSync10(serverDist)) {
|
|
35675
|
-
err(`Context server dist not found: ${serverDist}`);
|
|
35676
|
-
continue;
|
|
35677
|
-
}
|
|
35678
|
-
try {
|
|
35679
|
-
await import(serverDist);
|
|
35680
|
-
} catch (e) {
|
|
35681
|
-
err(`Failed to load server context from ${ctx.name}: ${e}`);
|
|
35682
|
-
}
|
|
35683
|
-
}
|
|
35684
|
-
} else {
|
|
35761
|
+
const serverEntry = join12(ws.arcDir, "server", SERVER_ENTRY_FILE);
|
|
35762
|
+
if (!existsSync10(serverEntry)) {
|
|
35685
35763
|
return { context: null, moduleAccess: new Map };
|
|
35686
35764
|
}
|
|
35765
|
+
try {
|
|
35766
|
+
await import(serverEntry);
|
|
35767
|
+
} catch (e) {
|
|
35768
|
+
err(`Failed to load server bundle ${SERVER_ENTRY_FILE}: ${e}`);
|
|
35769
|
+
}
|
|
35687
35770
|
const { getContext, getAllModuleAccess } = await import(platformEntry);
|
|
35688
35771
|
return {
|
|
35689
35772
|
context: getContext() ?? null,
|
|
@@ -36054,18 +36137,32 @@ function generateCaddyfile(cfg) {
|
|
|
36054
36137
|
email ${cfg.caddy.email}`;
|
|
36055
36138
|
const tlsDirective = cfg.caddy.email === "internal" ? `
|
|
36056
36139
|
tls internal` : "";
|
|
36140
|
+
const observability = cfg.observability?.enabled === true;
|
|
36141
|
+
const logDirective = observability ? [" log {", " output stdout", " format json", " }"] : [];
|
|
36057
36142
|
const lines = [];
|
|
36058
36143
|
lines.push("# Generated by `arc platform deploy` \u2014 do not edit by hand.");
|
|
36059
36144
|
lines.push("");
|
|
36060
36145
|
lines.push("{");
|
|
36061
36146
|
lines.push(" admin off");
|
|
36147
|
+
if (observability) {
|
|
36148
|
+
lines.push(" metrics {");
|
|
36149
|
+
lines.push(" per_host");
|
|
36150
|
+
lines.push(" }");
|
|
36151
|
+
}
|
|
36062
36152
|
if (email)
|
|
36063
36153
|
lines.push(` ${email.trim()}`);
|
|
36064
36154
|
lines.push("}");
|
|
36065
36155
|
lines.push("");
|
|
36156
|
+
if (observability) {
|
|
36157
|
+
lines.push(":2020 {");
|
|
36158
|
+
lines.push(" metrics");
|
|
36159
|
+
lines.push("}");
|
|
36160
|
+
lines.push("");
|
|
36161
|
+
}
|
|
36066
36162
|
for (const [name, env2] of Object.entries(cfg.envs)) {
|
|
36067
36163
|
lines.push(`${env2.domain} {${tlsDirective}`);
|
|
36068
|
-
|
|
36164
|
+
lines.push(...logDirective);
|
|
36165
|
+
if (observability) {
|
|
36069
36166
|
lines.push(" handle_path /otel/* {");
|
|
36070
36167
|
lines.push(" reverse_proxy otel-collector:4318");
|
|
36071
36168
|
lines.push(" }");
|
|
@@ -36078,13 +36175,11 @@ function generateCaddyfile(cfg) {
|
|
|
36078
36175
|
lines.push("}");
|
|
36079
36176
|
lines.push("");
|
|
36080
36177
|
}
|
|
36081
|
-
if (
|
|
36082
|
-
const
|
|
36083
|
-
if (
|
|
36084
|
-
|
|
36085
|
-
|
|
36086
|
-
const observabilityDomain = `${subdomain}.${apex}`;
|
|
36087
|
-
lines.push(`${observabilityDomain} {${tlsDirective}`);
|
|
36178
|
+
if (observability) {
|
|
36179
|
+
const domain = observabilityDomain(cfg);
|
|
36180
|
+
if (domain) {
|
|
36181
|
+
lines.push(`${domain} {${tlsDirective}`);
|
|
36182
|
+
lines.push(...logDirective);
|
|
36088
36183
|
lines.push(" basic_auth {");
|
|
36089
36184
|
lines.push(" import /etc/caddy/observability-htpasswd");
|
|
36090
36185
|
lines.push(" }");
|
|
@@ -36094,6 +36189,7 @@ function generateCaddyfile(cfg) {
|
|
|
36094
36189
|
}
|
|
36095
36190
|
}
|
|
36096
36191
|
lines.push(`${cfg.registry.domain} {${tlsDirective}`);
|
|
36192
|
+
lines.push(...logDirective);
|
|
36097
36193
|
lines.push(" reverse_proxy registry:5000 {");
|
|
36098
36194
|
lines.push(" header_up Host {host}");
|
|
36099
36195
|
lines.push(" }");
|
|
@@ -36105,6 +36201,15 @@ function generateCaddyfile(cfg) {
|
|
|
36105
36201
|
`) + `
|
|
36106
36202
|
`;
|
|
36107
36203
|
}
|
|
36204
|
+
/**
 * Derives the hostname of the observability site from the deploy config.
 *
 * The hostname is `<subdomain>.<apex>`, where the apex comes from the domain
 * of the first entry in `cfg.envs` and the subdomain defaults to
 * "observability".
 *
 * @param {object} cfg - Deploy config; reads `observability` and `envs`.
 * @returns {string|null} The hostname, or null when observability is not
 *   enabled or no environment is configured.
 */
function observabilityDomain(cfg) {
  const settings = cfg.observability;
  if (!settings?.enabled) {
    return null;
  }
  const [primaryEnv] = Object.values(cfg.envs);
  if (!primaryEnv) {
    return null;
  }
  const label = settings.subdomain ?? "observability";
  const apex = apexOf(primaryEnv.domain);
  return `${label}.${apex}`;
}
|
|
36108
36213
|
function apexOf(host) {
|
|
36109
36214
|
const parts = host.split(".");
|
|
36110
36215
|
if (parts.length <= 2)
|
|
@@ -36113,6 +36218,13 @@ function apexOf(host) {
|
|
|
36113
36218
|
}
|
|
36114
36219
|
|
|
36115
36220
|
// src/deploy/compose.ts
|
|
36221
|
+
/**
 * Appends a `logging:` section to the compose output being assembled:
 * json-file driver with max-size "10m" and max-file "3".
 *
 * @param {string[]} lines - Output lines collected so far; mutated in place.
 * @returns {void}
 */
function pushLogging(lines) {
  const loggingSection = [
    " logging:",
    " driver: json-file",
    " options:",
    ' max-size: "10m"',
    ' max-file: "3"'
  ];
  for (const entry of loggingSection) {
    lines.push(entry);
  }
}
|
|
36116
36228
|
function generateCompose({ cfg }) {
|
|
36117
36229
|
const lines = [];
|
|
36118
36230
|
lines.push("# Generated by `arc platform deploy` \u2014 do not edit by hand.");
|
|
@@ -36121,6 +36233,7 @@ function generateCompose({ cfg }) {
|
|
|
36121
36233
|
lines.push(" caddy:");
|
|
36122
36234
|
lines.push(" image: caddy:2-alpine");
|
|
36123
36235
|
lines.push(" restart: unless-stopped");
|
|
36236
|
+
pushLogging(lines);
|
|
36124
36237
|
lines.push(" ports:");
|
|
36125
36238
|
lines.push(' - "80:80"');
|
|
36126
36239
|
lines.push(' - "443:443"');
|
|
@@ -36133,10 +36246,15 @@ function generateCompose({ cfg }) {
|
|
|
36133
36246
|
lines.push(" - caddy_config:/config");
|
|
36134
36247
|
lines.push(" networks:");
|
|
36135
36248
|
lines.push(" - arc-net");
|
|
36249
|
+
if (cfg.observability?.enabled) {
|
|
36250
|
+
lines.push(" expose:");
|
|
36251
|
+
lines.push(' - "2020" # Prometheus metrics endpoint (Caddyfile :2020 site)');
|
|
36252
|
+
}
|
|
36136
36253
|
lines.push("");
|
|
36137
36254
|
lines.push(" registry:");
|
|
36138
36255
|
lines.push(" image: registry:2");
|
|
36139
36256
|
lines.push(" restart: unless-stopped");
|
|
36257
|
+
pushLogging(lines);
|
|
36140
36258
|
lines.push(" volumes:");
|
|
36141
36259
|
lines.push(" - registry_data:/var/lib/registry");
|
|
36142
36260
|
lines.push(" - ./registry-auth/htpasswd:/auth/htpasswd:ro");
|
|
@@ -36157,6 +36275,13 @@ function generateCompose({ cfg }) {
|
|
|
36157
36275
|
lines.push(` image: \${ARC_IMAGE_${upperName}:-arc-${name}:not-deployed}`);
|
|
36158
36276
|
lines.push(` container_name: arc-${name}`);
|
|
36159
36277
|
lines.push(" restart: unless-stopped");
|
|
36278
|
+
pushLogging(lines);
|
|
36279
|
+
lines.push(" healthcheck:");
|
|
36280
|
+
lines.push(' test: ["CMD", "wget", "-qO-", "http://127.0.0.1:5005/health"]');
|
|
36281
|
+
lines.push(" interval: 30s");
|
|
36282
|
+
lines.push(" timeout: 5s");
|
|
36283
|
+
lines.push(" retries: 3");
|
|
36284
|
+
lines.push(" start_period: 20s");
|
|
36160
36285
|
if (usePostgres) {
|
|
36161
36286
|
lines.push(" depends_on:");
|
|
36162
36287
|
lines.push(` arc-db-${name}:`);
|
|
@@ -36209,6 +36334,7 @@ function generateCompose({ cfg }) {
|
|
|
36209
36334
|
lines.push(` image: ${image2}`);
|
|
36210
36335
|
lines.push(` container_name: arc-db-${name}`);
|
|
36211
36336
|
lines.push(" restart: unless-stopped");
|
|
36337
|
+
pushLogging(lines);
|
|
36212
36338
|
lines.push(" environment:");
|
|
36213
36339
|
lines.push(" POSTGRES_USER: arc");
|
|
36214
36340
|
lines.push(" POSTGRES_DB: arc");
|
|
@@ -36233,9 +36359,13 @@ function generateCompose({ cfg }) {
|
|
|
36233
36359
|
lines.push(" image: otel/opentelemetry-collector-contrib:0.114.0");
|
|
36234
36360
|
lines.push(" container_name: arc-otel-collector");
|
|
36235
36361
|
lines.push(" restart: unless-stopped");
|
|
36362
|
+
pushLogging(lines);
|
|
36363
|
+
lines.push(' user: "0:0"');
|
|
36236
36364
|
lines.push(' command: ["--config=/etc/otelcol-contrib/config.yaml"]');
|
|
36237
36365
|
lines.push(" volumes:");
|
|
36238
36366
|
lines.push(" - ./observability/otel-collector-config.yaml:/etc/otelcol-contrib/config.yaml:ro");
|
|
36367
|
+
lines.push(" - /:/hostfs:ro # hostmetrics root_path");
|
|
36368
|
+
lines.push(" - /var/run/docker.sock:/var/run/docker.sock:ro # docker_stats");
|
|
36239
36369
|
lines.push(" networks: [arc-net]");
|
|
36240
36370
|
lines.push(" expose:");
|
|
36241
36371
|
lines.push(' - "4317" # OTLP gRPC');
|
|
@@ -36250,6 +36380,7 @@ function generateCompose({ cfg }) {
|
|
|
36250
36380
|
lines.push(" image: grafana/tempo:2.6.1");
|
|
36251
36381
|
lines.push(" container_name: arc-tempo");
|
|
36252
36382
|
lines.push(" restart: unless-stopped");
|
|
36383
|
+
pushLogging(lines);
|
|
36253
36384
|
lines.push(' command: ["-config.file=/etc/tempo.yaml"]');
|
|
36254
36385
|
lines.push(' user: "0" # tempo writes to /var/tempo, owned by root in the image');
|
|
36255
36386
|
lines.push(" volumes:");
|
|
@@ -36264,6 +36395,7 @@ function generateCompose({ cfg }) {
|
|
|
36264
36395
|
lines.push(" image: grafana/loki:3.3.2");
|
|
36265
36396
|
lines.push(" container_name: arc-loki");
|
|
36266
36397
|
lines.push(" restart: unless-stopped");
|
|
36398
|
+
pushLogging(lines);
|
|
36267
36399
|
lines.push(' command: ["-config.file=/etc/loki/local-config.yaml"]');
|
|
36268
36400
|
lines.push(' user: "0"');
|
|
36269
36401
|
lines.push(" volumes:");
|
|
@@ -36278,6 +36410,7 @@ function generateCompose({ cfg }) {
|
|
|
36278
36410
|
lines.push(" image: prom/prometheus:v2.55.1");
|
|
36279
36411
|
lines.push(" container_name: arc-prometheus");
|
|
36280
36412
|
lines.push(" restart: unless-stopped");
|
|
36413
|
+
pushLogging(lines);
|
|
36281
36414
|
lines.push(" command:");
|
|
36282
36415
|
lines.push(' - "--config.file=/etc/prometheus/prometheus.yml"');
|
|
36283
36416
|
lines.push(' - "--storage.tsdb.path=/prometheus"');
|
|
@@ -36291,20 +36424,47 @@ function generateCompose({ cfg }) {
|
|
|
36291
36424
|
lines.push(" expose:");
|
|
36292
36425
|
lines.push(' - "9090" # HTTP API + remote_write receiver');
|
|
36293
36426
|
lines.push("");
|
|
36427
|
+
lines.push(" alloy:");
|
|
36428
|
+
lines.push(" image: grafana/alloy:v1.16.1");
|
|
36429
|
+
lines.push(" container_name: arc-alloy");
|
|
36430
|
+
lines.push(" restart: unless-stopped");
|
|
36431
|
+
pushLogging(lines);
|
|
36432
|
+
lines.push(' user: "0" # docker.sock access');
|
|
36433
|
+
lines.push(" command:");
|
|
36434
|
+
lines.push(" - run");
|
|
36435
|
+
lines.push(" - --server.http.listen-addr=0.0.0.0:12345");
|
|
36436
|
+
lines.push(" - --storage.path=/var/lib/alloy/data");
|
|
36437
|
+
lines.push(" - /etc/alloy/config.alloy");
|
|
36438
|
+
lines.push(" volumes:");
|
|
36439
|
+
lines.push(" - ./observability/alloy-config.alloy:/etc/alloy/config.alloy:ro");
|
|
36440
|
+
lines.push(" - /var/run/docker.sock:/var/run/docker.sock:ro");
|
|
36441
|
+
lines.push(" - alloy_data:/var/lib/alloy/data");
|
|
36442
|
+
lines.push(" networks: [arc-net]");
|
|
36443
|
+
lines.push(" expose:");
|
|
36444
|
+
lines.push(' - "12345" # Alloy self-metrics (Prom scrape)');
|
|
36445
|
+
lines.push(" depends_on:");
|
|
36446
|
+
lines.push(" - loki");
|
|
36447
|
+
lines.push("");
|
|
36294
36448
|
const adminPasswordEnv = cfg.observability.adminPasswordEnv ?? "ARC_OBSERVABILITY_PASSWORD";
|
|
36449
|
+
const grafanaDomain = observabilityDomain(cfg);
|
|
36295
36450
|
lines.push(" grafana:");
|
|
36296
36451
|
lines.push(" image: grafana/grafana:11.4.0");
|
|
36297
36452
|
lines.push(" container_name: arc-grafana");
|
|
36298
36453
|
lines.push(" restart: unless-stopped");
|
|
36454
|
+
pushLogging(lines);
|
|
36299
36455
|
lines.push(" environment:");
|
|
36300
36456
|
lines.push(" GF_SECURITY_ADMIN_USER: admin");
|
|
36301
36457
|
lines.push(` GF_SECURITY_ADMIN_PASSWORD: \${${adminPasswordEnv}:?missing ${adminPasswordEnv}}`);
|
|
36302
36458
|
lines.push(' GF_USERS_ALLOW_SIGN_UP: "false"');
|
|
36303
36459
|
lines.push(' GF_AUTH_ANONYMOUS_ENABLED: "false"');
|
|
36460
|
+
if (grafanaDomain) {
|
|
36461
|
+
lines.push(` GF_SERVER_ROOT_URL: "https://${grafanaDomain}"`);
|
|
36462
|
+
}
|
|
36304
36463
|
lines.push(" volumes:");
|
|
36305
36464
|
lines.push(" - ./observability/grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:ro");
|
|
36306
36465
|
lines.push(" - ./observability/grafana-dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:ro");
|
|
36307
36466
|
lines.push(" - ./observability/grafana-dashboards:/etc/grafana/provisioning/dashboards/arc:ro");
|
|
36467
|
+
lines.push(" - ./observability/grafana-alerting:/etc/grafana/provisioning/alerting:ro");
|
|
36308
36468
|
lines.push(" - grafana_data:/var/lib/grafana");
|
|
36309
36469
|
lines.push(" networks: [arc-net]");
|
|
36310
36470
|
lines.push(" expose:");
|
|
@@ -36334,6 +36494,7 @@ function generateCompose({ cfg }) {
|
|
|
36334
36494
|
lines.push(" loki_data:");
|
|
36335
36495
|
lines.push(" prometheus_data:");
|
|
36336
36496
|
lines.push(" grafana_data:");
|
|
36497
|
+
lines.push(" alloy_data:");
|
|
36337
36498
|
}
|
|
36338
36499
|
return lines.join(`
|
|
36339
36500
|
`) + `
|
|
@@ -36408,6 +36569,69 @@ ${envNames.map((name) => ` - "https://${cfg.envs[name].domain}"`).joi
|
|
|
36408
36569
|
- tracestate
|
|
36409
36570
|
- content-type
|
|
36410
36571
|
|
|
36572
|
+
# Host-level CPU / memory / load / disk / filesystem / network metrics.
|
|
36573
|
+
# The host root is bind-mounted read-only at /hostfs (see compose).
|
|
36574
|
+
hostmetrics:
|
|
36575
|
+
collection_interval: 30s
|
|
36576
|
+
root_path: /hostfs
|
|
36577
|
+
scrapers:
|
|
36578
|
+
cpu:
|
|
36579
|
+
metrics:
|
|
36580
|
+
system.cpu.utilization:
|
|
36581
|
+
enabled: true
|
|
36582
|
+
memory:
|
|
36583
|
+
metrics:
|
|
36584
|
+
system.memory.utilization:
|
|
36585
|
+
enabled: true
|
|
36586
|
+
load: {}
|
|
36587
|
+
disk: {}
|
|
36588
|
+
filesystem:
|
|
36589
|
+
metrics:
|
|
36590
|
+
system.filesystem.utilization:
|
|
36591
|
+
enabled: true
|
|
36592
|
+
exclude_fs_types:
|
|
36593
|
+
fs_types: [autofs, binfmt_misc, bpf, cgroup2, configfs, debugfs, devpts, devtmpfs, fusectl, hugetlbfs, iso9660, mqueue, nsfs, overlay, proc, procfs, pstore, rpc_pipefs, securityfs, selinuxfs, squashfs, sysfs, tracefs, tmpfs]
|
|
36594
|
+
match_type: strict
|
|
36595
|
+
exclude_mount_points:
|
|
36596
|
+
mount_points: ["/var/lib/docker/.*", "/run/.*", "/snap/.*", "/boot/.*"]
|
|
36597
|
+
match_type: regexp
|
|
36598
|
+
network: {}
|
|
36599
|
+
paging: {}
|
|
36600
|
+
|
|
36601
|
+
# Per-container CPU / memory / network / block-IO + restarts straight from
|
|
36602
|
+
# the Docker daemon (socket bind-mounted read-only, see compose).
|
|
36603
|
+
# api_version pinned: the receiver defaults to Docker API 1.25, which modern
|
|
36604
|
+
# daemons (Engine 25+ require >= 1.40) reject \u2014 without this the receiver
|
|
36605
|
+
# fails to start and takes the whole collector down. Quoted so YAML doesn't
|
|
36606
|
+
# parse 1.40 \u2192 1.4. Must be <= the daemon's max; 1.40 is the safe floor.
|
|
36607
|
+
docker_stats:
|
|
36608
|
+
endpoint: unix:///var/run/docker.sock
|
|
36609
|
+
api_version: "1.40"
|
|
36610
|
+
collection_interval: 30s
|
|
36611
|
+
metrics:
|
|
36612
|
+
container.restarts:
|
|
36613
|
+
enabled: true
|
|
36614
|
+
container.uptime:
|
|
36615
|
+
enabled: true
|
|
36616
|
+
|
|
36617
|
+
connectors:
|
|
36618
|
+
# Span\u2192metrics computed from 100% of spans (pipeline runs BEFORE tail
|
|
36619
|
+
# sampling) \u2014 lowering the sampling policy later never skews dashboards.
|
|
36620
|
+
spanmetrics:
|
|
36621
|
+
histogram:
|
|
36622
|
+
unit: ms
|
|
36623
|
+
explicit:
|
|
36624
|
+
buckets: [2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1s, 2s, 5s, 10s]
|
|
36625
|
+
metrics_flush_interval: 15s
|
|
36626
|
+
# Emits traces_service_graph_* (same metric names Tempo's generator would).
|
|
36627
|
+
servicegraph:
|
|
36628
|
+
metrics_flush_interval: 15s
|
|
36629
|
+
store:
|
|
36630
|
+
ttl: 5s
|
|
36631
|
+
max_items: 5000
|
|
36632
|
+
# Joins the raw-trace pipeline to the sampled-storage pipeline.
|
|
36633
|
+
forward: {}
|
|
36634
|
+
|
|
36411
36635
|
processors:
|
|
36412
36636
|
batch:
|
|
36413
36637
|
timeout: 5s
|
|
@@ -36418,7 +36642,8 @@ processors:
|
|
|
36418
36642
|
# Errors + slow traces zachowywane w 100%, normalne traces r\xF3wnie\u017C 100%
|
|
36419
36643
|
# przy obecnej skali (boostrap produkcji). Tail sampling matchuje OR po
|
|
36420
36644
|
# policies \u2014 bez "always" policy WSZYSTKIE OK traces by\u0142yby droppowane.
|
|
36421
|
-
# Obni\u017C 'random_100pct' do np. 10% gdy ruch eksploduje
|
|
36645
|
+
# Obni\u017C 'random_100pct' do np. 10% gdy ruch eksploduje \u2014 span-metrics s\u0105
|
|
36646
|
+
# liczone przed samplingiem, wi\u0119c dashboardy pozostan\u0105 dok\u0142adne.
|
|
36422
36647
|
tail_sampling:
|
|
36423
36648
|
decision_wait: 10s
|
|
36424
36649
|
num_traces: 50000
|
|
@@ -36443,6 +36668,34 @@ processors:
|
|
|
36443
36668
|
- key: http.request.header.cookie
|
|
36444
36669
|
action: delete
|
|
36445
36670
|
|
|
36671
|
+
# Cardinality guard for span-metrics: the SPA fallback answers EVERY path,
|
|
36672
|
+
# so raw span names (one per bot-scanned URL) would explode Prometheus
|
|
36673
|
+
# series. Static assets collapse to "<METHOD> static", /route/* to
|
|
36674
|
+
# "<METHOD> /route", anything else outside the known API surface to
|
|
36675
|
+
# "<METHOD> other". Runs BEFORE the spanmetrics connector. Note: "$$" is a
|
|
36676
|
+
# literal "$" (collector env expansion), RE2 has no lookahead \u2192 IsMatch+not.
|
|
36677
|
+
transform/span_names:
|
|
36678
|
+
error_mode: ignore
|
|
36679
|
+
trace_statements:
|
|
36680
|
+
- context: span
|
|
36681
|
+
statements:
|
|
36682
|
+
- set(name, Concat([attributes["http.request.method"], "static"], " ")) where IsMatch(name, "^[A-Z]+ /.*\\\\.(js|mjs|css|map|ico|png|jpe?g|svg|gif|webp|avif|woff2?|ttf|otf|txt|xml|json|webmanifest)$$")
|
|
36683
|
+
- replace_pattern(name, "^([A-Z]+) /route(/.*)?$$", "$$1 /route")
|
|
36684
|
+
- set(name, Concat([attributes["http.request.method"], "other"], " ")) where IsMatch(name, "^[A-Z]+ /") and not IsMatch(name, "^[A-Z]+ ((/api|/command|/query)(/.*)?|/route|/ws|/health|/otel(/.*)?|/)$$")
|
|
36685
|
+
|
|
36686
|
+
# Stable service.name for infra metric streams (becomes the service_name
|
|
36687
|
+
# label after resource_to_telemetry_conversion).
|
|
36688
|
+
resource/host:
|
|
36689
|
+
attributes:
|
|
36690
|
+
- key: service.name
|
|
36691
|
+
value: arc-host
|
|
36692
|
+
action: upsert
|
|
36693
|
+
resource/docker:
|
|
36694
|
+
attributes:
|
|
36695
|
+
- key: service.name
|
|
36696
|
+
value: arc-docker
|
|
36697
|
+
action: upsert
|
|
36698
|
+
|
|
36446
36699
|
exporters:
|
|
36447
36700
|
otlp/tempo:
|
|
36448
36701
|
endpoint: tempo:4317
|
|
@@ -36458,6 +36711,10 @@ exporters:
|
|
|
36458
36711
|
endpoint: http://prometheus:9090/api/v1/write
|
|
36459
36712
|
tls:
|
|
36460
36713
|
insecure: true
|
|
36714
|
+
# Copy resource attributes (service.name, deployment.environment, \u2026)
|
|
36715
|
+
# onto every series \u2014 dashboards filter by service_name.
|
|
36716
|
+
resource_to_telemetry_conversion:
|
|
36717
|
+
enabled: true
|
|
36461
36718
|
|
|
36462
36719
|
extensions:
|
|
36463
36720
|
health_check: {}
|
|
@@ -36465,19 +36722,42 @@ extensions:
|
|
|
36465
36722
|
|
|
36466
36723
|
service:
|
|
36467
36724
|
extensions: [health_check, zpages]
|
|
36725
|
+
# Collector self-metrics. Since 0.111 the default bind is localhost only \u2014
|
|
36726
|
+
# Prometheus scrapes otel-collector:8888, so listen on all interfaces.
|
|
36727
|
+
telemetry:
|
|
36728
|
+
metrics:
|
|
36729
|
+
level: detailed
|
|
36730
|
+
readers:
|
|
36731
|
+
- pull:
|
|
36732
|
+
exporter:
|
|
36733
|
+
prometheus:
|
|
36734
|
+
host: "0.0.0.0"
|
|
36735
|
+
port: 8888
|
|
36468
36736
|
pipelines:
|
|
36469
|
-
traces:
|
|
36737
|
+
traces/in:
|
|
36470
36738
|
receivers: [otlp]
|
|
36471
|
-
processors: [
|
|
36739
|
+
processors: [attributes, transform/span_names]
|
|
36740
|
+
exporters: [spanmetrics, servicegraph, forward]
|
|
36741
|
+
traces/sampled:
|
|
36742
|
+
receivers: [forward]
|
|
36743
|
+
processors: [tail_sampling, batch]
|
|
36472
36744
|
exporters: [otlp/tempo]
|
|
36473
36745
|
logs:
|
|
36474
36746
|
receivers: [otlp]
|
|
36475
36747
|
processors: [attributes, batch]
|
|
36476
36748
|
exporters: [otlphttp/loki]
|
|
36477
36749
|
metrics:
|
|
36478
|
-
receivers: [otlp]
|
|
36750
|
+
receivers: [otlp, spanmetrics, servicegraph]
|
|
36479
36751
|
processors: [batch]
|
|
36480
36752
|
exporters: [prometheusremotewrite]
|
|
36753
|
+
metrics/host:
|
|
36754
|
+
receivers: [hostmetrics]
|
|
36755
|
+
processors: [resource/host, batch]
|
|
36756
|
+
exporters: [prometheusremotewrite]
|
|
36757
|
+
metrics/docker:
|
|
36758
|
+
receivers: [docker_stats]
|
|
36759
|
+
processors: [resource/docker, batch]
|
|
36760
|
+
exporters: [prometheusremotewrite]
|
|
36481
36761
|
`;
|
|
36482
36762
|
}
|
|
36483
36763
|
function generateTempoConfig(cfg) {
|
|
@@ -36513,20 +36793,9 @@ storage:
|
|
|
36513
36793
|
wal:
|
|
36514
36794
|
path: /var/tempo/wal
|
|
36515
36795
|
|
|
36516
|
-
metrics_generator
|
|
36517
|
-
|
|
36518
|
-
|
|
36519
|
-
source: tempo
|
|
36520
|
-
storage:
|
|
36521
|
-
path: /var/tempo/generator/wal
|
|
36522
|
-
remote_write:
|
|
36523
|
-
- url: http://prometheus:9090/api/v1/write
|
|
36524
|
-
send_exemplars: true
|
|
36525
|
-
|
|
36526
|
-
overrides:
|
|
36527
|
-
defaults:
|
|
36528
|
-
metrics_generator:
|
|
36529
|
-
processors: [service-graphs, span-metrics]
|
|
36796
|
+
# NOTE: no metrics_generator \u2014 span-metrics + service-graph are produced by
|
|
36797
|
+
# the otel-collector connectors BEFORE tail sampling (accurate rates even
|
|
36798
|
+
# when sampling is later tightened) and remote-written to Prometheus there.
|
|
36530
36799
|
`;
|
|
36531
36800
|
}
|
|
36532
36801
|
function generateLokiConfig(cfg) {
|
|
@@ -36582,11 +36851,214 @@ scrape_configs:
|
|
|
36582
36851
|
- job_name: otel-collector
|
|
36583
36852
|
static_configs:
|
|
36584
36853
|
- targets: [otel-collector:8888]
|
|
36854
|
+
- job_name: caddy
|
|
36855
|
+
static_configs:
|
|
36856
|
+
- targets: [caddy:2020]
|
|
36857
|
+
- job_name: loki
|
|
36858
|
+
static_configs:
|
|
36859
|
+
- targets: [loki:3100]
|
|
36860
|
+
- job_name: tempo
|
|
36861
|
+
static_configs:
|
|
36862
|
+
- targets: [tempo:3200]
|
|
36863
|
+
- job_name: grafana
|
|
36864
|
+
static_configs:
|
|
36865
|
+
- targets: [grafana:3000]
|
|
36866
|
+
- job_name: alloy
|
|
36867
|
+
static_configs:
|
|
36868
|
+
- targets: [alloy:12345]
|
|
36585
36869
|
|
|
36586
36870
|
# remote-write inbound is enabled via the --web.enable-remote-write-receiver
|
|
36587
36871
|
# command-line flag (compose.ts). Retention via --storage.tsdb.retention.time.
|
|
36588
36872
|
`;
|
|
36589
36873
|
}
|
|
36874
|
+
/**
 * Builds the Grafana Alloy configuration that ships Docker container logs to
 * Loki.
 *
 * The emitted pipeline has four components:
 *  - `discovery.docker`   — lists containers through the Docker socket, keeping
 *                           only those that carry a compose-project label;
 *  - `discovery.relabel`  — derives the `container` and `compose_service` labels;
 *  - `loki.source.docker` — tails the discovered containers' logs;
 *  - `loki.write`         — pushes the entries to `http://loki:3100`.
 *
 * @returns {string} The Alloy configuration text, terminated by a newline.
 */
function generateAlloyConfig() {
  const dockerHost = 'host = "unix:///var/run/docker.sock"';
  const discoveredTargets = "targets = discovery.docker.containers.targets";

  const configLines = [
    "// Generated by `arc platform deploy` \u2014 do not edit by hand.",
    'discovery.docker "containers" {',
    dockerHost,
    'refresh_interval = "15s"',
    "",
    "// Only containers managed by a compose project (our stack). Ad-hoc / rogue",
    "// containers (manual debug runs, other stacks) are excluded \u2014 one bad",
    "// stream (e.g. log entries older than Loki's reject window) otherwise 400s",
    "// the whole loki.write batch and drops good app logs with it.",
    "filter {",
    'name = "label"',
    'values = ["com.docker.compose.project"]',
    "}",
    "}",
    "",
    'discovery.relabel "containers" {',
    discoveredTargets,
    "",
    "rule {",
    'source_labels = ["__meta_docker_container_name"]',
    'regex = "/(.*)"',
    'target_label = "container"',
    "}",
    "rule {",
    'source_labels = ["__meta_docker_container_label_com_docker_compose_service"]',
    'target_label = "compose_service"',
    "}",
    "}",
    "",
    'loki.source.docker "containers" {',
    dockerHost,
    discoveredTargets,
    "relabel_rules = discovery.relabel.containers.rules",
    'labels = { source = "docker" }',
    "forward_to = [loki.write.loki.receiver]",
    "}",
    "",
    'loki.write "loki" {',
    "endpoint {",
    'url = "http://loki:3100/loki/api/v1/push"',
    "}",
    "}",
    // Trailing empty entry so the joined text ends with a newline.
    "",
  ];

  return configLines.join("\n");
}
|
|
36919
|
+
/**
 * Builds the Grafana alerting provisioning file (YAML) for the Arc stack.
 *
 * The file always contains one rule group (`arc-alerts`, folder `Arc`,
 * evaluated every minute). Each rule is a three-step pipeline: a Prometheus
 * instant query (A), a `last` reduction (B) and a threshold check (C).
 * When `cfg.observability.alertWebhookUrl` is set, a webhook contact point
 * and a notification policy routing to it are appended.
 *
 * @param {object} [cfg] Deploy configuration; only
 *   `cfg.observability.alertWebhookUrl` is read.
 * @returns {string} YAML text terminated by a newline.
 */
function generateGrafanaAlerting(cfg) {
  // Optional chaining on `cfg` itself: a missing config simply means
  // "no webhook" instead of a TypeError.
  const webhookUrl = cfg?.observability?.alertWebhookUrl;

  // `op` defaults to "gt"; `threshold` is compared against the reduced value.
  const rules = [
    {
      uid: "arc-high-error-rate",
      title: "High server error rate (>5%)",
      expr: 'sum(rate(traces_span_metrics_calls_total{span_kind="SPAN_KIND_SERVER", status_code="STATUS_CODE_ERROR"}[5m])) / clamp_min(sum(rate(traces_span_metrics_calls_total{span_kind="SPAN_KIND_SERVER"}[5m])), 0.001)',
      threshold: 0.05,
      pendingFor: "5m",
      summary: "More than 5% of server spans are errors over the last 5 minutes."
    },
    {
      uid: "arc-high-latency-p95",
      title: "High p95 latency (>1s)",
      expr: 'histogram_quantile(0.95, sum by (le) (rate(traces_span_metrics_duration_milliseconds_bucket{span_kind="SPAN_KIND_SERVER"}[5m])))',
      threshold: 1000,
      pendingFor: "10m",
      summary: "Server p95 latency above 1s for 10 minutes."
    },
    {
      uid: "arc-host-disk-high",
      title: "Host disk usage >85%",
      expr: 'max by (mountpoint) (sum by (device, mountpoint) (system_filesystem_usage_bytes{state="used"}) / sum by (device, mountpoint) (system_filesystem_usage_bytes))',
      threshold: 0.85,
      pendingFor: "15m",
      summary: "A host filesystem is more than 85% full."
    },
    {
      uid: "arc-host-memory-high",
      title: "Host memory usage >90%",
      expr: 'sum(system_memory_usage_bytes{state="used"}) / sum(system_memory_usage_bytes)',
      threshold: 0.9,
      pendingFor: "10m",
      summary: "Host memory usage above 90% for 10 minutes."
    },
    {
      uid: "arc-container-restarts",
      title: "Container restarted",
      expr: "sum by (container_name) (increase(container_restarts_total[15m]))",
      threshold: 0,
      pendingFor: "0s",
      summary: "A container restarted within the last 15 minutes."
    },
    {
      uid: "arc-app-silent",
      title: "App stopped reporting metrics",
      expr: "absent(arc_commands_total)",
      threshold: 0,
      pendingFor: "10m",
      summary: "No arc_commands_total series for 10 minutes \u2014 app down or telemetry broken."
    },
    {
      uid: "arc-collector-export-failures",
      title: "Telemetry export failures",
      // The counter names are matched with an optional `_total` suffix:
      // collector self-metrics served by an explicitly configured Prometheus
      // reader may carry it, and without the alternative the selector would
      // match nothing and the alert could never fire.
      expr: 'sum(increase({__name__=~"otelcol_exporter_send_failed_(spans|metric_points|log_records)(_total)?"}[15m]))',
      threshold: 0,
      pendingFor: "0s",
      summary: "The otel-collector failed to export telemetry within the last 15 minutes."
    },
    {
      uid: "arc-target-down",
      title: "Scrape target down",
      expr: "min(up)",
      threshold: 1,
      op: "lt",
      pendingFor: "5m",
      summary: "A Prometheus scrape target has been down for 5 minutes."
    }
  ];

  // Renders one rule as a list item nested under `groups[0].rules`
  // (item dash at 6 spaces, rule keys at 8). Free-text values go through
  // JSON.stringify, which yields a valid double-quoted YAML scalar.
  const renderRule = (rule) => [
    `      - uid: ${rule.uid}`,
    `        title: ${JSON.stringify(rule.title)}`,
    "        condition: C",
    `        for: ${rule.pendingFor}`,
    "        noDataState: OK",
    "        execErrState: OK",
    "        annotations:",
    `          summary: ${JSON.stringify(rule.summary)}`,
    "        labels:",
    "          source: arc",
    "        data:",
    "          - refId: A",
    "            relativeTimeRange: { from: 600, to: 0 }",
    "            datasourceUid: prometheus",
    "            model:",
    `              expr: ${JSON.stringify(rule.expr)}`,
    "              instant: true",
    "              intervalMs: 1000",
    "              maxDataPoints: 43200",
    "              refId: A",
    "          - refId: B",
    "            relativeTimeRange: { from: 0, to: 0 }",
    "            datasourceUid: __expr__",
    "            model:",
    "              type: reduce",
    "              expression: A",
    "              reducer: last",
    "              refId: B",
    "          - refId: C",
    "            relativeTimeRange: { from: 0, to: 0 }",
    "            datasourceUid: __expr__",
    "            model:",
    "              type: threshold",
    "              expression: B",
    "              refId: C",
    "              conditions:",
    "                - evaluator:",
    `                    type: ${rule.op ?? "gt"}`,
    `                    params: [${rule.threshold}]`
  ].join("\n");

  const ruleYaml = rules.map(renderRule).join("\n");

  // Contact point + routing policy, only when a webhook is configured.
  // Starts with an empty line (separator from the groups section) and ends
  // with one (trailing newline of the file).
  const contactSection = webhookUrl ? [
    "",
    "contactPoints:",
    "  - orgId: 1",
    "    name: arc-webhook",
    "    receivers:",
    "      - uid: arc-webhook",
    "        type: webhook",
    "        settings:",
    `          url: ${JSON.stringify(webhookUrl)}`,
    "          httpMethod: POST",
    "",
    "policies:",
    "  - orgId: 1",
    "    receiver: arc-webhook",
    '    group_by: ["grafana_folder", "alertname"]',
    "    group_wait: 30s",
    "    group_interval: 5m",
    "    repeat_interval: 4h",
    ""
  ].join("\n") : "";

  return [
    "# Generated by `arc platform deploy` \u2014 do not edit by hand.",
    "apiVersion: 1",
    "",
    "groups:",
    "  - orgId: 1",
    "    name: arc-alerts",
    "    folder: Arc",
    "    interval: 1m",
    "    rules:",
    ruleYaml,
    contactSection
  ].join("\n");
}
|
|
36590
37062
|
function generateGrafanaDatasources() {
|
|
36591
37063
|
return `# Generated by \`arc platform deploy\` \u2014 do not edit by hand.
|
|
36592
37064
|
apiVersion: 1
|
|
@@ -36610,10 +37082,17 @@ datasources:
|
|
|
36610
37082
|
uid: loki
|
|
36611
37083
|
jsonData:
|
|
36612
37084
|
derivedFields:
|
|
37085
|
+
# Plain-text logs that happen to contain "trace_id=<id>".
|
|
36613
37086
|
- datasourceUid: tempo
|
|
36614
37087
|
matcherRegex: "trace_id=(\\\\w+)"
|
|
36615
37088
|
name: TraceID
|
|
36616
37089
|
url: $\${__value.raw}
|
|
37090
|
+
# OTLP-ingested logs \u2014 trace_id arrives as structured metadata.
|
|
37091
|
+
- datasourceUid: tempo
|
|
37092
|
+
matcherType: label
|
|
37093
|
+
matcherRegex: trace_id
|
|
37094
|
+
name: TraceID (OTLP)
|
|
37095
|
+
url: $\${__value.raw}
|
|
36617
37096
|
- name: Prometheus
|
|
36618
37097
|
type: prometheus
|
|
36619
37098
|
access: proxy
|
|
@@ -36656,7 +37135,7 @@ function generateArcOverviewDashboard() {
|
|
|
36656
37135
|
label: "Service",
|
|
36657
37136
|
type: "query",
|
|
36658
37137
|
datasource: { type: "prometheus", uid: "prometheus" },
|
|
36659
|
-
query: "label_values(
|
|
37138
|
+
query: "label_values(traces_span_metrics_calls_total, service_name)",
|
|
36660
37139
|
refresh: 2,
|
|
36661
37140
|
includeAll: false,
|
|
36662
37141
|
multi: false,
|
|
@@ -36665,29 +37144,29 @@ function generateArcOverviewDashboard() {
|
|
|
36665
37144
|
]
|
|
36666
37145
|
},
|
|
36667
37146
|
panels: [
|
|
36668
|
-
panelStat("Request rate (req/s)", { x: 0, y: 0, w: 6, h: 4 }, 'sum(rate(
|
|
36669
|
-
panelStat("Error rate (%)", { x: 6, y: 0, w: 6, h: 4 }, 'sum(rate(
|
|
36670
|
-
panelStat("P99 latency", { x: 12, y: 0, w: 6, h: 4 }, 'histogram_quantile(0.99, sum(rate(
|
|
37147
|
+
panelStat("Request rate (req/s)", { x: 0, y: 0, w: 6, h: 4 }, 'sum(rate(traces_span_metrics_calls_total{service_name="$service", span_kind="SPAN_KIND_SERVER"}[5m]))', "reqps"),
|
|
37148
|
+
panelStat("Error rate (%)", { x: 6, y: 0, w: 6, h: 4 }, 'sum(rate(traces_span_metrics_calls_total{service_name="$service", span_kind="SPAN_KIND_SERVER", status_code="STATUS_CODE_ERROR"}[5m])) / clamp_min(sum(rate(traces_span_metrics_calls_total{service_name="$service", span_kind="SPAN_KIND_SERVER"}[5m])), 0.001) * 100', "percent", { red: 1, orange: 0.1 }),
|
|
37149
|
+
panelStat("P99 latency", { x: 12, y: 0, w: 6, h: 4 }, 'histogram_quantile(0.99, sum(rate(traces_span_metrics_duration_milliseconds_bucket{service_name="$service", span_kind="SPAN_KIND_SERVER"}[5m])) by (le))', "ms", { red: 1000, orange: 300 }),
|
|
36671
37150
|
panelStat("Active commands/sec", { x: 18, y: 0, w: 6, h: 4 }, 'sum(rate(arc_commands_total{service_name="$service"}[5m]))', "ops"),
|
|
36672
|
-
panelTimeseries("Request rate by route", { x: 0, y: 4, w: 12, h: 8 }, 'sum by (span_name) (rate(
|
|
37151
|
+
panelTimeseries("Request rate by route", { x: 0, y: 4, w: 12, h: 8 }, 'sum by (span_name) (rate(traces_span_metrics_calls_total{service_name="$service", span_kind="SPAN_KIND_SERVER"}[1m]))', "{{span_name}}", "reqps"),
|
|
36673
37152
|
panelTimeseries("Latency percentiles", { x: 12, y: 4, w: 12, h: 8 }, [
|
|
36674
37153
|
{
|
|
36675
|
-
expr: 'histogram_quantile(0.5, sum(rate(
|
|
37154
|
+
expr: 'histogram_quantile(0.5, sum(rate(traces_span_metrics_duration_milliseconds_bucket{service_name="$service", span_kind="SPAN_KIND_SERVER"}[5m])) by (le))',
|
|
36676
37155
|
legend: "p50"
|
|
36677
37156
|
},
|
|
36678
37157
|
{
|
|
36679
|
-
expr: 'histogram_quantile(0.95, sum(rate(
|
|
37158
|
+
expr: 'histogram_quantile(0.95, sum(rate(traces_span_metrics_duration_milliseconds_bucket{service_name="$service", span_kind="SPAN_KIND_SERVER"}[5m])) by (le))',
|
|
36680
37159
|
legend: "p95"
|
|
36681
37160
|
},
|
|
36682
37161
|
{
|
|
36683
|
-
expr: 'histogram_quantile(0.99, sum(rate(
|
|
37162
|
+
expr: 'histogram_quantile(0.99, sum(rate(traces_span_metrics_duration_milliseconds_bucket{service_name="$service", span_kind="SPAN_KIND_SERVER"}[5m])) by (le))',
|
|
36684
37163
|
legend: "p99"
|
|
36685
37164
|
}
|
|
36686
|
-
], "ms"),
|
|
37165
|
+
], "", "ms"),
|
|
36687
37166
|
panelTimeseries("Commands per second", { x: 0, y: 12, w: 12, h: 8 }, 'sum by (arc_command_name) (rate(arc_commands_total{service_name="$service"}[1m]))', "{{arc_command_name}}", "ops"),
|
|
36688
|
-
panelTimeseries("Command p95 latency", { x: 12, y: 12, w: 12, h: 8 }, 'histogram_quantile(0.95, sum by (arc_command_name, le) (rate(
|
|
36689
|
-
panelTimeseries("DB
|
|
36690
|
-
panelTimeseries("DB
|
|
37167
|
+
panelTimeseries("Command p95 latency", { x: 12, y: 12, w: 12, h: 8 }, 'histogram_quantile(0.95, sum by (arc_command_name, le) (rate(arc_command_duration_milliseconds_bucket{service_name="$service"}[5m])))', "{{arc_command_name}}", "ms"),
|
|
37168
|
+
panelTimeseries("DB ops/sec by collection", { x: 0, y: 20, w: 12, h: 8 }, 'sum by (db_collection_name, db_operation_name) (rate(arc_db_operation_duration_milliseconds_count{service_name="$service"}[1m]))', "{{db_collection_name}} {{db_operation_name}}", "ops"),
|
|
37169
|
+
panelTimeseries("DB p95 latency by operation", { x: 12, y: 20, w: 12, h: 8 }, 'histogram_quantile(0.95, sum by (db_operation_name, le) (rate(arc_db_operation_duration_milliseconds_bucket{service_name="$service"}[5m])))', "{{db_operation_name}}", "ms"),
|
|
36691
37170
|
{
|
|
36692
37171
|
title: "Recent error logs",
|
|
36693
37172
|
type: "logs",
|
|
@@ -36695,7 +37174,7 @@ function generateArcOverviewDashboard() {
|
|
|
36695
37174
|
datasource: { type: "loki", uid: "loki" },
|
|
36696
37175
|
targets: [
|
|
36697
37176
|
{
|
|
36698
|
-
expr: '{service_name="$service"}
|
|
37177
|
+
expr: '{service_name="$service"} | severity_text=~`ERROR|FATAL`',
|
|
36699
37178
|
refId: "A"
|
|
36700
37179
|
}
|
|
36701
37180
|
],
|
|
@@ -36729,7 +37208,7 @@ function generateArcTracesDashboard() {
|
|
|
36729
37208
|
label: "Service",
|
|
36730
37209
|
type: "query",
|
|
36731
37210
|
datasource: { type: "prometheus", uid: "prometheus" },
|
|
36732
|
-
query: "label_values(
|
|
37211
|
+
query: "label_values(traces_span_metrics_calls_total, service_name)",
|
|
36733
37212
|
refresh: 2,
|
|
36734
37213
|
current: { text: "arc-prod", value: "arc-prod" }
|
|
36735
37214
|
}
|
|
@@ -36828,12 +37307,23 @@ function generateArcLogsDashboard() {
|
|
|
36828
37307
|
type: "textbox",
|
|
36829
37308
|
query: "",
|
|
36830
37309
|
current: { text: "", value: "" }
|
|
37310
|
+
},
|
|
37311
|
+
{
|
|
37312
|
+
name: "container",
|
|
37313
|
+
label: "Container",
|
|
37314
|
+
type: "query",
|
|
37315
|
+
datasource: { type: "loki", uid: "loki" },
|
|
37316
|
+
query: "label_values(container)",
|
|
37317
|
+
refresh: 2,
|
|
37318
|
+
includeAll: true,
|
|
37319
|
+
multi: false,
|
|
37320
|
+
current: { text: "All", value: "$__all" }
|
|
36831
37321
|
}
|
|
36832
37322
|
]
|
|
36833
37323
|
},
|
|
36834
37324
|
panels: [
|
|
36835
|
-
panelStat("Logs ingested (1h)", { x: 0, y: 0, w: 6, h: 4 }, 'sum(
|
|
36836
|
-
panelStat("Errors (1h)", { x: 6, y: 0, w: 6, h: 4 }, 'sum(
|
|
37325
|
+
panelStat("Logs ingested (1h)", { x: 0, y: 0, w: 6, h: 4 }, 'sum(count_over_time({service_name="$service"}[1h]))', "short", undefined, LOKI_DS),
|
|
37326
|
+
panelStat("Errors (1h)", { x: 6, y: 0, w: 6, h: 4 }, 'sum(count_over_time({service_name="$service"} | severity_text=~`ERROR|FATAL` [1h]))', "short", { orange: 1, red: 50 }, LOKI_DS),
|
|
36837
37327
|
{
|
|
36838
37328
|
title: "Log volume by severity",
|
|
36839
37329
|
type: "timeseries",
|
|
@@ -36883,6 +37373,27 @@ function generateArcLogsDashboard() {
|
|
|
36883
37373
|
dedupStrategy: "none",
|
|
36884
37374
|
sortOrder: "Descending"
|
|
36885
37375
|
}
|
|
37376
|
+
},
|
|
37377
|
+
{
|
|
37378
|
+
title: "Container logs ($container)",
|
|
37379
|
+
type: "logs",
|
|
37380
|
+
gridPos: { x: 0, y: 26, w: 24, h: 14 },
|
|
37381
|
+
datasource: { type: "loki", uid: "loki" },
|
|
37382
|
+
targets: [
|
|
37383
|
+
{
|
|
37384
|
+
expr: '{container=~"$container"} |~ "$search"',
|
|
37385
|
+
refId: "A"
|
|
37386
|
+
}
|
|
37387
|
+
],
|
|
37388
|
+
options: {
|
|
37389
|
+
showTime: true,
|
|
37390
|
+
showLabels: true,
|
|
37391
|
+
showCommonLabels: false,
|
|
37392
|
+
wrapLogMessage: true,
|
|
37393
|
+
enableLogDetails: true,
|
|
37394
|
+
dedupStrategy: "none",
|
|
37395
|
+
sortOrder: "Descending"
|
|
37396
|
+
}
|
|
36886
37397
|
}
|
|
36887
37398
|
]
|
|
36888
37399
|
};
|
|
@@ -36912,7 +37423,7 @@ function generateArcSamplingDashboard() {
|
|
|
36912
37423
|
expr: "sum(rate(otelcol_exporter_sent_spans[1m]))",
|
|
36913
37424
|
legend: "exported"
|
|
36914
37425
|
}
|
|
36915
|
-
], "ops"),
|
|
37426
|
+
], "", "ops"),
|
|
36916
37427
|
panelTimeseries("Collector queue size (BatchSpanProcessor)", { x: 0, y: 12, w: 12, h: 8 }, "otelcol_processor_batch_batch_send_size_sum / clamp_min(otelcol_processor_batch_batch_send_size_count, 1)", "avg batch size", "short"),
|
|
36917
37428
|
panelTimeseries("Collector process memory", { x: 12, y: 12, w: 12, h: 8 }, 'process_resident_memory_bytes{job="otel-collector"}', "RSS", "bytes")
|
|
36918
37429
|
]
|
|
@@ -36953,24 +37464,24 @@ function generateArcCommandDashboard() {
|
|
|
36953
37464
|
},
|
|
36954
37465
|
panels: [
|
|
36955
37466
|
panelStat("Call rate", { x: 0, y: 0, w: 6, h: 4 }, 'sum(rate(arc_commands_total{service_name="$service", arc_command_name="$command"}[5m]))', "ops"),
|
|
36956
|
-
panelStat("P50 latency", { x: 6, y: 0, w: 6, h: 4 }, 'histogram_quantile(0.5, sum by (le) (rate(
|
|
36957
|
-
panelStat("P95 latency", { x: 12, y: 0, w: 6, h: 4 }, 'histogram_quantile(0.95, sum by (le) (rate(
|
|
36958
|
-
panelStat("P99 latency", { x: 18, y: 0, w: 6, h: 4 }, 'histogram_quantile(0.99, sum by (le) (rate(
|
|
37467
|
+
panelStat("P50 latency", { x: 6, y: 0, w: 6, h: 4 }, 'histogram_quantile(0.5, sum by (le) (rate(arc_command_duration_milliseconds_bucket{service_name="$service", arc_command_name="$command"}[5m])))', "ms"),
|
|
37468
|
+
panelStat("P95 latency", { x: 12, y: 0, w: 6, h: 4 }, 'histogram_quantile(0.95, sum by (le) (rate(arc_command_duration_milliseconds_bucket{service_name="$service", arc_command_name="$command"}[5m])))', "ms", { orange: 200, red: 1000 }),
|
|
37469
|
+
panelStat("P99 latency", { x: 18, y: 0, w: 6, h: 4 }, 'histogram_quantile(0.99, sum by (le) (rate(arc_command_duration_milliseconds_bucket{service_name="$service", arc_command_name="$command"}[5m])))', "ms", { orange: 500, red: 2000 }),
|
|
36959
37470
|
panelTimeseries("Call rate over time", { x: 0, y: 4, w: 12, h: 8 }, 'sum(rate(arc_commands_total{service_name="$service", arc_command_name="$command"}[1m]))', "calls/s", "ops"),
|
|
36960
37471
|
panelTimeseries("Latency percentiles", { x: 12, y: 4, w: 12, h: 8 }, [
|
|
36961
37472
|
{
|
|
36962
|
-
expr: 'histogram_quantile(0.5, sum by (le) (rate(
|
|
37473
|
+
expr: 'histogram_quantile(0.5, sum by (le) (rate(arc_command_duration_milliseconds_bucket{service_name="$service", arc_command_name="$command"}[5m])))',
|
|
36963
37474
|
legend: "p50"
|
|
36964
37475
|
},
|
|
36965
37476
|
{
|
|
36966
|
-
expr: 'histogram_quantile(0.95, sum by (le) (rate(
|
|
37477
|
+
expr: 'histogram_quantile(0.95, sum by (le) (rate(arc_command_duration_milliseconds_bucket{service_name="$service", arc_command_name="$command"}[5m])))',
|
|
36967
37478
|
legend: "p95"
|
|
36968
37479
|
},
|
|
36969
37480
|
{
|
|
36970
|
-
expr: 'histogram_quantile(0.99, sum by (le) (rate(
|
|
37481
|
+
expr: 'histogram_quantile(0.99, sum by (le) (rate(arc_command_duration_milliseconds_bucket{service_name="$service", arc_command_name="$command"}[5m])))',
|
|
36971
37482
|
legend: "p99"
|
|
36972
37483
|
}
|
|
36973
|
-
], "ms"),
|
|
37484
|
+
], "", "ms"),
|
|
36974
37485
|
{
|
|
36975
37486
|
title: "Recent traces (sampled)",
|
|
36976
37487
|
type: "traces",
|
|
@@ -36989,23 +37500,108 @@ function generateArcCommandDashboard() {
|
|
|
36989
37500
|
};
|
|
36990
37501
|
return JSON.stringify(dashboard, null, 2);
|
|
36991
37502
|
}
|
|
37503
|
+
/**
 * Builds the "Arc Infrastructure" Grafana dashboard (uid `arc-infra`).
 *
 * Layout, top to bottom: a row of four stat panels (host CPU, memory, worst
 * disk mount, 1-minute load), host time series (CPU/load, memory, filesystem,
 * disk and network I/O) and per-container time series (restarts, CPU, memory,
 * network RX/TX).
 *
 * @returns {string} The dashboard definition as pretty-printed JSON.
 */
function generateArcInfraDashboard() {
  // Grid position in Grafana's 24-column layout.
  const at = (x, y, w, h) => ({ x, y, w, h });

  // Expressions and legends used by more than one panel.
  const hostCpuUsedExpr = '100 * (1 - avg(rate(system_cpu_time_seconds_total{state="idle"}[5m])))';
  const perContainer = "{{container_name}}";
  const perDeviceDirection = "{{device}} {{direction}}";

  const loadAverageTargets = ["1m", "5m", "15m"].map((span) => ({
    expr: `system_cpu_load_average_${span}`,
    legend: `load ${span}`
  }));

  const summaryRow = [
    panelStat("Host CPU used", at(0, 0, 6, 4), hostCpuUsedExpr, "percent", { orange: 70, red: 90 }),
    panelStat("Host memory used", at(6, 0, 6, 4), '100 * sum(system_memory_usage_bytes{state="used"}) / sum(system_memory_usage_bytes)', "percent", { orange: 80, red: 90 }),
    panelStat("Disk used (worst mount)", at(12, 0, 6, 4), '100 * max(sum by (device, mountpoint) (system_filesystem_usage_bytes{state="used"}) / sum by (device, mountpoint) (system_filesystem_usage_bytes))', "percent", { orange: 75, red: 85 }),
    panelStat("Load (1m)", at(18, 0, 6, 4), "system_cpu_load_average_1m", "short")
  ];

  const hostPanels = [
    panelTimeseries("Host CPU utilization", at(0, 4, 12, 8), [
      { expr: hostCpuUsedExpr, legend: "used %" },
      ...loadAverageTargets
    ], "", "short"),
    panelTimeseries("Host memory by state", at(12, 4, 12, 8), "sum by (state) (system_memory_usage_bytes)", "{{state}}", "bytes"),
    panelTimeseries("Filesystem usage by mount", at(0, 12, 12, 8), '100 * sum by (mountpoint) (system_filesystem_usage_bytes{state="used"}) / sum by (mountpoint) (system_filesystem_usage_bytes)', "{{mountpoint}}", "percent"),
    panelTimeseries("Disk I/O", at(12, 12, 12, 8), "sum by (device, direction) (rate(system_disk_io_bytes_total[5m]))", perDeviceDirection, "Bps"),
    panelTimeseries("Network I/O", at(0, 20, 12, 8), 'sum by (device, direction) (rate(system_network_io_bytes_total{device!="lo"}[5m]))', perDeviceDirection, "Bps")
  ];

  const containerPanels = [
    panelTimeseries("Container restarts (24h)", at(12, 20, 12, 8), "sum by (container_name) (increase(container_restarts_total[24h]))", perContainer, "short"),
    panelTimeseries("Container CPU", at(0, 28, 12, 8), "container_cpu_utilization_ratio", perContainer, "percent"),
    panelTimeseries("Container memory", at(12, 28, 12, 8), "container_memory_usage_total_bytes", perContainer, "bytes"),
    panelTimeseries("Container network RX", at(0, 36, 12, 8), "sum by (container_name) (rate(container_network_io_usage_rx_bytes_total[5m]))", perContainer, "Bps"),
    panelTimeseries("Container network TX", at(12, 36, 12, 8), "sum by (container_name) (rate(container_network_io_usage_tx_bytes_total[5m]))", perContainer, "Bps")
  ];

  // Key order is kept as-is because it determines the serialized JSON.
  const dashboard = {
    title: "Arc Infrastructure",
    uid: "arc-infra",
    schemaVersion: 39,
    version: 1,
    refresh: "30s",
    time: { from: "now-3h", to: "now" },
    tags: ["arc", "auto-provisioned"],
    panels: [...summaryRow, ...hostPanels, ...containerPanels]
  };

  return JSON.stringify(dashboard, null, 2);
}
|
|
37539
|
+
/**
 * Builds the "Arc Caddy / Edge" Grafana dashboard definition (uid `arc-edge`).
 *
 * The layout is assembled top to bottom: a row of four stat tiles, two rows of
 * timeseries panels, and a full-width logs panel fed by the Caddy container's
 * log stream.
 *
 * @returns {string} The dashboard serialized as JSON with 2-space indentation.
 */
function generateArcEdgeDashboard() {
  // Row y=0: four stat tiles, each 6 columns wide.
  const headlineStats = [
    panelStat("Requests/s", { x: 0, y: 0, w: 6, h: 4 }, "sum(rate(caddy_http_request_duration_seconds_count[1m]))", "reqps"),
    panelStat("In-flight requests", { x: 6, y: 0, w: 6, h: 4 }, "sum(caddy_http_requests_in_flight)", "short"),
    panelStat("Handler errors/s", { x: 12, y: 0, w: 6, h: 4 }, "sum(rate(caddy_http_request_errors_total[5m]))", "ops", { orange: 0.1, red: 1 }),
    panelStat("P95 latency", { x: 18, y: 0, w: 6, h: 4 }, "histogram_quantile(0.95, sum by (le) (rate(caddy_http_request_duration_seconds_bucket[5m])))", "s", { orange: 0.3, red: 1 })
  ];

  // Rows y=4 and y=12: half-width timeseries panels. The last one is the only
  // chart that is given an explicit datasource (LOKI_DS) instead of the default.
  const trendCharts = [
    panelTimeseries("Request rate by host", { x: 0, y: 4, w: 12, h: 8 }, "sum by (host) (rate(caddy_http_request_duration_seconds_count[1m]))", "{{host}}", "reqps"),
    panelTimeseries("Responses by status code", { x: 12, y: 4, w: 12, h: 8 }, "sum by (code) (rate(caddy_http_request_duration_seconds_count[1m]))", "{{code}}", "reqps"),
    panelTimeseries("P95 latency by host", { x: 0, y: 12, w: 12, h: 8 }, "histogram_quantile(0.95, sum by (host, le) (rate(caddy_http_request_duration_seconds_bucket[5m])))", "{{host}}", "s"),
    panelTimeseries("4xx/5xx responses (access log)", { x: 12, y: 12, w: 12, h: 8 }, 'sum by (status) (count_over_time({compose_service="caddy"} | json | status >= 400 [$__interval]))', "{{status}}", "short", LOKI_DS)
  ];

  // Row y=20: full-width logs panel, written out by hand because neither panel
  // helper used above produces a panel of type "logs".
  const accessLogPanel = {
    title: "Access log (live)",
    type: "logs",
    gridPos: { x: 0, y: 20, w: 24, h: 12 },
    datasource: { type: "loki", uid: "loki" },
    targets: [{ expr: '{compose_service="caddy"}', refId: "A" }],
    options: {
      showTime: true,
      showLabels: false,
      showCommonLabels: false,
      wrapLogMessage: true,
      enableLogDetails: true,
      dedupStrategy: "none",
      sortOrder: "Descending"
    }
  };

  const edgeDashboard = {
    title: "Arc Caddy / Edge",
    uid: "arc-edge",
    schemaVersion: 39,
    version: 1,
    refresh: "30s",
    time: { from: "now-1h", to: "now" },
    tags: ["arc", "auto-provisioned"],
    panels: [...headlineStats, ...trendCharts, accessLogPanel]
  };
  return JSON.stringify(edgeDashboard, null, 2);
}
|
|
36992
37582
|
/**
 * Generates every observability file in one pass.
 *
 * @param {object} cfg Deploy configuration, forwarded unchanged to the
 *   generators that accept it (collector, Tempo, Loki, Prometheus, alerting).
 * @returns {Object<string, *>} Map from a relative path under `observability/`
 *   to whatever the matching generator returned. Key order is: service configs
 *   first, then Grafana provisioning files, then the dashboards.
 */
function generateObservabilityConfigs(cfg) {
  // Service configuration plus Grafana provisioning files.
  const serviceFiles = {
    "observability/otel-collector-config.yaml": generateOtelCollectorConfig(cfg),
    "observability/tempo.yaml": generateTempoConfig(cfg),
    "observability/loki-config.yaml": generateLokiConfig(cfg),
    "observability/prometheus.yml": generatePrometheusConfig(cfg),
    "observability/alloy-config.alloy": generateAlloyConfig(),
    "observability/grafana-datasources.yaml": generateGrafanaDatasources(),
    "observability/grafana-dashboards.yaml": generateGrafanaDashboardsProvider(),
    "observability/grafana-alerting/alerting.yaml": generateGrafanaAlerting(cfg)
  };

  // Dashboards; none of these generators takes an argument.
  const dashboardFiles = {
    "observability/grafana-dashboards/arc-overview.json": generateArcOverviewDashboard(),
    "observability/grafana-dashboards/arc-traces.json": generateArcTracesDashboard(),
    "observability/grafana-dashboards/arc-service-map.json": generateArcServiceMapDashboard(),
    "observability/grafana-dashboards/arc-logs.json": generateArcLogsDashboard(),
    "observability/grafana-dashboards/arc-sampling.json": generateArcSamplingDashboard(),
    "observability/grafana-dashboards/arc-command.json": generateArcCommandDashboard(),
    "observability/grafana-dashboards/arc-infra.json": generateArcInfraDashboard(),
    "observability/grafana-dashboards/arc-edge.json": generateArcEdgeDashboard()
  };

  // Spreading in this order keeps the same key sequence as a single literal.
  return { ...serviceFiles, ...dashboardFiles };
}
|
|
37008
|
-
|
|
37602
|
+
// Prometheus datasource reference; used as the default `datasource` argument of panelStat and panelTimeseries.
var PROMETHEUS_DS = { type: "prometheus", uid: "prometheus" };
|
|
37603
|
+
// Loki datasource reference; passed explicitly as the `datasource` argument for panels whose query runs against logs.
var LOKI_DS = { type: "loki", uid: "loki" };
|
|
37604
|
+
function panelStat(title, gridPos, expr, unit, thresholds, datasource = PROMETHEUS_DS) {
|
|
37009
37605
|
const steps = [
|
|
37010
37606
|
{ color: "green", value: null }
|
|
37011
37607
|
];
|
|
@@ -37019,7 +37615,7 @@ function panelStat(title, gridPos, expr, unit, thresholds) {
|
|
|
37019
37615
|
title,
|
|
37020
37616
|
type: "stat",
|
|
37021
37617
|
gridPos,
|
|
37022
|
-
datasource
|
|
37618
|
+
datasource,
|
|
37023
37619
|
targets: [{ expr, refId: "A", legendFormat: title }],
|
|
37024
37620
|
fieldConfig: {
|
|
37025
37621
|
defaults: {
|
|
@@ -37037,7 +37633,7 @@ function panelStat(title, gridPos, expr, unit, thresholds) {
|
|
|
37037
37633
|
}
|
|
37038
37634
|
};
|
|
37039
37635
|
}
|
|
37040
|
-
function panelTimeseries(title, gridPos, query, legend, unit) {
|
|
37636
|
+
function panelTimeseries(title, gridPos, query, legend, unit, datasource = PROMETHEUS_DS) {
|
|
37041
37637
|
const targets = Array.isArray(query) ? query.map((q, i) => ({
|
|
37042
37638
|
expr: q.expr,
|
|
37043
37639
|
refId: String.fromCharCode(65 + i),
|
|
@@ -37047,7 +37643,7 @@ function panelTimeseries(title, gridPos, query, legend, unit) {
|
|
|
37047
37643
|
title,
|
|
37048
37644
|
type: "timeseries",
|
|
37049
37645
|
gridPos,
|
|
37050
|
-
datasource
|
|
37646
|
+
datasource,
|
|
37051
37647
|
targets,
|
|
37052
37648
|
fieldConfig: {
|
|
37053
37649
|
defaults: {
|
|
@@ -37378,11 +37974,16 @@ function validateDeployConfig(input) {
|
|
|
37378
37974
|
metrics: optionalString(retentionRaw, "observability.retention.metrics")
|
|
37379
37975
|
};
|
|
37380
37976
|
}
|
|
37977
|
+
const alertWebhookUrl = optionalString(observabilityRaw, "observability.alertWebhookUrl");
|
|
37978
|
+
if (alertWebhookUrl !== undefined && !/^https?:\/\/.+/.test(alertWebhookUrl)) {
|
|
37979
|
+
throw new Error(`deploy.arc.json: observability.alertWebhookUrl must be an http(s) URL (got "${alertWebhookUrl}")`);
|
|
37980
|
+
}
|
|
37381
37981
|
validated.observability = {
|
|
37382
37982
|
enabled: enabledRaw,
|
|
37383
37983
|
subdomain: optionalString(observabilityRaw, "observability.subdomain") ?? "observability",
|
|
37384
37984
|
adminPasswordEnv: optionalString(observabilityRaw, "observability.adminPasswordEnv") ?? "ARC_OBSERVABILITY_PASSWORD",
|
|
37385
|
-
retention
|
|
37985
|
+
retention,
|
|
37986
|
+
alertWebhookUrl
|
|
37386
37987
|
};
|
|
37387
37988
|
}
|
|
37388
37989
|
const provision = input.provision;
|
|
@@ -37678,14 +38279,14 @@ async function bootstrap(inputs) {
|
|
|
37678
38279
|
});
|
|
37679
38280
|
ok("Host bootstrapped");
|
|
37680
38281
|
}
|
|
37681
|
-
const needUpStack = state.kind !== "ready" || state.marker === null || state.marker.configHash !== inputs.configHash || !await isRegistryRunning(cfg);
|
|
38282
|
+
const needUpStack = state.kind !== "ready" || state.marker === null || state.marker.configHash !== inputs.configHash || state.marker.cliVersion !== inputs.cliVersion || !await isRegistryRunning(cfg);
|
|
37682
38283
|
if (needUpStack) {
|
|
37683
38284
|
await upStack(inputs);
|
|
37684
38285
|
ok("Docker stack up");
|
|
37685
38286
|
}
|
|
37686
38287
|
if (cfg.observability?.enabled) {
|
|
37687
38288
|
log2("Ensuring observability sidecars are running...");
|
|
37688
|
-
const obsServices = ["otel-collector", "tempo", "loki", "prometheus", "grafana"];
|
|
38289
|
+
const obsServices = ["otel-collector", "tempo", "loki", "prometheus", "alloy", "grafana"];
|
|
37689
38290
|
await assertExec(cfg.target, `cd ${cfg.target.remoteDir} && docker compose pull --ignore-pull-failures ${obsServices.join(" ")} && docker compose up -d ${obsServices.join(" ")}`);
|
|
37690
38291
|
ok("Observability stack up");
|
|
37691
38292
|
}
|
|
@@ -37752,7 +38353,7 @@ async function upStack(inputs) {
|
|
|
37752
38353
|
await scpUpload(cfg.target, join18(workDir, "docker-compose.yml"), `${cfg.target.remoteDir}/docker-compose.yml`);
|
|
37753
38354
|
await scpUpload(cfg.target, join18(workDir, "htpasswd"), `${cfg.target.remoteDir}/registry-auth/htpasswd`);
|
|
37754
38355
|
if (observabilityFiles && observabilityHtpasswd) {
|
|
37755
|
-
await assertExec(cfg.target, `mkdir -p ${cfg.target.remoteDir}/observability/grafana-dashboards`);
|
|
38356
|
+
await assertExec(cfg.target, `mkdir -p ${cfg.target.remoteDir}/observability/grafana-dashboards ${cfg.target.remoteDir}/observability/grafana-alerting`);
|
|
37756
38357
|
for (const relPath of Object.keys(observabilityFiles)) {
|
|
37757
38358
|
const localDir = dirname9(join18(workDir, relPath));
|
|
37758
38359
|
mkdirSync12(localDir, { recursive: true });
|
|
@@ -39274,7 +39875,7 @@ class ContextHandler {
|
|
|
39274
39875
|
try {
|
|
39275
39876
|
return await this.telemetry.startSpan(`command.${commandName}`, runCommand, { attributes: baseAttrs });
|
|
39276
39877
|
} finally {
|
|
39277
|
-
this.telemetry.measureSince("arc.command.
|
|
39878
|
+
this.telemetry.measureSince("arc.command.duration", start, {
|
|
39278
39879
|
"arc.command.name": commandName
|
|
39279
39880
|
});
|
|
39280
39881
|
this.telemetry.incrementCounter("arc.commands.total", 1, {
|
|
@@ -40359,6 +40960,7 @@ async function createArcServer(config) {
|
|
|
40359
40960
|
websocket: {
|
|
40360
40961
|
open(ws) {
|
|
40361
40962
|
connectionManager.addClient(ws);
|
|
40963
|
+
config.telemetry?.addUpDown("arc.ws.active_connections", 1);
|
|
40362
40964
|
},
|
|
40363
40965
|
async message(ws, messageStr) {
|
|
40364
40966
|
const client = connectionManager.getClientByWs(ws);
|
|
@@ -40371,6 +40973,9 @@ async function createArcServer(config) {
|
|
|
40371
40973
|
console.error("Failed to parse WS message:", error);
|
|
40372
40974
|
return;
|
|
40373
40975
|
}
|
|
40976
|
+
config.telemetry?.incrementCounter("arc.ws.messages", 1, {
|
|
40977
|
+
"messaging.message.type": String(message?.type ?? "unknown")
|
|
40978
|
+
});
|
|
40374
40979
|
const dispatch = async () => {
|
|
40375
40980
|
try {
|
|
40376
40981
|
for (const handler of wsHandlers) {
|
|
@@ -40408,6 +41013,7 @@ async function createArcServer(config) {
|
|
|
40408
41013
|
cleanupClientSubs(client.id);
|
|
40409
41014
|
config.onWsClose?.(client.id);
|
|
40410
41015
|
connectionManager.removeClient(client.id);
|
|
41016
|
+
config.telemetry?.addUpDown("arc.ws.active_connections", -1);
|
|
40411
41017
|
}
|
|
40412
41018
|
}
|
|
40413
41019
|
}
|
|
@@ -40726,7 +41332,8 @@ async function startPlatformServer(opts) {
|
|
|
40726
41332
|
endpoint: process.env.OTEL_EXPORTER_OTLP_ENDPOINT,
|
|
40727
41333
|
mode: devMode ? "development" : "production",
|
|
40728
41334
|
sampleRate: devMode ? 1 : 1,
|
|
40729
|
-
debug: process.env.ARC_OTEL_DEBUG === "true"
|
|
41335
|
+
debug: process.env.ARC_OTEL_DEBUG === "true",
|
|
41336
|
+
patchConsole: process.env.ARC_OTEL_PATCH_CONSOLE !== "false"
|
|
40730
41337
|
});
|
|
40731
41338
|
telemetry = init2.telemetry;
|
|
40732
41339
|
telemetryShutdown = init2.shutdown;
|