sentinelayer-cli 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +996 -996
- package/bin/create-sentinelayer.js +5 -5
- package/bin/sentinelayer-cli.js +4 -4
- package/bin/sl.js +5 -5
- package/package.json +62 -54
- package/src/agents/jules/config/definition.js +209 -209
- package/src/agents/jules/config/system-prompt.js +175 -175
- package/src/agents/jules/error-intake.js +51 -51
- package/src/agents/jules/fix-cycle.js +377 -377
- package/src/agents/jules/loop.js +367 -367
- package/src/agents/jules/pulse.js +319 -319
- package/src/agents/jules/stream.js +186 -186
- package/src/agents/jules/swarm/file-scanner.js +74 -74
- package/src/agents/jules/swarm/index.js +11 -11
- package/src/agents/jules/swarm/orchestrator.js +362 -362
- package/src/agents/jules/swarm/pattern-hunter.js +123 -123
- package/src/agents/jules/swarm/sub-agent.js +308 -308
- package/src/agents/jules/tools/auth-audit.js +226 -222
- package/src/agents/jules/tools/dispatch.js +327 -327
- package/src/agents/jules/tools/file-edit.js +180 -180
- package/src/agents/jules/tools/file-read.js +100 -100
- package/src/agents/jules/tools/frontend-analyze.js +570 -570
- package/src/agents/jules/tools/glob.js +168 -168
- package/src/agents/jules/tools/grep.js +228 -228
- package/src/agents/jules/tools/index.js +29 -29
- package/src/agents/jules/tools/path-guards.js +161 -161
- package/src/agents/jules/tools/runtime-audit.js +493 -493
- package/src/agents/jules/tools/shell.js +383 -383
- package/src/ai/aidenid.js +972 -945
- package/src/ai/client.js +508 -508
- package/src/ai/domain-target-store.js +268 -268
- package/src/ai/identity-store.js +270 -270
- package/src/ai/site-store.js +145 -145
- package/src/audit/agents/architecture.js +180 -180
- package/src/audit/agents/compliance.js +179 -179
- package/src/audit/agents/documentation.js +165 -165
- package/src/audit/agents/performance.js +145 -145
- package/src/audit/agents/security.js +215 -215
- package/src/audit/agents/testing.js +172 -172
- package/src/audit/orchestrator.js +557 -557
- package/src/audit/package.js +204 -204
- package/src/audit/registry.js +284 -284
- package/src/audit/replay.js +103 -103
- package/src/auth/http.js +113 -113
- package/src/auth/service.js +891 -848
- package/src/auth/session-store.js +359 -345
- package/src/cli.js +252 -252
- package/src/commands/ai/identity-lifecycle.js +1338 -1337
- package/src/commands/ai/provision-governance.js +1272 -1246
- package/src/commands/ai/shared.js +147 -147
- package/src/commands/ai.js +11 -11
- package/src/commands/apply.js +12 -12
- package/src/commands/audit.js +1166 -1147
- package/src/commands/auth.js +375 -366
- package/src/commands/chat.js +191 -191
- package/src/commands/config.js +184 -184
- package/src/commands/cost.js +311 -311
- package/src/commands/daemon/core.js +850 -850
- package/src/commands/daemon/extended.js +1048 -1048
- package/src/commands/daemon/shared.js +213 -213
- package/src/commands/daemon.js +11 -11
- package/src/commands/guide.js +174 -174
- package/src/commands/ingest.js +58 -58
- package/src/commands/init.js +55 -55
- package/src/commands/legacy-args.js +10 -10
- package/src/commands/mcp.js +461 -404
- package/src/commands/omargate.js +15 -15
- package/src/commands/persona.js +20 -20
- package/src/commands/plugin.js +260 -260
- package/src/commands/policy.js +132 -132
- package/src/commands/prompt.js +238 -238
- package/src/commands/review.js +704 -704
- package/src/commands/scan.js +866 -788
- package/src/commands/spec.js +716 -716
- package/src/commands/swarm.js +651 -651
- package/src/commands/telemetry.js +202 -202
- package/src/commands/watch.js +510 -510
- package/src/config/agent-dictionary.js +182 -182
- package/src/config/io.js +56 -56
- package/src/config/paths.js +18 -18
- package/src/config/schema.js +55 -55
- package/src/config/service.js +184 -184
- package/src/cost/budget.js +235 -235
- package/src/cost/history.js +188 -188
- package/src/cost/tracker.js +171 -171
- package/src/daemon/artifact-lineage.js +534 -534
- package/src/daemon/assignment-ledger.js +770 -770
- package/src/daemon/ast-parser-layer.js +258 -258
- package/src/daemon/budget-governor.js +633 -633
- package/src/daemon/callgraph-overlay.js +646 -646
- package/src/daemon/error-worker.js +626 -626
- package/src/daemon/hybrid-mapper.js +929 -929
- package/src/daemon/ingest-refresh.js +195 -0
- package/src/daemon/jira-lifecycle.js +632 -632
- package/src/daemon/operator-control.js +657 -657
- package/src/daemon/reliability-lane.js +471 -471
- package/src/daemon/watchdog.js +971 -971
- package/src/guide/generator.js +316 -316
- package/src/ingest/engine.js +918 -918
- package/src/interactive/action-menu.js +132 -0
- package/src/interactive/auto-ingest.js +111 -0
- package/src/interactive/index.js +95 -0
- package/src/interactive/workspace.js +92 -0
- package/src/legacy-cli.js +2548 -2435
- package/src/mcp/registry.js +695 -695
- package/src/memory/blackboard.js +301 -301
- package/src/memory/retrieval.js +581 -581
- package/src/plugin/manifest.js +553 -553
- package/src/policy/packs.js +144 -144
- package/src/prompt/generator.js +118 -106
- package/src/review/ai-review.js +669 -669
- package/src/review/local-review.js +1284 -1284
- package/src/review/replay.js +235 -235
- package/src/review/report.js +664 -664
- package/src/review/spec-binding.js +487 -487
- package/src/scaffold/generator.js +67 -0
- package/src/scaffold/templates.js +150 -0
- package/src/scan/generator.js +418 -351
- package/src/scan/gh-secrets.js +107 -0
- package/src/spec/generator.js +519 -519
- package/src/spec/regenerate.js +237 -237
- package/src/spec/templates.js +91 -91
- package/src/swarm/dashboard.js +247 -247
- package/src/swarm/factory.js +363 -363
- package/src/swarm/pentest.js +934 -934
- package/src/swarm/registry.js +419 -419
- package/src/swarm/report.js +158 -158
- package/src/swarm/runtime.js +576 -576
- package/src/swarm/scenario-dsl.js +272 -272
- package/src/telemetry/ledger.js +302 -302
- package/src/telemetry/session-tracker.js +118 -0
- package/src/telemetry/sync.js +190 -0
- package/src/ui/markdown.js +220 -220
|
@@ -1,471 +1,471 @@
|
|
|
1
|
-
import fsp from "node:fs/promises";
|
|
2
|
-
import path from "node:path";
|
|
3
|
-
|
|
4
|
-
import { appendAdminErrorEvent, resolveErrorDaemonStorage, runErrorDaemonWorker } from "./error-worker.js";
|
|
5
|
-
|
|
6
|
-
// Schema version stamped on reliability-lane config objects and run payloads.
const RELIABILITY_SCHEMA_VERSION = "1.0.0";
|
|
7
|
-
// Schema version stamped on maintenance billboard records.
const MAINTENANCE_SCHEMA_VERSION = "1.0.0";
|
|
8
|
-
|
|
9
|
-
/**
 * Identifiers of the reliability checks this module recognizes. Used as the
 * allow-list when normalizing check selections and as the default check set.
 * Frozen so the shared list cannot be mutated.
 */
export const RELIABILITY_CHECK_IDS = Object.freeze([
|
|
10
|
-
"aidenid_password_reset_flow",
|
|
11
|
-
"aidenid_invite_flow",
|
|
12
|
-
]);
|
|
13
|
-
|
|
14
|
-
/**
 * Coerce any value to a trimmed string.
 *
 * @param {*} value - Value to coerce; `null` and `undefined` become "".
 * @returns {string} The string form of `value` with surrounding whitespace removed.
 */
function normalizeString(value) {
  return String(value ?? "").trim();
}
|
|
20
|
-
|
|
21
|
-
/**
 * Convert a date-like value to a canonical ISO-8601 UTC string.
 *
 * @param {*} value - Candidate timestamp; it is stringified and trimmed first.
 * @param {string} [fallbackIso] - Returned as-is when `value` is blank or
 *   cannot be parsed by `Date.parse`. Defaults to the current time.
 * @returns {string} `new Date(...).toISOString()` of the parsed value, or the fallback.
 */
function normalizeIsoTimestamp(value, fallbackIso = new Date().toISOString()) {
  const text = normalizeString(value);
  const epochMs = text ? Date.parse(text) : Number.NaN;
  return Number.isFinite(epochMs) ? new Date(epochMs).toISOString() : fallbackIso;
}
|
|
32
|
-
|
|
33
|
-
/**
 * Interpret a loosely-typed flag as a boolean.
 *
 * The value is stringified, trimmed and lower-cased, then matched against the
 * accepted spellings below.
 *
 * @param {*} value - Flag to interpret.
 * @param {*} fallbackValue - Returned when `value` is blank, nullish, or not a
 *   recognized spelling.
 * @returns {*} `true`, `false`, or `fallbackValue`.
 */
function normalizeBoolean(value, fallbackValue) {
  switch (normalizeString(value).toLowerCase()) {
    case "true":
    case "1":
    case "yes":
    case "on":
      return true;
    case "false":
    case "0":
    case "no":
    case "off":
      return false;
    default:
      // Covers blank/nullish input as well as unrecognized tokens.
      return fallbackValue;
  }
}
|
|
46
|
-
|
|
47
|
-
/**
 * Turn an array or a comma-separated string into a list of non-empty,
 * trimmed strings.
 *
 * @param {*} value - Array of items, or any value whose string form is split on ",".
 * @returns {string[]} Trimmed entries with blanks removed; empty when nothing remains.
 */
function normalizeCsv(value) {
  const parts = Array.isArray(value) ? value : normalizeString(value).split(",");
  const cleaned = [];
  for (const part of parts) {
    const text = normalizeString(part);
    if (text) {
      cleaned.push(text);
    }
  }
  return cleaned;
}
|
|
60
|
-
|
|
61
|
-
/**
 * Reduce a check selection to the recognized check IDs.
 *
 * Entries are lower-cased and anything not listed in `RELIABILITY_CHECK_IDS`
 * is dropped. Duplicates are collapsed (first occurrence wins) so a repeated
 * ID cannot make the same check run and be reported more than once.
 *
 * @param {string|string[]} [checkIds=[]] - Array or comma-separated list of IDs.
 * @returns {string[]} Unique recognized IDs, in first-seen order.
 */
function normalizeChecks(checkIds = []) {
  const recognized = new Set();
  for (const rawId of normalizeCsv(checkIds)) {
    const checkId = rawId.toLowerCase();
    if (RELIABILITY_CHECK_IDS.includes(checkId)) {
      recognized.add(checkId);
    }
  }
  return [...recognized];
}
|
|
66
|
-
|
|
67
|
-
/**
 * Build the identifier for a reliability-lane run.
 *
 * @param {string} nowIso - ISO timestamp; ":" and "." are replaced with "-".
 * @param {string} [region="global"] - Region label; characters outside
 *   `[a-zA-Z0-9_-]` are replaced with "_".
 * @returns {string} ID of the form `reliability-lane-<region>-<timestamp>`.
 */
function createReliabilityRunId(nowIso, region = "global") {
  const regionSlug = normalizeString(region).replace(/[^a-zA-Z0-9_-]/g, "_");
  const stampSlug = nowIso.replace(/[:.]/g, "-");
  return `reliability-lane-${regionSlug}-${stampSlug}`;
}
|
|
71
|
-
|
|
72
|
-
/**
 * Create the billboard record used when no billboard file exists yet:
 * maintenance disabled, no message, no source/actor, no open/resolve times.
 *
 * @param {string} [nowIso] - Timestamp for `generatedAt` and `lastUpdatedAt`;
 *   defaults to the current time.
 * @returns {object} A fresh billboard record.
 */
function createDefaultBillboard(nowIso = new Date().toISOString()) {
  const stamp = normalizeIsoTimestamp(nowIso, nowIso);
  return {
    schemaVersion: MAINTENANCE_SCHEMA_VERSION,
    generatedAt: stamp,
    enabled: false,
    message: "",
    source: null,
    actor: null,
    openedAt: null,
    resolvedAt: null,
    lastUpdatedAt: stamp,
  };
}
|
|
85
|
-
|
|
86
|
-
/**
 * Coerce a stored or caller-supplied billboard into the canonical shape.
 *
 * @param {object|null} [raw={}] - Billboard as read from disk. `null` (for
 *   example a file whose content is the JSON literal `null`) is treated as an
 *   empty record instead of throwing.
 * @param {string} [nowIso] - Fallback for missing or unparseable timestamps.
 * @returns {object} Billboard with every field present and normalized.
 */
function normalizeBillboard(raw = {}, nowIso = new Date().toISOString()) {
  // The default parameter only covers `undefined`; guard `null` explicitly.
  const record = raw ?? {};
  // Optional timestamps stay null when absent instead of defaulting to "now".
  const optionalStamp = (value) => (value ? normalizeIsoTimestamp(value, nowIso) : null);

  return {
    schemaVersion: MAINTENANCE_SCHEMA_VERSION,
    generatedAt: normalizeIsoTimestamp(record.generatedAt, nowIso),
    enabled: normalizeBoolean(record.enabled, false),
    message: normalizeString(record.message),
    source: normalizeString(record.source) || null,
    actor: normalizeString(record.actor) || null,
    openedAt: optionalStamp(record.openedAt),
    resolvedAt: optionalStamp(record.resolvedAt),
    lastUpdatedAt: record.lastUpdatedAt
      ? normalizeIsoTimestamp(record.lastUpdatedAt, nowIso)
      : normalizeIsoTimestamp(nowIso, nowIso),
  };
}
|
|
101
|
-
|
|
102
|
-
/**
 * Create the lane configuration used when no config file exists yet:
 * enabled, cron "0 0 * * *" in UTC, region "us-east-1", all known checks.
 *
 * @param {string} [nowIso] - Timestamp for `generatedAt`; defaults to now.
 * @returns {object} A fresh configuration object.
 */
function createDefaultConfig(nowIso = new Date().toISOString()) {
  return {
    schemaVersion: RELIABILITY_SCHEMA_VERSION,
    generatedAt: normalizeIsoTimestamp(nowIso, nowIso),
    enabled: true,
    schedule: {
      cron: "0 0 * * *",
      timezone: "UTC",
      regions: ["us-east-1"],
    },
    // Copy rather than hand out the shared frozen array, matching normalizeConfig.
    checks: [...RELIABILITY_CHECK_IDS],
  };
}
|
|
115
|
-
|
|
116
|
-
/**
 * Coerce a stored lane configuration into the canonical shape, filling in
 * defaults for anything missing or invalid.
 *
 * @param {object|null} [raw={}] - Config as read from disk. `null` (for
 *   example a file whose content is the JSON literal `null`) is treated as an
 *   empty record instead of throwing.
 * @param {string} [nowIso] - Fallback for a missing/unparseable `generatedAt`.
 * @returns {object} Normalized configuration.
 */
function normalizeConfig(raw = {}, nowIso = new Date().toISOString()) {
  // The default parameter only covers `undefined`; guard `null` explicitly.
  const record = raw ?? {};
  const checks = normalizeChecks(record.checks);
  const regions = normalizeCsv(record.schedule?.regions);

  return {
    schemaVersion: RELIABILITY_SCHEMA_VERSION,
    generatedAt: normalizeIsoTimestamp(record.generatedAt, nowIso),
    enabled: normalizeBoolean(record.enabled, true),
    schedule: {
      cron: normalizeString(record.schedule?.cron) || "0 0 * * *",
      timezone: normalizeString(record.schedule?.timezone) || "UTC",
      regions: regions.length > 0 ? regions : ["us-east-1"],
    },
    // An empty or entirely unrecognized selection means "run every known check".
    checks: checks.length > 0 ? checks : [...RELIABILITY_CHECK_IDS],
  };
}
|
|
132
|
-
|
|
133
|
-
/**
 * Read and parse a UTF-8 JSON file.
 *
 * @param {string} filePath - File to read.
 * @param {Function} defaultFactory - Called with no arguments to produce the
 *   result when the file does not exist (ENOENT).
 * @returns {Promise<*>} Parsed JSON, or the factory's value when the file is missing.
 * @throws Any other read error, and the parse error when the content is not valid JSON.
 */
async function readJsonFile(filePath, defaultFactory) {
  let text;
  try {
    text = await fsp.readFile(filePath, "utf-8");
  } catch (error) {
    const isMissing = error !== null && typeof error === "object" && error.code === "ENOENT";
    if (!isMissing) {
      throw error;
    }
    return defaultFactory();
  }
  return JSON.parse(text);
}
|
|
144
|
-
|
|
145
|
-
/**
 * Write a value to disk as pretty-printed (2-space) JSON followed by a
 * newline, creating the parent directory first if needed.
 *
 * @param {string} filePath - Destination file; existing content is replaced.
 * @param {*} [payload={}] - Value to serialize.
 * @returns {Promise<void>}
 */
async function writeJsonFile(filePath, payload = {}) {
  const parentDir = path.dirname(filePath);
  await fsp.mkdir(parentDir, { recursive: true });
  const serialized = JSON.stringify(payload, null, 2);
  await fsp.writeFile(filePath, `${serialized}\n`, "utf-8");
}
|
|
149
|
-
|
|
150
|
-
/**
 * Append a value to a file as a single compact JSON line, creating the
 * parent directory first if needed.
 *
 * @param {string} filePath - Newline-delimited JSON file to append to.
 * @param {*} [payload={}] - Value to serialize onto one line.
 * @returns {Promise<void>}
 */
async function appendJsonLine(filePath, payload = {}) {
  const parentDir = path.dirname(filePath);
  await fsp.mkdir(parentDir, { recursive: true });
  const line = JSON.stringify(payload);
  await fsp.appendFile(filePath, `${line}\n`, "utf-8");
}
|
|
154
|
-
|
|
155
|
-
/**
 * Summarize every `*.json` run artifact in a directory, newest first.
 *
 * @param {string} runsDir - Directory holding run artifact files.
 * @param {string} outputRoot - Base directory the returned `path` values are
 *   made relative to.
 * @returns {Promise<object[]>} One summary per readable artifact
 *   (`runId`, `generatedAt`, `overallStatus`, `failureCount`, `path`), sorted
 *   by `generatedAt` descending. Empty when `runsDir` does not exist.
 * @throws Any error other than ENOENT, including JSON parse errors.
 */
async function listRunArtifacts(runsDir, outputRoot) {
  // Build the summary for one artifact, or null when its content is not an object.
  const summarizeRun = async (filePath) => {
    const payload = await readJsonFile(filePath, () => null);
    if (!payload || typeof payload !== "object") {
      return null;
    }
    const relativePath = path.relative(outputRoot, filePath);
    return {
      // Fall back to the file name when the payload carries no run ID.
      runId: normalizeString(payload.runId) || path.basename(filePath, ".json"),
      generatedAt: normalizeIsoTimestamp(payload.generatedAt, new Date().toISOString()),
      overallStatus: normalizeString(payload.overallStatus) || "UNKNOWN",
      failureCount: Number(payload.failureCount || 0),
      // Report forward slashes regardless of platform separator.
      path: normalizeString(relativePath).replace(/\\/g, "/"),
    };
  };
  const toEpoch = (run) => Date.parse(String(run.generatedAt || "")) || 0;

  try {
    const entries = await fsp.readdir(runsDir, { withFileTypes: true });
    const jsonFiles = [];
    for (const entry of entries) {
      if (entry.isFile() && entry.name.toLowerCase().endsWith(".json")) {
        jsonFiles.push(path.join(runsDir, entry.name));
      }
    }
    const summaries = await Promise.all(jsonFiles.map((filePath) => summarizeRun(filePath)));
    return summaries
      .filter(Boolean)
      .sort((left, right) => toEpoch(right) - toEpoch(left));
  } catch (error) {
    if (error && typeof error === "object" && error.code === "ENOENT") {
      return [];
    }
    throw error;
  }
}
|
|
186
|
-
|
|
187
|
-
/**
 * Resolve the file locations used by the reliability lane.
 *
 * Delegates to `resolveErrorDaemonStorage` and places everything under a
 * `reliability` directory inside that result's `baseDir`.
 *
 * @param {object} [options]
 * @param {string} [options.targetPath="."] - Forwarded to the daemon storage resolver.
 * @param {string} [options.outputDir=""] - Forwarded to the daemon storage resolver.
 * @param {object} [options.env] - Forwarded to the daemon storage resolver.
 * @param {string} [options.homeDir] - Forwarded to the daemon storage resolver.
 * @returns {Promise<object>} The daemon storage fields plus `reliabilityDir`,
 *   `configPath`, `billboardPath`, `eventsPath` and `runsDir`.
 */
export async function resolveReliabilityLaneStorage({
  targetPath = ".",
  outputDir = "",
  env,
  homeDir,
} = {}) {
  const daemonStorage = await resolveErrorDaemonStorage({ targetPath, outputDir, env, homeDir });
  const reliabilityDir = path.join(daemonStorage.baseDir, "reliability");
  const inLaneDir = (name) => path.join(reliabilityDir, name);

  return {
    ...daemonStorage,
    reliabilityDir,
    configPath: inLaneDir("lane-config.json"),
    billboardPath: inLaneDir("maintenance-billboard.json"),
    eventsPath: inLaneDir("reliability-events.ndjson"),
    runsDir: inLaneDir("runs"),
  };
}
|
|
209
|
-
|
|
210
|
-
/**
 * Update the maintenance billboard and record the change in the events log.
 *
 * The stored billboard (or a default one when none exists) is merged with the
 * supplied fields, written back, and a `maintenance_update` event is appended.
 *
 * @param {object} [options]
 * @param {string} [options.targetPath="."] - Forwarded to storage resolution.
 * @param {string} [options.outputDir=""] - Forwarded to storage resolution.
 * @param {*} [options.enabled] - New state; blank or unrecognized values keep
 *   the current state.
 * @param {string} [options.message=""] - New message; blank keeps the current one.
 * @param {string} [options.source="manual"] - Origin of the update.
 * @param {string} [options.actor="omar-operator"] - Who made the update.
 * @param {string} [options.reason=""] - Reason, recorded in the event only.
 * @param {object} [options.env] - Forwarded to storage resolution.
 * @param {string} [options.homeDir] - Forwarded to storage resolution.
 * @param {string} [options.nowIso] - Timestamp for the update; defaults to now.
 * @returns {Promise<object>} The storage paths plus the updated `billboard`.
 */
export async function setMaintenanceBillboard({
  targetPath = ".",
  outputDir = "",
  enabled,
  message = "",
  source = "manual",
  actor = "omar-operator",
  reason = "",
  env,
  homeDir,
  nowIso = new Date().toISOString(),
} = {}) {
  const stamp = normalizeIsoTimestamp(nowIso, new Date().toISOString());
  const storage = await resolveReliabilityLaneStorage({ targetPath, outputDir, env, homeDir });

  const storedBillboard = await readJsonFile(storage.billboardPath, () => createDefaultBillboard(stamp));
  const current = normalizeBillboard(storedBillboard, stamp);

  const isOpen = normalizeBoolean(enabled, current.enabled);
  const updateSource = normalizeString(source) || "manual";
  const updateActor = normalizeString(actor) || "omar-operator";

  const next = {
    ...current,
    generatedAt: stamp,
    enabled: isOpen,
    message: normalizeString(message) || current.message || "",
    source: updateSource,
    actor: updateActor,
    // Keep the original open time while open; closing leaves it untouched.
    openedAt: isOpen ? current.openedAt || stamp : current.openedAt,
    resolvedAt: isOpen ? null : stamp,
    lastUpdatedAt: stamp,
  };

  const auditEvent = {
    timestamp: stamp,
    eventType: "maintenance_update",
    enabled: next.enabled,
    source: updateSource,
    actor: updateActor,
    reason: normalizeString(reason) || null,
    message: next.message,
  };

  await Promise.all([
    writeJsonFile(storage.billboardPath, next),
    appendJsonLine(storage.eventsPath, auditEvent),
  ]);

  return { ...storage, billboard: next };
}
|
|
269
|
-
|
|
270
|
-
/**
 * Report the lane's current configuration, maintenance billboard and most
 * recent runs.
 *
 * @param {object} [options]
 * @param {string} [options.targetPath="."] - Forwarded to storage resolution.
 * @param {string} [options.outputDir=""] - Forwarded to storage resolution.
 * @param {object} [options.env] - Forwarded to storage resolution.
 * @param {string} [options.homeDir] - Forwarded to storage resolution.
 * @param {string} [options.nowIso] - Reference timestamp; defaults to now.
 * @param {number|string} [options.limit=10] - Maximum number of recent runs to
 *   return. Falsy or non-numeric values fall back to 10; values below 1 are
 *   raised to 1.
 * @returns {Promise<object>} The storage paths plus `config`, `billboard`,
 *   `runCount` and `recentRuns` (newest first).
 */
export async function getReliabilityLaneStatus({
  targetPath = ".",
  outputDir = "",
  env,
  homeDir,
  nowIso = new Date().toISOString(),
  limit = 10,
} = {}) {
  const normalizedNow = normalizeIsoTimestamp(nowIso, new Date().toISOString());
  const storage = await resolveReliabilityLaneStorage({ targetPath, outputDir, env, homeDir });

  const [configRaw, billboardRaw, runs] = await Promise.all([
    readJsonFile(storage.configPath, () => createDefaultConfig(normalizedNow)),
    readJsonFile(storage.billboardPath, () => createDefaultBillboard(normalizedNow)),
    listRunArtifacts(storage.runsDir, storage.outputRoot),
  ]);

  // Math.max(1, NaN) is NaN and slice(0, NaN) is empty, so a non-numeric
  // limit would otherwise hide every run; use the default instead.
  const requestedLimit = Math.floor(Number(limit || 10));
  const normalizedLimit = Number.isNaN(requestedLimit) ? 10 : Math.max(1, requestedLimit);

  return {
    ...storage,
    config: normalizeConfig(configRaw, normalizedNow),
    billboard: normalizeBillboard(billboardRaw, normalizedNow),
    runCount: runs.length,
    recentRuns: runs.slice(0, normalizedLimit),
  };
}
|
|
299
|
-
|
|
300
|
-
/**
 * Execute one reliability-lane run and persist its artifacts.
 *
 * The checks are synthetic: a check fails exactly when its ID appears in
 * `simulateFailures`, and durations are derived from the check's position.
 * For each failure an admin error event is queued and the error daemon worker
 * is run once afterwards. The maintenance billboard is opened on failure or
 * cleared on pass as configured, then the config, billboard, run artifact and
 * a `reliability_run` event are written.
 *
 * The region and timezone are trimmed and defaulted once, and the same values
 * are used in the run ID, check messages, queued events and the run payload.
 *
 * @param {object} [options]
 * @param {string} [options.targetPath="."] - Forwarded to storage and daemon calls.
 * @param {string} [options.outputDir=""] - Forwarded to storage and daemon calls.
 * @param {string} [options.region="us-east-1"] - Region label; blank falls back to "us-east-1".
 * @param {string} [options.timezone="UTC"] - Timezone label; blank falls back to "UTC".
 * @param {string|string[]} [options.simulateFailures=[]] - Check IDs to report as failed.
 * @param {string|string[]} [options.checks=[]] - Checks to run; empty means the configured checks.
 * @param {boolean} [options.autoOpenMaintenance=true] - Open the billboard when the run fails.
 * @param {boolean} [options.clearMaintenanceOnPass=true] - On pass, close a
 *   billboard that is enabled and whose source is "reliability_lane".
 * @param {object} [options.env] - Forwarded to storage and daemon calls.
 * @param {string} [options.homeDir] - Forwarded to storage and daemon calls.
 * @param {string} [options.nowIso] - Timestamp for the run; defaults to now.
 * @returns {Promise<object>} The storage paths plus `runId`, `runPath`,
 *   `overallStatus`, `checkCount`, `failureCount`, `checks`, `maintenance`
 *   and `worker` (null when nothing failed).
 */
export async function runReliabilityLane({
  targetPath = ".",
  outputDir = "",
  region = "us-east-1",
  timezone = "UTC",
  simulateFailures = [],
  checks = [],
  autoOpenMaintenance = true,
  clearMaintenanceOnPass = true,
  env,
  homeDir,
  nowIso = new Date().toISOString(),
} = {}) {
  const normalizedNow = normalizeIsoTimestamp(nowIso, new Date().toISOString());
  // Normalize once so every artifact of this run agrees on region and timezone.
  const normalizedRegion = normalizeString(region) || "us-east-1";
  const normalizedTimezone = normalizeString(timezone) || "UTC";
  const storage = await resolveReliabilityLaneStorage({ targetPath, outputDir, env, homeDir });

  const [configRaw, billboardRaw] = await Promise.all([
    readJsonFile(storage.configPath, () => createDefaultConfig(normalizedNow)),
    readJsonFile(storage.billboardPath, () => createDefaultBillboard(normalizedNow)),
  ]);
  const config = normalizeConfig(configRaw, normalizedNow);
  const currentBillboard = normalizeBillboard(billboardRaw, normalizedNow);

  // An explicit selection wins; otherwise run whatever the config lists.
  const selectedChecks = normalizeChecks(checks);
  const checksToRun = selectedChecks.length > 0 ? selectedChecks : config.checks;
  const failureSet = new Set(normalizeChecks(simulateFailures));

  const checkResults = checksToRun.map((checkId, index) => {
    const failed = failureSet.has(checkId);
    return {
      checkId,
      status: failed ? "FAIL" : "PASS",
      durationMs: 1200 + index * 250,
      message: failed
        ? `Synthetic check '${checkId}' failed in ${normalizedRegion}.`
        : `Synthetic check '${checkId}' passed in ${normalizedRegion}.`,
    };
  });
  const failureResults = checkResults.filter((check) => check.status === "FAIL");
  const overallStatus = failureResults.length > 0 ? "FAIL" : "PASS";

  // Queue one error event per failure (sequentially), then run the worker once.
  let workerRun = null;
  if (failureResults.length > 0) {
    for (const failure of failureResults) {
      await appendAdminErrorEvent({
        targetPath,
        outputDir,
        event: {
          source: "reliability_lane",
          service: "aidenid-synthetic",
          endpoint: `/synthetic/${failure.checkId}`,
          errorCode: `${failure.checkId.toUpperCase()}_FAILED`,
          severity: "P1",
          message: failure.message,
          metadata: {
            region: normalizedRegion,
            timezone: normalizedTimezone,
            checkId: failure.checkId,
            lane: "midnight",
          },
        },
        env,
        homeDir,
      });
    }
    workerRun = await runErrorDaemonWorker({
      targetPath,
      outputDir,
      maxEvents: Math.max(20, failureResults.length * 5),
      env,
      homeDir,
      nowIso: normalizedNow,
    });
  }

  let billboard = currentBillboard;
  if (overallStatus === "FAIL" && autoOpenMaintenance) {
    billboard = normalizeBillboard(
      {
        ...currentBillboard,
        enabled: true,
        message:
          currentBillboard.message ||
          "Scheduled midnight reliability lane detected failures. Maintenance is active while remediation is in progress.",
        source: "reliability_lane",
        actor: "omar-daemon",
        openedAt: currentBillboard.openedAt || normalizedNow,
        resolvedAt: null,
        lastUpdatedAt: normalizedNow,
      },
      normalizedNow
    );
  }
  // Only close a billboard that this lane opened; other sources are left alone.
  if (
    overallStatus === "PASS" &&
    clearMaintenanceOnPass &&
    currentBillboard.enabled &&
    normalizeString(currentBillboard.source) === "reliability_lane"
  ) {
    billboard = normalizeBillboard(
      {
        ...currentBillboard,
        enabled: false,
        resolvedAt: normalizedNow,
        lastUpdatedAt: normalizedNow,
      },
      normalizedNow
    );
  }

  const runId = createReliabilityRunId(normalizedNow, normalizedRegion);
  const runPath = path.join(storage.runsDir, `${runId}.json`);
  const runPayload = {
    schemaVersion: RELIABILITY_SCHEMA_VERSION,
    generatedAt: normalizedNow,
    runId,
    lane: "midnight_reliability",
    region: normalizedRegion,
    timezone: normalizedTimezone,
    overallStatus,
    checkCount: checkResults.length,
    failureCount: failureResults.length,
    checks: checkResults,
    configSnapshot: config,
    maintenance: billboard,
    worker: workerRun
      ? {
          runId: workerRun.runId,
          runPath: path.relative(storage.outputRoot, workerRun.runPath).replace(/\\/g, "/"),
          processedCount: workerRun.processedCount,
          queuedCount: workerRun.queuedCount,
          dedupedCount: workerRun.dedupedCount,
          queueDepth: workerRun.queueDepth,
        }
      : null,
  };

  await fsp.mkdir(storage.runsDir, { recursive: true });
  await Promise.all([
    writeJsonFile(storage.configPath, config),
    writeJsonFile(storage.billboardPath, billboard),
    writeJsonFile(runPath, runPayload),
    appendJsonLine(storage.eventsPath, {
      timestamp: normalizedNow,
      eventType: "reliability_run",
      runId,
      region: runPayload.region,
      timezone: runPayload.timezone,
      overallStatus,
      checkCount: checkResults.length,
      failureCount: failureResults.length,
      maintenanceEnabled: billboard.enabled,
    }),
  ]);

  return {
    ...storage,
    runId,
    runPath,
    overallStatus,
    checkCount: checkResults.length,
    failureCount: failureResults.length,
    checks: checkResults,
    maintenance: billboard,
    worker: runPayload.worker,
  };
}
|
|
1
|
+
import fsp from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
|
|
4
|
+
import { appendAdminErrorEvent, resolveErrorDaemonStorage, runErrorDaemonWorker } from "./error-worker.js";
|
|
5
|
+
|
|
6
|
+
// Schema version stamped on reliability-lane config objects.
const RELIABILITY_SCHEMA_VERSION = "1.0.0";
|
|
7
|
+
// Schema version stamped on maintenance billboard records.
const MAINTENANCE_SCHEMA_VERSION = "1.0.0";
|
|
8
|
+
|
|
9
|
+
/**
 * Identifiers of the reliability checks this module recognizes. Used as the
 * allow-list when normalizing check selections and as the default check set.
 * Frozen so the shared list cannot be mutated.
 */
export const RELIABILITY_CHECK_IDS = Object.freeze([
|
|
10
|
+
"aidenid_password_reset_flow",
|
|
11
|
+
"aidenid_invite_flow",
|
|
12
|
+
]);
|
|
13
|
+
|
|
14
|
+
/**
 * Coerce any value to a trimmed string.
 *
 * @param {*} value - Value to coerce; `null` and `undefined` become "".
 * @returns {string} The string form of `value` with surrounding whitespace removed.
 */
function normalizeString(value) {
  return String(value ?? "").trim();
}
|
|
20
|
+
|
|
21
|
+
/**
 * Convert a date-like value to a canonical ISO-8601 UTC string.
 *
 * @param {*} value - Candidate timestamp; it is stringified and trimmed first.
 * @param {string} [fallbackIso] - Returned as-is when `value` is blank or
 *   cannot be parsed by `Date.parse`. Defaults to the current time.
 * @returns {string} `new Date(...).toISOString()` of the parsed value, or the fallback.
 */
function normalizeIsoTimestamp(value, fallbackIso = new Date().toISOString()) {
  const text = normalizeString(value);
  const epochMs = text ? Date.parse(text) : Number.NaN;
  return Number.isFinite(epochMs) ? new Date(epochMs).toISOString() : fallbackIso;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Interpret a loosely-typed flag as a boolean.
 *
 * The value is stringified, trimmed and lower-cased, then matched against the
 * accepted spellings below.
 *
 * @param {*} value - Flag to interpret.
 * @param {*} fallbackValue - Returned when `value` is blank, nullish, or not a
 *   recognized spelling.
 * @returns {*} `true`, `false`, or `fallbackValue`.
 */
function normalizeBoolean(value, fallbackValue) {
  switch (normalizeString(value).toLowerCase()) {
    case "true":
    case "1":
    case "yes":
    case "on":
      return true;
    case "false":
    case "0":
    case "no":
    case "off":
      return false;
    default:
      // Covers blank/nullish input as well as unrecognized tokens.
      return fallbackValue;
  }
}
|
|
46
|
+
|
|
47
|
+
/**
 * Turn an array or a comma-separated string into a list of non-empty,
 * trimmed strings.
 *
 * @param {*} value - Array of items, or any value whose string form is split on ",".
 * @returns {string[]} Trimmed entries with blanks removed; empty when nothing remains.
 */
function normalizeCsv(value) {
  const parts = Array.isArray(value) ? value : normalizeString(value).split(",");
  const cleaned = [];
  for (const part of parts) {
    const text = normalizeString(part);
    if (text) {
      cleaned.push(text);
    }
  }
  return cleaned;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Reduce a check selection to the recognized check IDs.
 *
 * Entries are lower-cased and anything not listed in `RELIABILITY_CHECK_IDS`
 * is dropped. Duplicates are collapsed (first occurrence wins) so a repeated
 * ID cannot make the same check run and be reported more than once.
 *
 * @param {string|string[]} [checkIds=[]] - Array or comma-separated list of IDs.
 * @returns {string[]} Unique recognized IDs, in first-seen order.
 */
function normalizeChecks(checkIds = []) {
  const recognized = new Set();
  for (const rawId of normalizeCsv(checkIds)) {
    const checkId = rawId.toLowerCase();
    if (RELIABILITY_CHECK_IDS.includes(checkId)) {
      recognized.add(checkId);
    }
  }
  return [...recognized];
}
|
|
66
|
+
|
|
67
|
+
/**
 * Build the identifier for a reliability-lane run.
 *
 * @param {string} nowIso - ISO timestamp; ":" and "." are replaced with "-".
 * @param {string} [region="global"] - Region label; characters outside
 *   `[a-zA-Z0-9_-]` are replaced with "_".
 * @returns {string} ID of the form `reliability-lane-<region>-<timestamp>`.
 */
function createReliabilityRunId(nowIso, region = "global") {
  const regionSlug = normalizeString(region).replace(/[^a-zA-Z0-9_-]/g, "_");
  const stampSlug = nowIso.replace(/[:.]/g, "-");
  return `reliability-lane-${regionSlug}-${stampSlug}`;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Create the billboard record used when no billboard file exists yet:
 * maintenance disabled, no message, no source/actor, no open/resolve times.
 *
 * @param {string} [nowIso] - Timestamp for `generatedAt` and `lastUpdatedAt`;
 *   defaults to the current time.
 * @returns {object} A fresh billboard record.
 */
function createDefaultBillboard(nowIso = new Date().toISOString()) {
  const stamp = normalizeIsoTimestamp(nowIso, nowIso);
  return {
    schemaVersion: MAINTENANCE_SCHEMA_VERSION,
    generatedAt: stamp,
    enabled: false,
    message: "",
    source: null,
    actor: null,
    openedAt: null,
    resolvedAt: null,
    lastUpdatedAt: stamp,
  };
}
|
|
85
|
+
|
|
86
|
+
/**
 * Coerce a stored or caller-supplied billboard into the canonical shape.
 *
 * @param {object|null} [raw={}] - Billboard as read from disk. `null` (for
 *   example a file whose content is the JSON literal `null`) is treated as an
 *   empty record instead of throwing.
 * @param {string} [nowIso] - Fallback for missing or unparseable timestamps.
 * @returns {object} Billboard with every field present and normalized.
 */
function normalizeBillboard(raw = {}, nowIso = new Date().toISOString()) {
  // The default parameter only covers `undefined`; guard `null` explicitly.
  const record = raw ?? {};
  // Optional timestamps stay null when absent instead of defaulting to "now".
  const optionalStamp = (value) => (value ? normalizeIsoTimestamp(value, nowIso) : null);

  return {
    schemaVersion: MAINTENANCE_SCHEMA_VERSION,
    generatedAt: normalizeIsoTimestamp(record.generatedAt, nowIso),
    enabled: normalizeBoolean(record.enabled, false),
    message: normalizeString(record.message),
    source: normalizeString(record.source) || null,
    actor: normalizeString(record.actor) || null,
    openedAt: optionalStamp(record.openedAt),
    resolvedAt: optionalStamp(record.resolvedAt),
    lastUpdatedAt: record.lastUpdatedAt
      ? normalizeIsoTimestamp(record.lastUpdatedAt, nowIso)
      : normalizeIsoTimestamp(nowIso, nowIso),
  };
}
|
|
101
|
+
|
|
102
|
+
/**
 * Create the lane configuration used when no config file exists yet:
 * enabled, cron "0 0 * * *" in UTC, region "us-east-1", all known checks.
 *
 * @param {string} [nowIso] - Timestamp for `generatedAt`; defaults to now.
 * @returns {object} A fresh configuration object.
 */
function createDefaultConfig(nowIso = new Date().toISOString()) {
  return {
    schemaVersion: RELIABILITY_SCHEMA_VERSION,
    generatedAt: normalizeIsoTimestamp(nowIso, nowIso),
    enabled: true,
    schedule: {
      cron: "0 0 * * *",
      timezone: "UTC",
      regions: ["us-east-1"],
    },
    // Copy rather than hand out the shared frozen array, matching normalizeConfig.
    checks: [...RELIABILITY_CHECK_IDS],
  };
}
|
|
115
|
+
|
|
116
|
+
/**
 * Coerce a stored lane configuration into the canonical shape, filling in
 * defaults for anything missing or invalid.
 *
 * @param {object|null} [raw={}] - Config as read from disk. `null` (for
 *   example a file whose content is the JSON literal `null`) is treated as an
 *   empty record instead of throwing.
 * @param {string} [nowIso] - Fallback for a missing/unparseable `generatedAt`.
 * @returns {object} Normalized configuration.
 */
function normalizeConfig(raw = {}, nowIso = new Date().toISOString()) {
  // The default parameter only covers `undefined`; guard `null` explicitly.
  const record = raw ?? {};
  const checks = normalizeChecks(record.checks);
  const regions = normalizeCsv(record.schedule?.regions);

  return {
    schemaVersion: RELIABILITY_SCHEMA_VERSION,
    generatedAt: normalizeIsoTimestamp(record.generatedAt, nowIso),
    enabled: normalizeBoolean(record.enabled, true),
    schedule: {
      cron: normalizeString(record.schedule?.cron) || "0 0 * * *",
      timezone: normalizeString(record.schedule?.timezone) || "UTC",
      regions: regions.length > 0 ? regions : ["us-east-1"],
    },
    // An empty or entirely unrecognized selection means "run every known check".
    checks: checks.length > 0 ? checks : [...RELIABILITY_CHECK_IDS],
  };
}
|
|
132
|
+
|
|
133
|
+
/**
 * Read and parse a UTF-8 JSON file.
 *
 * @param {string} filePath - File to read.
 * @param {Function} defaultFactory - Called with no arguments to produce the
 *   result when the file does not exist (ENOENT).
 * @returns {Promise<*>} Parsed JSON, or the factory's value when the file is missing.
 * @throws Any other read error, and the parse error when the content is not valid JSON.
 */
async function readJsonFile(filePath, defaultFactory) {
  let text;
  try {
    text = await fsp.readFile(filePath, "utf-8");
  } catch (error) {
    const isMissing = error !== null && typeof error === "object" && error.code === "ENOENT";
    if (!isMissing) {
      throw error;
    }
    return defaultFactory();
  }
  return JSON.parse(text);
}
|
|
144
|
+
|
|
145
|
+
/**
 * Write a value to disk as pretty-printed (2-space) JSON followed by a
 * newline, creating the parent directory first if needed.
 *
 * @param {string} filePath - Destination file; existing content is replaced.
 * @param {*} [payload={}] - Value to serialize.
 * @returns {Promise<void>}
 */
async function writeJsonFile(filePath, payload = {}) {
  const parentDir = path.dirname(filePath);
  await fsp.mkdir(parentDir, { recursive: true });
  const serialized = JSON.stringify(payload, null, 2);
  await fsp.writeFile(filePath, `${serialized}\n`, "utf-8");
}
|
|
149
|
+
|
|
150
|
+
/**
 * Append a value to a file as a single compact JSON line, creating the
 * parent directory first if needed.
 *
 * @param {string} filePath - Newline-delimited JSON file to append to.
 * @param {*} [payload={}] - Value to serialize onto one line.
 * @returns {Promise<void>}
 */
async function appendJsonLine(filePath, payload = {}) {
  const parentDir = path.dirname(filePath);
  await fsp.mkdir(parentDir, { recursive: true });
  const line = JSON.stringify(payload);
  await fsp.appendFile(filePath, `${line}\n`, "utf-8");
}
|
|
154
|
+
|
|
155
|
+
/**
 * Summarize every `*.json` run artifact in a directory, newest first.
 *
 * @param {string} runsDir - Directory holding run artifact files.
 * @param {string} outputRoot - Base directory the returned `path` values are
 *   made relative to.
 * @returns {Promise<object[]>} One summary per readable artifact
 *   (`runId`, `generatedAt`, `overallStatus`, `failureCount`, `path`), sorted
 *   by `generatedAt` descending. Empty when `runsDir` does not exist.
 * @throws Any error other than ENOENT, including JSON parse errors.
 */
async function listRunArtifacts(runsDir, outputRoot) {
  // Build the summary for one artifact, or null when its content is not an object.
  const summarizeRun = async (filePath) => {
    const payload = await readJsonFile(filePath, () => null);
    if (!payload || typeof payload !== "object") {
      return null;
    }
    const relativePath = path.relative(outputRoot, filePath);
    return {
      // Fall back to the file name when the payload carries no run ID.
      runId: normalizeString(payload.runId) || path.basename(filePath, ".json"),
      generatedAt: normalizeIsoTimestamp(payload.generatedAt, new Date().toISOString()),
      overallStatus: normalizeString(payload.overallStatus) || "UNKNOWN",
      failureCount: Number(payload.failureCount || 0),
      // Report forward slashes regardless of platform separator.
      path: normalizeString(relativePath).replace(/\\/g, "/"),
    };
  };
  const toEpoch = (run) => Date.parse(String(run.generatedAt || "")) || 0;

  try {
    const entries = await fsp.readdir(runsDir, { withFileTypes: true });
    const jsonFiles = [];
    for (const entry of entries) {
      if (entry.isFile() && entry.name.toLowerCase().endsWith(".json")) {
        jsonFiles.push(path.join(runsDir, entry.name));
      }
    }
    const summaries = await Promise.all(jsonFiles.map((filePath) => summarizeRun(filePath)));
    return summaries
      .filter(Boolean)
      .sort((left, right) => toEpoch(right) - toEpoch(left));
  } catch (error) {
    if (error && typeof error === "object" && error.code === "ENOENT") {
      return [];
    }
    throw error;
  }
}
|
|
186
|
+
|
|
187
|
+
/**
 * Resolve the on-disk locations used by the reliability lane.
 *
 * Starts from the error-daemon storage descriptor and adds the paths of the
 * lane's own files, all of which live in a `reliability` directory under the
 * daemon's `baseDir`.
 *
 * @param {object} [options]
 * @param {string} [options.targetPath="."] - Forwarded to `resolveErrorDaemonStorage`.
 * @param {string} [options.outputDir=""] - Forwarded to `resolveErrorDaemonStorage`.
 * @param {object} [options.env] - Forwarded to `resolveErrorDaemonStorage`.
 * @param {string} [options.homeDir] - Forwarded to `resolveErrorDaemonStorage`.
 * @returns {Promise<object>} The daemon storage fields plus `reliabilityDir`,
 *   `configPath`, `billboardPath`, `eventsPath` and `runsDir`.
 */
export async function resolveReliabilityLaneStorage({
  targetPath = ".",
  outputDir = "",
  env,
  homeDir,
} = {}) {
  const baseStorage = await resolveErrorDaemonStorage({ targetPath, outputDir, env, homeDir });

  const reliabilityDir = path.join(baseStorage.baseDir, "reliability");
  const inLaneDir = (name) => path.join(reliabilityDir, name);

  const lanePaths = {
    reliabilityDir,
    configPath: inLaneDir("lane-config.json"),
    billboardPath: inLaneDir("maintenance-billboard.json"),
    eventsPath: inLaneDir("reliability-events.ndjson"),
    runsDir: inLaneDir("runs"),
  };

  // Lane paths come last so they win over same-named daemon storage fields.
  return { ...baseStorage, ...lanePaths };
}
|
|
209
|
+
|
|
210
|
+
/**
 * Update the maintenance billboard and record the change in the lane's
 * event log.
 *
 * The current billboard is loaded (a default one is used when the file
 * cannot be read), merged with the supplied fields, written back to
 * `billboardPath`, and a `maintenance_update` event is appended to
 * `eventsPath`.
 *
 * Timestamp rules:
 * - `openedAt` is kept only while the billboard stays enabled; enabling a
 *   billboard that was disabled starts a new window at `nowIso`.
 * - `resolvedAt` is cleared while enabled, stamped with `nowIso` when the
 *   billboard is switched off (or has no resolution time yet), and left
 *   untouched by later updates made while it is already disabled.
 *
 * @param {object} [options]
 * @param {string} [options.targetPath="."] - Forwarded to storage resolution.
 * @param {string} [options.outputDir=""] - Forwarded to storage resolution.
 * @param {*} [options.enabled] - Desired state; normalized with the current
 *   state as the fallback.
 * @param {string} [options.message=""] - New message; an empty value keeps
 *   the current message.
 * @param {string} [options.source="manual"] - Origin of the update.
 * @param {string} [options.actor="omar-operator"] - Who performed the update.
 * @param {string} [options.reason=""] - Optional reason, recorded in the
 *   event only (`null` when empty).
 * @param {object} [options.env] - Forwarded to storage resolution.
 * @param {string} [options.homeDir] - Forwarded to storage resolution.
 * @param {string} [options.nowIso] - Timestamp to stamp on the update.
 * @returns {Promise<object>} The resolved storage paths plus the new
 *   `billboard`.
 */
export async function setMaintenanceBillboard({
  targetPath = ".",
  outputDir = "",
  enabled,
  message = "",
  source = "manual",
  actor = "omar-operator",
  reason = "",
  env,
  homeDir,
  nowIso = new Date().toISOString(),
} = {}) {
  const normalizedNow = normalizeIsoTimestamp(nowIso, new Date().toISOString());
  const storage = await resolveReliabilityLaneStorage({
    targetPath,
    outputDir,
    env,
    homeDir,
  });
  const current = normalizeBillboard(
    await readJsonFile(storage.billboardPath, () => createDefaultBillboard(normalizedNow)),
    normalizedNow
  );
  const nextEnabled = normalizeBoolean(enabled, current.enabled);
  const normalizedMessage = normalizeString(message);
  const normalizedSource = normalizeString(source) || "manual";
  const normalizedActor = normalizeString(actor) || "omar-operator";
  const normalizedReason = normalizeString(reason) || null;

  const wasEnabled = Boolean(current.enabled);
  let openedAt;
  let resolvedAt;
  if (nextEnabled) {
    // Only an already-open window keeps its start time; reopening after a
    // resolution must not inherit the previous window's openedAt.
    openedAt = wasEnabled && current.openedAt ? current.openedAt : normalizedNow;
    resolvedAt = null;
  } else {
    openedAt = current.openedAt;
    // Stamp the resolution on the enabled -> disabled transition (or when no
    // resolution time exists yet); otherwise keep the recorded one so that
    // message-only edits do not move it.
    resolvedAt = wasEnabled || !current.resolvedAt ? normalizedNow : current.resolvedAt;
  }

  const next = {
    ...current,
    generatedAt: normalizedNow,
    enabled: nextEnabled,
    message: normalizedMessage || current.message || "",
    source: normalizedSource,
    actor: normalizedActor,
    openedAt,
    resolvedAt,
    lastUpdatedAt: normalizedNow,
  };

  await Promise.all([
    writeJsonFile(storage.billboardPath, next),
    appendJsonLine(storage.eventsPath, {
      timestamp: normalizedNow,
      eventType: "maintenance_update",
      enabled: next.enabled,
      source: normalizedSource,
      actor: normalizedActor,
      reason: normalizedReason,
      message: next.message,
    }),
  ]);

  return {
    ...storage,
    billboard: next,
  };
}
|
|
269
|
+
|
|
270
|
+
/**
 * Read the current state of the reliability lane without modifying it.
 *
 * Loads the lane config and maintenance billboard (defaults are used when a
 * file cannot be read) and lists the recorded run artifacts.
 *
 * @param {object} [options]
 * @param {string} [options.targetPath="."] - Forwarded to storage resolution.
 * @param {string} [options.outputDir=""] - Forwarded to storage resolution.
 * @param {object} [options.env] - Forwarded to storage resolution.
 * @param {string} [options.homeDir] - Forwarded to storage resolution.
 * @param {string} [options.nowIso] - Timestamp used when normalizing/defaulting.
 * @param {number|string} [options.limit=10] - Maximum number of recent runs
 *   to return; floored, clamped to at least 1, and defaulted to 10 when it
 *   is missing, zero, or not a number.
 * @returns {Promise<object>} The resolved storage paths plus `config`,
 *   `billboard`, `runCount` (all runs) and `recentRuns` (first `limit` runs
 *   as ordered by `listRunArtifacts`).
 */
export async function getReliabilityLaneStatus({
  targetPath = ".",
  outputDir = "",
  env,
  homeDir,
  nowIso = new Date().toISOString(),
  limit = 10,
} = {}) {
  const normalizedNow = normalizeIsoTimestamp(nowIso, new Date().toISOString());
  const storage = await resolveReliabilityLaneStorage({
    targetPath,
    outputDir,
    env,
    homeDir,
  });
  const [configRaw, billboardRaw, runs] = await Promise.all([
    readJsonFile(storage.configPath, () => createDefaultConfig(normalizedNow)),
    readJsonFile(storage.billboardPath, () => createDefaultBillboard(normalizedNow)),
    listRunArtifacts(storage.runsDir, storage.outputRoot),
  ]);

  // A non-numeric limit (e.g. "abc") becomes NaN; Math.max(1, NaN) is NaN and
  // slice(0, NaN) returns nothing, so fall back to the default explicitly.
  const requestedLimit = Math.floor(Number(limit || 10));
  const normalizedLimit = Number.isNaN(requestedLimit) ? 10 : Math.max(1, requestedLimit);

  return {
    ...storage,
    config: normalizeConfig(configRaw, normalizedNow),
    billboard: normalizeBillboard(billboardRaw, normalizedNow),
    runCount: runs.length,
    recentRuns: runs.slice(0, normalizedLimit),
  };
}
|
|
299
|
+
|
|
300
|
+
/**
 * Execute one run of the reliability lane and persist its results.
 *
 * Steps:
 * 1. Load the lane config and maintenance billboard (defaults are used when
 *    a file cannot be read).
 * 2. Produce a synthetic result per check: a check fails only when its id is
 *    listed in `simulateFailures`; durations are synthetic
 *    (`1200 + index * 250` ms).
 * 3. For every failure, append an admin error event, then run the error
 *    daemon worker once.
 * 4. Open the maintenance billboard on failure (when `autoOpenMaintenance`)
 *    or clear it on success (when `clearMaintenanceOnPass` and the open
 *    window is owned by the lane, i.e. its source is `reliability_lane`).
 * 5. Write the config, billboard and run artifact, and append a
 *    `reliability_run` event.
 *
 * @param {object} [options]
 * @param {string} [options.targetPath="."] - Forwarded to storage resolution and the daemon helpers.
 * @param {string} [options.outputDir=""] - Forwarded to storage resolution and the daemon helpers.
 * @param {string} [options.region="us-east-1"] - Region label; blank values fall back to `us-east-1`.
 * @param {string} [options.timezone="UTC"] - Timezone label; blank values fall back to `UTC`.
 * @param {string[]} [options.simulateFailures=[]] - Check ids to report as failed.
 * @param {string[]} [options.checks=[]] - Checks to run; when empty the configured checks are used.
 * @param {boolean} [options.autoOpenMaintenance=true] - Open the billboard when the run fails.
 * @param {boolean} [options.clearMaintenanceOnPass=true] - Clear a lane-owned billboard when the run passes.
 * @param {object} [options.env] - Forwarded to storage resolution and the daemon helpers.
 * @param {string} [options.homeDir] - Forwarded to storage resolution and the daemon helpers.
 * @param {string} [options.nowIso] - Timestamp stamped on everything this run writes.
 * @returns {Promise<object>} The resolved storage paths plus `runId`,
 *   `runPath`, `overallStatus`, `checkCount`, `failureCount`, `checks`,
 *   `maintenance` (the resulting billboard) and `worker` (worker summary or
 *   `null` when no failure was queued).
 */
export async function runReliabilityLane({
  targetPath = ".",
  outputDir = "",
  region = "us-east-1",
  timezone = "UTC",
  simulateFailures = [],
  checks = [],
  autoOpenMaintenance = true,
  clearMaintenanceOnPass = true,
  env,
  homeDir,
  nowIso = new Date().toISOString(),
} = {}) {
  const normalizedNow = normalizeIsoTimestamp(nowIso, new Date().toISOString());
  // Normalize once so check messages, error-event metadata and the run
  // artifact all report the same region/timezone.
  const normalizedRegion = normalizeString(region) || "us-east-1";
  const normalizedTimezone = normalizeString(timezone) || "UTC";

  const storage = await resolveReliabilityLaneStorage({
    targetPath,
    outputDir,
    env,
    homeDir,
  });

  const [configRaw, billboardRaw] = await Promise.all([
    readJsonFile(storage.configPath, () => createDefaultConfig(normalizedNow)),
    readJsonFile(storage.billboardPath, () => createDefaultBillboard(normalizedNow)),
  ]);
  const config = normalizeConfig(configRaw, normalizedNow);
  const currentBillboard = normalizeBillboard(billboardRaw, normalizedNow);
  const selectedChecks = normalizeChecks(checks);
  const checksToRun = selectedChecks.length > 0 ? selectedChecks : config.checks;
  const failureSet = new Set(normalizeChecks(simulateFailures));

  const checkResults = checksToRun.map((checkId, index) => {
    const failed = failureSet.has(checkId);
    return {
      checkId,
      status: failed ? "FAIL" : "PASS",
      durationMs: 1200 + index * 250,
      message: failed
        ? `Synthetic check '${checkId}' failed in ${normalizedRegion}.`
        : `Synthetic check '${checkId}' passed in ${normalizedRegion}.`,
    };
  });
  const failureResults = checkResults.filter((check) => check.status === "FAIL");
  const overallStatus = failureResults.length > 0 ? "FAIL" : "PASS";

  let workerRun = null;
  if (failureResults.length > 0) {
    // Events are appended one at a time, in check order, before the worker runs.
    for (const failure of failureResults) {
      await appendAdminErrorEvent({
        targetPath,
        outputDir,
        event: {
          source: "reliability_lane",
          service: "aidenid-synthetic",
          endpoint: `/synthetic/${failure.checkId}`,
          errorCode: `${failure.checkId.toUpperCase()}_FAILED`,
          severity: "P1",
          message: failure.message,
          metadata: {
            region: normalizedRegion,
            timezone: normalizedTimezone,
            checkId: failure.checkId,
            lane: "midnight",
          },
        },
        env,
        homeDir,
      });
    }
    workerRun = await runErrorDaemonWorker({
      targetPath,
      outputDir,
      maxEvents: Math.max(20, failureResults.length * 5),
      env,
      homeDir,
      nowIso: normalizedNow,
    });
  }

  let billboard = currentBillboard;
  if (overallStatus === "FAIL" && autoOpenMaintenance) {
    const alreadyOpen = Boolean(currentBillboard.enabled);
    const existingSource = normalizeString(currentBillboard.source);
    const existingActor = normalizeString(currentBillboard.actor);
    billboard = normalizeBillboard(
      {
        ...currentBillboard,
        enabled: true,
        message:
          currentBillboard.message ||
          "Scheduled midnight reliability lane detected failures. Maintenance is active while remediation is in progress.",
        // A window that is already open keeps its owner. Rewriting a manual
        // window's source to "reliability_lane" would let the next passing
        // run auto-clear it, defeating the source check on the clear path.
        source: alreadyOpen && existingSource ? existingSource : "reliability_lane",
        actor: alreadyOpen && existingActor ? existingActor : "omar-daemon",
        // Reopening a resolved window starts a new one; only an open window
        // keeps its original start time.
        openedAt: alreadyOpen && currentBillboard.openedAt ? currentBillboard.openedAt : normalizedNow,
        resolvedAt: null,
        lastUpdatedAt: normalizedNow,
      },
      normalizedNow
    );
  }
  if (
    overallStatus === "PASS" &&
    clearMaintenanceOnPass &&
    currentBillboard.enabled &&
    normalizeString(currentBillboard.source) === "reliability_lane"
  ) {
    billboard = normalizeBillboard(
      {
        ...currentBillboard,
        enabled: false,
        resolvedAt: normalizedNow,
        lastUpdatedAt: normalizedNow,
      },
      normalizedNow
    );
  }

  // The raw region is passed on purpose so generated run ids stay exactly as
  // before; createReliabilityRunId is responsible for its own normalization.
  const runId = createReliabilityRunId(normalizedNow, region);
  const runPath = path.join(storage.runsDir, `${runId}.json`);
  const runPayload = {
    schemaVersion: RELIABILITY_SCHEMA_VERSION,
    generatedAt: normalizedNow,
    runId,
    lane: "midnight_reliability",
    region: normalizedRegion,
    timezone: normalizedTimezone,
    overallStatus,
    checkCount: checkResults.length,
    failureCount: failureResults.length,
    checks: checkResults,
    configSnapshot: config,
    maintenance: billboard,
    worker: workerRun
      ? {
          runId: workerRun.runId,
          runPath: path.relative(storage.outputRoot, workerRun.runPath).replace(/\\/g, "/"),
          processedCount: workerRun.processedCount,
          queuedCount: workerRun.queuedCount,
          dedupedCount: workerRun.dedupedCount,
          queueDepth: workerRun.queueDepth,
        }
      : null,
  };

  await fsp.mkdir(storage.runsDir, { recursive: true });
  await Promise.all([
    writeJsonFile(storage.configPath, config),
    writeJsonFile(storage.billboardPath, billboard),
    writeJsonFile(runPath, runPayload),
    appendJsonLine(storage.eventsPath, {
      timestamp: normalizedNow,
      eventType: "reliability_run",
      runId,
      region: runPayload.region,
      timezone: runPayload.timezone,
      overallStatus,
      checkCount: checkResults.length,
      failureCount: failureResults.length,
      maintenanceEnabled: billboard.enabled,
    }),
  ]);

  return {
    ...storage,
    runId,
    runPath,
    overallStatus,
    checkCount: checkResults.length,
    failureCount: failureResults.length,
    checks: checkResults,
    maintenance: billboard,
    worker: runPayload.worker,
  };
}
|