@nforma.ai/nforma 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +1024 -0
- package/agents/qgsd-codebase-mapper.md +764 -0
- package/agents/qgsd-debugger.md +1201 -0
- package/agents/qgsd-executor.md +472 -0
- package/agents/qgsd-integration-checker.md +443 -0
- package/agents/qgsd-phase-researcher.md +502 -0
- package/agents/qgsd-plan-checker.md +643 -0
- package/agents/qgsd-planner.md +1182 -0
- package/agents/qgsd-project-researcher.md +621 -0
- package/agents/qgsd-quorum-orchestrator.md +628 -0
- package/agents/qgsd-quorum-slot-worker.md +41 -0
- package/agents/qgsd-quorum-synthesizer.md +133 -0
- package/agents/qgsd-quorum-test-worker.md +37 -0
- package/agents/qgsd-quorum-worker.md +161 -0
- package/agents/qgsd-research-synthesizer.md +239 -0
- package/agents/qgsd-roadmapper.md +660 -0
- package/agents/qgsd-verifier.md +628 -0
- package/bin/accept-debug-invariant.cjs +165 -0
- package/bin/account-manager.cjs +719 -0
- package/bin/aggregate-requirements.cjs +466 -0
- package/bin/analyze-assumptions.cjs +757 -0
- package/bin/analyze-state-space.cjs +921 -0
- package/bin/attribute-trace-divergence.cjs +150 -0
- package/bin/auth-drivers/gh-cli.cjs +93 -0
- package/bin/auth-drivers/index.cjs +46 -0
- package/bin/auth-drivers/pool.cjs +67 -0
- package/bin/auth-drivers/simple.cjs +95 -0
- package/bin/autoClosePtoF.cjs +110 -0
- package/bin/blessed-terminal.cjs +350 -0
- package/bin/build-phase-index.cjs +472 -0
- package/bin/call-quorum-slot.cjs +541 -0
- package/bin/ccr-secure-config.cjs +99 -0
- package/bin/ccr-secure-start.cjs +83 -0
- package/bin/check-bundled-sdks.cjs +177 -0
- package/bin/check-coverage-guard.cjs +112 -0
- package/bin/check-liveness-fairness.cjs +95 -0
- package/bin/check-mcp-health.cjs +123 -0
- package/bin/check-provider-health.cjs +395 -0
- package/bin/check-results-exit.cjs +24 -0
- package/bin/check-spec-sync.cjs +360 -0
- package/bin/check-trace-redaction.cjs +271 -0
- package/bin/check-trace-schema-drift.cjs +99 -0
- package/bin/compareDrift.cjs +21 -0
- package/bin/conformance-schema.cjs +12 -0
- package/bin/count-scenarios.cjs +420 -0
- package/bin/debt-dedup.cjs +144 -0
- package/bin/debt-ledger.cjs +61 -0
- package/bin/debt-retention.cjs +76 -0
- package/bin/debt-state-machine.cjs +80 -0
- package/bin/detect-coverage-gaps.cjs +204 -0
- package/bin/detect-project-intent.cjs +362 -0
- package/bin/export-prism-constants.cjs +164 -0
- package/bin/extract-annotations.cjs +633 -0
- package/bin/extractFormalExpected.cjs +104 -0
- package/bin/fingerprint-drift.cjs +24 -0
- package/bin/fingerprint-issue.cjs +46 -0
- package/bin/formal-core.cjs +519 -0
- package/bin/formal-ref-linker.cjs +141 -0
- package/bin/formal-test-sync.cjs +788 -0
- package/bin/generate-formal-specs.cjs +588 -0
- package/bin/generate-petri-net.cjs +397 -0
- package/bin/generate-phase-spec.cjs +249 -0
- package/bin/generate-proposed-changes.cjs +194 -0
- package/bin/generate-tla-cfg.cjs +122 -0
- package/bin/generate-traceability-matrix.cjs +701 -0
- package/bin/generate-triage-bundle.cjs +300 -0
- package/bin/gh-account-rotate.cjs +34 -0
- package/bin/initialize-model-registry.cjs +105 -0
- package/bin/install-formal-tools.cjs +382 -0
- package/bin/install.js +2424 -0
- package/bin/isNumericThreshold.cjs +34 -0
- package/bin/issue-classifier.cjs +151 -0
- package/bin/levenshtein.cjs +74 -0
- package/bin/lint-formal-models.cjs +580 -0
- package/bin/load-baseline-requirements.cjs +275 -0
- package/bin/manage-agents-core.cjs +815 -0
- package/bin/migrate-formal-dir.cjs +172 -0
- package/bin/migrate-planning.cjs +206 -0
- package/bin/migrate-to-slots.cjs +255 -0
- package/bin/nForma.cjs +2726 -0
- package/bin/observe-config.cjs +353 -0
- package/bin/observe-debt-writer.cjs +140 -0
- package/bin/observe-handler-grafana.cjs +128 -0
- package/bin/observe-handler-internal.cjs +301 -0
- package/bin/observe-handler-logstash.cjs +153 -0
- package/bin/observe-handler-prometheus.cjs +185 -0
- package/bin/observe-handlers.cjs +436 -0
- package/bin/observe-registry.cjs +131 -0
- package/bin/observe-render.cjs +168 -0
- package/bin/planning-paths.cjs +167 -0
- package/bin/polyrepo.cjs +560 -0
- package/bin/prism-priority.cjs +153 -0
- package/bin/probe-quorum-slots.cjs +167 -0
- package/bin/promote-model.cjs +225 -0
- package/bin/propose-debug-invariants.cjs +165 -0
- package/bin/providers.json +392 -0
- package/bin/pty-proxy.py +129 -0
- package/bin/qgsd-solve.cjs +2477 -0
- package/bin/quorum-consensus-gate.cjs +238 -0
- package/bin/quorum-formal-context.cjs +183 -0
- package/bin/quorum-slot-dispatch.cjs +934 -0
- package/bin/read-policy.cjs +60 -0
- package/bin/requirement-map.cjs +63 -0
- package/bin/requirements-core.cjs +247 -0
- package/bin/resolve-cli.cjs +101 -0
- package/bin/review-mcp-logs.cjs +294 -0
- package/bin/run-account-manager-tlc.cjs +188 -0
- package/bin/run-account-pool-alloy.cjs +158 -0
- package/bin/run-alloy.cjs +153 -0
- package/bin/run-audit-alloy.cjs +187 -0
- package/bin/run-breaker-tlc.cjs +181 -0
- package/bin/run-formal-check.cjs +395 -0
- package/bin/run-formal-verify.cjs +701 -0
- package/bin/run-installer-alloy.cjs +188 -0
- package/bin/run-oauth-rotation-prism.cjs +132 -0
- package/bin/run-oscillation-tlc.cjs +202 -0
- package/bin/run-phase-tlc.cjs +228 -0
- package/bin/run-prism.cjs +446 -0
- package/bin/run-protocol-tlc.cjs +201 -0
- package/bin/run-quorum-composition-alloy.cjs +155 -0
- package/bin/run-sensitivity-sweep.cjs +231 -0
- package/bin/run-stop-hook-tlc.cjs +188 -0
- package/bin/run-tlc.cjs +467 -0
- package/bin/run-transcript-alloy.cjs +173 -0
- package/bin/run-uppaal.cjs +264 -0
- package/bin/secrets.cjs +134 -0
- package/bin/sensitivity-report.cjs +219 -0
- package/bin/sensitivity-sweep-feedback.cjs +194 -0
- package/bin/set-secret.cjs +29 -0
- package/bin/setup-telemetry-cron.sh +36 -0
- package/bin/sweepPtoF.cjs +63 -0
- package/bin/sync-baseline-requirements.cjs +290 -0
- package/bin/task-envelope.cjs +360 -0
- package/bin/telemetry-collector.cjs +229 -0
- package/bin/unified-mcp-server.mjs +735 -0
- package/bin/update-agents.cjs +369 -0
- package/bin/update-scoreboard.cjs +1134 -0
- package/bin/validate-debt-entry.cjs +207 -0
- package/bin/validate-invariant.cjs +419 -0
- package/bin/validate-memory.cjs +389 -0
- package/bin/validate-requirements-haiku.cjs +435 -0
- package/bin/validate-traces.cjs +438 -0
- package/bin/verify-formal-results.cjs +124 -0
- package/bin/verify-quorum-health.cjs +273 -0
- package/bin/write-check-result.cjs +106 -0
- package/bin/xstate-to-tla.cjs +483 -0
- package/bin/xstate-trace-walker.cjs +205 -0
- package/commands/qgsd/add-phase.md +43 -0
- package/commands/qgsd/add-requirement.md +24 -0
- package/commands/qgsd/add-todo.md +47 -0
- package/commands/qgsd/audit-milestone.md +37 -0
- package/commands/qgsd/check-todos.md +45 -0
- package/commands/qgsd/cleanup.md +18 -0
- package/commands/qgsd/close-formal-gaps.md +33 -0
- package/commands/qgsd/complete-milestone.md +136 -0
- package/commands/qgsd/debug.md +166 -0
- package/commands/qgsd/discuss-phase.md +83 -0
- package/commands/qgsd/execute-phase.md +117 -0
- package/commands/qgsd/fix-tests.md +27 -0
- package/commands/qgsd/formal-test-sync.md +32 -0
- package/commands/qgsd/health.md +22 -0
- package/commands/qgsd/help.md +22 -0
- package/commands/qgsd/insert-phase.md +32 -0
- package/commands/qgsd/join-discord.md +18 -0
- package/commands/qgsd/list-phase-assumptions.md +46 -0
- package/commands/qgsd/map-codebase.md +71 -0
- package/commands/qgsd/map-requirements.md +20 -0
- package/commands/qgsd/mcp-restart.md +176 -0
- package/commands/qgsd/mcp-set-model.md +134 -0
- package/commands/qgsd/mcp-setup.md +1371 -0
- package/commands/qgsd/mcp-status.md +274 -0
- package/commands/qgsd/mcp-update.md +238 -0
- package/commands/qgsd/new-milestone.md +44 -0
- package/commands/qgsd/new-project.md +42 -0
- package/commands/qgsd/observe.md +260 -0
- package/commands/qgsd/pause-work.md +38 -0
- package/commands/qgsd/plan-milestone-gaps.md +34 -0
- package/commands/qgsd/plan-phase.md +44 -0
- package/commands/qgsd/polyrepo.md +50 -0
- package/commands/qgsd/progress.md +24 -0
- package/commands/qgsd/queue.md +54 -0
- package/commands/qgsd/quick.md +133 -0
- package/commands/qgsd/quorum-test.md +275 -0
- package/commands/qgsd/quorum.md +707 -0
- package/commands/qgsd/reapply-patches.md +110 -0
- package/commands/qgsd/remove-phase.md +31 -0
- package/commands/qgsd/research-phase.md +189 -0
- package/commands/qgsd/resume-work.md +40 -0
- package/commands/qgsd/set-profile.md +34 -0
- package/commands/qgsd/settings.md +39 -0
- package/commands/qgsd/solve.md +565 -0
- package/commands/qgsd/sync-baselines.md +119 -0
- package/commands/qgsd/triage.md +233 -0
- package/commands/qgsd/update.md +37 -0
- package/commands/qgsd/verify-work.md +38 -0
- package/hooks/dist/config-loader.js +297 -0
- package/hooks/dist/conformance-schema.cjs +12 -0
- package/hooks/dist/gsd-context-monitor.js +64 -0
- package/hooks/dist/qgsd-check-update.js +62 -0
- package/hooks/dist/qgsd-circuit-breaker.js +682 -0
- package/hooks/dist/qgsd-precompact.js +156 -0
- package/hooks/dist/qgsd-prompt.js +653 -0
- package/hooks/dist/qgsd-session-start.js +122 -0
- package/hooks/dist/qgsd-slot-correlator.js +58 -0
- package/hooks/dist/qgsd-spec-regen.js +86 -0
- package/hooks/dist/qgsd-statusline.js +91 -0
- package/hooks/dist/qgsd-stop.js +553 -0
- package/hooks/dist/qgsd-token-collector.js +133 -0
- package/hooks/dist/unified-mcp-server.mjs +669 -0
- package/package.json +95 -0
- package/scripts/build-hooks.js +46 -0
- package/scripts/postinstall.js +48 -0
- package/scripts/secret-audit.sh +45 -0
- package/templates/qgsd.json +49 -0
|
@@ -0,0 +1,395 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* check-provider-health.cjs
|
|
6
|
+
*
|
|
7
|
+
* Fast HTTP probe of the underlying LLM providers behind claude-mcp-server.
|
|
8
|
+
* Reads ~/.claude.json → extracts ANTHROPIC_BASE_URL per server → groups by
|
|
9
|
+
* provider → hits GET /models with a short connect timeout.
|
|
10
|
+
*
|
|
11
|
+
* A 200, 401, 403, 404 or 422 response means the provider is UP (server responded).
|
|
12
|
+
* A timeout or connection error means it's DOWN.
|
|
13
|
+
*
|
|
14
|
+
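* No LLM inference is performed — this typically completes in ~2–3 seconds, but each uncached probe may wait up to --timeout-ms (default 7000) and probes run one after another.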
|
|
15
|
+
*
|
|
16
|
+
* TTL cache at ~/.claude/qgsd-provider-cache.json:
|
|
17
|
+
* - DOWN entries: 5 minutes TTL
|
|
18
|
+
* - UP entries: 3 minutes TTL
|
|
19
|
+
* Cache is read before probing; stale or missing → probe runs normally.
|
|
20
|
+
* After each probe, result is written back to cache.
|
|
21
|
+
*
|
|
22
|
+
* Usage:
|
|
23
|
+
* node bin/check-provider-health.cjs [--timeout-ms N] [--json] [--no-cache] [--cache-status]
|
|
24
|
+
*
|
|
25
|
+
* Exit codes:
|
|
26
|
+
* 0 = all providers healthy
|
|
27
|
+
* 1 = one or more providers unhealthy
|
|
28
|
+
*/
|
|
29
|
+
|
|
30
|
+
const https = require('https');
|
|
31
|
+
const http = require('http');
|
|
32
|
+
const fs = require('fs');
|
|
33
|
+
const path = require('path');
|
|
34
|
+
const os = require('os');
|
|
35
|
+
|
|
36
|
+
// ─── Project root lookup (for quorum-failures.json) ───────────────────────────
|
|
37
|
+
/**
 * Locate the project root by walking upwards from this script's directory.
 *
 * The starting directory and then its successive parents are checked for a
 * `.planning` entry. At most 8 directories are examined, and the walk stops
 * early once the filesystem root is reached.
 *
 * @returns {string} The first directory found that contains `.planning`, or
 *   `process.cwd()` when none is found.
 */
function findProjectRoot() {
  let candidate = __dirname;
  let attemptsLeft = 8;
  while (attemptsLeft > 0) {
    if (fs.existsSync(path.join(candidate, '.planning'))) {
      return candidate;
    }
    const above = path.dirname(candidate);
    if (above === candidate) {
      break; // dirname() is a fixed point only at the filesystem root
    }
    candidate = above;
    attemptsLeft -= 1;
  }
  return process.cwd();
}
|
|
47
|
+
|
|
48
|
+
const args = process.argv.slice(2);

/** Value that follows flag `f` on the command line, or null when the flag or its value is absent. */
const getArg = (f) => { const i = args.indexOf(f); return i !== -1 && args[i+1] ? args[i+1] : null; };

/** True when flag `f` appears anywhere on the command line. */
const hasFlag = (f) => args.includes(f);

// Probe timeout used when --timeout-ms is missing or unusable.
const DEFAULT_TIMEOUT_MS = 7000;

/**
 * Convert the raw --timeout-ms value into a usable millisecond count.
 *
 * A non-numeric value (including another flag picked up as the "value", e.g.
 * `--timeout-ms --json`) or a negative number would otherwise reach
 * http.request() as NaN / out-of-range and make every probe throw, so such
 * input falls back to the default instead.
 *
 * @param {string|null|undefined} raw - Text following --timeout-ms, if any.
 * @returns {number} A finite, non-negative number of milliseconds.
 */
function parseTimeoutMs(raw) {
  const value = Number.parseInt(raw ?? '', 10);
  return Number.isFinite(value) && value >= 0 ? value : DEFAULT_TIMEOUT_MS;
}

const TIMEOUT_MS = parseTimeoutMs(getArg('--timeout-ms'));
const JSON_OUT = hasFlag('--json');
const NO_CACHE = hasFlag('--no-cache');
const CACHE_STATUS = hasFlag('--cache-status');
|
|
55
|
+
|
|
56
|
+
// ─── TTL cache constants ──────────────────────────────────────────────────────
|
|
57
|
+
// On-disk location of the probe cache, inside the user's ~/.claude directory.
const CACHE_FILE = path.join(os.homedir(), '.claude', 'qgsd-provider-cache.json');
// How long a cached probe result stays valid, by outcome.
const TTL_DOWN_MS = 5 * 60 * 1000; // 5 minutes
const TTL_UP_MS = 3 * 60 * 1000; // 3 minutes
|
|
60
|
+
|
|
61
|
+
// ─── Load / save cache ────────────────────────────────────────────────────────
|
|
62
|
+
/**
 * Read the provider cache from CACHE_FILE.
 *
 * The file is only trusted when it parses as JSON and its `entries` member is
 * a plain (non-null, non-array) object. `typeof null` is also 'object', so
 * null is rejected explicitly — otherwise later `cache.entries[baseUrl]`
 * lookups would throw.
 *
 * @returns {{entries: Object}} The parsed cache, or a fresh `{ entries: {} }`
 *   when the file is missing, unreadable, not valid JSON, or has the wrong shape.
 */
function loadCache() {
  try {
    const parsed = JSON.parse(fs.readFileSync(CACHE_FILE, 'utf8'));
    const entries = parsed?.entries;
    if (entries !== null && typeof entries === 'object' && !Array.isArray(entries)) {
      return parsed;
    }
  } catch (_) {
    // Missing or corrupt cache is expected on first run; start empty.
  }
  return { entries: {} };
}
|
|
70
|
+
|
|
71
|
+
/**
 * Persist the provider cache to CACHE_FILE as pretty-printed JSON.
 *
 * The parent directory is created first so that the write does not fail on a
 * machine where it does not exist yet. Any failure is reported on stderr and
 * otherwise ignored: a cache write problem must never abort the health check.
 *
 * @param {Object} cache - Cache object to serialise.
 * @returns {void}
 */
function saveCache(cache) {
  try {
    fs.mkdirSync(path.dirname(CACHE_FILE), { recursive: true });
    fs.writeFileSync(CACHE_FILE, JSON.stringify(cache, null, 2), 'utf8');
  } catch (e) {
    // Non-fatal: cache write failure does not abort health check
    process.stderr.write('[cache] Write failed: ' + e.message + '\n');
  }
}
|
|
79
|
+
|
|
80
|
+
/**
 * Return the cached probe result for a provider if it is still fresh.
 *
 * Healthy entries live for TTL_UP_MS and unhealthy ones for TTL_DOWN_MS.
 * An entry is treated as stale when it is not an object, when its `cachedAt`
 * is missing or not numeric, or when `cachedAt` lies in the future (a clock
 * change would otherwise keep such an entry "fresh" for an unbounded time).
 *
 * @param {{entries: Object}} cache - Cache as returned by loadCache().
 * @param {string} baseUrl - Provider base URL used as the cache key.
 * @param {number} [now=Date.now()] - Current time in epoch milliseconds.
 * @returns {Object|null} The cached entry, or null when absent or stale.
 */
function getCachedResult(cache, baseUrl, now = Date.now()) {
  const entry = cache.entries[baseUrl];
  if (!entry || typeof entry !== 'object') return null;

  const age = now - entry.cachedAt;
  if (!Number.isFinite(age) || age < 0) return null;

  const ttl = entry.healthy ? TTL_UP_MS : TTL_DOWN_MS;
  return age < ttl ? entry : null;
}
|
|
88
|
+
|
|
89
|
+
// --cache-status: print cache file contents and exit
|
|
90
|
+
// --cache-status mode: describe what is in the cache file, then exit 0 without probing.
if (CACHE_STATUS) {
  const { entries: cachedEntries } = loadCache();
  const printedAt = Date.now();
  const cachedUrls = Object.keys(cachedEntries);

  console.log('Cache file: ' + CACHE_FILE);
  if (cachedUrls.length === 0) {
    console.log('No entries cached yet.');
    process.exit(0);
  }

  console.log('Entries: ' + cachedUrls.length);
  console.log('');
  cachedUrls.forEach((baseUrl) => {
    const record = cachedEntries[baseUrl];
    const label = record.healthy ? 'UP' : 'DOWN';
    const lifetime = record.healthy ? TTL_UP_MS : TTL_DOWN_MS;
    // Milliseconds of validity left; zero or negative means the entry has expired.
    const msLeft = lifetime - (printedAt - record.cachedAt);
    const expiresIn = msLeft > 0 ? Math.round(msLeft / 1000) + 's' : 'EXPIRED';
    console.log(` ${label.padEnd(5)} ${baseUrl}`);
    console.log(` latencyMs=${record.latencyMs} statusCode=${record.statusCode} expires=${expiresIn}`);
  });
  process.exit(0);
}
|
|
114
|
+
|
|
115
|
+
// ─── Load provider map from ~/.claude.json ────────────────────────────────────
|
|
116
|
+
// MCP server definitions from ~/.claude.json. The file is mandatory: if it
// cannot be read or parsed the script reports the problem and exits with 1.
let mcpServers = {};
try {
  const claudeConfigPath = path.join(os.homedir(), '.claude.json');
  const claudeConfig = JSON.parse(fs.readFileSync(claudeConfigPath, 'utf8'));
  mcpServers = claudeConfig.mcpServers ?? {};
} catch (readErr) {
  console.error('Could not read ~/.claude.json:', readErr.message);
  process.exit(1);
}
|
|
124
|
+
|
|
125
|
+
// Load quorum_active from ~/.claude/qgsd.json (project config takes precedence)
|
|
126
|
+
// Names of the servers taking part in the quorum. Stays empty when no config
// file supplies a `quorum_active` array.
let quorumActive = [];
try {
  // The global file is read first so that the project-level file, read second,
  // overrides it when both define quorum_active.
  const candidatePaths = [
    path.join(os.homedir(), '.claude', 'qgsd.json'),
    path.join(process.cwd(), '.claude', 'qgsd.json'),
  ];
  candidatePaths.forEach((candidate) => {
    try {
      const config = JSON.parse(fs.readFileSync(candidate, 'utf8'));
      const configured = config.quorum_active;
      if (Array.isArray(configured)) {
        quorumActive = configured;
      }
    } catch (_) {}
  });
} catch (_) {}
|
|
137
|
+
|
|
138
|
+
// Filter mcpServers by quorum_active (empty quorumActive = all servers participate)
|
|
139
|
+
// With no quorum_active list every configured server is kept; otherwise only
// the servers whose name appears in the list.
const activeMcpServers = quorumActive.length === 0
  ? mcpServers
  : Object.fromEntries(
      Object.entries(mcpServers).filter(([slotName]) => quorumActive.includes(slotName)),
    );
|
|
142
|
+
|
|
143
|
+
// Build: providerBaseUrl -> { servers: [], apiKey }
|
|
144
|
+
// Group active servers by the base URL they talk to. The API key stored for a
// provider is the one from the first server seen with that base URL.
const providers = {};
Object.entries(activeMcpServers).forEach(([name, cfg]) => {
  const env = cfg.env ?? {};
  const baseUrl = env.ANTHROPIC_BASE_URL;
  // Servers without ANTHROPIC_BASE_URL (subprocess slots such as codex-1 or
  // gemini-1) are not HTTP-backed and are skipped here.
  if (!baseUrl) return;

  const apiKey = env.ANTHROPIC_API_KEY;
  const model = env.CLAUDE_DEFAULT_MODEL ?? '?';

  const group = providers[baseUrl] || (providers[baseUrl] = { servers: [], apiKey });
  group.servers.push({ name, model });
});
|
|
159
|
+
|
|
160
|
+
// ─── Quorum Failure Patterns printer ─────────────────────────────────────────
|
|
161
|
+
/**
 * Print a warning section for quorum slots with repeated failures.
 *
 * Reads the quorum-failures JSON file (an array of records carrying `slot`,
 * `error_type`, `count`, `last_seen` and `pattern`). For each slot and error
 * type the first record in the file is used, and a warning is printed when
 * its `count` is 3 or more. Nothing is printed in --json mode, when the file
 * is missing or unparsable, or when no record reaches the threshold.
 *
 * Records that are not objects are skipped. Without that guard a single
 * malformed record would throw during grouping and silently suppress every
 * warning.
 *
 * Never throws: any unexpected error is swallowed so that the health check
 * itself cannot fail because of this report.
 *
 * @returns {void}
 */
function printQuorumFailures() {
  if (JSON_OUT) return;
  try {
    let failuresPath;
    try {
      // Preferred: let the planning-paths helper decide where the file lives.
      const pp = require('./planning-paths.cjs');
      failuresPath = pp.resolve(findProjectRoot(), 'quorum-failures');
    } catch (_) {
      // Helper unavailable: fall back to the conventional location.
      failuresPath = path.join(findProjectRoot(), '.planning', 'quorum-failures.json');
    }
    if (!fs.existsSync(failuresPath)) return;

    let records;
    try {
      records = JSON.parse(fs.readFileSync(failuresPath, 'utf8'));
    } catch (_) { return; }
    if (!Array.isArray(records)) return;

    const fYellow = (s) => `\x1b[33m${s}\x1b[0m`;
    const fRed = (s) => `\x1b[31m${s}\x1b[0m`;
    const fBold = (s) => `\x1b[1m${s}\x1b[0m`;
    const fCyan = (s) => `\x1b[36m${s}\x1b[0m`;
    const fDim = (s) => `\x1b[2m${s}\x1b[0m`;

    const hints = {
      CLI_SYNTAX: 'Check CLI args_template in providers.json for this slot',
      TIMEOUT: 'Check provider timeout_ms in providers.json or increase --timeout arg',
      AUTH: 'Check API key env var / OAuth token for this slot',
      UNKNOWN: 'Check stderr output in pattern field above',
    };

    // Minimum recorded failures before a slot/error-type pair is reported.
    const WARN_THRESHOLD = 3;

    // slot -> error_type -> first record seen. Prototype-less objects keep a
    // slot or error type named e.g. "constructor" from hitting inherited members.
    const bySlot = Object.create(null);
    for (const r of records) {
      if (r === null || typeof r !== 'object') continue;
      if (!bySlot[r.slot]) bySlot[r.slot] = Object.create(null);
      if (!bySlot[r.slot][r.error_type]) bySlot[r.slot][r.error_type] = r;
    }

    let warnEmitted = false;
    for (const [slotName, byType] of Object.entries(bySlot)) {
      for (const [errorType, rec] of Object.entries(byType)) {
        if (!(rec.count >= WARN_THRESHOLD)) continue;
        if (!warnEmitted) {
          // Section header is printed lazily, only once something is reportable.
          console.log(fYellow('━━━ QUORUM SLOT FAILURE PATTERNS ━━━'));
          console.log();
          warnEmitted = true;
        }
        console.log(` ${fYellow('WARN')} quorum slot "${fCyan(slotName)}" has ${fRed(String(rec.count))} ${fBold(errorType)} failures (last: ${fDim(rec.last_seen)})`);
        console.log(` ${fDim('Pattern:')} ${fDim(rec.pattern)}`);
        console.log(` ${fDim('Hint:')} ${fDim(hints[errorType] ?? hints.UNKNOWN)}`);
        console.log();
      }
    }
  } catch (_) { /* silently skip — never crash health check */ }
}
|
|
216
|
+
|
|
217
|
+
// Nothing to probe: say so, still surface any recorded quorum failures, and
// exit successfully.
if (!Object.keys(providers).length) {
  console.log('No claude-mcp-server instances with ANTHROPIC_BASE_URL found.');
  printQuorumFailures();
  process.exit(0);
}
|
|
222
|
+
|
|
223
|
+
// ─── HTTP probe ───────────────────────────────────────────────────────────────
|
|
224
|
+
/**
 * Probe a provider with `GET <baseUrl>/models`.
 *
 * The provider counts as healthy when it answers with one of 200, 401, 403,
 * 404 or 422; the probe only establishes that a server is responding, not
 * that the credentials are valid. A timeout, a connection error or any other
 * status code is reported as unhealthy.
 *
 * The returned promise never rejects. A base URL that cannot be parsed, or
 * that is not http(s) (for example a scheme-less `localhost:8080/v1`), is
 * reported as `Invalid URL` rather than thrown, so that one bad entry cannot
 * abort the probes for the remaining providers.
 *
 * @param {string} baseUrl - Provider base URL; one trailing slash is ignored.
 * @param {string} [apiKey] - Sent as a Bearer token when provided.
 * @returns {Promise<{healthy: boolean, statusCode: (number|null), error: (string|null), latencyMs: number}>}
 *   `latencyMs` is the time until response headers arrived, or until the
 *   failure was detected.
 */
function probeUrl(baseUrl, apiKey) {
  return new Promise((resolve) => {
    // Status codes that show a live server, even if it rejected the request.
    const ALIVE_STATUS_CODES = [200, 401, 403, 404, 422];

    let target;
    try {
      const u = new URL(baseUrl);
      if (u.protocol !== 'http:' && u.protocol !== 'https:') {
        throw new TypeError(`Unsupported protocol: ${u.protocol}`);
      }
      // Drop a single trailing slash from the path, then append /models.
      target = new URL(`${u.origin}${u.pathname.replace(/\/$/, '')}/models`);
    } catch {
      return resolve({ healthy: false, statusCode: null, error: `Invalid URL: ${baseUrl}`, latencyMs: 0 });
    }

    const start = Date.now();
    const isHttps = target.protocol === 'https:';
    const lib = isHttps ? https : http;

    // Several events can report the same outcome (e.g. 'timeout' followed by
    // the 'error' that req.destroy() triggers); only the first one counts.
    let settled = false;
    const settle = (result) => {
      if (settled) return;
      settled = true;
      resolve(result);
    };
    const fail = (message) => settle({
      healthy: false,
      statusCode: null,
      error: message,
      latencyMs: Date.now() - start,
    });

    const headers = { 'User-Agent': 'qgsd-health-check/1.0' };
    if (apiKey) headers['Authorization'] = `Bearer ${apiKey}`;

    let req;
    try {
      req = lib.request(
        {
          hostname: target.hostname,
          port: target.port || (isHttps ? 443 : 80),
          path: target.pathname + target.search,
          method: 'GET',
          headers,
          timeout: TIMEOUT_MS,
        },
        (res) => {
          const latencyMs = Date.now() - start;
          const report = () => settle({
            healthy: ALIVE_STATUS_CODES.includes(res.statusCode),
            statusCode: res.statusCode,
            error: null,
            latencyMs,
          });
          res.on('end', report);
          // Headers already arrived, so the status code is known even if the
          // body is cut short. Handling 'error' also stops a broken response
          // stream from raising an unhandled 'error' event.
          res.on('error', report);
          res.on('close', report);
          // Consume response body to free socket
          res.resume();
        }
      );
    } catch (e) {
      // request() validates its options synchronously (e.g. illegal header
      // characters in the API key); report that as a failed probe.
      return fail(e.message);
    }

    req.on('timeout', () => {
      fail(`Timed out after ${TIMEOUT_MS}ms`);
      req.destroy();
    });

    req.on('error', (e) => {
      fail(e.message);
    });

    req.end();
  });
}
|
|
280
|
+
|
|
281
|
+
// ─── Run probes (sequential to avoid thundering-herd on same provider) ────────
|
|
282
|
+
/**
 * Probe every configured provider, print the report and exit.
 *
 * Providers are handled one after another. For each one a fresh cached result
 * is reused unless --no-cache was given; otherwise the provider is probed and
 * the outcome is written back to the cache straight away. Output is either a
 * JSON array (--json) or a coloured, human-readable report followed by the
 * quorum failure patterns.
 *
 * Does not return: the process exits with 1 when at least one provider is
 * unhealthy and with 0 otherwise.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const results = [];
  const cache = loadCache();

  for (const [baseUrl, { servers, apiKey }] of Object.entries(providers)) {
    // Friendly provider name: hostname without a leading "api." and without a
    // trailing .com / .ai / .xyz; falls back to the raw value if unparsable.
    let providerName;
    try {
      providerName = new URL(baseUrl).hostname.replace(/^api\./, '').replace(/\.com$|\.ai$|\.xyz$/, '');
    } catch {
      providerName = baseUrl;
    }

    let probe;
    const cached = NO_CACHE ? null : getCachedResult(cache, baseUrl);

    if (cached) {
      probe = {
        healthy: cached.healthy,
        statusCode: cached.statusCode,
        error: cached.error ?? null,
        latencyMs: cached.latencyMs,
      };
      // Cache hits are announced on stderr, and only outside --json mode, so
      // that machine-readable output stays clean.
      if (!JSON_OUT) {
        const ttl = cached.healthy ? TTL_UP_MS : TTL_DOWN_MS;
        const remaining = Math.round((ttl - (Date.now() - cached.cachedAt)) / 1000);
        const statusStr = cached.healthy ? 'UP' : 'DOWN';
        process.stderr.write(`[cache] ${providerName} = ${statusStr} (cached, expires in ${remaining}s)\n`);
      }
    } else {
      probe = await probeUrl(baseUrl, apiKey);
      // Saved after every probe so that an interrupted run keeps what it learned.
      cache.entries[baseUrl] = {
        healthy: probe.healthy,
        statusCode: probe.statusCode,
        error: probe.error,
        latencyMs: probe.latencyMs,
        cachedAt: Date.now(),
      };
      saveCache(cache);
    }

    results.push({
      provider: providerName,
      baseUrl,
      servers: servers.map(s => s.name),
      models: servers.map(s => s.model),
      healthy: probe.healthy,
      statusCode: probe.statusCode,
      latencyMs: probe.latencyMs,
      error: probe.error,
    });
  }

  // ── Output ───────────────────────────────────────────────────────────────────
  if (JSON_OUT) {
    console.log(JSON.stringify(results, null, 2));
  } else {
    const green = (s) => `\x1b[32m${s}\x1b[0m`;
    const red = (s) => `\x1b[31m${s}\x1b[0m`;
    const yellow = (s) => `\x1b[33m${s}\x1b[0m`;
    const dim = (s) => `\x1b[2m${s}\x1b[0m`;
    const bold = (s) => `\x1b[1m${s}\x1b[0m`;
    const cyan = (s) => `\x1b[36m${s}\x1b[0m`;

    // Width of the longest status label, 'DOWN [timeout]'.
    const STATUS_WIDTH = 14;

    console.log(`\n${bold('━━━ LLM PROVIDER HEALTH CHECK ━━━')}`);
    console.log(dim(` Probe: GET /models | Timeout: ${TIMEOUT_MS}ms | ${new Date().toISOString().slice(0,19).replace('T',' ')} UTC`));
    console.log();

    for (const r of results) {
      const paint = r.healthy ? green : red;
      const icon = paint(r.healthy ? '✓' : '✗');
      const label = r.healthy
        ? `UP [${r.statusCode}]`
        : `DOWN [${r.statusCode ?? 'timeout'}]`;
      // Pad the plain text before colouring: padEnd() counts ANSI escape
      // characters, so padding a coloured string misaligns the next column.
      const status = paint(label.padEnd(STATUS_WIDTH));
      const lat = r.latencyMs < 500 ? green(`${r.latencyMs}ms`) : yellow(`${r.latencyMs}ms`);

      console.log(` ${icon} ${bold(r.provider.padEnd(14))} ${status} ${lat}`);
      console.log(` ${dim(r.baseUrl)}`);

      // Every server behind a provider shares that provider's health.
      const dot = paint('•');
      for (let i = 0; i < r.servers.length; i++) {
        console.log(` ${dot} ${cyan(r.servers[i].padEnd(22))} ${dim(r.models[i])}`);
      }

      if (r.error) {
        console.log(` ${red('→')} ${dim(r.error)}`);
      }
      console.log();
    }

    const unhealthy = results.filter(r => !r.healthy);
    if (unhealthy.length > 0) {
      console.log(red(`${unhealthy.length}/${results.length} providers DOWN — skip these MCP servers in quorum:`));
      for (const r of unhealthy) {
        r.servers.forEach(s => console.log(` • ${s}`));
      }
      console.log();
      console.log(dim(' Tip: run again in a few minutes — AkashML/Fireworks have intermittent outages.'));
    } else {
      console.log(green(' All providers healthy ✓'));
    }
    console.log();
  }

  // ── Quorum Failure Patterns ──────────────────────────────────────────────────
  printQuorumFailures();

  process.exit(results.some(r => !r.healthy) ? 1 : 0);
}
|
|
394
|
+
|
|
395
|
+
// Any unexpected failure in main() is printed and turned into exit code 1.
main().catch((fatal) => {
  console.error(fatal);
  process.exit(1);
});
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const fs = require('fs');
|
|
4
|
+
const { NDJSON_PATH } = require('./write-check-result.cjs');
|
|
5
|
+
|
|
6
|
+
// Exit-code gate over the check-results NDJSON file:
//   0 = file absent, or no record has result === 'fail'
//   1 = at least one 'fail' record, or the file contains an unusable line
if (!fs.existsSync(NDJSON_PATH)) {
  process.stderr.write('[check-results-exit] No check-results.ndjson found — nothing to check\n');
  process.exit(0);
}

const parsed = [];
fs.readFileSync(NDJSON_PATH, 'utf8').split('\n').forEach((line, index) => {
  if (line.trim().length === 0) return; // blank lines are not records
  try {
    const record = JSON.parse(line);
    if (record === null) throw new Error('record is null');
    parsed.push(record);
  } catch (e) {
    // An unusable line still fails the gate, but with a message that points
    // at the offending line instead of an uncaught exception.
    process.stderr.write('[check-results-exit] Malformed record on line ' + (index + 1) + ' of ' + NDJSON_PATH + ': ' + e.message + '\n');
    process.exit(1);
  }
});

const fails = parsed.filter(r => r.result === 'fail');

if (fails.length > 0) {
  process.stderr.write('[check-results-exit] ' + fails.length + ' fail(s) found:\n');
  for (const f of fails) {
    process.stderr.write(' tool=' + f.tool + ' formalism=' + f.formalism + ' ts=' + f.timestamp + '\n');
  }
  process.exit(1);
}

process.stdout.write('[check-results-exit] All ' + parsed.length + ' check(s) pass/warn/inconclusive.\n');
process.exit(0);
|