solana-resilience-kit 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +210 -0
  3. package/dist/cli/diagnose.d.ts +75 -0
  4. package/dist/cli/diagnose.js +70 -0
  5. package/dist/errors.d.ts +30 -0
  6. package/dist/errors.js +39 -0
  7. package/dist/fees/estimator.d.ts +47 -0
  8. package/dist/fees/estimator.js +43 -0
  9. package/dist/fees/oracles.d.ts +46 -0
  10. package/dist/fees/oracles.js +88 -0
  11. package/dist/index.d.ts +32 -0
  12. package/dist/index.js +28 -0
  13. package/dist/jito/router.d.ts +53 -0
  14. package/dist/jito/router.js +53 -0
  15. package/dist/jito/tips.d.ts +33 -0
  16. package/dist/jito/tips.js +40 -0
  17. package/dist/observability/metrics.d.ts +62 -0
  18. package/dist/observability/metrics.js +74 -0
  19. package/dist/rpc/health.d.ts +41 -0
  20. package/dist/rpc/health.js +120 -0
  21. package/dist/rpc/pool.d.ts +66 -0
  22. package/dist/rpc/pool.js +126 -0
  23. package/dist/rpc/rate-limit.d.ts +38 -0
  24. package/dist/rpc/rate-limit.js +65 -0
  25. package/dist/testing/base58.d.ts +11 -0
  26. package/dist/testing/base58.js +53 -0
  27. package/dist/testing/faults.d.ts +28 -0
  28. package/dist/testing/faults.js +16 -0
  29. package/dist/testing/index.d.ts +13 -0
  30. package/dist/testing/index.js +10 -0
  31. package/dist/testing/mock-cluster.d.ts +86 -0
  32. package/dist/testing/mock-cluster.js +160 -0
  33. package/dist/testing/mock-endpoint.d.ts +37 -0
  34. package/dist/testing/mock-endpoint.js +101 -0
  35. package/dist/testing/mock-jito.d.ts +44 -0
  36. package/dist/testing/mock-jito.js +94 -0
  37. package/dist/testing/rng.d.ts +11 -0
  38. package/dist/testing/rng.js +22 -0
  39. package/dist/tx/confirmation.d.ts +40 -0
  40. package/dist/tx/confirmation.js +56 -0
  41. package/dist/tx/sender.d.ts +57 -0
  42. package/dist/tx/sender.js +74 -0
  43. package/dist/wallet/adapter.d.ts +42 -0
  44. package/dist/wallet/adapter.js +34 -0
  45. package/package.json +71 -0
@@ -0,0 +1,88 @@
1
/**
 * Fee oracle that derives its estimate from the RPC node's own
 * `getRecentPrioritizationFees` samples.
 */
export class NativeFeeOracle {
  rpc;

  /**
   * @param rpc Client whose `getRecentPrioritizationFees(accounts)` returns a
   *   request object with an async `send()`.
   */
  constructor(rpc) {
    this.rpc = rpc;
  }

  /**
   * Turns the recent prioritization-fee samples into five micro-lamports-per-CU
   * levels.
   *
   * Each sample's `prioritizationFee` (bigint or number) is coerced with
   * `Number`, the values are sorted ascending, and every level is read at
   * index `round(p / 100 * (n - 1))`, so the levels can never decrease from
   * `min` to `veryHigh`.
   *
   * @param writableAccounts Passed through unchanged to the RPC call.
   * @returns `{ levels: { min, low, medium, high, veryHigh } }`; every level is
   *   0 when the node returned no samples.
   */
  async getPriorityFee(writableAccounts) {
    const request = this.rpc.getRecentPrioritizationFees(writableAccounts);
    const samples = await request.send();

    const ascending = samples.map((sample) => Number(sample.prioritizationFee));
    ascending.sort((left, right) => left - right);

    const lastIndex = ascending.length - 1;
    if (lastIndex < 0) {
      return {
        levels: { min: 0, low: 0, medium: 0, high: 0, veryHigh: 0 },
      };
    }

    // The index always lies in [0, lastIndex]; the `?? 0` is only a type-level
    // safety net and is never taken for a non-empty array.
    const pick = (p) => {
      const index = Math.round((p / 100) * lastIndex);
      return ascending[index] ?? 0;
    };

    const min = pick(0);
    const low = pick(25);
    const medium = pick(50);
    const high = pick(75);
    const veryHigh = pick(100);
    return { levels: { min, low, medium, high, veryHigh } };
  }
}
43
/**
 * Helius `getPriorityFeeEstimate` — account-aware percentile estimates. The
 * response's `priorityFeeLevels` object is keyed by the same level names this
 * kit exposes, so the mapping is direct.
 *
 * Config fields read here:
 *  - `url`       endpoint the JSON-RPC request is POSTed to.
 *  - `apiKey`    optional; when set it is added as the `api-key` query
 *                parameter (URL-encoded, and joined with `&` when `url`
 *                already has a query string).
 *  - `fetchImpl` optional fetch replacement (e.g. for deterministic tests);
 *                defaults to `globalThis.fetch`.
 */
export class HeliusFeeOracle {
  config;

  constructor(config) {
    this.config = config;
  }

  /**
   * Requests all priority-fee levels for the given accounts.
   *
   * @param writableAccounts Sent as `accountKeys` in the request params.
   * @returns `{ levels: { min, low, medium, high, veryHigh } }`, each rounded
   *   to an integer. A level that is missing or not a finite number becomes 0.
   * @throws {Error} When the response reports `ok === false`, or when the
   *   JSON-RPC body carries an `error` member. Previously both cases were
   *   silently reported as an all-zero estimate, which is indistinguishable
   *   from "the network is idle".
   */
  async getPriorityFee(writableAccounts) {
    const { url, apiKey } = this.config;
    const fetchImpl = this.config.fetchImpl ?? globalThis.fetch;

    let endpoint = url;
    if (apiKey) {
      const base = String(url);
      // Respect a query string that is already present, and never let a key
      // containing `&`, `#` or spaces corrupt the URL.
      const separator = base.includes("?") ? "&" : "?";
      endpoint = `${base}${separator}api-key=${encodeURIComponent(apiKey)}`;
    }

    const res = await fetchImpl(endpoint, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({
        jsonrpc: "2.0",
        id: "1",
        method: "getPriorityFeeEstimate",
        params: [
          {
            accountKeys: writableAccounts,
            options: { includeAllPriorityFeeLevels: true },
          },
        ],
      }),
    });

    // Compare against `false` explicitly so minimal fetch doubles that expose
    // only `json()` keep working.
    if (res.ok === false) {
      throw new Error(`Helius getPriorityFeeEstimate failed with HTTP ${res.status}`);
    }

    const body = await res.json();
    if (body?.error != null) {
      const detail = body.error.message ?? JSON.stringify(body.error);
      throw new Error(`Helius getPriorityFeeEstimate returned an error: ${detail}`);
    }

    const fees = body?.result?.priorityFeeLevels ?? {};
    const microLamports = (value) => {
      const rounded = Math.round(Number(value ?? 0));
      return Number.isFinite(rounded) ? rounded : 0;
    };

    return {
      levels: {
        min: microLamports(fees.min),
        low: microLamports(fees.low),
        medium: microLamports(fees.medium),
        high: microLamports(fees.high),
        veryHigh: microLamports(fees.veryHigh),
      },
    };
  }
}
@@ -0,0 +1,32 @@
1
+ /**
2
+ * solana-resilience-kit — vendor-neutral, client-side resilience + observability
3
+ * layer for Solana dApps, built on @solana/kit (web3.js v2).
4
+ *
5
+ * Public API surface. Every export below ships with its implementation;
6
+ * the test suite under /test encodes the required behavior of every export.
7
+ */
8
+ export * from "./errors.js";
9
+ export { ResilientRpcPool } from "./rpc/pool.js";
10
+ export type { ResilientEndpoint, ResilientRpcConfig } from "./rpc/pool.js";
11
+ export { HealthMonitor } from "./rpc/health.js";
12
+ export type { EndpointHealth, HealthMonitorConfig } from "./rpc/health.js";
13
+ export { CreditRateLimiter, DEFAULT_METHOD_WEIGHTS } from "./rpc/rate-limit.js";
14
+ export type { RateLimiterConfig } from "./rpc/rate-limit.js";
15
+ export { TransactionSender } from "./tx/sender.js";
16
+ export type { SendConfig, SendResult, SenderDeps } from "./tx/sender.js";
17
+ export { ConfirmationTracker } from "./tx/confirmation.js";
18
+ export type { TrackConfig, TrackResult, TerminalOutcome } from "./tx/confirmation.js";
19
+ export { FeeEstimator } from "./fees/estimator.js";
20
+ export type { ComputeBudget, EstimateConfig } from "./fees/estimator.js";
21
+ export { NativeFeeOracle, HeliusFeeOracle } from "./fees/oracles.js";
22
+ export type { FeeOracle, FeeLevel, PriorityFeeEstimate, HttpFeeOracleConfig } from "./fees/oracles.js";
23
+ export { JitoRouter } from "./jito/router.js";
24
+ export type { JitoEngineClient, JitoRouteConfig, JitoRouteResult } from "./jito/router.js";
25
+ export { TipEstimator, MIN_TIP_LAMPORTS } from "./jito/tips.js";
26
+ export type { TipFloor, TipPercentile, TipEstimatorConfig } from "./jito/tips.js";
27
+ export { InMemoryMetrics, OtelMetrics } from "./observability/metrics.js";
28
+ export type { Metrics, OtelMetricsConfig } from "./observability/metrics.js";
29
+ export { ResilientWalletAdapter } from "./wallet/adapter.js";
30
+ export type { WalletSigner, ResilientWalletConfig } from "./wallet/adapter.js";
31
+ export { Diagnostics } from "./cli/diagnose.js";
32
+ export type { ProbeTarget, EndpointProbe, ProbeReport, TxDiagnosis, DiagnosticsDeps, } from "./cli/diagnose.js";
package/dist/index.js ADDED
@@ -0,0 +1,28 @@
1
+ /**
2
+ * solana-resilience-kit — vendor-neutral, client-side resilience + observability
3
+ * layer for Solana dApps, built on @solana/kit (web3.js v2).
4
+ *
5
+ * Public API surface. Every export below ships with its implementation;
6
+ * the test suite under /test encodes the required behavior of every export.
7
+ */
8
+ // Errors
9
+ export * from "./errors.js";
10
+ // RPC layer
11
+ export { ResilientRpcPool } from "./rpc/pool.js";
12
+ export { HealthMonitor } from "./rpc/health.js";
13
+ export { CreditRateLimiter, DEFAULT_METHOD_WEIGHTS } from "./rpc/rate-limit.js";
14
+ // Transactions
15
+ export { TransactionSender } from "./tx/sender.js";
16
+ export { ConfirmationTracker } from "./tx/confirmation.js";
17
+ // Fees
18
+ export { FeeEstimator } from "./fees/estimator.js";
19
+ export { NativeFeeOracle, HeliusFeeOracle } from "./fees/oracles.js";
20
+ // Jito / MEV
21
+ export { JitoRouter } from "./jito/router.js";
22
+ export { TipEstimator, MIN_TIP_LAMPORTS } from "./jito/tips.js";
23
+ // Observability
24
+ export { InMemoryMetrics, OtelMetrics } from "./observability/metrics.js";
25
+ // Wallet
26
+ export { ResilientWalletAdapter } from "./wallet/adapter.js";
27
+ // Diagnostics
28
+ export { Diagnostics } from "./cli/diagnose.js";
@@ -0,0 +1,53 @@
1
+ /**
2
+ * JitoRouter — routes a transaction/bundle through the Jito Block Engine for MEV
3
+ * protection, polls in-flight status, and FALLS BACK to normal RPC submission
4
+ * if the bundle does not land before the deadline. A bundle_id is only a receipt,
5
+ * not a landing guarantee, so fallback is mandatory for reliable confirmation.
6
+ */
7
+ import type { TransactionSender, SendConfig, SendResult } from "../tx/sender.js";
8
+ import type { TipEstimator, TipPercentile } from "./tips.js";
9
+ export interface JitoEngineClient { // Block Engine client surface handed to JitoRouter's constructor.
10
+ getTipAccounts(): Promise<string[]>; // Presumably the engine's tip account addresses; not called by router.js — TODO confirm usage.
11
+ sendBundle(wireTransactions: string[]): Promise<string>; // Resolves to a bundle id, which JitoRouter passes to getInflightBundleStatuses.
12
+ getInflightBundleStatuses(ids: string[]): Promise<Array<{
13
+ bundle_id: string;
14
+ status: string; // JitoRouter reacts to "Landed", "Failed" and "Invalid"; any other value keeps it polling.
15
+ }>>;
16
+ }
17
+ export interface JitoRouteConfig extends SendConfig { // SendConfig plus Jito-specific knobs; the whole object is forwarded to the fallback sender.
18
+ /** Tip percentile to target (default "p50"). NOTE(review): JitoRouter.sendWithFallback in router.js never reads this field — confirm whether tip sizing is expected to happen in the caller. */
19
+ tipPercentile?: TipPercentile;
20
+ /** Max in-flight status polls before falling back to RPC (default 10). The wait between polls is the config's `rebroadcastIntervalMs` (1000 ms when unset). */
21
+ maxBundlePolls?: number;
22
+ }
23
+ export interface JitoRouteResult extends SendResult { // SendResult annotated with the path that produced it.
24
+ /** "jito" if the bundle landed, "rpc" if we fell back. On "jito" router.js reports outcome "confirmed", slot null and rebroadcasts 0. */
25
+ route: "jito" | "rpc";
26
+ bundleId: string | null; // Receipt id from the engine's sendBundle when one was obtained; a receipt only, not proof of landing.
27
+ }
28
+ export interface JitoRouterDeps { // Optional injectable dependencies for JitoRouter.
29
+ sleep?: (ms: number) => Promise<void>; // Delay used between status polls; router.js defaults to a setTimeout-backed promise.
30
+ }
31
+ export declare class JitoRouter {
32
+ private readonly engine; // Block Engine client used for sendBundle and status polling.
33
+ private readonly tipEstimator; // Stored by the constructor; NOTE(review): not referenced by sendWithFallback in router.js.
34
+ private readonly fallbackSender; // TransactionSender whose sendAndConfirm handles the RPC fallback.
35
+ private readonly sleep; // deps.sleep, or a setTimeout-backed default.
36
+ constructor(engine: JitoEngineClient, tipEstimator: TipEstimator, fallbackSender: TransactionSender, deps?: JitoRouterDeps);
37
+ /**
38
+ * Submit via Jito; fall back to RPC sender if the bundle doesn't land.
39
+ *
40
+ * Correctness invariants (see CLAUDE.md):
41
+ * - A `bundle_id` is a receipt, NOT a landing guarantee. We poll in-flight
42
+ * status a bounded number of times (`maxBundlePolls`) and, on anything
43
+ * other than a confirmed "Landed", hand the SAME already-signed wire
44
+ * transaction to the RPC `TransactionSender`. The fallback inherits the
45
+ * sender's invariants (maxRetries:0, no re-sign, LVBH-bounded loop), so
46
+ * there is no double-charge and the path is idempotent by signature.
47
+ * - On the Jito-landed path we do NOT poll the cluster for confirmation:
48
+ * a "Landed" bundle IS the confirmation. The transaction may never have
49
+ * been broadcast to the RPC, so a getSignatureStatuses poll would falsely
50
+ * expire it. We return outcome "confirmed" with slot null directly.
51
+ *
 * Resolves with `route: "jito"` when a poll reports "Landed", otherwise with
 * the fallback sender's result tagged `route: "rpc"`.
 */
52
+ sendWithFallback(config: JitoRouteConfig): Promise<JitoRouteResult>;
53
+ }
@@ -0,0 +1,53 @@
1
/**
 * Routes an already-signed transaction through the Jito Block Engine and falls
 * back to the regular RPC sender whenever the bundle is not confirmed as
 * landed.
 */
export class JitoRouter {
  engine;
  tipEstimator;
  fallbackSender;
  sleep;

  /**
   * @param engine Block Engine client (`sendBundle`, `getInflightBundleStatuses`).
   * @param tipEstimator Stored for callers; not consulted by `sendWithFallback`.
   * @param fallbackSender Object whose `sendAndConfirm(config)` performs the RPC path.
   * @param deps Optional `{ sleep }` override; defaults to a `setTimeout` delay.
   */
  constructor(engine, tipEstimator, fallbackSender, deps) {
    this.engine = engine;
    this.tipEstimator = tipEstimator;
    this.fallbackSender = fallbackSender;
    this.sleep = deps?.sleep ?? ((ms) => new Promise((r) => setTimeout(r, ms)));
  }

  /**
   * Submit via Jito; fall back to RPC sender if the bundle doesn't land.
   *
   * Correctness invariants (see CLAUDE.md):
   *  - A `bundle_id` is a receipt, NOT a landing guarantee. We poll in-flight
   *    status a bounded number of times (`maxBundlePolls`, default 10) and, on
   *    anything other than a confirmed "Landed", hand the SAME already-signed
   *    wire transaction to the RPC `TransactionSender`. The fallback inherits
   *    the sender's invariants (maxRetries:0, no re-sign, LVBH-bounded loop),
   *    so there is no double-charge and the path is idempotent by signature.
   *  - The fallback is mandatory, so a Block Engine failure must not abort the
   *    send: a rejected `sendBundle` goes straight to the RPC path (with
   *    `bundleId: null`), and a rejected status poll counts as "not landed
   *    yet" and consumes one poll attempt.
   *  - On the Jito-landed path we do NOT poll the cluster for confirmation:
   *    a "Landed" bundle IS the confirmation. The transaction may never have
   *    been broadcast to the RPC, so a getSignatureStatuses poll would falsely
   *    expire it. We return outcome "confirmed" with slot null directly.
   *
   * @param config Route config; reads `wireTransaction`, `signature`,
   *   `maxBundlePolls` and `rebroadcastIntervalMs` (poll spacing, default
   *   1000 ms) and forwards the whole object to the fallback sender.
   * @returns The Jito result (`route: "jito"`) or the fallback sender's result
   *   tagged with `route: "rpc"`.
   * @throws Whatever the fallback sender's `sendAndConfirm` throws.
   */
  async sendWithFallback(config) {
    const maxBundlePolls = config.maxBundlePolls ?? 10;
    const pollIntervalMs = config.rebroadcastIntervalMs ?? 1000;

    let bundleId = null;
    try {
      bundleId = (await this.engine.sendBundle([config.wireTransaction])) ?? null;
    } catch {
      // Block Engine unreachable or rejected the bundle: deliberately not
      // rethrown, the RPC fallback below is the recovery path.
    }

    if (bundleId !== null) {
      for (let poll = 0; poll < maxBundlePolls; poll++) {
        let status;
        try {
          const statuses = await this.engine.getInflightBundleStatuses([bundleId]);
          status = statuses?.[0]?.status;
        } catch {
          // A failed status query tells us nothing about the bundle; treat it
          // as "unknown" and let the bounded loop decide.
          status = undefined;
        }

        if (status === "Landed") {
          return {
            signature: config.signature,
            outcome: "confirmed",
            slot: null,
            rebroadcasts: 0,
            route: "jito",
            bundleId,
          };
        }
        // Unrecoverable on Jito -> stop polling and fall back to RPC.
        if (status === "Failed" || status === "Invalid") {
          break;
        }
        // No point waiting after the final poll: nothing will read the status
        // again, and the delay only ages the blockhash before the fallback.
        if (poll < maxBundlePolls - 1) {
          await this.sleep(pollIntervalMs);
        }
      }
    }

    // Bundle never landed -> RPC fallback (the mandatory invariant). The
    // sender owns the rebroadcast/confirm loop on the identical signed bytes.
    const fallbackResult = await this.fallbackSender.sendAndConfirm(config);
    return { ...fallbackResult, route: "rpc", bundleId };
  }
}
@@ -0,0 +1,33 @@
1
+ /**
2
+ * TipEstimator — sizes a Jito tip from live tip-floor percentiles, clamped to
3
+ * the protocol minimum (1000 lamports). Tips are economically distinct from
4
+ * priority fees and drive bundle auction priority.
5
+ */
6
+ export declare const MIN_TIP_LAMPORTS = 1000;
7
+ export type TipPercentile = "p25" | "p50" | "p75" | "p95" | "p99";
8
+ export interface TipFloor { // Tip-floor percentiles in lamports, as returned by TipEstimator.getTipFloor (tips.js multiplies the SOL figures by 1e9 and rounds).
9
+ p25: number; // from `landed_tips_25th_percentile`
10
+ p50: number; // from `landed_tips_50th_percentile`
11
+ p75: number; // from `landed_tips_75th_percentile`
12
+ p95: number; // from `landed_tips_95th_percentile`
13
+ p99: number; // from `landed_tips_99th_percentile`
14
+ }
15
+ export interface TipEstimatorConfig { // All fields optional; the config object itself may be omitted.
16
+ /** Endpoint serving tip-floor data (default Jito public tip_floor REST, "https://bundles.jito.wtf/api/v1/bundles/tip_floor" in tips.js). */
17
+ tipFloorUrl?: string;
18
+ fetchImpl?: typeof fetch; // Fetch replacement; tips.js falls back to globalThis.fetch when unset.
19
+ }
20
+ export declare class TipEstimator {
21
+ private readonly config?; // Stored as given; may be undefined.
22
+ constructor(config?: TipEstimatorConfig | undefined);
23
+ /**
24
+ * Fetches current tip-floor percentiles and returns them in lamports.
25
+ *
26
+ * The Jito public tip_floor REST endpoint returns an array whose first
27
+ * element carries `landed_tips_*_percentile` fields denominated in SOL. We
28
+ * are defensive about array-vs-object shape and convert SOL -> lamports.
29
+ * A field that is absent from the response is reported as 0. Performs one
 * GET request per call; nothing is cached.
 */
30
+ getTipFloor(): Promise<TipFloor>;
31
+ /** Recommends a tip (lamports) at the chosen percentile (default "p50"), clamped to a minimum of MIN_TIP_LAMPORTS. Calls getTipFloor() each time. */
32
+ recommendTip(percentile?: TipPercentile): Promise<number>;
33
+ }
@@ -0,0 +1,40 @@
1
+ /**
2
+ * TipEstimator — sizes a Jito tip from live tip-floor percentiles, clamped to
3
+ * the protocol minimum (1000 lamports). Tips are economically distinct from
4
+ * priority fees and drive bundle auction priority.
5
+ */
6
/** Smallest tip, in lamports, that `recommendTip` will ever return. */
export const MIN_TIP_LAMPORTS = 1000;

/** Jito's public tip-floor REST endpoint, used when no `tipFloorUrl` is configured. */
const DEFAULT_TIP_FLOOR_URL = "https://bundles.jito.wtf/api/v1/bundles/tip_floor";

/** Number of lamports in one SOL. */
const LAMPORTS_PER_SOL = 1e9;

export class TipEstimator {
  config;

  /**
   * @param config Optional `{ tipFloorUrl, fetchImpl }`; both fall back to
   *   defaults (the public Jito endpoint and `globalThis.fetch`).
   */
  constructor(config) {
    this.config = config;
  }

  /**
   * Fetches current tip-floor percentiles and returns them in lamports.
   *
   * The Jito public tip_floor REST endpoint returns an array whose first
   * element carries `landed_tips_*_percentile` fields denominated in SOL. We
   * are defensive about array-vs-object shape and convert SOL -> lamports.
   * A field that is missing, or that does not convert to a finite number, is
   * reported as 0 rather than NaN so downstream clamping stays meaningful.
   *
   * @returns `{ p25, p50, p75, p95, p99 }` as integer lamports.
   */
  async getTipFloor() {
    const fetchImpl = this.config?.fetchImpl ?? globalThis.fetch;
    const url = this.config?.tipFloorUrl ?? DEFAULT_TIP_FLOOR_URL;
    const res = await fetchImpl(url);
    const body = await res.json();
    const record = (Array.isArray(body) ? body[0] : body) ?? {};

    const toLamports = (sol) => {
      const lamports = Math.round((sol ?? 0) * LAMPORTS_PER_SOL);
      return Number.isFinite(lamports) ? lamports : 0;
    };

    return {
      p25: toLamports(record.landed_tips_25th_percentile),
      p50: toLamports(record.landed_tips_50th_percentile),
      p75: toLamports(record.landed_tips_75th_percentile),
      p95: toLamports(record.landed_tips_95th_percentile),
      p99: toLamports(record.landed_tips_99th_percentile),
    };
  }

  /**
   * Recommends a tip (lamports) at the chosen percentile, clamped to minimum.
   *
   * @param percentile One of "p25" | "p50" | "p75" | "p95" | "p99".
   * @returns At least `MIN_TIP_LAMPORTS`. An unrecognised percentile key yields
   *   the minimum instead of NaN (`Math.max(undefined, n)` is NaN).
   */
  async recommendTip(percentile = "p50") {
    const floor = await this.getTipFloor();
    const candidate = floor[percentile];
    return Math.max(Number.isFinite(candidate) ? candidate : 0, MIN_TIP_LAMPORTS);
  }
}
@@ -0,0 +1,62 @@
1
+ import type { Meter } from "@opentelemetry/api";
2
+ export interface Metrics { // Backend-agnostic sink for the client-side signals the SDK emits.
3
+ /** Per-endpoint request latency (ms) with success/failure outcome (`ok` is true on success). */
4
+ recordRequest(endpoint: string, method: string, latencyMs: number, ok: boolean): void;
5
+ /** A request to `endpoint` was rate-limited (HTTP 429). */
6
+ recordRateLimited(endpoint: string): void;
7
+ /** A transaction was (re)broadcast to the network; one call per broadcast. */
8
+ recordRebroadcast(signature: string): void;
9
+ /** Terminal transaction outcome. `slots` is presumably the number of slots the transaction took to reach that outcome — TODO confirm against the sender. */
10
+ recordLanding(signature: string, outcome: "confirmed" | "expired", slots: number): void;
11
+ /** Observed slot for an endpoint (drives slot-lag dashboards). */
12
+ recordSlot(endpoint: string, slot: bigint): void;
13
+ }
14
+ /** Trivial, fully-implemented metrics sink for tests and local debugging. Each record* call appends one entry to the matching array below; nothing is ever evicted. */
15
+ export declare class InMemoryMetrics implements Metrics {
16
+ readonly requests: Array<{
17
+ endpoint: string;
18
+ method: string;
19
+ latencyMs: number;
20
+ ok: boolean;
21
+ }>;
22
+ readonly rateLimited: string[]; // endpoint names, one per recordRateLimited call
23
+ readonly rebroadcasts: string[]; // signatures, one per recordRebroadcast call
24
+ readonly landings: Array<{
25
+ signature: string;
26
+ outcome: "confirmed" | "expired";
27
+ slots: number;
28
+ }>;
29
+ readonly slots: Array<{
30
+ endpoint: string;
31
+ slot: bigint;
32
+ }>;
33
+ recordRequest(endpoint: string, method: string, latencyMs: number, ok: boolean): void;
34
+ recordRateLimited(endpoint: string): void;
35
+ recordRebroadcast(signature: string): void;
36
+ recordLanding(signature: string, outcome: "confirmed" | "expired", slots: number): void;
37
+ recordSlot(endpoint: string, slot: bigint): void;
38
+ /** Convenience aggregations the diagnostics CLI / dashboard will reuse. Fraction of recorded requests with `ok` set; 1 when none were recorded. */
39
+ successRate(): number;
40
+ }
41
+ export interface OtelMetricsConfig { // All fields optional; the config object itself may be omitted.
42
+ serviceName?: string; // Used as the meter name when no `meter` is injected; metrics.js defaults it to "solana-resilience-kit".
43
+ /** OTLP endpoint, e.g. a Datadog Agent or OTel Collector. NOTE(review): the OtelMetrics constructor in metrics.js does not read this field — exporter wiring presumably happens outside this class; confirm. */
44
+ otlpEndpoint?: string;
45
+ /** Inject a Meter for tests; defaults to the global OTel meter. */
46
+ meter?: Meter;
47
+ }
48
+ /** OpenTelemetry/Datadog-backed metrics. Bridges {@link Metrics} to OTel instruments created once in the constructor. */
49
+ export declare class OtelMetrics implements Metrics {
50
+ private readonly latency; // histogram "rpc.request.latency_ms"
51
+ private readonly failures; // counter "rpc.request.failures"
52
+ private readonly rateLimited; // counter "rpc.rate_limited"
53
+ private readonly rebroadcasts; // counter "tx.rebroadcasts"
54
+ private readonly landings; // counter "tx.landings"
55
+ private readonly slot; // gauge "rpc.endpoint.slot"
56
+ constructor(config?: OtelMetricsConfig);
57
+ recordRequest(endpoint: string, method: string, latencyMs: number, ok: boolean): void; // always records latency; also bumps the failure counter when `ok` is false
58
+ recordRateLimited(endpoint: string): void;
59
+ recordRebroadcast(signature: string): void;
60
+ recordLanding(signature: string, outcome: "confirmed" | "expired", slots: number): void;
61
+ recordSlot(endpoint: string, slot: bigint): void; // slot is converted with Number() before being recorded
62
+ }
@@ -0,0 +1,74 @@
1
+ /**
2
+ * Observability surface. The SDK emits a small, fixed set of client-side
3
+ * signals — the metrics the ecosystem currently re-implements by hand. The
4
+ * `Metrics` interface decouples the SDK from any backend; `InMemoryMetrics` is
5
+ * a real implementation used by tests to assert the SDK emits the right
6
+ * signals, and `OtelMetrics` bridges to OpenTelemetry /
7
+ * Datadog via the OTLP exporter.
8
+ */
9
+ import { metrics } from "@opentelemetry/api";
10
/**
 * Array-backed metrics sink for tests and local debugging. Every `record*`
 * call appends exactly one entry to the matching public array.
 */
export class InMemoryMetrics {
  requests = [];
  rateLimited = [];
  rebroadcasts = [];
  landings = [];
  slots = [];

  /** Stores one request sample: endpoint, RPC method, latency in ms, success flag. */
  recordRequest(endpoint, method, latencyMs, ok) {
    const sample = { endpoint, method, latencyMs, ok };
    this.requests.push(sample);
  }

  /** Notes that `endpoint` answered with a rate-limit response. */
  recordRateLimited(endpoint) {
    this.rateLimited.push(endpoint);
  }

  /** Notes one (re)broadcast of the transaction identified by `signature`. */
  recordRebroadcast(signature) {
    this.rebroadcasts.push(signature);
  }

  /** Stores the terminal outcome of a transaction. */
  recordLanding(signature, outcome, slots) {
    const landing = { signature, outcome, slots };
    this.landings.push(landing);
  }

  /** Stores the slot most recently observed on `endpoint`. */
  recordSlot(endpoint, slot) {
    const observation = { endpoint, slot };
    this.slots.push(observation);
  }

  /**
   * Share of recorded requests whose `ok` flag is truthy.
   *
   * @returns A value in [0, 1]; 1 when nothing has been recorded yet.
   */
  successRate() {
    const total = this.requests.length;
    if (total === 0) {
      return 1;
    }
    let succeeded = 0;
    for (const request of this.requests) {
      if (request.ok) {
        succeeded += 1;
      }
    }
    return succeeded / total;
  }
}
39
/**
 * `Metrics` implementation that forwards every signal to OpenTelemetry
 * instruments. The instruments are created once, in the constructor, from the
 * injected `Meter` or — when none is given — from the global meter named after
 * `serviceName` (default "solana-resilience-kit").
 *
 * NOTE(review): `recordRebroadcast` and `recordLanding` attach the transaction
 * signature (and `slots`) as attributes, which creates a distinct attribute
 * set per transaction. Confirm the backend can absorb that cardinality.
 */
export class OtelMetrics {
  latency;
  failures;
  rateLimited;
  rebroadcasts;
  landings;
  slot;

  /** @param config Optional `{ meter, serviceName }`. */
  constructor(config) {
    const injected = config?.meter;
    const meter = injected ?? metrics.getMeter(config?.serviceName ?? "solana-resilience-kit");

    this.latency = meter.createHistogram("rpc.request.latency_ms");
    this.failures = meter.createCounter("rpc.request.failures");
    this.rateLimited = meter.createCounter("rpc.rate_limited");
    this.rebroadcasts = meter.createCounter("tx.rebroadcasts");
    this.landings = meter.createCounter("tx.landings");
    this.slot = meter.createGauge("rpc.endpoint.slot");
  }

  /** Records the latency sample and, for failed requests, bumps the failure counter. */
  recordRequest(endpoint, method, latencyMs, ok) {
    this.latency.record(latencyMs, { endpoint, method, ok });
    if (ok) {
      return;
    }
    this.failures.add(1, { endpoint, method });
  }

  /** Counts one rate-limited request against `endpoint`. */
  recordRateLimited(endpoint) {
    const attributes = { endpoint };
    this.rateLimited.add(1, attributes);
  }

  /** Counts one (re)broadcast, tagged with the transaction signature. */
  recordRebroadcast(signature) {
    const attributes = { signature };
    this.rebroadcasts.add(1, attributes);
  }

  /** Counts one terminal outcome, tagged with signature, outcome and slots. */
  recordLanding(signature, outcome, slots) {
    const attributes = { signature, outcome, slots };
    this.landings.add(1, attributes);
  }

  /** Publishes the endpoint's latest slot on the gauge. */
  recordSlot(endpoint, slot) {
    // Gauges take plain numbers; slots are far below Number.MAX_SAFE_INTEGER.
    const asNumber = Number(slot);
    this.slot.record(asNumber, { endpoint });
  }
}
@@ -0,0 +1,41 @@
1
+ /**
2
+ * HealthMonitor — tracks per-endpoint freshness and reliability so the pool can
3
+ * route to healthy, up-to-date nodes and avoid the "lagging node drops your tx"
4
+ * failure mode. Freshness is judged by comparing observed slots across
5
+ * endpoints; an endpoint more than `maxSlotLag` behind the best is unhealthy.
6
+ */
7
+ export interface EndpointHealth { // One entry of HealthMonitor.snapshot().
8
+ name: string;
9
+ healthy: boolean; // Same verdict isHealthy(name) gives at snapshot time.
10
+ slot: bigint | null; // Slot passed to the latest recordSuccess that supplied one; null until then.
11
+ /** Exponentially-weighted mean latency in ms. Seeded with the first success sample; 0 before any success. */
12
+ latencyMs: number;
13
+ /** Rolling error rate in [0,1]. health.js moves it by 0.1 per call: up on failure, down on success, clamped. */
14
+ errorRate: number;
15
+ consecutiveFailures: number; // Reset to 0 by every recordSuccess.
16
+ lastError: unknown | null; // Error from the latest recordFailure; cleared to null by recordSuccess.
17
+ }
18
+ export interface HealthMonitorConfig {
19
+ endpointNames: string[]; // Endpoints to track; record* calls for any other name are ignored by health.js.
20
+ /** Slots behind the freshest node before an endpoint is deemed stale. Defaults to 150n; a lag exactly equal to the limit is still healthy. */
21
+ maxSlotLag?: bigint;
22
+ /** Consecutive failures before an endpoint is ejected. Defaults to 3. */
23
+ failureThreshold?: number;
24
+ /** EWMA smoothing factor for latency (0..1): the weight given to the newest sample. Defaults to 0.3. */
25
+ latencyAlpha?: number;
26
+ }
27
+ export declare class HealthMonitor {
28
+ private readonly maxSlotLag;
29
+ private readonly failureThreshold;
30
+ private readonly latencyAlpha;
31
+ private readonly states; // Map keyed by endpoint name, populated from config.endpointNames.
32
+ constructor(config: HealthMonitorConfig);
33
+ recordSuccess(endpoint: string, latencyMs: number, slot?: bigint): void; // Resets the failure streak and lastError, folds latency into the EWMA, stores `slot` when given. Unknown endpoints are ignored.
34
+ recordFailure(endpoint: string, error: unknown): void; // Extends the failure streak and stores `error`. Unknown endpoints are ignored.
35
+ isHealthy(endpoint: string): boolean; // False for unknown endpoints, for failure streaks >= failureThreshold, and for endpoints more than maxSlotLag behind the freshest observed slot.
36
+ /** Healthy endpoints ordered best-first (freshest slot, then lowest latency). Endpoints with no observed slot sort last. */
37
+ rankByFreshness(): string[];
38
+ snapshot(): EndpointHealth[]; // One entry per configured endpoint, healthy or not.
39
+ /** Max of all non-null observed slots across endpoints; 0n when none seen. */
40
+ private freshestSlot;
41
+ }
@@ -0,0 +1,120 @@
1
+ /**
2
+ * HealthMonitor — tracks per-endpoint freshness and reliability so the pool can
3
+ * route to healthy, up-to-date nodes and avoid the "lagging node drops your tx"
4
+ * failure mode. Freshness is judged by comparing observed slots across
5
+ * endpoints; an endpoint more than `maxSlotLag` behind the best is unhealthy.
6
+ */
7
/** Step applied to errorRate per success (down) / failure (up). Keeps the rate
 * a bounded [0,1] rolling signal without needing a full window of samples. */
const ERROR_RATE_STEP = 0.1;

/**
 * Tracks per-endpoint slot freshness, latency and failure streaks so callers
 * can route to healthy, up-to-date endpoints.
 */
export class HealthMonitor {
  maxSlotLag;
  failureThreshold;
  latencyAlpha;
  states = new Map();

  /**
   * @param config `endpointNames` (required) plus optional `maxSlotLag`
   *   (default 150n), `failureThreshold` (default 3) and `latencyAlpha`
   *   (default 0.3).
   */
  constructor(config) {
    this.maxSlotLag = config.maxSlotLag ?? 150n;
    this.failureThreshold = config.failureThreshold ?? 3;
    this.latencyAlpha = config.latencyAlpha ?? 0.3;
    for (const name of config.endpointNames) {
      this.states.set(name, {
        name,
        slot: null,
        latencyMs: 0,
        latencySeeded: false,
        errorRate: 0,
        consecutiveFailures: 0,
        lastError: null,
      });
    }
  }

  /**
   * Registers a successful request.
   *
   * @param endpoint Endpoint name; unknown names are ignored.
   * @param latencyMs Observed latency; the first sample seeds the EWMA.
   * @param slot Optional slot reported by the endpoint; stored as-is.
   */
  recordSuccess(endpoint, latencyMs, slot) {
    const state = this.states.get(endpoint);
    if (state === undefined) {
      return; // guard unknown endpoint names
    }
    state.consecutiveFailures = 0;
    if (!state.latencySeeded) {
      state.latencyMs = latencyMs;
      state.latencySeeded = true;
    } else {
      state.latencyMs =
        this.latencyAlpha * latencyMs + (1 - this.latencyAlpha) * state.latencyMs;
    }
    if (slot !== undefined) {
      state.slot = slot;
    }
    state.errorRate = clamp01(state.errorRate - ERROR_RATE_STEP);
    state.lastError = null;
  }

  /**
   * Registers a failed request.
   *
   * @param endpoint Endpoint name; unknown names are ignored.
   * @param error Stored as `lastError` until the next success.
   */
  recordFailure(endpoint, error) {
    const state = this.states.get(endpoint);
    if (state === undefined) {
      return; // guard unknown endpoint names
    }
    state.consecutiveFailures += 1;
    state.lastError = error;
    state.errorRate = clamp01(state.errorRate + ERROR_RATE_STEP);
  }

  /**
   * @returns false for unknown endpoints, for endpoints whose failure streak
   *   reached `failureThreshold`, and for endpoints more than `maxSlotLag`
   *   slots behind the freshest observed slot. Endpoints that never reported
   *   a slot are not penalised for lag.
   */
  isHealthy(endpoint) {
    const state = this.states.get(endpoint);
    if (state === undefined) {
      return false; // unknown endpoint is never healthy
    }
    return isStateHealthy(state, this.freshestSlot(), this.failureThreshold, this.maxSlotLag);
  }

  /** Healthy endpoints ordered best-first (freshest slot, then lowest latency). */
  rankByFreshness() {
    // Compute the reference slot once per call instead of once per endpoint.
    const freshest = this.freshestSlot();
    return [...this.states.values()]
      .filter((s) => isStateHealthy(s, freshest, this.failureThreshold, this.maxSlotLag))
      .sort(compareByFreshness)
      .map((s) => s.name);
  }

  /** @returns One health record per configured endpoint, in insertion order. */
  snapshot() {
    const freshest = this.freshestSlot();
    return [...this.states.values()].map((s) => ({
      name: s.name,
      healthy: isStateHealthy(s, freshest, this.failureThreshold, this.maxSlotLag),
      slot: s.slot,
      latencyMs: s.latencyMs,
      errorRate: s.errorRate,
      consecutiveFailures: s.consecutiveFailures,
      lastError: s.lastError,
    }));
  }

  /** Max of all non-null observed slots across endpoints; 0n when none seen. */
  freshestSlot() {
    let max = 0n;
    for (const state of this.states.values()) {
      if (state.slot !== null && state.slot > max) {
        max = state.slot;
      }
    }
    return max;
  }
}

/** Health verdict for one endpoint state against a precomputed freshest slot. */
function isStateHealthy(state, freshest, failureThreshold, maxSlotLag) {
  if (state.consecutiveFailures >= failureThreshold) {
    return false;
  }
  if (state.slot !== null && freshest - state.slot > maxSlotLag) {
    return false;
  }
  return true;
}

/** Sort comparator: higher slot first, null slots last, then lower latency. */
function compareByFreshness(a, b) {
  if (a.slot !== b.slot) {
    if (a.slot === null) {
      return 1;
    }
    if (b.slot === null) {
      return -1;
    }
    if (a.slot > b.slot) {
      return -1;
    }
    if (a.slot < b.slot) {
      return 1;
    }
  }
  return a.latencyMs - b.latencyMs;
}

/** Clamps `value` into the closed interval [0, 1]. */
function clamp01(value) {
  if (value < 0) {
    return 0;
  }
  if (value > 1) {
    return 1;
  }
  return value;
}