@checkstack/healthcheck-backend 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,35 @@
1
1
  # @checkstack/healthcheck-backend
2
2
 
3
+ ## 0.10.0
4
+
5
+ ### Minor Changes
6
+
7
+ - 869b4ab: ## Health Check Execution Improvements
8
+
9
+ ### Breaking Changes (backend-api)
10
+
11
+ - `HealthCheckStrategy.createClient()` now accepts `unknown` instead of `TConfig` due to TypeScript contravariance constraints. Implementations should use `this.config.validate(config)` to narrow the type.
12
+
13
+ ### Features
14
+
15
+ - **Platform-level hard timeout**: The executor now wraps the entire health check execution (connection + all collectors) in a single timeout, ensuring checks never hang indefinitely.
16
+ - **Parallel collector execution**: Collectors now run in parallel using `Promise.allSettled()`, improving performance while ensuring all collectors complete regardless of individual failures.
17
+ - **Base strategy config schema**: All strategy configs now extend `baseStrategyConfigSchema`, which provides a standardized `timeout` field (default 30s, minimum 100ms).
18
+
19
+ ### Fixes
20
+
21
+ - Fixed a bug where the HTTP and Jenkins strategies cleared their timeouts before the full response body had been read.
22
+ - Simplified registry type signatures by using default type parameters.
23
+
24
+ ### Patch Changes
25
+
26
+ - Updated dependencies [869b4ab]
27
+ - @checkstack/backend-api@0.8.0
28
+ - @checkstack/catalog-backend@0.2.13
29
+ - @checkstack/command-backend@0.1.11
30
+ - @checkstack/integration-backend@0.1.11
31
+ - @checkstack/queue-api@0.2.5
32
+
3
33
  ## 0.9.0
4
34
 
5
35
  ### Minor Changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@checkstack/healthcheck-backend",
3
- "version": "0.9.0",
3
+ "version": "0.10.0",
4
4
  "type": "module",
5
5
  "main": "src/index.ts",
6
6
  "scripts": {
@@ -10,17 +10,17 @@
10
10
  "lint:code": "eslint . --max-warnings 0"
11
11
  },
12
12
  "dependencies": {
13
- "@checkstack/backend-api": "0.5.2",
14
- "@checkstack/catalog-backend": "0.2.10",
15
- "@checkstack/catalog-common": "1.2.6",
16
- "@checkstack/command-backend": "0.1.8",
17
- "@checkstack/common": "0.6.1",
18
- "@checkstack/healthcheck-common": "0.8.1",
19
- "@checkstack/incident-common": "0.4.2",
20
- "@checkstack/integration-backend": "0.1.8",
21
- "@checkstack/maintenance-common": "0.4.4",
22
- "@checkstack/queue-api": "0.2.2",
23
- "@checkstack/signal-common": "0.1.5",
13
+ "@checkstack/backend-api": "0.7.0",
14
+ "@checkstack/catalog-backend": "0.2.12",
15
+ "@checkstack/catalog-common": "1.2.7",
16
+ "@checkstack/command-backend": "0.1.10",
17
+ "@checkstack/common": "0.6.2",
18
+ "@checkstack/healthcheck-common": "0.8.2",
19
+ "@checkstack/incident-common": "0.4.3",
20
+ "@checkstack/integration-backend": "0.1.10",
21
+ "@checkstack/maintenance-common": "0.4.5",
22
+ "@checkstack/queue-api": "0.2.4",
23
+ "@checkstack/signal-common": "0.1.6",
24
24
  "@hono/zod-validator": "^0.7.6",
25
25
  "drizzle-orm": "^0.45.1",
26
26
  "hono": "^4.0.0",
@@ -30,7 +30,7 @@
30
30
  "devDependencies": {
31
31
  "@checkstack/drizzle-helper": "0.0.3",
32
32
  "@checkstack/scripts": "0.1.1",
33
- "@checkstack/test-utils-backend": "0.1.8",
33
+ "@checkstack/test-utils-backend": "0.1.10",
34
34
  "@checkstack/tsconfig": "0.0.3",
35
35
  "@orpc/server": "^1.13.2",
36
36
  "@types/bun": "^1.0.0",
@@ -28,7 +28,7 @@ const createMockRegistry = (): HealthCheckRegistry => ({
28
28
  description: "Mock",
29
29
  config: new Versioned({
30
30
  version: 1,
31
- schema: z.object({}),
31
+ schema: z.object({ timeout: z.number().default(30_000) }),
32
32
  }),
33
33
  result: new Versioned({
34
34
  version: 1,
@@ -5,6 +5,9 @@ import {
5
5
  type CollectorRegistry,
6
6
  evaluateAssertions,
7
7
  type SafeDatabase,
8
+ type BaseStrategyConfig,
9
+ type ConnectedClient,
10
+ type TransportClient,
8
11
  } from "@checkstack/backend-api";
9
12
  import { QueueManager } from "@checkstack/queue-api";
10
13
  import {
@@ -305,24 +308,175 @@ async function executeHealthCheckJob(props: {
305
308
  return;
306
309
  }
307
310
 
308
- // Execute health check using createClient pattern
311
+ // Extract timeout from strategy config for platform-level enforcement
312
+ const strategyConfig = configRow.config as unknown as BaseStrategyConfig;
313
+ const executionTimeout = strategyConfig.timeout ?? 60_000;
314
+
315
+ // Execute health check using createClient pattern with unified hard timeout
309
316
  const start = performance.now();
310
- let connectedClient;
317
+ let connectionTimeMs: number | undefined;
318
+ let connectedClient:
319
+ | ConnectedClient<TransportClient<never, unknown>>
320
+ | undefined;
321
+ const collectors = configRow.collectors ?? [];
322
+ const collectorResults: Record<string, unknown> = {};
323
+ let hasCollectorError = false;
324
+ let errorMessage: string | undefined;
325
+
311
326
  try {
312
- connectedClient = await strategy.createClient(
313
- configRow.config as Record<string, unknown>,
314
- );
327
+ // Platform-level hard timeout wrapping the entire execution sequence
328
+ await Promise.race([
329
+ (async () => {
330
+ // 1. Establish connection
331
+ connectedClient = await strategy.createClient(strategyConfig);
332
+ connectionTimeMs = Math.round(performance.now() - start);
333
+
334
+ // 2. Execute collectors in parallel
335
+ const collectorPromises = collectors.map(async (collectorEntry) => {
336
+ const registered = collectorRegistry.getCollector(
337
+ collectorEntry.collectorId,
338
+ );
339
+ if (!registered) {
340
+ logger.warn(
341
+ `Collector ${collectorEntry.collectorId} not found, skipping`,
342
+ );
343
+ return { storageKey: collectorEntry.id, skipped: true };
344
+ }
345
+
346
+ const storageKey = collectorEntry.id;
347
+
348
+ try {
349
+ const collectorResult = await registered.collector.execute({
350
+ config: collectorEntry.config,
351
+ client: connectedClient!.client,
352
+ pluginId: configRow.strategyId,
353
+ });
354
+
355
+ // Check for collector-level error
356
+ let collectorError: string | undefined;
357
+ if (collectorResult.error) {
358
+ collectorError = collectorResult.error;
359
+ }
360
+
361
+ // Evaluate per-collector assertions
362
+ let assertionFailed: string | undefined;
363
+ if (
364
+ collectorEntry.assertions &&
365
+ collectorEntry.assertions.length > 0 &&
366
+ collectorResult.result
367
+ ) {
368
+ const failedAssertion = evaluateAssertions(
369
+ collectorEntry.assertions,
370
+ collectorResult.result as Record<string, unknown>,
371
+ );
372
+ if (failedAssertion) {
373
+ assertionFailed = `${failedAssertion.field} ${
374
+ failedAssertion.operator
375
+ } ${failedAssertion.value ?? ""}`;
376
+ logger.debug(
377
+ `Collector ${storageKey} assertion failed: ${assertionFailed}`,
378
+ );
379
+ }
380
+ }
381
+
382
+ // Strip ephemeral fields before storage
383
+ const strippedResult = stripEphemeralFields(
384
+ collectorResult.result as Record<string, unknown>,
385
+ registered.collector.result.schema,
386
+ );
387
+
388
+ return {
389
+ storageKey,
390
+ skipped: false,
391
+ success: true,
392
+ collectorError,
393
+ assertionFailed,
394
+ result: {
395
+ _collectorId: collectorEntry.collectorId,
396
+ _assertionFailed: assertionFailed,
397
+ ...strippedResult,
398
+ },
399
+ };
400
+ } catch (error) {
401
+ const errorStr =
402
+ error instanceof Error ? error.message : String(error);
403
+ logger.debug(`Collector ${storageKey} failed: ${errorStr}`);
404
+ return {
405
+ storageKey,
406
+ skipped: false,
407
+ success: false,
408
+ error: errorStr,
409
+ result: {
410
+ _collectorId: collectorEntry.collectorId,
411
+ _assertionFailed: undefined,
412
+ error: errorStr,
413
+ },
414
+ };
415
+ }
416
+ });
417
+
418
+ // Wait for all collectors to complete
419
+ const settledResults = await Promise.allSettled(collectorPromises);
420
+
421
+ // Process results from all collectors
422
+ for (const settled of settledResults) {
423
+ if (settled.status === "rejected") {
424
+ // This shouldn't happen since we catch errors above, but handle it
425
+ hasCollectorError = true;
426
+ if (!errorMessage) errorMessage = String(settled.reason);
427
+ continue;
428
+ }
429
+
430
+ const result = settled.value;
431
+ if (result.skipped) continue;
432
+
433
+ // Store the result
434
+ collectorResults[result.storageKey] = result.result;
435
+
436
+ // Track errors
437
+ if (
438
+ !result.success ||
439
+ result.collectorError ||
440
+ result.assertionFailed
441
+ ) {
442
+ hasCollectorError = true;
443
+ if (!errorMessage) {
444
+ errorMessage =
445
+ result.error ||
446
+ result.collectorError ||
447
+ (result.assertionFailed
448
+ ? `Assertion failed: ${result.assertionFailed}`
449
+ : undefined);
450
+ }
451
+ }
452
+ }
453
+ })(),
454
+ new Promise<never>((_, reject) =>
455
+ setTimeout(
456
+ () =>
457
+ reject(
458
+ new Error(`Execution timeout after ${executionTimeout}ms`),
459
+ ),
460
+ executionTimeout,
461
+ ),
462
+ ),
463
+ ]);
315
464
  } catch (error) {
316
- // Connection failed
317
465
  const latencyMs = Math.round(performance.now() - start);
318
- const errorMessage =
319
- error instanceof Error ? error.message : "Connection failed";
466
+ const caughtError =
467
+ error instanceof Error ? error.message : String(error);
468
+
469
+ // Use a specific error message if available, otherwise use the caught error
470
+ const finalError = errorMessage || caughtError;
320
471
 
321
472
  const result = {
322
473
  status: "unhealthy" as const,
323
474
  latencyMs,
324
- message: errorMessage,
325
- metadata: { connected: false, error: errorMessage },
475
+ message: finalError,
476
+ metadata: {
477
+ connected: !!connectedClient,
478
+ error: finalError,
479
+ },
326
480
  };
327
481
 
328
482
  await db.insert(healthCheckRuns).values({
@@ -333,7 +487,6 @@ async function executeHealthCheckJob(props: {
333
487
  result: { ...result } as Record<string, unknown>,
334
488
  });
335
489
 
336
- // Trigger incremental hourly aggregation
337
490
  await incrementHourlyAggregate({
338
491
  db,
339
492
  systemId,
@@ -346,10 +499,9 @@ async function executeHealthCheckJob(props: {
346
499
  });
347
500
 
348
501
  logger.debug(
349
- `Health check ${configId} for system ${systemId} failed: ${errorMessage}`,
502
+ `Health check ${configId} for system ${systemId} failed: ${finalError}`,
350
503
  );
351
504
 
352
- // Broadcast failure signal
353
505
  await signalService.broadcast(HEALTH_CHECK_RUN_COMPLETED, {
354
506
  systemId,
355
507
  systemName,
@@ -359,7 +511,6 @@ async function executeHealthCheckJob(props: {
359
511
  latencyMs: result.latencyMs,
360
512
  });
361
513
 
362
- // Check and notify state change
363
514
  const newState = await service.getSystemHealthStatus(systemId);
364
515
  if (newState.status !== previousStatus) {
365
516
  await notifyStateChange({
@@ -374,98 +525,14 @@ async function executeHealthCheckJob(props: {
374
525
  }
375
526
 
376
527
  return;
377
- }
378
-
379
- const connectionTimeMs = Math.round(performance.now() - start);
380
-
381
- // Execute collectors
382
- const collectors = configRow.collectors ?? [];
383
- const collectorResults: Record<string, unknown> = {};
384
- let hasCollectorError = false;
385
- let errorMessage: string | undefined;
386
-
387
- try {
388
- for (const collectorEntry of collectors) {
389
- const registered = collectorRegistry.getCollector(
390
- collectorEntry.collectorId,
391
- );
392
- if (!registered) {
393
- logger.warn(
394
- `Collector ${collectorEntry.collectorId} not found, skipping`,
395
- );
396
- continue;
397
- }
398
-
399
- // Use the collector's UUID as the storage key
400
- const storageKey = collectorEntry.id;
401
-
528
+ } finally {
529
+ if (connectedClient) {
402
530
  try {
403
- const collectorResult = await registered.collector.execute({
404
- config: collectorEntry.config,
405
- client: connectedClient.client,
406
- pluginId: configRow.strategyId,
407
- });
408
-
409
- // Check for collector-level error
410
- if (collectorResult.error) {
411
- hasCollectorError = true;
412
- errorMessage = collectorResult.error;
413
- }
414
-
415
- // Evaluate per-collector assertions
416
- let assertionFailed: string | undefined;
417
- if (
418
- collectorEntry.assertions &&
419
- collectorEntry.assertions.length > 0 &&
420
- collectorResult.result
421
- ) {
422
- const assertions = collectorEntry.assertions;
423
- const failedAssertion = evaluateAssertions(
424
- assertions,
425
- collectorResult.result as Record<string, unknown>,
426
- );
427
- if (failedAssertion) {
428
- hasCollectorError = true;
429
- assertionFailed = `${failedAssertion.field} ${
430
- failedAssertion.operator
431
- } ${failedAssertion.value ?? ""}`;
432
- errorMessage = `Assertion failed: ${assertionFailed}`;
433
- logger.debug(
434
- `Collector ${storageKey} assertion failed: ${errorMessage}`,
435
- );
436
- }
437
- }
438
-
439
- // Strip ephemeral fields (like HTTP body) before storage to save space
440
- const strippedResult = stripEphemeralFields(
441
- collectorResult.result as Record<string, unknown>,
442
- registered.collector.result.schema,
443
- );
444
-
445
- // Store result under the collector's UUID, with collector type and assertion metadata
446
- collectorResults[storageKey] = {
447
- _collectorId: collectorEntry.collectorId, // Store the type for frontend schema linking
448
- _assertionFailed: assertionFailed, // null if no assertion failed
449
- ...strippedResult,
450
- };
531
+ connectedClient.close();
451
532
  } catch (error) {
452
- hasCollectorError = true;
453
- errorMessage = error instanceof Error ? error.message : String(error);
454
- collectorResults[storageKey] = {
455
- _collectorId: collectorEntry.collectorId,
456
- _assertionFailed: undefined,
457
- error: errorMessage,
458
- };
459
- logger.debug(`Collector ${storageKey} failed: ${errorMessage}`);
533
+ logger.warn(`Failed to close connection: ${error}`);
460
534
  }
461
535
  }
462
- } finally {
463
- // Clean up connection
464
- try {
465
- connectedClient.close();
466
- } catch {
467
- // Ignore close errors
468
- }
469
536
  }
470
537
 
471
538
  // Determine health status based on collector results