@checkstack/anomaly-backend 1.2.7 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,74 @@
1
1
  # @checkstack/anomaly-backend
2
2
 
3
+ ## 1.3.0
4
+
5
+ ### Minor Changes
6
+
7
+ - dbb76a2: fix(ai): guide the assistant to find all issues and fix the anomaly tool
8
+
9
+ Two assistant problems reported in production:
10
+
11
+ 1. Asked "are there any issues?", the model answered from a single source (an
12
+ SLO breach) and missed a system with a failing health check. The chat
13
+ system prompt now instructs the model to check ALL issue sources before
14
+ answering - failing health checks (`healthcheck_status`), breaching/at-risk
15
+ SLOs (`slo_listObjectives`), active anomalies (`anomaly_list`), and open
16
+ incidents (`incident_list`) - and not to stop after the first source. It
17
+ also tells the model that `systemId` must be a real system UUID (resolve a
18
+ name via the catalog tool first) and to never invent ids or filter values.
19
+
20
+ 2. The anomaly tool was named `anomaly.explain` but actually LISTS anomalies
21
+ with optional filters. The misleading name led the model to pass a
22
+ non-existent filter value ("Type validation failed") and a system
23
+ name/anomaly id as `systemId` ("a value was malformed"). Renamed to
24
+ `anomaly.list` with a description that spells out the optional filters and
25
+ their valid enum values (state: suspicious|anomaly|recovered, kind:
26
+ spike|drift, suppression: active|suppressed|all) and that `systemId` is a
27
+ system UUID.
28
+
29
+ Also sharpened the `healthcheck.status` and `slo.listObjectives` tool
30
+ descriptions to be use-case oriented ("use when asked what is failing /
31
+ breaching").
32
+
33
+ BREAKING: the anomaly read tool's name changes from `anomaly_explain` to
34
+ `anomaly_list` over the MCP `tools/list` surface. MCP clients referencing it by
35
+ the old name must update.
36
+
37
+ - 0b6f01b: feat(anomaly): contribute anomaly signals to the backend system.issues aggregator
38
+
39
+ The anomaly plugin now registers a `system.issues` contributor (sourceId
40
+ `anomaly`) from its backend `init`, so the AI assistant surfaces confirmed
41
+ anomalies and suspicious states alongside incidents, SLOs, health checks, and
42
+ dependency problems.
43
+
44
+ The contributor enforces its own `anomaly_feed.read` access gate (returning an
45
+ empty map - never throwing - when the principal lacks access; service users are
46
+ trusted), then reads the current problem rows for every system from the shared,
47
+ durable `anomalies` table via a new global `getActiveSignalAnomalies` service
48
+ method (state = anomaly | suspicious, suppressed rows excluded). The answer is
49
+ therefore identical on every pod, and only systems with a current problem appear
50
+ in the result.
51
+
52
+ The row->signal mapping (source/tone/label/detail/href/accessRule/iconName) is
53
+ extracted into a new pure `deriveAnomalySignals` deriver in
54
+ `@checkstack/anomaly-common`, shared by both the backend contributor and the
55
+ frontend `AnomalySignalsFiller` so the two surfaces stay in lockstep. The
56
+ frontend filler now delegates to that deriver with unchanged behavior.
57
+
58
+ ### Patch Changes
59
+
60
+ - Updated dependencies [dbb76a2]
61
+ - Updated dependencies [0b6f01b]
62
+ - Updated dependencies [0b6f01b]
63
+ - Updated dependencies [0b6f01b]
64
+ - @checkstack/ai-backend@0.3.0
65
+ - @checkstack/healthcheck-backend@1.7.0
66
+ - @checkstack/anomaly-common@1.4.0
67
+ - @checkstack/healthcheck-common@1.6.0
68
+ - @checkstack/catalog-backend@1.4.8
69
+ - @checkstack/backend-api@0.21.6
70
+ - @checkstack/gitops-backend@0.5.6
71
+
3
72
  ## 1.2.7
4
73
 
5
74
  ### Patch Changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@checkstack/anomaly-backend",
3
- "version": "1.2.7",
3
+ "version": "1.3.0",
4
4
  "license": "Elastic-2.0",
5
5
  "type": "module",
6
6
  "main": "src/index.ts",
@@ -14,18 +14,18 @@
14
14
  "lint:code": "eslint . --max-warnings 0"
15
15
  },
16
16
  "dependencies": {
17
- "@checkstack/backend-api": "0.21.5",
18
- "@checkstack/ai-backend": "0.2.0",
17
+ "@checkstack/backend-api": "0.21.6",
18
+ "@checkstack/ai-backend": "0.3.0",
19
19
  "@checkstack/common": "0.15.0",
20
- "@checkstack/anomaly-common": "1.3.4",
20
+ "@checkstack/anomaly-common": "1.4.0",
21
21
  "@checkstack/signal-common": "0.2.9",
22
- "@checkstack/healthcheck-common": "1.5.4",
22
+ "@checkstack/healthcheck-common": "1.6.0",
23
23
  "@checkstack/queue-api": "0.3.12",
24
24
  "@checkstack/cache-api": "0.3.12",
25
25
  "@checkstack/cache-utils": "0.2.17",
26
- "@checkstack/healthcheck-backend": "1.6.7",
27
- "@checkstack/catalog-backend": "1.4.7",
28
- "@checkstack/gitops-backend": "0.5.5",
26
+ "@checkstack/healthcheck-backend": "1.7.0",
27
+ "@checkstack/catalog-backend": "1.4.8",
28
+ "@checkstack/gitops-backend": "0.5.6",
29
29
  "@checkstack/gitops-common": "0.6.3",
30
30
  "@checkstack/catalog-common": "2.3.4",
31
31
  "@checkstack/notification-common": "1.3.3",
@@ -38,7 +38,7 @@
38
38
  "devDependencies": {
39
39
  "@checkstack/drizzle-helper": "0.0.5",
40
40
  "@checkstack/scripts": "0.6.1",
41
- "@checkstack/test-utils-backend": "0.1.39",
41
+ "@checkstack/test-utils-backend": "0.1.40",
42
42
  "@checkstack/tsconfig": "0.0.7",
43
43
  "@types/bun": "^1.0.0",
44
44
  "date-fns": "^4.4.0",
@@ -5,20 +5,19 @@ import {
5
5
  } from "@checkstack/ai-backend";
6
6
  import { anomalyContract, pluginMetadata } from "@checkstack/anomaly-common";
7
7
 
8
- describe("anomaly AI projection (anomaly.explain)", () => {
8
+ describe("anomaly AI projection (anomaly.list)", () => {
9
9
  test("projects getAnomalies as a read-only tool with the source procedure's access rules", () => {
10
10
  const tool = buildProjectedTool({
11
11
  procedure: anomalyContract.getAnomalies,
12
12
  sourcePluginMetadata: pluginMetadata,
13
13
  procedureKey: "getAnomalies",
14
- name: "anomaly.explain",
15
- description:
16
- "List detected anomalies (statistical sigma/drift) for context. Read-only.",
14
+ name: "anomaly.list",
15
+ description: "List detected anomalies (statistical spikes / drift).",
17
16
  effect: "read",
18
17
  execute: deferredProjectionExecute,
19
18
  });
20
19
 
21
- expect(tool.name).toBe("anomaly.explain");
20
+ expect(tool.name).toBe("anomaly.list");
22
21
  expect(tool.effect).toBe("read");
23
22
 
24
23
  // The projection inherits the source procedure's gating — it must NOT
package/src/plugin.ts CHANGED
@@ -1,8 +1,11 @@
1
1
  import { createBackendPlugin, coreServices, type SafeDatabase } from "@checkstack/backend-api";
2
2
  import {
3
3
  aiToolProjectionExtensionPoint,
4
+ systemSignalsExtensionPoint,
5
+ createSystemAccessResolver,
4
6
  deferredProjectionExecute,
5
7
  } from "@checkstack/ai-backend";
8
+ import { createAnomalySignalsContributor } from "./system-signals";
6
9
  import { healthCheckHooks } from "@checkstack/healthcheck-backend";
7
10
  import { setupBaselineAnalyzerJob } from "./jobs/baseline-analyzer";
8
11
  import { processCheckCompleted } from "./detector";
@@ -58,9 +61,16 @@ export const plugin = createBackendPlugin({
58
61
  procedure: anomalyContract.getAnomalies,
59
62
  sourcePluginMetadata: pluginMetadata,
60
63
  procedureKey: "getAnomalies",
61
- name: "anomaly.explain",
64
+ name: "anomaly.list",
62
65
  description:
63
- "List detected anomalies (statistical sigma/drift) for context. Read-only.",
66
+ "List detected anomalies (statistical spikes / drift). Read-only. " +
67
+ "All filters are OPTIONAL - call with no arguments to list every " +
68
+ "anomaly, or narrow with: systemId (a system UUID from the catalog " +
69
+ "tool, never a system name), state (one of: suspicious, anomaly, " +
70
+ "recovered), kind (one of: spike, drift), suppression (one of: " +
71
+ "active, suppressed, all). Each result includes the anomaly's id and " +
72
+ "systemId. There is no per-anomaly 'explain' call - read the returned " +
73
+ "rows directly.",
64
74
  effect: "read",
65
75
  execute: deferredProjectionExecute,
66
76
  });
@@ -110,6 +120,20 @@ export const plugin = createBackendPlugin({
110
120
 
111
121
  const service = new AnomalyService(typedDb);
112
122
  gitopsService = service;
123
+
124
+ // Contribute anomaly problem state to the dashboard `system.issues`
125
+ // aggregator. The contributor gates the originating principal on
126
+ // anomaly's own read rule and reads globally from shared Postgres - see
127
+ // createAnomalySignalsContributor.
128
+ env
129
+ .getExtensionPoint(systemSignalsExtensionPoint)
130
+ .contribute(
131
+ createAnomalySignalsContributor({
132
+ service,
133
+ resolver: createSystemAccessResolver(rpcClient),
134
+ }),
135
+ );
136
+
113
137
  routerCache = createAnomalyRouterCache({ cacheManager, logger });
114
138
  const router = createRouter(service, logger, routerCache);
115
139
  rpc.registerRouter(router, anomalyContract);
package/src/service.ts CHANGED
@@ -73,6 +73,45 @@ export class AnomalyService {
73
73
  }));
74
74
  }
75
75
 
76
+ /**
77
+ * Return the current "problem" anomaly rows across ALL systems, for the
78
+ * dashboard `system.issues` aggregator. Mirrors the frontend filler's two
79
+ * active queries (state = anomaly | suspicious, suppressed rows excluded) in a
80
+ * single global read so the backend signals match the frontend ones. Reads
81
+ * from shared, durable storage so every pod returns the same answer.
82
+ */
83
+ async getActiveSignalAnomalies(): Promise<
84
+ Array<{
85
+ systemId: string;
86
+ configurationId: string;
87
+ fieldPath: string;
88
+ startedAt: string;
89
+ state: schema.AnomalyState;
90
+ }>
91
+ > {
92
+ const rows = await this.db
93
+ .select({
94
+ systemId: schema.anomalies.systemId,
95
+ configurationId: schema.anomalies.configurationId,
96
+ fieldPath: schema.anomalies.fieldPath,
97
+ startedAt: schema.anomalies.startedAt,
98
+ state: schema.anomalies.state,
99
+ })
100
+ .from(schema.anomalies)
101
+ .where(
102
+ and(
103
+ inArray(schema.anomalies.state, ["anomaly", "suspicious"]),
104
+ isNull(schema.anomalies.suppressedAt),
105
+ ),
106
+ )
107
+ .orderBy(desc(schema.anomalies.startedAt));
108
+
109
+ return rows.map((r) => ({
110
+ ...r,
111
+ startedAt: r.startedAt.toISOString(),
112
+ }));
113
+ }
114
+
76
115
  /**
77
116
  * Globally suppress a single anomaly row. Snapshots the current observed
78
117
  * value and baseline so the inline detector can auto-unsuppress once the
@@ -0,0 +1,97 @@
1
+ import { describe, expect, test } from "bun:test";
2
+ import type { AuthUser } from "@checkstack/backend-api";
3
+ import { qualifyAccessRuleId } from "@checkstack/common";
4
+ import type { SystemAccessResolver } from "@checkstack/ai-backend";
5
+ import { anomalyAccess } from "@checkstack/anomaly-common";
6
+ import { createAnomalySignalsContributor } from "./system-signals";
7
+ import type { AnomalyService } from "./service";
8
+
9
+ type Rows = Awaited<ReturnType<AnomalyService["getActiveSignalAnomalies"]>>;
10
+
11
+ const stubService = (
12
+ rows: Rows,
13
+ ): Pick<AnomalyService, "getActiveSignalAnomalies"> => ({
14
+ getActiveSignalAnomalies: async () => rows,
15
+ });
16
+
17
+ const sampleRows: Rows = [
18
+ {
19
+ systemId: "sys-1",
20
+ configurationId: "cfg-1",
21
+ fieldPath: "latency",
22
+ startedAt: "2026-06-07T10:00:00.000Z",
23
+ state: "anomaly",
24
+ },
25
+ {
26
+ systemId: "sys-2",
27
+ configurationId: "cfg-2",
28
+ fieldPath: "errors",
29
+ startedAt: "2026-06-07T11:00:00.000Z",
30
+ state: "suspicious",
31
+ },
32
+ ];
33
+
34
+ // The per-source gate is owned/tested by createGatedSystemSignalsContributor.
35
+ const allowAll: SystemAccessResolver = {
36
+ accessibleSystemIds: async ({ systemIds }) => systemIds,
37
+ };
38
+ const denyAll: SystemAccessResolver = { accessibleSystemIds: async () => [] };
39
+
40
+ const withFeedRead: AuthUser = {
41
+ type: "user",
42
+ id: "u1",
43
+ accessRules: [
44
+ qualifyAccessRuleId(
45
+ { pluginId: anomalyAccess.feed.read.pluginId },
46
+ anomalyAccess.feed.read,
47
+ ),
48
+ ],
49
+ };
50
+
51
+ describe("createAnomalySignalsContributor", () => {
52
+ test("uses the anomaly source id", () => {
53
+ const contributor = createAnomalySignalsContributor({
54
+ service: stubService([]),
55
+ resolver: allowAll,
56
+ });
57
+ expect(contributor.sourceId).toBe("anomaly");
58
+ });
59
+
60
+ test("wires the service + shared deriver for an authorized principal", async () => {
61
+ const contributor = createAnomalySignalsContributor({
62
+ service: stubService(sampleRows),
63
+ resolver: allowAll,
64
+ });
65
+
66
+ const map = await contributor.read({ principal: withFeedRead });
67
+
68
+ expect(Object.keys(map.signals).sort()).toEqual(["sys-1", "sys-2"]);
69
+ expect(map.signals["sys-1"]?.[0]).toMatchObject({
70
+ source: "anomaly",
71
+ tone: "warn",
72
+ label: "Anomaly detected",
73
+ });
74
+ expect(map.signals["sys-2"]?.[0]).toMatchObject({
75
+ source: "anomaly",
76
+ tone: "info",
77
+ label: "Suspicious behaviour",
78
+ });
79
+ });
80
+
81
+ test("routes a non-global user through the team gate (no grants -> nothing)", async () => {
82
+ const contributor = createAnomalySignalsContributor({
83
+ service: stubService(sampleRows),
84
+ resolver: denyAll,
85
+ });
86
+ const principal: AuthUser = {
87
+ type: "user",
88
+ id: "u1",
89
+ accessRules: ["catalog.system.read"],
90
+ };
91
+
92
+ expect(await contributor.read({ principal })).toEqual({
93
+ accessible: false,
94
+ signals: {},
95
+ });
96
+ });
97
+ });
@@ -0,0 +1,40 @@
1
+ import {
2
+ createGatedSystemSignalsContributor,
3
+ type SystemAccessResolver,
4
+ type SystemSignalsContributor,
5
+ } from "@checkstack/ai-backend";
6
+ import {
7
+ anomalyAccess,
8
+ deriveAnomalySignals,
9
+ ANOMALY_SIGNAL_SOURCE_ID,
10
+ } from "@checkstack/anomaly-common";
11
+ import type { AnomalyService } from "./service";
12
+
13
+ /**
14
+ * The slice of {@link AnomalyService} the contributor needs - the single global
15
+ * read of current problem rows. Narrowed so the contributor (and its test) does
16
+ * not depend on the full service surface.
17
+ */
18
+ type SignalSource = Pick<AnomalyService, "getActiveSignalAnomalies">;
19
+
20
+ /**
21
+ * Build the anomaly contributor for the dashboard `system.issues` aggregator.
22
+ * Reads active (anomaly/suspicious) rows globally from shared Postgres and runs
23
+ * the SAME deriver the frontend filler uses. The per-source access gate (global
24
+ * `anomaly.feed.read` plus per-system team grants) is applied by
25
+ * {@link createGatedSystemSignalsContributor}.
26
+ */
27
+ export const createAnomalySignalsContributor = ({
28
+ service,
29
+ resolver,
30
+ }: {
31
+ service: SignalSource;
32
+ resolver: SystemAccessResolver;
33
+ }): SystemSignalsContributor =>
34
+ createGatedSystemSignalsContributor({
35
+ sourceId: ANOMALY_SIGNAL_SOURCE_ID,
36
+ accessRule: anomalyAccess.feed.read,
37
+ resolver,
38
+ readSignals: async () =>
39
+ deriveAnomalySignals({ rows: await service.getActiveSignalAnomalies() }),
40
+ });