@oneuptime/common 10.0.84 → 10.0.85
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Models/DatabaseModels/Index.ts +2 -0
- package/Models/DatabaseModels/KubernetesContainer.ts +552 -0
- package/Models/DatabaseModels/KubernetesResource.ts +130 -0
- package/Models/DatabaseModels/LlmLog.ts +2 -1
- package/Models/DatabaseModels/LlmProvider.ts +5 -4
- package/Models/DatabaseModels/Project.ts +40 -0
- package/Server/API/KubernetesResourceAPI.ts +144 -12
- package/Server/Infrastructure/Postgres/SchemaMigrations/1777550162848-MigrationName.ts +29 -0
- package/Server/Infrastructure/Postgres/SchemaMigrations/1777571961028-MigrationName.ts +99 -0
- package/Server/Infrastructure/Postgres/SchemaMigrations/Index.ts +4 -0
- package/Server/Infrastructure/Queue.ts +60 -0
- package/Server/Infrastructure/QueueWorker.ts +39 -1
- package/Server/Middleware/HttpMetricsMiddleware.ts +92 -0
- package/Server/Services/AuditLogService.ts +19 -1
- package/Server/Services/KubernetesContainerService.ts +264 -0
- package/Server/Services/KubernetesResourceService.ts +233 -0
- package/Server/Services/StatusPageSubscriberService.ts +4 -4
- package/Server/Types/Database/Permissions/AccessControlPermission.ts +3 -3
- package/Server/Utils/LLM/LLMService.ts +132 -11
- package/Server/Utils/Monitor/MonitorAlert.ts +1 -1
- package/Server/Utils/Monitor/MonitorIncident.ts +1 -1
- package/Server/Utils/StartServer.ts +2 -0
- package/Server/Utils/Telemetry/AppMetrics.ts +211 -0
- package/Server/Utils/Telemetry/RuntimeMetrics.ts +169 -0
- package/Server/Utils/Telemetry.ts +98 -0
- package/Server/Utils/Workspace/Slack/Actions/Alert.ts +2 -2
- package/Server/Utils/Workspace/Slack/Actions/Incident.ts +2 -2
- package/Server/Utils/Workspace/Slack/Actions/ScheduledMaintenance.ts +2 -2
- package/Tests/jest.setup.ts +18 -0
- package/Types/Kubernetes/KubernetesInventoryExtractor.ts +171 -5
- package/Types/LLM/LlmType.ts +3 -0
- package/UI/Components/Forms/ModelForm.tsx +3 -3
- package/UI/Components/LogsViewer/components/LogsAnalyticsView.tsx +2 -2
- package/Utils/UUID.ts +1 -3
- package/build/dist/Models/DatabaseModels/Index.js +2 -0
- package/build/dist/Models/DatabaseModels/Index.js.map +1 -1
- package/build/dist/Models/DatabaseModels/KubernetesContainer.js +581 -0
- package/build/dist/Models/DatabaseModels/KubernetesContainer.js.map +1 -0
- package/build/dist/Models/DatabaseModels/KubernetesResource.js +135 -0
- package/build/dist/Models/DatabaseModels/KubernetesResource.js.map +1 -1
- package/build/dist/Models/DatabaseModels/LlmLog.js +1 -1
- package/build/dist/Models/DatabaseModels/LlmLog.js.map +1 -1
- package/build/dist/Models/DatabaseModels/LlmProvider.js +4 -4
- package/build/dist/Models/DatabaseModels/LlmProvider.js.map +1 -1
- package/build/dist/Models/DatabaseModels/Project.js +41 -0
- package/build/dist/Models/DatabaseModels/Project.js.map +1 -1
- package/build/dist/Server/API/KubernetesResourceAPI.js +106 -9
- package/build/dist/Server/API/KubernetesResourceAPI.js.map +1 -1
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1777550162848-MigrationName.js +16 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1777550162848-MigrationName.js.map +1 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1777571961028-MigrationName.js +40 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1777571961028-MigrationName.js.map +1 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/Index.js +4 -0
- package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/Index.js.map +1 -1
- package/build/dist/Server/Infrastructure/Queue.js +44 -0
- package/build/dist/Server/Infrastructure/Queue.js.map +1 -1
- package/build/dist/Server/Infrastructure/QueueWorker.js +31 -1
- package/build/dist/Server/Infrastructure/QueueWorker.js.map +1 -1
- package/build/dist/Server/Middleware/HttpMetricsMiddleware.js +61 -0
- package/build/dist/Server/Middleware/HttpMetricsMiddleware.js.map +1 -0
- package/build/dist/Server/Services/AuditLogService.js +14 -1
- package/build/dist/Server/Services/AuditLogService.js.map +1 -1
- package/build/dist/Server/Services/KubernetesContainerService.js +179 -0
- package/build/dist/Server/Services/KubernetesContainerService.js.map +1 -0
- package/build/dist/Server/Services/KubernetesResourceService.js +175 -0
- package/build/dist/Server/Services/KubernetesResourceService.js.map +1 -1
- package/build/dist/Server/Services/StatusPageSubscriberService.js +4 -4
- package/build/dist/Server/Services/StatusPageSubscriberService.js.map +1 -1
- package/build/dist/Server/Types/Database/Permissions/AccessControlPermission.js +3 -3
- package/build/dist/Server/Utils/LLM/LLMService.js +111 -13
- package/build/dist/Server/Utils/LLM/LLMService.js.map +1 -1
- package/build/dist/Server/Utils/Monitor/MonitorAlert.js +1 -1
- package/build/dist/Server/Utils/Monitor/MonitorAlert.js.map +1 -1
- package/build/dist/Server/Utils/Monitor/MonitorIncident.js +1 -1
- package/build/dist/Server/Utils/Monitor/MonitorIncident.js.map +1 -1
- package/build/dist/Server/Utils/StartServer.js +2 -0
- package/build/dist/Server/Utils/StartServer.js.map +1 -1
- package/build/dist/Server/Utils/Telemetry/AppMetrics.js +167 -0
- package/build/dist/Server/Utils/Telemetry/AppMetrics.js.map +1 -0
- package/build/dist/Server/Utils/Telemetry/RuntimeMetrics.js +141 -0
- package/build/dist/Server/Utils/Telemetry/RuntimeMetrics.js.map +1 -0
- package/build/dist/Server/Utils/Telemetry.js +47 -0
- package/build/dist/Server/Utils/Telemetry.js.map +1 -1
- package/build/dist/Server/Utils/Workspace/Slack/Actions/Alert.js +2 -2
- package/build/dist/Server/Utils/Workspace/Slack/Actions/Incident.js +2 -2
- package/build/dist/Server/Utils/Workspace/Slack/Actions/ScheduledMaintenance.js +2 -2
- package/build/dist/Tests/jest.setup.js +17 -0
- package/build/dist/Tests/jest.setup.js.map +1 -1
- package/build/dist/Types/Kubernetes/KubernetesInventoryExtractor.js +116 -4
- package/build/dist/Types/Kubernetes/KubernetesInventoryExtractor.js.map +1 -1
- package/build/dist/Types/LLM/LlmType.js +3 -0
- package/build/dist/Types/LLM/LlmType.js.map +1 -1
- package/build/dist/UI/Components/Forms/ModelForm.js +3 -3
- package/build/dist/UI/Components/LogsViewer/components/LogsAnalyticsView.js.map +1 -1
- package/build/dist/Utils/UUID.js +1 -2
- package/build/dist/Utils/UUID.js.map +1 -1
- package/package.json +6 -8
|
@@ -42,7 +42,7 @@ import LlmType from "../../Types/LLM/LlmType";
|
|
|
42
42
|
pluralName: "LLM Providers",
|
|
43
43
|
icon: IconProp.Bolt,
|
|
44
44
|
tableDescription:
|
|
45
|
-
"Manage LLM Provider configurations. Connect to OpenAI, Anthropic, Ollama, or other LLM providers to enable AI features.",
|
|
45
|
+
"Manage LLM Provider configurations. Connect to OpenAI, Azure OpenAI, Anthropic, Groq, Mistral, Ollama, or other LLM providers to enable AI features.",
|
|
46
46
|
})
|
|
47
47
|
@TableAccessControl({
|
|
48
48
|
create: [
|
|
@@ -179,7 +179,8 @@ export default class LlmProvider extends BaseModel {
|
|
|
179
179
|
required: true,
|
|
180
180
|
type: TableColumnType.ShortText,
|
|
181
181
|
title: "LLM Type",
|
|
182
|
-
description:
|
|
182
|
+
description:
|
|
183
|
+
"The type of LLM provider (OpenAI, Azure OpenAI, Anthropic, Groq, Mistral, Ollama, etc.)",
|
|
183
184
|
})
|
|
184
185
|
@Column({
|
|
185
186
|
nullable: false,
|
|
@@ -214,7 +215,7 @@ export default class LlmProvider extends BaseModel {
|
|
|
214
215
|
type: TableColumnType.LongText,
|
|
215
216
|
title: "API Key",
|
|
216
217
|
description:
|
|
217
|
-
"The API key for the LLM provider. Required for OpenAI and Anthropic.",
|
|
218
|
+
"The API key for the LLM provider. Required for OpenAI, Azure OpenAI, Anthropic, Groq, and Mistral.",
|
|
218
219
|
encrypted: true,
|
|
219
220
|
})
|
|
220
221
|
@Column({
|
|
@@ -276,7 +277,7 @@ export default class LlmProvider extends BaseModel {
|
|
|
276
277
|
type: TableColumnType.ShortURL,
|
|
277
278
|
title: "Base URL",
|
|
278
279
|
description:
|
|
279
|
-
"The base URL for the LLM API. Required for Ollama, optional for others.",
|
|
280
|
+
"The base URL for the LLM API. Required for Azure OpenAI and Ollama, optional for others.",
|
|
280
281
|
})
|
|
281
282
|
@Column({
|
|
282
283
|
nullable: true,
|
|
@@ -2180,4 +2180,44 @@ export default class Project extends TenantModel {
|
|
|
2180
2180
|
create: PlanType.Free,
|
|
2181
2181
|
})
|
|
2182
2182
|
public auditLogsRetentionInDays?: number = undefined;
|
|
2183
|
+
|
|
2184
|
+
@ColumnAccessControl({
|
|
2185
|
+
create: [Permission.User],
|
|
2186
|
+
read: [
|
|
2187
|
+
Permission.ProjectOwner,
|
|
2188
|
+
Permission.ProjectAdmin,
|
|
2189
|
+
Permission.ProjectMember,
|
|
2190
|
+
Permission.Viewer,
|
|
2191
|
+
Permission.ReadProject,
|
|
2192
|
+
Permission.UnAuthorizedSsoUser,
|
|
2193
|
+
Permission.ProjectUser,
|
|
2194
|
+
Permission.ReadAllProjectResources,
|
|
2195
|
+
],
|
|
2196
|
+
update: [
|
|
2197
|
+
Permission.ProjectOwner,
|
|
2198
|
+
Permission.ProjectAdmin,
|
|
2199
|
+
Permission.EditProject,
|
|
2200
|
+
],
|
|
2201
|
+
})
|
|
2202
|
+
@TableColumn({
|
|
2203
|
+
required: true,
|
|
2204
|
+
type: TableColumnType.Boolean,
|
|
2205
|
+
isDefaultValueColumn: true,
|
|
2206
|
+
defaultValue: false,
|
|
2207
|
+
title: "Store System Events",
|
|
2208
|
+
description:
|
|
2209
|
+
"When enabled, audit logs will also include events triggered by the system. By default, only events triggered by users are recorded.",
|
|
2210
|
+
})
|
|
2211
|
+
@Column({
|
|
2212
|
+
type: ColumnType.Boolean,
|
|
2213
|
+
nullable: false,
|
|
2214
|
+
unique: false,
|
|
2215
|
+
default: false,
|
|
2216
|
+
})
|
|
2217
|
+
@ColumnBillingAccessControl({
|
|
2218
|
+
read: PlanType.Free,
|
|
2219
|
+
update: PlanType.Enterprise,
|
|
2220
|
+
create: PlanType.Free,
|
|
2221
|
+
})
|
|
2222
|
+
public storeSystemEventsInAuditLogs?: boolean = undefined;
|
|
2183
2223
|
}
|
|
@@ -56,14 +56,57 @@ export default class KubernetesResourceAPI extends BaseAPI<
|
|
|
56
56
|
}
|
|
57
57
|
},
|
|
58
58
|
);
|
|
59
|
+
|
|
60
|
+
/*
|
|
61
|
+
* Latest CPU+memory aggregated by Pod namespace. Powers the
|
|
62
|
+
* Namespaces list view without a ClickHouse round-trip.
|
|
63
|
+
*/
|
|
64
|
+
this.router.post(
|
|
65
|
+
`${new this.entityType()
|
|
66
|
+
.getCrudApiPath()
|
|
67
|
+
?.toString()}/latest-pod-metrics-by-namespace/:clusterId`,
|
|
68
|
+
UserMiddleware.getUserMiddleware,
|
|
69
|
+
async (req: ExpressRequest, res: ExpressResponse, next: NextFunction) => {
|
|
70
|
+
try {
|
|
71
|
+
await this.getLatestPodMetricsByNamespace(req, res);
|
|
72
|
+
} catch (err) {
|
|
73
|
+
next(err);
|
|
74
|
+
}
|
|
75
|
+
},
|
|
76
|
+
);
|
|
77
|
+
|
|
78
|
+
/*
|
|
79
|
+
* Latest CPU+memory aggregated by Pod ownerReferences[].name for
|
|
80
|
+
* a given owner kind. Powers Deployments/StatefulSets/DaemonSets/
|
|
81
|
+
* Jobs/CronJobs list views.
|
|
82
|
+
*/
|
|
83
|
+
this.router.post(
|
|
84
|
+
`${new this.entityType()
|
|
85
|
+
.getCrudApiPath()
|
|
86
|
+
?.toString()}/latest-pod-metrics-by-owner/:clusterId/:ownerKind`,
|
|
87
|
+
UserMiddleware.getUserMiddleware,
|
|
88
|
+
async (req: ExpressRequest, res: ExpressResponse, next: NextFunction) => {
|
|
89
|
+
try {
|
|
90
|
+
await this.getLatestPodMetricsByOwner(req, res);
|
|
91
|
+
} catch (err) {
|
|
92
|
+
next(err);
|
|
93
|
+
}
|
|
94
|
+
},
|
|
95
|
+
);
|
|
59
96
|
}
|
|
60
97
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
98
|
+
/*
|
|
99
|
+
* Cluster + auth resolution shared by the cluster-scoped sub-routes.
|
|
100
|
+
* Returns the (projectId, kubernetesClusterId) tuple after enforcing
|
|
101
|
+
* the standard ACL chain. Throws NotFound when the cluster is
|
|
102
|
+
* missing or the caller lacks read access (indistinguishable on
|
|
103
|
+
* purpose).
|
|
104
|
+
*/
|
|
105
|
+
private async resolveClusterForRequest(req: ExpressRequest): Promise<{
|
|
106
|
+
projectId: ObjectID;
|
|
107
|
+
kubernetesClusterId: ObjectID;
|
|
108
|
+
}> {
|
|
65
109
|
const clusterIdParam: string | undefined = req.params["clusterId"];
|
|
66
|
-
|
|
67
110
|
if (!clusterIdParam) {
|
|
68
111
|
throw new BadDataException("Cluster ID is required");
|
|
69
112
|
}
|
|
@@ -78,12 +121,6 @@ export default class KubernetesResourceAPI extends BaseAPI<
|
|
|
78
121
|
const props: DatabaseCommonInteractionProps =
|
|
79
122
|
await CommonAPI.getDatabaseCommonInteractionProps(req);
|
|
80
123
|
|
|
81
|
-
/*
|
|
82
|
-
* Authorize: the caller must be able to read the parent cluster.
|
|
83
|
-
* findOneById applies the full ACL chain; a null return means 404
|
|
84
|
-
* (either the cluster doesn't exist or the caller cannot see it —
|
|
85
|
-
* indistinguishable on purpose).
|
|
86
|
-
*/
|
|
87
124
|
const cluster: KubernetesCluster | null =
|
|
88
125
|
await KubernetesClusterService.findOneById({
|
|
89
126
|
id: kubernetesClusterId,
|
|
@@ -98,9 +135,104 @@ export default class KubernetesResourceAPI extends BaseAPI<
|
|
|
98
135
|
throw new NotFoundException("Kubernetes Cluster not found");
|
|
99
136
|
}
|
|
100
137
|
|
|
101
|
-
|
|
138
|
+
return {
|
|
102
139
|
projectId: cluster.projectId,
|
|
103
140
|
kubernetesClusterId,
|
|
141
|
+
};
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
/*
|
|
145
|
+
* Translate a service-layer Map of aggregates into a JSON dict
|
|
146
|
+
* { name: { cpuPercent, memoryBytes } } suitable for the wire.
|
|
147
|
+
* memoryBytes is stringified so values past 2 GiB don't overflow
|
|
148
|
+
* client-side number parsing in the JSON path; the UI parses it
|
|
149
|
+
* back to a number for rendering.
|
|
150
|
+
*/
|
|
151
|
+
private mapAggregatesToJson(
|
|
152
|
+
aggregates: Map<string, { cpuPercent: number; memoryBytes: number }>,
|
|
153
|
+
): JSONObject {
|
|
154
|
+
const out: JSONObject = {};
|
|
155
|
+
for (const [name, value] of aggregates.entries()) {
|
|
156
|
+
out[name] = {
|
|
157
|
+
cpuPercent: value.cpuPercent,
|
|
158
|
+
memoryBytes: value.memoryBytes.toString(),
|
|
159
|
+
};
|
|
160
|
+
}
|
|
161
|
+
return out;
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
private async getLatestPodMetricsByNamespace(
|
|
165
|
+
req: ExpressRequest,
|
|
166
|
+
res: ExpressResponse,
|
|
167
|
+
): Promise<void> {
|
|
168
|
+
const { projectId, kubernetesClusterId } =
|
|
169
|
+
await this.resolveClusterForRequest(req);
|
|
170
|
+
|
|
171
|
+
const staleAfter: Date = new Date(Date.now() - 15 * 60 * 1000);
|
|
172
|
+
const aggregates: Map<string, { cpuPercent: number; memoryBytes: number }> =
|
|
173
|
+
await this.service.getLatestMetricsByNamespace({
|
|
174
|
+
projectId,
|
|
175
|
+
kubernetesClusterId,
|
|
176
|
+
staleAfter,
|
|
177
|
+
});
|
|
178
|
+
|
|
179
|
+
return Response.sendJsonObjectResponse(req, res, {
|
|
180
|
+
aggregates: this.mapAggregatesToJson(aggregates),
|
|
181
|
+
});
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
private async getLatestPodMetricsByOwner(
|
|
185
|
+
req: ExpressRequest,
|
|
186
|
+
res: ExpressResponse,
|
|
187
|
+
): Promise<void> {
|
|
188
|
+
const ownerKind: string | undefined = req.params["ownerKind"];
|
|
189
|
+
if (!ownerKind) {
|
|
190
|
+
throw new BadDataException("Owner kind is required");
|
|
191
|
+
}
|
|
192
|
+
/*
|
|
193
|
+
* Only a small allow-list of owner kinds makes sense here; reject
|
|
194
|
+
* anything else so the endpoint can't be used to probe arbitrary
|
|
195
|
+
* jsonb_array_elements paths.
|
|
196
|
+
*/
|
|
197
|
+
const allowed: Set<string> = new Set([
|
|
198
|
+
"Deployment",
|
|
199
|
+
"StatefulSet",
|
|
200
|
+
"DaemonSet",
|
|
201
|
+
"Job",
|
|
202
|
+
"CronJob",
|
|
203
|
+
"ReplicaSet",
|
|
204
|
+
]);
|
|
205
|
+
if (!allowed.has(ownerKind)) {
|
|
206
|
+
throw new BadDataException(`Unsupported owner kind: ${ownerKind}`);
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
const { projectId, kubernetesClusterId } =
|
|
210
|
+
await this.resolveClusterForRequest(req);
|
|
211
|
+
|
|
212
|
+
const staleAfter: Date = new Date(Date.now() - 15 * 60 * 1000);
|
|
213
|
+
const aggregates: Map<string, { cpuPercent: number; memoryBytes: number }> =
|
|
214
|
+
await this.service.getLatestMetricsByOwner({
|
|
215
|
+
projectId,
|
|
216
|
+
kubernetesClusterId,
|
|
217
|
+
ownerKind,
|
|
218
|
+
staleAfter,
|
|
219
|
+
});
|
|
220
|
+
|
|
221
|
+
return Response.sendJsonObjectResponse(req, res, {
|
|
222
|
+
aggregates: this.mapAggregatesToJson(aggregates),
|
|
223
|
+
});
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
private async getInventorySummary(
|
|
227
|
+
req: ExpressRequest,
|
|
228
|
+
res: ExpressResponse,
|
|
229
|
+
): Promise<void> {
|
|
230
|
+
const { projectId, kubernetesClusterId } =
|
|
231
|
+
await this.resolveClusterForRequest(req);
|
|
232
|
+
|
|
233
|
+
const summary: InventorySummary = await this.service.getInventorySummary({
|
|
234
|
+
projectId,
|
|
235
|
+
kubernetesClusterId,
|
|
104
236
|
});
|
|
105
237
|
|
|
106
238
|
const responseBody: JSONObject = {
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import { MigrationInterface, QueryRunner } from "typeorm";
|
|
2
|
+
|
|
3
|
+
export class MigrationName1777550162848 implements MigrationInterface {
|
|
4
|
+
public name: string = "MigrationName1777550162848";
|
|
5
|
+
|
|
6
|
+
public async up(queryRunner: QueryRunner): Promise<void> {
|
|
7
|
+
await queryRunner.query(
|
|
8
|
+
`ALTER TABLE "Project" ADD "storeSystemEventsInAuditLogs" boolean NOT NULL DEFAULT false`,
|
|
9
|
+
);
|
|
10
|
+
await queryRunner.query(
|
|
11
|
+
`ALTER TABLE "OnCallDutyPolicyScheduleLayer" ALTER COLUMN "rotation" SET DEFAULT '{"_type":"Recurring","value":{"intervalType":"Day","intervalCount":{"_type":"PositiveNumber","value":1}}}'`,
|
|
12
|
+
);
|
|
13
|
+
await queryRunner.query(
|
|
14
|
+
`ALTER TABLE "OnCallDutyPolicyScheduleLayer" ALTER COLUMN "restrictionTimes" SET DEFAULT '{"_type":"RestrictionTimes","value":{"restictionType":"None","dayRestrictionTimes":null,"weeklyRestrictionTimes":[]}}'`,
|
|
15
|
+
);
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
public async down(queryRunner: QueryRunner): Promise<void> {
|
|
19
|
+
await queryRunner.query(
|
|
20
|
+
`ALTER TABLE "OnCallDutyPolicyScheduleLayer" ALTER COLUMN "restrictionTimes" SET DEFAULT '{"_type": "RestrictionTimes", "value": {"restictionType": "None", "dayRestrictionTimes": null, "weeklyRestrictionTimes": []}}'`,
|
|
21
|
+
);
|
|
22
|
+
await queryRunner.query(
|
|
23
|
+
`ALTER TABLE "OnCallDutyPolicyScheduleLayer" ALTER COLUMN "rotation" SET DEFAULT '{"_type": "Recurring", "value": {"intervalType": "Day", "intervalCount": {"_type": "PositiveNumber", "value": 1}}}'`,
|
|
24
|
+
);
|
|
25
|
+
await queryRunner.query(
|
|
26
|
+
`ALTER TABLE "Project" DROP COLUMN "storeSystemEventsInAuditLogs"`,
|
|
27
|
+
);
|
|
28
|
+
}
|
|
29
|
+
}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import { MigrationInterface, QueryRunner } from "typeorm";
|
|
2
|
+
|
|
3
|
+
export class MigrationName1777571961028 implements MigrationInterface {
|
|
4
|
+
public name: string = "MigrationName1777571961028";
|
|
5
|
+
|
|
6
|
+
public async up(queryRunner: QueryRunner): Promise<void> {
|
|
7
|
+
await queryRunner.query(
|
|
8
|
+
`CREATE TABLE "KubernetesContainer" ("_id" uuid NOT NULL DEFAULT uuid_generate_v4(), "createdAt" TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(), "updatedAt" TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(), "deletedAt" TIMESTAMP WITH TIME ZONE, "version" integer NOT NULL, "projectId" uuid NOT NULL, "kubernetesClusterId" uuid NOT NULL, "podNamespaceKey" character varying(100) NOT NULL DEFAULT '', "podName" character varying(100) NOT NULL, "name" character varying(100) NOT NULL, "image" character varying(500), "state" character varying(100), "reason" character varying(100), "isReady" boolean, "restartCount" integer, "memoryLimitBytes" bigint, "latestCpuPercent" numeric, "latestMemoryBytes" bigint, "metricsUpdatedAt" TIMESTAMP WITH TIME ZONE, "lastSeenAt" TIMESTAMP WITH TIME ZONE NOT NULL, "createdByUserId" uuid, "deletedByUserId" uuid, CONSTRAINT "PK_7e19b5140bc3005a6ea2f8f7aee" PRIMARY KEY ("_id"))`,
|
|
9
|
+
);
|
|
10
|
+
await queryRunner.query(
|
|
11
|
+
`CREATE INDEX "IDX_fcc7f4bc83564a8c7885233f6e" ON "KubernetesContainer" ("projectId") `,
|
|
12
|
+
);
|
|
13
|
+
await queryRunner.query(
|
|
14
|
+
`CREATE INDEX "IDX_5303bcae1a72f9830bd7d15e2c" ON "KubernetesContainer" ("kubernetesClusterId") `,
|
|
15
|
+
);
|
|
16
|
+
await queryRunner.query(
|
|
17
|
+
`CREATE UNIQUE INDEX "IDX_1dcb8fed322a9bddfabb60cbc7" ON "KubernetesContainer" ("projectId", "kubernetesClusterId", "podNamespaceKey", "podName", "name") `,
|
|
18
|
+
);
|
|
19
|
+
await queryRunner.query(
|
|
20
|
+
`ALTER TABLE "KubernetesResource" ADD "controllerDeploymentName" character varying(100)`,
|
|
21
|
+
);
|
|
22
|
+
await queryRunner.query(
|
|
23
|
+
`ALTER TABLE "KubernetesResource" ADD "controllerCronJobName" character varying(100)`,
|
|
24
|
+
);
|
|
25
|
+
await queryRunner.query(
|
|
26
|
+
`ALTER TABLE "KubernetesResource" ADD "latestCpuPercent" numeric`,
|
|
27
|
+
);
|
|
28
|
+
await queryRunner.query(
|
|
29
|
+
`ALTER TABLE "KubernetesResource" ADD "latestMemoryBytes" bigint`,
|
|
30
|
+
);
|
|
31
|
+
await queryRunner.query(
|
|
32
|
+
`ALTER TABLE "KubernetesResource" ADD "metricsUpdatedAt" TIMESTAMP WITH TIME ZONE`,
|
|
33
|
+
);
|
|
34
|
+
await queryRunner.query(
|
|
35
|
+
`ALTER TABLE "OnCallDutyPolicyScheduleLayer" ALTER COLUMN "rotation" SET DEFAULT '{"_type":"Recurring","value":{"intervalType":"Day","intervalCount":{"_type":"PositiveNumber","value":1}}}'`,
|
|
36
|
+
);
|
|
37
|
+
await queryRunner.query(
|
|
38
|
+
`ALTER TABLE "OnCallDutyPolicyScheduleLayer" ALTER COLUMN "restrictionTimes" SET DEFAULT '{"_type":"RestrictionTimes","value":{"restictionType":"None","dayRestrictionTimes":null,"weeklyRestrictionTimes":[]}}'`,
|
|
39
|
+
);
|
|
40
|
+
await queryRunner.query(
|
|
41
|
+
`ALTER TABLE "KubernetesContainer" ADD CONSTRAINT "FK_fcc7f4bc83564a8c7885233f6e3" FOREIGN KEY ("projectId") REFERENCES "Project"("_id") ON DELETE CASCADE ON UPDATE NO ACTION`,
|
|
42
|
+
);
|
|
43
|
+
await queryRunner.query(
|
|
44
|
+
`ALTER TABLE "KubernetesContainer" ADD CONSTRAINT "FK_5303bcae1a72f9830bd7d15e2cd" FOREIGN KEY ("kubernetesClusterId") REFERENCES "KubernetesCluster"("_id") ON DELETE CASCADE ON UPDATE NO ACTION`,
|
|
45
|
+
);
|
|
46
|
+
await queryRunner.query(
|
|
47
|
+
`ALTER TABLE "KubernetesContainer" ADD CONSTRAINT "FK_d0f740eb8fc87c2426d78babf6b" FOREIGN KEY ("createdByUserId") REFERENCES "User"("_id") ON DELETE SET NULL ON UPDATE NO ACTION`,
|
|
48
|
+
);
|
|
49
|
+
await queryRunner.query(
|
|
50
|
+
`ALTER TABLE "KubernetesContainer" ADD CONSTRAINT "FK_eadbc98e53bc5788d8313e52c67" FOREIGN KEY ("deletedByUserId") REFERENCES "User"("_id") ON DELETE SET NULL ON UPDATE NO ACTION`,
|
|
51
|
+
);
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
public async down(queryRunner: QueryRunner): Promise<void> {
|
|
55
|
+
await queryRunner.query(
|
|
56
|
+
`ALTER TABLE "KubernetesContainer" DROP CONSTRAINT "FK_eadbc98e53bc5788d8313e52c67"`,
|
|
57
|
+
);
|
|
58
|
+
await queryRunner.query(
|
|
59
|
+
`ALTER TABLE "KubernetesContainer" DROP CONSTRAINT "FK_d0f740eb8fc87c2426d78babf6b"`,
|
|
60
|
+
);
|
|
61
|
+
await queryRunner.query(
|
|
62
|
+
`ALTER TABLE "KubernetesContainer" DROP CONSTRAINT "FK_5303bcae1a72f9830bd7d15e2cd"`,
|
|
63
|
+
);
|
|
64
|
+
await queryRunner.query(
|
|
65
|
+
`ALTER TABLE "KubernetesContainer" DROP CONSTRAINT "FK_fcc7f4bc83564a8c7885233f6e3"`,
|
|
66
|
+
);
|
|
67
|
+
await queryRunner.query(
|
|
68
|
+
`ALTER TABLE "OnCallDutyPolicyScheduleLayer" ALTER COLUMN "restrictionTimes" SET DEFAULT '{"_type": "RestrictionTimes", "value": {"restictionType": "None", "dayRestrictionTimes": null, "weeklyRestrictionTimes": []}}'`,
|
|
69
|
+
);
|
|
70
|
+
await queryRunner.query(
|
|
71
|
+
`ALTER TABLE "OnCallDutyPolicyScheduleLayer" ALTER COLUMN "rotation" SET DEFAULT '{"_type": "Recurring", "value": {"intervalType": "Day", "intervalCount": {"_type": "PositiveNumber", "value": 1}}}'`,
|
|
72
|
+
);
|
|
73
|
+
await queryRunner.query(
|
|
74
|
+
`ALTER TABLE "KubernetesResource" DROP COLUMN "metricsUpdatedAt"`,
|
|
75
|
+
);
|
|
76
|
+
await queryRunner.query(
|
|
77
|
+
`ALTER TABLE "KubernetesResource" DROP COLUMN "latestMemoryBytes"`,
|
|
78
|
+
);
|
|
79
|
+
await queryRunner.query(
|
|
80
|
+
`ALTER TABLE "KubernetesResource" DROP COLUMN "latestCpuPercent"`,
|
|
81
|
+
);
|
|
82
|
+
await queryRunner.query(
|
|
83
|
+
`ALTER TABLE "KubernetesResource" DROP COLUMN "controllerCronJobName"`,
|
|
84
|
+
);
|
|
85
|
+
await queryRunner.query(
|
|
86
|
+
`ALTER TABLE "KubernetesResource" DROP COLUMN "controllerDeploymentName"`,
|
|
87
|
+
);
|
|
88
|
+
await queryRunner.query(
|
|
89
|
+
`DROP INDEX "public"."IDX_1dcb8fed322a9bddfabb60cbc7"`,
|
|
90
|
+
);
|
|
91
|
+
await queryRunner.query(
|
|
92
|
+
`DROP INDEX "public"."IDX_5303bcae1a72f9830bd7d15e2c"`,
|
|
93
|
+
);
|
|
94
|
+
await queryRunner.query(
|
|
95
|
+
`DROP INDEX "public"."IDX_fcc7f4bc83564a8c7885233f6e"`,
|
|
96
|
+
);
|
|
97
|
+
await queryRunner.query(`DROP TABLE "KubernetesContainer"`);
|
|
98
|
+
}
|
|
99
|
+
}
|
|
@@ -293,6 +293,8 @@ import { MigrationName1776940714709 } from "./1776940714709-MigrationName";
|
|
|
293
293
|
import { AddStatusPageLanguageSettings1776971364783 } from "./1776971364783-AddStatusPageLanguageSettings";
|
|
294
294
|
import { AddTelemetryRetentionSettings1777018175127 } from "./1777018175127-AddTelemetryRetentionSettings";
|
|
295
295
|
import { AddMonitorTemplate1777201966799 } from "./1777201966799-AddMonitorTemplate";
|
|
296
|
+
import { MigrationName1777550162848 } from "./1777550162848-MigrationName";
|
|
297
|
+
import { MigrationName1777571961028 } from "./1777571961028-MigrationName";
|
|
296
298
|
export default [
|
|
297
299
|
InitialMigration,
|
|
298
300
|
MigrationName1717678334852,
|
|
@@ -589,4 +591,6 @@ export default [
|
|
|
589
591
|
AddStatusPageLanguageSettings1776971364783,
|
|
590
592
|
AddTelemetryRetentionSettings1777018175127,
|
|
591
593
|
AddMonitorTemplate1777201966799,
|
|
594
|
+
MigrationName1777550162848,
|
|
595
|
+
MigrationName1777571961028,
|
|
592
596
|
];
|
|
@@ -8,6 +8,8 @@ import { BullMQAdapter } from "@bull-board/api/bullMQAdapter";
|
|
|
8
8
|
import { ExpressRouter } from "../Utils/Express";
|
|
9
9
|
import CaptureSpan from "../Utils/Telemetry/CaptureSpan";
|
|
10
10
|
import logger from "../Utils/Logger";
|
|
11
|
+
import Telemetry from "../Utils/Telemetry";
|
|
12
|
+
import type { Attributes, ObservableResult } from "@opentelemetry/api";
|
|
11
13
|
import Redis from "./Redis";
|
|
12
14
|
|
|
13
15
|
export enum QueueName {
|
|
@@ -23,6 +25,7 @@ export default class Queue {
|
|
|
23
25
|
private static queueDict: Dictionary<BullQueue> = {};
|
|
24
26
|
// track queues we have already run initial cleanup on
|
|
25
27
|
private static cleanedQueueNames: Set<string> = new Set<string>();
|
|
28
|
+
private static queueSizeMetricRegistered: boolean = false;
|
|
26
29
|
// store repeatable jobs to re-add on reconnect
|
|
27
30
|
private static repeatableJobs: Dictionary<
|
|
28
31
|
Dictionary<{
|
|
@@ -99,6 +102,9 @@ export default class Queue {
|
|
|
99
102
|
// save it to the dictionary
|
|
100
103
|
this.queueDict[queueName] = queue;
|
|
101
104
|
|
|
105
|
+
// Register the observable gauge once any queue exists in this process.
|
|
106
|
+
this.registerQueueSizeMetric();
|
|
107
|
+
|
|
102
108
|
// Add event listener to re-add repeatable jobs on reconnect
|
|
103
109
|
this.setupReconnectListener(queue, queueName).catch((err: unknown) => {
|
|
104
110
|
logger.error("Error setting up reconnect listener for queue");
|
|
@@ -243,6 +249,60 @@ export default class Queue {
|
|
|
243
249
|
return jobAdded;
|
|
244
250
|
}
|
|
245
251
|
|
|
252
|
+
private static registerQueueSizeMetric(): void {
|
|
253
|
+
if (this.queueSizeMetricRegistered) {
|
|
254
|
+
return;
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
if (!Telemetry.isMetricsEnabled()) {
|
|
258
|
+
return;
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
try {
|
|
262
|
+
Telemetry.getObservableGauge({
|
|
263
|
+
name: "queue.size",
|
|
264
|
+
description:
|
|
265
|
+
"Number of BullMQ jobs in each queue, partitioned by job state.",
|
|
266
|
+
unit: "1",
|
|
267
|
+
callback: async (
|
|
268
|
+
result: ObservableResult<Attributes>,
|
|
269
|
+
): Promise<void> => {
|
|
270
|
+
for (const queueName of Object.keys(this.queueDict)) {
|
|
271
|
+
try {
|
|
272
|
+
const stats: {
|
|
273
|
+
waiting: number;
|
|
274
|
+
active: number;
|
|
275
|
+
completed: number;
|
|
276
|
+
failed: number;
|
|
277
|
+
delayed: number;
|
|
278
|
+
total: number;
|
|
279
|
+
} = await this.getQueueStats(queueName as QueueName);
|
|
280
|
+
|
|
281
|
+
const baseAttrs: Attributes = {
|
|
282
|
+
"messaging.system": "bullmq",
|
|
283
|
+
"messaging.destination.name": queueName,
|
|
284
|
+
};
|
|
285
|
+
|
|
286
|
+
result.observe(stats.waiting, { ...baseAttrs, state: "waiting" });
|
|
287
|
+
result.observe(stats.active, { ...baseAttrs, state: "active" });
|
|
288
|
+
result.observe(stats.delayed, { ...baseAttrs, state: "delayed" });
|
|
289
|
+
result.observe(stats.failed, { ...baseAttrs, state: "failed" });
|
|
290
|
+
} catch (err) {
|
|
291
|
+
// Don't let one queue's stat failure break others.
|
|
292
|
+
logger.debug("Failed to read queue stats");
|
|
293
|
+
logger.debug(err);
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
},
|
|
297
|
+
});
|
|
298
|
+
|
|
299
|
+
this.queueSizeMetricRegistered = true;
|
|
300
|
+
} catch (err) {
|
|
301
|
+
logger.error("Failed to register queue.size metric");
|
|
302
|
+
logger.error(err);
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
|
|
246
306
|
@CaptureSpan()
|
|
247
307
|
public static async getQueueSize(queueName: QueueName): Promise<number> {
|
|
248
308
|
const queue: BullQueue = this.getQueue(queueName);
|
|
@@ -7,6 +7,7 @@ import {
|
|
|
7
7
|
} from "../../Types/FunctionTypes";
|
|
8
8
|
import { Worker } from "bullmq";
|
|
9
9
|
import CaptureSpan from "../Utils/Telemetry/CaptureSpan";
|
|
10
|
+
import AppMetrics from "../Utils/Telemetry/AppMetrics";
|
|
10
11
|
import Redis from "./Redis";
|
|
11
12
|
|
|
12
13
|
export default class QueueWorker {
|
|
@@ -29,7 +30,44 @@ export default class QueueWorker {
|
|
|
29
30
|
maxStalledCount?: number;
|
|
30
31
|
},
|
|
31
32
|
): Worker {
|
|
32
|
-
const worker: Worker = new Worker(queueName, onJobInQueue, {
|
|
33
|
+
const instrumentedJobHandler: (job: QueueJob) => Promise<void> = async (
|
|
34
|
+
job: QueueJob,
|
|
35
|
+
): Promise<void> => {
|
|
36
|
+
const startNs: bigint = process.hrtime.bigint();
|
|
37
|
+
const baseAttributes: Record<string, string> = {
|
|
38
|
+
"messaging.system": "bullmq",
|
|
39
|
+
"messaging.destination.name": queueName,
|
|
40
|
+
"messaging.operation.name": job.name || "unknown",
|
|
41
|
+
};
|
|
42
|
+
|
|
43
|
+
AppMetrics.getWorkerJobsInFlight().add(1, baseAttributes);
|
|
44
|
+
|
|
45
|
+
let outcome: "success" | "failure" | "timeout" = "success";
|
|
46
|
+
|
|
47
|
+
try {
|
|
48
|
+
await onJobInQueue(job);
|
|
49
|
+
} catch (err) {
|
|
50
|
+
outcome =
|
|
51
|
+
err instanceof TimeoutException ||
|
|
52
|
+
(err as { name?: string })?.name === "TimeoutException"
|
|
53
|
+
? "timeout"
|
|
54
|
+
: "failure";
|
|
55
|
+
throw err;
|
|
56
|
+
} finally {
|
|
57
|
+
const elapsedNs: bigint = process.hrtime.bigint() - startNs;
|
|
58
|
+
const durationMs: number = Number(elapsedNs) / 1e6;
|
|
59
|
+
const attributes: Record<string, string> = {
|
|
60
|
+
...baseAttributes,
|
|
61
|
+
outcome,
|
|
62
|
+
};
|
|
63
|
+
|
|
64
|
+
AppMetrics.getWorkerJobCounter().add(1, attributes);
|
|
65
|
+
AppMetrics.getWorkerJobDuration().record(durationMs, attributes);
|
|
66
|
+
AppMetrics.getWorkerJobsInFlight().add(-1, baseAttributes);
|
|
67
|
+
}
|
|
68
|
+
};
|
|
69
|
+
|
|
70
|
+
const worker: Worker = new Worker(queueName, instrumentedJobHandler, {
|
|
33
71
|
connection: Redis.getRedisOptions(),
|
|
34
72
|
concurrency: options.concurrency,
|
|
35
73
|
// Only set these values if provided so we do not override BullMQ defaults
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import {
|
|
2
|
+
ExpressRequest,
|
|
3
|
+
ExpressResponse,
|
|
4
|
+
NextFunction,
|
|
5
|
+
} from "../Utils/Express";
|
|
6
|
+
import AppMetrics from "../Utils/Telemetry/AppMetrics";
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* Express middleware that records HTTP server metrics (request count,
|
|
10
|
+
* duration, and in-flight gauge) for every request.
|
|
11
|
+
*
|
|
12
|
+
* Attributes are kept low-cardinality on purpose:
|
|
13
|
+
* - http.request.method: GET / POST / ...
|
|
14
|
+
* - http.route: Express route template (e.g. /api/users/:id)
|
|
15
|
+
* or "unmatched" when nothing matched the request.
|
|
16
|
+
* - http.response.status_code: full status code (bounded set).
|
|
17
|
+
* - status_class: 2xx / 3xx / 4xx / 5xx — handy for fast queries.
|
|
18
|
+
*
|
|
19
|
+
* High-cardinality identifiers (raw URL, query string, userId, projectId,
|
|
20
|
+
* requestId) intentionally stay on traces and logs.
|
|
21
|
+
*/
|
|
22
|
+
const HttpMetricsMiddleware: (
|
|
23
|
+
req: ExpressRequest,
|
|
24
|
+
res: ExpressResponse,
|
|
25
|
+
next: NextFunction,
|
|
26
|
+
) => void = (
|
|
27
|
+
req: ExpressRequest,
|
|
28
|
+
res: ExpressResponse,
|
|
29
|
+
next: NextFunction,
|
|
30
|
+
): void => {
|
|
31
|
+
const startNs: bigint = process.hrtime.bigint();
|
|
32
|
+
const method: string = (req.method || "UNKNOWN").toUpperCase();
|
|
33
|
+
|
|
34
|
+
const inFlight: ReturnType<typeof AppMetrics.getHttpRequestsInFlight> =
|
|
35
|
+
AppMetrics.getHttpRequestsInFlight();
|
|
36
|
+
|
|
37
|
+
inFlight.add(1, { "http.request.method": method });
|
|
38
|
+
|
|
39
|
+
let recorded: boolean = false;
|
|
40
|
+
|
|
41
|
+
const recordOnce: () => void = (): void => {
|
|
42
|
+
if (recorded) {
|
|
43
|
+
return;
|
|
44
|
+
}
|
|
45
|
+
recorded = true;
|
|
46
|
+
|
|
47
|
+
const elapsedNs: bigint = process.hrtime.bigint() - startNs;
|
|
48
|
+
const durationMs: number = Number(elapsedNs) / 1e6;
|
|
49
|
+
const statusCode: number = res.statusCode || 0;
|
|
50
|
+
const statusClass: string =
|
|
51
|
+
statusCode >= 100 && statusCode < 600
|
|
52
|
+
? `${Math.floor(statusCode / 100)}xx`
|
|
53
|
+
: "unknown";
|
|
54
|
+
|
|
55
|
+
/*
|
|
56
|
+
* Express populates req.route once the request has matched a route
|
|
57
|
+
* handler. For 404s (no match), record the request under a stable
|
|
58
|
+
* "unmatched" label rather than the raw URL to avoid cardinality blowup.
|
|
59
|
+
*/
|
|
60
|
+
const routeWithMethod: { path?: string } | undefined = (
|
|
61
|
+
req as ExpressRequest & { route?: { path?: string } }
|
|
62
|
+
).route;
|
|
63
|
+
|
|
64
|
+
const baseUrl: string = (req as ExpressRequest & { baseUrl?: string })
|
|
65
|
+
.baseUrl
|
|
66
|
+
? (req as ExpressRequest & { baseUrl: string }).baseUrl
|
|
67
|
+
: "";
|
|
68
|
+
|
|
69
|
+
const routeTemplate: string =
|
|
70
|
+
routeWithMethod && typeof routeWithMethod.path === "string"
|
|
71
|
+
? `${baseUrl}${routeWithMethod.path}`
|
|
72
|
+
: "unmatched";
|
|
73
|
+
|
|
74
|
+
const attributes: Record<string, string | number> = {
|
|
75
|
+
"http.request.method": method,
|
|
76
|
+
"http.route": routeTemplate,
|
|
77
|
+
"http.response.status_code": statusCode,
|
|
78
|
+
status_class: statusClass,
|
|
79
|
+
};
|
|
80
|
+
|
|
81
|
+
AppMetrics.getHttpRequestCounter().add(1, attributes);
|
|
82
|
+
AppMetrics.getHttpRequestDuration().record(durationMs, attributes);
|
|
83
|
+
inFlight.add(-1, { "http.request.method": method });
|
|
84
|
+
};
|
|
85
|
+
|
|
86
|
+
res.on("finish", recordOnce);
|
|
87
|
+
res.on("close", recordOnce);
|
|
88
|
+
|
|
89
|
+
next();
|
|
90
|
+
};
|
|
91
|
+
|
|
92
|
+
export default HttpMetricsMiddleware;
|