@oneuptime/common 10.5.9 → 10.5.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164) hide show
  1. package/Models/AnalyticsModels/ExceptionInstance.ts +1 -1
  2. package/Models/AnalyticsModels/Log.ts +1 -1
  3. package/Models/AnalyticsModels/Metric.ts +1 -1
  4. package/Models/AnalyticsModels/Profile.ts +1 -1
  5. package/Models/AnalyticsModels/ProfileSample.ts +1 -1
  6. package/Models/AnalyticsModels/Span.ts +1 -1
  7. package/Models/DatabaseModels/TelemetryException.ts +46 -34
  8. package/Models/DatabaseModels/TelemetryUsageBilling.ts +35 -2
  9. package/Server/API/AIAgentDataAPI.ts +25 -7
  10. package/Server/API/TelemetryAPI.ts +6 -0
  11. package/Server/API/TelemetryExceptionAPI.ts +6 -2
  12. package/Server/EnvironmentConfig.ts +27 -0
  13. package/Server/Infrastructure/ClickhouseDatabase.ts +21 -1
  14. package/Server/Infrastructure/Postgres/DataSourceOptions.ts +19 -0
  15. package/Server/Infrastructure/Postgres/SchemaMigrations/1780381124553-MigrationName.ts +28 -0
  16. package/Server/Infrastructure/Postgres/SchemaMigrations/1780382837019-MigrationName.ts +24 -0
  17. package/Server/Infrastructure/Postgres/SchemaMigrations/1780387560604-MigrationName.ts +47 -0
  18. package/Server/Infrastructure/Postgres/SchemaMigrations/1780388219225-MigrationName.ts +34 -0
  19. package/Server/Infrastructure/Postgres/SchemaMigrations/Index.ts +8 -0
  20. package/Server/Infrastructure/PostgresDatabase.ts +27 -1
  21. package/Server/Infrastructure/QueueWorker.ts +54 -4
  22. package/Server/Infrastructure/Redis.ts +11 -0
  23. package/Server/Services/AnalyticsDatabaseService.ts +87 -0
  24. package/Server/Services/DatabaseService.ts +73 -0
  25. package/Server/Services/TelemetryAttributeService.ts +38 -2
  26. package/Server/Services/TelemetryExceptionService.ts +24 -49
  27. package/Server/Services/TelemetryUsageBillingService.ts +289 -166
  28. package/Server/Types/AnalyticsDatabase/ModelPermission.ts +102 -72
  29. package/Server/Types/Database/Permissions/OwnedScopePermission.ts +81 -60
  30. package/Server/Types/Database/Permissions/OwnerTableRegistry.ts +67 -0
  31. package/Server/Utils/Express.ts +32 -0
  32. package/Server/Utils/GracefulShutdown.ts +194 -0
  33. package/Server/Utils/Logger.ts +12 -1
  34. package/Server/Utils/Monitor/MonitorLogUtil.ts +22 -17
  35. package/Server/Utils/Profiling.ts +14 -6
  36. package/Server/Utils/StartServer.ts +13 -5
  37. package/Server/Utils/Telemetry/ContextSpanProcessor.ts +48 -0
  38. package/Server/Utils/Telemetry/LogExceptionExtractor.ts +289 -0
  39. package/Server/Utils/Telemetry/SpanUtil.ts +16 -35
  40. package/Server/Utils/Telemetry/StackTraceParser.ts +423 -0
  41. package/Server/Utils/Telemetry/TelemetryContext.ts +190 -0
  42. package/Server/Utils/Telemetry.ts +33 -7
  43. package/Tests/Server/Services/TelemetryAttributeService.test.ts +83 -0
  44. package/Tests/Server/Utils/Telemetry/LogExceptionExtractor.test.ts +0 -0
  45. package/Types/Database/AccessControl/OwnedThrough.ts +31 -3
  46. package/Types/Telemetry/ServiceType.ts +10 -0
  47. package/UI/Components/AutocompleteTextInput/AutocompleteTextInput.tsx +7 -1
  48. package/UI/Components/Dictionary/Dictionary.tsx +19 -0
  49. package/UI/Components/Filters/FiltersForm.tsx +1 -0
  50. package/UI/Components/Filters/JSONFilter.tsx +2 -0
  51. package/UI/Components/Filters/Types/Filter.ts +1 -0
  52. package/UI/Components/LogsViewer/LogsViewer.tsx +16 -0
  53. package/UI/Utils/Project.ts +6 -0
  54. package/UI/Utils/Telemetry/Telemetry.ts +65 -0
  55. package/UI/Utils/TelemetryService.ts +150 -0
  56. package/build/dist/Models/AnalyticsModels/ExceptionInstance.js +1 -1
  57. package/build/dist/Models/AnalyticsModels/ExceptionInstance.js.map +1 -1
  58. package/build/dist/Models/AnalyticsModels/Log.js +1 -1
  59. package/build/dist/Models/AnalyticsModels/Log.js.map +1 -1
  60. package/build/dist/Models/AnalyticsModels/Metric.js +1 -1
  61. package/build/dist/Models/AnalyticsModels/Metric.js.map +1 -1
  62. package/build/dist/Models/AnalyticsModels/Profile.js +1 -1
  63. package/build/dist/Models/AnalyticsModels/Profile.js.map +1 -1
  64. package/build/dist/Models/AnalyticsModels/ProfileSample.js +1 -1
  65. package/build/dist/Models/AnalyticsModels/ProfileSample.js.map +1 -1
  66. package/build/dist/Models/AnalyticsModels/Span.js +1 -1
  67. package/build/dist/Models/AnalyticsModels/Span.js.map +1 -1
  68. package/build/dist/Models/DatabaseModels/TelemetryException.js +47 -33
  69. package/build/dist/Models/DatabaseModels/TelemetryException.js.map +1 -1
  70. package/build/dist/Models/DatabaseModels/TelemetryUsageBilling.js +36 -2
  71. package/build/dist/Models/DatabaseModels/TelemetryUsageBilling.js.map +1 -1
  72. package/build/dist/Server/API/AIAgentDataAPI.js +24 -8
  73. package/build/dist/Server/API/AIAgentDataAPI.js.map +1 -1
  74. package/build/dist/Server/API/TelemetryAPI.js +4 -0
  75. package/build/dist/Server/API/TelemetryAPI.js.map +1 -1
  76. package/build/dist/Server/API/TelemetryExceptionAPI.js +6 -2
  77. package/build/dist/Server/API/TelemetryExceptionAPI.js.map +1 -1
  78. package/build/dist/Server/EnvironmentConfig.js +19 -0
  79. package/build/dist/Server/EnvironmentConfig.js.map +1 -1
  80. package/build/dist/Server/Infrastructure/ClickhouseDatabase.js +16 -2
  81. package/build/dist/Server/Infrastructure/ClickhouseDatabase.js.map +1 -1
  82. package/build/dist/Server/Infrastructure/Postgres/DataSourceOptions.js +10 -9
  83. package/build/dist/Server/Infrastructure/Postgres/DataSourceOptions.js.map +1 -1
  84. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780381124553-MigrationName.js +23 -0
  85. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780381124553-MigrationName.js.map +1 -0
  86. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780382837019-MigrationName.js +19 -0
  87. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780382837019-MigrationName.js.map +1 -0
  88. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780387560604-MigrationName.js +22 -0
  89. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780387560604-MigrationName.js.map +1 -0
  90. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780388219225-MigrationName.js +25 -0
  91. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/1780388219225-MigrationName.js.map +1 -0
  92. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/Index.js +8 -0
  93. package/build/dist/Server/Infrastructure/Postgres/SchemaMigrations/Index.js.map +1 -1
  94. package/build/dist/Server/Infrastructure/PostgresDatabase.js +20 -1
  95. package/build/dist/Server/Infrastructure/PostgresDatabase.js.map +1 -1
  96. package/build/dist/Server/Infrastructure/QueueWorker.js +40 -3
  97. package/build/dist/Server/Infrastructure/QueueWorker.js.map +1 -1
  98. package/build/dist/Server/Infrastructure/Redis.js +5 -0
  99. package/build/dist/Server/Infrastructure/Redis.js.map +1 -1
  100. package/build/dist/Server/Services/AnalyticsDatabaseService.js +59 -0
  101. package/build/dist/Server/Services/AnalyticsDatabaseService.js.map +1 -1
  102. package/build/dist/Server/Services/DatabaseService.js +62 -0
  103. package/build/dist/Server/Services/DatabaseService.js.map +1 -1
  104. package/build/dist/Server/Services/TelemetryAttributeService.js +23 -1
  105. package/build/dist/Server/Services/TelemetryAttributeService.js.map +1 -1
  106. package/build/dist/Server/Services/TelemetryExceptionService.js +16 -41
  107. package/build/dist/Server/Services/TelemetryExceptionService.js.map +1 -1
  108. package/build/dist/Server/Services/TelemetryUsageBillingService.js +211 -147
  109. package/build/dist/Server/Services/TelemetryUsageBillingService.js.map +1 -1
  110. package/build/dist/Server/Types/AnalyticsDatabase/ModelPermission.js +84 -63
  111. package/build/dist/Server/Types/AnalyticsDatabase/ModelPermission.js.map +1 -1
  112. package/build/dist/Server/Types/Database/Permissions/OwnedScopePermission.js +67 -49
  113. package/build/dist/Server/Types/Database/Permissions/OwnedScopePermission.js.map +1 -1
  114. package/build/dist/Server/Types/Database/Permissions/OwnerTableRegistry.js +51 -0
  115. package/build/dist/Server/Types/Database/Permissions/OwnerTableRegistry.js.map +1 -1
  116. package/build/dist/Server/Utils/Express.js +23 -0
  117. package/build/dist/Server/Utils/Express.js.map +1 -1
  118. package/build/dist/Server/Utils/GracefulShutdown.js +145 -0
  119. package/build/dist/Server/Utils/GracefulShutdown.js.map +1 -0
  120. package/build/dist/Server/Utils/Logger.js +8 -1
  121. package/build/dist/Server/Utils/Logger.js.map +1 -1
  122. package/build/dist/Server/Utils/Monitor/MonitorLogUtil.js +12 -10
  123. package/build/dist/Server/Utils/Monitor/MonitorLogUtil.js.map +1 -1
  124. package/build/dist/Server/Utils/Profiling.js +8 -3
  125. package/build/dist/Server/Utils/Profiling.js.map +1 -1
  126. package/build/dist/Server/Utils/StartServer.js +12 -4
  127. package/build/dist/Server/Utils/StartServer.js.map +1 -1
  128. package/build/dist/Server/Utils/Telemetry/ContextSpanProcessor.js +37 -0
  129. package/build/dist/Server/Utils/Telemetry/ContextSpanProcessor.js.map +1 -0
  130. package/build/dist/Server/Utils/Telemetry/LogExceptionExtractor.js +214 -0
  131. package/build/dist/Server/Utils/Telemetry/LogExceptionExtractor.js.map +1 -0
  132. package/build/dist/Server/Utils/Telemetry/SpanUtil.js +15 -24
  133. package/build/dist/Server/Utils/Telemetry/SpanUtil.js.map +1 -1
  134. package/build/dist/Server/Utils/Telemetry/StackTraceParser.js +365 -0
  135. package/build/dist/Server/Utils/Telemetry/StackTraceParser.js.map +1 -0
  136. package/build/dist/Server/Utils/Telemetry/TelemetryContext.js +124 -0
  137. package/build/dist/Server/Utils/Telemetry/TelemetryContext.js.map +1 -0
  138. package/build/dist/Server/Utils/Telemetry.js +22 -5
  139. package/build/dist/Server/Utils/Telemetry.js.map +1 -1
  140. package/build/dist/Tests/Server/Services/TelemetryAttributeService.test.js +50 -0
  141. package/build/dist/Tests/Server/Services/TelemetryAttributeService.test.js.map +1 -0
  142. package/build/dist/Tests/Server/Utils/Telemetry/LogExceptionExtractor.test.js +0 -0
  143. package/build/dist/Tests/Server/Utils/Telemetry/LogExceptionExtractor.test.js.map +1 -0
  144. package/build/dist/Types/Database/AccessControl/OwnedThrough.js +7 -2
  145. package/build/dist/Types/Database/AccessControl/OwnedThrough.js.map +1 -1
  146. package/build/dist/Types/Telemetry/ServiceType.js +10 -0
  147. package/build/dist/Types/Telemetry/ServiceType.js.map +1 -1
  148. package/build/dist/UI/Components/AutocompleteTextInput/AutocompleteTextInput.js +7 -1
  149. package/build/dist/UI/Components/AutocompleteTextInput/AutocompleteTextInput.js.map +1 -1
  150. package/build/dist/UI/Components/Dictionary/Dictionary.js +10 -0
  151. package/build/dist/UI/Components/Dictionary/Dictionary.js.map +1 -1
  152. package/build/dist/UI/Components/Filters/FiltersForm.js +1 -1
  153. package/build/dist/UI/Components/Filters/FiltersForm.js.map +1 -1
  154. package/build/dist/UI/Components/Filters/JSONFilter.js +1 -1
  155. package/build/dist/UI/Components/Filters/JSONFilter.js.map +1 -1
  156. package/build/dist/UI/Components/LogsViewer/LogsViewer.js +15 -0
  157. package/build/dist/UI/Components/LogsViewer/LogsViewer.js.map +1 -1
  158. package/build/dist/UI/Utils/Project.js +5 -0
  159. package/build/dist/UI/Utils/Project.js.map +1 -1
  160. package/build/dist/UI/Utils/Telemetry/Telemetry.js +44 -0
  161. package/build/dist/UI/Utils/Telemetry/Telemetry.js.map +1 -1
  162. package/build/dist/UI/Utils/TelemetryService.js +113 -0
  163. package/build/dist/UI/Utils/TelemetryService.js.map +1 -0
  164. package/package.json +1 -1
@@ -0,0 +1,194 @@
1
+ import logger from "./Logger";
2
+
3
+ /*
4
+ * Centralized graceful-shutdown coordinator.
5
+ *
6
+ * Before this existed, each subsystem registered its own
7
+ * process.on("SIGTERM" | "SIGINT") handler independently. The telemetry
8
+ * handler in particular called process.exit(0) as soon as the OTEL SDK
9
+ * flushed, which raced every other handler and — crucially — meant the
10
+ * Postgres / Redis / Clickhouse pools were never drained. Connections were
11
+ * left for the OS socket teardown to reap (and leaked outright on SIGKILL or
12
+ * a network partition).
13
+ *
14
+ * Now every subsystem registers an async cleanup callback here, and this class
15
+ * is the single owner of the signal handlers and of process.exit. Handlers run
16
+ * in ascending priority order (lower first) so we stop accepting new work
17
+ * before tearing down the resources that work depends on:
18
+ *
19
+ * HttpServer (10) -> stop accepting new HTTP requests
20
+ * Workers (20) -> stop pulling new queue jobs, finish in-flight jobs
21
+ * Buffers (30) -> flush in-memory write buffers to their datastore
22
+ * DataStores (40) -> drain Postgres / Redis / Clickhouse pools
23
+ * Telemetry (50) -> flush traces / metrics / logs / profiles last
24
+ *
25
+ * Handlers in the same tier run concurrently. Each handler is bounded by a
26
+ * per-handler timeout, and the whole sequence by an overall deadline, so a
27
+ * single hung handler can never wedge the shutdown.
28
+ */
29
/*
 * Shutdown tiers. GracefulShutdown runs tiers in ascending numeric order, so
 * a lower value is drained earlier.
 */
export enum ShutdownPriority {
  // Stop accepting new HTTP requests.
  HttpServer = 10,
  // Stop pulling new queue jobs and finish the in-flight ones.
  Workers = 20,
  // Flush in-memory write buffers to their datastore.
  Buffers = 30,
  // Drain the Postgres / Redis / Clickhouse pools.
  DataStores = 40,
  // Flush traces / metrics / logs / profiles last.
  Telemetry = 50,
}
36
+
37
/*
 * A cleanup callback. It may finish synchronously or return a promise that
 * the shutdown coordinator awaits.
 */
export type ShutdownCallback = () => Promise<void> | void;

/*
 * One entry in the shutdown registry.
 */
interface RegisteredShutdownHandler {
  // Registration key; also used in the shutdown log messages.
  name: string;
  // Tier this handler runs in.
  priority: ShutdownPriority;
  // Cleanup work to perform when the tier is reached.
  callback: ShutdownCallback;
}
44
+
45
export default class GracefulShutdown {
  // Registry of cleanup callbacks, at most one entry per name.
  private static handlers: Array<RegisteredShutdownHandler> = [];
  // Guards against installing the process signal listeners more than once.
  private static signalListenersInstalled: boolean = false;
  // Set on the first shutdown() call; a second call forces an exit.
  private static isShuttingDown: boolean = false;

  /*
   * Budget for a single handler. Once it elapses we stop waiting for that
   * handler and carry on. Chosen to sit well below the Kubernetes default
   * terminationGracePeriodSeconds of 30s.
   */
  private static readonly perHandlerTimeoutMs: number = 10_000;

  /*
   * Budget for the whole shutdown sequence. When it elapses the process is
   * force-exited instead of waiting to be SIGKILLed mid-cleanup.
   */
  private static readonly overallTimeoutMs: number = 25_000;

  /*
   * Adds a cleanup callback that runs on SIGTERM / SIGINT.
   *
   * Registration is keyed by name: registering the same name again swaps the
   * stored entry in place rather than appending a duplicate. Callers that own
   * several independent resources must therefore use a distinct name for each.
   * The first registration also installs the process signal listeners.
   */
  public static registerHandler(
    name: string,
    priority: ShutdownPriority,
    callback: ShutdownCallback,
  ): void {
    const entry: RegisteredShutdownHandler = { name, priority, callback };

    const slot: number = this.handlers.findIndex(
      (candidate: RegisteredShutdownHandler): boolean => {
        return candidate.name === name;
      },
    );

    if (slot === -1) {
      this.handlers.push(entry);
    } else {
      this.handlers[slot] = entry;
    }

    this.installSignalListeners();
  }

  /*
   * Hooks SIGTERM and SIGINT up to shutdown(). Does nothing after the first
   * call.
   */
  private static installSignalListeners(): void {
    if (this.signalListenersInstalled) {
      return;
    }
    this.signalListenersInstalled = true;

    for (const signal of ["SIGTERM", "SIGINT"] as const) {
      process.on(signal, () => {
        void this.shutdown(signal);
      });
    }
  }

  /*
   * Runs every registered handler, one tier at a time in ascending priority
   * order, then exits the process with code 0.
   *
   * Exits with code 1 instead when called while a shutdown is already in
   * progress, or when the overall deadline elapses first.
   */
  public static async shutdown(signal: string): Promise<void> {
    if (this.isShuttingDown) {
      // A repeated signal during the drain is treated as "stop right now".
      logger.warn(
        `GracefulShutdown: received ${signal} while already shutting down. Forcing exit.`,
      );
      return process.exit(1);
    }

    this.isShuttingDown = true;
    logger.info(
      `GracefulShutdown: received ${signal}. Draining ${this.handlers.length} handler(s)...`,
    );

    const overallDeadline: ReturnType<typeof setTimeout> = setTimeout(() => {
      logger.error(
        `GracefulShutdown: exceeded ${this.overallTimeoutMs}ms overall deadline. Forcing exit.`,
      );
      return process.exit(1);
    }, this.overallTimeoutMs);

    // The deadline timer alone must not keep the event loop alive.
    overallDeadline.unref();

    // A tier must finish before the next (higher-numbered) tier starts.
    for (const tier of this.collectTiers()) {
      await this.runTier(tier);
    }

    clearTimeout(overallDeadline);
    logger.info("GracefulShutdown: all handlers complete. Exiting cleanly.");
    return process.exit(0);
  }

  /*
   * Returns the distinct priorities currently registered, smallest first.
   */
  private static collectTiers(): Array<number> {
    const distinct: Set<number> = new Set<number>();

    for (const handler of this.handlers) {
      distinct.add(handler.priority);
    }

    return Array.from(distinct).sort((left: number, right: number) => {
      return left - right;
    });
  }

  /*
   * Runs all handlers registered for one tier concurrently and waits until
   * each has either finished or hit its per-handler timeout.
   */
  private static async runTier(tier: number): Promise<void> {
    const pending: Array<Promise<void>> = [];

    for (const handler of this.handlers) {
      if (handler.priority === tier) {
        pending.push(this.runHandlerWithTimeout(handler));
      }
    }

    await Promise.all(pending);
  }

  /*
   * Runs one handler, waiting at most perHandlerTimeoutMs for it. Never
   * rejects: a failing handler is logged, and a slow one is abandoned with a
   * warning (its callback is not cancelled).
   */
  private static async runHandlerWithTimeout(
    handler: RegisteredShutdownHandler,
  ): Promise<void> {
    logger.debug(`GracefulShutdown: running handler "${handler.name}"...`);

    let watchdog: ReturnType<typeof setTimeout> | undefined;

    const expired: Promise<void> = new Promise<void>(
      (giveUp: () => void) => {
        watchdog = setTimeout(() => {
          logger.warn(
            `GracefulShutdown: handler "${handler.name}" exceeded ${this.perHandlerTimeoutMs}ms. Moving on.`,
          );
          giveUp();
        }, this.perHandlerTimeoutMs);
        watchdog.unref();
      },
    );

    await Promise.race([this.invokeAndLog(handler), expired]);

    // The executor above runs synchronously, so the watchdog is always set.
    clearTimeout(watchdog);
  }

  /*
   * Invokes the handler's callback and logs the outcome. Errors thrown or
   * rejected by the callback are logged and swallowed.
   */
  private static async invokeAndLog(
    handler: RegisteredShutdownHandler,
  ): Promise<void> {
    try {
      await handler.callback();
      logger.debug(`GracefulShutdown: handler "${handler.name}" done.`);
    } catch (err) {
      logger.error(`GracefulShutdown: handler "${handler.name}" failed:`);
      logger.error(err);
    }
  }
}
@@ -1,5 +1,6 @@
1
1
  import { LogLevel } from "../EnvironmentConfig";
2
2
  import OneUptimeTelemetry, { TelemetryLogger } from "./Telemetry";
3
+ import TelemetryContext from "./Telemetry/TelemetryContext";
3
4
  import { SeverityNumber } from "@opentelemetry/api-logs";
4
5
  import Exception from "../../Types/Exception/Exception";
5
6
  import { JSONObject } from "../../Types/JSON";
@@ -171,9 +172,19 @@ export default class logger {
171
172
  return;
172
173
  }
173
174
 
175
+ /*
176
+ * Merge ambient TelemetryContext attributes (projectId, userId,
177
+ * monitorId, requestId, ...) into every log record. Attributes passed
178
+ * explicitly to the log call take precedence over the ambient context.
179
+ */
180
+ const mergedAttributes: LogAttributes = {
181
+ ...TelemetryContext.getAttributes(),
182
+ ...(data.attributes || {}),
183
+ };
184
+
174
185
  const sanitizedAttributes:
175
186
  | Record<string, string | number | boolean>
176
- | undefined = this.sanitizeAttributes(data.attributes);
187
+ | undefined = this.sanitizeAttributes(mergedAttributes);
177
188
 
178
189
  logger.emit({
179
190
  body: this.serializeLogBody(data.body),
@@ -2,6 +2,7 @@ import MonitorLogService from "../../Services/MonitorLogService";
2
2
  import GlobalConfigService from "../../Services/GlobalConfigService";
3
3
  import GlobalConfig from "../../../Models/DatabaseModels/GlobalConfig";
4
4
  import logger from "../Logger";
5
+ import GracefulShutdown, { ShutdownPriority } from "../GracefulShutdown";
5
6
  import OneUptimeDate from "../../../Types/Date";
6
7
  import ObjectID from "../../../Types/ObjectID";
7
8
  import { JSONObject } from "../../../Types/JSON";
@@ -44,8 +45,9 @@ export default class MonitorLogUtil {
44
45
  * here until either MONITOR_LOG_FLUSH_BATCH_SIZE rows arrive
45
46
  * (size trigger) or MONITOR_LOG_FLUSH_INTERVAL_MS elapses since
46
47
  * the first row entered an empty buffer (time trigger),
47
- * whichever comes first. On graceful shutdown the SIGTERM /
48
- * SIGINT hook below drains the buffer before the process exits.
48
+ * whichever comes first. On graceful shutdown the registered
49
+ * GracefulShutdown handler below drains the buffer (in the
50
+ * Buffers tier, before the datastores are torn down).
49
51
  */
50
52
  private static buffer: Array<JSONObject> = [];
51
53
  private static flushTimer: NodeJS.Timeout | null = null;
@@ -233,10 +235,12 @@ export default class MonitorLogUtil {
233
235
  }
234
236
 
235
237
  /*
236
- * Register SIGTERM / SIGINT handlers exactly once, lazily on
237
- * first ingest. We avoid registering at module-load time so
238
- * tooling that imports this file (e.g. migration runners,
239
- * CLI scripts) doesn't end up with stray process listeners.
238
+ * Register the shutdown flush exactly once, lazily on first ingest. We avoid
239
+ * registering at module-load time so tooling that imports this file (e.g.
240
+ * migration runners, CLI scripts) doesn't end up holding a stray handler.
241
+ *
242
+ * Runs in the Buffers tier — ahead of the DataStores tier — so the buffer is
243
+ * drained to Clickhouse before the datastore pools are torn down.
240
244
  */
241
245
  private static ensureShutdownHooks(): void {
242
246
  if (this.shutdownHooksRegistered) {
@@ -244,16 +248,17 @@ export default class MonitorLogUtil {
244
248
  }
245
249
  this.shutdownHooksRegistered = true;
246
250
 
247
- const flushOnShutdown: () => Promise<void> = async (): Promise<void> => {
248
- try {
249
- await this.flushAndWait();
250
- } catch (err) {
251
- logger.error("Error flushing MonitorLog buffer on shutdown:");
252
- logger.error(err);
253
- }
254
- };
255
-
256
- process.on("SIGTERM", flushOnShutdown);
257
- process.on("SIGINT", flushOnShutdown);
251
+ GracefulShutdown.registerHandler(
252
+ "MonitorLogUtil",
253
+ ShutdownPriority.Buffers,
254
+ async (): Promise<void> => {
255
+ try {
256
+ await this.flushAndWait();
257
+ } catch (err) {
258
+ logger.error("Error flushing MonitorLog buffer on shutdown:");
259
+ logger.error(err);
260
+ }
261
+ },
262
+ );
258
263
  }
259
264
  }
@@ -1,6 +1,7 @@
1
1
  import Pyroscope from "@pyroscope/nodejs";
2
2
  import { EnableProfiling } from "../EnvironmentConfig";
3
3
  import logger, { LogAttributes } from "./Logger";
4
+ import GracefulShutdown, { ShutdownPriority } from "./GracefulShutdown";
4
5
 
5
6
  export default class Profiling {
6
7
  public static init(data: { serviceName: string }): void {
@@ -44,12 +45,19 @@ export default class Profiling {
44
45
  logger.error(err, profilingLogAttributes);
45
46
  }
46
47
 
47
- process.on("SIGTERM", () => {
48
- Pyroscope.stop().catch((err: unknown) => {
49
- logger.error("Error stopping profiler:", profilingLogAttributes);
50
- logger.error(err, profilingLogAttributes);
51
- });
52
- });
48
+ // Stop the profiler last (Telemetry tier), alongside the OTEL flush.
49
+ GracefulShutdown.registerHandler(
50
+ "Profiling",
51
+ ShutdownPriority.Telemetry,
52
+ async (): Promise<void> => {
53
+ try {
54
+ await Pyroscope.stop();
55
+ } catch (err) {
56
+ logger.error("Error stopping profiler:", profilingLogAttributes);
57
+ logger.error(err, profilingLogAttributes);
58
+ }
59
+ },
60
+ );
53
61
  }
54
62
 
55
63
  private static getServerAddress(): string | undefined {
@@ -29,6 +29,7 @@ import logger, {
29
29
  import "./Process";
30
30
  import Response from "./Response";
31
31
  import SpanUtil from "./Telemetry/SpanUtil";
32
+ import TelemetryContext from "./Telemetry/TelemetryContext";
32
33
  import { api } from "@opentelemetry/sdk-node";
33
34
  import StatusCode from "../../Types/API/StatusCode";
34
35
  import HTTPErrorResponse from "../../Types/API/HTTPErrorResponse";
@@ -213,12 +214,19 @@ app.use((req: ExpressRequest, _res: ExpressResponse, next: NextFunction) => {
213
214
  const requestId: string = crypto.randomUUID();
214
215
  (req as OneUptimeRequest).requestId = requestId;
215
216
 
216
- // Tag the current span with requestId so all downstream spans inherit context
217
- SpanUtil.addAttributesToCurrentSpan({
218
- requestId: requestId,
219
- });
217
+ /*
218
+ * Open a telemetry-context scope for the entire request. requestId is seeded
219
+ * here; projectId/userId are added later by the auth middleware. Because
220
+ * ContextSpanProcessor and Logger read this ambient context, every span and
221
+ * log produced downstream inherits it automatically.
222
+ */
223
+ TelemetryContext.runWithContext({ requestId: requestId }, () => {
224
+ SpanUtil.addAttributesToCurrentSpan({
225
+ requestId: requestId,
226
+ });
220
227
 
221
- next();
228
+ next();
229
+ });
222
230
  });
223
231
 
224
232
  export interface InitFuctionOptions {
@@ -0,0 +1,48 @@
1
+ import TelemetryContext from "./TelemetryContext";
2
+ import type { Context } from "@opentelemetry/api";
3
+ import type {
4
+ ReadableSpan,
5
+ Span,
6
+ SpanProcessor,
7
+ } from "@opentelemetry/sdk-trace-base";
8
+
9
+ /**
10
+ * Copies the ambient {@link TelemetryContext} attributes (projectId, userId,
11
+ * monitorId, incidentId, requestId, ...) onto every span at creation time.
12
+ *
13
+ * Combined with `TelemetryContext` scopes seeded at each entry point (HTTP
14
+ * request, worker job, probe check, cron run), this makes the full
15
+ * tenant/business context queryable on all spans — including the ~1958
16
+ * attribute-less `@CaptureSpan` spans — without touching any of those call
17
+ * sites.
18
+ */
19
export default class ContextSpanProcessor implements SpanProcessor {
  /**
   * Copies each ambient TelemetryContext attribute onto the span being
   * started. Undefined and null values are skipped.
   *
   * Any error raised while reading or copying attributes is swallowed on
   * purpose, so enrichment can never prevent a span from being created.
   *
   * @param span - The span that is starting.
   * @param _parentContext - Unused; required by the SpanProcessor interface.
   */
  public onStart(span: Span, _parentContext: Context): void {
    try {
      const attributes: Record<string, string | number | boolean> =
        TelemetryContext.getAttributes();

      /*
       * Object.entries visits own enumerable keys only; for..in would also
       * walk anything enumerable on the prototype chain.
       */
      for (const [key, value] of Object.entries(attributes)) {
        if (value !== undefined && value !== null) {
          span.setAttribute(key, value);
        }
      }
    } catch {
      // Context enrichment must never break span creation.
    }
  }

  /**
   * No-op: enrichment happens entirely in onStart.
   *
   * @param _span - Unused.
   */
  public onEnd(_span: ReadableSpan): void {
    // Intentionally empty.
  }

  /**
   * Nothing to release; resolves immediately.
   */
  public shutdown(): Promise<void> {
    return Promise.resolve();
  }

  /**
   * Nothing is buffered; resolves immediately.
   */
  public forceFlush(): Promise<void> {
    return Promise.resolve();
  }
}
@@ -0,0 +1,289 @@
1
+ import { JSONObject, JSONValue } from "../../../Types/JSON";
2
+ import StackTraceParser, { ParsedStackTrace } from "./StackTraceParser";
3
+
4
/**
 * Result of detecting an exception inside a single log record. Shaped to feed
 * the same ExceptionInstance (ClickHouse) + TelemetryException (Postgres)
 * sinks the trace span-event path uses, so log-derived and span-derived
 * exceptions group under one fingerprint when identical.
 *
 * "Path A" below means extraction from explicit `exception.*` log attributes;
 * "Path B" means extraction by scanning the raw log body.
 */
export interface ExtractedLogException {
  // Exception message, clamped to MAX_MESSAGE_LENGTH. May be "" when none was found.
  message: string;
  // Exception type name (e.g. "TypeError", "panic"); "" when it could not be derived.
  exceptionType: string;
  // Path A: the exception.stacktrace attribute; Path B: the parsed prefix of the body. Clamped to MAX_STORED_STACK_TRACE_LENGTH.
  stackTrace: string;
  parsedFrames: string; // JSON.stringify(StackFrame[]) or "[]"
  escaped: boolean | null; // Path A may carry exception.escaped; Path B => null (unknown)
}
17
+
18
/**
 * Everything LogExceptionExtractor.extractFromLogRecord needs from one log
 * record in order to decide whether it describes an exception.
 */
export interface LogExceptionExtractorInput {
  body: string; // post-scrub log body
  attributes: JSONObject; // post-scrub merged log attributes
  // OTel severity number; the body scan (Path B) is skipped when it is below MIN_ERROR_SEVERITY_NUMBER.
  severityNumber: number;
  /**
   * True when the log carries BOTH a traceId and a spanId — i.e. it was
   * emitted inside an instrumented span. The span-exception path is the
   * canonical source for those, so Path B (body scan) is suppressed to avoid
   * double-counting (which would also inflate occuranceCount and the windowed
   * exception monitor). Path A (explicit exception.* attributes) is NOT
   * suppressed — those are an intentional structured exception record.
   */
  hasTraceAndSpan: boolean;
}
32
+
33
/**
 * OTel log severityNumber >= 17 is ERROR (17-20) or FATAL (21-24). Path B only
 * scans those — the overwhelming majority of logs are below this and never
 * reach the parser.
 */
const MIN_ERROR_SEVERITY_NUMBER: number = 17;

/**
 * Only the first 16 KB of a body is parsed. A clean single-record stack trace
 * fits comfortably (~150 frames); the top frames are the most diagnostic and
 * are at the front. Guards against pathological multi-megabyte error logs on
 * the hot path.
 *
 * The limit is applied with String.length / String.slice, so it is counted in
 * UTF-16 code units rather than encoded bytes.
 */
const MAX_PARSE_BODY_LENGTH: number = 16 * 1024;

/*
 * Raw log bodies are unbounded (unlike SDK-bounded span exception.stacktrace),
 * so clamp what we store into the ZSTD stackTrace column and the Postgres summary.
 * Both limits are string lengths (UTF-16 code units).
 */
const MAX_STORED_STACK_TRACE_LENGTH: number = 64 * 1024;
const MAX_MESSAGE_LENGTH: number = 1024;

/**
 * Single pre-compiled signature for "this body plausibly contains a stack
 * trace". Evaluated once before the (more expensive) multi-language parser.
 * Covers: Python traceback header, JS/Java `at file:line`, Go panic/goroutine,
 * Python `File "...", line N`, and a typed `SomethingException`/`SomethingError`.
 *
 * The pattern has no `g`/`y` flag, so `.test()` keeps no state between calls.
 */
const LOOKS_LIKE_STACK_TRACE: RegExp =
  /(?:Traceback \(most recent call last\)|\n\s+at\s+.+:\d+|\bpanic:\s|goroutine\s+\d+\s+\[|\n\s*File\s+"[^"]+",\s+line\s+\d+|\b[A-Za-z_][\w.$]*(?:Exception|Error)\b)/;

// Header parsers for deriving exceptionType + message from a raw body.
// Each one is matched against a single trimmed line, hence the ^ / $ anchors.
const PYTHON_TRACEBACK_HEADER: RegExp = /^Traceback \(most recent call last\):/;
// Group 1: whatever follows the quoted thread name (normally "Type: message").
const JAVA_THREAD_PREFIX: RegExp = /^Exception in thread\s+"[^"]*"\s+(.*)$/;
// Group 1: the panic message.
const GO_PANIC: RegExp = /^panic:\s*(.*)$/;
/*
 * Leading identifier is optional so a bare "Error: msg" / "Exception: msg"
 * (common in Node.js) matches as well as "TypeError" / "java.lang.IOException".
 * Type names ending in "Warning" or "Fault" are accepted too.
 * Group 1: the type name; group 2 (optional): the message after the colon.
 */
const TYPED_ERROR: RegExp =
  /^((?:[A-Za-z_][\w.$]*)?(?:Error|Exception|Warning|Fault))(?::\s*([\s\S]*))?$/;
// Dotted type name followed by ": message". Group 1: the type; group 2: the message.
const QUALIFIED_TYPE: RegExp = /^([A-Za-z_][\w.$]*\.[A-Za-z_][\w.$]*):\s*(.*)$/;
75
+
76
/**
 * Detects exceptions in individual log records, either from explicit
 * `exception.*` attributes (Path A) or by scanning the raw body for a stack
 * trace (Path B).
 */
export default class LogExceptionExtractor {
  /**
   * Inspect one log record for an exception.
   *
   * @param input - body, attributes, severity and trace/span presence of the log.
   * @returns the extracted exception, or null when nothing was detected.
   * Never throws — a failure here must not fail log ingest, so every error is
   * turned into a null result.
   */
  public static extractFromLogRecord(
    input: LogExceptionExtractorInput,
  ): ExtractedLogException | null {
    try {
      /*
       * Path A — explicit OTel exception.* attributes. Runs unconditionally:
       * it is the cheapest check and the strongest signal, because the app
       * deliberately recorded an exception on the log.
       */
      const attributeResult: ExtractedLogException | null =
        LogExceptionExtractor.extractFromAttributes(input.attributes);
      if (attributeResult) {
        return attributeResult;
      }

      /*
       * Path B — raw body scan. Skipped for sub-ERROR severities and for logs
       * emitted inside a span, which keeps the hot path cheap.
       */
      if (
        input.severityNumber < MIN_ERROR_SEVERITY_NUMBER ||
        input.hasTraceAndSpan
      ) {
        return null;
      }

      return LogExceptionExtractor.extractFromBody(input.body);
    } catch {
      return null;
    }
  }

  /**
   * Path A: build an exception from the `exception.stacktrace`,
   * `exception.type`, `exception.message` and `exception.escaped` attributes.
   * Returns null when none of the first three is a non-empty string.
   */
  private static extractFromAttributes(
    attributes: JSONObject,
  ): ExtractedLogException | null {
    if (!attributes) {
      return null;
    }

    const rawStack: string = asString(attributes["exception.stacktrace"]);
    const typeName: string = asString(attributes["exception.type"]);
    const text: string = asString(attributes["exception.message"]);

    if (!(rawStack || typeName || text)) {
      return null;
    }

    const storedStack: string = clamp(rawStack, MAX_STORED_STACK_TRACE_LENGTH);

    return {
      message: clamp(text, MAX_MESSAGE_LENGTH),
      exceptionType: typeName,
      stackTrace: storedStack,
      parsedFrames: LogExceptionExtractor.serializeFrames(storedStack),
      escaped: toNullableBoolean(attributes["exception.escaped"]),
    };
  }

  /**
   * Parse a stack trace and return its frames as JSON. Yields "[]" for an
   * empty stack or when parsing/serializing throws.
   */
  private static serializeFrames(stack: string): string {
    if (!stack) {
      return "[]";
    }

    try {
      const result: ParsedStackTrace = StackTraceParser.parse(stack);
      return JSON.stringify(result.frames);
    } catch {
      return "[]";
    }
  }

  /**
   * Path B: look for a stack trace in the first MAX_PARSE_BODY_LENGTH
   * characters of the body. Returns null unless at least one frame parses.
   */
  private static extractFromBody(body: string): ExtractedLogException | null {
    if (!body) {
      return null;
    }

    let head: string = body;
    if (body.length > MAX_PARSE_BODY_LENGTH) {
      head = body.slice(0, MAX_PARSE_BODY_LENGTH);
    }

    // Cheap signature check before handing the text to the full parser.
    if (!LOOKS_LIKE_STACK_TRACE.test(head)) {
      return null;
    }

    const trace: ParsedStackTrace = StackTraceParser.parse(head);

    /*
     * Zero frames after a signature hit usually means prose that merely
     * mentions "...Error:" / "...Exception" rather than a real stack trace, so
     * at least one frame is required here. Only Path A may emit without frames.
     */
    if (!trace.frames || trace.frames.length === 0) {
      return null;
    }

    const { exceptionType, message }: { exceptionType: string; message: string } =
      LogExceptionExtractor.parseHeader(head);

    return {
      message: clamp(message, MAX_MESSAGE_LENGTH),
      exceptionType: exceptionType,
      stackTrace: clamp(head, MAX_STORED_STACK_TRACE_LENGTH),
      parsedFrames: JSON.stringify(trace.frames),
      escaped: null,
    };
  }

  /**
   * Best-effort guess at exceptionType + message for a raw stack-trace body.
   * A clean, stable exceptionType helps grouping and exception-monitor
   * targeting; failing to find one is harmless because the fingerprint also
   * draws on the message and the normalized stack trace.
   */
  private static parseHeader(body: string): {
    exceptionType: string;
    message: string;
  } {
    // Trimmed, non-empty lines only.
    const lines: Array<string> = [];
    for (const rawLine of body.split("\n")) {
      const trimmed: string = rawLine.trim();
      if (trimmed.length > 0) {
        lines.push(trimmed);
      }
    }

    if (lines.length === 0) {
      return { exceptionType: "", message: "" };
    }

    const opening: string = lines[0]!;

    /*
     * Python puts "Traceback (most recent call last):" first and the
     * "Type: message" line LAST, so walk the lines from the bottom up.
     */
    if (PYTHON_TRACEBACK_HEADER.test(opening)) {
      let index: number = lines.length;
      while (index-- > 0) {
        const hit: RegExpExecArray | null = TYPED_ERROR.exec(lines[index]!);
        if (hit) {
          return {
            exceptionType: hit[1] || "",
            message: (hit[2] || "").trim(),
          };
        }
      }
      // Truncated traceback without a type line — fall back to the final line.
      return { exceptionType: "", message: lines[lines.length - 1]! };
    }

    // Drop Java's `Exception in thread "name"` prefix when there is one.
    const threadHit: RegExpExecArray | null = JAVA_THREAD_PREFIX.exec(opening);
    const headline: string = threadHit ? threadHit[1]!.trim() : opening;

    const panicHit: RegExpExecArray | null = GO_PANIC.exec(headline);
    if (panicHit) {
      return { exceptionType: "panic", message: (panicHit[1] || "").trim() };
    }

    /*
     * First "TypeError: msg" / "java.lang.NullPointerException: msg" /
     * "...Exception", then the generic qualified "pkg.Sub.Type: msg".
     */
    for (const pattern of [TYPED_ERROR, QUALIFIED_TYPE]) {
      const hit: RegExpExecArray | null = pattern.exec(headline);
      if (hit) {
        return {
          exceptionType: hit[1] || "",
          message: (hit[2] || "").trim(),
        };
      }
    }

    return { exceptionType: "", message: headline };
  }
}
260
+
261
/**
 * Returns `value` unchanged when it is a string; every other kind of value
 * (including undefined and null) becomes the empty string.
 */
function asString(value: JSONValue | undefined): string {
  return typeof value === "string" ? value : "";
}
267
+
268
/**
 * Interprets a loosely typed value as a boolean.
 *
 * Booleans pass through. Strings are trimmed and lower-cased, then "true" and
 * "false" map to their boolean values. Anything else yields null (unknown).
 */
function toNullableBoolean(value: JSONValue | undefined): boolean | null {
  if (typeof value === "boolean") {
    return value;
  }

  if (typeof value !== "string") {
    return null;
  }

  switch (value.trim().toLowerCase()) {
    case "true":
      return true;
    case "false":
      return false;
    default:
      return null;
  }
}
283
+
284
/**
 * Truncates `value` to at most `max` UTF-16 code units.
 *
 * Empty values and values already within the limit are returned unchanged.
 * When the cut would land between the two halves of a surrogate pair, the
 * dangling high surrogate is dropped as well, so the result never ends in half
 * a character (which would turn into U+FFFD or invalid text once encoded for
 * storage). The result may therefore be one unit shorter than `max`.
 *
 * @param value - text to truncate.
 * @param max - maximum length of the result, in UTF-16 code units.
 * @returns `value`, or a prefix of it no longer than `max`.
 */
function clamp(value: string, max: number): string {
  if (!value || value.length <= max) {
    return value;
  }

  let end: number = max;

  // A high surrogate (U+D800–U+DBFF) as the last kept unit has lost its pair.
  const lastKept: number = value.charCodeAt(end - 1);
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
    end -= 1;
  }

  return value.slice(0, end);
}