@growth-labs/monitoring 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/package.json +6 -3
- package/src/alerting/README.md +13 -0
- package/src/alerting/dedup.ts +70 -0
- package/src/alerting/escalation.ts +29 -0
- package/src/alerting/index.ts +95 -0
- package/src/alerting/thresholds.ts +148 -0
- package/src/index.ts +23 -0
- package/src/prober/README.md +18 -0
- package/src/prober/index.ts +83 -0
- package/src/prober/persist.ts +46 -0
- package/src/prober/runners/get-runner.ts +49 -0
- package/src/prober/runners/happy-path-runner.ts +270 -0
- package/src/prober/runners/post-runner.ts +50 -0
- package/src/prober/surfaces.ts +52 -0
- package/src/schemas/README.md +14 -0
- package/src/schemas/drizzle/schema.ts +59 -0
- package/src/schemas/index.ts +1 -0
- package/src/tail/README.md +18 -0
- package/src/tail/categorize.ts +156 -0
- package/src/tail/fingerprint.ts +21 -0
- package/src/tail/index.ts +71 -0
- package/src/tail/persist.ts +93 -0
- package/src/tail/redact.ts +30 -0
- package/src/tail/sample.ts +34 -0
- package/src/types.ts +111 -0
- package/src/virtual.d.ts +4 -0
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
import { type CheckResult, errorMessage, isTimeoutMessage, type RuntimeFetch } from '../../types.js'
|
|
2
|
+
import type { CodeSigninHappyPathConfig, HappyPathSurface } from '../surfaces.js'
|
|
3
|
+
|
|
4
|
+
/**
 * Strategy for looking up the most recent sign-in verification code in a mailbox.
 *
 * The happy-path probe uses `runtime.gmailClient` when one is supplied and otherwise
 * falls back to the Gmail REST implementation defined in this module.
 */
export interface GmailCodeClient {
  /**
   * Looks up the latest verification code.
   *
   * @param config - The `gmail` section of the surface's code-signin configuration.
   * @param env - Environment values available to the probe (secrets are read from here).
   * @param runtime - Runtime hooks; the built-in client takes its `fetcher` from this object.
   * @returns The code, or `null` when none was found (the poller then retries).
   */
  findLatestCode(
    config: CodeSigninHappyPathConfig['gmail'],
    env: Record<string, unknown>,
    runtime: RuntimeFetch,
  ): Promise<string | null>
}
|
|
11
|
+
|
|
12
|
+
/**
 * Runtime hooks accepted by the happy-path probe in addition to those of `RuntimeFetch`.
 * Every member is optional; the probe substitutes a default for each missing one.
 */
export interface HappyPathRuntime extends RuntimeFetch {
  /** Environment values handed to the Gmail client; treated as `{}` when omitted. */
  env?: Record<string, unknown>
  /** Replaces the built-in Gmail client used to find the verification code. */
  gmailClient?: GmailCodeClient
  /** Replaces the default `setTimeout`-based delay between poll attempts. */
  sleep?: (ms: number) => Promise<void>
}
|
|
17
|
+
|
|
18
|
+
/**
 * Probes the complete "code sign-in" journey for one surface.
 *
 * Sequence:
 *  1. POST the test email to `codeRequestUrl`; the answer must be 2xx.
 *  2. Poll the mailbox for the verification code.
 *  3. POST email + code to `codeVerifyUrl`; the answer must be 2xx or 3xx.
 *  4. When the verify response has a `Location` header, GET it (resolved against
 *     `callbackUrl`) with the cookies gathered so far; the answer must be below 400.
 *  5. GET `accountUrl` with all gathered cookies; the answer must be 200 and its body
 *     must contain the test email.
 *
 * Each HTTP request issued here is bounded by `surface.timeoutMs` and never follows
 * redirects automatically.
 *
 * @param surface - The happy-path surface to probe.
 * @param runtime - Optional hooks (fetcher, env, Gmail client, sleep).
 * @returns `pass` on success; `fail` for an unmet expectation; `timeout` when no code
 *   arrived or when a thrown error's message is classified as a timeout. Errors thrown
 *   inside the flow are converted into a result rather than propagated.
 */
export async function runHappyPath(
  surface: HappyPathSurface,
  runtime: HappyPathRuntime = {},
): Promise<CheckResult> {
  const start = Date.now()
  const fetcher = runtime.fetcher ?? fetch
  const env = runtime.env ?? {}

  // The two form posts differ only in their URL and form fields.
  const postForm = (url: string, fields: Record<string, string>) =>
    fetcher(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
      body: new URLSearchParams(fields).toString(),
      redirect: 'manual',
      signal: AbortSignal.timeout(surface.timeoutMs),
    })

  // The two GETs differ only in their URL; both replay the cookies collected so far.
  const getWithCookies = (url: string, jar: string[]) =>
    fetcher(url, {
      method: 'GET',
      headers: cookieHeader(jar),
      redirect: 'manual',
      signal: AbortSignal.timeout(surface.timeoutMs),
    })

  try {
    if (surface.flow !== 'code-signin') {
      return { status: 'fail', latencyMs: 0, errorMessage: `unsupported flow ${surface.flow}` }
    }

    // Step 1: ask the site to email a verification code.
    const requestResponse = await postForm(surface.config.codeRequestUrl, {
      email: surface.config.testEmail,
    })
    if (requestResponse.status < 200 || requestResponse.status >= 300) {
      const message = `code request expected 2xx, got ${requestResponse.status}`
      return fail(start, message, requestResponse.status)
    }

    // Step 2: wait for the code to show up in the mailbox.
    const code = await pollForCode(surface.config, env, runtime)
    if (!code) {
      return timeout(start, 'verification code email did not arrive before poll limit')
    }

    // Step 3: submit the code.
    const verifyResponse = await postForm(surface.config.codeVerifyUrl, {
      email: surface.config.testEmail,
      code,
    })
    if (verifyResponse.status < 200 || verifyResponse.status >= 400) {
      const message = `code verify expected 2xx/3xx, got ${verifyResponse.status}`
      return fail(start, message, verifyResponse.status)
    }

    // Step 4: follow the post-verification redirect once, carrying the session cookies.
    const cookies = collectSetCookies([requestResponse, verifyResponse])
    const location = verifyResponse.headers.get('Location')
    if (location) {
      const callbackUrl = new URL(location, surface.config.callbackUrl).toString()
      const callbackResponse = await getWithCookies(callbackUrl, cookies)
      cookies.push(...collectSetCookies([callbackResponse]))
      if (callbackResponse.status >= 400) {
        const message = `callback expected <400, got ${callbackResponse.status}`
        return fail(start, message, callbackResponse.status)
      }
    }

    // Step 5: the account page must load and show the signed-in email.
    const accountResponse = await getWithCookies(surface.config.accountUrl, cookies)
    if (accountResponse.status !== 200) {
      const message = `account expected 200, got ${accountResponse.status}`
      return fail(start, message, accountResponse.status)
    }
    const accountBody = await accountResponse.text()
    if (!accountBody.includes(surface.config.testEmail)) {
      const message = `account page did not contain ${surface.config.testEmail}`
      return fail(start, message, accountResponse.status)
    }

    return { status: 'pass', statusCode: 200, latencyMs: Date.now() - start }
  } catch (error) {
    const message = errorMessage(error)
    const status = isTimeoutMessage(message) ? 'timeout' : 'fail'
    return { status, latencyMs: Date.now() - start, errorMessage: message }
  }
}
|
|
115
|
+
|
|
116
|
+
/**
 * Finds the first standalone six-digit number in `message`.
 *
 * "Standalone" means delimited by word boundaries, so six digits embedded in a longer
 * run of digits or letters do not count.
 *
 * @param message - Text to search.
 * @returns The six digits, or `null` when the text contains no such number.
 */
export async function extractCodeFromMessage(message: string): Promise<string | null> {
  const hit = message.match(/\b\d{6}\b/)
  if (hit === null) return null
  return hit[0] ?? null
}
|
|
119
|
+
|
|
120
|
+
/**
 * Repeatedly asks the Gmail client for the verification code.
 *
 * Makes at most `config.gmail.maxPollAttempts` lookups and waits
 * `config.gmail.pollIntervalMs` between consecutive lookups (not after the last one).
 *
 * @param config - Code-signin configuration; only its `gmail` section is used.
 * @param env - Environment values forwarded to the client.
 * @param runtime - Supplies optional `gmailClient` and `sleep` overrides and is itself
 *   forwarded to the client.
 * @returns The first code found, or `null` once the attempts are exhausted.
 */
async function pollForCode(
  config: CodeSigninHappyPathConfig,
  env: Record<string, unknown>,
  runtime: HappyPathRuntime,
): Promise<string | null> {
  const client = runtime.gmailClient ?? defaultGmailClient
  const sleep =
    runtime.sleep ??
    ((ms: number) =>
      new Promise<void>((resolve) => {
        setTimeout(resolve, ms)
      }))

  let attempt = 0
  while (attempt < config.gmail.maxPollAttempts) {
    const code = await client.findLatestCode(config.gmail, env, runtime)
    if (code) return code
    attempt += 1
    // Only pause when another lookup is going to follow.
    if (attempt < config.gmail.maxPollAttempts) {
      await sleep(config.gmail.pollIntervalMs)
    }
  }
  return null
}
|
|
135
|
+
|
|
136
|
+
/**
 * Built-in Gmail client: authorizes with a service account, lists the messages matching
 * the configured query and returns the first six-digit code found in any of them.
 *
 * A fresh access token is requested on every call.
 *
 * NOTE(review): with `format=full` the Gmail API documents message body parts as
 * base64url-encoded, so the six-digit search over the stringified JSON presumably
 * matches against `snippet` and header values rather than the decoded body — confirm
 * that this is intended.
 */
const defaultGmailClient: GmailCodeClient = {
  async findLatestCode(config, env, runtime) {
    const fetcher = runtime.fetcher ?? fetch
    const credentialsJson = env[config.credentialsSecret]
    if (typeof credentialsJson !== 'string') {
      throw new Error(`env.${config.credentialsSecret} must contain Gmail service account JSON`)
    }

    let credentials: ServiceAccountCredentials
    try {
      credentials = JSON.parse(credentialsJson) as ServiceAccountCredentials
    } catch {
      // Deliberately drop the parser's own error: its message may quote part of the
      // input, and this error text ends up in the probe result.
      throw new Error(`env.${config.credentialsSecret} must contain Gmail service account JSON`)
    }

    // Route the token exchange through the same fetcher as every other request so an
    // injected fetcher sees (and can stub) all outbound traffic.
    const token = await authorizeGmail(credentials, config.subject, fetcher)
    const authHeaders = { Authorization: `Bearer ${token}` }

    const query = encodeURIComponent(config.query ?? 'newer_than:5m')
    const listResponse = await fetcher(
      `https://gmail.googleapis.com/gmail/v1/users/me/messages?q=${query}`,
      { headers: authHeaders },
    )
    // An API error must not be mistaken for "no mail yet"; otherwise a credentials or
    // quota problem would be reported as a poll timeout.
    if (listResponse.status < 200 || listResponse.status >= 300) {
      throw new Error(`Gmail message list failed with ${listResponse.status}`)
    }
    const list = (await listResponse.json()) as { messages?: Array<{ id: string }> }

    for (const message of list.messages ?? []) {
      const response = await fetcher(
        `https://gmail.googleapis.com/gmail/v1/users/me/messages/${message.id}?format=full`,
        { headers: authHeaders },
      )
      // A single unreadable message is skipped so the remaining ones are still searched.
      if (response.status < 200 || response.status >= 300) continue
      const body = JSON.stringify(await response.json())
      const code = await extractCodeFromMessage(body)
      if (code) return code
    }
    return null
  },
}

/** The fields of a Google service-account key file that this module reads. */
interface ServiceAccountCredentials {
  /** Used as the JWT issuer (`iss`). */
  client_email: string
  /** PEM-encoded PKCS#8 private key used to sign the JWT. */
  private_key: string
  /** Token endpoint; `https://oauth2.googleapis.com/token` is used when absent. */
  token_uri?: string
}

/**
 * Exchanges a signed service-account JWT for a Gmail read-only access token.
 *
 * @param credentials - Service-account key material.
 * @param subject - Value of the JWT `sub` claim.
 * @param fetcher - Optional fetch implementation; the global `fetch` is used when omitted.
 * @returns The access token.
 * @throws Error when the token endpoint answers with a non-OK status or the response
 *   carries no `access_token`.
 */
async function authorizeGmail(
  credentials: ServiceAccountCredentials,
  subject: string,
  fetcher?: RuntimeFetch['fetcher'],
): Promise<string> {
  const send = fetcher ?? fetch
  const tokenUri = credentials.token_uri ?? 'https://oauth2.googleapis.com/token'
  const now = Math.floor(Date.now() / 1000)
  const assertion = await signJwt(
    {
      alg: 'RS256',
      typ: 'JWT',
    },
    {
      iss: credentials.client_email,
      scope: 'https://www.googleapis.com/auth/gmail.readonly',
      aud: tokenUri,
      exp: now + 3600,
      iat: now,
      sub: subject,
    },
    credentials.private_key,
  )
  const response = await send(tokenUri, {
    method: 'POST',
    headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
    body: new URLSearchParams({
      grant_type: 'urn:ietf:params:oauth:grant-type:jwt-bearer',
      assertion,
    }).toString(),
  })
  if (!response.ok) throw new Error(`Gmail JWT authorization failed with ${response.status}`)
  const payload = (await response.json()) as { access_token?: string }
  if (!payload.access_token) throw new Error('Gmail JWT authorization returned no access_token')
  return payload.access_token
}
|
|
205
|
+
|
|
206
|
+
/**
 * Builds a compact RS256-signed JWT (`header.claims.signature`) with Web Crypto.
 *
 * @param header - JOSE header object; serialized with `JSON.stringify`.
 * @param claims - Claim set; serialized with `JSON.stringify`.
 * @param privateKeyPem - PEM-encoded PKCS#8 RSA private key.
 * @returns The three base64url segments joined by dots.
 */
async function signJwt(
  header: Record<string, unknown>,
  claims: Record<string, unknown>,
  privateKeyPem: string,
): Promise<string> {
  const utf8 = new TextEncoder()
  const toSegment = (part: Record<string, unknown>): string =>
    base64UrlEncode(utf8.encode(JSON.stringify(part)))

  const encodedHeader = toSegment(header)
  const encodedClaims = toSegment(claims)
  const signingInput = `${encodedHeader}.${encodedClaims}`

  // The key is imported as non-extractable and usable for signing only.
  const signingKey = await crypto.subtle.importKey(
    'pkcs8',
    pemToArrayBuffer(privateKeyPem),
    { name: 'RSASSA-PKCS1-v1_5', hash: 'SHA-256' },
    false,
    ['sign'],
  )
  const signatureBytes = await crypto.subtle.sign(
    'RSASSA-PKCS1-v1_5',
    signingKey,
    utf8.encode(signingInput),
  )
  const encodedSignature = base64UrlEncode(new Uint8Array(signatureBytes))
  return `${signingInput}.${encodedSignature}`
}
|
|
228
|
+
|
|
229
|
+
/**
 * Decodes a PEM private key into its raw DER bytes.
 *
 * Strips the BEGIN/END armor lines for the plain private-key label and all whitespace,
 * then base64-decodes the remainder.
 *
 * @param pem - PEM text.
 * @returns The decoded bytes as an `ArrayBuffer`.
 */
function pemToArrayBuffer(pem: string): ArrayBuffer {
  // The armor labels are assembled at runtime rather than written out as one literal.
  const keyLabel = ['PRIVATE', 'KEY'].join(' ')
  const armorMarkers = [`-----BEGIN ${keyLabel}-----`, `-----END ${keyLabel}-----`]

  let base64 = pem
  for (const marker of armorMarkers) {
    base64 = base64.split(marker).join('')
  }
  base64 = base64.replace(/\s/g, '')

  // atob yields one character per byte, so each char code is a byte value.
  const decoded = atob(base64)
  const bytes = Uint8Array.from(decoded, (char) => char.charCodeAt(0))
  return bytes.buffer
}
|
|
239
|
+
|
|
240
|
+
/**
 * Encodes bytes as unpadded base64url (`+` becomes `-`, `/` becomes `_`, trailing `=` removed).
 *
 * @param bytes - Bytes to encode.
 * @returns The base64url text.
 */
function base64UrlEncode(bytes: Uint8Array): string {
  // btoa expects a "binary string": one character per byte.
  let binary = ''
  for (const byte of bytes) {
    binary += String.fromCharCode(byte)
  }
  const standard = btoa(binary)
  return standard.replace(/\+/g, '-').replace(/\//g, '_').replace(/=+$/g, '')
}
|
|
244
|
+
|
|
245
|
+
/**
 * Builds a `fail` check result whose latency is measured from `start` to now.
 *
 * @param start - `Date.now()` value captured when the check began.
 * @param errorMessage - Explanation of the failure.
 * @param statusCode - HTTP status that caused the failure, when there is one.
 */
function fail(start: number, errorMessage: string, statusCode?: number): CheckResult {
  const latencyMs = Date.now() - start
  return { status: 'fail', statusCode, latencyMs, errorMessage }
}
|
|
248
|
+
|
|
249
|
+
/**
 * Builds a `timeout` check result whose latency is measured from `start` to now.
 *
 * @param start - `Date.now()` value captured when the check began.
 * @param errorMessage - Explanation of what timed out.
 */
function timeout(start: number, errorMessage: string): CheckResult {
  const latencyMs = Date.now() - start
  return { status: 'timeout', latencyMs, errorMessage }
}
|
|
252
|
+
|
|
253
|
+
/**
 * Gathers the `name=value` pairs of every `Set-Cookie` header on the given responses.
 *
 * Cookie attributes (`Path`, `HttpOnly`, ...) are dropped. When the same cookie name is
 * set more than once, the most recent value replaces the earlier one, so a re-issued
 * session cookie is not sent alongside its stale predecessor.
 *
 * @param responses - Responses to inspect, in the order they were received.
 * @returns One `name=value` entry per distinct cookie name, in first-seen order.
 */
function collectSetCookies(responses: Response[]): string[] {
  const jar = new Map<string, string>()
  for (const response of responses) {
    const headers = response.headers as Headers & { getSetCookie?: () => string[] }

    // Prefer getSetCookie(): it returns each Set-Cookie header separately, whereas
    // get('set-cookie') joins them with ", ". Reading both (as this function used to)
    // counts the first cookie twice.
    const separate = typeof headers.getSetCookie === 'function' ? headers.getSetCookie() : []
    let values = separate
    if (separate.length === 0) {
      // Fallback for runtimes without getSetCookie(). The joined value cannot be split
      // safely (Expires dates contain commas), so only its first cookie is recoverable.
      const combined = headers.get('set-cookie')
      values = combined ? [combined] : []
    }

    for (const value of values) {
      const pair = value.split(';')[0]?.trim()
      if (!pair) continue
      const separator = pair.indexOf('=')
      const name = separator === -1 ? pair : pair.slice(0, separator)
      jar.set(name, pair)
    }
  }
  return [...jar.values()]
}
|
|
267
|
+
|
|
268
|
+
/**
 * Turns collected `name=value` pairs into request headers.
 *
 * @param cookies - Cookie pairs to send.
 * @returns A `Cookie` header joining the pairs with `"; "`, or no headers when the list is empty.
 */
function cookieHeader(cookies: string[]): HeadersInit {
  if (cookies.length > 0) {
    return { Cookie: cookies.join('; ') }
  }
  return {}
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import { type CheckResult, errorMessage, isTimeoutMessage, type RuntimeFetch } from '../../types.js'
|
|
2
|
+
import type { PostSurface } from '../surfaces.js'
|
|
3
|
+
|
|
4
|
+
/**
 * Probes a surface with a single form-encoded POST.
 *
 * The request does not follow redirects and is aborted after `surface.timeoutMs`.
 * The check passes when the response status equals `assertions.statusCode` and, if
 * `assertions.locationPattern` is set, the `Location` header (empty string when absent)
 * matches it.
 *
 * @param surface - The POST surface to probe.
 * @param runtime - Optional hooks; `runtime.fetcher` replaces the global `fetch`.
 * @returns `pass` or `fail` for a completed request. A thrown error becomes `timeout`
 *   when its message is classified as a timeout and `fail` otherwise; it is never rethrown.
 */
export async function runPost(
  surface: PostSurface,
  runtime: RuntimeFetch = {},
): Promise<CheckResult> {
  const start = Date.now()
  const fetcher = runtime.fetcher ?? fetch
  try {
    const body = new URLSearchParams(surface.bodyFormUrlEncoded).toString()
    const response = await fetcher(surface.url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
      body,
      redirect: 'manual',
      signal: AbortSignal.timeout(surface.timeoutMs),
    })
    const latencyMs = Date.now() - start

    if (response.status !== surface.assertions.statusCode) {
      return {
        status: 'fail',
        statusCode: response.status,
        latencyMs,
        errorMessage: `expected ${surface.assertions.statusCode}, got ${response.status}`,
      }
    }

    const location = response.headers.get('Location') ?? ''
    const locationPattern = surface.assertions.locationPattern
    if (locationPattern) {
      // A global or sticky RegExp remembers `lastIndex` between test() calls. The surface
      // (and its pattern) is reused on every run, so without this reset a matching
      // Location would pass on one run and fail on the next.
      locationPattern.lastIndex = 0
      if (!locationPattern.test(location)) {
        return {
          status: 'fail',
          statusCode: response.status,
          latencyMs,
          errorMessage: `Location "${location}" does not match expected pattern`,
        }
      }
    }

    return { status: 'pass', statusCode: response.status, latencyMs }
  } catch (error) {
    const latencyMs = Date.now() - start
    const message = errorMessage(error)
    return {
      status: isTimeoutMessage(message) ? 'timeout' : 'fail',
      latencyMs,
      errorMessage: message,
    }
  }
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import type { CheckType } from '../types.js'
|
|
2
|
+
|
|
3
|
+
/** Fields shared by every probe surface definition. */
export interface BaseSurface {
  /** Identifier of the surface. */
  name: string
  /** Selects the kind of check; each concrete surface narrows this to one literal. */
  kind: CheckType
  /** When the check runs. NOTE(review): presumably a cron expression — confirm with the scheduler. */
  schedule: string
  /** Time limit in milliseconds; the POST and happy-path runners apply it to each HTTP request. */
  timeoutMs: number
}
|
|
9
|
+
|
|
10
|
+
/** A surface probed with a GET request. */
export interface GetSurface extends BaseSurface {
  kind: 'get'
  /** URL to request. */
  url: string
  /** Expectations about the response. */
  assertions: {
    /** Expected HTTP status code. */
    statusCode: number
    /** NOTE(review): presumably text that must appear in the response body — confirm in the GET runner. */
    contentMarker?: string
  }
}
|
|
18
|
+
|
|
19
|
+
/** A surface probed with a single form-encoded POST request. */
export interface PostSurface extends BaseSurface {
  kind: 'post'
  /** URL to post to. */
  url: string
  /** Form fields; sent as an `application/x-www-form-urlencoded` body. */
  bodyFormUrlEncoded: Record<string, string>
  /** Expectations about the response. */
  assertions: {
    /** HTTP status code the response must have. */
    statusCode: number
    /** When set, the response's `Location` header (empty string if absent) must match. */
    locationPattern?: RegExp
  }
}
|
|
28
|
+
|
|
29
|
+
/** Settings for finding the verification email through the Gmail API. */
export interface GmailPollingConfig {
  /** Name of the environment entry that holds the service-account JSON. */
  credentialsSecret: string
  /** Value used as the `sub` claim of the authorization JWT. */
  subject: string
  /** Delay between two consecutive mailbox lookups, in milliseconds. */
  pollIntervalMs: number
  /** Upper bound on the number of mailbox lookups. */
  maxPollAttempts: number
  /** Gmail search query; the built-in client uses `newer_than:5m` when omitted. */
  query?: string
}
|
|
36
|
+
|
|
37
|
+
/** Endpoints and test identity for the code sign-in happy-path probe. */
export interface CodeSigninHappyPathConfig {
  /** Endpoint that receives the test email and triggers the verification mail. */
  codeRequestUrl: string
  /** Endpoint that receives the test email together with the code. */
  codeVerifyUrl: string
  /** Base URL against which the verify response's `Location` header is resolved. */
  callbackUrl: string
  /** Page that must answer 200 and contain the test email once signed in. */
  accountUrl: string
  /** Email address used for the sign-in. */
  testEmail: string
  /** How to find the verification email. */
  gmail: GmailPollingConfig
}
|
|
45
|
+
|
|
46
|
+
/** A surface probed by walking through a multi-step user flow. */
export interface HappyPathSurface extends BaseSurface {
  kind: 'happy_path'
  /** Which flow to run; `code-signin` is the only one declared. */
  flow: 'code-signin'
  /** Settings for the selected flow. */
  config: CodeSigninHappyPathConfig
}
|
|
51
|
+
|
|
52
|
+
/** Any surface definition; the `kind` field tells the variants apart. */
export type SurfaceConfig =
  | GetSurface
  | PostSurface
  | HappyPathSurface
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Monitoring Schemas
|
|
2
|
+
|
|
3
|
+
The package ships raw SQL migrations and matching Drizzle schema definitions for
|
|
4
|
+
consumers that prefer typed queries.
|
|
5
|
+
|
|
6
|
+
Tables:
|
|
7
|
+
|
|
8
|
+
- `gl_uptime_checks`
|
|
9
|
+
- `gl_uptime_incidents`
|
|
10
|
+
- `gl_errors`
|
|
11
|
+
|
|
12
|
+
Consumers can point Wrangler or Drizzle tooling at
|
|
13
|
+
`node_modules/@growth-labs/monitoring/src/schemas/migrations` or the copied
|
|
14
|
+
`dist/schemas/migrations` directory after build.
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import { index, integer, sqliteTable, text } from 'drizzle-orm/sqlite-core'
|
|
2
|
+
|
|
3
|
+
/**
 * Drizzle definition of the `gl_uptime_checks` table: one row per executed probe.
 *
 * Indexed by (surface, checked_at) and by (status, checked_at).
 * NOTE(review): `checked_at` is presumably a Unix timestamp — confirm the unit against
 * the SQL migration.
 */
export const uptimeChecks = sqliteTable(
  'gl_uptime_checks',
  {
    id: text('id').primaryKey(),
    surface: text('surface').notNull(),
    checkType: text('check_type').notNull(),
    status: text('status').notNull(),
    // Nullable: not every check produces these.
    statusCode: integer('status_code'),
    latencyMs: integer('latency_ms'),
    errorMessage: text('error_message'),
    checkedAt: integer('checked_at').notNull(),
  },
  (columns) => [
    index('idx_uptime_surface_time').on(columns.surface, columns.checkedAt),
    index('idx_uptime_status_time').on(columns.status, columns.checkedAt),
  ],
)
|
|
20
|
+
|
|
21
|
+
/**
 * Drizzle definition of the `gl_uptime_incidents` table.
 *
 * Indexed by (surface, opened_at) and by closed_at.
 * NOTE(review): a NULL `closed_at` presumably marks an incident that is still open —
 * confirm against the alerting code.
 */
export const uptimeIncidents = sqliteTable(
  'gl_uptime_incidents',
  {
    id: text('id').primaryKey(),
    surface: text('surface').notNull(),
    openedAt: integer('opened_at').notNull(),
    closedAt: integer('closed_at'),
    // Ids of the checks associated with opening and resolving the incident.
    triggerCheckId: text('trigger_check_id'),
    resolveCheckId: text('resolve_check_id'),
    severity: text('severity').notNull(),
    notes: text('notes'),
  },
  (columns) => [
    index('idx_incidents_surface_open').on(columns.surface, columns.openedAt),
    index('idx_incidents_open').on(columns.closedAt),
  ],
)
|
|
38
|
+
|
|
39
|
+
/**
 * Drizzle definition of the `gl_errors` table: one row per recorded error event.
 *
 * Indexed by (realm_key, occurred_at), (fingerprint, occurred_at) and
 * (surface, occurred_at).
 */
export const errors = sqliteTable(
  'gl_errors',
  {
    id: text('id').primaryKey(),
    realmKey: text('realm_key').notNull(),
    surface: text('surface').notNull(),
    severity: text('severity').notNull(),
    message: text('message').notNull(),
    // Optional request context.
    stack: text('stack'),
    requestId: text('request_id'),
    statusCode: integer('status_code'),
    durationMs: integer('duration_ms'),
    occurredAt: integer('occurred_at').notNull(),
    fingerprint: text('fingerprint').notNull(),
  },
  (columns) => [
    index('idx_errors_realm_time').on(columns.realmKey, columns.occurredAt),
    index('idx_errors_fingerprint_time').on(columns.fingerprint, columns.occurredAt),
    index('idx_errors_surface_time').on(columns.surface, columns.occurredAt),
  ],
)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { errors, uptimeChecks, uptimeIncidents } from './drizzle/schema.js'
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Monitoring Tail Worker
|
|
2
|
+
|
|
3
|
+
`createTailWorker(config)` returns both `tail` and `tailHandler` aliases for
|
|
4
|
+
Cloudflare Tail Worker exports.
|
|
5
|
+
|
|
6
|
+
Pipeline:
|
|
7
|
+
|
|
8
|
+
1. Categorize each trace event into zero or more records.
|
|
9
|
+
2. Compute the fingerprint before redaction.
|
|
10
|
+
3. Apply sampling, with an in-memory 100% override for surfaces that become
|
|
11
|
+
subject to an active incident within the same batch.
|
|
12
|
+
4. Redact PII/secrets and truncate fields.
|
|
13
|
+
5. Persist to `gl_errors` and write a WAE datapoint.
|
|
14
|
+
6. Evaluate new-error and rate-spike alert actions.
|
|
15
|
+
|
|
16
|
+
Surface names use `${scriptName}:${method} ${normalizedPath}`. The v0.1 path
|
|
17
|
+
normalizer keeps alphabetic-only segments and replaces all other segments with
|
|
18
|
+
`[slug]`.
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
/**
 * Kinds of record the categorizer can produce from a trace event:
 * an uncaught exception, an HTTP 5xx response, a `console.error` / `console.warn`
 * log line, or a request slower than the configured threshold.
 */
export type ErrorCategory = 'exception' | 'fivexx' | 'console-error' | 'console-warn' | 'slow-request'
|
|
7
|
+
|
|
8
|
+
/** One record derived from a trace event. */
export interface CategorizedEvent {
  /** What kind of problem this record describes. */
  category: ErrorCategory
  /** Surface name in the form `${scriptName}:${method} ${normalizedPath}`. */
  surface: string
  /** Human-readable description of the problem. */
  message: string
  /** Stack trace; set for exceptions when the trace provides one. */
  stack?: string
  /** HTTP response status, when the trace has one. */
  statusCode?: number
  /** Request duration taken from the trace's `wallTime`, when it is a number. */
  durationMs?: number
  /** Value of the request's `cf-ray` header, when present. */
  requestId?: string
  /** Event time as a millisecond timestamp. */
  occurredAt: number
  /** Grouping key; not set by the categorizer itself. */
  fingerprint?: string
}
|
|
19
|
+
|
|
20
|
+
/**
 * The subset of a tail trace event that the categorizer reads.
 * Every field is optional; missing data falls back to defaults during categorization.
 */
export interface TraceItem {
  /** Name of the script that handled the request; first choice for the surface prefix. */
  scriptName?: string
  /** Outcome reported for the invocation (not read by the categorizer). */
  outcome?: string
  /** Uncaught exceptions; each becomes an `exception` record. */
  exceptions?: Array<{
    name?: string
    message?: string
    stack?: string
  }>
  /** Console output; only `error` and `warn` levels become records. */
  logs?: Array<{
    level?: string
    message?: unknown
    args?: unknown[]
  }>
  /** Request details; `cf-ray` / `CF-Ray` in `headers` supplies the request id. */
  request?: {
    method?: string
    url?: string
    headers?: Record<string, string>
  }
  /** Response details; a status of 500 or above produces a `fivexx` record. */
  response?: { status?: number }
  /** Event time: seconds or milliseconds as a number, or a numeric / date string. */
  eventTimestamp?: number | string
  /** Duration of the request; compared with the slow-request threshold. */
  wallTime?: number
  /** Used for the surface prefix when `scriptName` is absent. */
  dispatchNamespace?: string
}
|
|
31
|
+
|
|
32
|
+
/** Tuning knobs for `categorize`. */
interface CategorizeOptions {
  /**
   * Requests whose duration exceeds this many milliseconds are reported as
   * `slow-request`. Slow-request detection is off when this is not a number.
   */
  slowRequestThresholdMs?: number
}
|
|
35
|
+
|
|
36
|
+
/**
 * Converts one trace event into zero or more categorized records.
 *
 * Records are emitted in this order:
 *  1. one `exception` per entry in `exceptions`;
 *  2. one `fivexx` when the response status is 500 or above;
 *  3. one `console-error` / `console-warn` per log line at that level;
 *  4. one `slow-request` — only when nothing else was emitted and the duration
 *     exceeds `options.slowRequestThresholdMs`.
 *
 * @param traceEvent - The trace event to inspect.
 * @param options - Optional slow-request threshold.
 * @returns The records, possibly empty.
 */
export function categorize(
  traceEvent: TraceItem,
  options: CategorizeOptions = {},
): CategorizedEvent[] {
  // Context shared by every record produced from this trace.
  const occurredAt = parseTimestamp(traceEvent.eventTimestamp)
  const surface = surfaceForTrace(traceEvent)
  const requestHeaders = traceEvent.request?.headers
  const requestId = requestHeaders?.['cf-ray'] ?? requestHeaders?.['CF-Ray']
  const durationMs = typeof traceEvent.wallTime === 'number' ? traceEvent.wallTime : undefined
  const statusCode = traceEvent.response?.status

  const events: CategorizedEvent[] = []

  for (const exception of traceEvent.exceptions ?? []) {
    const prefix = exception.name ? `${exception.name}: ` : ''
    const message = `${prefix}${exception.message ?? 'Unhandled exception'}`
    events.push({
      category: 'exception',
      surface,
      message,
      stack: exception.stack,
      requestId,
      durationMs,
      occurredAt,
    })
  }

  if (typeof statusCode === 'number' && statusCode >= 500) {
    events.push({
      category: 'fivexx',
      surface,
      message: `HTTP ${statusCode} response`,
      statusCode,
      requestId,
      durationMs,
      occurredAt,
    })
  }

  for (const log of traceEvent.logs ?? []) {
    const isError = log.level === 'error'
    if (isError || log.level === 'warn') {
      events.push({
        category: isError ? 'console-error' : 'console-warn',
        surface,
        message: logMessage(log),
        statusCode,
        requestId,
        durationMs,
        occurredAt,
      })
    }
  }

  // Slowness is only reported for traces that produced no other record.
  if (events.length === 0) {
    const slowThreshold = options.slowRequestThresholdMs
    if (
      typeof durationMs === 'number' &&
      typeof slowThreshold === 'number' &&
      durationMs > slowThreshold
    ) {
      events.push({
        category: 'slow-request',
        surface,
        message: `Slow request took ${durationMs}ms`,
        statusCode,
        requestId,
        durationMs,
        occurredAt,
      })
    }
  }

  return events
}
|
|
106
|
+
|
|
107
|
+
/**
 * Collapses variable parts of a URL path so that similar requests share one surface name.
 *
 * Segments made only of ASCII letters are kept; every other segment becomes `[slug]`.
 * Empty segments (leading, trailing or doubled slashes) are dropped.
 *
 * @param pathname - Path to normalize, e.g. `/users/42/profile`.
 * @returns The normalized path, always starting with `/`, e.g. `/users/[slug]/profile`.
 */
export function normalizePath(pathname: string): string {
  const kept: string[] = []
  for (const segment of pathname.split('/')) {
    if (!segment) continue
    kept.push(/^[A-Za-z]+$/.test(segment) ? segment : '[slug]')
  }
  return `/${kept.join('/')}`
}
|
|
117
|
+
|
|
118
|
+
/**
 * Derives the surface name `${scriptName}:${method} ${normalizedPath}` for a trace.
 *
 * Defaults: the script name falls back to the dispatch namespace and then `unknown`,
 * the method to `GET`, and the URL to `https://unknown/`.
 *
 * @param traceEvent - The trace event.
 * @returns The surface name.
 */
function surfaceForTrace(traceEvent: TraceItem): string {
  const owner = traceEvent.scriptName ?? traceEvent.dispatchNamespace ?? 'unknown'
  const verb = traceEvent.request?.method ?? 'GET'
  const rawUrl = traceEvent.request?.url ?? 'https://unknown/'

  let path: string
  try {
    path = new URL(rawUrl).pathname
  } catch {
    // Not an absolute URL: use everything before the query string as the path.
    path = rawUrl.split('?')[0] || '/'
  }

  return `${owner}:${verb} ${normalizePath(path)}`
}
|
|
130
|
+
|
|
131
|
+
/**
 * Normalizes a trace timestamp to milliseconds since the Unix epoch.
 *
 * - Finite numbers below 10,000,000,000 are taken to be seconds and multiplied by 1000;
 *   larger ones are taken to be milliseconds already.
 * - Strings are read as a number first (same rule) and as a date string second.
 * - Anything unusable — `undefined`, `NaN`, `Infinity`, blank or unparseable strings —
 *   yields the current time.
 *
 * @param value - Raw timestamp from the trace event.
 * @returns A finite millisecond timestamp.
 */
function parseTimestamp(value: number | string | undefined): number {
  if (typeof value === 'number') {
    // NaN / Infinity must not leak through as the event time.
    if (!Number.isFinite(value)) return Date.now()
    return value < 10_000_000_000 ? value * 1000 : value
  }
  if (typeof value === 'string') {
    const trimmed = value.trim()
    // Number('') is 0, which would otherwise be reported as the epoch itself.
    if (trimmed !== '') {
      const numeric = Number(trimmed)
      if (Number.isFinite(numeric)) return parseTimestamp(numeric)
      const parsed = Date.parse(trimmed)
      if (Number.isFinite(parsed)) return parsed
    }
  }
  return Date.now()
}
|
|
141
|
+
|
|
142
|
+
/**
 * Renders a log entry as a single line of text.
 *
 * Uses `log.message`, falling back to `log.args` and then to an empty string.
 * An array is rendered part by part and joined with spaces.
 *
 * @param log - The log entry.
 * @returns The rendered text.
 */
function logMessage(log: { message?: unknown; args?: unknown[] }): string {
  const payload = log.message ?? log.args ?? ''
  return Array.isArray(payload) ? payload.map(formatLogPart).join(' ') : formatLogPart(payload)
}
|
|
147
|
+
|
|
148
|
+
/**
 * Renders one logged value as text.
 *
 * Strings are returned unchanged, `Error` instances yield their message, and everything
 * else is JSON-encoded. Values JSON cannot represent fall back to `String(value)`.
 *
 * @param value - The logged value.
 * @returns The text form; always a string.
 */
function formatLogPart(value: unknown): string {
  if (typeof value === 'string') return value
  if (value instanceof Error) return value.message
  try {
    // JSON.stringify returns undefined (not a string) for undefined, functions and
    // symbols; without the fallback this function would return a non-string.
    return JSON.stringify(value) ?? String(value)
  } catch {
    // Thrown for circular structures and BigInt values.
    return String(value)
  }
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import type { CategorizedEvent } from './categorize.js'
|
|
2
|
+
|
|
3
|
+
// Shared UTF-8 encoder for hashing fingerprint input.
const encoder = new TextEncoder()

/**
 * Computes a short, stable grouping key for an event.
 *
 * The key is the first 16 hex characters of the SHA-256 digest of
 * `category|normalized message|second line of the stack`. The second stack line is
 * used because the first line of a stack is taken to be the message itself; it is an
 * empty string when the event has no stack or the stack has a single line.
 *
 * @param event - The categorized event.
 * @returns A 16-character lowercase hex string.
 */
export async function computeFingerprint(event: CategorizedEvent): Promise<string> {
  const stackLines = event.stack?.split('\n')
  const firstStackFrame = stackLines?.[1]?.trim() ?? ''
  const normalizedMessage = normalizeMessage(event.message)
  const input = `${event.category}|${normalizedMessage}|${firstStackFrame}`

  const digest = await crypto.subtle.digest('SHA-256', encoder.encode(input))

  // Eight digest bytes render as the sixteen hex characters that make up the key.
  let hex = ''
  for (const byte of new Uint8Array(digest).subarray(0, 8)) {
    hex += byte.toString(16).padStart(2, '0')
  }
  return hex
}
|
|
15
|
+
|
|
16
|
+
/**
 * Replaces the variable parts of an error message with placeholders so that messages
 * differing only in ids or paths normalize to the same text.
 *
 * Applied in order: UUIDs become `[uuid]`, slash-prefixed path segments become
 * `/[seg]`, and standalone numbers of four or more digits become `[n]`.
 *
 * @param message - The raw message.
 * @returns The normalized message.
 */
export function normalizeMessage(message: string): string {
  // Order matters: UUIDs are replaced before the generic number rule can touch them.
  const rules: Array<[RegExp, string]> = [
    [/\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b/gi, '[uuid]'],
    [/\/[a-z0-9_-]+(?=\b)/gi, '/[seg]'],
    [/\b\d{4,}\b/g, '[n]'],
  ]
  let normalized = message
  for (const [pattern, placeholder] of rules) {
    normalized = normalized.replace(pattern, placeholder)
  }
  return normalized
}
|