class-ai-agent 1.4.1 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent/README.md +10 -5
- package/.agent/rules/agent-continuity.md +44 -0
- package/.agent/rules/antigravity-overview.md +38 -0
- package/.agent/rules/api-conventions.md +85 -0
- package/.agent/rules/clean-code.md +211 -0
- package/.agent/rules/code-style.md +92 -0
- package/.agent/rules/codegraph.md +47 -0
- package/.agent/rules/database.md +66 -0
- package/.agent/rules/error-handling.md +98 -0
- package/.agent/rules/git-workflow.md +83 -0
- package/.agent/rules/monitoring.md +317 -0
- package/.agent/rules/naming-conventions.md +266 -0
- package/.agent/rules/project-structure.md +71 -0
- package/.agent/rules/security.md +95 -0
- package/.agent/rules/system-design.md +168 -0
- package/.agent/rules/tech-stack.md +463 -0
- package/.agent/rules/testing.md +110 -0
- package/.agents/agents/backend.md +395 -0
- package/.agents/agents/business-analyst.md +380 -0
- package/.agents/agents/code-reviewer.md +110 -0
- package/.agents/agents/copywriter-seo.md +236 -0
- package/.agents/agents/frontend.md +384 -0
- package/.agents/agents/project-manager.md +201 -0
- package/.agents/agents/qa.md +221 -0
- package/.agents/agents/security-auditor.md +143 -0
- package/.agents/agents/systems-architect.md +211 -0
- package/.agents/agents/test-engineer.md +123 -0
- package/.agents/agents/ui-ux-designer.md +210 -0
- package/.agents/references/accessibility-checklist.md +174 -0
- package/.agents/references/agent-continuity.md +42 -0
- package/.agents/references/codegraph.md +90 -0
- package/.agents/references/mcp-antigravity.md +71 -0
- package/.agents/references/performance-checklist.md +150 -0
- package/.agents/references/security-checklist.md +94 -0
- package/.agents/references/supabase.md +55 -0
- package/.agents/references/testing-patterns.md +183 -0
- package/.agents/skills/agent-continuity/SKILL.md +70 -0
- package/.agents/skills/code-review/SKILL.md +208 -0
- package/.agents/skills/deploy/SKILL.md +68 -0
- package/.agents/skills/deploy/deploy.md +735 -0
- package/.agents/skills/incremental-implementation/SKILL.md +210 -0
- package/.agents/skills/security-review/SKILL.md +71 -0
- package/.agents/skills/supabase/SKILL.md +135 -0
- package/.agents/skills/supabase/UPSTREAM.md +16 -0
- package/.agents/skills/supabase/assets/feedback-issue-template.md +17 -0
- package/.agents/skills/supabase/references/skill-feedback.md +17 -0
- package/.agents/skills/supabase-postgres-best-practices/SKILL.md +64 -0
- package/.agents/skills/supabase-postgres-best-practices/UPSTREAM.md +16 -0
- package/.agents/skills/supabase-postgres-best-practices/references/_contributing.md +170 -0
- package/.agents/skills/supabase-postgres-best-practices/references/_sections.md +39 -0
- package/.agents/skills/supabase-postgres-best-practices/references/_template.md +34 -0
- package/.agents/skills/supabase-postgres-best-practices/references/advanced-full-text-search.md +55 -0
- package/.agents/skills/supabase-postgres-best-practices/references/advanced-jsonb-indexing.md +49 -0
- package/.agents/skills/supabase-postgres-best-practices/references/conn-idle-timeout.md +46 -0
- package/.agents/skills/supabase-postgres-best-practices/references/conn-limits.md +44 -0
- package/.agents/skills/supabase-postgres-best-practices/references/conn-pooling.md +41 -0
- package/.agents/skills/supabase-postgres-best-practices/references/conn-prepared-statements.md +46 -0
- package/.agents/skills/supabase-postgres-best-practices/references/data-batch-inserts.md +54 -0
- package/.agents/skills/supabase-postgres-best-practices/references/data-n-plus-one.md +53 -0
- package/.agents/skills/supabase-postgres-best-practices/references/data-pagination.md +50 -0
- package/.agents/skills/supabase-postgres-best-practices/references/data-upsert.md +50 -0
- package/.agents/skills/supabase-postgres-best-practices/references/lock-advisory.md +56 -0
- package/.agents/skills/supabase-postgres-best-practices/references/lock-deadlock-prevention.md +68 -0
- package/.agents/skills/supabase-postgres-best-practices/references/lock-short-transactions.md +50 -0
- package/.agents/skills/supabase-postgres-best-practices/references/lock-skip-locked.md +54 -0
- package/.agents/skills/supabase-postgres-best-practices/references/monitor-explain-analyze.md +45 -0
- package/.agents/skills/supabase-postgres-best-practices/references/monitor-pg-stat-statements.md +55 -0
- package/.agents/skills/supabase-postgres-best-practices/references/monitor-vacuum-analyze.md +55 -0
- package/.agents/skills/supabase-postgres-best-practices/references/query-composite-indexes.md +44 -0
- package/.agents/skills/supabase-postgres-best-practices/references/query-covering-indexes.md +40 -0
- package/.agents/skills/supabase-postgres-best-practices/references/query-index-types.md +48 -0
- package/.agents/skills/supabase-postgres-best-practices/references/query-missing-indexes.md +43 -0
- package/.agents/skills/supabase-postgres-best-practices/references/query-partial-indexes.md +45 -0
- package/.agents/skills/supabase-postgres-best-practices/references/schema-constraints.md +80 -0
- package/.agents/skills/supabase-postgres-best-practices/references/schema-data-types.md +46 -0
- package/.agents/skills/supabase-postgres-best-practices/references/schema-foreign-key-indexes.md +59 -0
- package/.agents/skills/supabase-postgres-best-practices/references/schema-lowercase-identifiers.md +55 -0
- package/.agents/skills/supabase-postgres-best-practices/references/schema-partitioning.md +55 -0
- package/.agents/skills/supabase-postgres-best-practices/references/schema-primary-keys.md +61 -0
- package/.agents/skills/supabase-postgres-best-practices/references/security-privileges.md +54 -0
- package/.agents/skills/supabase-postgres-best-practices/references/security-rls-basics.md +50 -0
- package/.agents/skills/supabase-postgres-best-practices/references/security-rls-performance.md +63 -0
- package/.agents/skills/tdd/SKILL.md +217 -0
- package/.agents/skills/ui-ux-pro-max/SKILL.md +288 -0
- package/.agents/skills/ui-ux-pro-max/data/charts.csv +26 -0
- package/.agents/skills/ui-ux-pro-max/data/colors.csv +97 -0
- package/.agents/skills/ui-ux-pro-max/data/icons.csv +101 -0
- package/.agents/skills/ui-ux-pro-max/data/landing.csv +31 -0
- package/.agents/skills/ui-ux-pro-max/data/products.csv +97 -0
- package/.agents/skills/ui-ux-pro-max/data/react-performance.csv +45 -0
- package/.agents/skills/ui-ux-pro-max/data/stacks/astro.csv +54 -0
- package/.agents/skills/ui-ux-pro-max/data/stacks/flutter.csv +53 -0
- package/.agents/skills/ui-ux-pro-max/data/stacks/html-tailwind.csv +56 -0
- package/.agents/skills/ui-ux-pro-max/data/stacks/jetpack-compose.csv +53 -0
- package/.agents/skills/ui-ux-pro-max/data/stacks/nextjs.csv +53 -0
- package/.agents/skills/ui-ux-pro-max/data/stacks/nuxt-ui.csv +51 -0
- package/.agents/skills/ui-ux-pro-max/data/stacks/nuxtjs.csv +59 -0
- package/.agents/skills/ui-ux-pro-max/data/stacks/react-native.csv +52 -0
- package/.agents/skills/ui-ux-pro-max/data/stacks/react.csv +54 -0
- package/.agents/skills/ui-ux-pro-max/data/stacks/shadcn.csv +61 -0
- package/.agents/skills/ui-ux-pro-max/data/stacks/svelte.csv +54 -0
- package/.agents/skills/ui-ux-pro-max/data/stacks/swiftui.csv +51 -0
- package/.agents/skills/ui-ux-pro-max/data/stacks/vue.csv +50 -0
- package/.agents/skills/ui-ux-pro-max/data/styles.csv +68 -0
- package/.agents/skills/ui-ux-pro-max/data/typography.csv +58 -0
- package/.agents/skills/ui-ux-pro-max/data/ui-reasoning.csv +101 -0
- package/.agents/skills/ui-ux-pro-max/data/ux-guidelines.csv +100 -0
- package/.agents/skills/ui-ux-pro-max/data/web-interface.csv +31 -0
- package/.agents/skills/ui-ux-pro-max/scripts/core.py +253 -0
- package/.agents/skills/ui-ux-pro-max/scripts/design_system.py +1067 -0
- package/.agents/skills/ui-ux-pro-max/scripts/search.py +114 -0
- package/.agents/workflows/build.md +132 -0
- package/.agents/workflows/debug.md +242 -0
- package/.agents/workflows/deploy.md +43 -0
- package/.agents/workflows/fix-issue.md +45 -0
- package/.agents/workflows/handoff.md +93 -0
- package/.agents/workflows/plan.md +125 -0
- package/.agents/workflows/publish-npm.md +122 -0
- package/.agents/workflows/resume.md +106 -0
- package/.agents/workflows/review.md +53 -0
- package/.agents/workflows/simplify.md +221 -0
- package/.agents/workflows/spec.md +95 -0
- package/.agents/workflows/test.md +213 -0
- package/.cursor/rules/cursor-overview.mdc +3 -2
- package/.kiro/steering/kiro-overview.md +2 -2
- package/AGENTS.md +4 -1
- package/GEMINI.md +152 -0
- package/README.md +52 -15
- package/bin/class-ai-agent.cjs +85 -9
- package/package.json +9 -4
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
---
|
|
2
|
+
trigger: glob
|
|
3
|
+
globs: {ts,tsx,js,jsx,mjs,cjs,json,md,prisma,yml,yaml}
|
|
4
|
+
description: "Error Handling"
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Error Handling
|
|
8
|
+
|
|
9
|
+
## Core Principles
|
|
10
|
+
- **Never swallow errors silently** — always log or rethrow
|
|
11
|
+
- Use a centralized error handler
|
|
12
|
+
- Return consistent error responses to the API
|
|
13
|
+
- Distinguish between operational errors (expected) and programmer errors (bugs)
|
|
14
|
+
|
|
15
|
+
## Custom Error Class
|
|
16
|
+
```js
|
|
17
|
+
// src/utils/app-error.js
|
|
18
|
+
class AppError extends Error {
|
|
19
|
+
constructor(message, statusCode = 500, code = 'INTERNAL_ERROR') {
|
|
20
|
+
super(message);
|
|
21
|
+
this.name = 'AppError';
|
|
22
|
+
this.statusCode = statusCode;
|
|
23
|
+
this.code = code;
|
|
24
|
+
this.isOperational = true;
|
|
25
|
+
Error.captureStackTrace(this, this.constructor);
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
export default AppError;
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Throwing Errors
|
|
33
|
+
```js
|
|
34
|
+
// ✅ Use AppError for known operational errors
|
|
35
|
+
if (!user) {
|
|
36
|
+
throw new AppError('User not found', 404, 'USER_NOT_FOUND');
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
if (!hasPermission) {
|
|
40
|
+
throw new AppError('Forbidden', 403, 'ACCESS_DENIED');
|
|
41
|
+
}
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Async Error Handling
|
|
45
|
+
```js
|
|
46
|
+
// ✅ Always wrap async route handlers
|
|
47
|
+
const asyncHandler = (fn) => (req, res, next) =>
|
|
48
|
+
Promise.resolve(fn(req, res, next)).catch(next);
|
|
49
|
+
|
|
50
|
+
router.get('/users/:id', asyncHandler(async (req, res) => {
|
|
51
|
+
const user = await userService.findById(req.params.id);
|
|
52
|
+
res.json({ success: true, data: user });
|
|
53
|
+
}));
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Global Error Handler (Express)
|
|
57
|
+
```js
|
|
58
|
+
// middleware/error-handler.js
|
|
59
|
+
export function errorHandler(err, req, res, next) {
|
|
60
|
+
const statusCode = err.statusCode || 500;
|
|
61
|
+
const isOperational = err.isOperational || false;
|
|
62
|
+
|
|
63
|
+
// Log all errors
|
|
64
|
+
logger.error({ err, req: { method: req.method, url: req.url } });
|
|
65
|
+
|
|
66
|
+
// Don't expose internal errors to clients
|
|
67
|
+
if (!isOperational) {
|
|
68
|
+
return res.status(500).json({
|
|
69
|
+
success: false,
|
|
70
|
+
error: { code: 'INTERNAL_ERROR', message: 'An unexpected error occurred' }
|
|
71
|
+
});
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
res.status(statusCode).json({
|
|
75
|
+
success: false,
|
|
76
|
+
error: { code: err.code, message: err.message }
|
|
77
|
+
});
|
|
78
|
+
}
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## Validation Errors
|
|
82
|
+
```js
|
|
83
|
+
// Use a validation library (Zod/Joi/Yup) and throw structured errors
|
|
84
|
+
import { z } from 'zod';
|
|
85
|
+
|
|
86
|
+
const userSchema = z.object({
|
|
87
|
+
email: z.string().email(),
|
|
88
|
+
name: z.string().min(2)
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
function validateUser(data) {
|
|
92
|
+
const result = userSchema.safeParse(data);
|
|
93
|
+
if (!result.success) {
|
|
94
|
+
throw new AppError('Validation failed', 422, 'VALIDATION_ERROR');
|
|
95
|
+
}
|
|
96
|
+
return result.data;
|
|
97
|
+
}
|
|
98
|
+
```
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
---
|
|
2
|
+
trigger: glob
|
|
3
|
+
globs: {ts,tsx,js,jsx,mjs,cjs,json,md,prisma,yml,yaml}
|
|
4
|
+
description: "Git Workflow"
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Git Workflow
|
|
8
|
+
|
|
9
|
+
## Branch Strategy (Git Flow)
|
|
10
|
+
|
|
11
|
+
```
|
|
12
|
+
main — Production-ready code only
|
|
13
|
+
develop — Integration branch for features
|
|
14
|
+
feature/* — New features
|
|
15
|
+
fix/* — Bug fixes
|
|
16
|
+
hotfix/* — Urgent production fixes
|
|
17
|
+
release/* — Release preparation
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Branch Naming
|
|
21
|
+
```
|
|
22
|
+
feature/user-authentication
|
|
23
|
+
feature/payment-integration
|
|
24
|
+
fix/login-redirect-bug
|
|
25
|
+
fix/order-calculation-issue
|
|
26
|
+
hotfix/critical-security-patch
|
|
27
|
+
release/v1.2.0
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Commit Message Format (Conventional Commits)
|
|
31
|
+
|
|
32
|
+
```
|
|
33
|
+
<type>(<scope>): <short description>
|
|
34
|
+
|
|
35
|
+
[optional body]
|
|
36
|
+
|
|
37
|
+
[optional footer]
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### Types
|
|
41
|
+
| Type | Usage |
|
|
42
|
+
|------|-------|
|
|
43
|
+
| `feat` | New feature |
|
|
44
|
+
| `fix` | Bug fix |
|
|
45
|
+
| `docs` | Documentation only |
|
|
46
|
+
| `style` | Formatting, no logic change |
|
|
47
|
+
| `refactor` | Code restructure, no feature/fix |
|
|
48
|
+
| `test` | Adding or fixing tests |
|
|
49
|
+
| `chore` | Build process, dependencies |
|
|
50
|
+
| `perf` | Performance improvement |
|
|
51
|
+
|
|
52
|
+
### Examples
|
|
53
|
+
```
|
|
54
|
+
feat(auth): add JWT refresh token support
|
|
55
|
+
|
|
56
|
+
fix(orders): correct total price calculation when discount applied
|
|
57
|
+
|
|
58
|
+
docs(api): add Swagger annotations to user endpoints
|
|
59
|
+
|
|
60
|
+
test(users): add unit tests for UserService.findById
|
|
61
|
+
|
|
62
|
+
chore: upgrade express to v5.0.0
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Pull Request Rules
|
|
66
|
+
- PRs must reference an issue: `Closes #123`
|
|
67
|
+
- Minimum 1 reviewer approval required
|
|
68
|
+
- All CI checks must pass
|
|
69
|
+
- No direct commits to `main` or `develop`
|
|
70
|
+
- PR title must follow conventional commit format
|
|
71
|
+
|
|
72
|
+
## Commit Best Practices
|
|
73
|
+
- Commit frequently with small, focused changes
|
|
74
|
+
- Each commit should be a single logical change
|
|
75
|
+
- Never commit: `.env` files, secrets, `node_modules`
|
|
76
|
+
- Always run tests before committing
|
|
77
|
+
|
|
78
|
+
## Tags & Releases
|
|
79
|
+
```bash
|
|
80
|
+
# Tag a release
|
|
81
|
+
git tag -a v1.2.0 -m "Release version 1.2.0"
|
|
82
|
+
git push origin v1.2.0
|
|
83
|
+
```
|
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
---
|
|
2
|
+
trigger: glob
|
|
3
|
+
globs: {ts,tsx,js,jsx,mjs,cjs,json,md,prisma,yml,yaml}
|
|
4
|
+
description: "Monitoring & Observability"
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Monitoring & Observability
|
|
8
|
+
|
|
9
|
+
> Standards for logging, metrics, tracing, alerting, and Grafana dashboard design.
|
|
10
|
+
|
|
11
|
+
## 🔭 The Three Pillars of Observability
|
|
12
|
+
|
|
13
|
+
| Pillar | Tool | Purpose |
|
|
14
|
+
|--------|------|---------|
|
|
15
|
+
| **Logs** | Winston / Pino + Loki | What happened |
|
|
16
|
+
| **Metrics** | Prometheus + Grafana | How the system is behaving |
|
|
17
|
+
| **Traces** | OpenTelemetry + Jaeger | Why something is slow |
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## 📝 Logging Rules
|
|
22
|
+
|
|
23
|
+
### Log Levels
|
|
24
|
+
| Level | When to Use |
|
|
25
|
+
|-------|-------------|
|
|
26
|
+
| `error` | Unexpected failure requiring attention |
|
|
27
|
+
| `warn` | Unexpected but recoverable situation |
|
|
28
|
+
| `info` | Normal significant events (startup, request lifecycle) |
|
|
29
|
+
| `debug` | Detailed debugging info (dev only) |
|
|
30
|
+
| `trace` | Very verbose (never in production) |
|
|
31
|
+
|
|
32
|
+
### Log Format — Structured JSON (always!)
|
|
33
|
+
```js
|
|
34
|
+
// ✅ Structured log — searchable and parseable
|
|
35
|
+
logger.info({
|
|
36
|
+
event: 'order.placed',
|
|
37
|
+
orderId: order.id,
|
|
38
|
+
userId: user.id,
|
|
39
|
+
amount: order.total,
|
|
40
|
+
durationMs: Date.now() - startTime,
|
|
41
|
+
requestId: req.id,
|
|
42
|
+
});
|
|
43
|
+
|
|
44
|
+
// ❌ Unstructured log — cannot be queried
|
|
45
|
+
console.log(`Order ${orderId} placed by user ${userId}`);
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### Mandatory Fields
|
|
49
|
+
```js
|
|
50
|
+
{
|
|
51
|
+
level: 'info',
|
|
52
|
+
timestamp: '2026-01-01T00:00:00.000Z',
|
|
53
|
+
service: 'order-service',
|
|
54
|
+
version: '1.2.3',
|
|
55
|
+
environment: 'production',
|
|
56
|
+
requestId: 'uuid', // trace across services
|
|
57
|
+
userId: 'uuid', // who triggered it
|
|
58
|
+
event: 'order.placed', // what happened
|
|
59
|
+
durationMs: 45, // how long
|
|
60
|
+
// ... domain-specific fields
|
|
61
|
+
}
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
### What NOT to Log
|
|
65
|
+
```js
|
|
66
|
+
// ❌ Never log sensitive data
|
|
67
|
+
logger.info({ password: user.password }); // NEVER
|
|
68
|
+
logger.info({ token: req.headers.authorization }); // NEVER
|
|
69
|
+
logger.info({ creditCard: payment.card }); // NEVER
|
|
70
|
+
logger.info({ ssn: user.socialSecurityNumber }); // NEVER
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### Logger Setup (Pino)
|
|
74
|
+
```js
|
|
75
|
+
// src/utils/logger.js
|
|
76
|
+
import pino from 'pino';
|
|
77
|
+
|
|
78
|
+
export const logger = pino({
|
|
79
|
+
level: process.env.LOG_LEVEL || 'info',
|
|
80
|
+
base: {
|
|
81
|
+
service: process.env.APP_NAME,
|
|
82
|
+
version: process.env.npm_package_version,
|
|
83
|
+
environment: process.env.NODE_ENV,
|
|
84
|
+
},
|
|
85
|
+
timestamp: pino.stdTimeFunctions.isoTime,
|
|
86
|
+
});
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
---
|
|
90
|
+
|
|
91
|
+
## 📊 Metrics (Prometheus + Grafana)
|
|
92
|
+
|
|
93
|
+
### Metric Types
|
|
94
|
+
| Type | Use Case | Example |
|
|
95
|
+
|------|----------|---------|
|
|
96
|
+
| **Counter** | Values that only increase | `http_requests_total` |
|
|
97
|
+
| **Gauge** | Values that go up and down | `active_connections`, `memory_usage_bytes` |
|
|
98
|
+
| **Histogram** | Distribution of values | `http_request_duration_seconds` |
|
|
99
|
+
| **Summary** | Pre-calculated percentiles | `request_latency_percentiles` |
|
|
100
|
+
|
|
101
|
+
### Naming Convention
|
|
102
|
+
```
|
|
103
|
+
# Pattern: {namespace}_{subsystem}_{name}_{unit}
|
|
104
|
+
# All lowercase, underscores, snake_case
|
|
105
|
+
|
|
106
|
+
http_request_duration_seconds # histogram
|
|
107
|
+
http_requests_total # counter
|
|
108
|
+
http_requests_in_flight # gauge
|
|
109
|
+
db_query_duration_seconds # histogram
|
|
110
|
+
cache_hits_total # counter
|
|
111
|
+
cache_misses_total # counter
|
|
112
|
+
queue_messages_pending # gauge
|
|
113
|
+
queue_processing_duration_seconds # histogram
|
|
114
|
+
payment_transactions_total # counter (+ status label)
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### Labels (Dimensions)
|
|
118
|
+
```js
|
|
119
|
+
// ✅ Use labels for meaningful dimensions
|
|
120
|
+
httpRequestCounter.labels({
|
|
121
|
+
method: req.method, // GET, POST, etc.
|
|
122
|
+
route: '/api/v1/users', // normalized route
|
|
123
|
+
status_code: res.statusCode,
|
|
124
|
+
service: 'user-service',
|
|
125
|
+
}).inc();
|
|
126
|
+
|
|
127
|
+
// ❌ Don't use high-cardinality labels (userId, orderId)
|
|
128
|
+
// This creates millions of time series → kills Prometheus
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### Express Middleware for Metrics
|
|
132
|
+
```js
|
|
133
|
+
// middleware/metrics.js
|
|
134
|
+
import client from 'prom-client';
|
|
135
|
+
|
|
136
|
+
const httpDuration = new client.Histogram({
|
|
137
|
+
name: 'http_request_duration_seconds',
|
|
138
|
+
help: 'Duration of HTTP requests in seconds',
|
|
139
|
+
labelNames: ['method', 'route', 'status_code'],
|
|
140
|
+
buckets: [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10],
|
|
141
|
+
});
|
|
142
|
+
|
|
143
|
+
export function metricsMiddleware(req, res, next) {
|
|
144
|
+
const end = httpDuration.startTimer();
|
|
145
|
+
res.on('finish', () => {
|
|
146
|
+
end({ method: req.method, route: req.route?.path || 'unmatched', status_code: res.statusCode });
|
|
147
|
+
});
|
|
148
|
+
next();
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
// Expose metrics endpoint
|
|
152
|
+
app.get('/metrics', async (req, res) => {
|
|
153
|
+
res.set('Content-Type', client.register.contentType);
|
|
154
|
+
res.end(await client.register.metrics());
|
|
155
|
+
});
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
---
|
|
159
|
+
|
|
160
|
+
## 📈 Grafana Dashboard Design
|
|
161
|
+
|
|
162
|
+
### Dashboard Naming
|
|
163
|
+
```
|
|
164
|
+
# Pattern: {Service} — {Category}
|
|
165
|
+
User Service — Overview
|
|
166
|
+
User Service — Errors & Latency
|
|
167
|
+
Order Service — Business Metrics
|
|
168
|
+
Infrastructure — Redis
|
|
169
|
+
Infrastructure — PostgreSQL
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
### Panel Naming
|
|
173
|
+
```
|
|
174
|
+
# Use title case, include units in title
|
|
175
|
+
Request Rate (req/s)
|
|
176
|
+
P99 Latency (ms)
|
|
177
|
+
Error Rate (%)
|
|
178
|
+
Active DB Connections
|
|
179
|
+
Cache Hit Rate (%)
|
|
180
|
+
Queue Depth
|
|
181
|
+
Memory Usage (MB)
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
### The RED Method (for Services)
|
|
185
|
+
Every service dashboard MUST have these 3 panels:
|
|
186
|
+
- **R** — **Rate**: requests per second
|
|
187
|
+
- **E** — **Errors**: error rate (%)
|
|
188
|
+
- **D** — **Duration**: P50, P95, P99 latency
|
|
189
|
+
|
|
190
|
+
```promql
|
|
191
|
+
# Rate
|
|
192
|
+
rate(http_requests_total{service="order-service"}[5m])
|
|
193
|
+
|
|
194
|
+
# Error rate
|
|
195
|
+
sum(rate(http_requests_total{status_code=~"5.."}[5m]))
|
|
196
|
+
/ sum(rate(http_requests_total[5m])) * 100
|
|
197
|
+
|
|
198
|
+
# P99 latency (ms)
|
|
199
|
+
histogram_quantile(0.99,
|
|
200
|
+
rate(http_request_duration_seconds_bucket{service="order-service"}[5m])
|
|
201
|
+
) * 1000
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
### The USE Method (for Infrastructure)
|
|
205
|
+
Every infrastructure dashboard MUST have:
|
|
206
|
+
- **U** — **Utilization**: % of resource being used
|
|
207
|
+
- **S** — **Saturation**: queue depth, wait time
|
|
208
|
+
- **E** — **Errors**: error count/rate
|
|
209
|
+
|
|
210
|
+
### Standard Dashboard Layout
|
|
211
|
+
```
|
|
212
|
+
Row 1: Summary / Health Overview (traffic lights)
|
|
213
|
+
Row 2: RED metrics (Rate, Errors, Duration)
|
|
214
|
+
Row 3: Resource usage (CPU, Memory, DB connections)
|
|
215
|
+
Row 4: Business metrics (orders/min, signups, payments)
|
|
216
|
+
Row 5: Logs panel (Loki integration)
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
---
|
|
220
|
+
|
|
221
|
+
## 🚨 Alerting Rules
|
|
222
|
+
|
|
223
|
+
### Severity Levels
|
|
224
|
+
| Level | Response Time | Example |
|
|
225
|
+
|-------|--------------|---------|
|
|
226
|
+
| `critical` | Immediate (PagerDuty) | Service down, payment failures |
|
|
227
|
+
| `warning` | Within 30min (Slack) | High error rate, slow queries |
|
|
228
|
+
| `info` | Business hours | Unusual traffic patterns |
|
|
229
|
+
|
|
230
|
+
### Standard Alert Rules (Prometheus alerting rules, routed by Alertmanager)
|
|
231
|
+
```yaml
|
|
232
|
+
groups:
|
|
233
|
+
- name: service-alerts
|
|
234
|
+
rules:
|
|
235
|
+
# Service is down
|
|
236
|
+
- alert: ServiceDown
|
|
237
|
+
expr: up{job="my-service"} == 0
|
|
238
|
+
for: 1m
|
|
239
|
+
labels: { severity: critical }
|
|
240
|
+
annotations:
|
|
241
|
+
summary: "Service {{ $labels.job }} is down"
|
|
242
|
+
|
|
243
|
+
# High error rate
|
|
244
|
+
- alert: HighErrorRate
|
|
245
|
+
expr: |
|
|
246
|
+
sum by (service) (rate(http_requests_total{status_code=~"5.."}[5m]))
|
|
247
|
+
/ sum by (service) (rate(http_requests_total[5m])) > 0.05
|
|
248
|
+
for: 5m
|
|
249
|
+
labels: { severity: warning }
|
|
250
|
+
annotations:
|
|
251
|
+
summary: "Error rate > 5% on {{ $labels.service }}"
|
|
252
|
+
|
|
253
|
+
# High P99 latency
|
|
254
|
+
- alert: HighLatency
|
|
255
|
+
expr: |
|
|
256
|
+
histogram_quantile(0.99,
|
|
257
|
+
rate(http_request_duration_seconds_bucket[5m])
|
|
258
|
+
) > 1
|
|
259
|
+
for: 5m
|
|
260
|
+
labels: { severity: warning }
|
|
261
|
+
annotations:
|
|
262
|
+
summary: "P99 latency > 1s on {{ $labels.service }}"
|
|
263
|
+
|
|
264
|
+
# Low cache hit rate
|
|
265
|
+
- alert: LowCacheHitRate
|
|
266
|
+
expr: |
|
|
267
|
+
rate(cache_hits_total[5m])
|
|
268
|
+
/ (rate(cache_hits_total[5m]) + rate(cache_misses_total[5m])) < 0.7
|
|
269
|
+
for: 10m
|
|
270
|
+
labels: { severity: warning }
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
### Alert Naming Convention
|
|
274
|
+
```
|
|
275
|
+
{Severity}{Service}{Problem}
|
|
276
|
+
CriticalPaymentServiceDown
|
|
277
|
+
WarningOrderServiceHighLatency
|
|
278
|
+
WarningRedisLowHitRate
|
|
279
|
+
CriticalDatabaseConnectionsExhausted
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
---
|
|
283
|
+
|
|
284
|
+
## 🔍 Distributed Tracing (OpenTelemetry)
|
|
285
|
+
|
|
286
|
+
```js
|
|
287
|
+
// src/tracing.js
|
|
288
|
+
import { NodeSDK } from '@opentelemetry/sdk-node';
|
|
289
|
+
import { getNodeAutoInstrumentations } from '@opentelemetry/auto-instrumentations-node';
|
|
290
|
+
import { JaegerExporter } from '@opentelemetry/exporter-jaeger';
|
|
291
|
+
|
|
292
|
+
const sdk = new NodeSDK({
|
|
293
|
+
traceExporter: new JaegerExporter({ endpoint: process.env.JAEGER_ENDPOINT }),
|
|
294
|
+
instrumentations: [getNodeAutoInstrumentations()],
|
|
295
|
+
serviceName: process.env.APP_NAME,
|
|
296
|
+
});
|
|
297
|
+
|
|
298
|
+
sdk.start();
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
### Span Naming
|
|
302
|
+
```
|
|
303
|
+
# HTTP: {method} {route}
|
|
304
|
+
GET /api/v1/users/:id
|
|
305
|
+
|
|
306
|
+
# DB: {operation} {table}
|
|
307
|
+
SELECT users
|
|
308
|
+
INSERT orders
|
|
309
|
+
|
|
310
|
+
# Cache: {operation} {key_pattern}
|
|
311
|
+
GET user:{id}
|
|
312
|
+
SET session:{id}
|
|
313
|
+
|
|
314
|
+
# Queue: {operation} {queue_name}
|
|
315
|
+
PUBLISH order.placed
|
|
316
|
+
CONSUME email.send
|
|
317
|
+
```
|