@gravito/zenith 0.1.0-beta.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/CHANGELOG.md +9 -0
  2. package/dist/bin.js +38846 -27303
  3. package/dist/client/assets/index-C332gZ-J.css +1 -0
  4. package/dist/client/assets/index-D4HibwTK.js +436 -0
  5. package/dist/client/index.html +2 -2
  6. package/dist/server/index.js +38846 -27303
  7. package/docs/ALERTING_GUIDE.md +71 -0
  8. package/docs/LARAVEL_ZENITH_ROADMAP.md +109 -0
  9. package/docs/QUASAR_MASTER_PLAN.md +140 -0
  10. package/package.json +52 -48
  11. package/scripts/debug_redis_keys.ts +24 -0
  12. package/specs/PULSE_SPEC.md +86 -0
  13. package/src/client/App.tsx +2 -0
  14. package/src/client/Layout.tsx +18 -0
  15. package/src/client/Sidebar.tsx +2 -1
  16. package/src/client/WorkerStatus.tsx +121 -76
  17. package/src/client/components/BrandIcons.tsx +138 -0
  18. package/src/client/components/ConfirmDialog.tsx +0 -1
  19. package/src/client/components/JobInspector.tsx +18 -6
  20. package/src/client/components/PageHeader.tsx +38 -0
  21. package/src/client/pages/OverviewPage.tsx +17 -20
  22. package/src/client/pages/PulsePage.tsx +478 -0
  23. package/src/client/pages/QueuesPage.tsx +1 -3
  24. package/src/client/pages/SettingsPage.tsx +640 -78
  25. package/src/client/pages/WorkersPage.tsx +71 -3
  26. package/src/client/pages/index.ts +1 -0
  27. package/src/server/index.ts +311 -11
  28. package/src/server/services/AlertService.ts +189 -41
  29. package/src/server/services/CommandService.ts +137 -0
  30. package/src/server/services/PulseService.ts +80 -0
  31. package/src/server/services/QueueService.ts +63 -6
  32. package/src/shared/types.ts +99 -0
  33. package/tsconfig.json +2 -2
  34. package/ARCHITECTURE.md +0 -88
  35. package/BATCH_OPERATIONS_IMPLEMENTATION.md +0 -159
  36. package/EVOLUTION_BLUEPRINT.md +0 -112
  37. package/JOBINSPECTOR_SCROLL_FIX.md +0 -152
  38. package/PULSE_IMPLEMENTATION_PLAN.md +0 -111
  39. package/TESTING_BATCH_OPERATIONS.md +0 -252
  40. package/dist/client/assets/index-DGYEwTDL.css +0 -1
  41. package/dist/client/assets/index-oyTdySX0.js +0 -421
  42. /package/{DEPLOYMENT.md → docs/DEPLOYMENT.md} +0 -0
  43. /package/{DOCS_INTERNAL.md → docs/DOCS_INTERNAL.md} +0 -0
  44. /package/{QUICK_TEST_GUIDE.md → docs/QUICK_TEST_GUIDE.md} +0 -0
  45. /package/{ROADMAP.md → docs/ROADMAP.md} +0 -0
@@ -0,0 +1,71 @@
1
+ # 🔔 Zenith Alerting Guide
2
+
3
+ This guide explains how to configure and manage the alerting system in Zenith to ensure your infrastructure and queues remain healthy.
4
+
5
+ ---
6
+
7
+ ## 🚀 Overview
8
+
9
+ Zenith's alerting engine is **Redis-Native** and **Stateless**.
10
+ * **Persistence**: Rules are stored in Redis (`gravito:zenith:alerts:rules`).
11
+ * **Evaluation**: The server evaluates all rules every 2 seconds against real-time metrics.
12
+ * **Delivery**: Alerts are dispatched via Slack Webhooks.
13
+
14
+ ---
15
+
16
+ ## 🛠️ Configuration Fields
17
+
18
+ When adding a new rule in **Settings > Alerting**, you will encounter these fields:
19
+
20
+ ### 1. Rule Name
21
+ A descriptive label for the alert (e.g., `Critical Backlog`, `Agent Offline`). This name will appear in the Slack notification.
22
+
23
+ ### 2. Type (Metric Category)
24
+ * **Queue Backlog**: Monitors the number of jobs in the `waiting` state.
25
+ * **High Failure Count**: Monitors the number of jobs in the `failed` state.
26
+ * **Worker Loss**: Monitors the total number of active worker nodes.
27
+ * **Node CPU (%)**: Monitors process-level CPU usage reported by Quasar Agents.
28
+ * **Node RAM (%)**: Monitors process-level RAM usage (RSS) relative to system total.
29
+
30
+ ### 3. Threshold
31
+ The numeric value that triggers the alert.
32
+ * For **Backlog/Failure**: The number of jobs (e.g., `1000`).
33
+ * For **CPU/RAM**: The percentage (e.g., `90`).
34
+ * For **Worker Loss**: The *minimum* number of workers expected (e.g., alert triggers if count is `< 2`).
35
+
36
+ ### 4. Cooldown (Minutes)
37
+ **Crucial Concept**: The period the system "stays silent" after an alert is fired.
38
+ * **Logic**: Once a rule triggers and sends a notification, it enters a "lock" state for the duration of the cooldown.
39
+ * **Purpose**: Prevents "Alert Fatigue" and notification storms.
40
+ * **Example**: If set to `30`, and a backlog spike occurs, you get **one** notification. You won't get another one for the same rule for 30 minutes, even if the backlog remains high.
41
+
42
+ ### 5. Queue (Optional)
43
+ Specify a specific queue name (e.g., `orders`, `emails`) to monitor. If left empty, the rule applies to the **total sum** of all queues.
44
+
45
+ ---
46
+
47
+ ## 🌊 Best Practices
48
+
49
+ ### The "Instant Fire" Design
50
+ Zenith alerts are designed for **instant awareness**.
51
+ * If a threshold is met during a 2-second check, the alert fires **immediately**.
52
+ * It does **not** wait for the condition to persist for multiple minutes (i.e., there is no debouncing).
53
+ * **Pro Tip**: If you have frequent "tiny spikes" that resolve themselves in seconds, set your **Threshold** slightly higher than the spikes to avoid noise.
54
+
55
+ ### Recommended Settings
56
+
57
+ | Scenario | Type | Threshold | Cooldown |
58
+ | :--- | :--- | :--- | :--- |
59
+ | **Critical Failure** | High Failure Count | 50 | 15m |
60
+ | **System Overload** | Node CPU | 90 | 30m |
61
+ | **Quiet Hours** | Queue Backlog | 5000 | 120m |
62
+ | **Fatal Shutdown** | Worker Loss | 1 | 10m |
63
+
64
+ ---
65
+
66
+ ## 🔗 Slack Integration
67
+ To receive notifications, ensure the `SLACK_WEBHOOK_URL` environment variable is set before starting the Zenith server.
68
+
69
+ ```bash
70
+ export SLACK_WEBHOOK_URL=https://hooks.slack.com/services/Txxx/Bxxx/Xxxx
71
+ ```
@@ -0,0 +1,109 @@
1
+ # 🚀 Project Zenith: Laravel Integration Roadmap
2
+
3
+ **Repository**: `gravito-framework/laravel-zenith`
4
+ **Target Audience**: Laravel 10/11 Applications
5
+ **Goal**: Provide deep, native introspection into Laravel applications for Gravito Zenith.
6
+
7
+ ---
8
+
9
+ ## 1. Vision & Architecture
10
+
11
+ Unlike the **Quasar Agent** (which is a sidecar daemon for OS/Infrastructure monitoring), **Laravel Zenith** is a native Composer package that lives *inside* the application.
12
+
13
+ * **Role**: "The Reporter". It sees what the OS cannot see.
14
+ * **Transport**: Direct Redis connection (utilizing `swarrot` or standard `predis`/`phpredis`).
15
+ * **Philosophy**: Zero-blocking. All reporting should be "fire-and-forget" or queued to avoid slowing down the user request lifecycle.
16
+
17
+ ---
18
+
19
+ ## 2. Core Features (The "Why")
20
+
21
+ ### A. Live Operational Logs (`logs`)
22
+ * **Feature**: A custom `Log Channel` driver.
23
+ * **Goal**: Stream logs (Info/Error/Debug) directly to Zenith's Live Log view.
24
+ * **Implementation**:
25
+ * `config/logging.php`: Add a `zenith` channel.
26
+ * Push JSON payloads to `flux_console:logs` Redis channel.
27
+
28
+ ### B. Queue Lifecycle Events (`queues`)
29
+ * **Feature**: Listen to Laravel Queue Events (`JobProcessing`, `JobProcessed`, `JobFailed`).
30
+ * **Goal**: Provide granular job insight that `quasar-go` cannot (e.g., "Job X failed with Exception Y", "Job Z took 45s").
31
+ * **Implementation**:
32
+ * Event Subscriber for `Illuminate\Queue\Events\*`.
33
+ * Capture `job->getRawBody()`, `exception->getMessage()`.
34
+
35
+ ### C. Request Performance (`http`)
36
+ * **Feature**: Global Middleware (`ZenithMonitorMiddleware`).
37
+ * **Goal**: Track "Slow Requests", 500 Errors, and Throughput.
38
+ * **Metrics**:
39
+ * Status Codes (2xx, 4xx, 5xx).
40
+ * Duration (ms).
41
+ * Route Name / Controller Action.
42
+
43
+ ### D. System Health Checks
44
+ * **Feature**: `php artisan zenith:check`
45
+ * **Goal**: Verify Redis connection and permissions.
46
+
47
+ ---
48
+
49
+ ## 3. Implementation Roadmap
50
+
51
+ ### Phase 1: The Foundation (Logs & Config)
52
+ **Goal**: Get the package installed and streaming basic logs.
53
+ - [ ] Initialize Repository `gravito-framework/laravel-zenith`.
54
+ - [ ] Create `ZenithServiceProvider`.
55
+ - [ ] Implement `ZenithLogger` (Monolog Handler).
56
+ - [ ] Publish `config/zenith.php` (Redis connection settings).
57
+ - [ ] **Deliverable**: `Log::info('Hello Zenith')` appears in Zenith UI.
58
+
59
+ ### Phase 2: The Worker's Eye (Queues)
60
+ **Goal**: Deep visibility into Queue Jobs.
61
+ - [ ] Create `ZenithQueueSubscriber`.
62
+ - [ ] Handle `JobFailed`: Serialize exception and push to Zenith Alerting.
63
+ - [ ] Handle `JobProcessed`: Record metrics for "Jobs per minute".
64
+ - [ ] **Deliverable**: Seeing real-time "Job Completed" toasts and Error details in Zenith.
65
+
66
+ ### Phase 3: The Watchtower (HTTP & Exceptions)
67
+ **Goal**: Monitoring web requests.
68
+ - [ ] Create `RecordRequestMetrics` Middleware.
69
+ - [ ] Exception Handler integration (optional, for global error catching).
70
+ - [ ] Filter logic (ignore `/nova`, `/telescope`, etc.).
71
+ - [ ] **Deliverable**: HTTP Throughput graphs in Zenith.
72
+
73
+ ### Phase 4: The Bridge (Remote Control Hooks)
74
+ **Goal**: Allow Zenith to trigger Laravel actions safely.
75
+ - [ ] Expose internal hooks for `quasar-go` to call?
76
+ * *Note*: `quasar-go` already calls `artisan`. Phase 4 might be about ensuring `artisan zenith:run-job {id}` exists if we need advanced job re-running that `queue:retry` can't handle.
77
+
78
+ ---
79
+
80
+ ## 4. Technical Specifications
81
+
82
+ ### Redis Protocol
83
+ We will reuse the **Gravito Pulse Protocol (GPP)** used by `quasar-go`:
84
+ * **Logs**: `PUBLISH flux_console:logs`
85
+ * **Metrics**: `INCR flux_console:metrics:...`
86
+
87
+ ### Configuration (`zenith.php`)
88
+ ```php
89
+ return [
90
+ 'enabled' => env('ZENITH_ENABLED', true),
91
+
92
+ 'connection' => env('ZENITH_REDIS_CONNECTION', 'default'),
93
+
94
+ 'logging' => [
95
+ 'enabled' => true,
96
+ 'level' => 'debug',
97
+ ],
98
+
99
+ 'queues' => [
100
+ 'monitor_all' => true,
101
+ 'ignore_jobs' => [],
102
+ ],
103
+ ];
104
+ ```
105
+
106
+ ### Dependency Strategy
107
+ * **Support**: Laravel 10.x, 11.x
108
+ * **PHP**: 8.1+
109
+ * **Driver**: `phpredis` (preferred) or `predis`.
@@ -0,0 +1,140 @@
1
+ # 🌌 Project Quasar: Master Implementation Plan
2
+
3
+ **Version**: 1.0.0 (Unified)
4
+ **Target**: Zenith v1.0
5
+ **Context**: This document supersedes all previous "Pulse" plans. It is the single source of truth for the Quasar monitoring ecosystem.
6
+
7
+ ---
8
+
9
+ ## 1. Vision & Identity
10
+
11
+ **Quasar** is the comprehensive observability layer for the Gravito ecosystem. It unifies infrastructure monitoring (CPU/RAM), application insights (Queues/Slow Logs), and availability checks into a single stream.
12
+
13
+ > **Slogan**: *"The brightest signal in your infrastructure."*
14
+
15
+ ---
16
+
17
+ ## 2. Architecture & Deployment Matrix
18
+
19
+ We employ a "Right Tool for the Job" strategy for deployment:
20
+
21
+ | Ecosystem | Tool | Package | Strategy |
22
+ | :--- | :--- | :--- | :--- |
23
+ | **Node.js / Bun** | **SDK** | `@gravito/quasar` | **In-App Integration**. Directly imports into the app. Captures Event Loop, Heap, and Queues. |
24
+ | **Legacy / Polyglot** | **Agent** | `gravito/quasar-agent` | **Sidecar / Daemon**. Standalone Go binary. Captures OS-level metrics and external Queue states via Redis/API. |
25
+ | **PHP / Laravel** | **Package** | `gravito/laravel-zenith` | **Native Integration**. Laravel Service Provider. Captures Jobs, Logs, and Exceptions. |
26
+
27
+ ### 🚀 Deployment Methods (Zero Friction)
28
+ 1. **NPM**: `npm install @gravito/quasar` (For Node developers)
29
+ 2. **Docker**: `image: gravito/quasar-agent:latest` (For Container/K8s/Laravel Sail)
30
+ 3. **Shell**: `curl -sL get.gravito.dev/quasar | bash` (For Bare Metal/VM)
31
+
32
+ ---
33
+
34
+ ## 3. Data Protocol (The Quasar Schema)
35
+
36
+ All agents/SDKs report to Redis using this unified schema.
37
+
38
+ **Namespace**: `gravito:quasar:`
39
+
40
+ ### A. Heartbeat (Infrastructure)
41
+ * **Key**: `gravito:quasar:node:{service_name}:{node_id}`
42
+ * **TTL**: 30 seconds
43
+ * **Metrics Philosophy**: Report **BOTH** Process and System metrics to isolate resource usage.
44
+ * `process`: metrics for the specific service (RAM usage, CPU time).
45
+ * `system`: metrics for the host OS (Load avg, Total RAM).
46
+
47
+ ### B. Queues (Workload)
48
+ * **Key**: `gravito:quasar:queues:{service_name}`
49
+ * **TTL**: 30 seconds
50
+ * **Purpose**: Snapshots of queue depths from various drivers.
51
+ * Gravito Stream (Native)
52
+ * Laravel Horizon (Redis)
53
+ * BullMQ (Redis)
54
+ * AWS SQS (API)
55
+
56
+ ### C. Insights (Performance)
57
+ * **Key**: `gravito:quasar:slow:{service_name}` (Stream)
58
+ * **Purpose**: Log requests or jobs that exceed performance thresholds.
59
+
60
+ ---
61
+
62
+ ## 4. Execution Roadmap
63
+
64
+ ### Phase 1: Foundation & Application Monitoring (Pulse Node)
65
+ **Goal**: Establish the basic dashboard and Node.js SDK for monitoring application health (CPU/RAM).
66
+
67
+ - [x] **Define Schema**: Update `PULSE_SPEC.md` with new Redis key patterns (`gravito:quasar:node:*`) and payload structure.
68
+ - [x] **SDK Update**: Refactor `@gravito/quasar` (formerly pulse-node) to support:
69
+ - [x] Automatic runtime detection (Node, Bun, Deno).
70
+ - [x] System/Process split metrics.
71
+ - [x] Correct Redis namespacing.
72
+ - [x] **Server Update**: Update Zenith's `PulseService` to scan new key patterns.
73
+ - [x] **UI Overhaul**: Redesign `PulsePage` in Zenith:
74
+ - [x] Implement "Card" layout for nodes.
75
+ - [x] Rich metrics visualization (CPU/RAM split bars).
76
+ - [x] Add brand icons for runtimes (Node, Bun, Deno, PHP, Go, Python).
77
+ - [x] **Layout Optimization**: Compact Grid for Service Groups.
78
+
79
+ ---
80
+
81
+ ### Phase 2: Architecture Evolution - "The Brain-Hand Model" 🧠 🖐️ - **Completed** ✅
82
+ To support advanced features like **Queue Insights** (Phase 2) and **Remote Control** (Phase 3), we are adopting a bidirectional architecture.
83
+
84
+ * **Metric Transport (The Mouth)**: Agent sends metrics to Zenith (via shared Redis).
85
+ * **Local Insight (The Eyes)**: Agent inspects *its own* environment (Local Redis, Local Queue) to gather data. Zenith doesn't need to connect to the App DB directly.
86
+ * **Command Execution (The Hand)**: Zenith publishes commands (Retry/Delete), and Agent listens and executes them locally.
87
+
88
+ #### Revised Phase 2: Application Insights (Queues) - **Completed** ✅
89
+ **Goal**: Enable Quasar Agent to "see" local queues and report their status.
90
+
91
+ - [x] **SDK Architecture**: Update `QuasarAgent` to handle **Dual Connections**:
92
+ - `transport`: Connection to Zenith (for sending heartbeat).
93
+ - `app`: Connection to Local App (for inspecting queues/bull/laravel).
94
+ - [x] **Probe Implementation**: Create `QueueProbe` interface and implementations:
95
+ - `RedisListProbe`: Simple `LLEN` checks.
96
+ - [x] `BullProbe` (Future): Check `bull:*:waiting`, etc.
97
+ - [x] `LaravelProbe`: Check `queues:default`, `queues:reserved`, `queues:delayed`.
98
+ - [x] **SDK API**: Expose `.monitorQueue(name, type)` method.
99
+ - [x] **UI Update**: Update `NodeCard` to render a "Queues" section if queue data is present in payload.
100
+
101
+ ### Phase 3: Remote Control (Command & Control) - **Completed** ✅
102
+ **Goal**: Allow Zenith to instruct Quasar to perform actions (Retry Job, Delete Job).
103
+
104
+ - [x] **Protocol**: Define Command Protocol (Redis Pub/Sub: `gravito:quasar:cmd:{service}:{node_id}`).
105
+ - [x] **Agent**: Implement `CommandListener` in SDK.
106
+ - [x] **Command Executors**: Implement `RetryJobExecutor` and `DeleteJobExecutor`.
107
+ - [x] **Security (Allowlist Strategy)**:
108
+ - [x] Implement **Command Allowlist** inside Agent code (only `RETRY_JOB`, `DELETE_JOB` allowed).
109
+ - [ ] (Future) Use **Redis ACL** (v6+) to restrict Agent's `transport` connection.
110
+ - [x] **Server**: Add `CommandService` and `/api/pulse/command` endpoint.
111
+ - [x] **UI**: Add "Retry/Delete" buttons in Zenith `PulsePage` for failed queue jobs.
112
+ - [x] **Documentation**: Created `ALERTING_GUIDE.md` for configuration best practices.
113
+
114
+ ### Phase 4: Polyglot Agent - **Completed** ✅
115
+ * [x] Create `gravito-framework/quasar` repo (`quasar-go`).
116
+ * [x] Develop Go Agent core (utilizing `gopsutil`).
117
+ * [x] System Probe (CPU/RAM)
118
+ * [x] Agent heartbeat loop
119
+ * [x] Config management (env vars)
120
+ * [x] Implement Queue Monitoring in Go Agent:
121
+ * [x] Redis List Probe
122
+ * [x] Laravel Queue Probe
123
+ * [x] Implement Remote Control in Go Agent:
124
+ * [x] Command Listener (Pub/Sub)
125
+ * [x] RETRY_JOB / DELETE_JOB Executors
126
+ * [x] **Laravel Deep Integration**:
127
+ * [x] `LARAVEL_ACTION` Executor (runs `artisan` safely).
128
+ * [x] Auto-discovery of Laravel project root via process inspection.
129
+ * [x] **Advanced Process Introspection**: Captures real-time CPU/RAM usage per Laravel Worker process.
130
+ * [x] **Virtual Node Mapping**: Visualizes individual Laravel Workers as distinct nodes in Zenith UI.
131
+ * [x] Support for `retry-all`, `retry {id}`, and `restart` (graceful worker reload).
132
+ * [x] Docker & Makefile setup.
133
+ * [x] Binary Release pipeline (GitHub Actions).
134
+ * [x] Publish to Docker Hub (`carllee/quasar-go-agent`).
135
+
136
+ ---
137
+
138
+ ## 5. Security & Access
139
+ * **Auth**: Agents authenticate via a shared secret (`QUASAR_TOKEN`) if writing to a remote Redis.
140
+ * **Isolation**: Process metrics only report what they have access to. System metrics require readable `/proc` (in Docker).
package/package.json CHANGED
@@ -1,50 +1,54 @@
1
1
  {
2
- "name": "@gravito/zenith",
3
- "version": "0.1.0-beta.1",
4
- "description": "Gravito Zenith: Zero-config control plane for Gravito Flux & Stream",
5
- "type": "module",
6
- "bin": {
7
- "zenith": "./dist/bin.js",
8
- "flux-console": "./dist/bin.js"
9
- },
10
- "main": "./dist/index.js",
11
- "types": "./dist/index.d.ts",
12
- "scripts": {
13
- "dev:server": "bun run --watch src/server/index.ts",
14
- "dev:client": "vite",
15
- "build": "vite build && bun build ./src/server/index.ts ./src/bin.ts --outdir ./dist --target bun",
16
- "start": "bun ./dist/bin.js",
17
- "test": "bun test",
18
- "seed": "bun scripts/seed.ts",
19
- "worker": "bun scripts/worker.ts"
20
- },
21
- "dependencies": {
22
- "@gravito/atlas": "workspace:*",
23
- "@gravito/photon": "workspace:*",
24
- "@gravito/stream": "workspace:*",
25
- "@tanstack/react-query": "^5.0.0",
26
- "clsx": "^2.1.1",
27
- "date-fns": "^4.1.0",
28
- "framer-motion": "^12.23.26",
29
- "ioredis": "^5.0.0",
30
- "lucide-react": "^0.562.0",
31
- "react": "^19.0.0",
32
- "react-dom": "^19.0.0",
33
- "react-router-dom": "^7.11.0",
34
- "recharts": "^3.6.0",
35
- "tailwind-merge": "^3.4.0"
36
- },
37
- "devDependencies": {
38
- "@types/react": "^19.0.0",
39
- "@types/react-dom": "^19.0.0",
40
- "@vitejs/plugin-react": "^5.1.2",
41
- "autoprefixer": "^10.4.0",
42
- "postcss": "^8.4.0",
43
- "tailwindcss": "^3.4.0",
44
- "typescript": "^5.0.0",
45
- "vite": "^6.0.0"
46
- },
47
- "publishConfig": {
48
- "access": "public"
49
- }
2
+ "name": "@gravito/zenith",
3
+ "version": "1.0.0",
4
+ "description": "Gravito Zenith: Zero-config control plane for Gravito Flux & Stream",
5
+ "type": "module",
6
+ "bin": {
7
+ "zenith": "dist/bin.js",
8
+ "flux-console": "dist/bin.js"
9
+ },
10
+ "main": "./dist/index.js",
11
+ "types": "./dist/index.d.ts",
12
+ "scripts": {
13
+ "dev:server": "bun run --watch src/server/index.ts",
14
+ "dev:client": "vite",
15
+ "build": "vite build && bun build ./src/server/index.ts ./src/bin.ts --outdir ./dist --target bun",
16
+ "start": "bun ./dist/bin.js",
17
+ "test": "bun test",
18
+ "typecheck": "bun tsc -p tsconfig.json --noEmit --skipLibCheck",
19
+ "seed": "bun scripts/seed.ts",
20
+ "worker": "bun scripts/worker.ts"
21
+ },
22
+ "dependencies": {
23
+ "@gravito/atlas": "workspace:*",
24
+ "@gravito/photon": "workspace:*",
25
+ "@gravito/quasar": "workspace:*",
26
+ "@gravito/stream": "workspace:*",
27
+ "@tanstack/react-query": "^5.0.0",
28
+ "clsx": "^2.1.1",
29
+ "date-fns": "^4.1.0",
30
+ "framer-motion": "^12.23.26",
31
+ "ioredis": "^5.0.0",
32
+ "lucide-react": "^0.562.0",
33
+ "nodemailer": "^7.0.12",
34
+ "react": "^19.0.0",
35
+ "react-dom": "^19.0.0",
36
+ "react-router-dom": "^7.11.0",
37
+ "recharts": "^3.6.0",
38
+ "tailwind-merge": "^3.4.0"
39
+ },
40
+ "devDependencies": {
41
+ "@types/nodemailer": "^7.0.4",
42
+ "@types/react": "^19.0.0",
43
+ "@types/react-dom": "^19.0.0",
44
+ "@vitejs/plugin-react": "^5.1.2",
45
+ "autoprefixer": "^10.4.0",
46
+ "postcss": "^8.4.0",
47
+ "tailwindcss": "^3.4.0",
48
+ "typescript": "^5.9.3",
49
+ "vite": "^6.0.0"
50
+ },
51
+ "publishConfig": {
52
+ "access": "public"
53
+ }
50
54
  }
@@ -0,0 +1,24 @@
1
+ import { Redis } from 'ioredis'
2
+
3
+ const redis = new Redis('redis://localhost:6379')
4
+
5
+ async function check() {
6
+ console.log('Connecting to Redis...')
7
+ try {
8
+ const keys = await redis.keys('gravito:quasar:node:*')
9
+ console.log('Keys found count:', keys.length)
10
+ console.log('Keys:', keys)
11
+
12
+ if (keys.length > 0) {
13
+ const val = await redis.get(keys[0])
14
+ console.log('--- Value of first key ---')
15
+ console.log(val)
16
+ console.log('--- End Value ---')
17
+ }
18
+ } catch (err) {
19
+ console.error('Redis Error:', err)
20
+ }
21
+ process.exit(0)
22
+ }
23
+
24
+ check()
@@ -0,0 +1,86 @@
1
+ # Gravito Pulse Implementation Spec
2
+
3
+ ## Overview
4
+ Gravito Pulse is a lightweight APM (Application Performance Monitoring) system integrated into Zenith. It follows the philosophy: *"If you can connect to Redis, you are monitored."*
5
+
6
+ ## 1. Gravito Pulse Protocol (GPP)
7
+
8
+ ### Data Structure
9
+ Pulse uses Redis keys with specific TTLs to represent live services.
10
+
11
+ - **Key Pattern**: `gravito:quasar:node:{service}:{node_id}`
12
+ - **TTL**: 30 seconds (Agents should heartbeat every 10-15s).
13
+ - **Data Type**: String (JSON)
14
+
15
+ ### Payload Schema
16
+ ```json
17
+ {
18
+ "id": "string", // Unique Instance ID (e.g., UUID or Hostname-PID)
19
+ "service": "string", // Group name (e.g., "worker-billing", "api-gateway")
20
+ "language": "string", // "node" | "bun" | "deno" | "php" | "go" | "python" | "other"
21
+ "version": "string", // Language/Runtime Version
22
+ "pid": "number", // Process ID
23
+ "hostname": "string", // Machine Hostname or Custom Name
24
+ "platform": "string", // OS Platform (linux, darwin, win32)
25
+ "cpu": {
26
+ "system": "number", // System Load % (0-100)
27
+ "process": "number", // Process Usage % (0-100)
28
+ "cores": "number" // Core count
29
+ },
30
+ "memory": {
31
+ "system": {
32
+ "total": "number", // System Total Memory (bytes)
33
+ "free": "number", // System Free Memory (bytes)
34
+ "used": "number" // System Used Memory (bytes)
35
+ },
36
+ "process": {
37
+ "rss": "number", // Resident Set Size (bytes)
38
+ "heapTotal": "number",// Heap Total (bytes)
39
+ "heapUsed": "number" // Heap Used (bytes)
40
+ }
41
+ },
42
+ "runtime": {
43
+ "uptime": "number", // Process uptime in seconds
44
+ "framework": "string" // Optional framework info
45
+ },
46
+ "timestamp": "number" // Unix Ms Timestamp
47
+ }
48
+ ```
49
+
50
+ ## 2. Implementation Modules
51
+
52
+ ### A. Client SDK (`@gravito/quasar`, formerly `@gravito/pulse-node`)
53
+ A lightweight agent to collect metrics and publish to Redis.
54
+ - **Dependencies**: `ioredis`, `pidusage` (optional, or use native `os`/`process`).
55
+ - **Functionality**:
56
+ - `startPulse({ service: string })`: Starts the heartbeat loop.
57
+ - Collects CPU/RAM usage.
58
+ - Publishes to Redis.
59
+
60
+ ### B. Server Collector (Zenith Console)
61
+ - **Service**: `PulseService`
62
+ - **Method**: `getNodes()`
63
+ - Performs `SCAN 0 MATCH gravito:quasar:node:* COUNT 100`.
64
+ - Returns grouped nodes by `service`.
65
+ - **API**: `GET /api/pulse/nodes`
66
+
67
+ ### C. Frontend Dashboard (Zenith UI)
68
+ - **Route**: `/pulse`
69
+ - **Components**:
70
+ - `ServiceGroup`: A container for nodes of a specific service.
71
+ - `NodeCard`: Displays CPU/RAM sparklines (optional) and current health.
72
+ - `HealthBadge`: Green (Fresh), Yellow (>15s ago), Red (Dead/Gone - though Redis TTL handles removal, frontend can handle stale UI).
73
+
74
+ ## 3. Alerts (Phase 2)
75
+ - Server-side checker that monitors values from `PulseService`.
76
+ - Triggers `AlertService` if:
77
+ - CPU > 90% for 2 mins.
78
+ - Memory > 90% for 5 mins.
79
+ - Disk < 10% free.
80
+
81
+ ## 4. Work Plan
82
+ 1. **Define Types**: Add `PulseNode` interface to `@gravito/custom-types` or `flux-console` shared types.
83
+ 2. **Implement Server Collector**: Create `PulseService` in `packages/flux-console/server/services`.
84
+ 3. **Implement API**: Add route in `packages/flux-console/server/routes.ts`.
85
+ 4. **Implement UI**: Create `PulsePage` and components.
86
+ 5. **Implement Node Client**: Add `startPulse` to `packages/stream` (or separate package) to verify "dogfooding" by having the server monitor itself.
@@ -8,6 +8,7 @@ import {
8
8
  LoginPage,
9
9
  MetricsPage,
10
10
  OverviewPage,
11
+ PulsePage,
11
12
  QueuesPage,
12
13
  SchedulesPage,
13
14
  SettingsPage,
@@ -48,6 +49,7 @@ function AuthenticatedRoutes() {
48
49
  <Route path="/schedules" element={<SchedulesPage />} />
49
50
  <Route path="/workers" element={<WorkersPage />} />
50
51
  <Route path="/metrics" element={<MetricsPage />} />
52
+ <Route path="/pulse" element={<PulsePage />} />
51
53
  <Route path="/settings" element={<SettingsPage />} />
52
54
  </Routes>
53
55
  </Layout>
@@ -90,6 +90,15 @@ export function Layout({ children }: LayoutProps) {
90
90
  }
91
91
  })
92
92
 
93
+ ev.addEventListener('pulse', (e) => {
94
+ try {
95
+ const data = JSON.parse(e.data)
96
+ window.dispatchEvent(new CustomEvent('flux-pulse-update', { detail: data }))
97
+ } catch (err) {
98
+ console.error('SSE Pulse Error', err)
99
+ }
100
+ })
101
+
93
102
  ev.onerror = (err) => {
94
103
  console.error('[Zenith] SSE Connection Error', err)
95
104
  ev.close()
@@ -339,6 +348,14 @@ export function Layout({ children }: LayoutProps) {
339
348
  return () => window.removeEventListener('keydown', handleKeyDown)
340
349
  }, [])
341
350
 
351
+ // Auto-scroll to selected item
352
+ useEffect(() => {
353
+ const el = document.getElementById(`command-item-${selectedIndex}`)
354
+ if (el) {
355
+ el.scrollIntoView({ block: 'nearest', behavior: 'smooth' })
356
+ }
357
+ }, [selectedIndex])
358
+
342
359
  const handleSelect = (cmd: CommandItem) => {
343
360
  cmd.action()
344
361
  setIsCommandPaletteOpen(false)
@@ -559,6 +576,7 @@ export function Layout({ children }: LayoutProps) {
559
576
  {filteredCommands.map((cmd, i) => (
560
577
  <button
561
578
  type="button"
579
+ id={`command-item-${i}`}
562
580
  key={cmd.id}
563
581
  className={cn(
564
582
  'w-full flex items-center justify-between p-4 rounded-2xl transition-all cursor-pointer group/cmd outline-none',
@@ -23,10 +23,11 @@ export function Sidebar({ className, collapsed, toggleCollapse }: SidebarProps)
23
23
 
24
24
  const navItems = [
25
25
  { icon: LayoutDashboard, label: 'Overview', path: '/' },
26
+ { icon: Activity, label: 'Pulse', path: '/pulse' },
26
27
  { icon: ListTree, label: 'Queues', path: '/queues' },
27
28
  { icon: Clock, label: 'Schedules', path: '/schedules' },
28
29
  { icon: HardDrive, label: 'Workers', path: '/workers' },
29
- { icon: Activity, label: 'Metrics', path: '/metrics' },
30
+ // { icon: Activity, label: 'Metrics', path: '/metrics' },
30
31
  { icon: Settings, label: 'Settings', path: '/settings' },
31
32
  ]
32
33