@gravito/zenith 1.1.3 → 1.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/README.md +28 -10
  2. package/dist/bin.js +43235 -76691
  3. package/dist/client/index.html +13 -0
  4. package/dist/server/index.js +43235 -76691
  5. package/package.json +16 -7
  6. package/CHANGELOG.md +0 -62
  7. package/Dockerfile +0 -46
  8. package/Dockerfile.demo-worker +0 -29
  9. package/bin/flux-console.ts +0 -2
  10. package/doc/ECOSYSTEM_EXPANSION_RFC.md +0 -130
  11. package/docker-compose.yml +0 -40
  12. package/docs/ALERTING_GUIDE.md +0 -71
  13. package/docs/DEPLOYMENT.md +0 -157
  14. package/docs/DOCS_INTERNAL.md +0 -73
  15. package/docs/LARAVEL_ZENITH_ROADMAP.md +0 -109
  16. package/docs/QUASAR_MASTER_PLAN.md +0 -140
  17. package/docs/QUICK_TEST_GUIDE.md +0 -72
  18. package/docs/ROADMAP.md +0 -85
  19. package/docs/integrations/LARAVEL.md +0 -207
  20. package/postcss.config.js +0 -6
  21. package/scripts/debug_redis_keys.ts +0 -24
  22. package/scripts/flood-logs.ts +0 -21
  23. package/scripts/seed.ts +0 -213
  24. package/scripts/verify-throttle.ts +0 -49
  25. package/scripts/worker.ts +0 -124
  26. package/specs/PULSE_SPEC.md +0 -86
  27. package/src/bin.ts +0 -6
  28. package/src/client/App.tsx +0 -72
  29. package/src/client/Layout.tsx +0 -669
  30. package/src/client/Sidebar.tsx +0 -112
  31. package/src/client/ThroughputChart.tsx +0 -158
  32. package/src/client/WorkerStatus.tsx +0 -202
  33. package/src/client/components/BrandIcons.tsx +0 -168
  34. package/src/client/components/ConfirmDialog.tsx +0 -134
  35. package/src/client/components/JobInspector.tsx +0 -487
  36. package/src/client/components/LogArchiveModal.tsx +0 -432
  37. package/src/client/components/NotificationBell.tsx +0 -212
  38. package/src/client/components/PageHeader.tsx +0 -47
  39. package/src/client/components/Toaster.tsx +0 -90
  40. package/src/client/components/UserProfileDropdown.tsx +0 -186
  41. package/src/client/contexts/AuthContext.tsx +0 -105
  42. package/src/client/contexts/NotificationContext.tsx +0 -128
  43. package/src/client/index.css +0 -172
  44. package/src/client/main.tsx +0 -15
  45. package/src/client/pages/LoginPage.tsx +0 -164
  46. package/src/client/pages/MetricsPage.tsx +0 -445
  47. package/src/client/pages/OverviewPage.tsx +0 -519
  48. package/src/client/pages/PulsePage.tsx +0 -409
  49. package/src/client/pages/QueuesPage.tsx +0 -378
  50. package/src/client/pages/SchedulesPage.tsx +0 -535
  51. package/src/client/pages/SettingsPage.tsx +0 -1001
  52. package/src/client/pages/WorkersPage.tsx +0 -380
  53. package/src/client/pages/index.ts +0 -8
  54. package/src/client/utils.ts +0 -15
  55. package/src/server/config/ServerConfigManager.ts +0 -90
  56. package/src/server/index.ts +0 -860
  57. package/src/server/middleware/auth.ts +0 -127
  58. package/src/server/services/AlertService.ts +0 -321
  59. package/src/server/services/CommandService.ts +0 -136
  60. package/src/server/services/LogStreamProcessor.ts +0 -93
  61. package/src/server/services/MaintenanceScheduler.ts +0 -78
  62. package/src/server/services/PulseService.ts +0 -148
  63. package/src/server/services/QueueMetricsCollector.ts +0 -138
  64. package/src/server/services/QueueService.ts +0 -924
  65. package/src/shared/types.ts +0 -223
  66. package/tailwind.config.js +0 -80
  67. package/tests/placeholder.test.ts +0 -7
  68. package/tsconfig.json +0 -29
  69. package/tsconfig.node.json +0 -10
  70. package/vite.config.ts +0 -27
package/package.json CHANGED
@@ -1,14 +1,22 @@
1
1
  {
2
2
  "name": "@gravito/zenith",
3
- "version": "1.1.3",
3
+ "sideEffects": false,
4
+ "version": "1.1.6",
4
5
  "description": "Gravito Zenith: Zero-config control plane for Gravito Flux & Stream",
5
6
  "type": "module",
6
7
  "bin": {
7
8
  "zenith": "dist/bin.js",
8
9
  "flux-console": "dist/bin.js"
9
10
  },
10
- "main": "./dist/index.js",
11
- "types": "./dist/index.d.ts",
11
+ "main": "./dist/server/index.js",
12
+ "exports": {
13
+ ".": "./dist/server/index.js",
14
+ "./bin": "./dist/bin.js"
15
+ },
16
+ "files": [
17
+ "dist",
18
+ "README.md"
19
+ ],
12
20
  "scripts": {
13
21
  "dev:server": "bun run --watch src/server/index.ts",
14
22
  "dev:client": "vite",
@@ -22,10 +30,10 @@
22
30
  "test:integration": "test $(find tests -name '*.integration.test.ts' 2>/dev/null | wc -l) -gt 0 && find tests -name '*.integration.test.ts' -print0 | xargs -0 bun test --timeout=10000 || echo 'No integration tests found'"
23
31
  },
24
32
  "dependencies": {
25
- "@gravito/atlas": "^1.6.0",
26
- "@gravito/photon": "^1.0.1",
27
- "@gravito/quasar": "^1.3.0",
28
- "@gravito/stream": "^2.0.2",
33
+ "@gravito/atlas": "^2.5.2",
34
+ "@gravito/photon": "^1.1.3",
35
+ "@gravito/quasar": "^1.3.2",
36
+ "@gravito/stream": "^2.1.1",
29
37
  "@tanstack/react-query": "^5.0.0",
30
38
  "clsx": "^2.1.1",
31
39
  "date-fns": "^4.1.0",
@@ -43,6 +51,7 @@
43
51
  "@types/nodemailer": "^7.0.4",
44
52
  "@types/react": "^19.0.0",
45
53
  "@types/react-dom": "^19.0.0",
54
+ "@types/node": "latest",
46
55
  "@vitejs/plugin-react": "^5.1.2",
47
56
  "autoprefixer": "^10.4.0",
48
57
  "postcss": "^8.4.0",
package/CHANGELOG.md DELETED
@@ -1,62 +0,0 @@
1
- # @gravito/zenith
2
-
3
- ## 1.1.3
4
-
5
- ### Patch Changes
6
-
7
- - Convert all workspace:\* dependencies to version numbers for npm publishing
8
-
9
- - Fixed 144 workspace:\* dependencies across 58 packages
10
- - Ensures all packages work properly when installed from npm
11
- - Resolves issues with bunx and npm installation of CLI tools
12
- - All internal dependencies now use explicit version constraints
13
-
14
- - Updated dependencies
15
- - @gravito/photon@1.0.1
16
- - @gravito/stream@2.0.2
17
-
18
- ## 1.1.2
19
-
20
- ### Patch Changes
21
-
22
- - Updated dependencies [905588f]
23
- - @gravito/stream@2.0.1
24
-
25
- ## 1.1.1
26
-
27
- ### Patch Changes
28
-
29
- - Updated dependencies
30
- - @gravito/atlas@2.1.0
31
- - @gravito/stream@1.0.3
32
-
33
- ## 1.1.0
34
-
35
- ### Minor Changes
36
-
37
- - Implement several more examples and fix module issues, including:
38
- - Support middleware in core route definitions.
39
- - Improve Atlas driver loading and dependency injection.
40
- - Add PostgreSQL support to Ecommerce MVC example.
41
- - Fix internal type resolution issues across packages.
42
-
43
- ### Patch Changes
44
-
45
- - Updated dependencies
46
- - @gravito/atlas@1.2.0
47
- - @gravito/quasar@1.2.0
48
- - @gravito/stream@1.0.2
49
-
50
- ## 1.0.1
51
-
52
- ### Patch Changes
53
-
54
- - @gravito/stream@1.0.1
55
-
56
- ## 1.0.0
57
-
58
- ### Patch Changes
59
-
60
- - Updated dependencies
61
- - @gravito/atlas@1.0.1
62
- - @gravito/stream@1.0.0
package/Dockerfile DELETED
@@ -1,46 +0,0 @@
1
- # Use Bun official image
2
- FROM oven/bun:1.1.26 AS base
3
- WORKDIR /usr/src/app
4
-
5
- # ---- 1. Install Dependencies ----
6
- FROM base AS install
7
- # Copy root files
8
- COPY package.json bun.lock ./
9
- # Copy package.json files for workspace resolution
10
- COPY packages/photon/package.json ./packages/photon/
11
- COPY packages/stream/package.json ./packages/stream/
12
- COPY packages/flux-console/package.json ./packages/flux-console/
13
-
14
- # Install dependencies
15
- RUN bun install --frozen-lockfile
16
-
17
- # ---- 2. Build Stage ----
18
- FROM base AS build
19
- COPY --from=install /usr/src/app/node_modules ./node_modules
20
- COPY --from=install /usr/src/app/packages ./packages
21
- COPY . .
22
-
23
- # Build the console
24
- # This bundles the server and builds the client (Vite)
25
- RUN cd packages/flux-console && bun run build
26
-
27
- # ---- 3. Production Runner ----
28
- FROM base AS release
29
- WORKDIR /app
30
-
31
- # Copy built artifacts
32
- # Note: server and bin are bundled into dist/
33
- COPY --from=build /usr/src/app/packages/flux-console/dist ./dist
34
- COPY --from=build /usr/src/app/packages/flux-console/package.json ./package.json
35
- # Client source/assets are needed for the server to serve them
36
- COPY --from=build /usr/src/app/packages/flux-console/src/client ./src/client
37
-
38
- # Expose port
39
- EXPOSE 3000
40
-
41
- # Environment defaults
42
- ENV PORT=3000
43
- ENV NODE_ENV=production
44
-
45
- # Start the console
46
- CMD ["bun", "run", "dist/bin.js"]
@@ -1,29 +0,0 @@
1
- # Use Bun official image
2
- FROM oven/bun:1.1.26 AS base
3
- WORKDIR /usr/src/app
4
-
5
- # ---- 1. Install Dependencies ----
6
- FROM base AS install
7
- COPY package.json bun.lock ./
8
- COPY packages/photon/package.json ./packages/photon/
9
- COPY packages/stream/package.json ./packages/stream/
10
- COPY packages/flux-console/package.json ./packages/flux-console/
11
- RUN bun install --frozen-lockfile
12
-
13
- # ---- 2. Copy Source ----
14
- FROM base AS build
15
- COPY --from=install /usr/src/app/node_modules ./node_modules
16
- COPY --from=install /usr/src/app/packages ./packages
17
- COPY . .
18
-
19
- # ---- 3. Runner ----
20
- FROM base AS release
21
- WORKDIR /usr/src/app
22
- COPY --from=build /usr/src/app ./
23
-
24
- # Env defaults
25
- ENV NODE_ENV=production
26
-
27
- # Start the demo worker
28
- # It uses the local packages/dist if available, but Bun can run TS directly
29
- CMD ["bun", "run", "packages/flux-console/scripts/demo-worker.ts"]
@@ -1,2 +0,0 @@
1
- #!/usr/bin/env bun
2
- import '../src/server/index'
@@ -1,130 +0,0 @@
1
- # Zenith Ecosystem Expansion RFC
2
-
3
- **Status**: Draft
4
- **Date**: 2026-01-10
5
- **Goal**: Expand Zenith monitoring capabilities beyond Gravito/Laravel to Python, Node.js, and Go ecosystems.
6
-
7
- ---
8
-
9
- ## 1. Executive Summary
10
-
11
- Gravito Zenith (Flux Console) is a unified control plane for background job processing. Currently, it supports **Gravito Stream** (Native) and **Laravel Queues** (via `laravel-zenith`). To become a true polyglot observability platform, we need to implement connectors for other popular queue systems.
12
-
13
- This RFC defines the **Universal Zenith Protocol (UZP)** and proposes implementation roadmaps for Python (Celery) and Node.js (BullMQ).
14
-
15
- ---
16
-
17
- ## 2. The Universal Zenith Protocol (UZP)
18
-
19
- Any background job system can be monitored by Zenith if it implements the following Redis-based interfaces.
20
-
21
- ### 2.1. Discovery (Heartbeat)
22
- Workers must announce their presence every 30 seconds to avoid being marked as "Offline".
23
-
24
- * **Command**: `SETEX flux_console:worker:<worker_id> 60 <payload>`
25
- * **Payload (JSON)**:
26
- ```json
27
- {
28
- "id": "celery@worker-1",
29
- "hostname": "pod-xyz",
30
- "pid": 1234,
31
- "uptime": 3600,
32
- "queues": ["high", "default"],
33
- "concurrency": 4,
34
- "memory": { "rss": "50MB", "heapUsed": "N/A" },
35
- "framework": "celery", // "laravel", "bullmq", "asynq"
36
- "language": "python", // "php", "typescript", "go"
37
- "timestamp": "2026-01-10T12:00:00Z"
38
- }
39
- ```
40
-
41
- ### 2.2. Event Stream (Logs)
42
- Workers publish lifecycle events to a shared Pub/Sub channel.
43
-
44
- * **Command**: `PUBLISH flux_console:logs <payload>`
45
- * **Payload (JSON)**:
46
- ```json
47
- {
48
- "level": "info", // "info" (start), "success", "error"
49
- "message": "Processing Task: tasks.send_email",
50
- "workerId": "celery@worker-1",
51
- "queue": "default",
52
- "jobId": "uuid-v4",
53
- "timestamp": "2026-01-10T12:00:01Z",
54
- "metadata": {
55
- "attempt": 1,
56
- "latency": 45 // ms (for success/error events)
57
- }
58
- }
59
- ```
60
-
61
- ### 2.3. Metrics (Optional but Recommended)
62
- Connectors should increment counters for throughput aggregation.
63
-
64
- * `INCR flux_console:metrics:processed`
65
- * `INCR flux_console:metrics:failed`
66
-
67
- ---
68
-
69
- ## 3. Implementation Plan: Python (Celery)
70
-
71
- **Target**: `gravito/zenith-celery` (PyPI Package)
72
-
73
- ### Architecture
74
- Celery has a rich Signal system. We can hook into `worker_ready`, `task_prerun`, `task_success`, and `task_failure`.
75
-
76
- ### Component Design
77
- 1. **ZenithMonitor**: A Celery Bootstep that starts a background thread for Heartbeats.
78
- 2. **SignalHandlers**:
79
- * `task_prerun`: Publish `level: info` log.
80
- * `task_success`: Publish `level: success` log + metrics.
81
- * `task_failure`: Publish `level: error` log with traceback.
82
-
83
- ### Configuration
84
- ```python
85
- # celery.py
86
- app.conf.zenith_redis_url = "redis://localhost:6379/0"
87
- app.conf.zenith_enabled = True
88
- ```
89
-
90
- ---
91
-
92
- ## 4. Implementation Plan: Node.js (BullMQ)
93
-
94
- **Target**: `@gravito/zenith-bullmq` (NPM Package)
95
-
96
- _Note: Gravito Stream is based on BullMQ principles but is an internal implementation. This adapter allows **standard** BullMQ instances (e.g., in a NestJS app) to report to Zenith._
97
-
98
- ### Architecture
99
- BullMQ uses `QueueEvents` (which listens to Redis streams). A separate "Monitor" process is the best approach to avoid modifying the worker code too much.
100
-
101
- ### Component Design
102
- 1. **ZenithMonitor Class**:
103
- ```typescript
104
- const monitor = new ZenithMonitor({
105
- connection: redisOptions,
106
- queues: ['email', 'reports']
107
- });
108
- monitor.start();
109
- ```
110
- 2. It listens to BullMQ global events (completed, failed) and bridges them to UZP.
111
- 3. **Heartbeat**: Since BullMQ workers don't have a central registry, the Monitor acts as a "Virtual Worker" or we require users to instantiate a `ZenithWorker` wrapper.
112
-
113
- ---
114
-
115
- ## 5. Implementation Plan: Go (Asynq)
116
-
117
- **Target**: `github.com/gravito-framework/zenith-asynq`
118
-
119
- ### Architecture
120
- Asynq provides `Server` middleware.
121
-
122
- ### Component Design
123
- 1. **Middleware**: `zenith.NewMiddleware(redisClient)`.
124
- 2. Wraps handler execution to capture Start/Success/Fail times.
125
- 3. Publishes to Redis asynchronously.
126
-
127
- ---
128
-
129
- ## 6. Future Work: Rust (Faktory?)
130
- (To be determined based on demand)
@@ -1,40 +0,0 @@
1
- version: '3.8'
2
-
3
- services:
4
- # Main Persistence for Archive
5
- mysql:
6
- image: mysql:8.0
7
- container_name: flux-mysql
8
- ports:
9
- - "3306:3306"
10
- environment:
11
- MYSQL_ROOT_PASSWORD: root
12
- MYSQL_DATABASE: flux
13
- healthcheck:
14
- test: [ "CMD", "mysqladmin", "ping", "-h", "localhost" ]
15
- timeout: 20s
16
- retries: 10
17
-
18
- # Real-time state store
19
- redis:
20
- image: redis:7-alpine
21
- container_name: flux-redis
22
- ports:
23
- - "6379:6379"
24
- # Flux Console (Optional: run locally via npm dev instead)
25
- # console:
26
- # build: .
27
- # ports:
28
- # - "3000:3000"
29
- # environment:
30
- # - REDIS_URL=redis://redis:6379
31
- # - DB_DRIVER=mysql
32
- # - DB_HOST=mysql
33
- # - DB_USER=root
34
- # - DB_PASSWORD=root
35
- # - DB_NAME=flux
36
- # depends_on:
37
- # mysql:
38
- # condition: service_healthy
39
- # redis:
40
- # condition: service_started
@@ -1,71 +0,0 @@
1
- # 🔔 Zenith Alerting Guide
2
-
3
- This guide explains how to configure and manage the alerting system in Zenith to ensure your infrastructure and queues remain healthy.
4
-
5
- ---
6
-
7
- ## 🚀 Overview
8
-
9
- Zenith's alerting engine is **Redis-Native** and **Stateless**.
10
- * **Persistence**: Rules are stored in Redis (`gravito:zenith:alerts:rules`).
11
- * **Evaluation**: The server evaluates all rules every 2 seconds against real-time metrics.
12
- * **Delivery**: Alerts are dispatched via Slack Webhooks.
13
-
14
- ---
15
-
16
- ## 🛠️ Configuration Fields
17
-
18
- When adding a new rule in **Settings > Alerting**, you will encounter these fields:
19
-
20
- ### 1. Rule Name
21
- A descriptive label for the alert (e.g., `Critical Backlog`, `Agent Offline`). This name will appear in the Slack notification.
22
-
23
- ### 2. Type (Metric Category)
24
- * **Queue Backlog**: Monitors the number of jobs in the `waiting` state.
25
- * **High Failure Count**: Monitors the number of jobs in the `failed` state.
26
- * **Worker Loss**: Monitors the total number of active worker nodes.
27
- * **Node CPU (%)**: Monitors process-level CPU usage reported by Quasar Agents.
28
- * **Node RAM (%)**: Monitors process-level RAM usage (RSS) relative to system total.
29
-
30
- ### 3. Threshold
31
- The numeric value that triggers the alert.
32
- * For **Backlog/Failure**: The number of jobs (e.g., `1000`).
33
- * For **CPU/RAM**: The percentage (e.g., `90`).
34
- * For **Worker Loss**: The *minimum* number of workers expected (e.g., alert triggers if count is `< 2`).
35
-
36
- ### 4. Cooldown (Minutes)
37
- **Crucial Concept**: The period the system "stays silent" after an alert is fired.
38
- * **Logic**: Once a rule triggers and sends a notification, it enters a "lock" state for the duration of the cooldown.
39
- * **Purpose**: Prevents "Alert Fatigue" and notification storms.
40
- * **Example**: If set to `30`, and a backlog spike occurs, you get **one** notification. You won't get another one for the same rule for 30 minutes, even if the backlog remains high.
41
-
42
- ### 5. Queue (Optional)
43
- Specify a specific queue name (e.g., `orders`, `emails`) to monitor. If left empty, the rule applies to the **total sum** of all queues.
44
-
45
- ---
46
-
47
- ## 🌊 Best Practices
48
-
49
- ### The "Instant Fire" Design
50
- Zenith alerts are designed for **instant awareness**.
51
- * If a threshold is met during a 2-second check, the alert fires **immediately**.
52
- * It does **not** wait for the condition to persist for multiple minutes (i.e., there is no debouncing).
53
- * **Pro Tip**: If you have frequent "tiny spikes" that resolve themselves in seconds, set your **Threshold** slightly higher than the spikes to avoid noise.
54
-
55
- ### Recommended Settings
56
-
57
- | Scenario | Type | Threshold | Cooldown |
58
- | :--- | :--- | :--- | :--- |
59
- | **Critical Failure** | High Failure Count | 50 | 15m |
60
- | **System Overload** | Node CPU | 90 | 30m |
61
- | **Quiet Hours** | Queue Backlog | 5000 | 120m |
62
- | **Fatal Shutdown** | Worker Loss | 1 | 10m |
63
-
64
- ---
65
-
66
- ## 🔗 Slack Integration
67
- To receive notifications, ensure the `SLACK_WEBHOOK_URL` environment variable is set before starting the Zenith server.
68
-
69
- ```bash
70
- export SLACK_WEBHOOK_URL=https://hooks.slack.com/services/Txxx/Bxxx/Xxxx
71
- ```
@@ -1,157 +0,0 @@
1
- # Flux Console Deployment Guide
2
-
3
- This whitepaper outlines the recommended deployment strategies for Gravito Flux Console in various environments, from local development to enterprise-scale production clusters.
4
-
5
- ## 1. Deployment Philosophy: "Zero-Config, Anywhere"
6
-
7
- Flux Console is designed to be infrastructure-agnostic. It acts as a stateless monitoring interface that connects to your existing infrastructure (Redis). It does not require its own dedicated database for basic operation.
8
-
9
- ### Core Dependencies
10
- - **Runtime**: Node.js 18+ OR Bun 1.0+ (or use the standalone binary)
11
- - **Infrastructure**: Redis 6.0+ (Required for state coordination)
12
- - **Optional**: SQL Database (MySQL/PostgreSQL) for History Persistence (Future Feature)
13
-
14
- ---
15
-
16
- ## 2. Deployment Scenarios
17
-
18
- ### Scenario A: Local Development (The "NPM" Way)
19
- Best for individual developers debugging workers locally.
20
-
21
- **Prerequisites:** Node.js or Bun installed.
22
-
23
- ```bash
24
- # S1. Run directly via npx (Zero Installation)
25
- npx @gravito/flux-console
26
- # Automatically detects local Redis at localhost:6379 and opens browser.
27
-
28
- # S2. Install globally for frequent use
29
- npm install -g @gravito/flux-console
30
- flux-console start
31
- ```
32
-
33
- ### Scenario B: Traditional VM / EC2 (The "Process" Way)
34
- Best for bare-metal servers or performance-critical environments where avoiding Docker overhead is desired.
35
-
36
- **Option 1: Node.js + PM2 (Recommended)**
37
- ```bash
38
- # 1. Install globally
39
- npm install -g @gravito/flux-console pm2
40
-
41
- # 2. Start with PM2 for auto-restart and log management
42
- pm2 start flux-console --name flux-monitor -- --port 3000
43
-
44
- # 3. Configure Env Vars (if Redis is remote)
45
- pm2 set flux-monitor:env.REDIS_URL redis://prod-redis:6379
46
- ```
47
-
48
- **Option 2: Standalone Binary (The "Go" Way)**
49
- *Ideal for restricted environments without Node.js installed.*
50
- 1. Download the binary: `flux-console-linux-x64`
51
- 2. `chmod +x ./flux-console-linux-x64`
52
- 3. `./flux-console-linux-x64`
53
-
54
- ### Scenario C: Docker & Container Platforms (The "Cloud-Native" Way)
55
- Best for Kubernetes, AWS ECS, Google Cloud Run, or simple Docker Compose setups.
56
-
57
- **1. Docker Run**
58
- ```bash
59
- docker run -d \
60
- -p 3000:3000 \
61
- -e REDIS_URL=redis://your-redis-host:6379 \
62
- -e AUTH_SECRET=my-super-secret-password \
63
- --name flux-console \
64
- gravito/flux-console:latest
65
- ```
66
-
67
- **2. Docker Compose (Full Stack Example)**
68
- ```yaml
69
- version: '3.8'
70
- services:
71
- redis:
72
- image: redis:alpine
73
- ports:
74
- - "6379:6379"
75
-
76
- flux-console:
77
- image: gravito/flux-console:latest
78
- ports:
79
- - "3000:3000"
80
- environment:
81
- - REDIS_URL=redis://redis:6379
82
- - PORT=3000
83
- depends_on:
84
- - redis
85
-
86
- # Your Application Workers
87
- worker-orders:
88
- build: .
89
- command: npm run start:worker
90
- environment:
91
- - REDIS_URL=redis://redis:6379
92
- ```
93
-
94
- **3. Kubernetes (K8s)**
95
- Deploy as a simple Deployment + Service.
96
-
97
- ```yaml
98
- apiVersion: apps/v1
99
- kind: Deployment
100
- metadata:
101
- name: flux-console
102
- spec:
103
- replicas: 1
104
- selector:
105
- matchLabels:
106
- app: flux-console
107
- template:
108
- metadata:
109
- labels:
110
- app: flux-console
111
- spec:
112
- containers:
113
- - name: flux-console
114
- image: gravito/flux-console:latest
115
- env:
116
- - name: REDIS_URL
117
- valueFrom:
118
- secretKeyRef:
119
- name: redis-secrets
120
- key: url
121
- ports:
122
- - containerPort: 3000
123
- ```
124
-
125
- ---
126
-
127
- ## 3. Security Best Practices
128
-
129
- Since Flux Console provides administrative capabilities (Pause Queue, Retry Job, Delete Job), security is paramount in production.
130
-
131
- 1. **Network Isolation (Private VPC)**:
132
- - **Recommendation**: Do NOT expose Flux Console to the public internet.
133
- - Deploy it within your VPC / Private Subnet.
134
- - Access via VPN or SSH Tunnel.
135
-
136
- 2. **Authentication**:
137
- - Enable built-in simple auth by setting `AUTH_PASSWORD` env var.
138
- - For enterprise, put it behind an Identity Aware Proxy (e.g., Cloudflare Access, AWS ALB OIDC) to enforce SSO (Google/Okta) login.
139
-
140
- 3. **Read-Only Mode (Future Feature)**:
141
- - To give support teams access, run a separate instance with the `READ_ONLY=true` env var (Roadmap item).
142
-
143
- ## 4. Scaling (High Availability)
144
-
145
- Flux Console is **stateless**. You can run multiple instances behind a Load Balancer for high availability.
146
-
147
- - **Session Affinity**: Not required (JWT-based auth).
148
- - **Resource Usage**: Very low (mostly forwarding Redis data). A standard `t3.micro` or `256MB` container is usually sufficient for monitoring even large clusters.
149
-
150
- ---
151
-
152
- ## 5. Troubleshooting
153
-
154
- **Common Issue: "Cannot connect to Redis"**
155
- - **Docker**: Ensure you use the service name (e.g., `redis`) not `localhost` if inside the same network. Host networking might be needed for accessing host Redis.
156
- - **AWS ElastiCache**: Ensure Security Groups allow traffic on port 6379 from the Console's security group.
157
- - **Encryption**: If Redis uses TLS (rediss://), ensure certificates are trusted or use `REDIS_TLS_REJECT_UNAUTHORIZED=0` (not recommended for prod).
@@ -1,73 +0,0 @@
1
- # Internal Technical Documentation
2
-
3
- This document records technical implementations for Dead Letter Queues (DLQ) and Worker Metrics within the Flux system.
4
-
5
- ## 1. Dead Letter Queue (DLQ)
6
-
7
- ### Storage (Redis)
8
- Failed jobs are moved to a specific list with the suffix `:failed`.
9
- - **Key**: `{queue}:failed`
10
- - **Cap**: 1,000 items (capped via `LTRIM` in `RedisDriver.fail`).
11
-
12
- ### Life Cycle
13
- 1. `Worker` attempts to process a job.
14
- 2. On failure, `Worker` calculates retry delay using `job.getRetryDelay(attempt)`.
15
- 3. If `attempt >= maxAttempts`, `Consumer` catches the error.
16
- 4. `Consumer` calls `QueueManager.fail(job, error)`.
17
- 5. Driver pushes the job to the `:failed` list with `error` and `failedAt` metadata.
18
-
19
- ---
20
-
21
- ## 2. Worker Metrics
22
-
23
- Workers report health metrics during their heartbeat cycle (default: every 5s).
24
-
25
- ### Metric Payload Schema
26
- ```json
27
- {
28
- "cpu": 0.15, // Load average (normalized by cores)
29
- "ram": {
30
- "rss": 120, // Resident Set Size (MB)
31
- "heapUsed": 45, // V8 Heap Used (MB)
32
- "heapTotal": 64 // V8 Heap Total (MB)
33
- }
34
- }
35
- ```
36
-
37
- ### Storage
38
- In Redis, metrics are stored as part of the `flux_console:workers:{id}` hash.
39
- - **Field**: `metrics` (JSON string)
40
-
41
- ---
42
-
43
- ## 3. Bulk Retry Logic (Lua)
44
-
45
- To ensure atomicity and performance, bulk retries of failed jobs use Lua scripts.
46
-
47
- ### Retry All Script
48
- Moves all elements from `{queue}:failed` to `{queue}` then deletes the failed list.
49
- ```lua
50
- local jobs = redis.call('LRANGE', KEYS[1], 0, -1)
51
- for i, job in ipairs(jobs) do
52
- redis.call('RPUSH', KEYS[2], job)
53
- end
54
- redis.call('DEL', KEYS[1])
55
- return #jobs
56
- ```
57
-
58
- ---
59
-
60
- ## 4. System Logs & Archiving
61
-
62
- To maintain a permanent record of system events while keeping Redis memory usage low, Flux Console uses an asynchronous archiving pattern.
63
-
64
- ### Live Logs (Redis)
65
- * **Key**: `flux_console:logs:system` (List)
66
- * **Strategy**: FIFO (First-In-First-Out), capped at 100 items.
67
- * **Update**: Every `publishLog` call pushes to this list and trims it.
68
-
69
- ### Persistent Archiving (SQL)
70
- * **Trigger**: Every `QueueService.publishLog` call asynchronously sends the log to the configured `PersistenceAdapter`.
71
- * **Table**: `flux_system_logs` (MySQL or SQLite).
72
- * **Search**: The `/api/logs/archive` endpoint performs direct SQL queries with filters on `level`, `worker_id`, `queue`, and `message` content.
73
- * **Retention**: Cleanup is handled via `PersistenceAdapter.cleanup`, removing logs older than the configured threshold (default: 30 days).