@orkify/cli 1.0.0-beta.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (203) hide show
  1. package/LICENSE +191 -0
  2. package/README.md +1701 -0
  3. package/bin/orkify +3 -0
  4. package/boot/systemd/orkify@.service +30 -0
  5. package/dist/agent-name.d.ts +4 -0
  6. package/dist/agent-name.js +42 -0
  7. package/dist/alerts/AlertEvaluator.d.ts +14 -0
  8. package/dist/alerts/AlertEvaluator.js +135 -0
  9. package/dist/cli/commands/autostart.d.ts +3 -0
  10. package/dist/cli/commands/autostart.js +11 -0
  11. package/dist/cli/commands/crash-test.d.ts +3 -0
  12. package/dist/cli/commands/crash-test.js +17 -0
  13. package/dist/cli/commands/daemon-reload.d.ts +3 -0
  14. package/dist/cli/commands/daemon-reload.js +72 -0
  15. package/dist/cli/commands/delete.d.ts +3 -0
  16. package/dist/cli/commands/delete.js +37 -0
  17. package/dist/cli/commands/deploy.d.ts +6 -0
  18. package/dist/cli/commands/deploy.js +266 -0
  19. package/dist/cli/commands/down.d.ts +3 -0
  20. package/dist/cli/commands/down.js +36 -0
  21. package/dist/cli/commands/flush.d.ts +3 -0
  22. package/dist/cli/commands/flush.js +28 -0
  23. package/dist/cli/commands/kill.d.ts +3 -0
  24. package/dist/cli/commands/kill.js +35 -0
  25. package/dist/cli/commands/list.d.ts +14 -0
  26. package/dist/cli/commands/list.js +361 -0
  27. package/dist/cli/commands/logs.d.ts +3 -0
  28. package/dist/cli/commands/logs.js +107 -0
  29. package/dist/cli/commands/mcp.d.ts +3 -0
  30. package/dist/cli/commands/mcp.js +151 -0
  31. package/dist/cli/commands/reload.d.ts +3 -0
  32. package/dist/cli/commands/reload.js +54 -0
  33. package/dist/cli/commands/restart.d.ts +3 -0
  34. package/dist/cli/commands/restart.js +43 -0
  35. package/dist/cli/commands/restore.d.ts +3 -0
  36. package/dist/cli/commands/restore.js +88 -0
  37. package/dist/cli/commands/run.d.ts +8 -0
  38. package/dist/cli/commands/run.js +212 -0
  39. package/dist/cli/commands/snap.d.ts +3 -0
  40. package/dist/cli/commands/snap.js +30 -0
  41. package/dist/cli/commands/up.d.ts +3 -0
  42. package/dist/cli/commands/up.js +125 -0
  43. package/dist/cli/crash-recovery.d.ts +2 -0
  44. package/dist/cli/crash-recovery.js +67 -0
  45. package/dist/cli/index.d.ts +3 -0
  46. package/dist/cli/index.js +46 -0
  47. package/dist/cli/parse.d.ts +28 -0
  48. package/dist/cli/parse.js +97 -0
  49. package/dist/cluster/ClusterWrapper.d.ts +18 -0
  50. package/dist/cluster/ClusterWrapper.js +602 -0
  51. package/dist/config/ConfigStore.d.ts +11 -0
  52. package/dist/config/ConfigStore.js +21 -0
  53. package/dist/config/schema.d.ts +103 -0
  54. package/dist/config/schema.js +49 -0
  55. package/dist/constants.d.ts +83 -0
  56. package/dist/constants.js +289 -0
  57. package/dist/cron/CronScheduler.d.ts +25 -0
  58. package/dist/cron/CronScheduler.js +149 -0
  59. package/dist/daemon/GracefulManager.d.ts +8 -0
  60. package/dist/daemon/GracefulManager.js +29 -0
  61. package/dist/daemon/ManagedProcess.d.ts +71 -0
  62. package/dist/daemon/ManagedProcess.js +1020 -0
  63. package/dist/daemon/Orchestrator.d.ts +51 -0
  64. package/dist/daemon/Orchestrator.js +416 -0
  65. package/dist/daemon/RotatingWriter.d.ts +27 -0
  66. package/dist/daemon/RotatingWriter.js +264 -0
  67. package/dist/daemon/index.d.ts +2 -0
  68. package/dist/daemon/index.js +106 -0
  69. package/dist/daemon/startDaemon.d.ts +30 -0
  70. package/dist/daemon/startDaemon.js +693 -0
  71. package/dist/deploy/CommandPoller.d.ts +13 -0
  72. package/dist/deploy/CommandPoller.js +53 -0
  73. package/dist/deploy/DeployExecutor.d.ts +33 -0
  74. package/dist/deploy/DeployExecutor.js +340 -0
  75. package/dist/deploy/config.d.ts +20 -0
  76. package/dist/deploy/config.js +161 -0
  77. package/dist/deploy/env.d.ts +2 -0
  78. package/dist/deploy/env.js +17 -0
  79. package/dist/deploy/tarball.d.ts +32 -0
  80. package/dist/deploy/tarball.js +243 -0
  81. package/dist/detect/framework.d.ts +2 -0
  82. package/dist/detect/framework.js +24 -0
  83. package/dist/ipc/DaemonClient.d.ts +31 -0
  84. package/dist/ipc/DaemonClient.js +248 -0
  85. package/dist/ipc/DaemonServer.d.ts +28 -0
  86. package/dist/ipc/DaemonServer.js +166 -0
  87. package/dist/ipc/MultiUserClient.d.ts +27 -0
  88. package/dist/ipc/MultiUserClient.js +203 -0
  89. package/dist/ipc/protocol.d.ts +7 -0
  90. package/dist/ipc/protocol.js +53 -0
  91. package/dist/ipc/restoreDaemon.d.ts +8 -0
  92. package/dist/ipc/restoreDaemon.js +19 -0
  93. package/dist/machine-id.d.ts +11 -0
  94. package/dist/machine-id.js +51 -0
  95. package/dist/mcp/auth.d.ts +118 -0
  96. package/dist/mcp/auth.js +245 -0
  97. package/dist/mcp/http.d.ts +20 -0
  98. package/dist/mcp/http.js +229 -0
  99. package/dist/mcp/index.d.ts +3 -0
  100. package/dist/mcp/index.js +8 -0
  101. package/dist/mcp/server.d.ts +37 -0
  102. package/dist/mcp/server.js +413 -0
  103. package/dist/probe/compute-fingerprint.d.ts +27 -0
  104. package/dist/probe/compute-fingerprint.js +65 -0
  105. package/dist/probe/parse-frames.d.ts +21 -0
  106. package/dist/probe/parse-frames.js +57 -0
  107. package/dist/probe/resolve-sourcemaps.d.ts +25 -0
  108. package/dist/probe/resolve-sourcemaps.js +281 -0
  109. package/dist/state/StateStore.d.ts +11 -0
  110. package/dist/state/StateStore.js +78 -0
  111. package/dist/telemetry/TelemetryReporter.d.ts +49 -0
  112. package/dist/telemetry/TelemetryReporter.js +451 -0
  113. package/dist/types/index.d.ts +373 -0
  114. package/dist/types/index.js +2 -0
  115. package/package.json +148 -0
  116. package/packages/cache/README.md +114 -0
  117. package/packages/cache/dist/CacheClient.d.ts +26 -0
  118. package/packages/cache/dist/CacheClient.d.ts.map +1 -0
  119. package/packages/cache/dist/CacheClient.js +174 -0
  120. package/packages/cache/dist/CacheClient.js.map +1 -0
  121. package/packages/cache/dist/CacheFileStore.d.ts +45 -0
  122. package/packages/cache/dist/CacheFileStore.d.ts.map +1 -0
  123. package/packages/cache/dist/CacheFileStore.js +446 -0
  124. package/packages/cache/dist/CacheFileStore.js.map +1 -0
  125. package/packages/cache/dist/CachePersistence.d.ts +9 -0
  126. package/packages/cache/dist/CachePersistence.d.ts.map +1 -0
  127. package/packages/cache/dist/CachePersistence.js +67 -0
  128. package/packages/cache/dist/CachePersistence.js.map +1 -0
  129. package/packages/cache/dist/CachePrimary.d.ts +25 -0
  130. package/packages/cache/dist/CachePrimary.d.ts.map +1 -0
  131. package/packages/cache/dist/CachePrimary.js +155 -0
  132. package/packages/cache/dist/CachePrimary.js.map +1 -0
  133. package/packages/cache/dist/CacheStore.d.ts +50 -0
  134. package/packages/cache/dist/CacheStore.d.ts.map +1 -0
  135. package/packages/cache/dist/CacheStore.js +271 -0
  136. package/packages/cache/dist/CacheStore.js.map +1 -0
  137. package/packages/cache/dist/constants.d.ts +6 -0
  138. package/packages/cache/dist/constants.d.ts.map +1 -0
  139. package/packages/cache/dist/constants.js +9 -0
  140. package/packages/cache/dist/constants.js.map +1 -0
  141. package/packages/cache/dist/index.d.ts +16 -0
  142. package/packages/cache/dist/index.d.ts.map +1 -0
  143. package/packages/cache/dist/index.js +86 -0
  144. package/packages/cache/dist/index.js.map +1 -0
  145. package/packages/cache/dist/serialize.d.ts +9 -0
  146. package/packages/cache/dist/serialize.d.ts.map +1 -0
  147. package/packages/cache/dist/serialize.js +40 -0
  148. package/packages/cache/dist/serialize.js.map +1 -0
  149. package/packages/cache/dist/types.d.ts +123 -0
  150. package/packages/cache/dist/types.d.ts.map +1 -0
  151. package/packages/cache/dist/types.js +2 -0
  152. package/packages/cache/dist/types.js.map +1 -0
  153. package/packages/cache/package.json +27 -0
  154. package/packages/cache/src/CacheClient.ts +227 -0
  155. package/packages/cache/src/CacheFileStore.ts +528 -0
  156. package/packages/cache/src/CachePersistence.ts +89 -0
  157. package/packages/cache/src/CachePrimary.ts +172 -0
  158. package/packages/cache/src/CacheStore.ts +308 -0
  159. package/packages/cache/src/constants.ts +10 -0
  160. package/packages/cache/src/index.ts +100 -0
  161. package/packages/cache/src/serialize.ts +49 -0
  162. package/packages/cache/src/types.ts +156 -0
  163. package/packages/cache/tsconfig.json +18 -0
  164. package/packages/cache/tsconfig.tsbuildinfo +1 -0
  165. package/packages/next/README.md +166 -0
  166. package/packages/next/dist/error-capture.d.ts +34 -0
  167. package/packages/next/dist/error-capture.d.ts.map +1 -0
  168. package/packages/next/dist/error-capture.js +130 -0
  169. package/packages/next/dist/error-capture.js.map +1 -0
  170. package/packages/next/dist/error-handler.d.ts +10 -0
  171. package/packages/next/dist/error-handler.d.ts.map +1 -0
  172. package/packages/next/dist/error-handler.js +186 -0
  173. package/packages/next/dist/error-handler.js.map +1 -0
  174. package/packages/next/dist/isr-cache.d.ts +9 -0
  175. package/packages/next/dist/isr-cache.d.ts.map +1 -0
  176. package/packages/next/dist/isr-cache.js +86 -0
  177. package/packages/next/dist/isr-cache.js.map +1 -0
  178. package/packages/next/dist/stream.d.ts +5 -0
  179. package/packages/next/dist/stream.d.ts.map +1 -0
  180. package/packages/next/dist/stream.js +22 -0
  181. package/packages/next/dist/stream.js.map +1 -0
  182. package/packages/next/dist/types.d.ts +33 -0
  183. package/packages/next/dist/types.d.ts.map +1 -0
  184. package/packages/next/dist/types.js +6 -0
  185. package/packages/next/dist/types.js.map +1 -0
  186. package/packages/next/dist/use-cache.d.ts +4 -0
  187. package/packages/next/dist/use-cache.d.ts.map +1 -0
  188. package/packages/next/dist/use-cache.js +86 -0
  189. package/packages/next/dist/use-cache.js.map +1 -0
  190. package/packages/next/dist/utils.d.ts +32 -0
  191. package/packages/next/dist/utils.d.ts.map +1 -0
  192. package/packages/next/dist/utils.js +88 -0
  193. package/packages/next/dist/utils.js.map +1 -0
  194. package/packages/next/package.json +52 -0
  195. package/packages/next/src/error-capture.ts +177 -0
  196. package/packages/next/src/error-handler.ts +221 -0
  197. package/packages/next/src/isr-cache.ts +100 -0
  198. package/packages/next/src/stream.ts +23 -0
  199. package/packages/next/src/types.ts +33 -0
  200. package/packages/next/src/use-cache.ts +99 -0
  201. package/packages/next/src/utils.ts +102 -0
  202. package/packages/next/tsconfig.json +19 -0
  203. package/packages/next/tsconfig.tsbuildinfo +1 -0
package/README.md ADDED
@@ -0,0 +1,1701 @@
1
+ <h1><img src="assets/icon.png" alt="" width="36" align="center" /> orkify</h1>
2
+
3
+ [![Beta](https://img.shields.io/badge/status-beta-yellow)](https://github.com/orkify/orkify)
4
+ [![CI](https://github.com/orkify/orkify/actions/workflows/ci.yml/badge.svg)](https://github.com/orkify/orkify/actions/workflows/ci.yml)
5
+ [![npm](https://img.shields.io/npm/v/orkify)](https://www.npmjs.com/package/orkify)
6
+ [![Node](https://img.shields.io/node/v/orkify)](https://nodejs.org/)
7
+ [![License](https://img.shields.io/npm/l/orkify)](https://github.com/orkify/orkify/blob/main/LICENSE)
8
+ [![TypeScript](https://img.shields.io/badge/TypeScript-%E2%89%A55.9-blue?logo=typescript&logoColor=white)](https://www.typescriptlang.org/)
9
+
10
+ Modern JS process orchestration and deployment for your own infrastructure.
11
+
12
+ ## Table of Contents
13
+
14
+ - [Features](#features)
15
+ - [Installation](#installation)
16
+ - [Quick Start](#quick-start)
17
+ - [Commands](#commands)
18
+ - [Options for `up` and `run`](#options-for-up-and-run)
19
+ - [Cluster Mode](#cluster-mode)
20
+ - [Zero-Downtime Reload](#zero-downtime-reload)
21
+ - [Worker Readiness](#worker-readiness)
22
+ - [Graceful Shutdown](#graceful-shutdown)
23
+ - [Environment Variables](#environment-variables)
24
+ - [Worker IPC (Broadcasting)](#worker-ipc-broadcasting)
25
+ - [Shared Cluster Cache](#shared-cluster-cache)
26
+ - [Next.js](#nextjs)
27
+ - [Socket.IO / WebSocket Support](#socketio--websocket-support)
28
+ - [Log Rotation](#log-rotation)
29
+ - [Environment Files](#environment-files)
30
+ - [Snapshot File](#snapshot-file)
31
+ - [Boot Persistence](#boot-persistence)
32
+ - [Container Mode](#container-mode)
33
+ - [Deployment](#deployment)
34
+ - [Source Map Support](#source-map-support)
35
+ - [Cron Scheduler](#cron-scheduler)
36
+ - [MCP Integration](#mcp-integration)
37
+ - [Architecture](#architecture)
38
+ - [Requirements](#requirements)
39
+ - [License](#license)
40
+
41
+ ## Features
42
+
43
+ - **Cluster Mode** - Run multiple workers sharing the same port using Node's cluster module
44
+ - **Cross-Platform Load Balancing** - True round-robin distribution across all workers on Linux, macOS, and Windows
45
+ - **Zero-Downtime Reload** - Rolling restarts that replace workers one-by-one with no dropped requests
46
+ - **WebSocket Sticky Sessions** - Built-in session affinity for Socket.IO and WebSocket connections
47
+ - **Process Persistence** - Save running processes and restore them after reboot
48
+ - **Auto-Restart** - Automatically restart crashed processes with configurable limits
49
+ - **File Watching** - Reload on file changes during development
50
+ - **Log Rotation** - Automatic log rotation with gzip compression and configurable retention
51
+ - **Deployment** - Local and remote deploy with automatic rollback
52
+ - **Cron Scheduler** - Built-in cron that dispatches HTTP requests to managed processes on a schedule
53
+ - **Native TypeScript** - Run `.ts` files directly with no build step (Node.js 22.18+)
54
+ - **Shared Cluster Cache** - Built-in in-memory cache with zero-config cross-worker sync via IPC
55
+ - **Next.js** - Auto-detection, `'use cache'` and ISR cache handlers, Server Actions encryption key, security header stripping (CVE-2025-29927), version skew protection, ISR request coalescing
56
+ - **Modern Stack** - Pure ESM, TypeScript, Node.js 22.18+
57
+ - **MCP Integration** - Built-in Model Context Protocol server for AI tool integration
58
+
59
+ ## Installation
60
+
61
+ ```bash
62
+ npm install
63
+ npm run build
64
+ ```
65
+
66
+ ## Quick Start
67
+
68
+ ```bash
69
+ # Start a single process (daemon mode)
70
+ orkify up app.js
71
+
72
+ # TypeScript works out of the box — no build step
73
+ orkify up app.ts
74
+
75
+ # Start with one worker per CPU core
76
+ orkify up app.js -w 0
77
+
78
+ # Start with 4 clustered workers
79
+ orkify up app.js -w 4
80
+
81
+ # Start with a custom name
82
+ orkify up app.js -n my-api -w 4
83
+
84
+ # Enable file watching for development
85
+ orkify up app.js --watch
86
+
87
+ # Enable sticky sessions for Socket.IO
88
+ orkify up server.js -w 4 --sticky --port 3000
89
+
90
+ # Run in foreground (for containers like Docker/Kubernetes)
91
+ orkify run app.js -w 4
92
+ ```
93
+
94
+ ## Commands
95
+
96
+ | Command | Description |
97
+ | -------------------------------- | ---------------------------------------------------------------- |
98
+ | `orkify up <script>` | Start a process (daemon mode) |
99
+ | `orkify down <name\|id\|all>` | Stop process(es) |
100
+ | `orkify run <script>` | Run in foreground (for containers) |
101
+ | `orkify restart <name\|id\|all>` | Hard restart (stop + start) |
102
+ | `orkify reload <name\|id\|all>` | Zero-downtime rolling reload |
103
+ | `orkify list` | List all processes with status |
104
+ | `orkify list -f, --follow` | Live monitoring — auto-refreshing process table (Ctrl+C to stop) |
105
+ | `orkify list -v, --verbose` | Verbose list (includes PIDs) |
106
+ | `orkify list --all-users` | List processes from all users (requires sudo) |
107
+ | `orkify logs [name]` | View logs (-f to follow, -n lines, --err/--out) |
108
+ | `orkify delete <name\|id\|all>` | Stop and remove from process list |
109
+ | `orkify flush [name\|id\|all]` | Truncate logs and remove rotated archives |
110
+ | `orkify snap [file] [--no-env]` | Snapshot current process list |
111
+ | `orkify restore [file]` | Restore previously saved processes (--no-remote) |
112
+ | `orkify kill [--force]` | Stop the daemon (--force skips graceful shutdown) |
113
+ | `orkify daemon-reload` | Reload daemon code (snap → kill → restore) |
114
+ | `orkify autostart` | Set up [boot persistence](#boot-persistence) |
115
+ | `orkify deploy pack [dir]` | Create a [deploy](#deployment) tarball |
116
+ | `orkify deploy local <tarball>` | [Deploy](#deployment) from a local tarball |
117
+ | `orkify deploy upload [dir]` | Upload a build artifact for [deployment](#deployment) |
118
+ | `orkify mcp` | Start [MCP](#mcp-integration) server for AI tools (stdio) |
119
+ | `orkify mcp --simple-http` | Start [MCP](#mcp-integration) HTTP server (runs inside daemon) |
120
+ | `orkify mcp stop` | Stop the [MCP](#mcp-integration) HTTP server |
121
+ | `orkify mcp status` | Show [MCP](#mcp-integration) HTTP server status |
122
+ | `orkify mcp keygen` | Generate a new [MCP](#mcp-integration) API key |
123
+
124
+ ## Options for `up` and `run`
125
+
126
+ ```
127
+ -n, --name <name> Process name
128
+ -w, --workers <number> Number of workers (0 = CPU cores, -1 = CPUs-1)
129
+ --watch Watch for file changes and reload (up only)
130
+ --watch-paths <paths...> Specific paths to watch (up only)
131
+ --cwd <path> Working directory
132
+ --node-args="<args>" Arguments passed to Node.js (quoted)
133
+ --args="<args>" Arguments passed to your script (quoted)
134
+ --kill-timeout <ms> Graceful shutdown timeout (default: 5000)
135
+ --max-restarts <count> Max restart attempts (default: 10)
136
+ --min-uptime <ms> Min uptime before restart counts (default: 1000)
137
+ --restart-delay <ms> Delay between restarts (default: 100)
138
+ --sticky Enable sticky sessions for WebSocket/Socket.IO
139
+ --port <port> Port for sticky routing (defaults to PORT env)
140
+ --reload-retries <count> Retries per worker slot during reload (0-3, default: 3)
141
+ --health-check <path> Health check endpoint (e.g. /health, requires --port)
142
+ --log-max-size <size> Max log file size before rotation (default: 100M)
143
+ --log-max-files <count> Rotated log files to keep (default: 90, 0 = no rotation)
144
+ --log-max-age <days> Delete rotated logs older than N days (default: 90, 0 = no limit)
145
+ --cron <spec> Cron job (repeatable) — see Cron Scheduler
146
+ ```
147
+
148
+ `--restart-on-mem <size>` — Restart a worker when its RSS exceeds the [memory threshold](#memory-threshold-restart) (e.g. 512M, 1G)
149
+
150
+ ## Cluster Mode
151
+
152
+ When you specify `-w <workers>` with more than 1 worker, orkify runs your app in cluster mode:
153
+
154
+ ```bash
155
+ orkify up server.js -w 4
156
+ ```
157
+
158
+ This spawns a primary process that manages 4 worker processes. All workers share the same port - Node's cluster module handles the load balancing automatically.
159
+
160
+ ```
161
+ ┌──────┬──────────┬─────────┬───┬───┬────────┬──────┬──────────┬────────┐
162
+ │ id │ name │ mode │ ↺ │ ✘ │ status │ cpu │ mem │ uptime │
163
+ ├──────┼──────────┼─────────┼───┼───┼────────┼──────┼──────────┼────────┤
164
+ │ 0 │ server │ cluster │ 0 │ 0 │ online │ 0.0% │ 192.1 MB │ - │
165
+ │ │ ├──────────┼─────────┼───┼───┼────────┼──────┼──────────┼────────┤
166
+ │ ├─ 0 │ worker 0 │ │ 0 │ 0 │ online │ 0.0% │ 48.2 MB │ 5m │
167
+ │ │ ├──────────┼─────────┼───┼───┼────────┼──────┼──────────┼────────┤
168
+ │ ├─ 1 │ worker 1 │ │ 0 │ 0 │ online │ 0.0% │ 47.9 MB │ 5m │
169
+ │ │ ├──────────┼─────────┼───┼───┼────────┼──────┼──────────┼────────┤
170
+ │ ├─ 2 │ worker 2 │ │ 0 │ 0 │ online │ 0.0% │ 48.1 MB │ 5m │
171
+ │ │ ├──────────┼─────────┼───┼───┼────────┼──────┼──────────┼────────┤
172
+ │ └─ 3 │ worker 3 │ │ 0 │ 0 │ online │ 0.0% │ 48.0 MB │ 5m │
173
+ └──────┴──────────┴─────────┴───┴───┴────────┴──────┴──────────┴────────┘
174
+ ```
175
+
176
+ ## Zero-Downtime Reload
177
+
178
+ The `reload` command performs a rolling restart:
179
+
180
+ 1. Spawn a new worker
181
+ 2. Wait for it to signal ready
182
+ 3. Gracefully stop the old worker
183
+ 4. Repeat for each worker
184
+
185
+ ```bash
186
+ orkify reload my-api
187
+ ```
188
+
189
+ During reload, there's always at least one worker handling requests - no downtime.
190
+
191
+ ### Reload Failure Handling
192
+
193
+ Each worker slot gets up to N retries during reload (default 3, max 3, configurable with `--reload-retries`):
194
+
195
+ ```bash
196
+ # Disable retries (immediate failure on first timeout)
197
+ orkify up app.js -w 4 --reload-retries 0
198
+
199
+ # Use 1 retry per slot
200
+ orkify up app.js -w 4 --reload-retries 1
201
+ ```
202
+
203
+ If a new worker fails to become ready after all retries:
204
+
205
+ - The **old worker is kept alive** (no process loss)
206
+ - The old worker is marked as **stale** — shown as `online (stale)` in `orkify list`
207
+ - Remaining worker slots are **aborted** to prevent cascading failures
208
+
209
+ Fix the issue and reload again — a successful reload clears all stale flags.
210
+
211
+ ## Memory Threshold Restart
212
+
213
+ Automatically restart workers when their RSS memory exceeds a threshold — a safety net for memory-leaking apps:
214
+
215
+ ```bash
216
+ orkify up server.js -w 4 --restart-on-mem 512M
217
+ ```
218
+
219
+ **How it works:**
220
+
221
+ - Checked every 1 second (piggybacks on the existing stats collection interval)
222
+ - **Per-worker**: each worker is checked individually against the threshold, not the aggregate cluster total
223
+ - **Cluster mode**: zero-downtime — a replacement worker is spawned and must become ready before the old one is stopped
224
+ - **Fork mode**: the process is stopped then restarted (brief downtime is unavoidable with a single process)
225
+ - **30-second cooldown** per worker after each memory restart to let the new process stabilize
226
+ - Counts as a restart (visible in `orkify list`) but **not** a crash — does not count toward `--max-restarts`
227
+
228
+ ## Worker Readiness
229
+
230
+ orkify auto-detects when your app starts listening on a port — no extra code needed. If your app calls `server.listen()`, workers are automatically marked as `online`. This works in both fork mode and cluster mode.
231
+
232
+ For background workers or queue consumers that **don't bind a port**, signal ready manually:
233
+
234
+ ```javascript
235
+ // Only needed for apps that don't call server.listen()
236
+ if (process.send) {
237
+ process.send('ready');
238
+ }
239
+ ```
240
+
241
+ Both signals are equivalent — whichever arrives first marks the worker as `online`. If neither arrives within 30 seconds, the worker is marked as `errored`.
242
+
243
+ ### Health Check Readiness
244
+
245
+ When `--health-check` is set (e.g. `--health-check /health`), orkify performs an HTTP readiness check **after** a worker signals ready but **before** declaring it online:
246
+
247
+ ```bash
248
+ orkify up server.js -w 4 --port 3000 --health-check /health
249
+ ```
250
+
251
+ The flow:
252
+
253
+ 1. Worker signals ready (listening event or `process.send('ready')`)
254
+ 2. orkify hits `http://localhost:{port}{healthCheck}` — retries up to 3 times with 1s delay
255
+ 3. If 2xx response → worker is declared online (old worker can be stopped during reload)
256
+ 4. If all retries fail → worker is treated as failed
257
+
258
+ This applies to **all reloads**, not just deploys. If `--health-check` is set but `--port` is not, the health check is skipped.
259
+
260
+ ## Graceful Shutdown
261
+
262
+ Handle SIGTERM to gracefully drain connections:
263
+
264
+ ```javascript
265
+ process.on('SIGTERM', () => {
266
+ server.close(() => {
267
+ process.exit(0);
268
+ });
269
+ });
270
+ ```
271
+
272
+ ## Environment Variables
273
+
274
+ orkify sets these environment variables on every managed process:
275
+
276
+ | Variable | Description |
277
+ | --------------------- | ------------------------------------------------------------ |
278
+ | `ORKIFY_PROCESS_ID` | Process ID in orkify |
279
+ | `ORKIFY_PROCESS_NAME` | Process name (from `-n` flag) |
280
+ | `ORKIFY_WORKER_ID` | Worker index: `0` in fork mode, `0` to `N-1` in cluster mode |
281
+ | `ORKIFY_CLUSTER_MODE` | `"true"` in cluster mode, unset in fork mode |
282
+ | `ORKIFY_WORKERS` | Total number of workers |
283
+ | `ORKIFY_EXEC_MODE` | `"fork"` or `"cluster"` |
284
+
285
+ ### Detecting the Primary Worker
286
+
287
+ Worker IDs are stable — `ORKIFY_WORKER_ID=0` survives crashes, restarts, and zero-downtime reloads. Use it to elect a primary worker for singletons (database connections, WebSocket clients, cron-like tasks):
288
+
289
+ ```javascript
290
+ const isPrimary = !process.env.ORKIFY_WORKER_ID || process.env.ORKIFY_WORKER_ID === '0';
291
+
292
+ if (isPrimary) {
293
+ // Only worker 0 connects to Discord, runs scheduled jobs, etc.
294
+ startSingletonService();
295
+ }
296
+ ```
297
+
298
+ ## Worker IPC (Broadcasting)
299
+
300
+ In cluster mode, workers can send messages to all other workers via the primary process. Send a message with `type: 'broadcast'` and orkify relays it to every sibling:
301
+
302
+ ```javascript
303
+ // Worker 1: send a cache-invalidation signal
304
+ process.send?.({
305
+ __orkify: true,
306
+ type: 'broadcast',
307
+ channel: 'cache:invalidate',
308
+ data: { key: 'users:123' },
309
+ });
310
+
311
+ // All other workers receive it:
312
+ process.on('message', (msg) => {
313
+ if (msg?.__orkify && msg.type === 'broadcast' && msg.channel === 'cache:invalidate') {
314
+ cache.delete(msg.data.key);
315
+ }
316
+ });
317
+ ```
318
+
319
+ Messages must have `__orkify: true` and `type: 'broadcast'`. The `channel` and `data` fields are yours to define. The sending worker does **not** receive its own broadcast — only siblings do.
320
+
321
+ **Request/response pattern**: To route a request to a specific worker (e.g., worker 0 for singletons), broadcast the request with a request ID. Worker 0 picks it up via `isPrimary`, processes it, and broadcasts a response carrying the same ID. The other workers ignore the request because they are not the primary, and ignore the response because it doesn't match any of their pending request IDs.
322
+
323
+ ## Shared Cluster Cache
324
+
325
+ orkify ships a built-in shared cache that works across cluster workers with zero external dependencies. On a single server, reads are faster than localhost Redis — they're synchronous Map lookups with no network round trip, no serialization, and no async overhead. Writes use Node's built-in IPC (Unix domain sockets), which is also faster than a TCP hop to Redis. No extra process to run, no connection pooling to configure.
326
+
327
+ Import it from `@orkify/cache`:
328
+
329
+ ```typescript
330
+ import { cache } from '@orkify/cache';
331
+
332
+ cache.set('user:123', userData, { ttl: 300 }); // write + broadcast
333
+ cache.set('key', value, { ttl: 300, tags: ['group'] }); // with tags
334
+ cache.get<User>('user:123'); // sync — in-memory only
335
+ await cache.getAsync<User>('user:123'); // async — memory first, then disk
336
+ cache.has('key'); // sync local check
337
+ cache.delete('key'); // delete + broadcast
338
+ cache.clear(); // clear + broadcast
339
+ cache.invalidateTag('group'); // delete all tagged entries + record timestamp
340
+ cache.getTagExpiration(['group']); // when was this tag last invalidated?
341
+ cache.updateTagTimestamp('group'); // record timestamp without deleting entries
342
+ cache.stats(); // { size, hits, misses, hitRate, totalBytes, diskSize }
343
+ ```
344
+
345
+ `get()` reads from memory only — always sync, zero overhead. `getAsync()` checks memory first, then falls back to disk if file-backed mode is enabled. Without file-backed mode, `getAsync()` is identical to `get()` (just wrapped in a resolved promise, no disk I/O).
346
+
347
+ ### How It Works
348
+
349
+ **Reads are always local** — `get()` is a synchronous Map lookup with zero overhead. Writes broadcast to all workers via IPC so every worker converges to the same state. Evicted entries spill to disk and can be recovered via `getAsync()`.
350
+
351
+ | Mode | Behavior |
352
+ | -------------------------- | ------------------------------------------------------- |
353
+ | `npm run dev` (standalone) | Local cache + disk cold layer, no IPC |
354
+ | `orkify up -w 1` (fork) | Local cache + disk cold layer, no IPC |
355
+ | `orkify up -w 4` (cluster) | Broadcast cache — writes sync via IPC, reads stay local |
356
+ | `orkify run` (foreground) | Local cache + disk cold layer, no IPC |
357
+
358
+ The API is identical in every mode. In standalone or fork mode, it degrades gracefully to a plain local cache — no errors, no code changes needed. You can use `@orkify/cache` during local development with `node app.js` or `npm run dev` and it works as a regular Map. Deploy with `orkify up -w 4` and the same code now syncs across workers automatically.
359
+
360
+ ### Configuration
361
+
362
+ Configuration is optional. To override the defaults, call `cache.configure()` before the first use of `cache`:
363
+
364
+ ```typescript
365
+ import { cache } from '@orkify/cache';
366
+
367
+ cache.configure({
368
+ maxEntries: 50_000, // default: 10,000
369
+ defaultTtl: 300, // default: no expiry (seconds)
370
+ maxMemorySize: 100 * 1024 * 1024, // default: 64 MB per worker
371
+ maxValueSize: 2 << 20, // default: 1 MB
372
+ });
373
+ ```
374
+
375
+ | Option | Default | Description |
376
+ | --------------- | ----------------------- | ------------------------------------------------------------------------------ |
377
+ | `maxEntries` | `10,000` | Maximum entries before LRU eviction kicks in |
378
+ | `defaultTtl` | `undefined` (no expiry) | Default TTL in seconds for entries without an explicit `ttl` |
379
+ | `fileBacked` | `true` | Persist evicted entries to disk, survive restarts, read via `getAsync()` |
380
+ | `maxMemorySize` | `67,108,864` (64 MB) | Memory limit in bytes per worker for LRU eviction |
381
+ | `maxValueSize` | `1,048,576` (1 MB) | Maximum byte size of a single serialized value |
382
+ | `tags` | `undefined` | String tags for `set()` — used with `invalidateTag()` for grouped invalidation |
383
+
384
+ The cache is file-backed by default — evicted entries spill to disk and the cache survives restarts. The sync `get()` path is unaffected (pure Map lookup, zero disk I/O). Disk reads only happen on `getAsync()` for entries not in memory. To disable the disk layer: `cache.configure({ fileBacked: false })`.
385
+
386
+ #### Sync vs Async Reads
387
+
388
+ `get()` reads from memory only — always sync, always fast. `getAsync()` checks memory first, then falls back to disk for evicted entries:
389
+
390
+ ```typescript
391
+ cache.set('key', 'value'); // stored in memory
392
+ cache.get('key'); // sync — in-memory only
393
+ await cache.getAsync('key'); // async — memory first, disk fallback
394
+
395
+ // In async handlers, prefer getAsync to catch cold entries:
396
+ app.get('/api/user/:id', async (req, res) => {
397
+ const key = `user:${req.params.id}`;
398
+ let user = await cache.getAsync<User>(key);
399
+
400
+ if (!user) {
401
+ user = await db.users.findById(req.params.id);
402
+ cache.set(key, user, { ttl: 300, tags: [`org:${user.orgId}`] });
403
+ }
404
+
405
+ res.json(user);
406
+ });
407
+ ```
408
+
409
+ With `fileBacked: false`, `getAsync()` behaves identically to `get()` — no disk I/O, just a resolved promise.
410
+
411
+ ### Tag-Based Invalidation
412
+
413
+ Tags let you group cache entries for bulk invalidation. A key can have multiple tags, and `invalidateTag()` deletes all entries with that tag across all workers:
414
+
415
+ ```typescript
416
+ // Tag entries when setting them
417
+ cache.set('config:proj1:hostA', configA, { ttl: 300, tags: ['project:proj1'] });
418
+ cache.set('config:proj1:hostB', configB, { ttl: 300, tags: ['project:proj1'] });
419
+
420
+ // Later, invalidate everything for that project
421
+ cache.invalidateTag('project:proj1'); // deletes both keys, syncs across workers
422
+ ```
423
+
424
+ Use cases:
425
+
426
+ - **Grouped config**: Invalidate all cached config for a project when settings change
427
+ - **User sessions**: Invalidate all cached data for a user on logout
428
+ - **Deployment**: Clear all cached data for a service on deploy
429
+
430
+ Tags are strings. A key can have multiple tags (`tags: ['project:1', 'org:5']`), and invalidating either tag deletes the key. Tags are preserved across daemon restarts and survive `orkify reload`.
431
+
432
+ #### Tag Timestamps
433
+
434
+ Every `invalidateTag()` call records when the tag was last invalidated. Query it with `getTagExpiration()`:
435
+
436
+ ```typescript
437
+ cache.invalidateTag('project:proj1');
438
+
439
+ // Returns the most recent invalidation timestamp (epoch ms) across the given tags
440
+ cache.getTagExpiration(['project:proj1']); // e.g. 1709510400000
441
+ cache.getTagExpiration(['unknown-tag']); // 0 (never invalidated)
442
+
443
+ // Multiple tags — returns the max timestamp
444
+ cache.getTagExpiration(['project:proj1', 'org:5']); // highest of the two
445
+ ```
446
+
447
+ Use `updateTagTimestamp()` to record a timestamp without deleting entries — useful for stale-while-revalidate patterns where entries stay alive but are marked for background refresh:
448
+
449
+ ```typescript
450
+ cache.updateTagTimestamp('group'); // records Date.now()
451
+ cache.updateTagTimestamp('group', futureTimestamp); // explicit timestamp
452
+ ```
453
+
454
+ Tag timestamps sync across workers via IPC, persist across daemon restarts, and survive `orkify reload`.
455
+
456
+ ### Cluster Mode Details
457
+
458
+ In cluster mode (`orkify up -w 4`), the cache uses orkify's built-in IPC:
459
+
460
+ 1. Worker A calls `cache.set('key', value)` → stores locally (optimistic) + sends to primary
461
+ 2. Primary stores the value, computes `expiresAt`, broadcasts to **all** workers
462
+ 3. Every worker (including A) applies the update — all converge to the same state
463
+
464
+ The primary serializes writes, so concurrent sets to the same key always resolve to a consistent last-write-wins value. New workers joining (on spawn or reload) receive a full cache snapshot immediately so they start warm.
465
+
466
+ ### Persistence
467
+
468
+ In cluster mode, the cache persists across graceful daemon restarts and stays in memory across `orkify reload`. No configuration needed.
469
+
470
+ - **`orkify reload`** — the primary stays alive, new workers receive the cache via IPC snapshot. No disk I/O, no data loss.
471
+ - **`orkify daemon-reload`** / **`orkify kill`** — the cache is written to `~/.orkify/cache/<name>.json` before the daemon exits. The new primary restores it on startup, so workers start warm.
472
+ - **Worker crash** — the replacement worker gets a snapshot from the primary immediately.
473
+ - **`orkify down`** — the cache is **not** persisted. Stopping a process is an explicit action — restoring potentially stale data (old sessions, revoked tokens, expired API responses) on a later `orkify up` would cause more problems than it solves.
474
+ - **`orkify kill --force`** — the cache is **not** persisted. Force kill sends SIGKILL with no graceful shutdown.
475
+ - **Daemon crash** — the cache is **not** persisted. Crash recovery restores process configs but the cache starts empty.
476
+
477
+ | Scenario | Cache behavior |
478
+ | ---------------------- | ------------------------------------------------------- |
479
+ | `orkify reload` | Warm — workers get snapshot from primary, zero downtime |
480
+ | `orkify daemon-reload` | Persisted to disk, restored on new daemon startup |
481
+ | `orkify kill` | Persisted to disk, restored on next daemon startup |
482
+ | `orkify kill --force` | Cache lost (SIGKILL, no graceful shutdown) |
483
+ | Worker crash | Replacement gets snapshot from primary |
484
+ | `orkify down` | Cache starts empty (clean slate) |
485
+ | Daemon crash | Cache starts empty (crash recovery doesn't persist) |
486
+
487
+ Cache files are stored per process at `~/.orkify/cache/` as JSON. Tags and V8 types (Map, Set, Date, etc.) are preserved correctly across restarts.
488
+
489
+ In standalone/fork mode, the cache persists to `~/.orkify/cache/<name>/` by default and survives restarts. Use `getAsync()` to access cold entries that may be on disk. With `fileBacked: false`, the cache lives only in memory — it's gone when the process exits.
490
+
491
+ The disk layer (on by default) works as follows:
492
+
493
+ - Entries evicted from memory spill to disk automatically (`~/.orkify/cache/<name>/entries/`)
494
+ - On shutdown (`orkify kill`), remaining in-memory entries are flushed to disk
495
+ - On startup, only the disk index is loaded — entries promote lazily to memory on access via `getAsync()`
496
+ - Disk entries have their own TTL and tag expiration checks — stale entries are cleaned up on read and by periodic sweeps
497
+
498
+ In **cluster mode**, the primary process owns the disk layer (reads and writes). Workers can read directly from disk files for fast cold reads without IPC. Writes still go through IPC to the primary.
499
+
500
+ In **fork/standalone mode**, the single process owns the disk layer directly. On graceful shutdown, all in-memory entries are flushed to disk synchronously so the cache survives restarts.
501
+
502
+ ### Consistency Model
503
+
504
+ The cache is **eventually consistent**. Other workers may read a stale value for one IPC round trip after a write. For most use cases (session data, rendered pages, API responses) this is fine. If you need strict consistency, use a database.
505
+
506
+ ### Eviction
507
+
508
+ - **Entry-count LRU**: When `maxEntries` is reached, the least recently accessed entry is evicted on the next write
509
+ - **Byte-based LRU**: Evicts by total memory usage (default 64 MB per worker) in addition to entry count
510
+ - **TTL expiry**: Expired entries are cleaned up lazily on read and by a background sweep every 60 seconds
511
+ - **Disk persistence**: Evicted entries persist on disk (by default) and are promoted back to memory on access via `getAsync()`
512
+ - **Value size limit**: `set()` rejects values exceeding `maxValueSize` (default 1 MB) with a descriptive error
513
+
514
+ ### Validation
515
+
516
+ `set()` validates values before storing:
517
+
518
+ ```typescript
519
+ // Throws — exceeds size limit
520
+ cache.set('huge', 'x'.repeat(2_000_000)); // Error: exceeds max 1048576 bytes
521
+
522
+ // Throws — invalid TTL
523
+ cache.set('key', 'value', { ttl: -1 }); // Error: ttl must be positive
524
+
525
+ // Throws — functions and symbols are not serializable
526
+ cache.set('fn', () => {}); // Error
527
+ ```
528
+
529
+ Values can be any structured-cloneable type: plain objects, arrays, strings, numbers, booleans, `null`, `Map`, `Set`, `Date`, `RegExp`, `Error`, `ArrayBuffer`, and `TypedArray`. JSON-serializable values use JSON internally; complex types (Map, Set, Date, etc.) automatically use V8 serialization. Only functions and symbols are rejected.
530
+
531
+ ## Next.js
532
+
533
+ orkify auto-detects Next.js apps (via `package.json` or `next.config.{ts,js,mjs}`) and provides production-grade hosting out of the box: cache handlers, security hardening, encryption key management, and deploy-time optimizations.
534
+
535
+ ### Packages
536
+
537
+ orkify ships two companion packages for use in your application code:
538
+
539
+ - **`@orkify/cache`** — framework-agnostic shared cache (documented in [Shared Cluster Cache](#shared-cluster-cache) above)
540
+ - **`@orkify/next`** — Next.js integration: cache handlers (`use-cache`, `isr-cache`), browser error tracking (`error-capture`, `error-handler`), and shared utilities
541
+
542
+ Both are bundled with orkify and available as sub-exports. Reference them in your `next.config.ts` and application code as shown below.
543
+
544
+ ### Setup
545
+
546
+ ```typescript
547
+ // next.config.ts
548
+ import type { NextConfig } from 'next';
549
+
550
+ const nextConfig: NextConfig = {
551
+ // Enable 'use cache' directives (required for Next.js 16)
552
+ cacheComponents: true,
553
+
554
+ // Next.js 16 'use cache' directives — backed by @orkify/cache
555
+ cacheHandlers: {
556
+ default: require.resolve('@orkify/next/use-cache'),
557
+ },
558
+
559
+ // ISR / route cache — backed by @orkify/cache
560
+ cacheHandler: require.resolve('@orkify/next/isr-cache'),
561
+
562
+ // Disable Next.js's built-in in-memory cache (orkify handles it)
563
+ cacheMaxMemorySize: 0,
564
+
565
+ // Version skew protection — auto-set by `orkify deploy`, optional for `orkify up/run`
566
+ deploymentId: process.env.NEXT_DEPLOYMENT_ID || undefined,
567
+ };
568
+
569
+ export default nextConfig;
570
+ ```
571
+
572
+ ### Cache Handlers
573
+
574
+ orkify provides drop-in cache handlers for Next.js 16. Both use the same `@orkify/cache` singleton, so tag invalidations propagate across all workers and affect both ISR and `'use cache'` entries.
575
+
576
+ - **`@orkify/next/use-cache`** — handles `'use cache'` directives. Converts between Next.js's stream-based interface and orkify's synchronous cache. Implements staleness checks (hard expiry, revalidation window, soft tags).
577
+ - **`@orkify/next/isr-cache`** — handles ISR / route cache. Simpler adapter: get, set, tag-based revalidation.
578
+
579
+ Both work standalone (`npm run dev`) and in cluster mode — the cache detects the mode automatically.
580
+
581
+ `revalidateTag()` calls in your Next.js app flow through orkify's cache, which broadcasts tag invalidations to all cluster workers via IPC:
582
+
583
+ ```typescript
584
+ // app/actions.ts
585
+ 'use server';
586
+ import { revalidateTag } from 'next/cache';
587
+
588
+ export async function refreshPosts() {
589
+ revalidateTag('posts'); // invalidates across all workers
590
+ }
591
+ ```
592
+
593
+ ### ISR Request Coalescing
594
+
595
+ In cluster mode, multiple workers may detect the same stale cache entry simultaneously. Without coalescing, N workers trigger N parallel revalidations for the same page.
596
+
597
+ orkify uses the shared cache as a distributed lock. When a worker detects staleness, it sets a short-lived `__revalidating:{key}` flag. Other workers seeing this flag serve stale content instead of triggering their own revalidation. The lock auto-expires after 30 seconds and is cleared when the fresh entry is stored.
598
+
599
+ - Hard expiration: **not coalesced** (entry is genuinely expired, must be regenerated)
600
+ - Soft tag invalidation: **not coalesced** (explicit invalidation should always miss)
601
+ - Revalidation window: **coalesced** (stale-while-revalidate semantics)
602
+
603
+ ### Server Actions Encryption Key
604
+
605
+ Next.js encrypts Server Action payloads. If the key differs between cluster workers or across rolling reloads, Server Actions fail with cryptic decryption errors. orkify auto-generates a stable `NEXT_SERVER_ACTIONS_ENCRYPTION_KEY` when it detects a Next.js app:
606
+
607
+ - Generated once at process creation and stored in config
608
+ - Consistent across all cluster workers (shared via `config.env`)
609
+ - Survives reloads and daemon restarts (persisted in the snapshot file)
610
+ - Skipped if you provide your own key via `--env` or shell environment
611
+
612
+ ### Security Header Stripping
613
+
614
+ orkify strips dangerous headers from external requests before they reach your app:
615
+
616
+ | Header | CVE | Risk |
617
+ | ------------------------- | ------------------------- | ------------------------------------------ |
618
+ | `x-middleware-subrequest` | CVE-2025-29927 (CVSS 9.1) | Bypasses Next.js middleware authentication |
619
+ | `x-now-route-matches` | CVE-2024-46982 | Cache poisoning via Vercel routing |
620
+
621
+ Headers are preserved on loopback requests (`127.0.0.1`, `::1`, `::ffff:127.0.0.1`) since Next.js uses them internally. Active in fork mode, cluster mode, and run mode with no configuration needed.
622
+
623
+ ### Version Skew Protection
624
+
625
+ During `orkify deploy`, old and new workers coexist briefly. If client-side bundle hashes changed, a user who loaded a page from an old worker may request assets that only exist in the new version.
626
+
627
+ orkify auto-sets `NEXT_DEPLOYMENT_ID` during deploy (format: `v{version}-{artifactSlice}`). Next.js uses this to tag asset URLs and handle version mismatches gracefully. The ID is passed to both the build command and runtime processes. If you set `NEXT_DEPLOYMENT_ID` in your secrets, orkify won't overwrite it.
628
+
629
+ ### Frontend Error Tracking
630
+
631
+ orkify captures unhandled browser errors (and unhandled promise rejections) and relays them to the daemon via IPC. Errors are bundled into the regular telemetry ingest — no additional API calls are made.
632
+
633
+ **Setup** requires two pieces: a client component in your root layout and a route handler.
634
+
635
+ ```typescript
636
+ // app/layout.tsx
637
+ import { OrkifyErrorCapture } from '@orkify/next/error-capture';
638
+
639
+ export default function RootLayout({ children }: { children: React.ReactNode }) {
640
+ return (
641
+ <html>
642
+ <body>
643
+ {children}
644
+ <OrkifyErrorCapture />
645
+ </body>
646
+ </html>
647
+ );
648
+ }
649
+ ```
650
+
651
+ ```typescript
652
+ // app/orkify/errors/route.ts
653
+ export { POST } from '@orkify/next/error-handler';
654
+ ```
655
+
656
+ The `<OrkifyErrorCapture>` component listens for `error` and `unhandledrejection` events in the browser and posts them to the route handler. The route handler validates the request and forwards the error to the daemon over IPC.
657
+
658
+ **What's captured:** error name, message, stack trace, page URL, and browser user agent.
659
+
660
+ **Stack normalization.** Browser engines produce different stack trace formats. orkify normalizes Firefox and Safari stacks into V8 format (` at functionName (file:line:col)`) before forwarding, so all errors are displayed consistently on the dashboard regardless of browser.
661
+
662
+ **Security.** The route handler enforces three layers of protection:
663
+
664
+ - **Origin validation** — the handler requires the `Origin` header (which browsers always send on POST) and verifies it matches the app's hostname. This blocks cross-origin abuse and non-browser clients. Supports `X-Forwarded-Host` for reverse proxy setups.
665
+ - **Rate limiting** — requests are rate-limited to 10 per 10 seconds per IP to prevent flooding.
666
+ - **Payload validation** — strict Zod schema validation with size caps (64 KB body, 100 stack lines, field-level length limits).
667
+
668
+ **Source maps.** By default, Next.js doesn't generate source maps for client bundles in production. To get resolved (non-minified) browser stacks, add `hidden-source-map` to your Next.js config — this produces `.map` files on disk without exposing them to browsers:
669
+
670
+ ```typescript
671
+ // next.config.ts
672
+ webpack: (config, { isServer }) => {
673
+ if (!isServer) config.devtool = 'hidden-source-map';
674
+ return config;
675
+ },
676
+ ```
677
+
678
+ **Error Boundary integration.** For errors caught by React Error Boundaries, use `reportError()` to forward them manually:
679
+
680
+ ```typescript
681
+ // app/error.tsx
682
+ 'use client';
683
+ import { reportError } from '@orkify/next/error-capture';
684
+
685
+ export default function ErrorBoundary({ error }: { error: Error }) {
686
+ reportError(error);
687
+ return <p>Something went wrong.</p>;
688
+ }
689
+ ```
690
+
691
+ See [`examples/nextjs/`](examples/nextjs/) for a working example.
692
+
693
+ ## Socket.IO / WebSocket Support
694
+
695
+ For WebSocket applications, use the `--sticky` flag to ensure connections from the same client always route to the same worker:
696
+
697
+ ```bash
698
+ orkify up socket-server.js -w 4 --sticky --port 3000
699
+ ```
700
+
701
+ This extracts session IDs from Socket.IO handshakes and consistently routes connections to the same worker based on a hash of the session ID.
702
+
703
+ ## Log Rotation
704
+
705
+ orkify automatically rotates process logs to prevent unbounded disk growth. Logs are written to `~/.orkify/logs/` and rotated when a file exceeds the size threshold or on the first write of a new day.
706
+
707
+ ### How It Works
708
+
709
+ 1. When a log file exceeds `--log-max-size` (default: 100 MB) or a new calendar day starts, orkify rotates the file
710
+ 2. The rotated file is compressed with gzip in the background (typically ~90% compression)
711
+ 3. Archives older than `--log-max-age` days are deleted
712
+ 4. If the archive count still exceeds `--log-max-files`, the oldest are pruned
713
+
714
+ ### Defaults
715
+
716
+ | Setting | Default | Description |
717
+ | ----------------- | ------- | ----------------------------------------- |
718
+ | `--log-max-size` | `100M` | Rotate when file exceeds 100 MB |
719
+ | `--log-max-files` | `90` | Keep up to 90 rotated archives per stream |
720
+ | `--log-max-age` | `90` | Delete archives older than 90 days |
721
+
722
+ With defaults, each log stream uses at most ~1 GB of storage: one 100 MB active file + up to 90 compressed archives (~10 MB each at ~90% compression). stdout and stderr are rotated separately, so a process writing heavily to both can use up to ~2 GB.
723
+
724
+ ### File Layout
725
+
726
+ ```
727
+ ~/.orkify/logs/
728
+ myapp.stdout.log # active (current writes)
729
+ myapp.stdout.log-20260215T091200.123.gz # rotated + compressed
730
+ myapp.stdout.log-20260216T143052.456.gz
731
+ myapp.stderr.log # active stderr
732
+ myapp.stderr.log-20260217T080000.789.gz
733
+ ```
734
+
735
+ ### Configuration
736
+
737
+ ```bash
738
+ # Custom rotation settings
739
+ orkify up app.js --log-max-size 50M --log-max-files 30 --log-max-age 30
740
+
741
+ # Disable rotation (logs grow unbounded)
742
+ orkify up app.js --log-max-files 0
743
+
744
+ # Size accepts K, M, G suffixes
745
+ orkify up app.js --log-max-size 500K
746
+ orkify up app.js --log-max-size 1G
747
+ ```
748
+
749
+ ### Viewing Logs
750
+
751
+ ```bash
752
+ # View last 100 lines (default)
753
+ orkify logs my-api
754
+
755
+ # View last 500 lines
756
+ orkify logs my-api -n 500
757
+
758
+ # Follow log output (stream new logs)
759
+ orkify logs my-api -f
760
+
761
+ # Show only stdout or stderr
762
+ orkify logs my-api --out
763
+ orkify logs my-api --err
764
+ ```
765
+
766
+ ### Flushing Logs
767
+
768
+ Truncate active log files and remove all rotated archives:
769
+
770
+ ```bash
771
+ # Flush logs for all processes
772
+ orkify flush
773
+
774
+ # Flush logs for a specific process
775
+ orkify flush my-api
776
+ ```
777
+
778
+ ## Environment Files
779
+
780
+ orkify supports loading environment variables from `.env` files using the native Node.js `--env-file` flag (Node 20.6+). Pass it via `--node-args`:
781
+
782
+ ```bash
783
+ # Daemon mode
784
+ orkify up app.js -w 4 --node-args="--env-file=.env"
785
+
786
+ # Foreground mode
787
+ orkify run app.js -w 4 --node-args="--env-file=.env"
788
+
789
+ # Multiple node args
790
+ orkify up app.js --node-args="--env-file=.env --max-old-space-size=4096"
791
+ ```
792
+
793
+ The env file format:
794
+
795
+ ```bash
796
+ # .env
797
+ DATABASE_URL=postgres://localhost:5432/mydb
798
+ API_KEY=secret-key-123
799
+ NODE_ENV=production
800
+ ```
801
+
802
+ Environment variables are passed to both the primary process and all workers in cluster mode.
803
+
804
+ ### Keeping Secrets Out of State
805
+
806
+ By default, `orkify snap` persists the full process environment — including values inherited from `process.env`, such as `PATH`, `HOME`, and API keys — into `~/.orkify/snapshot.yml`. Use `--no-env` to omit environment variables from the snapshot:
807
+
808
+ ```bash
809
+ # Start with env loaded from .env file
810
+ orkify up app.js -n my-api -w 4 --node-args="--env-file=.env"
811
+
812
+ # Save without baking env vars into snapshot.yml
813
+ orkify snap --no-env
814
+
815
+ # Snap to a custom file for use as a declarative config
816
+ orkify snap config/processes.yml
817
+ ```
818
+
819
+ Processes restored via `orkify restore` after a `--no-env` snap will inherit the daemon's own environment. Combined with `--node-args="--env-file=.env"`, secrets stay in your `.env` file and are never duplicated into the snapshot.
820
+
821
+ ## Snapshot File
822
+
823
+ `orkify snap` writes a YAML file to `~/.orkify/snapshot.yml` by default. `orkify restore` reads from the same path.
824
+
825
+ ```bash
826
+ # Save and restore — most common usage
827
+ orkify snap
828
+ orkify restore
829
+
830
+ # Custom file paths
831
+ orkify snap config/processes.yml
832
+ orkify restore config/processes.yml
833
+ ```
834
+
835
+ ### File format
836
+
837
+ ```yaml
838
+ version: 1
839
+ processes:
840
+ - name: 'api'
841
+ script: '/app/server.js'
842
+ cwd: '/app'
843
+ workerCount: 4
844
+ execMode: 'cluster'
845
+ watch: false
846
+ env:
847
+ NODE_ENV: 'production'
848
+ PORT: '3000'
849
+ nodeArgs: []
850
+ args: []
851
+ killTimeout: 5000
852
+ maxRestarts: 10
853
+ minUptime: 1000
854
+ restartDelay: 100
855
+ sticky: false
856
+ mcp:
857
+ transport: 'simple-http'
858
+ port: 8787
859
+ bind: '127.0.0.1'
860
+ cors: '*'
861
+ ```
862
+
863
+ The `mcp` section is only present when the MCP HTTP server is running at snapshot time. Old snapshots without `mcp` are loaded normally — `orkify restore` skips MCP startup in that case.
864
+
865
+ ### Restore behavior
866
+
867
+ When you run `orkify restore`, the behavior depends on whether an API key and deploy metadata are present:
868
+
869
+ 1. **With `ORKIFY_API_KEY` + active deploy** — orkify first tries to restore from the remote deploy API. If the remote call fails, it falls back to the local snapshot file automatically.
870
+ 2. **Without API key or deploy** — orkify goes straight to the local snapshot file (`~/.orkify/snapshot.yml`).
871
+ 3. **`--no-remote`** — skips the remote deploy check entirely, always uses the local snapshot.
872
+
873
+ ```bash
874
+ # Restore from remote deploy (if configured), otherwise snapshot
875
+ orkify restore
876
+
877
+ # Always use local snapshot, ignore remote deploy
878
+ orkify restore --no-remote
879
+ ```
880
+
881
+ The file is plain YAML so you can hand-edit it and use it as a declarative config. Here's what it looks like:
882
+
883
+ ```yaml
884
+ version: 1
885
+ processes:
886
+ - name: 'my-api'
887
+ script: '/app/dist/server.js'
888
+ cwd: '/app'
889
+ workerCount: 4
890
+ execMode: 'cluster'
891
+ watch: false
892
+ env:
893
+ NODE_ENV: 'production'
894
+ nodeArgs:
895
+ - '--max-old-space-size=4096'
896
+ args: []
897
+ killTimeout: 5000
898
+ maxRestarts: 10
899
+ minUptime: 1000
900
+ restartDelay: 100
901
+ sticky: false
902
+ port: 3000
903
+ ```
904
+
905
+ **Required fields:**
906
+
907
+ | Field | Description |
908
+ | ----------- | --------------------------------------------------------- |
909
+ | `processes` | Array of process configs |
910
+ | `script` | Path to the entry script (absolute, or relative to `cwd`) |
911
+
912
+ **Optional fields:**
913
+
914
+ | Field | Default | Description |
915
+ | --------------- | ------------------ | -------------------------------------------------------- |
916
+ | `version` | `1` | Schema version |
917
+ | `name` | basename of script | Process name |
918
+ | `cwd` | daemon working dir | Working directory |
919
+ | `workerCount` | `1` | Number of workers (1 = fork mode, >1 = cluster) |
920
+ | `execMode` | from `workerCount` | `"fork"` or `"cluster"` |
921
+ | `watch` | `false` | Watch for file changes |
922
+ | `watchPaths` | — | Specific paths to watch |
923
+ | `env` | — | Environment variables |
924
+ | `nodeArgs` | — | Node.js CLI flags (e.g. `["--inspect"]`) |
925
+ | `args` | — | Script arguments |
926
+ | `killTimeout` | `5000` | Graceful shutdown timeout in ms |
927
+ | `maxRestarts` | `10` | Max auto-restart attempts |
928
+ | `minUptime` | `1000` | Min uptime before a restart counts toward the limit (ms) |
929
+ | `restartDelay` | `100` | Delay between restarts in ms |
930
+ | `sticky` | `false` | Enable sticky sessions for WebSocket/Socket.IO |
931
+ | `port` | — | Port for sticky session routing |
932
+ | `reloadRetries` | `3` | Retries per worker slot during reload (0-3) |
933
+ | `healthCheck` | — | Health check endpoint path (e.g. `/health`) |
934
+ | `cron` | — | [Cron jobs](#cron-scheduler) (array of schedule + path) |
935
+ | `logMaxSize` | `104857600` | Max log file size in bytes before rotation (100 MB) |
936
+ | `logMaxFiles` | `90` | Max rotated log files to keep (0 = no rotation) |
937
+ | `logMaxAge` | `7776000000` | Max age of rotated logs in ms (90 days, 0 = no limit) |
938
+
939
+ A minimal config:
940
+
941
+ ```yaml
942
+ version: 1
943
+ processes:
944
+ - script: /app/dist/server.js
945
+ ```
946
+
947
+ All string values are double-quoted in the generated file to prevent YAML type coercion (e.g. `"3000"` stays a string, not an integer). If you hand-edit the file, unquoted env values like `PORT: 3000` or `DEBUG: true` are automatically coerced back to strings when loaded. Quoting is still recommended to avoid surprises (e.g. `1.0` parses as `1`).
948
+
949
+ ## Boot Persistence
950
+
951
+ To automatically restore processes after a server reboot, use the provided systemd service template.
952
+
953
+ ```bash
954
+ # Find your orkify binary path
955
+ which orkify
956
+
957
+ # Copy the template unit (shipped with the npm package)
958
+ sudo cp "$(npm root -g)/@orkify/cli/boot/systemd/orkify@.service" /etc/systemd/system/
959
+
960
+ # If your orkify binary is not at /usr/local/bin/orkify, edit the unit file:
961
+ # sudo systemctl edit orkify@ → override ExecStart/ExecStop paths
962
+
963
+ # Enable for your user
964
+ sudo systemctl daemon-reload
965
+ sudo systemctl enable orkify@$(whoami)
966
+ ```
967
+
968
+ The `@` template runs as the user you specify after the `@`. Replace `$(whoami)` with any username:
969
+
970
+ ```bash
971
+ # Run as the "deploy" user
972
+ sudo systemctl enable orkify@deploy
973
+
974
+ # Run as "app"
975
+ sudo systemctl enable orkify@app
976
+ ```
977
+
978
+ On boot, the service runs `orkify restore` to bring back all snapshotted processes; on stop, it runs `orkify kill`. Each user has their own isolated process list under `~/.orkify/`.
979
+
980
+ Make sure to snapshot your processes so there is something to restore:
981
+
982
+ ```bash
983
+ orkify snap
984
+ ```
985
+
986
+ ### Environment Variables (optional)
987
+
988
+ To inject environment variables (API keys, database credentials, etc.) into your managed processes, create an env file:
989
+
990
+ ```bash
991
+ sudo mkdir -p /etc/orkify
992
+ sudo touch /etc/orkify/env
993
+ sudo chmod 600 /etc/orkify/env
994
+ ```
995
+
996
+ The service template looks for `/etc/orkify/env` and loads it if present. Variables defined there are available to all orkify-managed processes. The file is read by systemd as root before dropping privileges, so `chmod 600` keeps your secrets safe while still injecting them into the process environment.
997
+
998
+ ### Starting
999
+
1000
+ To start immediately without rebooting:
1001
+
1002
+ ```bash
1003
+ sudo systemctl start orkify@$(whoami)
1004
+ ```
1005
+
1006
+ ## Container Mode
1007
+
1008
+ Use `orkify run` for Docker, Kubernetes, or any container environment where you need the process in the foreground.
1009
+
1010
+ ### Why `run` instead of `up`?
1011
+
1012
+ | Mode | Command | Use Case |
1013
+ | ------------- | ------------ | ------------------------------------------- |
1014
+ | **Daemon** | `orkify up` | Development, servers, long-running services |
1015
+ | **Container** | `orkify run` | Docker, Kubernetes, any PID 1 scenario |
1016
+
1017
+ In containers, processes run as PID 1 and must handle signals directly. The `run` command:
1018
+
1019
+ - Runs in the foreground (no daemon)
1020
+ - Properly forwards SIGTERM/SIGINT to child processes
1021
+ - Exits with correct exit codes for orchestrators
1022
+ - Supports graceful shutdown with configurable timeout
1023
+
1024
+ ### Single Instance (Fork Mode)
1025
+
1026
+ Best for most containers where the orchestrator handles scaling:
1027
+
1028
+ ```dockerfile
1029
+ FROM node:22-alpine
1030
+ WORKDIR /app
1031
+ COPY . .
1032
+ RUN npm install && npm run build
1033
+
1034
+ CMD ["orkify", "run", "app.js", "--silent"]
1035
+ ```
1036
+
1037
+ ```yaml
1038
+ # docker-compose.yml
1039
+ services:
1040
+ api:
1041
+ build: .
1042
+ deploy:
1043
+ replicas: 4 # Let Docker/K8s handle scaling
1044
+ ```
1045
+
1046
+ ### Cluster Mode (Multi-Core Containers)
1047
+
1048
+ For containers with multiple CPUs where you want in-process clustering:
1049
+
1050
+ ```dockerfile
1051
+ CMD ["orkify", "run", "app.js", "-w", "4", "--silent"]
1052
+ ```
1053
+
1054
+ ```yaml
1055
+ # kubernetes deployment
1056
+ spec:
1057
+ containers:
1058
+ - name: api
1059
+ command: ['orkify', 'run', 'app.js', '-w', '4', '--silent']
1060
+ resources:
1061
+ limits:
1062
+ cpu: '4' # Match -w count to CPU limit
1063
+ ```
1064
+
1065
+ ### Socket.IO in Containers
1066
+
1067
+ ```dockerfile
1068
+ CMD ["orkify", "run", "server.js", "-w", "4", "--sticky", "--port", "3000", "--silent"]
1069
+ ```
1070
+
1071
+ ### Container Options
1072
+
1073
+ The `run` command supports the same core options as `up`:
1074
+
1075
+ ```
1076
+ -n, --name <name> Process name
1077
+ -w, --workers <number> Number of workers (cluster mode)
1078
+ --cwd <path> Working directory
1079
+ --node-args="<args>" Arguments passed to Node.js (quoted)
1080
+ --args="<args>" Arguments passed to your script (quoted)
1081
+ --sticky Enable sticky sessions for Socket.IO
1082
+ --port <port> Port for sticky session routing
1083
+ --kill-timeout <ms> Graceful shutdown timeout (default: 5000)
1084
+ --reload-retries <count> Retries per worker slot during reload (0-3, default: 3)
1085
+ --silent Suppress startup messages (cleaner container logs)
1086
+ ```
1087
+
1088
+ ### Signal Handling
1089
+
1090
+ The `run` command properly handles container signals:
1091
+
1092
+ ```
1093
+ Container Orchestrator
1094
+
1095
+ │ SIGTERM (graceful stop)
1096
+
1097
+ ┌─────────────────┐
1098
+ │ orkify run │
1099
+ │ │──► Forwards SIGTERM to child
1100
+ │ kill-timeout │──► Waits up to --kill-timeout ms
1101
+ │ │──► SIGKILL if timeout exceeded
1102
+ └────────┬────────┘
1103
+
1104
+
1105
+ Exit code 0 (graceful) or 143 (SIGTERM) or 137 (SIGKILL)
1106
+ ```
1107
+
1108
+ 1. **SIGTERM/SIGINT/SIGHUP** → Forwarded to child process(es)
1109
+ 2. **Graceful shutdown** → Waits for `--kill-timeout` ms (default: 5000)
1110
+ 3. **SIGKILL fallback** → Force kills if child doesn't exit in time
1111
+ 4. **Exit codes** → Preserves child exit code (or 128 + signal number)
1112
+
1113
+ ### Quick Reference
1114
+
1115
+ | Scenario | Command |
1116
+ | ---------------------- | ------------------------------------------------------ |
1117
+ | Simple container | `orkify run app.js --silent` |
1118
+ | Multi-core container | `orkify run app.js -w 4 --silent` |
1119
+ | Socket.IO in container | `orkify run app.js -w 4 --sticky --port 3000 --silent` |
1120
+ | Development (verbose) | `orkify run app.js` |
1121
+ | Long graceful shutdown | `orkify run app.js --kill-timeout 30000 --silent` |
1122
+
1123
+ ## Deployment
1124
+
1125
+ orkify includes built-in deployment with automatic rollback. Create a tarball of your project, deploy it locally or through [orkify.com](https://orkify.com), and orkify handles extract → install → build → symlink → reconcile → monitor.
1126
+
1127
+ ### How It Works
1128
+
1129
+ 1. **Pack** — `orkify deploy pack` creates a tarball of your project
1130
+ 2. **Deploy** — Deploy locally with `orkify deploy local`, or upload to [orkify.com](https://orkify.com) with `orkify deploy upload` and trigger from the dashboard
1131
+ 3. **Execute** — orkify extracts the artifact, runs install/build, and starts your app
1132
+ 4. **Monitor** — orkify watches for crashes after deploy and automatically rolls back if workers fail
1133
+
1134
+ ### Deploy Quick Start
1135
+
1136
+ ```bash
1137
+ # First time: configure deploy settings (saved to orkify.yml)
1138
+ orkify deploy upload --interactive
1139
+
1140
+ # Upload an artifact (defaults to current directory)
1141
+ orkify deploy upload
1142
+
1143
+ # Upload from a specific directory
1144
+ orkify deploy upload ./myapp
1145
+
1146
+ # Bump package.json patch version and upload (e.g. 1.0.0 → 1.0.1)
1147
+ orkify deploy upload --npm-version-patch
1148
+
1149
+ # Explicit API key (alternative to ORKIFY_API_KEY env var)
1150
+ orkify deploy upload --api-key orkify_xxx
1151
+ ```
1152
+
1153
+ #### Upload Options
1154
+
1155
+ | Flag | Description |
1156
+ | --------------------- | ------------------------------------------------- |
1157
+ | `--interactive` | Prompt for deploy settings (saved to orkify.yml) |
1158
+ | `--npm-version-patch` | Bump package.json patch version before upload |
1159
+ | `--api-key <key>` | API key (alternative to `ORKIFY_API_KEY` env var) |
1160
+ | `--api-host <url>` | Override API host URL |
1161
+
1162
+ ### Local Deploy
1163
+
1164
+ Deploy from a local tarball — useful for self-managed servers, air-gapped environments, and custom CI/CD pipelines.
1165
+
1166
+ ```bash
1167
+ # Create a deploy artifact
1168
+ orkify deploy pack ./myapp --output myapp.tar.gz
1169
+
1170
+ # Copy to server and deploy
1171
+ scp myapp.tar.gz server:~/
1172
+ ssh server orkify deploy local myapp.tar.gz
1173
+
1174
+ # With environment variables
1175
+ orkify deploy local myapp.tar.gz --env-file .env.production
1176
+ ```
1177
+
1178
+ ### Deploy Configuration
1179
+
1180
+ Deploy configuration is stored in `orkify.yml` at your project root:
1181
+
1182
+ ```yaml
1183
+ version: 1
1184
+
1185
+ deploy:
1186
+ install: npm ci
1187
+ build: npm run build
1188
+ crashWindow: 30
1189
+ buildEnv:
1190
+ NEXT_PUBLIC_API_URL: 'https://api.example.com'
1191
+ NEXT_PUBLIC_SITE_NAME: 'My App'
1192
+
1193
+ processes:
1194
+ - name: api
1195
+ script: dist/server.js
1196
+ workerCount: 4
1197
+ sticky: true
1198
+ port: 3000
1199
+ healthCheck: /health
1200
+ - name: worker
1201
+ script: dist/worker.js
1202
+ workerCount: 2
1203
+ ```
1204
+
1205
+ The `deploy` section configures build/install steps. The `processes` section defines what gets started — the same format used by `orkify snap`.
1206
+
1207
+ ### Deploy Options
1208
+
1209
+ | Field | Description |
1210
+ | ------------- | -------------------------------------------------------------------------------- |
1211
+ | `install` | Install command (auto-detected: npm, yarn, pnpm, bun) |
1212
+ | `build` | Build command (optional, runs after install) |
1213
+ | `buildEnv` | Build-time-only env vars (e.g. `NEXT_PUBLIC_*`). Not passed to runtime processes |
1214
+ | `crashWindow` | Seconds to monitor for crashes after deploy (default: 30) |
1215
+
1216
+ ### Deploy Lifecycle
1217
+
1218
+ ```
1219
+ Pack → [Upload] → Extract → Install → Build → Reconcile → Monitor → Success
1220
+
1221
+ Crash detected? │
1222
+
1223
+ Auto-rollback
1224
+ ```
1225
+
1226
+ On deploy (both local and remote), orkify **reconciles** running processes against the `processes` in `orkify.yml`:
1227
+
1228
+ - **New processes** are started
1229
+ - **Unchanged processes** get a zero-downtime reload
1230
+ - **Changed processes** (different script, worker count, etc.) are replaced
1231
+ - **Removed processes** are stopped
1232
+
1233
+ The daemon keeps the previous release on disk. If workers crash within the monitoring window (`crashWindow`, default: 30 seconds), orkify automatically rolls back to the previous release.
1234
+
1235
+ ### orkify.com Platform
1236
+
1237
+ [orkify.com](https://orkify.com) is an optional paid companion that provides:
1238
+
1239
+ - **Deploy management** — Upload artifacts, trigger deploys, track rollout status
1240
+ - **Real-time metrics** — CPU, memory, and event loop monitoring with historical data
1241
+ - **Log streaming** — Centralized log aggregation from all your servers
1242
+ - **Crash detection** — Automatic error capture with stack traces and context
1243
+ - **Remote control** — Start, stop, restart, and reload processes from the dashboard
1244
+ - **Secrets management** — Encrypted environment variables injected at deploy time
1245
+ - **Multi-server** — Manage processes across all your servers from one dashboard
1246
+
1247
+ The CLI works standalone without orkify.com. Connect it by setting an API key:
1248
+
1249
+ ```bash
1250
+ ORKIFY_API_KEY=orkify_xxx orkify up app.js
1251
+ ```
1252
+
1253
+ ## Source Map Support
1254
+
1255
+ When your application uses a bundler (webpack, esbuild, turbopack, rollup, vite), errors from minified or bundled code are automatically resolved to their original source locations using source maps.
1256
+
1257
+ The daemon reads `.map` files from disk at runtime and resolves every frame in the error's stack trace back to the original file, line, and column. The dashboard then shows the original source code instead of minified output. Resolution happens entirely on your server — source maps and original source code never leave your infrastructure. Unlike services that require uploading maps to external servers, there is no build-time upload step and no risk of source code exposure.
1258
+
1259
+ This works automatically when `.map` files are present alongside the bundled output. All major bundlers include `sourcesContent` in their source maps by default, so resolution works even when original source files aren't on disk.
1260
+
1261
+ ### Next.js
1262
+
1263
+ Next.js does not emit server-side source maps by default. To enable them, add the following to your `next.config.ts`:
1264
+
1265
+ ```ts
1266
+ const nextConfig: NextConfig = {
1267
+ experimental: {
1268
+ serverSourceMaps: true,
1269
+ },
1270
+ };
1271
+ ```
1272
+
1273
+ This applies to both webpack and turbopack modes. With this option enabled, errors from API routes and server components will resolve to the original TypeScript source. Browser errors captured via [Frontend Error Tracking](#frontend-error-tracking) go through the same source map resolution pipeline, so minified client-side stacks are mapped back to original source locations.
1274
+
1275
+ ### Deploy Artifacts
1276
+
1277
+ Source maps are available on the deploy target as long as your bundler generates them. In most setups, the build output directory (`.next/`, `dist/`) is gitignored and excluded from the tarball — the deploy `build` step regenerates everything including `.map` files on the target.
1278
+
1279
+ If your build output is committed and you want to exclude `.map` files from the artifact (smaller uploads), set `sourcemaps: false` in your `orkify.yml`:
1280
+
1281
+ ```yaml
1282
+ deploy:
1283
+ install: npm ci
1284
+ build: npm run build
1285
+ sourcemaps: false
1286
+ ```
1287
+
1288
+ Or use the `--no-sourcemaps` flag:
1289
+
1290
+ ```bash
1291
+ orkify deploy upload --no-sourcemaps
1292
+ orkify deploy pack --no-sourcemaps
1293
+ ```
1294
+
1295
+ ### Error Grouping
1296
+
1297
+ Errors are grouped on the dashboard by a fingerprint computed from the error type, message, file, and function name:
1298
+
1299
+ - **Function name over line number.** When a function name is available (from the stack trace or source map), the fingerprint uses `file + function name` instead of `file + line number`. This means errors stay grouped even when lines shift between deploys.
1300
+ - **Error type included.** A `TypeError` and a `ReferenceError` at the same location produce different groups.
1301
+ - **Message normalization.** Dynamic values (UUIDs, numbers, IP addresses, hex strings) are stripped from the message before hashing, so `"User 123 not found"` and `"User 456 not found"` group together.
1302
+ - **Fallback.** When no function name is available (anonymous functions, top-level code), the fingerprint falls back to `file + line number`.
1303
+
1304
+ If you upgrade from a version without this algorithm, existing error groups will re-fingerprint once. This is expected — the new groups are more stable.
1305
+
1306
+ ## Cron Scheduler
1307
+
1308
+ The daemon includes a built-in cron scheduler that dispatches HTTP requests to managed processes on a schedule. This lets you trigger periodic tasks (health checks, cleanup jobs, cache warming) without external cron infrastructure.
1309
+
1310
+ ### Usage
1311
+
1312
+ ```bash
1313
+ # Run a cron job every 2 minutes
1314
+ orkify up app.js --cron "*/2 * * * * /api/cron/heartbeat-check"
1315
+
1316
+ # Multiple cron jobs
1317
+ orkify up app.js \
1318
+ --cron "*/2 * * * * /api/cron/heartbeat-check" \
1319
+ --cron "0 * * * * /api/cron/cleanup"
1320
+ ```
1321
+
1322
+ The `--cron` format is `"<schedule> <path>"` — the last whitespace-delimited token is the HTTP path, everything before it is the cron expression.
1323
+
1324
+ ### Ecosystem Config
1325
+
1326
+ ```yaml
1327
+ # orkify.yml
1328
+ processes:
1329
+ - name: web
1330
+ script: server.js
1331
+ workers: 4
1332
+ cron:
1333
+ - schedule: '*/2 * * * *'
1334
+ path: /api/cron/heartbeat-check
1335
+ - schedule: '0 * * * *'
1336
+ path: /api/cron/cleanup
1337
+ method: POST # default: GET
1338
+ timeout: 60000 # ms, default: 30000
1339
+ ```
1340
+
1341
+ ### How It Works
1342
+
1343
+ 1. When a job is due, the scheduler looks up the process port via the orchestrator
1344
+ 2. It makes an HTTP request to `http://localhost:{port}{path}` with the cron secret as `Authorization: Bearer <secret>`
1345
+ 3. In cluster mode, the OS routes each request to a single worker — no duplication across workers
1346
+ 4. The port is auto-detected when your app calls `server.listen()` — works in both fork and cluster mode
1347
+
1348
+ ### Limits
1349
+
1350
+ | Limit | Value | Reason |
1351
+ | ---------------- | -------- | --------------------------------------------------------------------------- |
1352
+ | Minimum interval | 1 minute | Cron has minute granularity; jobs fire within seconds of their target time |
1353
+ | Maximum interval | 24 hours | Cron jobs running less frequently than daily should use external scheduling |
1354
+
1355
+ Sub-minute schedules (e.g. 6-field expressions with seconds like `*/30 * * * * *`) are rejected at registration time with a clear error.
1356
+
1357
+ ### Overlap Prevention
1358
+
1359
+ Each job tracks a `running` flag. If a previous invocation is still in flight when the next tick fires, that run is skipped and the job waits for its next scheduled tick. This prevents slow handlers from stacking up.
1360
+
1361
+ ### Cron Secret
1362
+
1363
+ When cron jobs are configured, orkify generates a random secret per process and:
1364
+
1365
+ 1. Sets `ORKIFY_CRON_SECRET` in the child process environment
1366
+ 2. Sends it as `Authorization: Bearer <secret>` on every cron request
1367
+
1368
+ Your route should validate the header to ensure only the daemon can trigger it:
1369
+
1370
+ ```ts
1371
+ export async function GET(request: NextRequest) {
1372
+ const authHeader = request.headers.get('authorization');
1373
+ if (authHeader !== `Bearer ${process.env.ORKIFY_CRON_SECRET}`) {
1374
+ return new Response('Unauthorized', { status: 401 });
1375
+ }
1376
+ // ... handle cron job
1377
+ }
1378
+ ```
1379
+
1380
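+ The secret is regenerated on every process spawn — no config needed. You can also check `process.env.ORKIFY_CRON_SECRET` to detect whether orkify cron is active (e.g. to skip internal timers). Note that when the app runs outside orkify the variable is unset, so the comparison shown above would match the literal header `Bearer undefined` — reject the request outright when `ORKIFY_CRON_SECRET` is missing.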
1381
+
1382
+ ### Persistence and Recovery
1383
+
1384
+ Cron jobs are part of the process config and persisted in snapshots. They survive:
1385
+
1386
+ - **`orkify snap` / `orkify restore`** — cron config is saved and restored with the snapshot
1387
+ - **`orkify daemon-reload`** — the daemon captures running configs (including cron), starts a new daemon, and restores them
1388
+ - **Daemon crash** — crash recovery spawns a new daemon and restores all process configs including cron jobs
1389
+
1390
+ In all cases, cron jobs are re-registered automatically when the process is restored. The first tick after recovery evaluates the cron expression from the current time, so no "catch-up" runs are fired for ticks missed while the daemon was down.
1391
+
1392
+ ### Edge Cases
1393
+
1394
+ | Scenario | Behavior |
1395
+ | ------------------------------------ | --------------------------------------------------------------------------- |
1396
+ | Process has no detected port | Job logs "no port detected, skipping" and advances to next run |
1397
+ | Process is stopped (`orkify down`) | Cron jobs are unregistered immediately |
1398
+ | Process is deleted (`orkify delete`) | Cron jobs are unregistered immediately |
1399
+ | HTTP request fails or times out | Error is logged, job advances to next run |
1400
+ | Daemon crashes mid-tick | Crash recovery restores all configs; in-flight requests are lost (no retry) |
1401
+ | Invalid cron expression | Rejected at registration with an error message |
1402
+ | Deploy reconcile | New cron config from `orkify.yml` is registered after reconcile completes |
1403
+
1404
+ ## MCP Integration
1405
+
1406
+ orkify includes a built-in [Model Context Protocol](https://modelcontextprotocol.io/) server, enabling AI assistants like Claude Code to manage your processes directly. It supports two transports:
1407
+
1408
+ - **Stdio** (default) — for local AI tools running on the same machine. No auth needed.
1409
+ - **HTTP** — for remote AI agents over the network. Authenticated via bearer tokens.
1410
+
1411
+ ### Stdio Mode (Local)
1412
+
1413
+ Stdio is the default transport. The MCP client spawns orkify as a subprocess — same user, same machine, no network involved. No authentication is required.
1414
+
1415
+ There are three ways to register the MCP server with your AI tools:
1416
+
1417
+ #### Option A — `add-mcp` (multi-tool)
1418
+
1419
+ [`add-mcp`](https://github.com/nicepkg/add-mcp) auto-detects installed AI tools (Claude Code, Cursor, VS Code, Windsurf, etc.) and writes the correct config for each one.
1420
+
1421
+ ```bash
1422
+ # Install once (global)
1423
+ npm install -g add-mcp
1424
+
1425
+ # Auto-detect tools and register orkify (interactive)
1426
+ npx add-mcp "orkify mcp"
1427
+
1428
+ # Register globally (user-level, all projects)
1429
+ npx add-mcp "orkify mcp" -g
1430
+
1431
+ # Target a specific tool
1432
+ npx add-mcp "orkify mcp" -a claude-code
1433
+ npx add-mcp "orkify mcp" -a cursor
1434
+ npx add-mcp "orkify mcp" -a vscode
1435
+ ```
1436
+
1437
+ #### Option B — Claude Code CLI
1438
+
1439
+ If you only use Claude Code:
1440
+
1441
+ ```bash
1442
+ claude mcp add orkify -- orkify mcp
1443
+ ```
1444
+
1445
+ #### Option C — Manual JSON
1446
+
1447
+ Add to your Claude Code MCP settings (`~/.claude/settings.json`):
1448
+
1449
+ ```json
1450
+ {
1451
+ "mcpServers": {
1452
+ "orkify": {
1453
+ "command": "orkify",
1454
+ "args": ["mcp"]
1455
+ }
1456
+ }
1457
+ }
1458
+ ```
1459
+
1460
+ For Cursor, VS Code, and other tools, consult their docs for the equivalent MCP config location.
1461
+
1462
+ ### HTTP Mode (Remote)
1463
+
1464
+ HTTP mode starts an authenticated HTTP server inside the daemon that remote AI agents can connect to. Because it runs in-process with the daemon, the MCP server is automatically managed by `orkify kill`, `orkify snap`/`orkify restore`, `orkify daemon-reload`, and crash recovery.
1465
+
1466
+ #### 1. Generate a key
1467
+
1468
+ ```bash
1469
+ # Full access (all tools)
1470
+ orkify mcp keygen --name "my-agent"
1471
+
1472
+ # Read-only (list and logs only)
1473
+ orkify mcp keygen --name "monitor" --tools list,logs
1474
+
1475
+ # Ops access (specific tools)
1476
+ orkify mcp keygen --name "ops" --tools list,logs,restart,reload,down
1477
+
1478
+ # Restrict to specific IPs (individual or CIDR)
1479
+ orkify mcp keygen --name "ci-agent" --allowed-ips "10.0.0.0/8,192.168.1.50"
1480
+ ```
1481
+
1482
+ The command prints the token to stdout and adds it to `~/.orkify/mcp.yml`.
1483
+
1484
+ #### 2. Start the HTTP server
1485
+
1486
+ ```bash
1487
+ # Default: localhost:8787
1488
+ orkify mcp --simple-http
1489
+
1490
+ # Custom port and bind address
1491
+ orkify mcp --simple-http --port 9090 --bind 0.0.0.0
1492
+ ```
1493
+
1494
+ #### 3. Manage the HTTP server
1495
+
1496
+ ```bash
1497
+ # Check if the MCP HTTP server is running
1498
+ orkify mcp status
1499
+
1500
+ # Stop the MCP HTTP server
1501
+ orkify mcp stop
1502
+ ```
1503
+
1504
+ #### 4. Connect a client
1505
+
1506
+ MCP clients authenticate with `Authorization: Bearer <token>`:
1507
+
1508
+ ```bash
1509
+ curl -X POST http://localhost:8787/mcp \
1510
+ -H "Authorization: Bearer orkify_mcp_..." \
1511
+ -H "Content-Type: application/json" \
1512
+ -d '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2025-03-26","capabilities":{},"clientInfo":{"name":"test","version":"1.0.0"}}}'
1513
+ ```
1514
+
1515
+ #### 5. Register with AI tools
1516
+
1517
+ **`add-mcp`:**
1518
+
1519
+ ```bash
1520
+ npx add-mcp "http://your-server:8787/mcp" \
1521
+ --header "Authorization: Bearer orkify_mcp_..." \
1522
+ -n orkify
1523
+ ```
1524
+
1525
+ **Claude Code CLI:**
1526
+
1527
+ ```bash
1528
+ claude mcp add --transport http \
1529
+ --header "Authorization: Bearer orkify_mcp_..." \
1530
+ orkify http://your-server:8787/mcp
1531
+ ```
1532
+
1533
+ #### HTTP Options
1534
+
1535
+ ```
1536
+ --simple-http Use HTTP transport with local key auth
1537
+ --port <port> HTTP port (default: 8787)
1538
+ --bind <address> HTTP bind address (default: 127.0.0.1)
1539
+ --cors <origin> Enable CORS ("*", a specific URL, or comma-separated URLs)
1540
+ ```
1541
+
1542
+ #### CORS (Browser Clients)
1543
+
1544
+ By default, browser-based MCP clients are blocked by CORS policy. Enable CORS with the `--cors` flag:
1545
+
1546
+ ```bash
1547
+ # Allow any origin
1548
+ orkify mcp --simple-http --cors "*"
1549
+
1550
+ # Allow a specific origin
1551
+ orkify mcp --simple-http --cors "https://dashboard.example.com"
1552
+ ```
1553
+
1554
+ When a specific origin is set (not `*`), the server includes a `Vary: Origin` header for correct HTTP caching. OPTIONS preflight requests are handled automatically and cached for 24 hours.
1555
+
1556
+ ### Key Management
1557
+
1558
+ Keys are stored in `~/.orkify/mcp.yml` (created with `0600` permissions):
1559
+
1560
+ ```yaml
1561
+ keys:
1562
+ - name: my-agent
1563
+ token: orkify_mcp_a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6
1564
+ tools:
1565
+ - '*'
1566
+
1567
+ - name: monitor
1568
+ token: orkify_mcp_f6e5d4c3b2a1f6e5d4c3b2a1f6e5d4c3b2a1f6e5d4c3b2a1
1569
+ tools:
1570
+ - list
1571
+ - logs
1572
+
1573
+ - name: ci-agent
1574
+ token: orkify_mcp_9876543210ab9876543210ab9876543210ab9876543210ab
1575
+ tools:
1576
+ - list
1577
+ - logs
1578
+ - restart
1579
+ - reload
1580
+ - down
1581
+ allowedIps:
1582
+ - 10.0.0.0/8
1583
+ - 192.168.1.50
1584
+ ```
1585
+
1586
+ Each key has:
1587
+
1588
+ | Field | Description |
1589
+ | ------------ | ---------------------------------------------------------------- |
1590
+ | `name` | Identifier for logging and error messages |
1591
+ | `token` | Bearer token (`orkify_mcp_` + 48 hex chars) |
1592
+ | `tools` | Allowed MCP tools — `["*"]` for all, or explicit list |
1593
+ | `allowedIps` | Optional IP allowlist — individual IPs or CIDRs (all IPs allowed if omitted) |
1594
+
1595
+ #### Keygen Options
1596
+
1597
+ ```
1598
+ orkify mcp keygen [--name <name>] [--tools <tool,...>] [--allowed-ips <ips>]
1599
+
1600
+ --name <name> Key name for identification (default: "default")
1601
+ --tools <tools> Comma-separated list of allowed tools (default: all)
1602
+ --allowed-ips <ips> Comma-separated IPs or CIDRs (default: all)
1603
+ ```
1604
+
1605
+ #### Editing Keys
1606
+
1607
+ The config file is plain YAML — you can hand-edit it to rename keys, change tool permissions, or remove keys. The server reloads the config on `SIGHUP` or file change (polled every 2 seconds), so changes take effect without restarting.
1608
+
1609
+ ### Tool Scoping
1610
+
1611
+ When a key has a restricted `tools` list, any call to a tool not in the list returns a `FORBIDDEN` error. This lets you create read-only keys for monitoring dashboards or limited-access keys for specific teams.
1612
+
1613
+ Valid tool names: `list`, `logs`, `snap`, `listAllUsers`, `up`, `down`, `restart`, `reload`, `delete`, `restore`, `kill`.
1614
+
1615
+ ### Available MCP Tools
1616
+
1617
+ | Tool | Description |
1618
+ | -------------- | ----------------------------------------------- |
1619
+ | `up` | Start a new process with optional configuration |
1620
+ | `down` | Stop process(es) by name, ID, or "all" |
1621
+ | `restart` | Hard restart (stop + start) |
1622
+ | `reload` | Zero-downtime rolling reload |
1623
+ | `delete` | Stop and remove from process list |
1624
+ | `list` | List all processes with status and metrics |
1625
+ | `listAllUsers` | List processes from all users (requires sudo) |
1626
+ | `logs` | Get recent log lines from a process |
1627
+ | `snap` | Snapshot process list for later restoration |
1628
+ | `restore` | Restore previously saved processes |
1629
+ | `kill` | Stop the orkify daemon |
1630
+
1631
+ ### Example Usage
1632
+
1633
+ Once configured, you can ask Claude to manage your processes:
1634
+
1635
+ - "Start my API server with 4 workers"
1636
+ - "List all running processes"
1637
+ - "Reload the web app with zero downtime"
1638
+ - "Show me the logs for the worker process"
1639
+ - "Stop all processes"
1640
+
1641
+ ## Architecture
1642
+
1643
+ ### Daemon Mode (`orkify up`)
1644
+
1645
+ ```
1646
+ ┌─────────────────────────────────────────────────────────────┐
1647
+ │ CLI (orkify up) │
1648
+ └─────────────────────────────┬───────────────────────────────┘
1649
+ │ IPC (Unix Socket / Named Pipe)
1650
+ ┌─────────────────────────────▼───────────────────────────────┐
1651
+ │ Daemon │
1652
+ │ ┌───────────────────────────────────────────────────────┐ │
1653
+ │ │ Orchestrator │ │
1654
+ │ └───────────────────────────┬───────────────────────────┘ │
1655
+ │ │ │
1656
+ │ ┌───────────────────────────▼───────────────────────────┐ │
1657
+ │ │ ManagedProcess │ │
1658
+ │ │ │ │
1659
+ │ │ Fork Mode (-w 1): Cluster Mode (-w N): │ │
1660
+ │ │ ┌─────────────┐ ┌─────────────────────┐ │ │
1661
+ │ │ │ Child │ │ ClusterWrapper │ │ │
1662
+ │ │ │ Process │ │ (Primary) │ │ │
1663
+ │ │ └─────────────┘ │ ┌─────┐ ┌─────┐ │ │ │
1664
+ │ │ │ │ W1 │ │ W2 │ │ │ │
1665
+ │ │ │ └─────┘ └─────┘ │ │ │
1666
+ │ │ │ ┌─────┐ ┌─────┐ │ │ │
1667
+ │ │ │ │ W3 │ │ W4 │ │ │ │
1668
+ │ │ │ └─────┘ └─────┘ │ │ │
1669
+ │ │ └─────────────────────┘ │ │
1670
+ │ └───────────────────────────────────────────────────────┘ │
1671
+ └─────────────────────────────────────────────────────────────┘
1672
+ ```
1673
+
1674
+ ### Container Mode (`orkify run`)
1675
+
1676
+ ```
1677
+ ┌─────────────────────────────────────────────────────────────┐
1678
+ │ Container (PID 1) │
1679
+ │ ┌───────────────────────────────────────────────────────┐ │
1680
+ │ │ orkify run │ │
1681
+ │ │ │ │
1682
+ │ │ Fork Mode (-w 1): Cluster Mode (-w N): │ │
1683
+ │ │ ┌─────────────┐ ┌─────────────────────┐ │ │
1684
+ │ │ │ Child │◄─SIGTERM │ ClusterWrapper │ │ │
1685
+ │ │ │ Process │ │ (Primary) │ │ │
1686
+ │ │ └─────────────┘ │ ┌─────┐ ┌─────┐ │ │ │
1687
+ │ │ │ │ W1 │ │ W2 │◄─SIGTERM │ │
1688
+ │ │ │ └─────┘ └─────┘ │ │ │
1689
+ │ │ └─────────────────────┘ │ │
1690
+ │ └───────────────────────────────────────────────────────┘ │
1691
+ └─────────────────────────────────────────────────────────────┘
1692
+ ```
1693
+
1694
+ ## Requirements
1695
+
1696
+ - Node.js 22.18.0 or higher
1697
+ - **Cross-platform:** macOS, Linux, Windows (uses Unix sockets on macOS/Linux, Named Pipes on Windows)
1698
+
1699
+ ## License
1700
+
1701
+ Apache License 2.0 - see [LICENSE](LICENSE) for details.