niahere 0.2.38 → 0.2.40

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "niahere",
3
- "version": "0.2.38",
3
+ "version": "0.2.40",
4
4
  "description": "A personal AI assistant daemon — scheduled jobs, chat across Telegram and Slack, persona system, and visual identity.",
5
5
  "type": "module",
6
6
  "scripts": {
@@ -48,6 +48,8 @@ function buildPlist(): string {
48
48
  <key>SuccessfulExit</key>
49
49
  <false/>
50
50
  </dict>
51
+ <key>ThrottleInterval</key>
52
+ <integer>10</integer>
51
53
  <key>StandardOutPath</key>
52
54
  <string>${paths.daemonLog}</string>
53
55
  <key>StandardErrorPath</key>
@@ -86,10 +88,10 @@ async function uninstallLaunchd(): Promise<void> {
86
88
  const path = plistPath();
87
89
  if (!existsSync(path)) return;
88
90
 
91
+ // Unload to stop the process and disable KeepAlive respawn.
92
+ // Keep the plist file so RunAtLoad starts the daemon on next login.
89
93
  const unload = Bun.spawn(["launchctl", "unload", path], { stdout: "pipe", stderr: "pipe" });
90
94
  await unload.exited;
91
-
92
- try { unlinkSync(path); } catch { /* already gone */ }
93
95
  }
94
96
 
95
97
  function isLaunchdInstalled(): boolean {
package/src/core/alive.ts CHANGED
@@ -4,11 +4,72 @@ import { getSql, closeDb } from "../db/connection";
4
4
  import { getFailures, type Check } from "./health";
5
5
 
6
6
  const HEARTBEAT_INTERVAL = 60_000; // 60s
7
+ const PG_DATA_DIRS = [
8
+ "/opt/homebrew/var/postgresql@18",
9
+ "/opt/homebrew/var/postgresql@17",
10
+ "/opt/homebrew/var/postgres",
11
+ ];
7
12
 
8
13
  let timer: ReturnType<typeof setInterval> | null = null;
9
14
  let lastFailures: string[] = [];
10
15
  let recoveryAttempted = false;
11
16
 
17
+ /** Deterministic Postgres recovery: remove stale PID file + restart service. */
18
+ async function recoverPostgres(): Promise<boolean> {
19
+ const ready = Bun.spawnSync(["pg_isready"]);
20
+ if (ready.exitCode === 0) return true; // already up
21
+
22
+ log.info("alive: postgres not ready, attempting deterministic recovery");
23
+
24
+ // Find and remove stale postmaster.pid
25
+ const { existsSync, unlinkSync, readFileSync } = await import("fs");
26
+ for (const dir of PG_DATA_DIRS) {
27
+ const pidFile = `${dir}/postmaster.pid`;
28
+ if (!existsSync(pidFile)) continue;
29
+
30
+ // Read the PID from line 1 and check if it's actually a postgres process
31
+ try {
32
+ const pid = parseInt(readFileSync(pidFile, "utf8").split("\n")[0], 10);
33
+ if (!isNaN(pid)) {
34
+ const check = Bun.spawnSync(["ps", "-p", String(pid), "-o", "comm="]);
35
+ const comm = new TextDecoder().decode(check.stdout).trim();
36
+ if (check.exitCode !== 0 || !comm.includes("postgres")) {
37
+ log.info({ pidFile, stalePid: pid, actualProcess: comm || "dead" }, "alive: removing stale postmaster.pid");
38
+ unlinkSync(pidFile);
39
+ }
40
+ }
41
+ } catch (err) {
42
+ log.warn({ err, pidFile }, "alive: could not inspect postmaster.pid");
43
+ }
44
+ }
45
+
46
+ // Restart the service
47
+ if (process.platform === "darwin") {
48
+ // Try common brew postgresql service names
49
+ for (const svc of ["postgresql@18", "postgresql@17", "postgresql"]) {
50
+ const result = Bun.spawnSync(["brew", "services", "start", svc]);
51
+ if (result.exitCode === 0) {
52
+ log.info({ service: svc }, "alive: brew service start issued");
53
+ break;
54
+ }
55
+ }
56
+ } else {
57
+ Bun.spawnSync(["systemctl", "start", "postgresql"]);
58
+ }
59
+
60
+ // Wait briefly for postgres to come up
61
+ await new Promise((r) => setTimeout(r, 3000));
62
+
63
+ const check = Bun.spawnSync(["pg_isready"]);
64
+ if (check.exitCode === 0) {
65
+ log.info("alive: postgres recovered via deterministic fix");
66
+ return true;
67
+ }
68
+
69
+ log.warn("alive: deterministic postgres recovery failed");
70
+ return false;
71
+ }
72
+
12
73
  async function attemptDbReconnect(): Promise<boolean> {
13
74
  try {
14
75
  await closeDb();
@@ -138,10 +199,28 @@ async function heartbeat(): Promise<void> {
138
199
  }
139
200
  }
140
201
 
141
- // Run recovery agent once per outage
202
+ // Deterministic postgres recovery before LLM agent
203
+ if (failureNames.includes("database") && !recoveryAttempted) {
204
+ const pgFixed = await recoverPostgres();
205
+ if (pgFixed) {
206
+ const reconnected = await attemptDbReconnect();
207
+ if (reconnected) {
208
+ const remaining = await getFailures();
209
+ if (remaining.length === 0) {
210
+ log.info("alive: postgres recovered (deterministic fix, no LLM needed)");
211
+ await notifyUser("Postgres was down (stale PID). Fixed automatically — no LLM agent needed.");
212
+ lastFailures = [];
213
+ recoveryAttempted = false;
214
+ return;
215
+ }
216
+ }
217
+ }
218
+ }
219
+
220
+ // Run LLM recovery agent once per outage (fallback for non-trivial issues)
142
221
  if (!recoveryAttempted) {
143
222
  recoveryAttempted = true;
144
- log.info({ failures: failureNames }, "alive: running recovery agent");
223
+ log.info({ failures: failureNames }, "alive: running LLM recovery agent");
145
224
 
146
225
  const { recovered, report } = await runRecoveryAgent(failures);
147
226
 
@@ -121,7 +121,7 @@ function waitForExit(timeoutMs: number): void {
121
121
  /** Return PIDs of running daemon processes (excluding ourselves). */
122
122
  export function findDaemonPids(): number[] {
123
123
  try {
124
- const result = Bun.spawnSync(["pgrep", "-f", "niahere/src/cli.* run$"]);
124
+ const result = Bun.spawnSync(["pgrep", "-f", "src/cli\\.ts run$"]);
125
125
  const stdout = new TextDecoder().decode(result.stdout).trim();
126
126
  if (!stdout) return [];
127
127
  return stdout.split("\n")
@@ -150,16 +150,19 @@ export async function runDaemon(): Promise<void> {
150
150
  delete process.env.CLAUDE_CODE_ENTRYPOINT;
151
151
  delete process.env.CLAUDE_AGENT_SDK_VERSION;
152
152
 
153
- // Startup guard: if another daemon is alive, exit immediately
153
+ // Startup guard: if another nia daemon is alive, exit immediately.
154
+ // Use pgrep (via findDaemonPids) instead of kill(pid,0) to verify the
155
+ // PID is actually a nia process — not a recycled OS PID from something else.
154
156
  const existingPid = readPid();
155
157
  if (existingPid !== null && existingPid !== process.pid) {
156
- try {
157
- process.kill(existingPid, 0); // Check if alive
158
+ const aliveDaemons = findDaemonPids();
159
+ if (aliveDaemons.includes(existingPid)) {
158
160
  log.debug({ existingPid, myPid: process.pid }, "another daemon is already running, exiting");
159
161
  process.exit(0);
160
- } catch {
161
- // Dead PID in pidfile — safe to take over
162
162
  }
163
+ // PID in file is stale (dead or recycled by OS) — safe to take over
164
+ log.warn({ stalePid: existingPid }, "taking over from stale pid");
165
+ removePid();
163
166
  }
164
167
 
165
168
  // Crash handlers — ensure PID cleanup and logging on unhandled errors.
@@ -247,13 +247,15 @@ export async function runJob(job: JobInput, onActivity?: ActivityCallback): Prom
247
247
  };
248
248
  appendAudit(auditEntry);
249
249
 
250
- state[job.name] = {
250
+ // Re-read state to avoid clobbering concurrent job updates
251
+ const freshState = { ...readState() };
252
+ freshState[job.name] = {
251
253
  lastRun: timestamp,
252
254
  status: result.status,
253
255
  duration_ms: result.duration_ms,
254
256
  error: result.error,
255
257
  };
256
- writeState(state);
258
+ writeState(freshState);
257
259
 
258
260
  return result;
259
261
  } catch (err) {
@@ -278,13 +280,15 @@ export async function runJob(job: JobInput, onActivity?: ActivityCallback): Prom
278
280
  error: errorMsg,
279
281
  });
280
282
 
281
- state[job.name] = {
283
+ // Re-read state to avoid clobbering concurrent job updates
284
+ const freshState = { ...readState() };
285
+ freshState[job.name] = {
282
286
  lastRun: timestamp,
283
287
  status: "error",
284
288
  duration_ms,
285
289
  error: errorMsg,
286
290
  };
287
- writeState(state);
291
+ writeState(freshState);
288
292
 
289
293
  return result;
290
294
  }