niahere 0.2.57 → 0.2.58
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/skills/optimization-loop/SKILL.md +230 -0
- package/skills/optimize/SKILL.md +238 -0
- package/src/chat/engine.ts +94 -17
- package/src/commands/backup.ts +26 -4
- package/src/core/agents.ts +22 -8
- package/src/core/consolidator.ts +52 -14
- package/src/core/health.ts +92 -23
- package/src/core/skills.ts +18 -6
- package/src/core/summarizer.ts +33 -8
- package/src/db/models/active_engine.ts +5 -3
- package/src/utils/retry.ts +18 -0
package/package.json
CHANGED
|
package/skills/optimization-loop/SKILL.md
ADDED
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: optimization-loop
|
|
3
|
+
description: |
|
|
4
|
+
The iterative optimization pattern (Karpathy Loop / autoresearch). Reference for running
|
|
5
|
+
autonomous experiment loops on any target: modify → score → keep or revert → repeat.
|
|
6
|
+
Use when running multiple iterations of improvement against a measurable metric — code
|
|
7
|
+
benchmarks, prompt quality, copy effectiveness, config tuning, or any scorable target.
|
|
8
|
+
Also known as "autoresearch." Use this skill to understand the pattern and discipline.
|
|
9
|
+
For orchestration (scheduling, user confirmation, job setup), see the "optimize" skill.
|
|
10
|
+
metadata:
|
|
11
|
+
version: 1.0.0
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
# Optimization Loop
|
|
15
|
+
|
|
16
|
+
The Karpathy Loop: autonomous iterative optimization through disciplined experimentation.
|
|
17
|
+
Modify a target, score the result, keep improvements, revert failures, repeat.
|
|
18
|
+
|
|
19
|
+
This skill defines the **pattern and discipline**. For when/how to schedule and orchestrate
|
|
20
|
+
optimization runs, see the `optimize` skill.
|
|
21
|
+
|
|
22
|
+
## The Pattern
|
|
23
|
+
|
|
24
|
+
```
|
|
25
|
+
freeze contract + rubric
|
|
26
|
+
save baseline (never touch again)
|
|
27
|
+
copy baseline → current-best
|
|
28
|
+
|
|
29
|
+
repeat:
|
|
30
|
+
1. read state — what's been tried, what worked
|
|
31
|
+
2. hypothesize — form a specific idea, informed by history
|
|
32
|
+
3. modify — produce a candidate version
|
|
33
|
+
4. gate check — hard constraints pass? if no → reject
|
|
34
|
+
5. score — compare candidate vs current-best (pairwise)
|
|
35
|
+
6. decide — clearly better? keep. otherwise revert.
|
|
36
|
+
7. log — append to results.jsonl
|
|
37
|
+
8. update state — what you tried, what happened, what next
|
|
38
|
+
|
|
39
|
+
until: budget exhausted, target reached, or plateau detected
|
|
40
|
+
notify user with summary
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Workspace Layout
|
|
44
|
+
|
|
45
|
+
Each optimization run gets a dedicated, self-contained directory:
|
|
46
|
+
|
|
47
|
+
```
|
|
48
|
+
~/.niahere/optimizations/{slug}-{hex}/
|
|
49
|
+
├── contract.md # Frozen at start: objective, scope, constraints, metrics, budget
|
|
50
|
+
├── rubric.md # Frozen at start: scoring criteria (never modify during run)
|
|
51
|
+
├── baseline.md # Original version (never modify)
|
|
52
|
+
├── current-best.md # Best version so far (update only on accept)
|
|
53
|
+
├── accepted/ # Every accepted candidate, numbered
|
|
54
|
+
│ ├── 001.md
|
|
55
|
+
│ ├── 002.md
|
|
56
|
+
│ └── ...
|
|
57
|
+
├── results.jsonl # One JSON object per experiment (append-only)
|
|
58
|
+
└── state.md # Your working notebook
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
**The slug** is human-readable (e.g., `signup-prompt`). The hex suffix (4 chars) prevents
|
|
62
|
+
collisions across multiple runs on the same target.
|
|
63
|
+
|
|
64
|
+
## The Contract (contract.md)
|
|
65
|
+
|
|
66
|
+
Freeze this at the start. Never modify during the run.
|
|
67
|
+
|
|
68
|
+
```markdown
|
|
69
|
+
# Optimization Contract
|
|
70
|
+
|
|
71
|
+
## Objective
|
|
72
|
+
|
|
73
|
+
[What we're optimizing and why — one sentence]
|
|
74
|
+
|
|
75
|
+
## Target
|
|
76
|
+
|
|
77
|
+
[File path or content being modified]
|
|
78
|
+
[Which sections/parts are in scope — be specific]
|
|
79
|
+
|
|
80
|
+
## Primary Metric
|
|
81
|
+
|
|
82
|
+
[The metric being optimized — what "better" means]
|
|
83
|
+
|
|
84
|
+
## Secondary Metrics (regression guards)
|
|
85
|
+
|
|
86
|
+
[Metrics that must NOT degrade. Each with a threshold.]
|
|
87
|
+
|
|
88
|
+
- [e.g., "Word count must stay under 200"]
|
|
89
|
+
- [e.g., "All existing tests must pass"]
|
|
90
|
+
- [e.g., "Readability score must stay above grade 8"]
|
|
91
|
+
|
|
92
|
+
## Hard Constraints
|
|
93
|
+
|
|
94
|
+
[Violations = automatic reject, no exceptions]
|
|
95
|
+
|
|
96
|
+
- [e.g., "Must mention the free trial"]
|
|
97
|
+
- [e.g., "Must pass lint and type check"]
|
|
98
|
+
|
|
99
|
+
## Soft Preferences
|
|
100
|
+
|
|
101
|
+
[Tiebreakers — not vetoes, but guide decisions]
|
|
102
|
+
|
|
103
|
+
- [e.g., "Prefer shorter over longer"]
|
|
104
|
+
- [e.g., "Prefer simple over clever"]
|
|
105
|
+
|
|
106
|
+
## Budget
|
|
107
|
+
|
|
108
|
+
- Max iterations: [N]
|
|
109
|
+
- Max wall-clock time: [hours]
|
|
110
|
+
|
|
111
|
+
## Stop Rules
|
|
112
|
+
|
|
113
|
+
- All iterations completed
|
|
114
|
+
- Target score reached: [if applicable]
|
|
115
|
+
- Plateau: [N] consecutive discards (default 5)
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
## Scoring
|
|
119
|
+
|
|
120
|
+
### For code targets
|
|
121
|
+
|
|
122
|
+
Run a benchmark or test command. Extract the metric. The command is fixed in the contract
|
|
123
|
+
and cannot be modified during the run.
|
|
124
|
+
|
|
125
|
+
```
|
|
126
|
+
1. Gate check: tests pass? lint clean? types check? → if any fail, reject immediately
|
|
127
|
+
2. Run benchmark command → extract primary metric
|
|
128
|
+
3. Check secondary metrics for regressions → if any violated, reject
|
|
129
|
+
4. Compare primary metric against current-best
|
|
130
|
+
5. Accept only if clearly improved (above noise floor)
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
### For content targets (prompts, copy, configs)
|
|
134
|
+
|
|
135
|
+
Use pairwise comparison. Never use absolute 1-10 scoring.
|
|
136
|
+
|
|
137
|
+
```
|
|
138
|
+
1. Gate check: hard constraints met? (word count, required elements, etc.)
|
|
139
|
+
2. Present both versions side by side:
|
|
140
|
+
- Randomly assign which is "Version A" and "Version B"
|
|
141
|
+
- Do NOT label which is current-best vs candidate
|
|
142
|
+
3. Evaluate using the frozen rubric criteria
|
|
143
|
+
4. Pick the winner — candidate must be CLEARLY better, not just different
|
|
144
|
+
5. If it's a toss-up, reject (bias toward stability)
|
|
145
|
+
6. Check secondary metrics for regressions
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
**Anti-bias controls for LLM-as-judge:**
|
|
149
|
+
|
|
150
|
+
- Randomize A/B order every time (prevents position bias)
|
|
151
|
+
- Never reveal which version is "current" vs "candidate"
|
|
152
|
+
- If the margin is slim, run the comparison twice with swapped order
|
|
153
|
+
- The rubric is frozen in `rubric.md` — you cannot modify scoring criteria mid-run
|
|
154
|
+
|
|
155
|
+
## Exploration Strategy
|
|
156
|
+
|
|
157
|
+
Don't just make incremental tweaks. Use staged exploration:
|
|
158
|
+
|
|
159
|
+
**Early phase (first ~30% of iterations):** Go broad. Try fundamentally different approaches.
|
|
160
|
+
Different structures, different angles, different trade-offs. You're mapping the space.
|
|
161
|
+
|
|
162
|
+
**Exploit phase (middle ~50%):** You've found something that works. Refine around it.
|
|
163
|
+
Incremental improvements, wording tweaks, parameter tuning.
|
|
164
|
+
|
|
165
|
+
**Escape phase (if plateaued):** If you hit the contract's plateau threshold (default 5 consecutive discards), try ONE radical departure
|
|
166
|
+
from current-best — something completely different. If that fails too, stop. You've likely
|
|
167
|
+
found a local optimum.
|
|
168
|
+
|
|
169
|
+
## The Results Log (results.jsonl)
|
|
170
|
+
|
|
171
|
+
Append one JSON object per experiment. Never edit previous entries.
|
|
172
|
+
|
|
173
|
+
```json
|
|
174
|
+
{"n": 1, "status": "keep", "hypothesis": "shorter opening hook", "score_note": "candidate clearly more direct", "duration_s": 45, "timestamp": "2026-04-07T02:14:00Z"}
|
|
175
|
+
{"n": 2, "status": "discard", "hypothesis": "add social proof", "score_note": "toss-up, rejected for stability", "duration_s": 38, "timestamp": "2026-04-07T02:21:00Z"}
|
|
176
|
+
{"n": 3, "status": "crash", "hypothesis": "doubled context window", "error": "benchmark timed out", "duration_s": 300, "timestamp": "2026-04-07T02:28:00Z"}
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
Every entry must include:
|
|
180
|
+
|
|
181
|
+
- `n` — experiment number
|
|
182
|
+
- `status` — `keep`, `discard`, or `crash`
|
|
183
|
+
- `hypothesis` — what you tried and why (one line)
|
|
184
|
+
- `score_note` — why you kept or discarded (one line); for `crash` entries, record the failure in `error` instead
|
|
185
|
+
- `timestamp` — when the experiment completed
|
|
186
|
+
|
|
187
|
+
## Resumability
|
|
188
|
+
|
|
189
|
+
If the run crashes or is interrupted:
|
|
190
|
+
|
|
191
|
+
1. Read `current-best.md` — this is always the last accepted version
|
|
192
|
+
2. Read `results.jsonl` — count completed experiments, review what was tried
|
|
193
|
+
3. Read `state.md` — pick up your thinking from where you left off
|
|
194
|
+
4. Continue from the next experiment number
|
|
195
|
+
5. Do NOT re-run completed experiments
|
|
196
|
+
|
|
197
|
+
## Scoring Integrity
|
|
198
|
+
|
|
199
|
+
**The scorer and the optimizer must be separated in intent.** You are both proposer and judge,
|
|
200
|
+
so you must be disciplined:
|
|
201
|
+
|
|
202
|
+
- The rubric is frozen. Do not adjust criteria because a candidate "almost" passes.
|
|
203
|
+
- Do not add special cases to make a favorite candidate win.
|
|
204
|
+
- Do not lower the bar after repeated failures. If nothing passes, that's a valid outcome.
|
|
205
|
+
- If you notice you're gaming your own rubric, stop and note it in state.md.
|
|
206
|
+
|
|
207
|
+
## When Finished
|
|
208
|
+
|
|
209
|
+
1. Update `state.md` with a final summary:
|
|
210
|
+
- Baseline description vs final best description
|
|
211
|
+
- Total experiments: N run, X accepted, Y discarded, Z crashed
|
|
212
|
+
- Key findings: what worked, what didn't, surprises
|
|
213
|
+
2. Send a message to the user (via `send_message`):
|
|
214
|
+
```
|
|
215
|
+
[optimization] Done. Ran N experiments on [target].
|
|
216
|
+
X accepted, Y discarded. [One-line summary of the best version vs baseline].
|
|
217
|
+
Results: ~/.niahere/optimizations/{slug}-{hex}/
|
|
218
|
+
```
|
|
219
|
+
3. Do NOT auto-apply the result. The user reviews `current-best.md` and decides
|
|
220
|
+
whether to use it.
|
|
221
|
+
|
|
222
|
+
## Principles
|
|
223
|
+
|
|
224
|
+
- **Propose, never apply.** The optimization produces a candidate. The user promotes it.
|
|
225
|
+
- **Simplicity criterion.** A marginal improvement that adds complexity isn't worth keeping.
|
|
226
|
+
Removing something while maintaining quality is always a win.
|
|
227
|
+
- **Bias toward stability.** When in doubt, reject. Keeping a good version is better than
|
|
228
|
+
accepting a sideways move.
|
|
229
|
+
- **One target, one metric, one run.** Don't try to optimize multiple things simultaneously.
|
|
230
|
+
Run separate optimizations for separate targets.
|
package/skills/optimize/SKILL.md
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: optimize
|
|
3
|
+
description: |
|
|
4
|
+
Schedule or run an iterative optimization pass on code, prompts, copy, or any scorable
|
|
5
|
+
target. Use when user asks to "optimize this", "run experiments", "autoresearch this",
|
|
6
|
+
"iterate on this overnight", "can this be better", or proactively suggest after completing
|
|
7
|
+
work that could benefit from further iteration. Also use when a job wants to self-optimize
|
|
8
|
+
something within its own run. Handles spec confirmation, scoring setup, job scheduling,
|
|
9
|
+
and result delivery. For the loop discipline itself, references the optimization-loop skill.
|
|
10
|
+
metadata:
|
|
11
|
+
version: 1.0.0
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
# Optimize
|
|
15
|
+
|
|
16
|
+
Schedule or run autonomous optimization passes. This skill handles the orchestration —
|
|
17
|
+
when to use it, how to confirm specs, how to schedule, how to deliver results.
|
|
18
|
+
|
|
19
|
+
For the loop discipline, scoring methods, and workspace layout, invoke the
|
|
20
|
+
`optimization-loop` skill.
|
|
21
|
+
|
|
22
|
+
## Two Entry Points
|
|
23
|
+
|
|
24
|
+
### 1. User explicitly asks
|
|
25
|
+
|
|
26
|
+
User says "autoresearch this", "optimize this overnight", "run experiments on this",
|
|
27
|
+
"can you iterate on this more", or similar.
|
|
28
|
+
|
|
29
|
+
**Don't suggest — confirm and schedule.** The user already wants this. Move to Step 1.
|
|
30
|
+
|
|
31
|
+
### 2. Proactive suggestion (after immediate work)
|
|
32
|
+
|
|
33
|
+
You just finished a task — rewrote copy, tuned a prompt, optimized a function. The result
|
|
34
|
+
is good, but more iterations could find something better.
|
|
35
|
+
|
|
36
|
+
Suggest briefly:
|
|
37
|
+
|
|
38
|
+
> "This is solid. Want me to schedule an overnight optimization pass? I'll run ~30
|
|
39
|
+
> experiments scoring each version against [brief criteria] and have the best version
|
|
40
|
+
> ready by morning."
|
|
41
|
+
|
|
42
|
+
**Rules for suggesting:**
|
|
43
|
+
|
|
44
|
+
- Only suggest when there's a clear, scorable metric
|
|
45
|
+
- Only suggest when the target is self-contained (one file, one prompt, one section)
|
|
46
|
+
- Don't suggest for trivial tasks or quick fixes
|
|
47
|
+
- Don't push if the user declines — move on immediately
|
|
48
|
+
- Don't suggest if the user said they need this done now and can't wait
|
|
49
|
+
|
|
50
|
+
## Step 1: Confirm the Setup
|
|
51
|
+
|
|
52
|
+
Before scheduling, confirm these with the user. Be concise — a quick summary, not an
|
|
53
|
+
interrogation.
|
|
54
|
+
|
|
55
|
+
**Target** — What are we optimizing?
|
|
56
|
+
|
|
57
|
+
- A file (code, config, prompt file)
|
|
58
|
+
- A section of content (landing page hero, email subject line)
|
|
59
|
+
- A prompt or template
|
|
60
|
+
|
|
61
|
+
**Scoring method** — How do we know if a version is better?
|
|
62
|
+
|
|
63
|
+
- Code: what benchmark or test command produces a number?
|
|
64
|
+
- Content: what criteria matter? (clarity, persuasiveness, brevity, conversion, etc.)
|
|
65
|
+
- Custom: does the user have a specific scoring script?
|
|
66
|
+
|
|
67
|
+
**Constraints** — What can't change?
|
|
68
|
+
|
|
69
|
+
- Hard constraints (must-haves, test requirements, word limits)
|
|
70
|
+
- Soft preferences (shorter is better, simpler is better)
|
|
71
|
+
|
|
72
|
+
**Secondary metrics** — What must NOT get worse?
|
|
73
|
+
|
|
74
|
+
- Code: performance can't drop, memory can't increase, tests must pass
|
|
75
|
+
- Content: readability, brand voice, required elements
|
|
76
|
+
- These are regression guards — violations veto an otherwise good candidate
|
|
77
|
+
|
|
78
|
+
**Iterations** — How many experiments? Default 30. User can adjust.
|
|
79
|
+
|
|
80
|
+
**When** — Now, or schedule for later? If later, what time?
|
|
81
|
+
|
|
82
|
+
Example confirmation:
|
|
83
|
+
|
|
84
|
+
> "Here's the plan:
|
|
85
|
+
>
|
|
86
|
+
> - **Target**: signup prompt at `src/prompts/signup.md`
|
|
87
|
+
> - **Scoring**: pairwise comparison on clarity, persuasiveness, and brevity
|
|
88
|
+
> - **Constraints**: must mention free trial, keep under 150 words
|
|
89
|
+
> - **Regression guards**: readability must stay above grade 8
|
|
90
|
+
> - **Iterations**: 30 experiments
|
|
91
|
+
> - **When**: tonight at midnight
|
|
92
|
+
>
|
|
93
|
+
> Sound right?"
|
|
94
|
+
|
|
95
|
+
Wait for confirmation before proceeding.
|
|
96
|
+
|
|
97
|
+
## Step 2: Set Up the Workspace
|
|
98
|
+
|
|
99
|
+
Create the optimization directory:
|
|
100
|
+
|
|
101
|
+
```
|
|
102
|
+
~/.niahere/optimizations/{slug}-{hex}/
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
Where `{slug}` is a short descriptive name and `{hex}` is 4 random hex chars.
|
|
106
|
+
|
|
107
|
+
Create the frozen files:
|
|
108
|
+
|
|
109
|
+
1. **contract.md** — objective, target, primary metric, secondary metrics, constraints,
|
|
110
|
+
preferences, budget, stop rules (see optimization-loop skill for template)
|
|
111
|
+
2. **rubric.md** — detailed scoring criteria
|
|
112
|
+
- For code: the benchmark command and how to extract the metric
|
|
113
|
+
- For content: the pairwise comparison rubric with specific criteria and weights
|
|
114
|
+
3. **baseline.md** — copy the current version of the target (the starting point)
|
|
115
|
+
4. **current-best.md** — copy of baseline (will be updated during the run)
|
|
116
|
+
5. **state.md** — initialize with "Run starting. 0 experiments completed."
|
|
117
|
+
6. **accepted/** — create empty directory
|
|
118
|
+
|
|
119
|
+
## Step 3: Compose the Job Prompt
|
|
120
|
+
|
|
121
|
+
Build a self-contained job prompt that encodes everything the agent needs to run
|
|
122
|
+
the optimization loop autonomously. The prompt must include:
|
|
123
|
+
|
|
124
|
+
```
|
|
125
|
+
Job: optimization — {slug}
|
|
126
|
+
|
|
127
|
+
You are running an optimization loop. Follow the optimization-loop pattern strictly.
|
|
128
|
+
|
|
129
|
+
## Your workspace
|
|
130
|
+
{absolute path to the optimization directory}
|
|
131
|
+
|
|
132
|
+
## What to optimize
|
|
133
|
+
{description of the target — file path, what it does, context}
|
|
134
|
+
|
|
135
|
+
## Current version
|
|
136
|
+
{full content of the target}
|
|
137
|
+
|
|
138
|
+
## Contract
|
|
139
|
+
{contents of contract.md}
|
|
140
|
+
|
|
141
|
+
## Scoring rubric
|
|
142
|
+
{contents of rubric.md}
|
|
143
|
+
|
|
144
|
+
## Loop instructions
|
|
145
|
+
|
|
146
|
+
Read your workspace files (contract.md, rubric.md, baseline.md, current-best.md,
|
|
147
|
+
state.md, and results.jsonl if it exists yet) to understand the current state.
|
|
148
|
+
|
|
149
|
+
For each iteration:
|
|
150
|
+
1. Read state.md for context on what's been tried
|
|
151
|
+
2. Form a hypothesis — what to change and why
|
|
152
|
+
3. Produce a candidate version
|
|
153
|
+
4. Gate check — verify all hard constraints from the contract; if any fail, reject the candidate (log as discard) and skip scoring
|
|
154
|
+
5. Score — compare candidate vs current-best using the rubric (pairwise, randomized order)
|
|
155
|
+
6. If candidate is clearly better AND no secondary metric regressions:
|
|
156
|
+
- Update current-best.md
|
|
157
|
+
- Save candidate to accepted/{NNN}.md
|
|
158
|
+
- Log {"status": "keep", ...} to results.jsonl
|
|
159
|
+
7. Otherwise (not clearly better, or a secondary metric regressed):
|
|
160
|
+
- Discard candidate
|
|
161
|
+
- Log {"status": "discard", ...} to results.jsonl
|
|
162
|
+
8. Update state.md with what you tried and learned
|
|
163
|
+
|
|
164
|
+
Stop when:
|
|
165
|
+
- Completed {N} iterations, OR
|
|
166
|
+
- {stop_count} consecutive discards (plateau), OR
|
|
167
|
+
- Target score reached (if specified in contract)
|
|
168
|
+
|
|
169
|
+
When finished, update state.md with a final summary and send a message to the user:
|
|
170
|
+
"[optimization] Done. Ran N experiments on {target}. X accepted, Y discarded.
|
|
171
|
+
{One-line summary}. Results: {workspace path}"
|
|
172
|
+
|
|
173
|
+
IMPORTANT:
|
|
174
|
+
- Do NOT modify contract.md or rubric.md
|
|
175
|
+
- Do NOT auto-apply results to the original file
|
|
176
|
+
- Do NOT stop to ask the user questions — run autonomously until done
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
## Step 4: Schedule the Job
|
|
180
|
+
|
|
181
|
+
Use the `add_job` MCP tool (preferred) or `nia job add` CLI:
|
|
182
|
+
|
|
183
|
+
- **name**: `optimize-{slug}` (e.g., `optimize-signup-prompt`)
|
|
184
|
+
- **schedule**: ISO timestamp for the agreed time, or now
|
|
185
|
+
- **schedule_type**: `once`
|
|
186
|
+
- **prompt**: the composed job prompt from Step 3
|
|
187
|
+
- **always**: `true` (overnight runs need to ignore active hours)
|
|
188
|
+
- **stateless**: `yes` (the optimization uses its own workspace, not the job's state.md)
|
|
189
|
+
|
|
190
|
+
Confirm to the user:
|
|
191
|
+
|
|
192
|
+
> "Scheduled. The optimization run starts at {time} and will run ~{N} experiments.
|
|
193
|
+
> I'll message you when it's done with the results."
|
|
194
|
+
|
|
195
|
+
## Step 5: After Completion
|
|
196
|
+
|
|
197
|
+
When the user asks about results, or when reviewing the notification:
|
|
198
|
+
|
|
199
|
+
1. Read `~/.niahere/optimizations/{slug}-{hex}/state.md` for the summary
|
|
200
|
+
2. Read `results.jsonl` for the experiment log
|
|
201
|
+
3. Show `current-best.md` vs `baseline.md` — the diff is the value
|
|
202
|
+
4. Show the accepted progression if the user wants to see the journey
|
|
203
|
+
5. Ask if the user wants to apply the result to the original target
|
|
204
|
+
|
|
205
|
+
## Running Now vs Later
|
|
206
|
+
|
|
207
|
+
**"Run it now":** Schedule with the current timestamp. The user stays in the conversation
|
|
208
|
+
and can check results when the job finishes. Good for shorter runs (10-15 iterations).
|
|
209
|
+
|
|
210
|
+
**"Schedule for later":** Schedule for a specific time (midnight, after hours). The user
|
|
211
|
+
goes about their day. The notification arrives when done. Good for longer runs (30+ iterations).
|
|
212
|
+
|
|
213
|
+
**"Run it inline":** If the user wants to optimize something RIGHT NOW in this conversation
|
|
214
|
+
(not as a job), you can run the optimization-loop pattern directly without scheduling a job.
|
|
215
|
+
Use this for quick 5-10 iteration runs where the user is watching.
|
|
216
|
+
|
|
217
|
+
## When a Job Self-Optimizes
|
|
218
|
+
|
|
219
|
+
A running job (e.g., news-curator, prompt-generator) can use this pattern to improve
|
|
220
|
+
its own approach. The flow:
|
|
221
|
+
|
|
222
|
+
1. Job creates an optimization subdirectory in its workspace or in `~/.niahere/optimizations/`
|
|
223
|
+
2. Runs the loop inline (not as a sub-job — within its own execution)
|
|
224
|
+
3. Saves the best version in the workspace
|
|
225
|
+
4. Does NOT auto-apply changes to its own prompt or config
|
|
226
|
+
5. Sends a message: "I found a better approach for [X]. Review at [path]."
|
|
227
|
+
6. The user decides whether to apply it (e.g., via `nia job update`)
|
|
228
|
+
|
|
229
|
+
## What NOT to Optimize
|
|
230
|
+
|
|
231
|
+
- Things without a clear metric (vague "make it better")
|
|
232
|
+
- Targets that require human judgment with no proxy (art, brand voice decisions)
|
|
233
|
+
- Multi-file changes with complex interdependencies
|
|
234
|
+
- Anything where the scoring takes longer than the modification (defeats the loop)
|
|
235
|
+
- Security-sensitive code where autonomous changes are risky
|
|
236
|
+
|
|
237
|
+
If the target doesn't fit, say so. Not everything benefits from iterative optimization.
|
|
238
|
+
Sometimes the first good version is the right answer.
|
package/src/chat/engine.ts
CHANGED
|
@@ -8,7 +8,15 @@ import { randomUUID } from "crypto";
|
|
|
8
8
|
import { buildSystemPrompt, getSessionContext } from "./identity";
|
|
9
9
|
import { getAgentDefinitions } from "../core/agents";
|
|
10
10
|
import { Session, Message, ActiveEngine } from "../db/models";
|
|
11
|
-
import type {
|
|
11
|
+
import type {
|
|
12
|
+
Attachment,
|
|
13
|
+
SendResult,
|
|
14
|
+
StreamCallback,
|
|
15
|
+
ActivityCallback,
|
|
16
|
+
SendCallbacks,
|
|
17
|
+
ChatEngine,
|
|
18
|
+
EngineOptions,
|
|
19
|
+
} from "../types";
|
|
12
20
|
import { truncate, formatToolUse } from "../utils/format-activity";
|
|
13
21
|
import { consolidateSession } from "../core/consolidator";
|
|
14
22
|
import { summarizeSession } from "../core/summarizer";
|
|
@@ -25,10 +33,19 @@ interface SDKUserMessage {
|
|
|
25
33
|
}
|
|
26
34
|
|
|
27
35
|
/** Convert provider-agnostic attachments to Anthropic content blocks. */
|
|
28
|
-
export function buildContentBlocks(
|
|
36
|
+
export function buildContentBlocks(
|
|
37
|
+
text: string,
|
|
38
|
+
attachments?: Attachment[],
|
|
39
|
+
): MessageParam["content"] {
|
|
29
40
|
if (!attachments?.length) return text;
|
|
30
41
|
|
|
31
|
-
const blocks: Array<
|
|
42
|
+
const blocks: Array<
|
|
43
|
+
| { type: "text"; text: string }
|
|
44
|
+
| {
|
|
45
|
+
type: "image";
|
|
46
|
+
source: { type: "base64"; media_type: string; data: string };
|
|
47
|
+
}
|
|
48
|
+
> = [];
|
|
32
49
|
|
|
33
50
|
for (const att of attachments) {
|
|
34
51
|
if (att.type === "image") {
|
|
@@ -94,6 +111,7 @@ class MessageStream {
|
|
|
94
111
|
|
|
95
112
|
interface PendingResult {
|
|
96
113
|
userMessage: string;
|
|
114
|
+
userSaved: boolean;
|
|
97
115
|
onStream: StreamCallback | null;
|
|
98
116
|
onActivity: ActivityCallback | null;
|
|
99
117
|
accumulatedText: string;
|
|
@@ -103,15 +121,22 @@ interface PendingResult {
|
|
|
103
121
|
reject: (error: Error) => void;
|
|
104
122
|
}
|
|
105
123
|
|
|
106
|
-
|
|
107
124
|
function sessionFileExists(sessionId: string, cwd: string): boolean {
|
|
108
125
|
// SDK stores sessions at ~/.claude/projects/<encoded-cwd>/<session-id>.jsonl
|
|
109
126
|
const encoded = cwd.replace(/\//g, "-");
|
|
110
|
-
const sessionFile = join(
|
|
127
|
+
const sessionFile = join(
|
|
128
|
+
homedir(),
|
|
129
|
+
".claude",
|
|
130
|
+
"projects",
|
|
131
|
+
encoded,
|
|
132
|
+
`${sessionId}.jsonl`,
|
|
133
|
+
);
|
|
111
134
|
return existsSync(sessionFile);
|
|
112
135
|
}
|
|
113
136
|
|
|
114
|
-
export async function createChatEngine(
|
|
137
|
+
export async function createChatEngine(
|
|
138
|
+
opts: EngineOptions,
|
|
139
|
+
): Promise<ChatEngine> {
|
|
115
140
|
const { room, channel, resume, mcpServers } = opts;
|
|
116
141
|
let systemPrompt = buildSystemPrompt("chat", channel);
|
|
117
142
|
|
|
@@ -156,7 +181,10 @@ export async function createChatEngine(opts: EngineOptions): Promise<ChatEngine>
|
|
|
156
181
|
idleTimer = setTimeout(async () => {
|
|
157
182
|
if (pending) {
|
|
158
183
|
// Don't tear down while a request is in flight
|
|
159
|
-
log.warn(
|
|
184
|
+
log.warn(
|
|
185
|
+
{ room },
|
|
186
|
+
"idle timer fired while request pending, skipping teardown",
|
|
187
|
+
);
|
|
160
188
|
return;
|
|
161
189
|
}
|
|
162
190
|
// Memory consolidation + session summary before "sleep"
|
|
@@ -165,7 +193,10 @@ export async function createChatEngine(opts: EngineOptions): Promise<ChatEngine>
|
|
|
165
193
|
log.error({ err, room }, "consolidation failed during idle teardown");
|
|
166
194
|
});
|
|
167
195
|
summarizeSession(sessionId, room).catch((err) => {
|
|
168
|
-
log.error(
|
|
196
|
+
log.error(
|
|
197
|
+
{ err, room },
|
|
198
|
+
"session summary failed during idle teardown",
|
|
199
|
+
);
|
|
169
200
|
});
|
|
170
201
|
}
|
|
171
202
|
teardown();
|
|
@@ -185,7 +216,10 @@ export async function createChatEngine(opts: EngineOptions): Promise<ChatEngine>
|
|
|
185
216
|
longRunningTimer = setTimeout(() => {
|
|
186
217
|
if (pending) {
|
|
187
218
|
longRunningWarned = true;
|
|
188
|
-
log.warn(
|
|
219
|
+
log.warn(
|
|
220
|
+
{ room, elapsed: LONG_RUNNING_WARN / 1000 },
|
|
221
|
+
"engine request running for 30+ minutes",
|
|
222
|
+
);
|
|
189
223
|
}
|
|
190
224
|
}, LONG_RUNNING_WARN);
|
|
191
225
|
}
|
|
@@ -250,7 +284,7 @@ export async function createChatEngine(opts: EngineOptions): Promise<ChatEngine>
|
|
|
250
284
|
await Session.create(sessionId, room);
|
|
251
285
|
}
|
|
252
286
|
|
|
253
|
-
if (pending) {
|
|
287
|
+
if (pending && !pending.userSaved) {
|
|
254
288
|
await Message.save({
|
|
255
289
|
sessionId,
|
|
256
290
|
room,
|
|
@@ -258,6 +292,7 @@ export async function createChatEngine(opts: EngineOptions): Promise<ChatEngine>
|
|
|
258
292
|
content: pending.userMessage,
|
|
259
293
|
isFromAgent: false,
|
|
260
294
|
});
|
|
295
|
+
pending.userSaved = true;
|
|
261
296
|
messageCount++;
|
|
262
297
|
}
|
|
263
298
|
}
|
|
@@ -279,7 +314,10 @@ export async function createChatEngine(opts: EngineOptions): Promise<ChatEngine>
|
|
|
279
314
|
if (lines.length > 1) {
|
|
280
315
|
// Show the last complete line (not the partial one being typed)
|
|
281
316
|
const completeLine = lines[lines.length - 2]?.trim();
|
|
282
|
-
if (
|
|
317
|
+
if (
|
|
318
|
+
completeLine &&
|
|
319
|
+
completeLine !== pending.lastThinkingLine
|
|
320
|
+
) {
|
|
283
321
|
pending.lastThinkingLine = completeLine;
|
|
284
322
|
pending.onActivity?.(truncate(completeLine, 70));
|
|
285
323
|
}
|
|
@@ -364,15 +402,26 @@ export async function createChatEngine(opts: EngineOptions): Promise<ChatEngine>
|
|
|
364
402
|
try {
|
|
365
403
|
messageId = await Message.save(saveParams);
|
|
366
404
|
} catch {
|
|
367
|
-
messageId = await Message.save({
|
|
405
|
+
messageId = await Message.save({
|
|
406
|
+
...saveParams,
|
|
407
|
+
metadata: undefined,
|
|
408
|
+
});
|
|
368
409
|
}
|
|
369
410
|
await Session.touch(sessionId);
|
|
370
|
-
Session.accumulateMetadata(sessionId, {
|
|
411
|
+
Session.accumulateMetadata(sessionId, {
|
|
412
|
+
...metadata,
|
|
413
|
+
channel,
|
|
414
|
+
}).catch(() => {});
|
|
371
415
|
}
|
|
372
416
|
|
|
373
417
|
await ActiveEngine.unregister(room);
|
|
374
418
|
clearLongRunningTimer();
|
|
375
|
-
pending.resolve({
|
|
419
|
+
pending.resolve({
|
|
420
|
+
result: resultText,
|
|
421
|
+
costUsd,
|
|
422
|
+
turns,
|
|
423
|
+
messageId,
|
|
424
|
+
});
|
|
376
425
|
pending = null;
|
|
377
426
|
resetIdleTimer();
|
|
378
427
|
} else {
|
|
@@ -390,9 +439,16 @@ export async function createChatEngine(opts: EngineOptions): Promise<ChatEngine>
|
|
|
390
439
|
// Stream ended without a result — subprocess exited or was killed
|
|
391
440
|
if (pending) {
|
|
392
441
|
const partial = pending.accumulatedText;
|
|
393
|
-
log.error(
|
|
442
|
+
log.error(
|
|
443
|
+
{ room, partialChars: partial.length },
|
|
444
|
+
"query stream ended without result, rejecting pending request",
|
|
445
|
+
);
|
|
394
446
|
await ActiveEngine.unregister(room).catch(() => {});
|
|
395
|
-
pending.reject(
|
|
447
|
+
pending.reject(
|
|
448
|
+
new Error(
|
|
449
|
+
`stream ended without result (${partial.length} chars accumulated)`,
|
|
450
|
+
),
|
|
451
|
+
);
|
|
396
452
|
pending = null;
|
|
397
453
|
}
|
|
398
454
|
} catch (err) {
|
|
@@ -419,7 +475,11 @@ export async function createChatEngine(opts: EngineOptions): Promise<ChatEngine>
|
|
|
419
475
|
return room;
|
|
420
476
|
},
|
|
421
477
|
|
|
422
|
-
async send(
|
|
478
|
+
async send(
|
|
479
|
+
userMessage: string,
|
|
480
|
+
callbacks?: SendCallbacks,
|
|
481
|
+
attachments?: Attachment[],
|
|
482
|
+
) {
|
|
423
483
|
// Clear idle timer — engine is not idle while processing a request
|
|
424
484
|
clearIdleTimer();
|
|
425
485
|
startLongRunningTimer();
|
|
@@ -430,9 +490,26 @@ export async function createChatEngine(opts: EngineOptions): Promise<ChatEngine>
|
|
|
430
490
|
startQuery();
|
|
431
491
|
}
|
|
432
492
|
|
|
493
|
+
// Save user message to DB if session already exists (resumed session).
|
|
494
|
+
// For new sessions, the init handler saves it once sessionId is known.
|
|
495
|
+
let userSaved = false;
|
|
496
|
+
if (sessionId) {
|
|
497
|
+
await Message.save({
|
|
498
|
+
sessionId,
|
|
499
|
+
room,
|
|
500
|
+
sender: "user",
|
|
501
|
+
content: userMessage,
|
|
502
|
+
isFromAgent: false,
|
|
503
|
+
});
|
|
504
|
+
await Session.touch(sessionId);
|
|
505
|
+
userSaved = true;
|
|
506
|
+
messageCount++;
|
|
507
|
+
}
|
|
508
|
+
|
|
433
509
|
return new Promise<SendResult>((resolve, reject) => {
|
|
434
510
|
pending = {
|
|
435
511
|
userMessage,
|
|
512
|
+
userSaved,
|
|
436
513
|
onStream: callbacks?.onStream || null,
|
|
437
514
|
onActivity: callbacks?.onActivity || null,
|
|
438
515
|
accumulatedText: "",
|
package/src/commands/backup.ts
CHANGED
|
@@ -57,9 +57,25 @@ export async function createBackup(silent = false): Promise<string> {
|
|
|
57
57
|
if (dbUrl) {
|
|
58
58
|
const dumpPath = join(home, "tmp", "db-backup.sql");
|
|
59
59
|
mkdirSync(join(home, "tmp"), { recursive: true });
|
|
60
|
-
|
|
60
|
+
// Parse URL to avoid exposing password in process list (visible via ps)
|
|
61
|
+
const url = new URL(dbUrl);
|
|
62
|
+
const dbName = decodeURIComponent(url.pathname.replace(/^\//, ""));
|
|
63
|
+
const pgArgs = ["pg_dump", "-f", dumpPath];
|
|
64
|
+
if (url.hostname) pgArgs.push("-h", url.hostname);
|
|
65
|
+
if (url.port) pgArgs.push("-p", url.port);
|
|
66
|
+
if (url.username) pgArgs.push("-U", decodeURIComponent(url.username));
|
|
67
|
+
if (dbName) pgArgs.push("-d", dbName);
|
|
68
|
+
const pgEnv: Record<string, string> = { ...process.env } as Record<
|
|
69
|
+
string,
|
|
70
|
+
string
|
|
71
|
+
>;
|
|
72
|
+
if (url.password) pgEnv.PGPASSWORD = decodeURIComponent(url.password);
|
|
73
|
+
const sslmode = url.searchParams.get("sslmode");
|
|
74
|
+
if (sslmode) pgEnv.PGSSLMODE = sslmode;
|
|
75
|
+
const pg = Bun.spawn(pgArgs, {
|
|
61
76
|
stdout: "pipe",
|
|
62
77
|
stderr: "pipe",
|
|
78
|
+
env: pgEnv,
|
|
63
79
|
});
|
|
64
80
|
const exitCode = await pg.exited;
|
|
65
81
|
if (exitCode === 0 && existsSync(dumpPath)) {
|
|
@@ -71,7 +87,9 @@ export async function createBackup(silent = false): Promise<string> {
|
|
|
71
87
|
dbDumped = true;
|
|
72
88
|
} else if (!silent) {
|
|
73
89
|
const stderr = await new Response(pg.stderr).text();
|
|
74
|
-
console.log(
|
|
90
|
+
console.log(
|
|
91
|
+
` ⚠ db dump skipped: ${stderr.trim() || `exit ${exitCode}`}`,
|
|
92
|
+
);
|
|
75
93
|
}
|
|
76
94
|
}
|
|
77
95
|
|
|
@@ -94,8 +112,12 @@ export async function createBackup(silent = false): Promise<string> {
|
|
|
94
112
|
|
|
95
113
|
// Clean up temp db dump
|
|
96
114
|
if (dbDumped) {
|
|
97
|
-
try {
|
|
98
|
-
|
|
115
|
+
try {
|
|
116
|
+
unlinkSync(join(home, "db-backup.sql"));
|
|
117
|
+
} catch {}
|
|
118
|
+
try {
|
|
119
|
+
unlinkSync(join(home, "tmp", "db-backup.sql"));
|
|
120
|
+
} catch {}
|
|
99
121
|
}
|
|
100
122
|
|
|
101
123
|
const size = statSync(outPath).size;
|
package/src/core/agents.ts
CHANGED
|
@@ -48,19 +48,25 @@ export function scanAgents(): AgentInfo[] {
|
|
|
48
48
|
try {
|
|
49
49
|
meta = (yaml.load(fmMatch[1]) as Record<string, unknown>) || {};
|
|
50
50
|
} catch (err) {
|
|
51
|
-
log.warn(
|
|
51
|
+
log.warn(
|
|
52
|
+
{ err, agent: entry.name, path: agentFile },
|
|
53
|
+
"failed to parse agent metadata, skipping",
|
|
54
|
+
);
|
|
52
55
|
continue;
|
|
53
56
|
}
|
|
54
|
-
const name =
|
|
57
|
+
const name =
|
|
58
|
+
(typeof meta.name === "string" ? meta.name : "") || entry.name;
|
|
55
59
|
|
|
56
|
-
|
|
57
|
-
seen.
|
|
60
|
+
const key = name.toLowerCase();
|
|
61
|
+
if (seen.has(key)) continue;
|
|
62
|
+
seen.add(key);
|
|
58
63
|
|
|
59
64
|
const body = content.replace(/^---\n[\s\S]*?\n---\n*/, "").trim();
|
|
60
65
|
|
|
61
66
|
agents.push({
|
|
62
67
|
name,
|
|
63
|
-
description:
|
|
68
|
+
description:
|
|
69
|
+
typeof meta.description === "string" ? meta.description : "",
|
|
64
70
|
body,
|
|
65
71
|
model: typeof meta.model === "string" ? meta.model : undefined,
|
|
66
72
|
source,
|
|
@@ -74,13 +80,21 @@ export function scanAgents(): AgentInfo[] {
|
|
|
74
80
|
export function getAgentsSummary(): string {
|
|
75
81
|
const agents = scanAgents();
|
|
76
82
|
if (agents.length === 0) return "";
|
|
77
|
-
const lines = agents.map((a) =>
|
|
83
|
+
const lines = agents.map((a) =>
|
|
84
|
+
a.description ? `- @${a.name}: ${a.description}` : `- @${a.name}`,
|
|
85
|
+
);
|
|
78
86
|
return `Available agents:\n${lines.join("\n")}`;
|
|
79
87
|
}
|
|
80
88
|
|
|
81
|
-
export function getAgentDefinitions(): Record<
|
|
89
|
+
export function getAgentDefinitions(): Record<
|
|
90
|
+
string,
|
|
91
|
+
{ description: string; prompt: string; model?: string }
|
|
92
|
+
> {
|
|
82
93
|
const agents = scanAgents();
|
|
83
|
-
const defs: Record<
|
|
94
|
+
const defs: Record<
|
|
95
|
+
string,
|
|
96
|
+
{ description: string; prompt: string; model?: string }
|
|
97
|
+
> = {};
|
|
84
98
|
|
|
85
99
|
for (const agent of agents) {
|
|
86
100
|
defs[agent.name] = {
|
package/src/core/consolidator.ts
CHANGED
|
@@ -22,8 +22,11 @@ import { runTask } from "./runner";
|
|
|
22
22
|
import { log } from "../utils/log";
|
|
23
23
|
import type { SessionMessage } from "../types";
|
|
24
24
|
|
|
25
|
-
/**
|
|
26
|
-
|
|
25
|
+
/** Bounded dedup: sessionId → message count at last consolidation. Prevents re-processing
|
|
26
|
+
* the same messages while allowing re-consolidation when new turns arrive. */
|
|
27
|
+
const processedCounts = new Map<string, number>();
|
|
28
|
+
const inFlight = new Set<string>();
|
|
29
|
+
const MAX_TRACKED = 500;
|
|
27
30
|
|
|
28
31
|
/** Max messages to include in transcript (most recent). Keeps prompt size bounded. */
|
|
29
32
|
const MAX_TRANSCRIPT_MESSAGES = 50;
|
|
@@ -37,11 +40,15 @@ function shouldSkip(room: string): boolean {
|
|
|
37
40
|
function formatTranscript(messages: SessionMessage[]): string {
|
|
38
41
|
const recent = messages.slice(-MAX_TRANSCRIPT_MESSAGES);
|
|
39
42
|
const skipped = messages.length - recent.length;
|
|
40
|
-
const prefix =
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
43
|
+
const prefix =
|
|
44
|
+
skipped > 0 ? `[...${skipped} earlier messages omitted]\n\n` : "";
|
|
45
|
+
|
|
46
|
+
return (
|
|
47
|
+
prefix +
|
|
48
|
+
recent
|
|
49
|
+
.map((m) => `[${m.sender}] (${m.createdAt}): ${m.content.slice(0, 2000)}`)
|
|
50
|
+
.join("\n\n")
|
|
51
|
+
);
|
|
45
52
|
}
|
|
46
53
|
|
|
47
54
|
/** Build the extraction prompt from a conversation transcript. */
|
|
@@ -80,7 +87,10 @@ Do NOT message the user about this. Save silently and report a brief summary of
|
|
|
80
87
|
}
|
|
81
88
|
|
|
82
89
|
/** Run the consolidation agent loop. */
|
|
83
|
-
async function runConsolidation(
|
|
90
|
+
async function runConsolidation(
|
|
91
|
+
transcript: string,
|
|
92
|
+
source: string,
|
|
93
|
+
): Promise<void> {
|
|
84
94
|
await runTask({
|
|
85
95
|
name: "consolidator",
|
|
86
96
|
prompt: buildConsolidationPrompt(transcript, source),
|
|
@@ -91,21 +101,42 @@ async function runConsolidation(transcript: string, source: string): Promise<voi
|
|
|
91
101
|
* Consolidate a chat session's conversation into memories.
|
|
92
102
|
* Called when a chat engine goes idle or is explicitly closed.
|
|
93
103
|
*/
|
|
94
|
-
export async function consolidateSession(
|
|
104
|
+
export async function consolidateSession(
|
|
105
|
+
sessionId: string,
|
|
106
|
+
room: string,
|
|
107
|
+
): Promise<void> {
|
|
95
108
|
if (shouldSkip(room)) return;
|
|
96
|
-
if (
|
|
97
|
-
consolidated.add(sessionId);
|
|
109
|
+
if (inFlight.has(sessionId)) return;
|
|
98
110
|
|
|
99
111
|
try {
|
|
100
112
|
const messages = await Message.getBySession(sessionId);
|
|
101
113
|
if (messages.length < 2) return;
|
|
102
114
|
|
|
103
|
-
|
|
115
|
+
// Skip if already processed this exact message count
|
|
116
|
+
if (processedCounts.get(sessionId) === messages.length) return;
|
|
117
|
+
|
|
118
|
+
inFlight.add(sessionId);
|
|
119
|
+
|
|
120
|
+
log.info(
|
|
121
|
+
{ sessionId, room, messageCount: messages.length },
|
|
122
|
+
"consolidator: extracting memories from chat",
|
|
123
|
+
);
|
|
104
124
|
|
|
105
125
|
const transcript = formatTranscript(messages);
|
|
106
126
|
await runConsolidation(transcript, `chat session idle — ${room}`);
|
|
127
|
+
|
|
128
|
+
// Mark as processed only on success
|
|
129
|
+
processedCounts.set(sessionId, messages.length);
|
|
130
|
+
|
|
131
|
+
// Evict oldest entries when over cap
|
|
132
|
+
if (processedCounts.size > MAX_TRACKED) {
|
|
133
|
+
const firstKey = processedCounts.keys().next().value;
|
|
134
|
+
if (firstKey) processedCounts.delete(firstKey);
|
|
135
|
+
}
|
|
107
136
|
} catch (err) {
|
|
108
137
|
log.error({ err, sessionId, room }, "consolidator: chat extraction failed");
|
|
138
|
+
} finally {
|
|
139
|
+
inFlight.delete(sessionId);
|
|
109
140
|
}
|
|
110
141
|
}
|
|
111
142
|
|
|
@@ -113,7 +144,11 @@ export async function consolidateSession(sessionId: string, room: string): Promi
|
|
|
113
144
|
* Consolidate a job run's output into memories.
|
|
114
145
|
* Called after a job completes in the runner.
|
|
115
146
|
*/
|
|
116
|
-
export async function consolidateJobRun(
|
|
147
|
+
export async function consolidateJobRun(
|
|
148
|
+
jobName: string,
|
|
149
|
+
jobPrompt: string,
|
|
150
|
+
result: string,
|
|
151
|
+
): Promise<void> {
|
|
117
152
|
// Skip if the job itself is the consolidator (prevent infinite loop)
|
|
118
153
|
if (jobName === "memory-consolidation") return;
|
|
119
154
|
|
|
@@ -123,7 +158,10 @@ export async function consolidateJobRun(jobName: string, jobPrompt: string, resu
|
|
|
123
158
|
if (result.length < 50) return;
|
|
124
159
|
|
|
125
160
|
try {
|
|
126
|
-
log.info(
|
|
161
|
+
log.info(
|
|
162
|
+
{ jobName, resultChars: result.length },
|
|
163
|
+
"consolidator: extracting memories from job",
|
|
164
|
+
);
|
|
127
165
|
await runConsolidation(transcript, `job run — ${jobName}`);
|
|
128
166
|
} catch (err) {
|
|
129
167
|
log.error({ err, jobName }, "consolidator: job extraction failed");
|
package/src/core/health.ts
CHANGED
|
@@ -5,6 +5,7 @@ import { getPaths } from "../utils/paths";
|
|
|
5
5
|
import { isRunning, readPid } from "./daemon";
|
|
6
6
|
import { errMsg } from "../utils/errors";
|
|
7
7
|
import { localTime } from "../utils/time";
|
|
8
|
+
import { withRetry } from "../utils/retry";
|
|
8
9
|
|
|
9
10
|
export type CheckStatus = "ok" | "warn" | "fail";
|
|
10
11
|
export type Check = { name: string; status: CheckStatus; detail: string };
|
|
@@ -22,9 +23,17 @@ export async function runHealthChecks(): Promise<Check[]> {
|
|
|
22
23
|
// Daemon
|
|
23
24
|
const pid = readPid();
|
|
24
25
|
if (isRunning()) {
|
|
25
|
-
checks.push({
|
|
26
|
+
checks.push({
|
|
27
|
+
name: "daemon",
|
|
28
|
+
status: "ok",
|
|
29
|
+
detail: "running (pid: " + pid + ")",
|
|
30
|
+
});
|
|
26
31
|
} else if (pid) {
|
|
27
|
-
checks.push({
|
|
32
|
+
checks.push({
|
|
33
|
+
name: "daemon",
|
|
34
|
+
status: "fail",
|
|
35
|
+
detail: "stale pid file (pid: " + pid + ", not running)",
|
|
36
|
+
});
|
|
28
37
|
} else {
|
|
29
38
|
checks.push({ name: "daemon", status: "warn", detail: "not running" });
|
|
30
39
|
}
|
|
@@ -32,19 +41,35 @@ export async function runHealthChecks(): Promise<Check[]> {
|
|
|
32
41
|
// Config
|
|
33
42
|
if (existsSync(paths.config)) {
|
|
34
43
|
const raw = readRawConfig();
|
|
35
|
-
checks.push({
|
|
44
|
+
checks.push({
|
|
45
|
+
name: "config",
|
|
46
|
+
status: "ok",
|
|
47
|
+
detail: Object.keys(raw).length + " keys loaded",
|
|
48
|
+
});
|
|
36
49
|
} else {
|
|
37
|
-
checks.push({
|
|
50
|
+
checks.push({
|
|
51
|
+
name: "config",
|
|
52
|
+
status: "fail",
|
|
53
|
+
detail: "missing (" + paths.config + ")",
|
|
54
|
+
});
|
|
38
55
|
}
|
|
39
56
|
|
|
40
57
|
// Database
|
|
41
58
|
try {
|
|
42
59
|
if (!config.database_url || !config.database_url.startsWith("postgres")) {
|
|
43
|
-
checks.push({
|
|
60
|
+
checks.push({
|
|
61
|
+
name: "database",
|
|
62
|
+
status: "fail",
|
|
63
|
+
detail: 'invalid url: "' + (config.database_url || "(empty)") + '"',
|
|
64
|
+
});
|
|
44
65
|
} else {
|
|
45
66
|
const { checkDbHealth } = await import("../commands/health-db");
|
|
46
67
|
const ok = await checkDbHealth(config.database_url);
|
|
47
|
-
checks.push({
|
|
68
|
+
checks.push({
|
|
69
|
+
name: "database",
|
|
70
|
+
status: ok ? "ok" : "fail",
|
|
71
|
+
detail: ok ? "connected" : "unreachable",
|
|
72
|
+
});
|
|
48
73
|
}
|
|
49
74
|
} catch (err) {
|
|
50
75
|
checks.push({ name: "database", status: "fail", detail: errMsg(err) });
|
|
@@ -60,13 +85,26 @@ export async function runHealthChecks(): Promise<Check[]> {
|
|
|
60
85
|
const tgToken = config.channels.telegram.bot_token;
|
|
61
86
|
if (tgToken) {
|
|
62
87
|
try {
|
|
63
|
-
const resp = await
|
|
64
|
-
|
|
88
|
+
const resp = await withRetry(() =>
|
|
89
|
+
fetch(`https://api.telegram.org/bot${tgToken}/getMe`, {
|
|
90
|
+
signal: AbortSignal.timeout(5000),
|
|
91
|
+
}),
|
|
92
|
+
);
|
|
93
|
+
const data = (await resp.json()) as { ok: boolean };
|
|
65
94
|
results.push(data.ok ? "telegram: connected" : "telegram: auth failed");
|
|
66
|
-
if (!data.ok)
|
|
95
|
+
if (!data.ok)
|
|
96
|
+
checks.push({
|
|
97
|
+
name: "telegram",
|
|
98
|
+
status: "fail",
|
|
99
|
+
detail: "auth failed",
|
|
100
|
+
});
|
|
67
101
|
} catch {
|
|
68
102
|
results.push("telegram: unreachable");
|
|
69
|
-
checks.push({
|
|
103
|
+
checks.push({
|
|
104
|
+
name: "telegram",
|
|
105
|
+
status: "warn",
|
|
106
|
+
detail: "unreachable",
|
|
107
|
+
});
|
|
70
108
|
}
|
|
71
109
|
}
|
|
72
110
|
|
|
@@ -74,31 +112,57 @@ export async function runHealthChecks(): Promise<Check[]> {
|
|
|
74
112
|
const slToken = config.channels.slack.bot_token;
|
|
75
113
|
if (slToken) {
|
|
76
114
|
try {
|
|
77
|
-
const resp = await
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
115
|
+
const resp = await withRetry(() =>
|
|
116
|
+
fetch("https://slack.com/api/auth.test", {
|
|
117
|
+
method: "POST",
|
|
118
|
+
headers: {
|
|
119
|
+
Authorization: `Bearer ${slToken}`,
|
|
120
|
+
"Content-Type": "application/json",
|
|
121
|
+
},
|
|
122
|
+
signal: AbortSignal.timeout(5000),
|
|
123
|
+
}),
|
|
124
|
+
);
|
|
125
|
+
const data = (await resp.json()) as { ok: boolean; error?: string };
|
|
126
|
+
results.push(
|
|
127
|
+
data.ok
|
|
128
|
+
? "slack: connected"
|
|
129
|
+
: `slack: ${data.error || "auth failed"}`,
|
|
130
|
+
);
|
|
131
|
+
if (!data.ok)
|
|
132
|
+
checks.push({
|
|
133
|
+
name: "slack",
|
|
134
|
+
status: "fail",
|
|
135
|
+
detail: data.error || "auth failed",
|
|
136
|
+
});
|
|
84
137
|
} catch {
|
|
85
138
|
results.push("slack: unreachable");
|
|
86
|
-
checks.push({ name: "slack", status: "
|
|
139
|
+
checks.push({ name: "slack", status: "warn", detail: "unreachable" });
|
|
87
140
|
}
|
|
88
141
|
}
|
|
89
142
|
|
|
90
143
|
if (results.length === 0) {
|
|
91
|
-
checks.push({
|
|
144
|
+
checks.push({
|
|
145
|
+
name: "channels",
|
|
146
|
+
status: "warn",
|
|
147
|
+
detail: "enabled but no tokens configured",
|
|
148
|
+
});
|
|
92
149
|
} else {
|
|
93
150
|
const allOk = results.every((r) => r.includes("connected"));
|
|
94
|
-
checks.push({
|
|
151
|
+
checks.push({
|
|
152
|
+
name: "channels",
|
|
153
|
+
status: allOk ? "ok" : "warn",
|
|
154
|
+
detail: results.join(", "),
|
|
155
|
+
});
|
|
95
156
|
}
|
|
96
157
|
}
|
|
97
158
|
|
|
98
159
|
// API keys
|
|
99
160
|
const geminiKey = config.gemini_api_key;
|
|
100
161
|
const rawConfig = readRawConfig();
|
|
101
|
-
const openaiKey =
|
|
162
|
+
const openaiKey =
|
|
163
|
+
typeof rawConfig.openai_api_key === "string"
|
|
164
|
+
? rawConfig.openai_api_key
|
|
165
|
+
: null;
|
|
102
166
|
const apiKeys: string[] = [];
|
|
103
167
|
if (geminiKey) apiKeys.push("gemini");
|
|
104
168
|
if (openaiKey) apiKeys.push("openai");
|
|
@@ -110,11 +174,16 @@ export async function runHealthChecks(): Promise<Check[]> {
|
|
|
110
174
|
|
|
111
175
|
// Persona files
|
|
112
176
|
const personaFiles = ["identity.md", "owner.md", "soul.md"];
|
|
113
|
-
const missing = personaFiles.filter(
|
|
177
|
+
const missing = personaFiles.filter(
|
|
178
|
+
(f) => !existsSync(join(paths.selfDir, f)),
|
|
179
|
+
);
|
|
114
180
|
checks.push({
|
|
115
181
|
name: "persona",
|
|
116
182
|
status: missing.length === 0 ? "ok" : "warn",
|
|
117
|
-
detail:
|
|
183
|
+
detail:
|
|
184
|
+
missing.length === 0
|
|
185
|
+
? "all files present"
|
|
186
|
+
: "missing: " + missing.join(", "),
|
|
118
187
|
});
|
|
119
188
|
|
|
120
189
|
// Daemon log
|
package/src/core/skills.ts
CHANGED
|
@@ -40,15 +40,25 @@ export function scanSkills(): SkillInfo[] {
|
|
|
40
40
|
try {
|
|
41
41
|
meta = (yaml.load(fmMatch[1]) as Record<string, unknown>) || {};
|
|
42
42
|
} catch (err) {
|
|
43
|
-
log.warn(
|
|
43
|
+
log.warn(
|
|
44
|
+
{ err, skill: entry.name, path: skillFile },
|
|
45
|
+
"failed to parse skill metadata, skipping",
|
|
46
|
+
);
|
|
44
47
|
continue;
|
|
45
48
|
}
|
|
46
|
-
const name =
|
|
49
|
+
const name =
|
|
50
|
+
(typeof meta.name === "string" ? meta.name : "") || entry.name;
|
|
47
51
|
|
|
48
|
-
|
|
49
|
-
seen.
|
|
52
|
+
const key = name.toLowerCase();
|
|
53
|
+
if (seen.has(key)) continue;
|
|
54
|
+
seen.add(key);
|
|
50
55
|
|
|
51
|
-
skills.push({
|
|
56
|
+
skills.push({
|
|
57
|
+
name,
|
|
58
|
+
description:
|
|
59
|
+
typeof meta.description === "string" ? meta.description : "",
|
|
60
|
+
source,
|
|
61
|
+
});
|
|
52
62
|
}
|
|
53
63
|
}
|
|
54
64
|
|
|
@@ -62,6 +72,8 @@ export function getSkillNames(): string[] {
|
|
|
62
72
|
export function getSkillsSummary(): string {
|
|
63
73
|
const skills = scanSkills();
|
|
64
74
|
if (skills.length === 0) return "";
|
|
65
|
-
const lines = skills.map((s) =>
|
|
75
|
+
const lines = skills.map((s) =>
|
|
76
|
+
s.description ? `- /${s.name}: ${s.description}` : `- /${s.name}`,
|
|
77
|
+
);
|
|
66
78
|
return `Available skills:\n${lines.join("\n")}`;
|
|
67
79
|
}
|
package/src/core/summarizer.ts
CHANGED
|
@@ -15,8 +15,10 @@ import { runTask } from "./runner";
|
|
|
15
15
|
import { log } from "../utils/log";
|
|
16
16
|
import type { SessionMessage } from "../types";
|
|
17
17
|
|
|
18
|
-
/**
|
|
19
|
-
const
|
|
18
|
+
/** Bounded dedup: sessionId → message count at last summarization. */
|
|
19
|
+
const processedCounts = new Map<string, number>();
|
|
20
|
+
const inFlight = new Set<string>();
|
|
21
|
+
const MAX_TRACKED = 500;
|
|
20
22
|
|
|
21
23
|
/** Max messages to include (most recent). */
|
|
22
24
|
const MAX_MESSAGES = 30;
|
|
@@ -33,16 +35,26 @@ function formatTranscript(messages: SessionMessage[]): string {
|
|
|
33
35
|
* Summarize a session and store the result in the sessions table.
|
|
34
36
|
* Called when a chat engine goes idle — produces a context bridge for the next session.
|
|
35
37
|
*/
|
|
36
|
-
export async function summarizeSession(
|
|
38
|
+
export async function summarizeSession(
|
|
39
|
+
sessionId: string,
|
|
40
|
+
room: string,
|
|
41
|
+
): Promise<void> {
|
|
37
42
|
if (room.includes("placeholder")) return;
|
|
38
|
-
if (
|
|
39
|
-
summarized.add(sessionId);
|
|
43
|
+
if (inFlight.has(sessionId)) return;
|
|
40
44
|
|
|
41
45
|
try {
|
|
42
46
|
const messages = await Message.getBySession(sessionId);
|
|
43
47
|
if (messages.length < 2) return;
|
|
44
48
|
|
|
45
|
-
|
|
49
|
+
// Skip if already processed this exact message count
|
|
50
|
+
if (processedCounts.get(sessionId) === messages.length) return;
|
|
51
|
+
|
|
52
|
+
inFlight.add(sessionId);
|
|
53
|
+
|
|
54
|
+
log.info(
|
|
55
|
+
{ sessionId, room, messageCount: messages.length },
|
|
56
|
+
"summarizer: generating session summary",
|
|
57
|
+
);
|
|
46
58
|
|
|
47
59
|
const transcript = formatTranscript(messages);
|
|
48
60
|
|
|
@@ -71,11 +83,24 @@ Keep it concise — a handoff note, not a report. Output ONLY the summary text.`
|
|
|
71
83
|
const summary = output.agentText.trim();
|
|
72
84
|
if (summary && summary.length > 10 && summary.length < 2000) {
|
|
73
85
|
await Session.setSummary(sessionId, summary);
|
|
74
|
-
|
|
86
|
+
processedCounts.set(sessionId, messages.length);
|
|
87
|
+
if (processedCounts.size > MAX_TRACKED) {
|
|
88
|
+
const firstKey = processedCounts.keys().next().value;
|
|
89
|
+
if (firstKey) processedCounts.delete(firstKey);
|
|
90
|
+
}
|
|
91
|
+
log.info(
|
|
92
|
+
{ sessionId, room, summaryChars: summary.length },
|
|
93
|
+
"summarizer: saved",
|
|
94
|
+
);
|
|
75
95
|
} else {
|
|
76
|
-
log.warn(
|
|
96
|
+
log.warn(
|
|
97
|
+
{ sessionId, room, length: summary.length },
|
|
98
|
+
"summarizer: output too short or too long, skipped",
|
|
99
|
+
);
|
|
77
100
|
}
|
|
78
101
|
} catch (err) {
|
|
79
102
|
log.error({ err, sessionId, room }, "summarizer: failed");
|
|
103
|
+
} finally {
|
|
104
|
+
inFlight.delete(sessionId);
|
|
80
105
|
}
|
|
81
106
|
}
|
|
@@ -26,7 +26,9 @@ export async function unregister(room: string): Promise<void> {
|
|
|
26
26
|
await sql`DELETE FROM active_engines WHERE room = ${room}`;
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
-
export async function clearStale(
|
|
29
|
+
export async function clearStale(
|
|
30
|
+
maxAgeMs: number = 5 * 60 * 1000,
|
|
31
|
+
): Promise<void> {
|
|
30
32
|
const sql = getSql();
|
|
31
33
|
await sql`DELETE FROM active_engines WHERE last_ping < NOW() - ${maxAgeMs / 1000}::int * interval '1 second'`;
|
|
32
34
|
}
|
|
@@ -38,8 +40,8 @@ export async function clearAll(): Promise<void> {
|
|
|
38
40
|
|
|
39
41
|
export async function list(): Promise<ActiveEngine[]> {
|
|
40
42
|
const sql = getSql();
|
|
41
|
-
|
|
42
|
-
|
|
43
|
+
const rows =
|
|
44
|
+
await sql`SELECT room, channel, started_at, last_ping FROM active_engines ORDER BY started_at`;
|
|
43
45
|
return rows.map((r) => ({
|
|
44
46
|
room: r.room,
|
|
45
47
|
channel: r.channel,
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/** Retry a function with Fibonacci backoff. Only retries on thrown errors (not bad return values). */
|
|
2
|
+
export async function withRetry<T>(
|
|
3
|
+
fn: () => Promise<T>,
|
|
4
|
+
retries = 3,
|
|
5
|
+
): Promise<T> {
|
|
6
|
+
let a = 1,
|
|
7
|
+
b = 1;
|
|
8
|
+
for (let i = 0; i <= retries; i++) {
|
|
9
|
+
try {
|
|
10
|
+
return await fn();
|
|
11
|
+
} catch (err) {
|
|
12
|
+
if (i === retries) throw err;
|
|
13
|
+
await new Promise((r) => setTimeout(r, a * 1000));
|
|
14
|
+
[a, b] = [b, a + b];
|
|
15
|
+
}
|
|
16
|
+
}
|
|
17
|
+
throw new Error("unreachable"); // satisfies TS return type
|
|
18
|
+
}
|