@onlooker-community/ecosystem 0.23.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/.github/workflows/autofix.yml +65 -0
- package/.release-please-manifest.json +3 -3
- package/CHANGELOG.md +14 -0
- package/package.json +1 -1
- package/plugins/compass/README.md +173 -0
- package/plugins/counsel/README.md +98 -0
- package/plugins/governor/README.md +127 -0
- package/plugins/librarian/.claude-plugin/plugin.json +2 -2
- package/plugins/librarian/CHANGELOG.md +7 -0
- package/plugins/librarian/scripts/lib/librarian-cli.sh +339 -0
- package/plugins/librarian/skills/librarian/SKILL.md +63 -0
- package/plugins/scribe/.claude-plugin/plugin.json +1 -1
- package/plugins/scribe/CHANGELOG.md +7 -0
- package/plugins/scribe/README.md +118 -0
- package/plugins/scribe/scripts/hooks/scribe-capture.sh +0 -0
- package/plugins/scribe/scripts/hooks/scribe-session-start.sh +0 -0
- package/plugins/scribe/scripts/hooks/scribe-stop.sh +0 -0
- package/plugins/warden/README.md +185 -0
- package/test/bats/librarian-cli.bats +305 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ecosystem",
|
|
3
|
-
"version": "0.23.0",
|
|
3
|
+
"version": "0.24.0",
|
|
4
4
|
"description": "Observability substrate for Claude Code. Provides the shared ~/.onlooker/ storage root, canonical schema-validated event emission, session and tool tracking hooks, and prompt rules. Required by all other Onlooker plugins.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Onlooker Community",
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
name: Auto-format
|
|
2
|
+
|
|
3
|
+
# Applies `npm run lint` + `npm run format` automatically so formatting drift
|
|
4
|
+
# never has to be fixed by hand. On pull requests it pushes a fixup commit back
|
|
5
|
+
# to the PR branch (re-triggering checks via the PAT) — this is what keeps the
|
|
6
|
+
# release-please PR green without manual intervention. On push to main it acts
|
|
7
|
+
# as a safety net, committing fixes directly without skipping CI; the follow-up
# autofix run no-ops once the repo is clean, so it does not loop.
|
|
8
|
+
on:
|
|
9
|
+
pull_request:
|
|
10
|
+
push:
|
|
11
|
+
branches:
|
|
12
|
+
- main
|
|
13
|
+
|
|
14
|
+
permissions:
|
|
15
|
+
contents: write
|
|
16
|
+
|
|
17
|
+
jobs:
|
|
18
|
+
autofix:
|
|
19
|
+
name: Auto-format and lint
|
|
20
|
+
runs-on: ubuntu-latest
|
|
21
|
+
# On fork PRs the PAT secret is not available and we cannot push to the
|
|
22
|
+
# contributor's branch, so skip the job rather than fail noisily. Same-repo
|
|
23
|
+
# PRs (including release-please's) and pushes to main always run.
|
|
24
|
+
if: >-
|
|
25
|
+
github.event_name == 'push' ||
|
|
26
|
+
github.event.pull_request.head.repo.full_name == github.repository
|
|
27
|
+
steps:
|
|
28
|
+
- uses: actions/checkout@v4
|
|
29
|
+
with:
|
|
30
|
+
# Use the release-please PAT (not the default GITHUB_TOKEN) so the
|
|
31
|
+
# pushed fixup commit re-triggers required checks on the PR. Commits
|
|
32
|
+
# made with GITHUB_TOKEN do not start new workflow runs.
|
|
33
|
+
token: ${{ secrets.RELEASE_PLEASE_TOKEN }}
|
|
34
|
+
# Check out the actual branch head. For pull_request events the
|
|
35
|
+
# default checkout is a detached merge ref we cannot push from.
|
|
36
|
+
ref: ${{ github.head_ref || github.ref_name }}
|
|
37
|
+
|
|
38
|
+
- uses: actions/setup-node@v4
|
|
39
|
+
with:
|
|
40
|
+
node-version: '22'
|
|
41
|
+
|
|
42
|
+
- name: Install Node dependencies
|
|
43
|
+
run: npm ci
|
|
44
|
+
|
|
45
|
+
- name: Apply lint and format fixes
|
|
46
|
+
run: |
|
|
47
|
+
npm run lint
|
|
48
|
+
npm run format
|
|
49
|
+
|
|
50
|
+
- name: Commit and push fixes
|
|
51
|
+
run: |
|
|
52
|
+
if git diff --quiet; then
|
|
53
|
+
echo "No formatting changes needed."
|
|
54
|
+
exit 0
|
|
55
|
+
fi
|
|
56
|
+
git config user.name 'github-actions[bot]'
|
|
57
|
+
git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
|
|
58
|
+
git add -A
|
|
59
|
+
if [ "${{ github.event_name }}" = 'push' ]; then
|
|
60
|
+
# On main, do not skip CI; the follow-up autofix run will no-op once the repo is clean.
|
|
61
|
+
git commit -m 'style: auto-format :art:'
|
|
62
|
+
else
|
|
63
|
+
git commit -m 'style: auto-format :art:'
|
|
64
|
+
fi
|
|
65
|
+
git push origin HEAD:${{ github.head_ref || github.ref_name }}
|
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
{
|
|
2
|
-
".": "0.23.0",
|
|
2
|
+
".": "0.24.0",
|
|
3
3
|
"plugins/archivist": "0.1.0",
|
|
4
4
|
"plugins/tribunal": "1.0.1",
|
|
5
5
|
"plugins/echo": "0.2.0",
|
|
6
6
|
"plugins/cartographer": "0.2.0",
|
|
7
7
|
"plugins/governor": "0.2.0",
|
|
8
8
|
"plugins/compass": "0.2.0",
|
|
9
|
-
"plugins/scribe": "0.2.0",
|
|
9
|
+
"plugins/scribe": "0.2.1",
|
|
10
10
|
"plugins/counsel": "0.2.0",
|
|
11
11
|
"plugins/warden": "0.2.0",
|
|
12
|
-
"plugins/librarian": "0.
|
|
12
|
+
"plugins/librarian": "0.2.0",
|
|
13
13
|
"plugins/curator": "0.1.0",
|
|
14
14
|
"plugins/historian": "0.2.0"
|
|
15
15
|
}
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,19 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.24.0](https://github.com/onlooker-community/ecosystem/compare/ecosystem-v0.23.1...ecosystem-v0.24.0) (2026-06-04)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### Features
|
|
7
|
+
|
|
8
|
+
* **librarian:** /librarian review skill closes promotion loop :tada: ([#68](https://github.com/onlooker-community/ecosystem/issues/68)) ([8f3e3db](https://github.com/onlooker-community/ecosystem/commit/8f3e3dbdf6f08dceb0cf61d46281936a4f9954de))
|
|
9
|
+
|
|
10
|
+
## [0.23.1](https://github.com/onlooker-community/ecosystem/compare/ecosystem-v0.23.0...ecosystem-v0.23.1) (2026-06-04)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
### Bug Fixes
|
|
14
|
+
|
|
15
|
+
* **scribe:** mark hook scripts executable :relieved: ([#64](https://github.com/onlooker-community/ecosystem/issues/64)) ([05603e5](https://github.com/onlooker-community/ecosystem/commit/05603e56895c009c1435d1712592adbbc4c15e61))
|
|
16
|
+
|
|
3
17
|
## [0.23.0](https://github.com/onlooker-community/ecosystem/compare/ecosystem-v0.22.0...ecosystem-v0.23.0) (2026-06-04)
|
|
4
18
|
|
|
5
19
|
|
package/package.json
CHANGED
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
# Compass
|
|
2
|
+
|
|
3
|
+
Pre-write intent clarity gate.
|
|
4
|
+
|
|
5
|
+
Compass fires on `PreToolUse` for write-class operations, evaluates whether the pending write has sufficient intent clarity to proceed, and blocks with a structured clarification prompt when confidence is low or the evaluators disagree. It is the only plugin that gates write-class tool calls before they execute — complementing governor (budget), tribunal (post-task quality), and warden (safety). To avoid the most common false positive — a terse user reply to a question the agent just asked — Compass evaluates the pending write against the **prior assistant turn** as context, not the current context alone. See [ADR-001](docs/adr/001-evaluate-prompts-in-context.md).
|
|
6
|
+
|
|
7
|
+
Compass is a sibling plugin to [`ecosystem`](../../) and assumes the Onlooker observability substrate (`~/.onlooker/`) is present.
|
|
8
|
+
|
|
9
|
+
## How it works
|
|
10
|
+
|
|
11
|
+
| Hook | Surface | What Compass does |
|
|
12
|
+
|------|---------|-------------------|
|
|
13
|
+
| `PreToolUse` | `Write`, `Edit`, `MultiEdit` | Runs the full evaluation pipeline and blocks the write when confidence is below threshold or the evaluators disagree. |
|
|
14
|
+
| `PreToolUse` | `Bash` | Same pipeline, but first matches the command against write patterns (redirects, `rm`, `mv`, `cp`, `tee`, and similar). Exits 0 immediately when no write pattern matches. |
|
|
15
|
+
| `PostToolUse` | `Write`, `Edit`, `MultiEdit` | Records the written file path, stem, and timestamp to the session cooldown table so same-file follow-up writes are not re-checked. |
|
|
16
|
+
| `SessionStart` | `*` | Initializes session state: zero turn-check count, empty cooldown table, closed circuit breaker. Exits silently when Compass is disabled. |
|
|
17
|
+
|
|
18
|
+
The evaluation pipeline runs in order:
|
|
19
|
+
|
|
20
|
+
```
|
|
21
|
+
Trigger Gate → Transcript Reader → Symbolic Skip Layer → Sanitizer → N=5 Evaluators → Gate
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
1. **Trigger gate** — applies, in order, the skip sentinel (`[compass:skip]`), skip globs, dir-plus-stem cooldown, the per-turn check budget, the context minimum, and the circuit breaker. The first match short-circuits and emits `compass.check.skipped`.
|
|
25
|
+
2. **Transcript reader** — resolves the prior assistant turn from `transcript_path` in the hook JSON payload (the same field `tribunal-stop-gate.sh` reads). It always reads one turn back; that turn is already committed before `PreToolUse` fires, so there is no timing-skew risk. When `transcript_path` is absent or unreadable, the pipeline proceeds with an empty prior turn.
|
|
26
|
+
3. **Symbolic skip layer** — short-circuits to a pass without an LLM call when the prior turn is an enumerated question and the current context is a clean option reference (a number, an ordinal phrase, or a short affirmation with no qualifier clause). Controlled by `skip_patterns.reply_to_question.enabled` (default `true`).
|
|
27
|
+
4. **Sanitizer** — strips evaluator prompt tags, control characters, and null bytes from all evaluator-bound fields, then truncates them, before any content leaves the machine.
|
|
28
|
+
5. **N=5 evaluators** — launches `evaluator.n` parallel Haiku calls with a structured prompt that places `<prior_assistant_turn>` and `<context_excerpt>` in separate XML-delimited slots. The convergence question is: *"Given the prior assistant turn as context, would two independent readers converge on the same interpretation of this write?"*
|
|
29
|
+
6. **Gate** — aggregates the sample scores into a mean and standard deviation and applies the blocking rule.
|
|
30
|
+
|
|
31
|
+
### Blocking rule
|
|
32
|
+
|
|
33
|
+
Compass blocks the write when **`confidence < confidence_threshold` OR `stddev > stddev_threshold`** (defaults `0.65` and `0.20`). The standard-deviation signal is independent of the mean — when the evaluators disagree, that disagreement is itself a reliable ambiguity signal. A blocked write surfaces the triggering file and tool, the mean score, the standard deviation, the most common concern, the evaluator rationale, and three resolution paths: type `compass: proceed` to override, provide more context for a single re-check, or type `compass: cancel` to abandon the write.
|
|
34
|
+
|
|
35
|
+
### Error handling
|
|
36
|
+
|
|
37
|
+
The default `error_policy` is `closed`: when fewer than `evaluator.min_valid_samples` samples return valid JSON, Compass blocks the write and explains that the check could not complete. Set `error_policy: "open"` to pass writes through on evaluator failure (appropriate for CI). A session-scoped circuit breaker opens after `circuit_breaker.consecutive_failures_to_open` consecutive failures and fails open for `circuit_breaker.open_duration_seconds` seconds, regardless of `error_policy`.
|
|
38
|
+
|
|
39
|
+
## Activation
|
|
40
|
+
|
|
41
|
+
Compass is **off by default**. Enable per-project in `.claude/settings.json`:
|
|
42
|
+
|
|
43
|
+
```json
|
|
44
|
+
{
|
|
45
|
+
"compass": {
|
|
46
|
+
"enabled": true
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Or globally in `~/.claude/settings.json`.
|
|
52
|
+
|
|
53
|
+
## Configuration
|
|
54
|
+
|
|
55
|
+
All keys are optional. Unset keys fall back to the plugin's `config.json` defaults.
|
|
56
|
+
|
|
57
|
+
```json
|
|
58
|
+
{
|
|
59
|
+
"compass": {
|
|
60
|
+
"enabled": false,
|
|
61
|
+
"evaluator": {
|
|
62
|
+
"model": "claude-haiku-4-5-20251001",
|
|
63
|
+
"n": 5,
|
|
64
|
+
"temperature": 0.3,
|
|
65
|
+
"max_output_tokens": 128,
|
|
66
|
+
"sample_timeout_seconds": 8,
|
|
67
|
+
"min_valid_samples": 3
|
|
68
|
+
},
|
|
69
|
+
"confidence_threshold": 0.65,
|
|
70
|
+
"stddev_threshold": 0.2,
|
|
71
|
+
"cooldown": {
|
|
72
|
+
"strategy": "path_and_identity",
|
|
73
|
+
"seconds": 120,
|
|
74
|
+
"identity_match": "dir_plus_stem"
|
|
75
|
+
},
|
|
76
|
+
"transcript": {
|
|
77
|
+
"prior_turn_chars_max": 800,
|
|
78
|
+
"transcript_max_age_seconds": 300
|
|
79
|
+
},
|
|
80
|
+
"skip_patterns": {
|
|
81
|
+
"reply_to_question": {
|
|
82
|
+
"enabled": true
|
|
83
|
+
}
|
|
84
|
+
},
|
|
85
|
+
"max_checks_per_turn": 3,
|
|
86
|
+
"min_context_chars": 80,
|
|
87
|
+
"context_chars_max": 600,
|
|
88
|
+
"include_file_contents": false,
|
|
89
|
+
"skip_globs": ["**/*.lock", "**/*.sum", "**/node_modules/**", "**/.git/**", "**/dist/**", "**/build/**"],
|
|
90
|
+
"error_policy": "closed",
|
|
91
|
+
"circuit_breaker": {
|
|
92
|
+
"enabled": true,
|
|
93
|
+
"consecutive_failures_to_open": 3,
|
|
94
|
+
"open_duration_seconds": 300,
|
|
95
|
+
"open_behavior": "fail_open"
|
|
96
|
+
},
|
|
97
|
+
"intervention": {
|
|
98
|
+
"recheck_limit": 1
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
| Key | Default | Description |
|
|
105
|
+
|-----|---------|-------------|
|
|
106
|
+
| `enabled` | `false` | Must be `true` for any evaluation to run. |
|
|
107
|
+
| `evaluator.model` | `claude-haiku-4-5-20251001` | Model used for each evaluation sample. Haiku is fast and cheap; the convergence prompt does not require deep reasoning. |
|
|
108
|
+
| `evaluator.n` | `5` | Number of parallel evaluation samples launched per check. |
|
|
109
|
+
| `evaluator.temperature` | `0.3` | Sampling temperature. The noise floor at `n=5`, `temperature=0.3` is ~0.62–0.65 for unambiguous tasks, which informs the `confidence_threshold` default. |
|
|
110
|
+
| `evaluator.max_output_tokens` | `128` | Token ceiling per sample. The evaluator returns a small JSON object, so this is intentionally tight. |
|
|
111
|
+
| `evaluator.sample_timeout_seconds` | `8` | Per-sample watchdog. Samples not returned within this window are killed and excluded. |
|
|
112
|
+
| `evaluator.min_valid_samples` | `3` | Minimum number of samples that must return valid JSON. Below this, the `error_policy` is applied. |
|
|
113
|
+
| `confidence_threshold` | `0.65` | Mean score below which the write is blocked. Set at the top of the noise floor; raise after running calibration rather than lowering blindly. |
|
|
114
|
+
| `stddev_threshold` | `0.2` | Sample standard deviation above which the write is blocked, independent of the mean. |
|
|
115
|
+
| `cooldown.seconds` | `120` | A write whose path shares a parent directory and filename stem with a recent successful write is skipped within this window. |
|
|
116
|
+
| `cooldown.identity_match` | `dir_plus_stem` | Cooldown identity strategy. Stem comparison strips only the final extension; the cooldown does not carry across a rename. |
|
|
117
|
+
| `transcript.prior_turn_chars_max` | `800` | Maximum characters of the prior assistant turn fed into the evaluator. Set to `0` to omit the prior turn for near-zero egress. |
|
|
118
|
+
| `transcript.transcript_max_age_seconds` | `300` | Maximum age of the transcript file Compass will read the prior turn from. |
|
|
119
|
+
| `skip_patterns.reply_to_question.enabled` | `true` | Enables the symbolic skip layer. When disabled, every write that passes the trigger gate reaches the LLM evaluator. |
|
|
120
|
+
| `max_checks_per_turn` | `3` | Per-turn evaluation budget. Writes beyond this skip with reason `turn_budget_exhausted`. |
|
|
121
|
+
| `min_context_chars` | `80` | Minimum sanitized context length. Shorter context skips with reason `insufficient_context`. |
|
|
122
|
+
| `context_chars_max` | `600` | Maximum characters of context sent to the evaluator. Set to `0` for near-zero egress. |
|
|
123
|
+
| `include_file_contents` | `false` | When `false`, file contents are never sent to the evaluator — only tool name, file path, operation type, prior turn excerpt, and context excerpt. |
|
|
124
|
+
| `skip_globs` | lock/sum/`node_modules`/`.git`/`dist`/`build` patterns | Paths matching any glob skip evaluation entirely. |
|
|
125
|
+
| `error_policy` | `"closed"` | `closed` blocks on evaluator failure; `open` passes the write through and emits `compass.check.skipped` with reason `sampler_error`. |
|
|
126
|
+
| `circuit_breaker.enabled` | `true` | Enables the session-scoped circuit breaker. |
|
|
127
|
+
| `circuit_breaker.consecutive_failures_to_open` | `3` | Consecutive evaluator failures before the circuit opens. |
|
|
128
|
+
| `circuit_breaker.open_duration_seconds` | `300` | How long the circuit stays open (failing open) before attempting to close. While open, writes skip with reason `circuit_open`. |
|
|
129
|
+
| `intervention.recheck_limit` | `1` | Maximum re-checks per intervention after a user supplies clarification. |
|
|
130
|
+
|
|
131
|
+
The plugin's `config.json` is the source of truth for available knobs.
|
|
132
|
+
|
|
133
|
+
### Data egress
|
|
134
|
+
|
|
135
|
+
Every evaluation sends content to the `evaluator.model` API endpoint. With `include_file_contents: false` (the default), Compass sends only the tool name, file path, operation type, bash command string (command only, not stdin), the prior assistant turn excerpt, and the context excerpt — never file contents. For near-zero egress, set `context_chars_max: 0` and `transcript.prior_turn_chars_max: 0`. Compass cannot auto-detect sensitive paths; for sensitive repositories, set `enabled: false`.
|
|
136
|
+
|
|
137
|
+
## Storage layout
|
|
138
|
+
|
|
139
|
+
Compass keeps per-session state — the turn-check count, cooldown table, and circuit-breaker state — under the shared substrate:
|
|
140
|
+
|
|
141
|
+
```text
|
|
142
|
+
~/.onlooker/compass/sessions/
|
|
143
|
+
└── <session-id>.json # turn_check_count, cooldown[], circuit_breaker{state, consecutive_failures, opened_at}
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
State is keyed by session ID and initialized at `SessionStart`. The runtime root is always resolved via `${ONLOOKER_DIR:-$HOME/.onlooker}` so the test suite's isolated temp home is respected.
|
|
147
|
+
|
|
148
|
+
## Events emitted
|
|
149
|
+
|
|
150
|
+
Compass emits the canonical `compass.*` event surface from [`@onlooker-community/schema`](https://github.com/onlooker-community/schema). All events land in `~/.onlooker/logs/onlooker-events.jsonl` and are validated against the schema before write.
|
|
151
|
+
|
|
152
|
+
| Event | When | Key payload fields |
|
|
153
|
+
|-------|------|--------------------|
|
|
154
|
+
| `compass.check.passed` | Confidence ≥ threshold and stddev ≤ threshold. | `confidence`, `stddev`, `file_path`, `tool_name`, `had_prior_turn` |
|
|
155
|
+
| `compass.check.failed` | Confidence < threshold or stddev > threshold. | `confidence`, `stddev`, `primary_concern`, `file_path` |
|
|
156
|
+
| `compass.check.skipped` | A gate rule or the symbolic skip layer matched. | `reason`, `file_path` |
|
|
157
|
+
|
|
158
|
+
`compass.check.skipped` reasons: `skip_sentinel`, `skip_glob`, `dir_plus_stem_cooldown`, `turn_budget_exhausted`, `insufficient_context`, `circuit_open`, `reply_to_question_pattern`, and `sampler_error`.
|
|
159
|
+
|
|
160
|
+
## Requirements
|
|
161
|
+
|
|
162
|
+
- The `ecosystem` plugin installed (for the `~/.onlooker/` substrate and canonical event emission).
|
|
163
|
+
- `claude` CLI on `PATH` (the evaluator shells out to `claude -p` for each sample).
|
|
164
|
+
- `jq` for JSON manipulation.
|
|
165
|
+
- `node` for canonical-event emission.
|
|
166
|
+
|
|
167
|
+
## Architecture decisions
|
|
168
|
+
|
|
169
|
+
Key decisions made during design are recorded in [`docs/adr/`](docs/adr/):
|
|
170
|
+
|
|
171
|
+
- [ADR-001](docs/adr/001-evaluate-prompts-in-context.md) — Evaluate prompts in context (with the prior assistant turn), not in isolation, plus the symbolic skip pattern for question-answer turns
|
|
172
|
+
|
|
173
|
+
The full design, including failure modes, the intervention UX, integration points, and open questions, lives in [`docs/design.md`](docs/design.md).
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# Counsel
|
|
2
|
+
|
|
3
|
+
Weekly synthesis and coaching brief from the full observability stack.
|
|
4
|
+
|
|
5
|
+
Counsel reads every plugin's events from your onlooker log, runs a single synthesis pass to surface recurring patterns, improvement opportunities, and wins, and writes a structured Markdown brief. At session start, when the last brief has gone stale, it regenerates one and injects it as invisible context — turning weeks of accumulated agent telemetry into a short, actionable read.
|
|
6
|
+
|
|
7
|
+
Counsel is a sibling plugin to [`ecosystem`](../../) and assumes the Onlooker observability substrate (`~/.onlooker/`) is present.
|
|
8
|
+
|
|
9
|
+
## How it works
|
|
10
|
+
|
|
11
|
+
| Hook | What Counsel does |
|
|
12
|
+
|------|-------------------|
|
|
13
|
+
| `SessionStart` | Resolves the project key, checks whether the latest brief is older than `synthesis_interval_days`. If stale (and enough events exist), reads the last `lookback_days` of events from the onlooker log, calls `claude -p` with a synthesis prompt, writes a `YYYY-WW.md` brief under `~/.onlooker/counsel/<project-key>/briefs/`, emits `counsel.brief.generated`, and injects the brief as `additionalContext`. |
|
|
14
|
+
|
|
15
|
+
The synthesis pass produces a structured JSON object — `summary`, `patterns`, `recommendations` (each with `title`, `rationale`, and a `high`/`medium`/`low` `priority`), `wins`, and `watch` — which Counsel formats into the Markdown brief.
|
|
16
|
+
|
|
17
|
+
Counsel partitions the event stream by source plugin, recognizing `tribunal`, `echo`, `sentinel`, `warden`, `oracle`, and `meridian` events (everything else maps to a generic `onlooker_events` source). The synthesis prompt focuses on recurring failure modes and blocked gates, prompt regression trends, budget and resource pressure, quality trends over time, and what the team is consistently doing well.
|
|
18
|
+
|
|
19
|
+
The hook always exits 0 — it never blocks a session from starting. It skips silently when Counsel is disabled, the directory has no project key (non-git), the latest brief is still fresh, or fewer than `capture.min_events` events fall inside the lookback window.
|
|
20
|
+
|
|
21
|
+
## Activation
|
|
22
|
+
|
|
23
|
+
Counsel is **on by default**. Disable it per-project in `.claude/settings.json` (or globally in `~/.claude/settings.json`):
|
|
24
|
+
|
|
25
|
+
```json
|
|
26
|
+
{
|
|
27
|
+
"counsel": {
|
|
28
|
+
"enabled": false
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Configuration
|
|
34
|
+
|
|
35
|
+
All keys are optional. Unset keys fall back to the plugin's `config.json` defaults.
|
|
36
|
+
|
|
37
|
+
```json
|
|
38
|
+
{
|
|
39
|
+
"counsel": {
|
|
40
|
+
"enabled": true,
|
|
41
|
+
"synthesis_interval_days": 7,
|
|
42
|
+
"lookback_days": 30,
|
|
43
|
+
"evaluator": {
|
|
44
|
+
"model": "claude-haiku-4-5-20251001",
|
|
45
|
+
"timeout": 90,
|
|
46
|
+
"max_tokens": 4096,
|
|
47
|
+
"temperature": 0.4
|
|
48
|
+
},
|
|
49
|
+
"capture": {
|
|
50
|
+
"min_events": 10,
|
|
51
|
+
"events_chars_max": 60000
|
|
52
|
+
},
|
|
53
|
+
"output": {
|
|
54
|
+
"brief_max_chars": 3000
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
| Key | Default | Description |
|
|
61
|
+
|-----|---------|-------------|
|
|
62
|
+
| `enabled` | `true` | Set to `false` to skip all synthesis and injection. |
|
|
63
|
+
| `synthesis_interval_days` | `7` | Minimum age (in days) of the latest brief before a new one is generated. A brief younger than this is considered fresh and the hook skips. |
|
|
64
|
+
| `lookback_days` | `30` | How far back in the event log to read when synthesizing a brief. |
|
|
65
|
+
| `evaluator.model` | `claude-haiku-4-5-20251001` | Model used for the synthesis pass. Haiku is fast and cheap; upgrade for higher-stakes repos. |
|
|
66
|
+
| `evaluator.timeout` | `90` | Per-call wall-clock timeout (seconds) passed to the `timeout` command around `claude -p`. |
|
|
67
|
+
| `evaluator.max_tokens` | `4096` | Token ceiling for the synthesis response. |
|
|
68
|
+
| `evaluator.temperature` | `0.4` | Sampling temperature for the synthesis pass. |
|
|
69
|
+
| `capture.min_events` | `10` | Minimum number of events in the lookback window required to generate a brief. Below this, the hook skips silently. |
|
|
70
|
+
| `capture.events_chars_max` | `60000` | Hard ceiling on the characters of summarized event text fed into the synthesis prompt. |
|
|
71
|
+
| `output.brief_max_chars` | `3000` | Hard ceiling on the brief characters injected as session context. |
|
|
72
|
+
|
|
73
|
+
## Storage layout
|
|
74
|
+
|
|
75
|
+
```text
|
|
76
|
+
~/.onlooker/counsel/<project-key>/
|
|
77
|
+
└── briefs/
|
|
78
|
+
└── <YYYY-WW>.md # one brief per ISO week; newest sorts last
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Briefs are named by ISO year and week (`date '+%G-%V'`). The injected brief is always the lexicographically newest `.md` file in the directory; its file modification time is what the staleness check compares against `synthesis_interval_days`. When no project key can be derived, briefs fall back to `~/.onlooker/counsel/unknown/briefs/`.
|
|
82
|
+
|
|
83
|
+
Project key: first 12 hex chars of SHA256 of `remote:<git-remote-origin-url>`, falling back to SHA256 of `root:<repo-root>` for repos without a remote. The repo root is resolved through the git common directory, so worktrees of the same repo share a key. This mirrors the tribunal and scribe keying scheme.
|
|
84
|
+
|
|
85
|
+
## Events emitted
|
|
86
|
+
|
|
87
|
+
Counsel emits its event surface through [`@onlooker-community/schema`](https://github.com/onlooker-community/schema). All events are validated against the schema before being appended to `~/.onlooker/logs/onlooker-events.jsonl`.
|
|
88
|
+
|
|
89
|
+
| Event | When |
|
|
90
|
+
|-------|------|
|
|
91
|
+
| `counsel.brief.generated` | After a brief is written. Payload includes `period_start`, `period_end`, `recommendation_count`, and `sources_consulted` (the set of source plugins present in the analyzed event batch). |
|
|
92
|
+
|
|
93
|
+
## Requirements
|
|
94
|
+
|
|
95
|
+
- The `ecosystem` plugin installed (for the `~/.onlooker/` substrate and canonical event emission).
|
|
96
|
+
- `claude` CLI on `PATH` (the hook shells out to `claude -p` for the synthesis pass).
|
|
97
|
+
- `jq` for JSON manipulation.
|
|
98
|
+
- `node` for canonical-event emission.
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# Governor
|
|
2
|
+
|
|
3
|
+
Resource governance and budget enforcement for the Onlooker ecosystem.
|
|
4
|
+
|
|
5
|
+
Governor tracks per-session token and cost spend, gates `Task` spawns before they exceed a configurable budget ceiling, and emits `governor.*` events for audit. Named for the steam-engine governor — the flyweight device that throttles a machine back before it runs away — it keeps a session's subagent fan-out inside a spend envelope instead of letting it accelerate unchecked.
|
|
6
|
+
|
|
7
|
+
Governor is a sibling plugin to [`ecosystem`](../../) and assumes the Onlooker observability substrate (`~/.onlooker/`) is present.
|
|
8
|
+
|
|
9
|
+
## How it works
|
|
10
|
+
|
|
11
|
+
Governor keeps a per-session JSONL ledger and consults it on every `Task` spawn. Accounting is two-phase: the gate writes a *reservation* before a spawn runs so concurrent spawns each see the others' in-flight cost, and completion *cancels* that reservation and records observed spend.
|
|
12
|
+
|
|
13
|
+
| Hook | Matcher | What Governor does |
|
|
14
|
+
|------|---------|--------------------|
|
|
15
|
+
| `SessionStart` | `*` | Creates `~/.onlooker/governance/ledgers/`, sweeps stale lock directories left by crashed prior sessions (emitting `governor.lock.stale_cleared` for each), and checks that the global policy file exists (warns to stderr if missing — never blocks). |
|
|
16
|
+
| `PreToolUse` | `Task` | The gate. Estimates tokens for the spawn, reads consumed tokens from the ledger under an atomic check-and-reserve lock, decides allow or block, writes a reservation when allowing, and emits `governor.gate.checked`. |
|
|
17
|
+
| `PostToolUse` | `Task` | Records the completed call: negates the reservation estimate, adds observed `actual_tokens` when the tool response carries usage counts, appends a record to the ledger, and emits `governor.call.recorded`. |
|
|
18
|
+
| `Stop` | `*` | Reads cumulative totals from the ledger and emits `governor.session.complete` with token, cost, and call summaries plus an `under_budget` flag. |
|
|
19
|
+
|
|
20
|
+
### The gate decision
|
|
21
|
+
|
|
22
|
+
On each `Task` spawn, Governor estimates the spawn's token cost, adds it to the session's consumed tokens, and compares the projection against two token thresholds. Together with lock contention, that yields three possible block reasons:
|
|
23
|
+
|
|
24
|
+
- **`ceiling_exceeded`** — projected tokens exceed `tokens_default × hard_stop_margin`. **Always blocks**, regardless of enforcement mode.
|
|
25
|
+
- **`budget_exceeded`** — projected tokens exceed `tokens_default` but stay under the hard ceiling. Blocks only in `hard` enforcement; in `soft` enforcement the spawn is allowed and only the event is emitted.
|
|
26
|
+
- **`lock_timeout`** — the gate lock could not be acquired within its timeout. Treated as a block in `hard` enforcement.
|
|
27
|
+
|
|
28
|
+
To block, the hook writes `{"decision":"block","reason":"..."}` to stdout (the Claude Code `PreToolUse` block protocol) and still exits 0. All other paths allow the spawn. Every decision — allow or block — emits `governor.gate.checked` with the decision, reason, estimate, and remaining budget.
|
|
29
|
+
|
|
30
|
+
### Token and cost estimation
|
|
31
|
+
|
|
32
|
+
Governor does not know the model a spawn will use, so estimates are a planning-time upper bound. Tokens are estimated from the tool-input JSON using a **tier table** of characters-per-token ratios:
|
|
33
|
+
|
|
34
|
+
| Content tier | Characters per token |
|
|
35
|
+
|--------------|----------------------|
|
|
36
|
+
| ASCII prose | 4.0 |
|
|
37
|
+
| Code / JSON | 3.0 |
|
|
38
|
+
| Mixed | 2.5 |
|
|
39
|
+
| Non-Latin | 1.5 |
|
|
40
|
+
|
|
41
|
+
The raw estimate is multiplied by `safety_margin` before the gate check. Cost is derived from tokens at a blended ~$9 per million (Sonnet-class $3/M input + $15/M output, assuming a 50/50 split). When a `PostToolUse` response carries `usage.input_tokens` / `usage.output_tokens`, the actual count is recorded alongside the estimate and the running total converges to real spend.
|
|
42
|
+
|
|
43
|
+
## Activation
|
|
44
|
+
|
|
45
|
+
Governor is **off by default**. Enable it per-project in `.claude/settings.json`:
|
|
46
|
+
|
|
47
|
+
```json
|
|
48
|
+
{
|
|
49
|
+
"governor": {
|
|
50
|
+
"enabled": true
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Or globally in `~/.claude/settings.json`. While disabled, every hook skips silently and no ledger is written.
|
|
56
|
+
|
|
57
|
+
## Configuration
|
|
58
|
+
|
|
59
|
+
All keys are optional. Unset keys fall back to the plugin's `config.json` defaults.
|
|
60
|
+
|
|
61
|
+
```json
|
|
62
|
+
{
|
|
63
|
+
"governor": {
|
|
64
|
+
"enabled": false,
|
|
65
|
+
"enforcement": "soft",
|
|
66
|
+
"global_policy_path": "~/.onlooker/governance/global-policy.yaml",
|
|
67
|
+
"session": {
|
|
68
|
+
"tokens_default": 100000,
|
|
69
|
+
"cost_usd_default": 1.0,
|
|
70
|
+
"reserve_pct": 10
|
|
71
|
+
},
|
|
72
|
+
"estimation": {
|
|
73
|
+
"safety_margin": 1.3,
|
|
74
|
+
"hard_stop_margin": 1.5,
|
|
75
|
+
"method": "tier_table"
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
| Key | Default | Description |
|
|
82
|
+
|-----|---------|-------------|
|
|
83
|
+
| `enabled` | `false` | Must be `true` for any tracking, gating, or event emission to run. |
|
|
84
|
+
| `enforcement` | `"soft"` | `"soft"` tracks and emits events but never blocks on a budget overrun; `"hard"` blocks `Task` spawns once the budget is exceeded. A `ceiling_exceeded` overrun blocks in both modes. |
|
|
85
|
+
| `global_policy_path` | `"~/.onlooker/governance/global-policy.yaml"` | Path checked at `SessionStart`. If the file is missing, a warning is written to stderr and the session runs without a global ceiling. |
|
|
86
|
+
| `session.tokens_default` | `100000` | Per-session token budget. Projecting past this triggers `budget_exceeded`. Overridable per session via the `ONLOOKER_SESSION_BUDGET_TOKENS` environment variable. |
|
|
87
|
+
| `session.cost_usd_default` | `1.0` | Per-session cost budget in USD. Used by the `Stop` hook to set `under_budget`. |
|
|
88
|
+
| `session.reserve_pct` | `10` | Percentage of the budget held in reserve. |
|
|
89
|
+
| `estimation.safety_margin` | `1.3` | Multiplier applied to the raw token estimate before the gate check. |
|
|
90
|
+
| `estimation.hard_stop_margin` | `1.5` | Multiplier on `tokens_default` that defines the hard ceiling (`ceiling_exceeded`), which blocks regardless of enforcement mode. |
|
|
91
|
+
| `estimation.method` | `"tier_table"` | Estimation strategy. Only `tier_table` is implemented. |
|
|
92
|
+
|
|
93
|
+
Config resolves in three layers, with later layers overriding earlier ones: plugin `config.json` → `~/.claude/settings.json` → `<repo>/.claude/settings.json`.
|
|
94
|
+
|
|
95
|
+
## Storage layout
|
|
96
|
+
|
|
97
|
+
```text
|
|
98
|
+
~/.onlooker/governance/
|
|
99
|
+
├── ledgers/
|
|
100
|
+
│ ├── <session-id>.jsonl # one ledger per session (id sanitized to [a-zA-Z0-9_-])
|
|
101
|
+
│ ├── <session-id>.jsonl.lock # gate / write lock
|
|
102
|
+
│ └── <session-id>.jsonl.poisoned # marker written if a ledger write fails after retries
|
|
103
|
+
└── global-policy.yaml # advisory global ceiling (optional, checked at SessionStart)
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
Each ledger line is a JSON record. The gate appends `record_type: "reservation"` rows with a positive `estimated_tokens`; completion appends rows with a negated `estimated_tokens` (canceling the reservation) plus `actual_tokens` when usage is reported. Session totals sum `estimated_tokens + actual_tokens` across every row, so the running total resolves to in-flight estimates plus completed actuals.
|
|
107
|
+
|
|
108
|
+
Governor honors `$ONLOOKER_DIR`; it never hardcodes `~/.onlooker`, so the test suite's isolated temp home is respected.
|
|
109
|
+
|
|
110
|
+
## Events emitted
|
|
111
|
+
|
|
112
|
+
Governor emits the canonical `governor.*` event surface from [`@onlooker-community/schema`](https://github.com/onlooker-community/schema) v2.4.0+. All events land in `~/.onlooker/logs/onlooker-events.jsonl` and are validated against the schema before write.
|
|
113
|
+
|
|
114
|
+
| Event | When |
|
|
115
|
+
|-------|------|
|
|
116
|
+
| `governor.gate.checked` | On every `Task` spawn at the `PreToolUse` gate. Carries `decision`, `estimated_tokens`, `tokens_available`, `estimation_method`, `safety_margin`, and a `reason` when blocked. |
|
|
117
|
+
| `governor.call.recorded` | After a `Task` completes (`PostToolUse`). Carries `estimated_tokens`, `cost_usd_estimated`, `duration_ms`, and — when usage is reported — `actual_tokens` and `estimation_error_pct`. |
|
|
118
|
+
| `governor.session.complete` | At `Stop`. Carries `total_tokens`, `total_cost_usd`, `total_api_calls`, `budget_usd`, `under_budget`, and `ledger_poisoned`. |
|
|
119
|
+
| `governor.lock.stale_cleared` | At `SessionStart`, once per stale lock directory swept (older than 60 seconds). |
|
|
120
|
+
| `governor.ledger.write_failed` | When a ledger write fails after its retry budget; the ledger is poisoned and `unrecorded_tokens` is reported. |
|
|
121
|
+
|
|
122
|
+
## Requirements
|
|
123
|
+
|
|
124
|
+
- The `ecosystem` plugin installed (for the `~/.onlooker/` substrate and canonical event emission).
|
|
125
|
+
- `jq` for JSON manipulation.
|
|
126
|
+
- `node` for canonical-event emission.
|
|
127
|
+
- `awk` for fractional token and cost arithmetic (standard on macOS and most Linux distributions).
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "librarian",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "Consolidation layer between archivist's per-session artifacts and the user's durable typed memory store. Detects which session decisions, dead-ends, and open questions deserve to live across sessions, classifies them into the user/feedback/project/reference types, and queues them as proposals for explicit confirmation. Auto-promotion is opt-in. Builds on the Onlooker ecosystem plugin.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Onlooker Community",
|
|
@@ -9,6 +9,6 @@
|
|
|
9
9
|
"homepage": "https://onlooker.dev",
|
|
10
10
|
"repository": "https://github.com/onlooker-community/ecosystem",
|
|
11
11
|
"license": "MIT",
|
|
12
|
-
"skills": [],
|
|
12
|
+
"skills": ["./skills/librarian"],
|
|
13
13
|
"agents": []
|
|
14
14
|
}
|
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.2.0](https://github.com/onlooker-community/ecosystem/compare/librarian-v0.1.0...librarian-v0.2.0) (2026-06-04)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### Features
|
|
7
|
+
|
|
8
|
+
* **librarian:** /librarian review skill closes promotion loop :tada: ([#68](https://github.com/onlooker-community/ecosystem/issues/68)) ([8f3e3db](https://github.com/onlooker-community/ecosystem/commit/8f3e3dbdf6f08dceb0cf61d46281936a4f9954de))
|
|
9
|
+
|
|
3
10
|
## [0.1.0](https://github.com/onlooker-community/ecosystem/compare/librarian-v0.0.1...librarian-v0.1.0) (2026-06-04)
|
|
4
11
|
|
|
5
12
|
|