openwriter 0.20.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/server/backlinks.js +89 -26
- package/dist/server/state.js +9 -0
- package/package.json +1 -1
- package/skill/SKILL.md +29 -12
- package/skill/agents/openwriter-enrichment-minion.md +46 -82
- package/skill/docs/enrichment.md +30 -29
package/dist/server/backlinks.js
CHANGED
|
@@ -160,19 +160,28 @@ export function writeFrontmatter(filename, newData) {
|
|
|
160
160
|
atomicWriteFileSync(filePath, newFrontmatter);
|
|
161
161
|
}
|
|
162
162
|
// ============================================================================
|
|
163
|
-
// COMPUTE-LIVE BACKLINKS — the
|
|
163
|
+
// COMPUTE-LIVE BACKLINKS — the v0.20 surface, extended in v0.21
|
|
164
164
|
// ============================================================================
|
|
165
165
|
//
|
|
166
|
-
// `computeBacklinksFor(targetDocId)` returns every
|
|
167
|
-
//
|
|
168
|
-
//
|
|
169
|
-
// `
|
|
170
|
-
//
|
|
171
|
-
|
|
166
|
+
// `computeBacklinksFor(targetDocId)` returns every inbound edge pointing at
|
|
167
|
+
// targetDocId. Two sources contribute:
|
|
168
|
+
//
|
|
169
|
+
// 1. Doc-level edges from `references:` frontmatter arrays (v0.20 model —
|
|
170
|
+
// structural, no node granularity). Entry: `{ from_doc }`.
|
|
171
|
+
// 2. Paragraph-anchored edges from prose `[text](doc:DOCID#NODEID)` link
|
|
172
|
+
// marks in the body (v0.21 — restores per-paragraph backlinks for the
|
|
173
|
+
// dotted-underline + "See connections" UI). Entry: `{ from_doc, from_node,
|
|
174
|
+
// to_node, text }`.
|
|
175
|
+
//
|
|
176
|
+
// Cached in memory; any write that touches references or body invalidates the cache
|
|
177
|
+
// (state.ts:writeToDisk after every save). Cache rebuilds lazily on next read.
|
|
178
|
+
/** Inverse index: target docId → list of inbound edges. */
|
|
172
179
|
let backlinksCache = null;
|
|
173
180
|
/** Build (or rebuild) the entire inverse index by scanning every .md in the
|
|
174
|
-
* data dir.
|
|
175
|
-
*
|
|
181
|
+
* data dir. Two passes per file: frontmatter references (cheap) + body
|
|
182
|
+
* paragraph-anchored prose links (parse + walk). For personal corpora of a
|
|
183
|
+
* few hundred docs this lands in ~1-2 seconds; the cache holds across many
|
|
184
|
+
* reads, so amortized cost is negligible. */
|
|
176
185
|
function buildBacklinksCache() {
|
|
177
186
|
const cache = new Map();
|
|
178
187
|
let files = [];
|
|
@@ -182,6 +191,24 @@ function buildBacklinksCache() {
|
|
|
182
191
|
catch {
|
|
183
192
|
return cache;
|
|
184
193
|
}
|
|
194
|
+
/** Dedup keys per target: source docs with no `to_node` collapse to one
|
|
195
|
+
* doc-level entry; paragraph-anchored entries dedup per (from_doc, to_node)
|
|
196
|
+
* pair so multi-link-same-anchor in a single source counts once. */
|
|
197
|
+
const seen = new Map();
|
|
198
|
+
function push(targetDocId, entry) {
|
|
199
|
+
const key = entry.to_node ? `${entry.from_doc}#${entry.to_node}` : entry.from_doc;
|
|
200
|
+
let seenForTarget = seen.get(targetDocId);
|
|
201
|
+
if (!seenForTarget) {
|
|
202
|
+
seenForTarget = new Set();
|
|
203
|
+
seen.set(targetDocId, seenForTarget);
|
|
204
|
+
}
|
|
205
|
+
if (seenForTarget.has(key))
|
|
206
|
+
return;
|
|
207
|
+
seenForTarget.add(key);
|
|
208
|
+
if (!cache.has(targetDocId))
|
|
209
|
+
cache.set(targetDocId, []);
|
|
210
|
+
cache.get(targetDocId).push(entry);
|
|
211
|
+
}
|
|
185
212
|
for (const f of files) {
|
|
186
213
|
try {
|
|
187
214
|
const raw = readFileSync(join(getDataDir(), f), 'utf-8');
|
|
@@ -189,15 +216,34 @@ function buildBacklinksCache() {
|
|
|
189
216
|
const sourceDocId = parsed.data?.docId;
|
|
190
217
|
if (!sourceDocId || typeof sourceDocId !== 'string')
|
|
191
218
|
continue;
|
|
219
|
+
// Pass 1: structural references (frontmatter). Doc-level only.
|
|
192
220
|
const refs = parsed.data?.references;
|
|
193
|
-
if (
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
221
|
+
if (Array.isArray(refs)) {
|
|
222
|
+
for (const targetDocId of refs) {
|
|
223
|
+
if (typeof targetDocId !== 'string')
|
|
224
|
+
continue;
|
|
225
|
+
push(targetDocId, { from_doc: sourceDocId });
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
// Pass 2: paragraph-anchored prose links. Only entries with a #NODEID
|
|
229
|
+
// anchor in the href contribute — doc-level prose links are already
|
|
230
|
+
// captured by Pass 1 via the references-auto-sync at save time.
|
|
231
|
+
try {
|
|
232
|
+
const tipDoc = markdownToTiptap(raw).document;
|
|
233
|
+
const proseLinks = extractForwardLinks(tipDoc, sourceDocId);
|
|
234
|
+
for (const link of proseLinks) {
|
|
235
|
+
if (!link.to_node)
|
|
236
|
+
continue; // doc-level — Pass 1 handles it
|
|
237
|
+
push(link.to_doc, {
|
|
238
|
+
from_doc: link.from_doc,
|
|
239
|
+
from_node: link.from_node,
|
|
240
|
+
to_node: link.to_node,
|
|
241
|
+
text: link.text,
|
|
242
|
+
});
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
catch {
|
|
246
|
+
// markdownToTiptap can throw on malformed bodies — best-effort skip
|
|
201
247
|
}
|
|
202
248
|
}
|
|
203
249
|
catch {
|
|
@@ -207,24 +253,41 @@ function buildBacklinksCache() {
|
|
|
207
253
|
return cache;
|
|
208
254
|
}
|
|
209
255
|
/** Drop the in-memory cache. Next read rebuilds from disk. Called from
|
|
210
|
-
* state.ts:writeToDisk after a save that may have changed references
|
|
256
|
+
* state.ts:writeToDisk after a save that may have changed references OR the
|
|
257
|
+
* body's prose link set. */
|
|
211
258
|
export function invalidateBacklinksCache() {
|
|
212
259
|
backlinksCache = null;
|
|
213
260
|
}
|
|
214
261
|
/**
|
|
215
|
-
* Return every
|
|
216
|
-
*
|
|
217
|
-
*
|
|
262
|
+
* Return every inbound edge pointing at targetDocId — both doc-level (from
|
|
263
|
+
* `references:` frontmatter) and paragraph-anchored (from prose
|
|
264
|
+
* `[text](doc:DOCID#NODEID)` links). Cached in memory.
|
|
265
|
+
*
|
|
266
|
+
* Entries with `to_node` populated are paragraph-anchored: the backlinks
|
|
267
|
+
* decoration plugin paints a dotted underline on the matching target
|
|
268
|
+
* paragraph, and the context menu surfaces "See connections" listing the
|
|
269
|
+
* sources. Entries without `to_node` are doc-level and intended for
|
|
270
|
+
* doc-scope UI (e.g. "N sources link to this doc").
|
|
218
271
|
*/
|
|
219
272
|
export function computeBacklinksFor(targetDocId) {
|
|
220
273
|
if (!backlinksCache)
|
|
221
274
|
backlinksCache = buildBacklinksCache();
|
|
222
|
-
const
|
|
223
|
-
if (!
|
|
275
|
+
const entries = backlinksCache.get(targetDocId);
|
|
276
|
+
if (!entries)
|
|
224
277
|
return [];
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
278
|
+
// Stable sort: paragraph-anchored entries first (so per-paragraph UI gets
|
|
279
|
+
// them ordered consistently), then doc-level; within each group by from_doc, then to_node.
|
|
280
|
+
return [...entries].sort((a, b) => {
|
|
281
|
+
const aAnchored = a.to_node ? 0 : 1;
|
|
282
|
+
const bAnchored = b.to_node ? 0 : 1;
|
|
283
|
+
if (aAnchored !== bAnchored)
|
|
284
|
+
return aAnchored - bAnchored;
|
|
285
|
+
if (a.from_doc !== b.from_doc)
|
|
286
|
+
return a.from_doc < b.from_doc ? -1 : 1;
|
|
287
|
+
if ((a.to_node ?? '') !== (b.to_node ?? ''))
|
|
288
|
+
return (a.to_node ?? '') < (b.to_node ?? '') ? -1 : 1;
|
|
289
|
+
return 0;
|
|
290
|
+
});
|
|
228
291
|
}
|
|
229
292
|
// ============================================================================
|
|
230
293
|
// PROSE-LINK AUTO-SYNC — backward compat for legacy [text](doc:id) prose links
|
package/dist/server/state.js
CHANGED
|
@@ -2609,6 +2609,9 @@ export function saveDocToFile(filename, doc) {
|
|
|
2609
2609
|
const overlay = extractOverlay(doc);
|
|
2610
2610
|
saveOverlay(docId, overlay);
|
|
2611
2611
|
}
|
|
2612
|
+
// Backlinks cache invalidate — browser sent a doc-update for a non-active
|
|
2613
|
+
// doc; the prose-link set on that doc may have changed.
|
|
2614
|
+
invalidateBacklinksCache();
|
|
2612
2615
|
}
|
|
2613
2616
|
catch { /* best-effort */ }
|
|
2614
2617
|
}
|
|
@@ -2758,6 +2761,12 @@ function flushDocToFile(filename, doc, title, metadata) {
|
|
|
2758
2761
|
saveOverlay(docId, overlay);
|
|
2759
2762
|
}
|
|
2760
2763
|
setPendingCacheEntry(filename, countPending(doc.content));
|
|
2764
|
+
// Backlinks cache invalidation — non-active write paths (populate_document on
|
|
2765
|
+
// a fresh doc, applyChangesToFile, applyTextEditsToFile) all funnel through
|
|
2766
|
+
// here. Any of them can change references: or the prose-link set, so the
|
|
2767
|
+
// computed inverse cache must drop. Mirrors the active-doc invalidate at the
|
|
2768
|
+
// tail of writeToDisk.
|
|
2769
|
+
invalidateBacklinksCache();
|
|
2761
2770
|
}
|
|
2762
2771
|
export function populateDocumentFile(filename, doc) {
|
|
2763
2772
|
const targetPath = resolveDocPath(filename);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "openwriter",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.21.0",
|
|
4
4
|
"description": "The open-source writing surface for AI agents. Markdown-native editor with pending change review — your agent writes, you accept or reject.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
package/skill/SKILL.md
CHANGED
|
@@ -16,7 +16,7 @@ description: |
|
|
|
16
16
|
Requires: OpenWriter MCP server configured. Browser UI at localhost:5050.
|
|
17
17
|
metadata:
|
|
18
18
|
author: travsteward
|
|
19
|
-
version: "0.
|
|
19
|
+
version: "0.10.0"
|
|
20
20
|
repository: https://github.com/travsteward/openwriter
|
|
21
21
|
license: MIT
|
|
22
22
|
---
|
|
@@ -31,12 +31,12 @@ You are a writing collaborator. You read documents and make edits **exclusively
|
|
|
31
31
|
2. **The terminal is for discussion only.** Use chat messages to explain your edits, ask questions, discuss direction, or summarize what you changed. Never use it as the writing surface.
|
|
32
32
|
3. **Name every document.** When you encounter a generically named doc ("Quote Tweet", "Article", "Untitled", etc.), rename it based on its content before proceeding. Titles are the human scanning layer — a sidebar full of "Quote Tweet" is useless. Use `rename_item` with the docId. Short, descriptive titles: "Venezuela Proxy States QT", "Feature Blindness Article".
|
|
33
33
|
4. **Metadata first.** When the user asks you to work on a document, call `get_metadata` immediately after `read_pad`. Tweet docs store the parent URL in `tweetContext.url`, article docs store context in `articleContext`. **Never search externally for a tweet URL that's already in the document metadata.** This prevents wasting paid API calls on information you already have.
|
|
34
|
-
5. **Dispatch the enrichment minion when openwriter says so.** OpenWriter surfaces stale enrichment two ways: (a) `ENRICHMENT_STATUS:
|
|
34
|
+
5. **Dispatch the enrichment minion when openwriter says so.** OpenWriter surfaces stale enrichment two ways: (a) `ENRICHMENT_STATUS: ...` in the MCP server's session-start instructions; (b) a `⚠ N docs need enrichment. Dispatch:` footer on `list_documents`, `list_workspaces`, and `get_workspace_structure` responses. Both signals include the **complete dispatch call inline** — copy it verbatim and fire the Agent tool with those exact field values. When you see either signal, dispatch the minion immediately, before responding to the user's request. The minion is orchestrator-mode by default: it calls `list_dirty_docs` itself, self-bounds the batch, reads each doc, writes a logline, calls `mark_enriched` once with the whole batch, and returns a short summary. **v0.19.0 schema:** the minion writes ONE field — `logline`. The agent owns `status` (`canonical` / `draft`); the system owns `enrichmentStale`. The legacy fields `domain`, `concepts`, and `docRole` were dropped. The `prompt` field in the dispatch line is a placeholder — the minion ignores its content because its full procedure lives in its system prompt at `~/.claude/agents/openwriter-enrichment-minion.md`.
|
|
35
35
|
|
|
36
36
|
**Surfacing to the user:** treat enrichment like the inbox — a maintenance reflex, not a feature they have to ask for. Phrasing depends on context:
|
|
37
37
|
|
|
38
38
|
- **First time in a session, small batch (N ≤ 5):** silent dispatch + one-line aside in your response: "Enriched 3 docs in the background. Now, ..."
|
|
39
|
-
- **First time in a session, medium batch (5 < N ≤ 20):** brief explanation on first surface: "OpenWriter just refreshed loglines
|
|
39
|
+
- **First time in a session, medium batch (5 < N ≤ 20):** brief explanation on first surface: "OpenWriter just refreshed loglines on 12 docs in the background. Now, ..." Sets expectations once; subsequent runs can stay silent.
|
|
40
40
|
- **First time in a session, large batch (N > 20):** give the user a heads-up BEFORE dispatching: "OpenWriter detected 47 docs that haven't been summarized yet — first-time setup. Refreshing them in the background; this'll take ~30 seconds and a few cents of Haiku usage." Then dispatch and report when done.
|
|
41
41
|
- **Very large batch (N > 30):** one minion can't get through that many in reasonable wall time. Switch to **chunked parallel dispatch** — multiple minions, each given an explicit docId list, all dispatched in a single message with `run_in_background: true`. Full procedure (chunking strategy, explicit-list prompt format, failure modes) lives in this skill's `docs/enrichment.md`. Read that doc before dispatching anything over 30 docs.
|
|
42
42
|
|
|
@@ -150,8 +150,8 @@ Every document has an immutable **docId** (8-char hex, e.g. `a1b2c3d4`) in its Y
|
|
|
150
150
|
| `list_workspaces` | List all workspaces with title and doc count |
|
|
151
151
|
| `create_workspace` | Create a new workspace |
|
|
152
152
|
| `delete_workspace` | Delete a workspace and all its document files (moves to OS trash) |
|
|
153
|
-
| `get_workspace_structure` | Get full workspace tree: containers, docs, enrichment (logline
|
|
154
|
-
| `get_item_context` | Get progressive disclosure context for a doc — workspace context + the doc's own enrichment (logline,
|
|
153
|
+
| `get_workspace_structure` | Get full workspace tree: containers, docs, per-doc enrichment (logline, status, STALE marker), workspace-level vocab/schema, plus context (characters, settings, rules) |
|
|
154
|
+
| `get_item_context` | Get progressive disclosure context for a doc — workspace context + the doc's own enrichment (logline, status, enrichmentStale) |
|
|
155
155
|
| `update_workspace_context` | Update workspace context (characters, settings, rules) |
|
|
156
156
|
|
|
157
157
|
### Workspace Organization
|
|
@@ -165,15 +165,29 @@ Every document has an immutable **docId** (8-char hex, e.g. `a1b2c3d4`) in its Y
|
|
|
165
165
|
| `move_item` | Move or reorder a doc, container, or workspace (type: doc/container/workspace) |
|
|
166
166
|
| `rename_item` | Rename a workspace, container, or document (type: workspace/container/document) |
|
|
167
167
|
|
|
168
|
-
### Enrichment (
|
|
168
|
+
### Enrichment (three-field schema — v0.19.0)
|
|
169
169
|
|
|
170
|
-
OpenWriter detects when a doc has drifted past enrichment thresholds (sentence-hash Jaccard drift, character-count volume ratio) on every save and stamps `enrichmentStale: true`. The agent's job is to dispatch the enrichment minion (see firm rule 5 + `docs/enrichment.md` in this skill) to refresh the
|
|
170
|
+
OpenWriter detects when a doc has drifted past enrichment thresholds (sentence-hash Jaccard drift, character-count volume ratio) on every save and stamps `enrichmentStale: true`. The agent's job is to dispatch the enrichment minion (see firm rule 5 + `docs/enrichment.md` in this skill) to refresh the logline.
|
|
171
|
+
|
|
172
|
+
**The three-field schema** — each field has exactly one owner:
|
|
173
|
+
|
|
174
|
+
| Field | Owner | Set how |
|
|
175
|
+
|-------|-------|---------|
|
|
176
|
+
| `logline` | LLM (minion) | `mark_enriched({ docs: [{ docId, logline }] })` |
|
|
177
|
+
| `status` (`canonical` / `draft`) | Agent | `create_document({ status })` on create; `set_metadata({ status })` on lifecycle change |
|
|
178
|
+
| `enrichmentStale` | System | OpenWriter sets on save; minion clears on `mark_enriched` |
|
|
179
|
+
|
|
180
|
+
**Lifecycle convention for `status`:**
|
|
181
|
+
- Default to `draft` on new docs (omit `status` from `create_document` and it lands as `draft`).
|
|
182
|
+
- Flip to `canonical` when the doc commits to the workspace spine (Beats locked, Research Note is now load-bearing, Master Reference is the source of truth).
|
|
183
|
+
- Flip back to `draft` when superseded (e.g. Ch 7 Beats v3 ships → demote v1/v2 to `draft`).
|
|
184
|
+
- The common crawl pattern is `crawl({ status: "canonical" })` — that's the trusted-shelf query.
|
|
171
185
|
|
|
172
186
|
| Tool | Key Params | Description |
|
|
173
187
|
|------|-----------|-------------|
|
|
174
188
|
| `list_dirty_docs` | `workspaceFile?` | List docs that need enrichment (never enriched OR explicitly flagged stale). Returns identity + reason only — no bodies. Optionally scoped to one workspace. Docs in opted-out workspaces (`enrichmentDisabled: true`) are excluded. |
|
|
175
|
-
| `mark_enriched` | `docs: [{docId, logline
|
|
176
|
-
| `crawl` | `workspaceFile?`, `
|
|
189
|
+
| `mark_enriched` | `docs: [{docId, logline}]` | Stamp one or more docs as freshly enriched. **Strict schema** — passing `domain` / `concepts` / `docRole` / `status` fails validation. OpenWriter auto-computes baselines (`lastEnrichedAt`, `lastEnrichedCharCount`, `lastEnrichedSentences`), clears `enrichmentStale`, and retires legacy fields from frontmatter. The minion calls this once at the end of its run with the full batch. |
|
|
190
|
+
| `crawl` | `workspaceFile?`, `tags?`, `status?` (`canonical`/`draft`), `hasLogline?` | Bulk-read enrichment fields per doc with AND-composed filters. The agent's "scan the shelf" primitive — ~60 tokens per doc, no bodies. v0.19.0 dropped `domain` / `concepts` / `docRole` filters (their fields had no authority discipline); `status` is the replacement axis for the common load-bearing-vs-working query. |
|
|
177
191
|
|
|
178
192
|
### Comments
|
|
179
193
|
|
|
@@ -275,9 +289,12 @@ create_document({
|
|
|
275
289
|
|
|
276
290
|
- **`workspace`** (string) — workspace title to add the doc to. Auto-creates if not found (case-insensitive match).
|
|
277
291
|
- **`container`** (string) — container name within the workspace (e.g. "Chapters", "Notes", "References"). Auto-creates if not found. Requires `workspace`.
|
|
278
|
-
-
|
|
292
|
+
- **`afterId`** (string, optional) — docId (8-char hex) or containerId to place the new doc immediately after. Omit and the doc lands at the **bottom** of its parent (the default since 0.18.0, matching the ascending-order convention: oldest at top, newest at bottom). Use `afterId` when you need surgical placement — e.g. inserting a new chapter doc immediately after the chapter's Beats doc.
|
|
293
|
+
- All three are optional — omit `workspace` for standalone docs outside any workspace.
|
|
294
|
+
|
|
295
|
+
This eliminates the need for separate `create_workspace`, `create_container`, and `move_item` calls when building up a workspace. The default-bottom landing also eliminates the need for a follow-up `move_item` pass to fix sidebar order after every create — the doc lands in convention position the first time.
|
|
279
296
|
|
|
280
|
-
|
|
297
|
+
`create_container` accepts the same `afterId` parameter with identical semantics — new containers default to the bottom of their parent and can be precisely placed via `afterId`. The Drafts sub-container that goes under every chapter container, for example, can be created with `afterId` set to the chapter's Research Notes docId so it lands at the very bottom in one call.
|
|
281
298
|
|
|
282
299
|
### Batched Creation (multiple docs at once)
|
|
283
300
|
|
|
@@ -299,7 +316,7 @@ When creating **two or more documents together** — a tweet thread saved as sep
|
|
|
299
316
|
**Rules:**
|
|
300
317
|
- Each write in the batch gets its own sidebar spinner keyed to its filename — a spinner only clears when you `populate_document` that specific `docId`
|
|
301
318
|
- Spinners persist across app refreshes (server-side registry)
|
|
302
|
-
- Same per-write fields as `create_document`: `title`, `content_type`, optional `workspace`/`container`/`url`/`path`
|
|
319
|
+
- Same per-write fields as `create_document`: `title`, `content_type`, optional `workspace`/`container`/`url`/`path`/`afterId`
|
|
303
320
|
- `reply` / `quote` types still require `url`
|
|
304
321
|
- For a **single** document, use `create_document` — don't reach for `declare_writes` just to wrap one entry
|
|
305
322
|
|
|
@@ -5,105 +5,76 @@ description: |
|
|
|
5
5
|
drift/volume detector. Dispatch when ENRICHMENT_STATUS appears in MCP
|
|
6
6
|
init instructions OR when a `⚠ N docs need enrichment` footer fires on
|
|
7
7
|
list_documents / list_workspaces / get_workspace_structure. Reads each
|
|
8
|
-
dirty doc
|
|
9
|
-
docRole, status), calls mark_enriched once with the whole batch.
|
|
8
|
+
dirty doc and stamps it with a single field — logline — via mark_enriched.
|
|
10
9
|
Returns a one-line summary.
|
|
11
10
|
model: haiku
|
|
12
11
|
maxTurns: 500
|
|
13
|
-
tools: mcp__openwriter__list_dirty_docs,
|
|
12
|
+
tools: mcp__openwriter__list_dirty_docs, mcp__openwriter__read_pad, mcp__openwriter__mark_enriched
|
|
14
13
|
---
|
|
15
14
|
|
|
16
15
|
# OpenWriter Enrichment Minion
|
|
17
16
|
|
|
18
17
|
You are an isolated sub-agent. Your single job: take the workspace's dirty
|
|
19
|
-
docs and stamp each one with concise, accurate
|
|
20
|
-
|
|
21
|
-
body.
|
|
18
|
+
docs and stamp each one with a concise, accurate logline so the main agent
|
|
19
|
+
can crawl the workspace at concept level without reading every body.
|
|
22
20
|
|
|
23
21
|
Do the work. Return a one-line summary. Do not narrate process. Do not ask
|
|
24
22
|
questions. The main agent dispatched you because the work needs doing.
|
|
25
23
|
|
|
26
|
-
## What enrichment is
|
|
24
|
+
## What enrichment is (v0.19.0)
|
|
27
25
|
|
|
28
|
-
|
|
26
|
+
One LLM-written frontmatter field:
|
|
29
27
|
|
|
30
28
|
- **logline** — précis (non-fiction) or logline (fiction) summarizing the
|
|
31
|
-
content. Under
|
|
32
|
-
not the kind of doc it is.
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
"psychology"). Lowercase, hyphenated. 3–8 per doc. Skip (or `[]`) if
|
|
40
|
-
nothing distinct.
|
|
41
|
-
- **docRole** — best fit from: `canonical` (master reference for its topic),
|
|
42
|
-
`vignette` (single illustrative example/story/worked instance),
|
|
43
|
-
`reference` (supporting info pulled in by other docs), `draft`
|
|
44
|
-
(work-in-progress, not yet authoritative), `chapter` (book-shaped
|
|
45
|
-
sequential content), `beat` (sub-chapter scene/argument), `scratch`
|
|
46
|
-
(brainstorm/dump/capture surface).
|
|
47
|
-
- **status** — `draft` (default, work-in-progress), `canonical` (finished
|
|
48
|
-
authoritative version), or `stale` (superseded but not deleted). Use
|
|
49
|
-
`draft` when uncertain. Archive state lives in `archivedAt`, not here.
|
|
29
|
+
content. **Under 150 chars.** No scaffolding — describe the content
|
|
30
|
+
itself, not the kind of doc it is. Drift-resistant: small body edits
|
|
31
|
+
rarely change what the doc IS about.
|
|
32
|
+
|
|
33
|
+
That's the entire payload. `status` (canonical / draft) is the agent's
|
|
34
|
+
field — set on `create_document` and via `set_metadata`, never by you.
|
|
35
|
+
`enrichmentStale` is the system's flag — openwriter sets it on save and
|
|
36
|
+
clears it when you call `mark_enriched`. You never touch either.
|
|
50
37
|
|
|
51
38
|
## The exact procedure
|
|
52
39
|
|
|
53
40
|
### Step 1. Find the work
|
|
54
41
|
|
|
55
|
-
**
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
**Otherwise**, call `mcp__openwriter__list_dirty_docs` with no arguments. It
|
|
60
|
-
returns every workspace's dirty docs in one response. Each entry has
|
|
61
|
-
`docId`, `filename`, `title`, `workspaceFile`, `reason` (`never_enriched` or
|
|
42
|
+
**Default — self-discovery.** You will normally be dispatched with no input
|
|
43
|
+
list. Call `mcp__openwriter__list_dirty_docs` with no arguments. It returns
|
|
44
|
+
every workspace's dirty docs in one response. Each entry has `docId`,
|
|
45
|
+
`filename`, `title`, `workspaceFile`, `reason` (`never_enriched` or
|
|
62
46
|
`stale_flag`).
|
|
63
47
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
### Step 2. Pull workspace vocabularies
|
|
48
|
+
**Special case — explicit list.** If the dispatching prompt provided an
|
|
49
|
+
explicit docId list, use that directly and skip `list_dirty_docs`.
|
|
67
50
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
51
|
+
**Self-bound the batch.** If the dirty list has more than 12 entries,
|
|
52
|
+
process only the first 12 this run. The footer will fire on the next
|
|
53
|
+
openwriter tool call and the acting agent will dispatch you again to drain
|
|
54
|
+
the rest. One run = one bounded batch, never a full sweep of a huge
|
|
55
|
+
backlog.
|
|
72
56
|
|
|
73
|
-
|
|
74
|
-
workspaceFile → { vocab: [...] | null, schema, domain, logline }
|
|
75
|
-
```
|
|
76
|
-
|
|
77
|
-
If a workspace has no vocab, that's fine — generate free-form domain labels
|
|
78
|
-
for its docs (consistently within the same workspace).
|
|
57
|
+
If `total === 0`, return `"No enrichment work pending."` and stop.
|
|
79
58
|
|
|
80
|
-
### Step
|
|
59
|
+
### Step 2. Enrich each doc
|
|
81
60
|
|
|
82
61
|
For each dirty doc:
|
|
83
62
|
|
|
84
63
|
1. `mcp__openwriter__read_pad` with `docId` to get the body.
|
|
85
|
-
2.
|
|
86
|
-
otherwise pick a durable label that fits the workspace's apparent
|
|
87
|
-
subject.
|
|
64
|
+
2. Write a logline ≤150 chars describing the content. One sentence.
|
|
88
65
|
3. Hold the result in memory. **Do not call mark_enriched per doc.**
|
|
89
66
|
|
|
90
67
|
Specifics:
|
|
91
68
|
|
|
92
69
|
- One-line / near-empty docs (`<50 chars` body): logline = title or a
|
|
93
|
-
one-phrase summary
|
|
94
|
-
title clearly says otherwise.
|
|
70
|
+
one-phrase summary of what the doc is for.
|
|
95
71
|
- Docs with `tweetContext` / `articleContext` / `blogContext` in metadata:
|
|
96
|
-
|
|
97
|
-
(article/blog), `draft` (in-progress post).
|
|
72
|
+
describe the post's argument, not "a tweet about X".
|
|
98
73
|
- Chapter-shaped docs (titles like "Ch 3 — Beats", "Chapter 5: ..."):
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
- Working surfaces ("Beat Sheet", "Decisions Log", "Open Questions"):
|
|
102
|
-
`reference` or `scratch` as fits.
|
|
103
|
-
- Master reference docs (e.g. "Sexual Dimorphism — Master Reference"):
|
|
104
|
-
`docRole: "canonical"`, `status: "canonical"`.
|
|
74
|
+
describe what happens / what's argued in the chapter, not "chapter 3 of
|
|
75
|
+
the book".
|
|
105
76
|
|
|
106
|
-
### Step
|
|
77
|
+
### Step 3. Single bulk write
|
|
107
78
|
|
|
108
79
|
After processing every doc, call `mcp__openwriter__mark_enriched` ONCE with
|
|
109
80
|
the full array:
|
|
@@ -111,18 +82,19 @@ the full array:
|
|
|
111
82
|
```
|
|
112
83
|
mark_enriched({
|
|
113
84
|
docs: [
|
|
114
|
-
{ docId, logline
|
|
85
|
+
{ docId, logline },
|
|
115
86
|
...
|
|
116
87
|
]
|
|
117
88
|
})
|
|
118
89
|
```
|
|
119
90
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
91
|
+
The schema is **strict** — passing any other field (`domain`, `concepts`,
|
|
92
|
+
`docRole`, `status`) fails validation. OpenWriter computes the
|
|
93
|
+
at-enrichment baseline (sentence-hash snapshot, char count, timestamp) and
|
|
94
|
+
clears each doc's `enrichmentStale` flag atomically. You do not compute or
|
|
95
|
+
pass any of those — that is openwriter's bookkeeping.
|
|
124
96
|
|
|
125
|
-
### Step
|
|
97
|
+
### Step 4. Report
|
|
126
98
|
|
|
127
99
|
Return a one-paragraph summary in this shape:
|
|
128
100
|
|
|
@@ -131,17 +103,16 @@ Enriched N docs across M workspaces. Touched: ws-a (N₁), ws-b (N₂), ...
|
|
|
131
103
|
Failures (if any): <docId> — <reason>.
|
|
132
104
|
```
|
|
133
105
|
|
|
134
|
-
Do not include the loglines
|
|
135
|
-
|
|
106
|
+
Do not include the loglines in your report. The main agent doesn't need to
|
|
107
|
+
see them — they're on disk. Brevity matters.
|
|
136
108
|
|
|
137
109
|
## Hard rules
|
|
138
110
|
|
|
139
111
|
1. **Never modify a body.** Enrichment is frontmatter-only via
|
|
140
112
|
`mark_enriched`. The tools you have access to don't let you write to a
|
|
141
113
|
doc's body — that's by design.
|
|
142
|
-
2. **Never
|
|
143
|
-
|
|
144
|
-
summary report. Don't extend the vocab yourself.
|
|
114
|
+
2. **Never write `status`.** That's the agent's field. The schema rejects
|
|
115
|
+
it.
|
|
145
116
|
3. **One mark_enriched call.** Batch every doc into a single bulk write.
|
|
146
117
|
Per-doc calls are wasted round-trips.
|
|
147
118
|
4. **No prose to the user.** Return only the summary. Don't explain your
|
|
@@ -151,26 +122,19 @@ doesn't need to see them — they're on disk. Brevity matters.
|
|
|
151
122
|
doc.
|
|
152
123
|
6. **Skip docs that fail to read.** If `read_pad` errors, omit the doc and
|
|
153
124
|
note it in your summary. Don't loop or retry.
|
|
154
|
-
7. **Concepts are concrete.** Skip the field entirely (or use `[]`) before
|
|
155
|
-
listing vague topics. "biology" is not a concept; "t-gate" is.
|
|
156
125
|
|
|
157
126
|
## Worked example
|
|
158
127
|
|
|
159
128
|
Input: dirty doc titled "Sexual Dimorphism — Master Reference", body
|
|
160
129
|
covering the T-gate mechanism, tournament-vs-pairbonding contrast, contest
|
|
161
|
-
mosaic theory, dimorphic trait inventory.
|
|
162
|
-
with `vocab: ["Dimorphism", "Frame", "Territory", "Contest Mosaic"]`.
|
|
130
|
+
mosaic theory, dimorphic trait inventory.
|
|
163
131
|
|
|
164
132
|
Output:
|
|
165
133
|
|
|
166
134
|
```json
|
|
167
135
|
{
|
|
168
136
|
"docId": "b88ede9b",
|
|
169
|
-
"logline": "
|
|
170
|
-
"domain": "Dimorphism",
|
|
171
|
-
"concepts": ["t-gate", "contest-mosaic", "tournament-male", "pairbonding", "dimorphic-traits"],
|
|
172
|
-
"docRole": "canonical",
|
|
173
|
-
"status": "canonical"
|
|
137
|
+
"logline": "T-gate mechanism, dimorphic trait inventory, and the contest-vs-pairbonding selection contrast."
|
|
174
138
|
}
|
|
175
139
|
```
|
|
176
140
|
|
package/skill/docs/enrichment.md
CHANGED
|
@@ -30,19 +30,18 @@ Returns every dirty doc across all workspaces with `docId`, `title`,
|
|
|
30
30
|
`workspaceFile`, `reason`. If `total ≤ 30`, stop — single minion path
|
|
31
31
|
(firm rule 5) is correct. If `total > 30`, continue.
|
|
32
32
|
|
|
33
|
-
### 2. Chunk
|
|
33
|
+
### 2. Chunk the work
|
|
34
34
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
workspace
|
|
35
|
+
v0.19.0 simplified the minion to logline-only — workspace vocab is no
|
|
36
|
+
longer relevant (the `domain` field that used it was dropped). You can
|
|
37
|
+
group chunks however you want; workspace-grouping is no longer required.
|
|
38
|
+
Practical defaults:
|
|
38
39
|
|
|
39
|
-
**Target:
|
|
40
|
+
**Target: 12–15 docs per chunk.**
|
|
40
41
|
|
|
41
|
-
- **Very large
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
workspaces into one mixed chunk so you don't spawn an army of
|
|
45
|
-
minions for trivial work.
|
|
42
|
+
- **Very large dirty list (>100 docs):** split into chunks of ~15.
|
|
43
|
+
- **Workspace-grouped is still fine** if it makes the dispatch prompts
|
|
44
|
+
easier to read, but it's no longer a performance concern.
|
|
46
45
|
|
|
47
46
|
You'll typically land on 4–10 chunks. Don't exceed ~10 parallel —
|
|
48
47
|
Anthropic per-account rate limits kick in beyond that and you get
|
|
@@ -64,26 +63,26 @@ The minion's agent file (`~/.claude/agents/openwriter-enrichment-minion.md`)
|
|
|
64
63
|
supports an explicit-list mode — pass docIds in the prompt and the minion
|
|
65
64
|
skips `list_dirty_docs` and uses your list directly.
|
|
66
65
|
|
|
67
|
-
Example prompt for one chunk:
|
|
66
|
+
Example prompt for one chunk (v0.19.0 — logline-only):
|
|
68
67
|
|
|
69
68
|
```
|
|
70
69
|
Enrich these specific openwriter docs:
|
|
71
70
|
|
|
72
|
-
Workspace: territory-c20b4ab0.json
|
|
73
71
|
- a1b2c3d4 — Frame Holding Master Reference
|
|
74
72
|
- e5f6a7b8 — Tournament Male
|
|
75
73
|
- 9f8e7d6c — Contest Mosaic Theory
|
|
76
|
-
|
|
77
|
-
Workspace: book-3.0-d2f1.json
|
|
78
74
|
- 1a2b3c4d — Ch 3 — Beats
|
|
79
75
|
- 5e6f7a8b — Ch 4 — Draft
|
|
80
76
|
|
|
81
|
-
|
|
82
|
-
|
|
77
|
+
For each: read_pad to get the body, write a logline ≤150 chars, then
|
|
78
|
+
bulk mark_enriched at the end with { docId, logline } per entry.
|
|
83
79
|
```
|
|
84
80
|
|
|
85
81
|
Keep prompts short. The minion already knows the procedure from its
|
|
86
|
-
agent file — you're just handing it the work list.
|
|
82
|
+
agent file — you're just handing it the work list. The minion's tool
|
|
83
|
+
allowlist (v0.19.0) is `list_dirty_docs`, `read_pad`, `mark_enriched`
|
|
84
|
+
— `get_workspace_structure` is no longer needed because there's no
|
|
85
|
+
workspace-vocab dependency.
|
|
87
86
|
|
|
88
87
|
### 5. Surface to the user (large-batch phrasing)
|
|
89
88
|
|
|
@@ -120,11 +119,11 @@ enrich the same docs in parallel. Most enrichments succeed (last write
|
|
|
120
119
|
wins on the frontmatter), but it's wasteful and the per-doc baselines
|
|
121
120
|
get computed multiple times. Explicit lists partition the work cleanly.
|
|
122
121
|
|
|
123
|
-
**Why
|
|
124
|
-
Two reasons: (1) turn budget — each doc costs 1
|
|
125
|
-
call
|
|
126
|
-
|
|
127
|
-
|
|
122
|
+
**Why 12–15 docs per chunk and not 50?**
|
|
123
|
+
Two reasons: (1) turn budget — each doc costs ~1 turn (one `read_pad`
|
|
124
|
+
call); ~15 docs leaves headroom inside the 500-turn ceiling even with
|
|
125
|
+
retries. (2) failure isolation — if one minion's batch errors, you lose
|
|
126
|
+
15 docs of work, not 50.
|
|
128
127
|
|
|
129
128
|
**Why dispatch in one message, not sequential Agent calls?**
|
|
130
129
|
Sequential `Agent` calls block each other. Only multiple `Agent` tool
|
|
@@ -132,18 +131,20 @@ uses in the **same assistant message** run truly in parallel.
|
|
|
132
131
|
|
|
133
132
|
## Cost ballpark
|
|
134
133
|
|
|
135
|
-
Haiku token cost per doc: ~3K
|
|
136
|
-
share of mark_enriched).
|
|
134
|
+
Haiku token cost per doc: ~1.5K–3K in v0.19.0 (one read_pad + one
|
|
135
|
+
logline synthesis + share of mark_enriched). Roughly half what it cost
|
|
136
|
+
under v0.16's five-field schema.
|
|
137
137
|
|
|
138
|
-
| Corpus size | Approx cost |
|
|
138
|
+
| Corpus size | Approx cost (v0.19.0) |
|
|
139
139
|
|---|---|
|
|
140
|
-
| 30 docs | ~$0.
|
|
141
|
-
| 100 docs | ~$0.
|
|
142
|
-
| 500 docs | ~$0.
|
|
140
|
+
| 30 docs | ~$0.02 |
|
|
141
|
+
| 100 docs | ~$0.08 |
|
|
142
|
+
| 500 docs | ~$0.40 |
|
|
143
143
|
|
|
144
144
|
Compare to ~$5.00 per doc if you used the general-purpose subagent with
|
|
145
145
|
full MCP tool registry (~50K token overhead per spawn). The custom
|
|
146
|
-
minion's tool allowlist (
|
|
146
|
+
minion's tool allowlist (3 tools in v0.19.0: `list_dirty_docs`,
|
|
147
|
+
`read_pad`, `mark_enriched`) is what makes the math work.
|
|
147
148
|
|
|
148
149
|
## Failure modes
|
|
149
150
|
|