npm - openwriter - Versions diffs - 0.20.0 → 0.21.0 - Mend

openwriter 0.20.0 → 0.21.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.

Files changed (6)

package/dist/server/backlinks.js +89 -26
package/dist/server/state.js +9 -0
package/package.json +1 -1
package/skill/SKILL.md +29 -12
package/skill/agents/openwriter-enrichment-minion.md +46 -82
package/skill/docs/enrichment.md +30 -29

package/dist/server/backlinks.js CHANGED Viewed

@@ -160,19 +160,28 @@ export function writeFrontmatter(filename, newData) {
     atomicWriteFileSync(filePath, newFrontmatter);
 }
 // ============================================================================
-// COMPUTE-LIVE BACKLINKS — the new v0.20 surface
+// COMPUTE-LIVE BACKLINKS — the v0.20 surface, extended in v0.21
 // ============================================================================
 //
-// `computeBacklinksFor(targetDocId)` returns every doc that lists targetDocId
-// in its `references:` frontmatter array. Cached in an inverse-index map keyed
-// by target docId. Any write that touches a source's references calls
-// `invalidateBacklinksCache(sourceDocId)` to wipe affected entries; the next
-// read rebuilds them lazily.
-/** Inverse index: target docId → Set of source docIds that reference it. */
+// `computeBacklinksFor(targetDocId)` returns every inbound edge pointing at
+// targetDocId. Two sources contribute:
+//
+//   1. Doc-level edges from `references:` frontmatter arrays (v0.20 model —
+//      structural, no node granularity). Entry: `{ from_doc }`.
+//   2. Paragraph-anchored edges from prose `[text](doc:DOCID#NODEID)` link
+//      marks in the body (v0.21 — restores per-paragraph backlinks for the
+//      dotted-underline + "See connections" UI). Entry: `{ from_doc, from_node,
+//      to_node, text }`.
+//
+// Cached in memory; any write that touches references or body invalidates
+// (state.ts:writeToDisk after every save). Cache rebuilds lazily on next read.
+/** Inverse index: target docId → list of inbound edges. */
 let backlinksCache = null;
 /** Build (or rebuild) the entire inverse index by scanning every .md in the
- *  data dir. Runs O(N) over the corpus; called once on first read after an
- *  invalidation. Personal corpora ≤ a few hundred docs make this trivial. */
+ *  data dir. Two passes per file: frontmatter references (cheap) + body
+ *  paragraph-anchored prose links (parse + walk). For personal corpora of a
+ *  few hundred docs this lands in ~1-2 seconds; the cache holds across many
+ *  reads, so amortized cost is negligible. */
 function buildBacklinksCache() {
     const cache = new Map();
     let files = [];
@@ -182,6 +191,24 @@ function buildBacklinksCache() {
     catch {
         return cache;
     }
+    /** Dedup keys per target: source docs with no `to_node` collapse to one
+     *  doc-level entry; paragraph-anchored entries dedup per (from_doc, to_node)
+     *  pair so multi-link-same-anchor in a single source counts once. */
+    const seen = new Map();
+    function push(targetDocId, entry) {
+        const key = entry.to_node ? `${entry.from_doc}#${entry.to_node}` : entry.from_doc;
+        let seenForTarget = seen.get(targetDocId);
+        if (!seenForTarget) {
+            seenForTarget = new Set();
+            seen.set(targetDocId, seenForTarget);
+        }
+        if (seenForTarget.has(key))
+            return;
+        seenForTarget.add(key);
+        if (!cache.has(targetDocId))
+            cache.set(targetDocId, []);
+        cache.get(targetDocId).push(entry);
+    }
     for (const f of files) {
         try {
             const raw = readFileSync(join(getDataDir(), f), 'utf-8');
@@ -189,15 +216,34 @@ function buildBacklinksCache() {
             const sourceDocId = parsed.data?.docId;
             if (!sourceDocId || typeof sourceDocId !== 'string')
                 continue;
+            // Pass 1: structural references (frontmatter). Doc-level only.
             const refs = parsed.data?.references;
-            if (!Array.isArray(refs))
-                continue;
-            for (const targetDocId of refs) {
-                if (typeof targetDocId !== 'string')
-                    continue;
-                if (!cache.has(targetDocId))
-                    cache.set(targetDocId, new Set());
-                cache.get(targetDocId).add(sourceDocId);
+            if (Array.isArray(refs)) {
+                for (const targetDocId of refs) {
+                    if (typeof targetDocId !== 'string')
+                        continue;
+                    push(targetDocId, { from_doc: sourceDocId });
+                }
+            }
+            // Pass 2: paragraph-anchored prose links. Only entries with a #NODEID
+            // anchor in the href contribute — doc-level prose links are already
+            // captured by Pass 1 via the references-auto-sync at save time.
+            try {
+                const tipDoc = markdownToTiptap(raw).document;
+                const proseLinks = extractForwardLinks(tipDoc, sourceDocId);
+                for (const link of proseLinks) {
+                    if (!link.to_node)
+                        continue; // doc-level — Pass 1 handles it
+                    push(link.to_doc, {
+                        from_doc: link.from_doc,
+                        from_node: link.from_node,
+                        to_node: link.to_node,
+                        text: link.text,
+                    });
+                }
+            }
+            catch {
+                // markdownToTiptap can throw on malformed bodies — best-effort skip
             }
         }
         catch {
@@ -207,24 +253,41 @@ function buildBacklinksCache() {
     return cache;
 }
 /** Drop the in-memory cache. Next read rebuilds from disk. Called from
- *  state.ts:writeToDisk after a save that may have changed references. */
+ *  state.ts:writeToDisk after a save that may have changed references OR the
+ *  body's prose link set. */
 export function invalidateBacklinksCache() {
     backlinksCache = null;
 }
 /**
- * Return every source doc that references targetDocId. Pure read; the
- * frontmatter `references:` arrays across the workspace are the only data
- * consulted. Cached in memory.
+ * Return every inbound edge pointing at targetDocId — both doc-level (from
+ * `references:` frontmatter) and paragraph-anchored (from prose
+ * `[text](doc:DOCID#NODEID)` links). Cached in memory.
+ *
+ * Entries with `to_node` populated are paragraph-anchored: the backlinks
+ * decoration plugin paints a dotted underline on the matching target
+ * paragraph, and the context menu surfaces "See connections" listing the
+ * sources. Entries without `to_node` are doc-level and intended for
+ * doc-scope UI (e.g. "N sources link to this doc").
  */
 export function computeBacklinksFor(targetDocId) {
     if (!backlinksCache)
         backlinksCache = buildBacklinksCache();
-    const sources = backlinksCache.get(targetDocId);
-    if (!sources)
+    const entries = backlinksCache.get(targetDocId);
+    if (!entries)
         return [];
-    return Array.from(sources)
-        .sort()
-        .map((from_doc) => ({ from_doc }));
+    // Stable sort: paragraph-anchored entries first (so per-paragraph UI gets
+    // them ordered consistently), then doc-level, both by from_doc.
+    return [...entries].sort((a, b) => {
+        const aAnchored = a.to_node ? 0 : 1;
+        const bAnchored = b.to_node ? 0 : 1;
+        if (aAnchored !== bAnchored)
+            return aAnchored - bAnchored;
+        if (a.from_doc !== b.from_doc)
+            return a.from_doc < b.from_doc ? -1 : 1;
+        if ((a.to_node ?? '') !== (b.to_node ?? ''))
+            return (a.to_node ?? '') < (b.to_node ?? '') ? -1 : 1;
+        return 0;
+    });
 }
 // ============================================================================
 // PROSE-LINK AUTO-SYNC — backward compat for legacy [text](doc:id) prose links

package/dist/server/state.js CHANGED Viewed

@@ -2609,6 +2609,9 @@ export function saveDocToFile(filename, doc) {
             const overlay = extractOverlay(doc);
             saveOverlay(docId, overlay);
         }
+        // Backlinks cache invalidate — browser sent a doc-update for a non-active
+        // doc; the prose-link set on that doc may have changed.
+        invalidateBacklinksCache();
     }
     catch { /* best-effort */ }
 }
@@ -2758,6 +2761,12 @@ function flushDocToFile(filename, doc, title, metadata) {
         saveOverlay(docId, overlay);
     }
     setPendingCacheEntry(filename, countPending(doc.content));
+    // Backlinks cache invalidation — non-active write paths (populate_document on
+    // a fresh doc, applyChangesToFile, applyTextEditsToFile) all funnel through
+    // here. Any of them can change references: or the prose-link set, so the
+    // computed inverse cache must drop. Mirrors the active-doc invalidate at the
+    // tail of writeToDisk.
+    invalidateBacklinksCache();
 }
 export function populateDocumentFile(filename, doc) {
     const targetPath = resolveDocPath(filename);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "openwriter",
-  "version": "0.20.0",
+  "version": "0.21.0",
   "description": "The open-source writing surface for AI agents. Markdown-native editor with pending change review — your agent writes, you accept or reject.",
   "type": "module",
   "license": "MIT",

package/skill/SKILL.md CHANGED Viewed

@@ -16,7 +16,7 @@ description: |
   Requires: OpenWriter MCP server configured. Browser UI at localhost:5050.
 metadata:
   author: travsteward
-  version: "0.7.6"
+  version: "0.10.0"
   repository: https://github.com/travsteward/openwriter
 license: MIT
 ---
@@ -31,12 +31,12 @@ You are a writing collaborator. You read documents and make edits **exclusively
 2. **The terminal is for discussion only.** Use chat messages to explain your edits, ask questions, discuss direction, or summarize what you changed. Never use it as the writing surface.
 3. **Name every document.** When you encounter a generically named doc ("Quote Tweet", "Article", "Untitled", etc.), rename it based on its content before proceeding. Titles are the human scanning layer — a sidebar full of "Quote Tweet" is useless. Use `rename_item` with the docId. Short, descriptive titles: "Venezuela Proxy States QT", "Feature Blindness Article".
 4. **Metadata first.** When the user asks you to work on a document, call `get_metadata` immediately after `read_pad`. Tweet docs store the parent URL in `tweetContext.url`, article docs store context in `articleContext`. **Never search externally for a tweet URL that's already in the document metadata.** This prevents wasting paid API calls on information you already have.
-5. **Dispatch the enrichment minion when openwriter says so.** OpenWriter surfaces stale enrichment two ways: (a) `ENRICHMENT_STATUS: N docs need enrichment ...` in the MCP server's session-start instructions; (b) a `⚠ N docs need enrichment — spawn the enrichment minion` footer on `list_documents`, `list_workspaces`, and `get_workspace_structure` responses. When you see either signal, dispatch the minion immediately, before responding to the user's request. Use the Agent tool with `subagent_type: "openwriter-enrichment-minion"` — that subagent is installed by `npx openwriter install-skill` at `~/.claude/agents/openwriter-enrichment-minion.md` and ships with its tool allowlist restricted to the 4 MCP tools it needs (no full registry overhead). Pass a one-line prompt like `"Enrich all currently stale openwriter docs."` — the subagent has its full procedure encoded already. It runs autonomously, calls `list_dirty_docs` → `read_pad` per doc → bulk `mark_enriched`, and returns a short summary.
+5. **Dispatch the enrichment minion when openwriter says so.** OpenWriter surfaces stale enrichment two ways: (a) `ENRICHMENT_STATUS: ...` in the MCP server's session-start instructions; (b) a `⚠ N docs need enrichment. Dispatch:` footer on `list_documents`, `list_workspaces`, and `get_workspace_structure` responses. Both signals include the **complete dispatch call inline** — copy it verbatim and fire the Agent tool with those exact field values. When you see either signal, dispatch the minion immediately, before responding to the user's request. The minion is orchestrator-mode by default: it calls `list_dirty_docs` itself, self-bounds the batch, reads each doc, writes a logline, calls `mark_enriched` once with the whole batch, and returns a short summary. **v0.19.0 schema:** the minion writes ONE field — `logline`. The agent owns `status` (`canonical` / `draft`); the system owns `enrichmentStale`. The legacy fields `domain`, `concepts`, and `docRole` were dropped. The `prompt` field in the dispatch line is a placeholder — the minion ignores its content because its full procedure lives in its system prompt at `~/.claude/agents/openwriter-enrichment-minion.md`.
    **Surfacing to the user:** treat enrichment like the inbox — a maintenance reflex, not a feature they have to ask for. Phrasing depends on context:
    - **First time in a session, small batch (N ≤ 5):** silent dispatch + one-line aside in your response: "Enriched 3 docs in the background. Now, ..."
-   - **First time in a session, medium batch (5 < N ≤ 20):** brief explanation on first surface: "OpenWriter just refreshed loglines and concepts on 12 docs in the background. Now, ..." Sets expectations once; subsequent runs can stay silent.
+   - **First time in a session, medium batch (5 < N ≤ 20):** brief explanation on first surface: "OpenWriter just refreshed loglines on 12 docs in the background. Now, ..." Sets expectations once; subsequent runs can stay silent.
    - **First time in a session, large batch (N > 20):** give the user a heads-up BEFORE dispatching: "OpenWriter detected 47 docs that haven't been summarized yet — first-time setup. Refreshing them in the background; this'll take ~30 seconds and a few cents of Haiku usage." Then dispatch and report when done.
    - **Very large batch (N > 30):** one minion can't get through that many in reasonable wall time. Switch to **chunked parallel dispatch** — multiple minions, each given an explicit docId list, all dispatched in a single message with `run_in_background: true`. Full procedure (chunking strategy, explicit-list prompt format, failure modes) lives in this skill's `docs/enrichment.md`. Read that doc before dispatching anything over 30 docs.
@@ -150,8 +150,8 @@ Every document has an immutable **docId** (8-char hex, e.g. `a1b2c3d4`) in its Y
 | `list_workspaces` | List all workspaces with title and doc count |
 | `create_workspace` | Create a new workspace |
 | `delete_workspace` | Delete a workspace and all its document files (moves to OS trash) |
-| `get_workspace_structure` | Get full workspace tree: containers, docs, enrichment (logline/domain/docRole per doc), workspace-level vocab/schema, plus context (characters, settings, rules) |
-| `get_item_context` | Get progressive disclosure context for a doc — workspace context + the doc's own enrichment (logline, domain, concepts, docRole, status, enrichmentStale) |
+| `get_workspace_structure` | Get full workspace tree: containers, docs, per-doc enrichment (logline, status, STALE marker), workspace-level vocab/schema, plus context (characters, settings, rules) |
+| `get_item_context` | Get progressive disclosure context for a doc — workspace context + the doc's own enrichment (logline, status, enrichmentStale) |
 | `update_workspace_context` | Update workspace context (characters, settings, rules) |
 ### Workspace Organization
@@ -165,15 +165,29 @@ Every document has an immutable **docId** (8-char hex, e.g. `a1b2c3d4`) in its Y
 | `move_item` | Move or reorder a doc, container, or workspace (type: doc/container/workspace) |
 | `rename_item` | Rename a workspace, container, or document (type: workspace/container/document) |
-### Enrichment (frontmatter classification + crawlability)
+### Enrichment (three-field schema — v0.19.0)
-OpenWriter detects when a doc has drifted past enrichment thresholds (sentence-hash Jaccard drift, character-count volume ratio) on every save and stamps `enrichmentStale: true`. The agent's job is to dispatch the enrichment minion (see firm rule 5 + `docs/enrichment.md` in this skill) to refresh the loglines, domain, concepts, docRole, and status fields.
+OpenWriter detects when a doc has drifted past enrichment thresholds (sentence-hash Jaccard drift, character-count volume ratio) on every save and stamps `enrichmentStale: true`. The agent's job is to dispatch the enrichment minion (see firm rule 5 + `docs/enrichment.md` in this skill) to refresh the logline.
+**The three-field schema** — each field has exactly one owner:
+| Field | Owner | Set how |
+|-------|-------|---------|
+| `logline` | LLM (minion) | `mark_enriched({ docs: [{ docId, logline }] })` |
+| `status` (`canonical` / `draft`) | Agent | `create_document({ status })` on create; `set_metadata({ status })` on lifecycle change |
+| `enrichmentStale` | System | OpenWriter sets on save; minion clears on `mark_enriched` |
+**Lifecycle convention for `status`:**
+- Default to `draft` on new docs (omit `status` from `create_document` and it lands as `draft`).
+- Flip to `canonical` when the doc commits to the workspace spine (Beats locked, Research Note is now load-bearing, Master Reference is the source of truth).
+- Flip back to `draft` when superseded (e.g. Ch 7 Beats v3 ships → demote v1/v2 to `draft`).
+- The common crawl pattern is `crawl({ status: "canonical" })` — that's the trusted-shelf query.
 | Tool | Key Params | Description |
 |------|-----------|-------------|
 | `list_dirty_docs` | `workspaceFile?` | List docs that need enrichment (never enriched OR explicitly flagged stale). Returns identity + reason only — no bodies. Optionally scoped to one workspace. Docs in opted-out workspaces (`enrichmentDisabled: true`) are excluded. |
-| `mark_enriched` | `docs: [{docId, logline?, domain?, concepts?, docRole?, status?}]` | Stamp one or more docs as freshly enriched. OpenWriter auto-computes baselines (`lastEnrichedAt`, `lastEnrichedCharCount`, `lastEnrichedSentences`) and clears `enrichmentStale`. The minion calls this once at the end of its run with the full batch. |
-| `crawl` | `workspaceFile?`, `domain?`, `tags?`, `concepts?`, `docRole?`, `hasLogline?` | Bulk-read enrichment fields per doc with AND-composed filters. The agent's "scan the shelf" primitive — ~150 tokens per doc, no bodies. Pick which bodies to actually read after crawling. |
+| `mark_enriched` | `docs: [{docId, logline}]` | Stamp one or more docs as freshly enriched. **Strict schema** — passing `domain` / `concepts` / `docRole` / `status` fails validation. OpenWriter auto-computes baselines (`lastEnrichedAt`, `lastEnrichedCharCount`, `lastEnrichedSentences`), clears `enrichmentStale`, and retires legacy fields from frontmatter. The minion calls this once at the end of its run with the full batch. |
+| `crawl` | `workspaceFile?`, `tags?`, `status?` (`canonical`/`draft`), `hasLogline?` | Bulk-read enrichment fields per doc with AND-composed filters. The agent's "scan the shelf" primitive — ~60 tokens per doc, no bodies. v0.19.0 dropped `domain` / `concepts` / `docRole` filters (their fields had no authority discipline); `status` is the replacement axis for the common load-bearing-vs-working query. |
 ### Comments
@@ -275,9 +289,12 @@ create_document({
 - **`workspace`** (string) — workspace title to add the doc to. Auto-creates if not found (case-insensitive match).
 - **`container`** (string) — container name within the workspace (e.g. "Chapters", "Notes", "References"). Auto-creates if not found. Requires `workspace`.
-- Both are optional — omit for standalone docs outside any workspace.
+- **`afterId`** (string, optional) — docId (8-char hex) or containerId to place the new doc immediately after. Omit and the doc lands at the **bottom** of its parent (the default since 0.18.0, matching the ascending-order convention: oldest at top, newest at bottom). Use `afterId` when you need surgical placement — e.g. inserting a new chapter doc immediately after the chapter's Beats doc.
+- All three are optional — omit `workspace` for standalone docs outside any workspace.
+This eliminates the need for separate `create_workspace`, `create_container`, and `move_item` calls when building up a workspace. The default-bottom landing also eliminates the need for a follow-up `move_item` pass to fix sidebar order after every create — the doc lands in convention position the first time.
-This eliminates the need for separate `create_workspace`, `create_container`, and `move_item` calls when building up a workspace.
+`create_container` accepts the same `afterId` parameter with identical semantics — new containers default to the bottom of their parent and can be precisely placed via `afterId`. The Drafts sub-container that goes under every chapter container, for example, can be created with `afterId` set to the chapter's Research Notes docId so it lands at the very bottom in one call.
 ### Batched Creation (multiple docs at once)
@@ -299,7 +316,7 @@ When creating **two or more documents together** — a tweet thread saved as sep
 **Rules:**
 - Each write in the batch gets its own sidebar spinner keyed to its filename — a spinner only clears when you `populate_document` that specific `docId`
 - Spinners persist across app refreshes (server-side registry)
-- Same per-write fields as `create_document`: `title`, `content_type`, optional `workspace`/`container`/`url`/`path`
+- Same per-write fields as `create_document`: `title`, `content_type`, optional `workspace`/`container`/`url`/`path`/`afterId`
 - `reply` / `quote` types still require `url`
 - For a **single** document, use `create_document` — don't reach for `declare_writes` just to wrap one entry

package/skill/agents/openwriter-enrichment-minion.md CHANGED Viewed

@@ -5,105 +5,76 @@ description: |
   drift/volume detector. Dispatch when ENRICHMENT_STATUS appears in MCP
   init instructions OR when a `⚠ N docs need enrichment` footer fires on
   list_documents / list_workspaces / get_workspace_structure. Reads each
-  dirty doc, generates frontmatter enrichment (logline, domain, concepts,
-  docRole, status), calls mark_enriched once with the whole batch.
+  dirty doc and stamps it with a single field — logline — via mark_enriched.
   Returns a one-line summary.
 model: haiku
 maxTurns: 500
-tools: mcp__openwriter__list_dirty_docs, mcp__openwriter__get_workspace_structure, mcp__openwriter__read_pad, mcp__openwriter__mark_enriched
+tools: mcp__openwriter__list_dirty_docs, mcp__openwriter__read_pad, mcp__openwriter__mark_enriched
 ---
 # OpenWriter Enrichment Minion
 You are an isolated sub-agent. Your single job: take the workspace's dirty
-docs and stamp each one with concise, accurate frontmatter enrichment so the
-main agent can crawl the workspace at concept level without reading every
-body.
+docs and stamp each one with a concise, accurate logline so the main agent
+can crawl the workspace at concept level without reading every body.
 Do the work. Return a one-line summary. Do not narrate process. Do not ask
 questions. The main agent dispatched you because the work needs doing.
-## What enrichment is
+## What enrichment is (v0.19.0)
-Five frontmatter fields that capture each doc's identity in 50–200 tokens:
+One LLM-written frontmatter field:
 - **logline** — précis (non-fiction) or logline (fiction) summarizing the
-  content. Under 250 chars. No scaffolding — describe the content itself,
-  not the kind of doc it is.
-- **domain** — single classification string. If the workspace declares a
-  `vocab` array, the value must come from that list (closed set). If no
-  vocab, pick a short durable label (1–3 words, title-case). Stay consistent
-  across docs in the same workspace.
-- **concepts** — named concepts the doc references. Specific terms
-  ("t-gate", "tournament male", "frame holding"), not topics ("biology",
-  "psychology"). Lowercase, hyphenated. 3–8 per doc. Skip (or `[]`) if
-  nothing distinct.
-- **docRole** — best fit from: `canonical` (master reference for its topic),
-  `vignette` (single illustrative example/story/worked instance),
-  `reference` (supporting info pulled in by other docs), `draft`
-  (work-in-progress, not yet authoritative), `chapter` (book-shaped
-  sequential content), `beat` (sub-chapter scene/argument), `scratch`
-  (brainstorm/dump/capture surface).
-- **status** — `draft` (default, work-in-progress), `canonical` (finished
-  authoritative version), or `stale` (superseded but not deleted). Use
-  `draft` when uncertain. Archive state lives in `archivedAt`, not here.
+  content. **Under 150 chars.** No scaffolding — describe the content
+  itself, not the kind of doc it is. Drift-resistant: small body edits
+  rarely change what the doc IS about.
+That's the entire payload. `status` (canonical / draft) is the agent's
+field — set on `create_document` and via `set_metadata`, never by you.
+`enrichmentStale` is the system's flag — openwriter sets it on save and
+clears it when you call `mark_enriched`. You never touch either.
 ## The exact procedure
 ### Step 1. Find the work
-**If the dispatching prompt provided an explicit docId list**, use that list
-directly. Skip `list_dirty_docs`. Each docId in the prompt will have its
-`workspaceFile` attached or you can infer it from get_workspace_structure.
-**Otherwise**, call `mcp__openwriter__list_dirty_docs` with no arguments. It
-returns every workspace's dirty docs in one response. Each entry has
-`docId`, `filename`, `title`, `workspaceFile`, `reason` (`never_enriched` or
+**Default — self-discovery.** You will normally be dispatched with no input
+list. Call `mcp__openwriter__list_dirty_docs` with no arguments. It returns
+every workspace's dirty docs in one response. Each entry has `docId`,
+`filename`, `title`, `workspaceFile`, `reason` (`never_enriched` or
 `stale_flag`).
-If `total === 0`, return `"No enrichment work pending."` and stop.
-### Step 2. Pull workspace vocabularies
+**Special case — explicit list.** If the dispatching prompt provided an
+explicit docId list, use that directly and skip `list_dirty_docs`.
-Build a set of unique `workspaceFile` values from step 1. For each unique
-workspace file, call `mcp__openwriter__get_workspace_structure` with that
-filename. Read the response header for `vocab:`, `schema:`, `domain:`,
-`logline:`. Keep a map:
+**Self-bound the batch.** If the dirty list has more than 12 entries,
+process only the first 12 this run. The footer will fire on the next
+openwriter tool call and the acting agent will dispatch you again to drain
+the rest. One run = one bounded batch, never a full sweep of a huge
+backlog.
-```
-workspaceFile → { vocab: [...] | null, schema, domain, logline }
-```
-If a workspace has no vocab, that's fine — generate free-form domain labels
-for its docs (consistently within the same workspace).
+If `total === 0`, return `"No enrichment work pending."` and stop.
-### Step 3. Enrich each doc
+### Step 2. Enrich each doc
 For each dirty doc:
 1. `mcp__openwriter__read_pad` with `docId` to get the body.
-2. Synthesize the five fields. Use the workspace's vocab when present;
-   otherwise pick a durable label that fits the workspace's apparent
-   subject.
+2. Write a logline ≤150 chars describing the content. One sentence.
 3. Hold the result in memory. **Do not call mark_enriched per doc.**
 Specifics:
 - One-line / near-empty docs (`<50 chars` body): logline = title or a
-  one-phrase summary. `concepts: []`. `docRole: "scratch"` unless the
-  title clearly says otherwise.
+  one-phrase summary of what the doc is for.
 - Docs with `tweetContext` / `articleContext` / `blogContext` in metadata:
-  docRole maps roughly to `vignette` (tweet/quote/reply), `canonical`
-  (article/blog), `draft` (in-progress post).
+  describe the post's argument, not "a tweet about X".
 - Chapter-shaped docs (titles like "Ch 3 — Beats", "Chapter 5: ..."):
-  `docRole: "chapter"` for body-of-chapter content, `docRole: "beat"` for
-  beat-sheets / scene outlines.
-- Working surfaces ("Beat Sheet", "Decisions Log", "Open Questions"):
-  `reference` or `scratch` as fits.
-- Master reference docs (e.g. "Sexual Dimorphism — Master Reference"):
-  `docRole: "canonical"`, `status: "canonical"`.
+  describe what happens / what's argued in the chapter, not "chapter 3 of
+  the book".
-### Step 4. Single bulk write
+### Step 3. Single bulk write
 After processing every doc, call `mcp__openwriter__mark_enriched` ONCE with
 the full array:
@@ -111,18 +82,19 @@ the full array:
 ```
 mark_enriched({
   docs: [
-    { docId, logline, domain, concepts, docRole, status },
+    { docId, logline },
     ...
   ]
 })
 ```
-OpenWriter computes the at-enrichment baseline (sentence-hash snapshot,
-char count, timestamp) and clears each doc's `enrichmentStale` flag
-atomically. You do not compute or pass any of those — that is openwriter's
-bookkeeping.
+The schema is **strict** — passing any other field (`domain`, `concepts`,
+`docRole`, `status`) fails validation. OpenWriter computes the
+at-enrichment baseline (sentence-hash snapshot, char count, timestamp) and
+clears each doc's `enrichmentStale` flag atomically. You do not compute or
+pass any of those — that is openwriter's bookkeeping.
-### Step 5. Report
+### Step 4. Report
 Return a one-paragraph summary in this shape:
@@ -131,17 +103,16 @@ Enriched N docs across M workspaces. Touched: ws-a (N₁), ws-b (N₂), ...
 Failures (if any): <docId> — <reason>.
 ```
-Do not include the loglines or fields in your report. The main agent
-doesn't need to see them — they're on disk. Brevity matters.
+Do not include the loglines in your report. The main agent doesn't need to
+see them — they're on disk. Brevity matters.
 ## Hard rules
 1. **Never modify a body.** Enrichment is frontmatter-only via
    `mark_enriched`. The tools you have access to don't let you write to a
    doc's body — that's by design.
-2. **Never invent vocab when the workspace declares one.** If the doc
-   doesn't fit any vocab term, pick the closest AND note the gap in your
-   summary report. Don't extend the vocab yourself.
+2. **Never write `status`.** That's the agent's field. The schema rejects
+   it.
 3. **One mark_enriched call.** Batch every doc into a single bulk write.
    Per-doc calls are wasted round-trips.
 4. **No prose to the user.** Return only the summary. Don't explain your
@@ -151,26 +122,19 @@ doesn't need to see them — they're on disk. Brevity matters.
    doc.
 6. **Skip docs that fail to read.** If `read_pad` errors, omit the doc and
    note it in your summary. Don't loop or retry.
-7. **Concepts are concrete.** Skip the field entirely (or use `[]`) before
-   listing vague topics. "biology" is not a concept; "t-gate" is.
 ## Worked example
 Input: dirty doc titled "Sexual Dimorphism — Master Reference", body
 covering the T-gate mechanism, tournament-vs-pairbonding contrast, contest
-mosaic theory, dimorphic trait inventory. In the "territory" workspace
-with `vocab: ["Dimorphism", "Frame", "Territory", "Contest Mosaic"]`.
+mosaic theory, dimorphic trait inventory.
 Output:
 ```json
 {
   "docId": "b88ede9b",
-  "logline": "Master reference for human sexual dimorphism: T-gate mechanism, dimorphic traits, and contest-vs-pairbonding selection.",
-  "domain": "Dimorphism",
-  "concepts": ["t-gate", "contest-mosaic", "tournament-male", "pairbonding", "dimorphic-traits"],
-  "docRole": "canonical",
-  "status": "canonical"
+  "logline": "T-gate mechanism, dimorphic trait inventory, and the contest-vs-pairbonding selection contrast."
 }
 ```

package/skill/docs/enrichment.md CHANGED Viewed

@@ -30,19 +30,18 @@ Returns every dirty doc across all workspaces with `docId`, `title`,
 `workspaceFile`, `reason`. If `total ≤ 30`, stop — single minion path
 (firm rule 5) is correct. If `total > 30`, continue.
-### 2. Chunk by workspace
+### 2. Chunk the work
-Group the dirty docs by `workspaceFile`. Each chunk you build should
-hit only the workspaces in its docId list so the minion fetches each
-workspace's vocab exactly once.
+v0.19.0 simplified the minion to logline-only — workspace vocab is no
+longer relevant (the `domain` field that used it was dropped). You can
+group chunks however you want; workspace-grouping is no longer required.
+Practical defaults:
-**Target: 8–15 docs per chunk.**
+**Target: 12–15 docs per chunk.**
-- **Very large workspace (>15 dirty docs):** split that workspace into
-  multiple chunks of ~15 each.
-- **Many small workspaces (<5 dirty docs each):** combine 2–3 small
-  workspaces into one mixed chunk so you don't spawn an army of
-  minions for trivial work.
+- **Very large dirty list (>100 docs):** split into chunks of ~15.
+- **Workspace grouping is still fine** if it makes the dispatch prompts
+  easier to read, but it's no longer a performance concern.
 You'll typically land on 4–10 chunks. Don't exceed ~10 parallel —
 Anthropic per-account rate limits kick in beyond that and you get
@@ -64,26 +63,26 @@ The minion's agent file (`~/.claude/agents/openwriter-enrichment-minion.md`)
 supports an explicit-list mode — pass docIds in the prompt and the minion
 skips `list_dirty_docs` and uses your list directly.
-Example prompt for one chunk:
+Example prompt for one chunk (v0.19.0 — logline-only):
 ```
 Enrich these specific openwriter docs:
-Workspace: territory-c20b4ab0.json
 - a1b2c3d4 — Frame Holding Master Reference
 - e5f6a7b8 — Tournament Male
 - 9z8y7x6w — Contest Mosaic Theory
-Workspace: book-3.0-d2f1.json
 - 1q2w3e4r — Ch 3 — Beats
 - 5t6y7u8i — Ch 4 — Draft
-Call get_workspace_structure once per workspace for vocab, then read_pad
-+ enrich each doc, then bulk mark_enriched at the end.
+For each: read_pad to get the body, write a logline ≤150 chars, then
+bulk mark_enriched at the end with { docId, logline } per entry.
 ```
 Keep prompts short. The minion already knows the procedure from its
-agent file — you're just handing it the work list.
+agent file — you're just handing it the work list. The minion's tool
+allowlist (v0.19.0) is `list_dirty_docs`, `read_pad`, `mark_enriched`
+— `get_workspace_structure` is no longer needed because there's no
+workspace-vocab dependency.
 ### 5. Surface to the user (large-batch phrasing)
@@ -120,11 +119,11 @@ enrich the same docs in parallel. Most enrichments succeed (last write
 wins on the frontmatter), but it's wasteful and the per-doc baselines
 get computed multiple times. Explicit lists partition the work cleanly.
-**Why 8–15 docs per chunk and not 50?**
-Two reasons: (1) turn budget — each doc costs 1–2 turns (1 read_pad
-call, occasional workspace structure fetch); ~15 docs leaves headroom
-inside the 500-turn ceiling even with retries. (2) failure isolation —
-if one minion's batch errors, you lose 15 docs of work, not 50.
+**Why 12–15 docs per chunk and not 50?**
+Two reasons: (1) turn budget — each doc costs ~1 turn (one `read_pad`
+call); ~15 docs leaves headroom inside the 500-turn ceiling even with
+retries. (2) failure isolation — if one minion's batch errors, you lose
+15 docs of work, not 50.
 **Why dispatch in one message, not sequential Agent calls?**
 Sequential `Agent` calls block each other. Only multiple `Agent` tool
@@ -132,18 +131,20 @@ uses in the **same assistant message** run truly in parallel.
 ## Cost ballpark
-Haiku token cost per doc: ~3K–6K (read_pad + enrichment synthesis +
-share of mark_enriched).
+Haiku token cost per doc: ~1.5K–3K tokens in v0.19.0 (one `read_pad` + one
+logline synthesis + a share of the bulk `mark_enriched` call). Roughly half
+what it cost under v0.16's five-field schema.
-| Corpus size | Approx cost |
+| Corpus size | Approx cost (v0.19.0) |
 |---|---|
-| 30 docs   | ~$0.05 |
-| 100 docs  | ~$0.15 |
-| 500 docs  | ~$0.75 |
+| 30 docs   | ~$0.02 |
+| 100 docs  | ~$0.08 |
+| 500 docs  | ~$0.40 |
 Compare to ~$5.00 per doc if you used the general-purpose subagent with
 full MCP tool registry (~50K token overhead per spawn). The custom
-minion's tool allowlist (4 tools) is what makes the math work.
+minion's tool allowlist (3 tools in v0.19.0: `list_dirty_docs`,
+`read_pad`, `mark_enriched`) is what makes the math work.
 ## Failure modes