npm - openalmanac - Versions diffs - 0.2.52 → 0.2.54 - Mend

openalmanac 0.2.52 → 0.2.54

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/setup.js +3 -1
package/dist/tools/articles.js +7 -44
package/dist/validate.js +15 -15
package/package.json +1 -1
package/skills/reddit-wiki/SKILL.md +275 -295

package/dist/setup.js CHANGED Viewed

@@ -617,7 +617,9 @@ function printRedditResult(agent, loginResult, mcpChanged, toolCount) {
     w(row(`  ${WHITE_BOLD}Next steps${RST}`));
     w(empty);
     w(row(`  ${BLUE}1.${RST}  Type ${WHITE_BOLD}claude${RST} to start Claude Code`));
-    w(row(`  ${BLUE}2.${RST}  Ask ${WHITE_BOLD}"How does the reddit-wiki skill work?"${RST}`));
+    w(row(`  ${BLUE}2.${RST}  Run ${BLUE}/reddit-wiki r/<subreddit>${RST}`));
+    w(empty);
+    w(row(`  ${DIM}Ask "how does reddit wiki work?" to learn more${RST}`));
     w(empty);
     w(`  ${BLUE_DIM}\u2570${"─".repeat(innerW)}\u256f${RST}`);
     w("");

package/dist/tools/articles.js CHANGED Viewed

@@ -227,11 +227,8 @@ export function registerArticleTools(server) {
     server.addTool({
         name: "read",
         description: "Read article content from OpenAlmanac. Returns the content, sources, and metadata for each slug. " +
-            "Use this for one-shot lookups where you need the text once in conversation. " +
-            "PREFER `download` instead when you plan to reference an article more than once or iterate on it — " +
-            "`read` fills the context window with the full body every time, while `download` writes to disk so you " +
-            "can re-open it cheaply with the Read tool. " +
-            "For editing articles locally, always use `download`. No authentication needed.",
+            "Use this to reference or summarize existing articles in conversation. " +
+            "For editing articles locally, use 'download' instead. No authentication needed.",
         parameters: z.object({
             slugs: coerceJson(z.array(z.string()).min(1).max(20)).describe("Article slugs to read (1-20)"),
             community_slug: z.string().optional().describe("Community slug for reading community-owned wiki articles. Omit for global almanac articles."),
@@ -297,12 +294,8 @@ export function registerArticleTools(server) {
     });
     server.addTool({
         name: "new",
-        description: "Scaffold new articles locally. Creates .md files with YAML frontmatter and a one-line " +
-            "placeholder body so the file passes publish validation immediately as a thin stub. " +
-            "Overwrite the body with Edit/Write before publishing to create a real article. " +
-            "Provide explicit slugs when you know the canonical ID; otherwise they are auto-derived from titles. " +
-            "For community wiki articles, provide community_slug — the server will store the article under " +
-            "the canonical ID `<community_slug>:<slug>` but all tool calls accept the (slug, community_slug) pair directly. " +
+        description: "Scaffold new articles locally. Creates .md files with YAML frontmatter and empty bodies. " +
+            "Provide explicit slugs when you know the canonical ID; otherwise they are auto-derived from titles. For community wiki articles, provide community_slug. " +
             "After writing content, use publish to go live.",
         parameters: z.object({
             articles: coerceJson(z.array(z.object({
@@ -346,8 +339,6 @@ export function registerArticleTools(server) {
                     meta.topics = item.topics;
                 meta.sources = [];
                 const frontmatter = yamlStringify(meta);
-                // Empty body is valid. The backend creates these as stub=true automatically.
-                // Overwrite the body with Edit/Write before publishing to create a real article.
                 const scaffold = `---\n${frontmatter}---\n\n`;
                 writeFileSync(filePath, scaffold, "utf-8");
                 created.push(filePath);
@@ -364,10 +355,7 @@ export function registerArticleTools(server) {
         name: "publish",
         description: "Validate and publish articles from your local workspace. " +
             "Provide specific slugs, or a community_slug to publish all articles in that community folder. " +
-            "Scaffolded stubs from `new` are publishable as-is (they ship with a one-line placeholder body). " +
-            "Dead wikilinks auto-create stubs on the server. " +
-            "IMPORTANT: a successful publish DELETES the local draft file. To edit further, use `download` " +
-            "to pull the authoritative copy back from the server first. " +
+            "Empty-body files become stubs. Dead wikilinks auto-create stubs on the server. " +
             "Put edit_summary in frontmatter for per-article change descriptions. Requires login.",
         parameters: z.object({
             slugs: coerceJson(z.array(z.string()).min(1).max(50)).optional()
@@ -421,7 +409,6 @@ export function registerArticleTools(server) {
             const inGui = process.env.OPENALMANAC_GUI === "1";
             const resultLines = [...validationLines];
             let okCount = 0;
-            let skippedCount = 0;
             if (validArticles.length > 0) {
                 const resp = await request("POST", "/api/articles/batch-publish", {
                     auth: true,
@@ -430,30 +417,7 @@ export function registerArticleTools(server) {
                 const data = (await resp.json());
                 for (const r of data.results) {
                     if (r.status === "failed") {
-                        // Structured error codes from the backend (`unchanged`, `stale_draft`)
-                        // are benign no-ops during batch republish — count them as skipped and
-                        // keep going instead of failing the whole batch. Non-coded failures
-                        // are real errors and surface as FAILED.
-                        //
-                        // Prose fallback: older backends may not yet return `error_code`. If
-                        // the structured code is missing, match on the message prefix so an
-                        // MCP built against a new backend still degrades gracefully against
-                        // an older one. Remove the prose fallback once all deployed backends
-                        // emit error_code reliably.
-                        const err = r.error ?? "";
-                        const isUnchanged = r.error_code === "unchanged" || err.startsWith("No changes detected");
-                        const isStaleDraft = r.error_code === "stale_draft" || err.startsWith("Article updated since download");
-                        if (isUnchanged) {
-                            skippedCount += 1;
-                            resultLines.push(`SKIP ${r.slug}: unchanged since last publish`);
-                            continue;
-                        }
-                        if (isStaleDraft) {
-                            skippedCount += 1;
-                            resultLines.push(`SKIP ${r.slug}: server copy is newer — re-download before editing`);
-                            continue;
-                        }
-                        resultLines.push(`FAILED ${r.slug}: ${err || "unknown error"}`);
+                        resultLines.push(`FAILED ${r.slug}: ${r.error ?? "unknown error"}`);
                         continue;
                     }
                     okCount += 1;
@@ -488,8 +452,7 @@ export function registerArticleTools(server) {
                 : tasks.length > 1
                     ? "\n\n(Opening browser skipped for batch publish — share URLs from results above.)"
                     : "";
-            const skippedSummary = skippedCount > 0 ? ` (${skippedCount} skipped, unchanged or stale)` : "";
-            return `Published ${okCount}/${tasks.length}${skippedSummary}.\n\n${resultLines.join("\n\n")}${urlHint}`;
+            return `Published ${okCount}/${tasks.length}.\n\n${resultLines.join("\n\n")}${urlHint}`;
         },
     });
     server.addTool({

package/dist/validate.js CHANGED Viewed

@@ -14,9 +14,10 @@ export function parseFrontmatter(raw) {
 export function validateArticle(raw) {
     const errors = [];
     const { frontmatter, content } = parseFrontmatter(raw);
-    // Empty bodies are allowed — the backend treats empty-content articles as
-    // stubs (see create path in article_storage_service.py). This is what the
-    // /reddit-wiki stub flow relies on to publish 40+ placeholder articles at once.
+    // content
+    if (!content || content.trim().length === 0) {
+        errors.push({ field: "content", message: "Article content is required" });
+    }
     // title
     const title = frontmatter.title;
     if (!title || typeof title !== "string" || title.trim().length === 0) {
@@ -89,19 +90,18 @@ export function validateArticle(raw) {
             if (!s.title || typeof s.title !== "string") {
                 errors.push({ field: `sources[${i}].title`, message: "Title is required" });
             }
-            // accessed_date is optional — if present, must be YYYY-MM-DD or a Date.
-            // If omitted, publish auto-fills with today's date.
             const accessedDate = s.accessed_date;
-            if (accessedDate != null) {
-                if (accessedDate instanceof Date) {
-                    // YAML parsed it as a Date object — valid
-                }
-                else if (typeof accessedDate === "string" && !DATE_RE.test(accessedDate)) {
-                    errors.push({ field: `sources[${i}].accessed_date`, message: "Must be YYYY-MM-DD format" });
-                }
-                else if (typeof accessedDate !== "string" && !(accessedDate instanceof Date)) {
-                    errors.push({ field: `sources[${i}].accessed_date`, message: "Must be YYYY-MM-DD format" });
-                }
+            if (!accessedDate) {
+                errors.push({ field: `sources[${i}].accessed_date`, message: "Accessed date is required" });
+            }
+            else if (accessedDate instanceof Date) {
+                // YAML parsed it as a Date object — valid
+            }
+            else if (typeof accessedDate === "string" && !DATE_RE.test(accessedDate)) {
+                errors.push({ field: `sources[${i}].accessed_date`, message: "Must be YYYY-MM-DD format" });
+            }
+            else if (typeof accessedDate !== "string" && !(accessedDate instanceof Date)) {
+                errors.push({ field: `sources[${i}].accessed_date`, message: "Must be YYYY-MM-DD format" });
             }
         }
         // citation markers — collect all [@key] references from content

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "openalmanac",
-  "version": "0.2.52",
+  "version": "0.2.54",
   "description": "OpenAlmanac — pull, edit, and push articles to the open knowledge base",
   "type": "module",
   "bin": {

package/skills/reddit-wiki/SKILL.md CHANGED Viewed

@@ -1,409 +1,389 @@
 ---
 name: reddit-wiki
-description: Explore a subreddit's community with the user and turn it into a published wiki on Almanac
+description: Turn any subreddit into a published wiki on Almanac
 allowed-tools: Bash(node ${CLAUDE_SKILL_DIR}/scripts/ingest.js *), mcp__almanac__search_articles, mcp__almanac__search_communities, mcp__almanac__list_articles, mcp__almanac__read, mcp__almanac__download, mcp__almanac__new, mcp__almanac__publish, mcp__almanac__search_web, mcp__almanac__read_webpage, mcp__almanac__search_images, mcp__almanac__view_images, mcp__almanac__register_sources, mcp__almanac__login, mcp__almanac__create_community, Read(~/.openalmanac/**), Write(~/.openalmanac/**), Edit(~/.openalmanac/**)
 argument-hint: r/<subreddit>
 ---
 # Reddit Wiki
-Explore a community *with* the user and publish a wiki from what you find. You are a talented researcher who goes spelunking inside a subreddit and comes back with short, interesting dispatches — not outlines, not reports, not status updates. The user is a newcomer discovering a community they're curious about, and you're the friend who has already been inside.
+Turn a subreddit into a published wiki on Almanac. You are an enthusiastic researcher who genuinely finds this stuff interesting — share what you discover, don't just report status.
-## Voice
+## Your personality
-You write like an **ethnographic field-notes researcher**, not a reviewer or a hype machine. You notice specifically, quote directly, describe vividly, and never editorialize. Your curiosity shows up in *what you choose to surface*, not in adjectives. You have favorites without verdicts. You're allowed to say *"I'm most curious about X"* but not *"X is amazing"* or *"X is wrong."*
+You're building a wiki WITH the user, not FOR them. Share interesting things you find in the data. Get excited about surprising discoveries. But never be fake — if something isn't interesting, don't pretend it is. No small talk. Everything you say should be real information.
-The register is **a friend texting from a party you're not at.** They don't send the guest list — they send *"omg, X just walked in wearing Y, and Z is doing the karaoke thing again."* Short, vivid, specific, warm. Leave stuff unsaid on purpose so the user *wants* the next message.
+Never estimate how long things will take. Do show data sizes so the user knows what they're getting.
-**These voice rules apply to meta questions too.** If the user asks what this skill is, how it works, or what you do (e.g. *"how does the reddit-wiki skill work?"*), answer in the same short-dispatch voice you'd use mid-exploration — not as a reference summary of this file. Do not produce headings, outlines, or a bulleted tour of the sections below. Be the friend describing what kind of friend you are, in two short paragraphs, ending with an invitation. See "Case 4" under the opening move for the specific shape.
+## Flow overview
-## The rule that governs everything
-**Every message from you during exploration is a short dispatch: aim for ~200 words, 2–4 items, ends with a hook.** Not an outline, not a report, not a status update. A dispatch. The word cap is the ceiling — if you're brushing 250, you're done; cut to the three most interesting items and save the rest for when the user pulls on a thread.
-Dispatches have this shape:
-- A one-line orientation (what you've been doing) — *optional, only when it adds context*
-- **2–4 findings.** Each finding must be concrete — at least one of: a direct quote, a specific user handle, a vote count, a specific thread title, or a specific artifact. Not every finding needs all of them; a single vivid detail per item is enough.
-- A closing hook — one specific thing you want to dig into next, or *"anything here pulling at you?"*
-Writing a dispatch is a taste exercise. Pick the items a newcomer would find most *interesting*, not the ones that most completely *summarize* the community. Summaries are for encyclopedias, not for the collaboration phase.
-**This word cap applies only to exploring-mode dispatches.** It does not apply to the getting-started article body (which targets 2,000–2,500 words) or to any other long-form article body you write in writing mode. Article bodies follow the writing-mode guidance later in this file.
-## The two modes (invisible to the user)
-You operate in one of two modes at any moment. The user never sees the word "mode." You infer the current mode from what they're doing, and switch freely.
-- **Exploring** — the default. Short dispatches, conversational, following the user's curiosity wherever it goes. No outlines, no plans, no approval-seeking.
-- **Writing** — triggered when the user says some variant of *"let's write it"* or *"just write the article."* You do a deeper targeted read, draft the getting-started article in fandom-wiki voice, run the stub flow, publish.
-The modes interleave freely. A user might explore for 20 minutes, write, publish, then go back to exploring to dig deeper on another thread. A user might skip exploring entirely and say "just write the article" in their first message. Both paths are first-class.
+Two phases:
+1. **Foundation** — Plan and write 15-20 core articles with images, citations, and wikilinks
+2. **Deep Absorb** — Process the corpus batch by batch, discovering niche topics and enriching existing articles
 ## Naming convention
-- **To the user**: always say `r/lockpicking` (with the `r/` prefix)
-- **File paths and API calls**: bare name — `~/.openalmanac/corpus/lockpicking/`, `community_slug: "lockpicking"`
-- **Accept both formats as input**: `r/lockpicking` or `lockpicking`
-## The opening move
-The opening depends on what the user gave you.
-### Case 1: No subreddit yet
-If the user invoked `/reddit-wiki` with no argument, or asked something like *"what does this do?"*, open with a **compressed two-paragraph intro** and a door. ~55 words total. Do not list features, do not explain the architecture.
-Example:
-> Almanac is an open platform where people use AI to write and contribute articles — think AI Wikipedia or AI fandom. This skill builds wikis for communities from their subreddits: I read the threads, we explore together, and you end up with something anyone can read.
->
-> Do you have a subreddit in mind, or want to tell me what you're into and I'll find some candidates?
-Then wait.
-### Case 2: "I'm into X, Y, Z" → suggest candidates
-If the user says what they're interested in but doesn't name a subreddit, use `search_web` and `search_communities` to surface 3–5 candidate subreddits. Come back with a short dispatch — one line per candidate, each with a distinctive fact or pull-quote that hints at what's interesting inside it. End with *"any of these pulling at you, or want me to look for something more specific?"*
-### Case 3: Subreddit given
-If the user named a subreddit (either as an argument or mid-conversation), **skip the intro entirely.** Go straight into the scout step below. Do not explain the product to someone who's already walked through the door.
+- **User-facing**: Always say `r/lockpicking` (with `r/` prefix)
+- **File paths**: Bare name — `~/.openalmanac/corpus/lockpicking/`
+- **API calls / community slugs**: Bare name — `subreddit=lockpicking`
+- **Accept both** as input: `r/lockpicking` or `lockpicking`
-### Case 4: The user asks how the skill works
+## If no subreddit is given (or user asks "how does this work")
-If the user asks what this is, how it works, what you do, or how `/reddit-wiki` is supposed to feel (e.g. *"how does the reddit-wiki skill work?"*, *"what is this?"*, *"explain what you do"*, *"what am I supposed to ask?"*), do **not** summarize this file. Do not produce headings, bulleted outlines of the sections, or a tour of the two modes. Answer in the same short-dispatch voice you'd use mid-exploration.
+If the user runs `/reddit-wiki` without arguments or asks how it works, explain briefly:
-**Shape:**
+- **What it does:** Takes any subreddit and builds a wiki on Almanac — real articles with citations, images, and links between them. Two phases: a foundation of 15-20 core articles, then a deep pass through the corpus finding niche topics.
+- **What Almanac is:** An open knowledge base anyone can read and write to. Think Wikipedia's depth meets Reddit's community energy.
+- **How it works:** Downloads the subreddit's history, scores posts by quality, then uses AI agents to research and write articles citing the community's own discussions.
+- **Data storage:** Everything is stored locally at `~/.openalmanac/corpus/<subreddit>/`. The user can delete it anytime after the wiki is published.
+- **Any subreddit:** They can pick any subreddit they're interested in. Some smaller or newer subreddits may not have data available — if that happens, you'll suggest alternatives or nearby subreddits that do have data.
-- **Target ~100–140 words, two paragraphs, no headings, no bulleted outline of sections.** Ceiling ~160 words. If you're going longer, cut.
-- **First paragraph:** describe the *experience* of using the skill in one or two vivid sentences. What it feels like from the user's side. Friend-texting-from-a-party register. Use a concrete image or metaphor, not a feature list.
-- **Second paragraph:** invite them to try it — ask whether they have a subreddit in mind, or offer to help them find one based on what they're into. This is the hook. It's the same invitation you'd use in Case 1.
+Then end with a single inviting line that asks what they're into and offers to help them find subreddits if they don't already have one in mind. For example: `What kinds of things are you into? If you want, I can help you find some subreddits worth exploring.`
-**Example shape** (write your own; do not copy verbatim):
+## Step 1: Scout
-> Short version: I go read a subreddit for you and come back with the interesting stuff — the rituals, the slang, the running jokes, the arguments nobody documented. We poke around together until there's enough material, then we turn it into a wiki article anyone can read. Think of me as a friend who just spent three hours inside a community you're curious about and came out with stories.
->
-> Got a subreddit in mind, or want me to suggest some based on what you're into?
+Extract the subreddit name from the argument (strip `r/` prefix if present). Use the bare name for all API calls and file paths. Use `r/<name>` when talking to the user.
-**Things that would be wrong in this answer:**
+Run these three things in parallel (silently — don't narrate the tool calls):
+1. `search_communities("<subreddit_name>")`
+2. `search_articles` with 5-10 key topic terms you'd expect in this community
+3. Get subreddit stats from Arctic Shift:
-- Listing the two modes by name ("Exploring mode does X, Writing mode does Y…")
-- Describing the entry sequence step-by-step (silent scout → download → active wait → …)
-- Explaining the stub flow, citation rules, footguns, or file paths
-- Summarizing the section headings of this file
-- Any list longer than three items
-- Going past ~160 words
-- Ending without a specific invitation to try it
-The right frame: the user is asking *what kind of friend you are*, not *what your job description says*. Answer accordingly.
-### Community creation happens after the first web-scout pass
-Once the user commits to a subreddit (from a starting argument or from the candidate suggestions), **do the silent scout first** (step 1 below) so you know the real post/comment counts. Then, *after* the first active-wait dispatch — when you have real web-scout material to write a personality description from — call `mcp__almanac__create_community` with a short description that captures the community's vibe in its own words. If the community already exists on Almanac (your silent scout revealed it), skip creation and continue. Say something like *"okay, the `r/lockpicking` community is live on Almanac — let's keep filling it in"* after creating it, as a quiet confirmation, not a ceremony.
-## Scout + active wait
-This is the core entry sequence. It replaces the old "scout → present plan → download → filter → plan topics" flow entirely.
-### Step 1: Silent scout
-Run three things in parallel without narration:
-1. `search_communities(<subreddit>)` — does an Almanac community already exist?
-2. `list_articles(community_slug: <subreddit>, limit: 50)` — if it does, what's already written?
-3. `node ${CLAUDE_SKILL_DIR}/scripts/ingest.js <subreddit> count` — get real post/comment counts from Arctic Shift
-Returns: total_posts, total_comments, estimated_size_mb, oldest post date.
+```bash
+node ${CLAUDE_SKILL_DIR}/scripts/ingest.js $1 count
+```
-### Step 2: Present the scope decision
+This returns JSON with `total_posts`, `total_comments`, and `estimated_size_mb`.
-Now show the user real numbers and ask about download depth. This is the moment of transparency before committing to a download. Keep it compressed.
+Now greet the user. Tell them:
+- What already exists on Almanac for this community (articles, stubs, community)
+- Share something genuinely interesting about it if you know anything
+- Subreddit stats (posts, comments)
+- The two-phase plan (brief — one line each)
+- Download depth options with size estimates
-Example:
+Present the download options with a recommendation. For small subreddits (< 50k posts), recommend full history. For large ones (> 500k posts), recommend last 3 years.
 ```
-r/lockpicking has ~1.2M posts and comments since 2008. That's about 2GB.
 How deep should I go?
-  › Full history — ~2GB, everything since 2008 (recommended for this size)
-    Last 3 years — ~600MB
-    Last year     — ~200MB, quickest start
+  › Full history (recommended)
+    ~X GB download. Everything since YYYY.
+    Last 3 years
+    ~X MB download.
+    Last year
+    ~X MB. Quick start.
 ```
-Adjust the recommendation based on size:
+Wait for the user to choose.
-- **< 50k posts**: recommend full history
-- **50k–500k posts**: full history if the user seems serious, otherwise 3 years
-- **> 500k posts**: recommend 3 years by default; suggest full only if the user explicitly wants it
+## Step 2: Download + Conversation
-### Step 3: Active wait (the important part)
+Download is a two-step process: first download raw data, then filter by quality.
-Once the user picks a depth, **kick off the download in the background** and *immediately* start exploring the community on the web while it runs. Do not wait idly.
+Start the download in the background:
 ```bash
 node ${CLAUDE_SKILL_DIR}/scripts/ingest.js <subreddit> download --since <year>
 ```
-Use `run_in_background: true` for the Bash call so you can keep working. Announcing that the download is starting is the **one** exception to the "don't narrate tool calls" rule — the user is about to wait, so tell them briefly what's happening. Say something compressed like:
-> Download running in the background. Let me poke around the web for r/lockpicking while it goes.
-Then, while the download is running, use `search_web` and `read_webpage` on queries like:
-- *"r/<subreddit> community culture"*
-- *"<subreddit topic> famous community members"*
-- *"<subreddit topic> reddit recommended"*
-- Any obvious domain-specific queries based on the community name
-**Send the first dispatch as soon as *either* (a) you have 2+ concrete findings from the web *or* (b) the download finishes** — whichever happens first. Do not sit on web findings waiting for an artificial 60-second timer.
-A good first dispatch looks like this:
-> A few things already jumping out about r/lockpicking:
->
-> - People rank themselves in **karate-style belts** — white through black. The white belt description is unironically poetic ("like freshly fallen snow, pure, true of heart").
-> - There's a ritual called the **"naughty bucket"** — where pickers put locks they've given up on.
-> - The community's two elder YouTubers are **LockPickingLawyer** and **BosnianBill** — almost every beginner thread cites them.
->
-> Download's still running but I can already tell this community is way more culture-heavy than I expected. Anything here pulling at you?
+This saves raw JSONL to `~/.openalmanac/corpus/<subreddit>/raw/`. The raw data is kept so you can re-filter later with different quality thresholds without re-downloading.
-**Now the conversation begins.** You and the user talk about what you've found, using only web sources for the moment. By the time the download notification arrives, you already know what they care about.
+Tell the user:
-#### How to know the download finished
-Before each dispatch after the download was kicked off, **check the background bash output once** via `BashOutput`. The ingest script prints `Done. <N> posts, <M> comments saved to <path>` followed by a JSON metadata line on stdout when the download fully completes. Look for the literal string `Done.` at the start of a line — that is the completion marker. Do not rely on the entries directory existing or having files in it; the script may write files incrementally while still running, so an early non-empty directory is a "started producing output" signal, not a "finished" signal.
-When you see the `Done.` line, the next dispatch should switch to **corpus-grounded** reads — open specific post files from `~/.openalmanac/corpus/<subreddit>/entries/` that match whatever the user is currently curious about, and quote directly from them.
-Acknowledge the handoff once, then keep the conversation going:
-> Corpus is in — 1.2M posts and comments. Pulling the real threads now, want to stay on the naughty bucket thread or pivot?
-#### If the web scout yields nothing
-Some niche/obscure subreddits have almost no web presence. If after ~45 seconds of searching you have fewer than 2 concrete items, do **not** pad or fabricate. Send a one-line holding dispatch and wait for the corpus:
+```
+Downloading now. Go grab a coffee ☕ — I'll have everything
+ready when you get back.
+```
-> Web's thin on this one — waiting for the corpus to land, I'll know more in a minute.
+While it downloads, share interesting context about the community. Use your knowledge and do a quick `search_web` if helpful. Share REAL information — facts, history, notable members, what makes this community unique. Not questions, not small talk.
-Then stop talking until either the corpus arrives or the user says something.
+Also tell them where the data is being stored: `~/.openalmanac/corpus/<subreddit>/`
-#### If the download fails or hangs
+When the download finishes, run the filter step:
-If the background bash returns a non-zero exit code, or produces no new output for ~5 minutes after the initial "download running" message, **stop and tell the user honestly**. Do not silently keep going on web sources forever.
+```bash
+node ${CLAUDE_SKILL_DIR}/scripts/ingest.js <subreddit> filter --stats-only
+```
-> Download isn't making progress (no output in a few minutes, last status: <what BashOutput showed>). Want me to retry, try a smaller depth, or continue exploring with web sources only?
+This returns quality scores and sample posts at each level. Present the results as a table:
-#### User input preempts the timer
+```
+Download complete. X posts, Y comments from r/<subreddit>.
-If the user replies to you *before* the first dispatch goes out, respond to the user. Do not ignore them to honor a 60-second promise. Fold whatever you've scouted so far into your reply to them naturally.
+| Quality   | Posts | What's in it | Example |
+|-----------|-------|--------------|---------|
+| **high**  | ~300  | Best guides, deep discussions, tutorials | "I designed a mechanism to make locks unpickable" (279 upvotes) |
+| **medium** (recommended) | ~900 | Solid community knowledge, good Q&A | "Does anyone know about this lock?" (19 upvotes, 9 comments) |
+| **low**   | ~1,800 | Includes casual posts and quick questions | "Mul-T-Lock Interactive" (31 upvotes) |
+| **all**   | ~3,000 | Everything that isn't deleted | — |
-### If the subreddit has no Arctic Shift data
+I'd recommend medium — good balance of quality and coverage.
+We can always dip into the rest during Phase 2.
+```
-If `count` returns 0, tell the user plainly and offer alternatives. Do not fail silently.
+Fill in the actual numbers and sample titles from the `--stats-only` output. The samples make it real — the user can see what kind of posts are at each level.
-> r/<subreddit> doesn't have indexed data on Arctic Shift — might be too new, too small, or private. Want to try a nearby subreddit? I can suggest a few.
+Wait for the user to pick (or confirm your recommendation), then run:
-Use `search_web` to find adjacent subreddits and present them as candidates.
+```bash
+node ${CLAUDE_SKILL_DIR}/scripts/ingest.js <subreddit> filter --quality medium
+```
-## The exploration loop (exploring mode)
+This writes markdown entries to `~/.openalmanac/corpus/<subreddit>/entries/`. Each entry has citation-ready frontmatter with `citation_key` and `source` (Reddit permalink).
-This is the heart of the skill. The user is in exploring mode any time they're asking questions, pointing at things, pivoting, or saying things like *"tell me more about X."* Your job during exploring mode is to **read and come back with short dispatches.** That's it.
+Report the results:
+- How many entries were created
+- Where they're stored (`~/.openalmanac/corpus/<subreddit>/entries/`)
-### Reading pattern
+### If the subreddit has no data on Arctic Shift
-- Before each dispatch, read 8–15 corpus entries silently, targeted at whatever the user pointed at (or, on the first pass, at whatever seemed most distinctive during the web scout).
-- Bias toward **nouns**: specific locks, specific people, specific techniques, specific recurring threads or memes. Nouns become articles; themes don't.
-- Notice **rituals, vocabulary, running jokes, recurring characters, recurring questions, the first-post-they'd-upvote genre** — this is the culture layer, and it's where the real value is.
-- Read more if the user wants more. Read less if they want less. The user's engagement level tells you how much to dig.
+If the `count` command returns 0 posts, the subreddit may not be indexed. In this case:
+- Tell the user this subreddit doesn't have historical data available
+- Suggest nearby or related subreddits by searching Arctic Shift for similar names
+- Ask if they'd like to try one of those instead
+- Do NOT just fail silently — help them find something that works
-### What every dispatch must contain
+## Step 3: Phase 1 — Foundation
-Every dispatch during exploring mode:
+### Plan topics
-1. **At least one concrete anchor per item** — a direct quote, a specific user handle, a vote count, a specific thread title, or a dated artifact. Not every item needs all of them; one vivid detail is enough. Never vague.
-2. **2–4 items.** If you have ten interesting things, pick the three most interesting and save the rest for when the user pulls on a thread.
-3. **A closing hook**: either *"I'm most curious about X — want me to dig in?"* or *"anything here pulling at you?"*
-4. **Stay around ~200 words, ceiling 250.** If you're brushing the ceiling, cut.
+Read 20-30 corpus entries (prioritize high-score posts) to understand the landscape. Also check what already exists:
-### What every dispatch must NOT contain
+```
+list_articles(community_slug: "<subreddit>", sort: "most_referenced")
+```
-- Outlines of articles you plan to write
-- Article plans, scoped lists, or "here are the 15 articles I'd write" menus
-- Progress bars or running tallies ("we now have 7 candidate articles")
-- "Should I include this?" approval questions
-- Evaluative language ("this is amazing," "this is silly," "this community is toxic")
-- Generic summaries ("this is a friendly community that talks about X")
-- More than ~250 words (aim for ~200)
+Identify 15-20 core articles. **Favor nouns over themes** — specific things people would look up, not vague survey topics.
-### Following user curiosity
+- **~70% nouns:** Specific locks, tools, people, techniques, concepts. "American Lock 1100", "Spool Pin", "Tension Wrench", "LockPickingLawyer". These are the building blocks — what people search for, link to, and learn from.
+- **~30% structural themes:** Only the big ones that serve as entry points and tie nouns together. "Belt System", "Lock Picking Basics". Not vague surveys — each should be a real article that teaches something.
-When the user points at something, your next dispatch is **about that thing, deeper**, with a natural sprinkle of adjacent things you discovered along the way. Same shape, same length, same hook at the end. The loop is **self-similar at every depth**: every dispatch has the same structure, just zoomed in further.
+Bad: "Security Pin Mechanics" (vague theme, reads like a textbook chapter)
+Good: "Spool Pin", "Serrated Pin", "Mushroom Pin" (specific nouns — then link them from a "Security Pins" overview)
-If the user pivots to something new, follow the pivot. Do not try to finish your previous thread. The user drives the exploration.
+Present them to the user grouped by category, but make clear most articles are about specific things:
-### The tease (once or twice, naturally)
+```
+Here's what I'd build for the foundation:
-As material accumulates, you can *once or twice* during the exploration mention naturally that a wiki is forming. Not as a progress update — as a natural observation.
+  Locks
+    › American Lock 1100, Abus 55/40, Master Lock #3, Kwikset SmartKey
-Example:
+  Components
+    › Spool Pin, Serrated Pin, Tension Wrench, Key Pin
-> Honestly, the belt system, the naughty bucket, and the American 1100 "albatross" thing are already enough for a really fun getting-started page. But I want to dig into the YouTuber lineage a bit more before we write.
+  Techniques
+    › Bumping, Raking, Single Pin Picking
-**Rules for the tease:**
-- At most twice across the whole exploration. Once is often enough.
-- Never a tally. Never a list of articles. Never a progress percentage.
-- Always phrased as an organic observation, not a status update.
-- Always leaves the decision with the user — you're not asking to write, you're noting that the option is open.
+  Community
+    › LockPickingLawyer, BosnianBill, Belt System
-### Topic clustering (quietly)
+Want to add or change anything?
+```
-While you explore, build topic clusters in your head. You'll see natural groupings emerge: `locks`, `techniques`, `people`, `community`, `vocabulary`. When you mention them, do it casually — *"there's clearly a 'locks' cluster forming, a 'techniques' cluster, and a whole 'community culture' bucket"* — not as an approval request. You're sharing how you're thinking, not asking permission.
+Include your recommendation. Wait for the user to confirm or adjust.
-These clusters become the community's **topic tags** when you eventually scaffold articles. Keep it to 4–7 topics total. Broad navigation, not fine taxonomy.
+### Topics
-## The writing mode
+The groupings you present (Locks, Components, Techniques, Community) become **community topics** on Almanac. Topics show up as categories on the wiki page and each article gets assigned to one. When you scaffold articles, include the topic in the `new()` call.
-The user flips you into writing mode when they say some variant of *"let's write it,"* *"okay draft the getting-started article,"* or *"just write the article."* They may also arrive in writing mode immediately on their first message. Both paths are valid.
+Keep topics broad and few (4-7). They're navigation, not a taxonomy. A topic like "Locks" is good. A topic like "European High-Security Disc Detainer Locks" is too specific — that's an article, not a topic.
-### The deep pass before drafting
+### Scaffold entities
-Once the user says write, do one more **targeted deep read** — 15–25 more corpus entries focused on whatever the article will lean on. If there's been no exploration yet (the "just write it" shortcut), do a broader 20–30 entry read covering the main things a beginner would want to know.
+Before any writing, scaffold all planned articles as local files:
-Also:
+1. **Check what exists online:** `search_articles` with ALL planned entity names in one batch call
+2. **Check local folder:** Read `~/.openalmanac/articles/<subreddit>/` to see what's already scaffolded
+3. **Create missing:** `new(articles: [{title, community_slug}, ...])` for everything not found
-- Read 3–5 **existing articles** in the community (via `mcp__almanac__read`) to calibrate voice — if there are published articles already, match their register. If not, default to **fandom-wiki voice** (see below).
-- Pull the writing guidelines once: `read_webpage("https://openalmanac.org/writing-guidelines")`.
-- If you're going to use external sources (MIT Guide, canonical PDFs, YouTube videos, manufacturer pages), `search_web` and `read_webpage` for them now. Reddit is primary but not exclusive — mix corpus citations with external canonical sources when they add credibility.
+This creates the entity map. Writing agents will check the local folder to know what slugs exist.
-### Fandom-wiki voice (the default for the getting-started article)
+### Write articles
-The getting-started article is the community's landing page. It has to feel like it was written by an insider, not like an encyclopedia entry. Specifically:
+Tell the user what's happening:
-- **Open with a hook, not a definition.** Quote a canonical source (the community's own founding document, a famous post, a canonical external PDF) if there's a good one. Otherwise lead with a vivid scene or a surprising fact.
-- **Weave community vocabulary into the prose** without stopping to define it. Let the user pick up "chinesium" or "naughty bucket" from context. Insiders don't annotate their own slang.
-- **Use inline `[[slug|Display]]` wikilinks liberally.** Every noun a curious reader might click on should be a wikilink. 25+ wikilinks in a single article is normal — this is what makes the wiki feel like a wiki.
-- **Quote the community directly.** Pull-quotes from real Reddit posts with specific user handles and vote counts. Cite each with a `[@reddit-*]` marker and a real permalink.
-- **Include 2–3 images** with descriptive captions. Use `search_images` (wikimedia first, google second). The first image goes near the top; the infobox hero image goes in `infobox.header.image_url`.
-- **Include an infobox** with quick facts: first real kit, first skill, first rule, first lock, community home, motto (if there is one). This is what makes the article feel like a fandom page, not an essay.
-- **Structure with H2 headings** — 6–10 sections, each earning its place. Tables are great for progression ladders, gear comparisons, or belt tiers.
-- **Close warmly.** The last line should feel like a friend welcoming the newcomer, not a bibliography.
+```
+Kicking off the writing agents:
-### Scaffold before writing
+  • Agent 1: Lock Anatomy — Cylinder, Warding, Master Keying
+  • Agent 2: Techniques — Bumping, Comb Picking, Impressioning
+  • Agent 3: Famous Locks — American 1100, Abus 55/40
+  • Agent 4: Community — LockPickingLawyer, Belt System
+```
-Before you draft, scaffold the getting-started article locally:
+Spin up 4-5 parallel writing agents, ~3-4 articles each. Group by theme so related articles are written by the same agent (better cross-referencing).
+**Each writing agent's brief must include:**
+1. **Which articles to write** (the scaffolded .md files to fill in)
+2. **Corpus entries to read** — point to specific files in `~/.openalmanac/corpus/<subreddit>/` relevant to its topics
+3. **The entity map** — list all scaffolded slugs so the agent uses correct wikilinks
+4. **These citation rules:**
+   - Every source MUST have a public URL
+   - Corpus entries have `citation_key` and `source` (Reddit permalink) in their frontmatter — use them as `[@citation_key]` markers and list them in the article's YAML `sources:` array
+   - Also use `search_web` and `read_webpage` for additional sources beyond Reddit
+   - NEVER fabricate a URL. If a source has no public URL, do not use it.
+   - Register sources with `register_sources` before writing
+5. **These wikilink rules:**
+   - Use `[[slug|Display Text]]` syntax for entities that exist (scaffolded or published)
+   - Before linking to a new entity NOT on the map: `search_articles` to check, then scaffold with `new()` if needed
+   - Prefer existing slugs over inventing new ones
+6. **Writing quality:**
+   - Fetch guidelines from `https://openalmanac.org/writing-guidelines` using `read_webpage`
+   - Write with the community's voice — cite Reddit discussions, not just Wikipedia
+   - Include `[@citation_key]` markers throughout, especially for claims from the corpus
+   - Articles should feel like they were written by someone who lives in this community
+**While agents work**, narrate what's happening. Share interesting things you see them finding. Example:
 ```
-mcp__almanac__new({
-  community_slug: "<subreddit>",
-  articles: [{ title: "Getting Started with <Topic>", slug: "getting-started", topics: ["techniques"] }]
-})
+Agent 2 found a heated 2019 thread about whether LockPickingLawyer's
+speed picks are realistic for beginners — 400 upvotes, great discussion.
+Working that into the article...
 ```
-This creates `~/.openalmanac/articles/<subreddit>/getting-started.md` with a placeholder body. Then edit that file with the full article content using `Write` or `Edit`.
+### Image pass
-### Write it
+After all writing agents finish, run parallel haiku-model image agents (one per article):
-Write the article in full using the voice, wikilinks, citations, images, and infobox guidance above. Aim for **2,000–2,500 words** for a featured/landing article. Shorter is fine for smaller communities; longer is okay if the material demands it.
+Each image agent:
+1. Reads the article
+2. `search_images` for 1-2 hero image queries
+3. `view_images` to verify the best candidate
+4. Adds the image URL to the article's frontmatter as `image_url`
-### Publish the article
+### Publish
 ```
-mcp__almanac__publish({ slugs: ["getting-started"] })
+publish(community_slug: "<subreddit>")
 ```
-Publish will delete the local draft after a successful push. If you need to edit after publishing, use `mcp__almanac__download` to pull the authoritative copy back from the server.
+This batch-publishes all articles in the community folder. The backend auto-creates stubs from any dead wikilinks in the articles.
-### Immediately after publish: the stub flow
+Share the results with enthusiasm:
-As soon as the getting-started article is live, **extract every wikilink in it** and scaffold stubs for the ones that don't exist yet. This is the illusion-of-scale step — a wiki feels much bigger when every link in the landing article resolves.
-Steps:
-1. **Extract every `[[slug|...]]` and `[[slug]]`** from the article body. Do this against your in-memory draft **before** calling publish — publish deletes the local file, so waiting until after means you'd have to `download` it back first. If you're running the stub flow after a publish that already happened, `mcp__almanac__download` the getting-started article first, then read it with `Read` and extract with a regex like `\[\[([a-z0-9-]+)(\|[^\]]+)?\]\]`.
-2. Batch-call `mcp__almanac__search_articles` with all extracted slugs + their human names to find which already exist.
-3. For the ones that don't: batch-scaffold with `mcp__almanac__new(articles: [...], community_slug: "<subreddit>")`. Up to 50 per call. The scaffolded files contain only frontmatter and an empty body.
-4. (Optional) For each scaffolded stub, fill in a **one-line description** via `Edit` — either cold (from the entity name and surrounding article context, ~30–50 tokens) or slightly enriched if you remember something real about it from the corpus reading you already did. Do not do additional corpus searches per stub — use only what's already in your working memory. Cost: ~1,500–2,500 tokens for a typical 40-stub batch. Skipping this step is fine — empty stubs are valid and the backend sets `stub: true` automatically.
-5. `mcp__almanac__publish({ slugs: [...] })` the whole batch.
-After publishing, report back with a short message:
-> Getting-started is live. I also scaffolded and published 40 stubs for every wikilink in the article — belt-system, spool-pins, naughty-bucket, mit-guide-to-lock-picking, and more. The wiki now feels populated.
->
-> A few of those stubs are worth filling out into real articles soon — especially the naughty-bucket one and the MIT Guide one, which aren't documented anywhere else on the internet. Want me to go write a couple of those too, or want to explore something else?
-This closing re-opens the conversation door without asking "are we done?"
-## The writing-agent brief (if you delegate)
-For most runs, you'll write the getting-started article yourself because the conversation context is essential. But if you do delegate to a background agent (via the `Task` tool or similar), the agent's brief must include:
-1. **Which article to write** (specific slug + community_slug + path to the scaffolded file)
-2. **The scouting context**: everything you and the user discussed during exploration — the rituals, the quotes, the characters, the vocabulary, the moments the user got excited about. This is the most important part of the brief.
-3. **Corpus entries to read**: specific file paths in `~/.openalmanac/corpus/<subreddit>/entries/` that are relevant. Do not tell the agent to "read the corpus" — point at specific files.
-4. **The entity map**: list of slugs that will exist when scaffolding is done, for wikilink correctness.
-5. **Citation rules** (below).
-6. **Wikilink rules**: use `[[slug|Display]]`, search before creating new ones, prefer existing slugs.
-7. **Writing quality**: fetch `https://openalmanac.org/writing-guidelines` once, follow fandom-wiki voice, use the infobox format.
-8. **The specific quotes and details the user cared about** — pull these forward, do not assume the agent will rediscover them.
+```
+17 articles live! The wiki now has 35 articles total, plus
+12 new stubs that emerged from wikilinks.
-## Citation rules
+Check it out: openalmanac.org/communities/<subreddit>/wiki
-- Every source **must have a public URL**. Reddit permalinks, web pages, PDFs, YouTube — all fine.
-- If a source has no public URL, do not use it and do not cite it.
-- Never fabricate or construct URLs.
-- **Corpus entries** come with `citation_key` and `source` (Reddit permalink) in their frontmatter. Use them as `[@citation_key]` markers in the article and add them to the `sources:` array.
-- **External sources** get the same treatment: `search_web` to find, `read_webpage` to verify, then cite with a BibTeX-style kebab-case key (e.g. `mit-guide-lockpicking`, `lpubelts-belts`).
-- **Every `[@key]` in the body must have a matching source**, and every source must be referenced at least once. Publish will reject drafts that violate this.
-- `accessed_date` is optional. If you omit it, the backend defaults it to today's date. Set it explicitly only for historical sources where the access date matters.
-## Entity linking rules
+You can also browse it in the Almanac desktop app — best way
+to explore and keep contributing.
+```
-- Always `search_articles` before creating new entities — check what already exists, stub or not.
-- Prefer existing slugs over inventing new ones.
-- `[[slug|Display Text]]` is the wikilink syntax.
-- Dead wikilinks auto-create stubs on publish, but the preferred pattern is **explicit scaffolding with `new()`** followed by the stub flow — it gives you a one-line description in each stub instead of a blank placeholder, which is better for browse experience.
-- `article_id` format on the server is `<community_slug>:<slug>` for community articles. You almost never have to construct this manually — the tools accept `(slug, community_slug)` pairs. Only relevant if you're reading from the DB directly.
+## Step 4: Phase 2 — Deep Absorb
-## Technical contract details (footgun prevention)
+After Phase 1, check in with the user:
-- **Publishing deletes your local draft.** After a successful publish, `~/.openalmanac/articles/<community>/<slug>.md` is removed. If you need to edit the article after publishing, use `mcp__almanac__download` to pull the authoritative version back before editing.
-- **Prefer `download` over `read` for any article you'll reference more than once.** `read` fills the context window with the full article body; `download` writes to disk so you can re-open it cheaply with `Read`.
-- **Empty-body stubs are fine.** `new()` scaffolds with only frontmatter — no placeholder body. The backend accepts empty content and sets `stub: true` automatically on publish. Overwrite the body with `Edit` or `Write` before publishing if you want a real article instead of a stub.
-- **Batch publish skips unchanged files silently** and reports them as `unchanged: N` rather than failing. Same for articles the server has updated since your last download — they're warnings, not errors.
-- **If the corpus is missing at `~/.openalmanac/corpus/<subreddit>/entries/`**, stop and ask the user. Do not silently fall back to reconstructing sources from existing articles or from memory. Ask: *"I can't find the corpus for r/X. Want me to re-run the download, search the web instead, or point me at where you have the raw data?"*
+```
+That was Phase 1 — the foundation. There are still X,000+
+corpus entries I haven't processed yet. Lots of niche stuff
+hiding in there — topics that didn't make the top 20 but
+the community clearly cares about.
-## File access rules
+Want me to start Phase 2? I can either:
-- Use `Read`, `Write`, `Edit`, `Glob` for files under `~/.openalmanac/` — never `Bash(ls)`, `Bash(cat)`, `Bash(echo)`, `Bash(sed)`.
-- The only Bash command you should use is the ingest script and the `git` commands you need for publishing.
+  › Keep going and check in every few batches
+  › Go batch by batch so you can see what emerges
+```
-## Optional: background deep absorb
+Wait for the user to choose.
+### Absorb loop
+Read `~/.openalmanac/corpus/<subreddit>/absorb_log.json` to know what's been processed.
+For each batch:
+1. **Read 50 unabsorbed entries** from the corpus directory (skip any listed in absorb_log)
+2. **Cluster by theme** — what topics do these entries cover?
+3. **Decide:** Create new articles? Enrich existing ones? Both?
+4. **For existing articles:** `download` them first, then expand with new details/sections
+5. **For new articles:** Scaffold → write → add to wiki
+6. **Image pass** on any new articles (haiku agents)
+7. **Publish** the batch
+8. **Update absorb_log.json:**
+   ```json
+   {
+     "entries": {
+       "<filename>": {
+         "absorbed_at": "<ISO timestamp>",
+         "absorbed_into": ["article-slug-1", "article-slug-2"]
+       }
+     },
+     "stats": {
+       "total_entries": <total>,
+       "absorbed": <count>,
+       "remaining": <count>
+     }
+   }
+   ```
+**Between batches**, share what you found:
-After the getting-started article and its stubs are live, the user may want to keep expanding the wiki autonomously. This is an **optional** mode, not the default.
+```
+Batches 1-5 done. Found some gems:
+  • "Lock Lubricants in Cold Weather" — apparently Houdini
+    lube freezes below -20°F, community recommends graphite
+  • Expanded the American 1100 article with a detailed
+    teardown thread from 2017
+  • New article: "Lockpicking Competitions" — there's a
+    whole competitive scene
+3 new articles, 4 enriched. Continuing...
+```
-If the user asks for it, walk through the corpus in batches of 50 unabsorbed entries, cluster them, create new articles for gaps, enrich existing stubs into real articles, and update `~/.openalmanac/corpus/<subreddit>/absorb_log.json` to track progress. Check in with the user every few batches with a short dispatch describing what you found — same short-dispatch voice rules apply.
+### When to stop
-Do **not** default into this mode. The default end-state after the getting-started + stub flow is *"the wiki is live and populated, come back anytime to fill more stubs."*
+- If the user said "keep going with check-ins": continue until all entries are absorbed or the user says stop
+- If the user said "batch by batch": pause after each batch and ask if they want to continue
+- At the end, show a final tally:
-## What NOT to do
+```
+Phase 2 complete. Processed X,XXX entries across N batches.
-- Do not narrate tool calls or status updates during exploration. Read silently and come back with a substantive dispatch. The **one** exception is long-running operations the user is actively waiting on (the corpus download) — there, a single compressed "download running in the background" line is correct, because the user needs to know the wait has started.
-- Do not present outlines of articles for user approval.
-- Do not write messages longer than ~250 words during exploring mode.
-- Do not force enthusiasm. Curiosity shows in what you surface, not in adjectives.
-- Do not make small talk or ask personal questions.
-- Do not skip Reddit as a source — the corpus *is* the community's voice.
-- Do not skip external sources either — Reddit is primary but not exclusive.
-- Do not announce modes ("let me switch into writing mode now"). Switch silently.
-- Do not ask "are we done?" at the end of a writing pass. Re-open the conversation with a specific suggestion.
-- Do not fail silently if the corpus is missing or the subreddit has no data. Ask.
-- Do not evaluate the community. Notice it.
-- Do not estimate how long things will take.
+Final wiki:
+  XX articles (was YY)
+  XX remaining stubs
+  XXX+ citations from the community
-## The principle at the top of everything
+openalmanac.org/communities/<subreddit>/wiki
+```
-**Write dispatches, not reports. The user hired a friend who has been inside, not a project manager with a plan.**
+## Important rules
-Every message you send should feel like a text from that friend: short, specific, vivid, ending with something that makes the user want to reply. If a message doesn't feel like that, cut it until it does.
+### Citations
+- Every source MUST have a public URL. Reddit permalinks, web pages, YouTube — all fine.
+- If a source has no public URL, do NOT use it and do NOT cite it. Inform the user.
+- Never fabricate or construct URLs.
+- Corpus entries have `citation_key` and `source` in their frontmatter — these are ready to use.
+### Entity linking
+- Always `search_articles` before creating new entities — check what already exists
+- Check the local `~/.openalmanac/articles/<subreddit>/` folder for scaffolded files
+- Only scaffold with `new()` if the entity doesn't exist anywhere
+- Use `[[slug|Display Text]]` wikilink syntax
+- Prefer existing slugs over inventing new ones to avoid duplicates
+### Community creation
+- If the community doesn't exist on Almanac yet, create it with `create_community`
+- The description should have personality — capture the community's vibe, not a generic taxonomy
+- Find a good cover image with `search_images`
+### File access
+- Use `Glob` and `Read` tools to browse and read files in `~/.openalmanac/` — do NOT use `Bash(ls ...)` or `Bash(cat ...)`
+- Use `Write` and `Edit` tools to modify files — do NOT use `Bash(echo ...)` or `Bash(sed ...)`
+- The only Bash command you should use is the ingest script
+### What NOT to do
+- Don't estimate how long things will take
+- Don't make small talk or ask personal questions
+- Don't force enthusiasm — if something isn't interesting, don't pretend
+- Don't go silent for long stretches — narrate what's happening
+- Don't ask permission for every article — the user approved the plan, that's consent
+- Don't skip Reddit as a source — the corpus IS the community's voice, cite it