@calltelemetry/openclaw-linear 0.8.0 → 0.8.1

package/README.md CHANGED
@@ -11,8 +11,9 @@ Connect Linear to AI agents. Issues get triaged, implemented, and audited — au
 
 - **New issue?** Agent estimates story points, adds labels, sets priority.
 - **Assign to agent?** A worker implements it, an independent auditor verifies it, done.
- - **Comment `@qa review this`?** The QA agent responds with its expertise.
- - **Say "plan this project"?** A planner interviews you and builds your full issue hierarchy.
+ - **Comment anything?** The bot understands natural language — no magic commands needed.
+ - **Say "let's plan the features"?** A planner interviews you, writes user stories, and builds your full issue hierarchy.
+ - **Plan looks good?** A different AI model automatically audits the plan before dispatch.
 - **Agent goes silent?** A watchdog kills it and retries automatically.
 - **Want updates?** Get notified on Discord, Slack, Telegram, or Signal.
 
@@ -284,53 +285,99 @@ If something went wrong, start with `log.jsonl` — it shows every phase, how lo
 
 ---
 
+ ## Comment Routing — Talk Naturally
+
+ You don't need to memorize magic commands. The bot uses an LLM-based intent classifier to understand what you want from any comment.
+
+ ```
+ User comment → Intent Classifier (small model, ~2s) → Route to handler
+                      ↓ (on failure)
+                Regex fallback → Route to handler
+ ```
+
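As a rough sketch, the classify-then-fallback routing above might look like this in TypeScript; `classifyIntent`, the intent labels, and the regex patterns are illustrative assumptions, not the plugin's actual API:

```ts
type Intent = "plan" | "finalize" | "cancel" | "mention" | "question" | "work";

// Assumed LLM call to the small classifier model (~300 tokens per call).
declare function classifyIntent(text: string): Promise<Intent>;

async function routeComment(text: string): Promise<Intent> {
  // Fast path: an explicit @mention skips the classifier entirely.
  if (/@\w+/.test(text)) return "mention";
  try {
    return await classifyIntent(text);
  } catch {
    // Classifier failed: fall back to simple regex routing.
    if (/\b(approve|finalize) plan\b/i.test(text)) return "finalize";
    if (/\bplan\b/i.test(text)) return "plan";
    if (/\b(cancel|abandon|stop planning)\b/i.test(text)) return "cancel";
    return "question"; // default agent responds instead of dropping the comment
  }
}
```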
+ **What the bot understands:**
+
+ | What you say | What happens |
+ |---|---|
+ | "let's plan the features for this" | Starts planning interview |
+ | "looks good, ship it" (during planning) | Runs plan audit + cross-model review |
+ | "nevermind, cancel this" (during planning) | Exits planning mode |
+ | "hey kaylee can you look at this?" | Routes to Kaylee (no `@` needed) |
+ | "what can I do here?" | Default agent responds (not silently dropped) |
+ | "fix the search bug" | Default agent dispatches work |
+
+ `@mentions` still work as a fast path — if you write `@kaylee`, the classifier is skipped entirely for speed.
+
+ > **Tip:** Configure `classifierAgentId` to point to a small/fast model agent (like Haiku) for low-latency, low-cost intent classification. The classifier only needs ~300 tokens per call.
+
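For example, wiring the classifier to a small-model agent could look like this, mirroring the plugin-entry shape shown in the Claude API Key example below; the agent id `haiku-fast` is made up for the example:

```json
{
  "plugins": {
    "entries": {
      "openclaw-linear": {
        "config": {
          "classifierAgentId": "haiku-fast"
        }
      }
    }
  }
}
```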
+ ---
+
 ## Planning a Project
 
 For larger work, the planner helps you break a project into issues with dependencies, then dispatches them automatically.
 
 ### Start planning
 
- Comment on any issue that belongs to a Linear project:
+ Comment on any issue that belongs to a Linear project — use natural language:
 
- > `@ctclaw plan this project`
+ > "let's plan out the features for this project"
 
- The planner enters **interview mode** and asks you questions:
+ The planner enters **interview mode** and asks you questions one at a time:
 
- > Entering planning mode for **Search Feature**. What are the main feature areas you'd like to build?
+ > I'm entering planning mode for **Search Feature**. I'll interview you about the features you want to build, then structure everything into Linear issues.
+ >
+ > Let's start — what is this project about, and what are the main feature areas?
 
 ### Build the plan
 
- Reply with your ideas. The planner creates issues, sets dependencies, and asks follow-up questions:
+ Reply with your ideas. The planner creates issues with **user stories** and **acceptance criteria**, sets dependencies, and asks follow-up questions:
 
 > I've created 3 issues:
 > - **PROJ-2:** Build search API endpoint (3 pts, blocks PROJ-3)
 > - **PROJ-3:** Search results page (2 pts, blocked by PROJ-2)
 > - **PROJ-4:** Autocomplete suggestions (1 pt, independent)
 >
- > Should the autocomplete call a separate endpoint or share the search API?
+ > For PROJ-2, here's what I wrote for acceptance criteria:
+ > - *Given* a user sends a search query, *When* results exist, *Then* they are returned with pagination
+ >
+ > Does that cover it? Should the autocomplete call a separate endpoint or share the search API?
+
+ The planner proactively asks for:
+ - **User stories** — "As a [role], I want [feature] so that [benefit]"
+ - **Acceptance criteria** — Given/When/Then format
+ - **UAT test scenarios** — How to manually verify the feature
 
 Keep replying until the plan looks right. The planner updates issues in real time.
 
- ### Finalize
+ ### Finalize & Cross-Model Review
 
- When you're happy with the plan, comment:
+ When you're happy, say something like "looks good" or "finalize plan". The planner runs a validation check:
+ - Every issue has a description (50+ characters) with acceptance criteria
+ - Every non-epic issue has an estimate and priority
+ - No circular dependencies in the DAG (see the cycle-check sketch below)
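One way the circular-dependency check could be implemented over the plan's "blocks" edges is Kahn's topological sort; the `PlanIssue` shape here is an assumption for illustration, not the plugin's actual type:

```ts
interface PlanIssue {
  id: string;
  blocks: string[]; // ids of issues this one must finish before
}

function hasCircularDependencies(issues: PlanIssue[]): boolean {
  // Count incoming "blocked by" edges for every issue.
  const indegree = new Map(issues.map(i => [i.id, 0]));
  for (const issue of issues)
    for (const blocked of issue.blocks)
      indegree.set(blocked, (indegree.get(blocked) ?? 0) + 1);

  // Start from issues nothing blocks, then peel the graph layer by layer.
  const byId = new Map(issues.map(i => [i.id, i]));
  const queue = issues.filter(i => indegree.get(i.id) === 0).map(i => i.id);
  let visited = 0;
  while (queue.length > 0) {
    const id = queue.shift()!;
    visited++;
    for (const blocked of byId.get(id)?.blocks ?? []) {
      const remaining = (indegree.get(blocked) ?? 0) - 1;
      indegree.set(blocked, remaining);
      if (remaining === 0) queue.push(blocked);
    }
  }
  // If some issues were never reached, they sit on a cycle.
  return visited !== issues.length;
}
```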
 
- > `finalize plan`
+ **If validation passes, a cross-model review runs automatically:**
 
- The planner runs a validation check:
- - Every issue has a description (50+ characters)
- - Every issue has an estimate
- - Every issue has a priority
- - No circular dependencies
+ > ## Plan Passed Checks
+ >
+ > **3 issues** with valid dependency graph.
+ >
+ > Let me have **Codex** audit this and make recommendations.
 
- **If validation passes:**
+ A different AI model (always the complement of your primary model) reviews the plan for gaps:
 
- > ## Plan Approved
- >
- > The plan for **Search Feature** passed all checks.
- > **3 issues** created with valid dependency graph.
+ | Your primary model | Auto-reviewer |
+ |---|---|
+ | Claude / Anthropic | Codex |
+ | Codex / OpenAI | Gemini |
+ | Gemini / Google | Codex |
+ | Other (Kimi, Mistral, etc.) | Gemini |
+
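The complement selection in the table reads as a simple mapping; a sketch (the function name is made up, and in practice the choice can be overridden with the `plannerReviewModel` config key):

```ts
type Reviewer = "claude" | "codex" | "gemini";

function pickReviewModel(primaryModel: string): Reviewer {
  const p = primaryModel.toLowerCase();
  if (p.includes("claude") || p.includes("anthropic")) return "codex";
  if (p.includes("codex") || p.includes("openai")) return "gemini";
  if (p.includes("gemini") || p.includes("google")) return "codex";
  return "gemini"; // Kimi, Mistral, and other backends get Gemini
}
```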
+ After the review, the planner summarizes recommendations and asks you to approve:
 
- The project enters **DAG dispatch mode** — issues are assigned to the agent automatically, respecting dependency order. Up to 3 issues run in parallel. As each completes, newly unblocked issues start.
+ > Codex suggested adding error handling scenarios to PROJ-2 and noted PROJ-4 could be split into frontend/backend. I've updated PROJ-2's acceptance criteria. The PROJ-4 split is optional — your call.
+ >
+ > If you're happy with this plan, say **approve plan** to start dispatching.
 
 **If validation fails:**
 
@@ -340,13 +387,16 @@ The project enters **DAG dispatch mode** — issues are assigned to the agent au
 > - PROJ-2: description too short (< 50 chars)
 > - PROJ-3: missing estimate
 >
+ > **Warnings:**
+ > - PROJ-4: no acceptance criteria found in description
+ >
 > Please address these issues, then say "finalize plan" again.
 
- Fix the issues and try again. You can also say `abandon planning` to exit without dispatching.
+ Fix the issues and try again. You can also say "cancel" or "stop planning" to exit without dispatching.
 
 ### DAG dispatch progress
 
- As issues complete, you'll get progress notifications:
+ After approval, issues are assigned to the agent automatically in dependency order. Up to 3 issues run in parallel.
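A rough sketch of how dependency-ordered dispatch with a three-issue cap could work; the types and the `dispatchIssue` helper are assumptions, not the plugin's internals:

```ts
interface DispatchNode {
  id: string;
  blockedBy: string[]; // ids that must complete first
}

declare function dispatchIssue(id: string): Promise<void>; // assumed agent call

async function dispatchDag(nodes: DispatchNode[], parallel = 3): Promise<void> {
  const done = new Set<string>();
  const running = new Map<string, Promise<void>>();
  const pending = [...nodes];

  while (pending.length > 0 || running.size > 0) {
    // Start every unblocked issue until the concurrency cap is hit.
    for (const node of [...pending]) {
      if (running.size >= parallel) break;
      if (node.blockedBy.every(dep => done.has(dep))) {
        pending.splice(pending.indexOf(node), 1);
        running.set(node.id, dispatchIssue(node.id).then(() => {
          done.add(node.id);
          running.delete(node.id);
        }));
      }
    }
    if (running.size > 0) {
      await Promise.race(running.values()); // wait for any issue to finish
    } else {
      throw new Error("remaining issues are all blocked"); // stuck dependency
    }
  }
}
```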
 
 > `📊 Search Feature: 2/3 complete`
 
@@ -364,11 +414,14 @@ If an issue gets stuck (all retries failed), dependent issues are blocked and yo
 |---|---|
 | Create a new issue | Agent triages — adds estimate, labels, priority |
 | Assign an issue to the agent | Worker → Audit → Done (or retry, or escalate) |
- | Comment `@qa check the tests` | QA agent responds |
- | Comment `@ctclaw plan this project` | Planning interview starts |
- | Reply during planning | Issues created/updated, follow-up questions |
- | Comment `finalize plan` | Validates, then auto-dispatches |
- | Comment `abandon planning` | Exits planning mode |
+ | Comment anything on an issue | Intent classifier routes to the right handler |
+ | Mention an agent by name (with or without `@`) | That agent responds |
+ | Ask a question or request work | Default agent handles it |
+ | Say "plan this project" (on a project issue) | Planning interview starts |
+ | Reply during planning | Issues created/updated with user stories & AC |
+ | Say "looks good" / "finalize plan" | Validates → cross-model review → approval |
+ | Say "approve plan" (after review) | Dispatches all issues in dependency order |
+ | Say "cancel" / "abandon planning" | Exits planning mode |
 | `/dispatch list` | Shows all active dispatches |
 | `/dispatch retry CT-123` | Re-runs a stuck dispatch |
 | `/dispatch status CT-123` | Detailed dispatch info |
@@ -401,6 +454,8 @@ Add settings under the plugin entry in `openclaw.json`:
 | Key | Type | Default | What it does |
 |---|---|---|---|
 | `defaultAgentId` | string | `"default"` | Which agent runs the pipeline |
+ | `classifierAgentId` | string | — | Agent for intent classification (use a small/fast model like Haiku) |
+ | `plannerReviewModel` | string | auto | Cross-model plan reviewer: `"claude"`, `"codex"`, or `"gemini"`. Auto-detects the complement of your primary model. |
 | `enableAudit` | boolean | `true` | Run auditor after implementation |
 | `enableOrchestration` | boolean | `true` | Allow `spawn_agent` / `ask_agent` tools |
 | `maxReworkAttempts` | number | `2` | Max audit failures before escalation |
@@ -408,11 +463,13 @@ Add settings under the plugin entry in `openclaw.json`:
 | `worktreeBaseDir` | string | `"~/.openclaw/worktrees"` | Where worktrees are created |
 | `repos` | object | — | Multi-repo map (see [Multi-Repo](#multi-repo)) |
 | `dispatchStatePath` | string | `"~/.openclaw/linear-dispatch-state.json"` | Dispatch state file |
+ | `planningStatePath` | string | `"~/.openclaw/linear-planning-state.json"` | Planning session state file |
 | `promptsPath` | string | — | Custom prompts file path |
 | `notifications` | object | — | Notification targets (see [Notifications](#notifications)) |
 | `inactivitySec` | number | `120` | Kill agent if silent this long |
 | `maxTotalSec` | number | `7200` | Max total agent session time |
 | `toolTimeoutSec` | number | `600` | Max single `code_run` time |
+ | `claudeApiKey` | string | — | Anthropic API key for Claude CLI (passed as `ANTHROPIC_API_KEY` env var). Required if using Claude backend. |
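For example, the new planner-related keys set together under the plugin entry; the values shown are examples only:

```json
{
  "plugins": {
    "entries": {
      "openclaw-linear": {
        "config": {
          "plannerReviewModel": "gemini",
          "planningStatePath": "~/.openclaw/linear-planning-state.json"
        }
      }
    }
  }
}
```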
 
 ### Environment Variables
 
@@ -456,9 +513,17 @@ One agent must have `"isDefault": true` — that's the one that handles triage a
 
 Create `coding-tools.json` in the plugin root to configure which CLI backend agents use:
 
+ > **Warning — Claude Code (Anthropic) and headless/automated usage**
+ >
+ > Calling Claude Code via CLI in a headless or automated context (which is how this plugin
+ > uses it) may violate [Anthropic's Terms of Service](https://www.anthropic.com/terms).
+ > The default backend is **Codex CLI** (OpenAI). **Gemini CLI** (Google) is used as the
+ > cross-model reviewer. If you choose to use Claude despite this, you do so at your own risk.
+ > See [Claude API Key](#claude-api-key) below for opt-in configuration.
+
 ```json
 {
-   "codingTool": "claude",
+   "codingTool": "codex",
   "agentCodingTools": {},
   "backends": {
     "claude": { "aliases": ["claude", "claude code", "anthropic"] },
@@ -468,7 +533,33 @@ Create `coding-tools.json` in the plugin root to configure which CLI backend age
 }
 ```
 
- The agent calls `code_run` without knowing which backend is active. Resolution order: explicit `backend` parameter > per-agent override > global default > `"claude"`.
+ The agent calls `code_run` without knowing which backend is active. Resolution order: explicit `backend` parameter > per-agent override > global default > `"codex"`.
+
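The resolution order reads like a simple coalescing chain; a sketch (the function and parameter names are hypothetical and only mirror the documented precedence):

```ts
function resolveBackend(
  explicit?: string,        // `backend` parameter on the code_run call
  agentOverride?: string,   // per-agent entry in agentCodingTools
  globalDefault?: string,   // "codingTool" from coding-tools.json
): string {
  return explicit ?? agentOverride ?? globalDefault ?? "codex";
}
```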
+ #### Claude API Key
+
+ If you opt in to using Claude as a backend (despite the TOS concerns noted above), you can
+ provide an Anthropic API key so the Claude CLI authenticates via API key instead of its
+ built-in interactive auth.
+
+ Set `claudeApiKey` in the plugin config:
+
+ ```json
+ {
+   "plugins": {
+     "entries": {
+       "openclaw-linear": {
+         "config": {
+           "claudeApiKey": "sk-ant-..."
+         }
+       }
+     }
+   }
+ }
+ ```
+
+ The key is passed to the Claude CLI subprocess as the `ANTHROPIC_API_KEY` environment variable.
+ You can also set `ANTHROPIC_API_KEY` as a process-level environment variable (e.g., in your
+ systemd unit file) as a fallback. The plugin config value takes precedence if both are set.
 
 ---
 
@@ -569,6 +660,10 @@ rework:
 | `{{tier}}` | Complexity tier (junior/medior/senior) |
 | `{{attempt}}` | Current attempt number |
 | `{{gaps}}` | Audit gaps from previous attempt |
+ | `{{projectName}}` | Project name (planner prompts) |
+ | `{{planSnapshot}}` | Current plan structure (planner prompts) |
+ | `{{reviewModel}}` | Name of cross-model reviewer (planner review) |
+ | `{{crossModelFeedback}}` | Review recommendations (planner review) |
 
 ### CLI
 
@@ -762,8 +857,8 @@ Example output:
 ✔ Default agent: coder
 
 Coding Tools
- ✔ coding-tools.json loaded (default: claude)
- claude: found at /usr/local/bin/claude
+ ✔ coding-tools.json loaded (default: codex)
+ codex: found at /usr/local/bin/codex
 
 Files & Directories
 ✔ Dispatch state: 1 active, 5 completed
@@ -785,7 +880,7 @@ Every warning and error includes a `→` line telling you what to do. Run `docto
 
 ### Unit tests
 
- 422 tests covering the full pipeline — triage, dispatch, audit, planning, notifications, and infrastructure:
+ 454 tests covering the full pipeline — triage, dispatch, audit, planning, intent classification, cross-model review, notifications, and infrastructure:
 
 ```bash
 cd ~/claw-extensions/linear
@@ -806,6 +901,7 @@ npx tsx scripts/uat-linear.ts
 npx tsx scripts/uat-linear.ts --test dispatch
 npx tsx scripts/uat-linear.ts --test planning
 npx tsx scripts/uat-linear.ts --test mention
+ npx tsx scripts/uat-linear.ts --test intent
 ```
 
 **What each scenario does:**
@@ -868,6 +964,25 @@ npx tsx scripts/uat-linear.ts --test mention
 [mention] Total: 18s
 ```
 
+ #### `--test intent` (Natural language routing)
+
+ 1. Creates a test issue and posts a question (no `@mention`)
+ 2. Verifies the bot responds (not silently dropped)
+ 3. Posts a comment with an agent name but no `@` prefix
+ 4. Verifies that agent responds
+ 5. Tests plan review flow with cross-model audit
+
+ **Expected output:**
+
+ ```
+ [intent] Created issue ENG-202
+ [intent] Posted "what can I do with this?" — waiting for response...
+ [intent] ✔ Bot responded to question (12s)
+ [intent] Posted "hey kaylee analyze this" — waiting for response...
+ [intent] ✔ Kaylee responded without @mention (15s)
+ [intent] Total: 27s
+ ```
+
 ### Verify notifications
 
 ```bash
@@ -947,6 +1062,9 @@ journalctl --user -u openclaw-gateway -f # Watch live logs
 | Audit always fails | Run `openclaw openclaw-linear prompts validate` to check prompt syntax. |
 | Multi-repo not detected | Markers must be `<!-- repos: name1, name2 -->`. Names must match `repos` config keys. |
 | `/dispatch` not responding | Restart gateway. Check plugin loaded with `openclaw doctor`. |
+ | Comments ignored (no response) | Check logs for intent classification results. If classifier fails, regex fallback may not match. |
+ | Intent classifier slow | Set `classifierAgentId` to a small model agent (Haiku). Default uses your primary model. |
+ | Cross-model review fails | The reviewer model CLI must be installed. Check logs for "cross-model review unavailable". |
 | Rich notifications are plain text | Set `"richFormat": true` in notifications config. |
 | Gateway rejects config keys | Strict validator. Run `openclaw doctor --fix`. |
 
@@ -2,7 +2,7 @@
   "id": "openclaw-linear",
   "name": "Linear Agent",
   "description": "Linear integration with OAuth support, agent pipeline, and webhook-driven AI agent lifecycle",
-   "version": "0.8.0",
+   "version": "0.8.1",
   "configSchema": {
     "type": "object",
     "additionalProperties": false,
@@ -58,7 +58,8 @@
       "maxReworkAttempts": { "type": "number", "description": "Max audit failures before escalation", "default": 2 },
       "inactivitySec": { "type": "number", "description": "Kill sessions with no I/O for this many seconds (default: 120)", "default": 120 },
       "maxTotalSec": { "type": "number", "description": "Max total runtime for agent sessions in seconds (default: 7200)", "default": 7200 },
-       "toolTimeoutSec": { "type": "number", "description": "Max runtime for a single code_run CLI invocation in seconds (default: 600)", "default": 600 }
+       "toolTimeoutSec": { "type": "number", "description": "Max runtime for a single code_run CLI invocation in seconds (default: 600)", "default": 600 },
+       "claudeApiKey": { "type": "string", "description": "Anthropic API key for Claude CLI backend (passed as ANTHROPIC_API_KEY env var)", "sensitive": true }
     }
   }
 }
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@calltelemetry/openclaw-linear",
-   "version": "0.8.0",
+   "version": "0.8.1",
   "description": "Linear Agent plugin for OpenClaw — webhook-driven AI pipeline with OAuth, multi-agent routing, and issue triage",
   "type": "module",
   "license": "MIT",
package/prompts.yaml CHANGED
@@ -96,13 +96,20 @@ planner:
 - Issues under epics are concrete deliverables with acceptance criteria.
 - Sub-issues are atomic work units that together complete a parent issue.
 - Use "blocks" relationships to express ordering: if A must finish before B starts, A blocks B.
- - Every issue description must include clear acceptance criteria.
 - Every non-epic issue needs a story point estimate and priority.
+ - Every issue description must include:
+   - A user story: "As a [role], I want [feature] so that [benefit]"
+   - Acceptance criteria in Given/When/Then format
+   - At least one UAT test scenario describing how to verify the feature manually
+ - If the user skips acceptance criteria, write reasonable defaults and confirm with them.
 
 INTERVIEW APPROACH:
 - Ask ONE focused question at a time. Never dump a questionnaire.
 - After each user response, create or update issues to capture what you learned.
 - Briefly summarize what you added before asking your next question.
+ - After capturing a feature, ask for acceptance criteria: "How would you know this is working? What should a user be able to do?"
+ - Proactively suggest UAT scenarios: "Here's how I'd test this — does that cover it?"
+ - Don't front-load all questions — weave user stories and acceptance criteria into the natural conversation.
 - When the plan feels complete, invite the user to say "finalize plan".
 - If the user is vague ("make it better", "you decide"), propose concrete options and ask them to pick.
 - If you've gathered enough info after several turns with no new details, suggest: "This looks ready — say **finalize plan** when you're happy with it."
@@ -138,3 +145,22 @@ planner:
 - If you want to stop, say **"abandon planning"**
 
 Let's start — what is this project about, and what are the main feature areas?
+
+ review: |
+   ## Plan Review for {{projectName}}
+
+   The plan passed all deterministic checks ({{issueCount}} issues, valid DAG, all have estimates and priorities).
+
+   ### Current Plan
+   {{planSnapshot}}
+
+   ### {{reviewModel}}'s Recommendations
+   {{crossModelFeedback}}
+
+   Your job:
+   1. Evaluate the recommendations above — which ones are worth applying?
+   2. If any are good, use your tools to update the relevant issues now
+   3. Summarize what you changed (if anything) and what you didn't change (and why)
+   4. End with: "If you're happy with this plan, say **approve plan** to start dispatching."
+
+   Post your review as a comment.
@@ -204,6 +204,55 @@ describe("runAgent subprocess", () => {
   });
 });
 
+ describe("runAgent date/time injection", () => {
+   it("injects current date/time into the message sent to subprocess", async () => {
+     const api = createApi();
+     const runCmd = vi.fn().mockResolvedValue({
+       code: 0,
+       stdout: JSON.stringify({ result: { payloads: [{ text: "done" }] } }),
+       stderr: "",
+     });
+     (api.runtime.system as any).runCommandWithTimeout = runCmd;
+
+     await runAgent({
+       api,
+       agentId: "test",
+       sessionId: "s1",
+       message: "do something",
+     });
+
+     // The --message arg should contain the date context prefix
+     const args: string[] = runCmd.mock.calls[0][0];
+     const msgIdx = args.indexOf("--message");
+     const passedMessage = args[msgIdx + 1];
+     expect(passedMessage).toMatch(/^\[Current date\/time:.*\d{4}.*\]/);
+     expect(passedMessage).toContain("do something");
+   });
+
+   it("includes ISO timestamp in the injected context", async () => {
+     const api = createApi();
+     const runCmd = vi.fn().mockResolvedValue({
+       code: 0,
+       stdout: "ok",
+       stderr: "",
+     });
+     (api.runtime.system as any).runCommandWithTimeout = runCmd;
+
+     await runAgent({
+       api,
+       agentId: "test",
+       sessionId: "s1",
+       message: "test task",
+     });
+
+     const args: string[] = runCmd.mock.calls[0][0];
+     const msgIdx = args.indexOf("--message");
+     const passedMessage = args[msgIdx + 1];
+     // Should contain ISO format like 2026-02-19T05:45:00.000Z
+     expect(passedMessage).toMatch(/\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}/);
+   });
+ });
+
 describe("runAgent retry wrapper", () => {
   it("returns success on first attempt when no watchdog kill", async () => {
     const api = createApi();
@@ -95,6 +95,27 @@ export async function runAgent(params: {
   return { success: false, output: "Watchdog retry exhausted" };
 }
 
+ // ---------------------------------------------------------------------------
+ // Date/time injection — every LLM request gets the current timestamp so models
+ // don't hallucinate the year (Kimi K2.5 thinks it's 2025).
+ // ---------------------------------------------------------------------------
+
+ function buildDateContext(): string {
+   const now = new Date();
+   const iso = now.toISOString();
+   // Human-readable: "Tuesday, February 18, 2026, 11:42 PM CST"
+   const human = now.toLocaleString("en-US", {
+     weekday: "long",
+     year: "numeric",
+     month: "long",
+     day: "numeric",
+     hour: "numeric",
+     minute: "2-digit",
+     timeZoneName: "short",
+   });
+   return `[Current date/time: ${human} (${iso})]`;
+ }
+
 /**
  * Single attempt to run an agent (no retry logic).
  */
@@ -106,7 +127,11 @@ async function runAgentOnce(params: {
   timeoutMs?: number;
   streaming?: AgentStreamCallbacks;
 }): Promise<AgentRunResult> {
-   const { api, agentId, sessionId, message, streaming } = params;
+   const { api, agentId, sessionId, streaming } = params;
+
+   // Inject current timestamp into every LLM request
+   const message = `${buildDateContext()}\n\n${params.message}`;
+
   const pluginConfig = (api as any).pluginConfig as Record<string, unknown> | undefined;
   const wdConfig = resolveWatchdogConfig(agentId, pluginConfig);
   const timeoutMs = params.timeoutMs ?? wdConfig.maxTotalMs;
@@ -321,13 +321,13 @@ export function checkCodingTools(): CheckResult[] {
   const config = loadCodingConfig();
   const hasConfig = !!config.codingTool || !!config.backends;
   if (hasConfig) {
-     checks.push(pass(`coding-tools.json loaded (default: ${config.codingTool ?? "claude"})`));
+     checks.push(pass(`coding-tools.json loaded (default: ${config.codingTool ?? "codex"})`));
   } else {
     checks.push(warn("coding-tools.json not found or empty (using defaults)", undefined, { fix: "Create coding-tools.json in the plugin root — see README for format" }));
   }
 
   // Validate default backend
-   const defaultBackend = config.codingTool ?? "claude";
+   const defaultBackend = config.codingTool ?? "codex";
   if (VALID_BACKENDS.includes(defaultBackend)) {
     // already reported in the line above
   } else {