npm - waypoint-codex - Versions diffs - 0.20.0 → 1.0.0 - Mend

waypoint-codex 0.20.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41)

package/README.md +18 -37
package/dist/src/cli.js +1 -1
package/dist/src/core.js +33 -116
package/dist/src/docs-index.js +1 -1
package/dist/src/templates.js +0 -10
package/package.json +1 -1
package/templates/.agents/skills/agi-help/SKILL.md +1 -1
package/templates/.agents/skills/code-guide-audit/SKILL.md +1 -5
package/templates/.agents/skills/planning/SKILL.md +6 -10
package/templates/.agents/skills/pr-review/SKILL.md +0 -1
package/templates/.codex/agents/code-health-reviewer.toml +6 -5
package/templates/.codex/agents/code-reviewer.toml +6 -5
package/templates/.codex/agents/plan-reviewer.toml +6 -5
package/templates/.waypoint/ACTIVE_PLANS.md +7 -7
package/templates/.waypoint/README.md +5 -8
package/templates/.waypoint/config.toml +0 -5
package/templates/.waypoint/docs/README.md +1 -3
package/templates/.waypoint/scripts/build-docs-index.mjs +25 -11
package/templates/.waypoint/scripts/prepare-context.mjs +120 -205
package/templates/WORKSPACE.md +2 -6
package/templates/managed-agents-block.md +18 -11
package/dist/src/track-index.js +0 -107
package/templates/.agents/skills/break-it-qa/SKILL.md +0 -184
package/templates/.agents/skills/break-it-qa/agents/openai.yaml +0 -4
package/templates/.agents/skills/conversation-retrospective/SKILL.md +0 -147
package/templates/.agents/skills/conversation-retrospective/agents/openai.yaml +0 -4
package/templates/.agents/skills/docs-sync/SKILL.md +0 -78
package/templates/.agents/skills/docs-sync/agents/openai.yaml +0 -4
package/templates/.agents/skills/merge-ready-owner/SKILL.md +0 -196
package/templates/.agents/skills/merge-ready-owner/agents/openai.yaml +0 -4
package/templates/.agents/skills/pre-pr-hygiene/SKILL.md +0 -83
package/templates/.agents/skills/pre-pr-hygiene/agents/openai.yaml +0 -4
package/templates/.agents/skills/work-tracker/SKILL.md +0 -141
package/templates/.agents/skills/work-tracker/agents/openai.yaml +0 -4
package/templates/.agents/skills/workspace-compress/SKILL.md +0 -113
package/templates/.agents/skills/workspace-compress/agents/openai.yaml +0 -4
package/templates/.waypoint/SOUL.md +0 -71
package/templates/.waypoint/agent-operating-manual.md +0 -109
package/templates/.waypoint/scripts/build-track-index.mjs +0 -169
package/templates/.waypoint/track/README.md +0 -38
package/templates/.waypoint/track/_template.md +0 -48

package/dist/src/track-index.js DELETED Viewed

@@ -1,107 +0,0 @@
-import { existsSync, readFileSync, readdirSync, statSync } from "node:fs";
-import path from "node:path";
-const VALID_TRACK_STATUSES = new Set(["active", "blocked", "paused", "done", "archived"]);
-const ACTIVE_TRACK_STATUSES = new Set(["active", "blocked", "paused"]);
-const SKIP_NAMES = new Set(["README.md", "CHANGELOG.md", "LICENSE.md"]);
-function shouldSkipTrackFile(entry) {
-    return SKIP_NAMES.has(entry) || entry.startsWith("_");
-}
-function parseFrontmatter(filePath) {
-    const text = readFileSync(filePath, "utf8");
-    if (!text.startsWith("---\n")) {
-        return { summary: "", lastUpdated: "", readWhen: [], status: "" };
-    }
-    const endIndex = text.indexOf("\n---\n", 4);
-    if (endIndex === -1) {
-        return { summary: "", lastUpdated: "", readWhen: [], status: "" };
-    }
-    const frontmatter = text.slice(4, endIndex);
-    let summary = "";
-    let lastUpdated = "";
-    let status = "";
-    const readWhen = [];
-    let collectingReadWhen = false;
-    for (const rawLine of frontmatter.split("\n")) {
-        const line = rawLine.trim();
-        if (line.startsWith("summary:")) {
-            summary = line.slice("summary:".length).trim().replace(/^['"]|['"]$/g, "");
-            collectingReadWhen = false;
-            continue;
-        }
-        if (line.startsWith("last_updated:")) {
-            lastUpdated = line.slice("last_updated:".length).trim().replace(/^['"]|['"]$/g, "");
-            collectingReadWhen = false;
-            continue;
-        }
-        if (line.startsWith("status:")) {
-            status = line.slice("status:".length).trim().replace(/^['"]|['"]$/g, "").toLowerCase();
-            collectingReadWhen = false;
-            continue;
-        }
-        if (line.startsWith("read_when:")) {
-            collectingReadWhen = true;
-            continue;
-        }
-        if (collectingReadWhen && line.startsWith("- ")) {
-            readWhen.push(line.slice(2).trim());
-            continue;
-        }
-        if (collectingReadWhen && line.length > 0) {
-            collectingReadWhen = false;
-        }
-    }
-    return { summary, lastUpdated, readWhen, status };
-}
-function walkTracks(projectRoot, currentDir, output, invalid) {
-    for (const entry of readdirSync(currentDir)) {
-        const fullPath = path.join(currentDir, entry);
-        const stat = statSync(fullPath);
-        if (stat.isDirectory()) {
-            walkTracks(projectRoot, fullPath, output, invalid);
-            continue;
-        }
-        if (!entry.endsWith(".md") || shouldSkipTrackFile(entry)) {
-            continue;
-        }
-        const { summary, lastUpdated, readWhen, status } = parseFrontmatter(fullPath);
-        const relPath = path.relative(projectRoot, fullPath);
-        if (!summary || !lastUpdated || readWhen.length === 0 || !VALID_TRACK_STATUSES.has(status)) {
-            invalid.push(relPath);
-            continue;
-        }
-        output.push({ path: relPath, summary, readWhen, status });
-    }
-}
-export function renderTracksIndex(projectRoot, trackDir) {
-    const entries = [];
-    const invalidTracks = [];
-    if (existsSync(trackDir)) {
-        walkTracks(projectRoot, trackDir, entries, invalidTracks);
-    }
-    const lines = [
-        "# Tracks Index",
-        "",
-        "Auto-generated by `waypoint sync` / `waypoint doctor`. Read active trackers when resuming long-running work.",
-        "",
-        "## .waypoint/track/",
-        "",
-    ];
-    if (entries.length === 0) {
-        lines.push("No tracker files found.");
-    }
-    else {
-        for (const entry of entries.sort((a, b) => a.path.localeCompare(b.path))) {
-            lines.push(`- **${entry.path}** — [${entry.status}] ${entry.summary}`);
-            lines.push(`  Read when: ${entry.readWhen.join("; ")}`);
-        }
-    }
-    lines.push("");
-    return {
-        content: `${lines.join("\n")}`,
-        invalidTracks,
-        activeTrackPaths: entries
-            .filter((entry) => ACTIVE_TRACK_STATUSES.has(entry.status))
-            .map((entry) => entry.path)
-            .sort((a, b) => a.localeCompare(b)),
-    };
-}

package/templates/.agents/skills/break-it-qa/SKILL.md DELETED Viewed

@@ -1,184 +0,0 @@
----
-name: break-it-qa
-description: Verify a user-facing feature by trying to break it on purpose instead of only following the happy path. Use after building forms, multistep flows, settings pages, onboarding, stateful UI, destructive actions, or any browser-facing feature where invalid inputs, refreshes, back navigation, repeated clicks, wrong action order, or recovery paths might expose real bugs.
----
-# Break-It QA
-Use this skill to attack the feature like an impatient, confused, or careless user.
-This skill is for adversarial manual QA. It tries to make the feature fail through invalid, interrupted, stale, repeated, or out-of-order interactions instead of only proving the happy path works.
-## Step 1: Ask The Three Setup Questions
-Before testing, ask the user these questions if the answer is not already clear from context:
-- what exact feature or scope should this cover?
-- how many attack items should the break log reach before stopping?
-- should the skill stop at findings or also fix clear issues after they are found?
-Keep this intake short. These are the main user-controlled knobs for the skill.
-If the user does not specify a count, use a reasonable default such as `40`.
-## Step 2: Read First
-Before verification:
-1. Read `.waypoint/SOUL.md`
-2. Read `.waypoint/agent-operating-manual.md`
-3. Read `.waypoint/WORKSPACE.md`
-4. Read `.waypoint/context/MANIFEST.md`
-5. Read every file listed in that manifest
-6. Read the routed docs or nearby code that define the feature being tested
-## Step 3: Identify Break Surfaces
-- Identify the happy path first so you know what "broken" means.
-- Find the fragile surfaces: forms, wizards, pending states, destructive actions, async transitions, navigation changes, and persisted state.
-- For each major step or transition, ask explicit "What if...?" questions before testing. Examples:
-  - What if the user refreshes here?
-  - What if they go back now?
-  - What if they click twice?
-  - What if this input is empty, malformed, too long, or contradictory?
-  - What if this action succeeds in the UI but fails in persistence?
-Do not test blindly.
-## Step 4: Create A Break Log
-Write or update a durable markdown log under `.waypoint/docs/`.
-- Prefer a focused path such as `.waypoint/docs/verification/<feature>-break-it-qa.md`.
-- If a routed verification doc already exists for this feature, update it instead of creating a competing file.
-- The log is part of the skill, not an optional extra.
-- Pre-generate the attack plan in this log before executing it. Do not improvise everything live.
-Use one item per attempted action. A good entry shape is:
-```markdown
-- [ ] What if the user refreshes on the confirmation step before the request finishes?
-  Step: confirmation
-  Category: navigation
-  Status: pending
-  Observed: not tried yet
-```
-Then update each item as you go:
-- `survived`
-- `broke`
-- `fixed`
-- `retested-survived`
-- `blocked`
-- `not-applicable`
-Every executed item must include:
-- `Step`
-- `Category`
-- `Status`
-- `Observed`
-If the user sets a target such as "make this file 150 items long before you stop," treat that as a hard stopping condition unless you hit a real blocker and explain why.
-Use consistent categories such as:
-- `navigation`
-- `input-validation`
-- `repeat-action`
-- `stale-state`
-- `error-recovery`
-- `destructive-action`
-- `permissions`
-- `async-state`
-- `persistence`
-## Step 5: Enforce Coverage Before Execution
-Before you start executing attacks:
-- pre-generate a meaningful attack list
-- spread it across the major flow steps
-- spread it across relevant categories
-- make sure the count is not satisfied by one repetitive corner of the feature
-Do not treat total item count alone as sufficient coverage.
-If the user asks for a large target such as `150`, ensure the log covers multiple steps and multiple categories instead of padding one surface.
-Anti-cheating rules:
-- no filler items
-- each attack must be meaningfully distinct
-- reworded duplicates do not count toward the target
-## Step 6: Use The Real UI
-- Use `playwright-interactive`.
-- Exercise the actual UI instead of mocking the flow in code.
-- Keep the scope focused on the feature the user asked you to verify.
-- Capture screenshots of the important states you observe so the user can see the evidence directly.
-## Step 7: Try To Break It On Purpose
-Do more than a happy-path walkthrough.
-Actively try:
-- invalid inputs
-- empty required fields
-- boundary-length or malformed inputs
-- repeated or double clicks
-- submitting twice
-- wrong action order
-- back and forward navigation
-- page refresh during the flow
-- closing and reopening modals or screens
-- canceling mid-flow and re-entering
-- stale UI state after edits
-- conflicting selections or toggles
-- error recovery after a failed action
-If the feature is stateful, also check whether the UI, network result, and persisted state stay coherent after those interactions.
-As you test, keep expanding the break log with new "What if...?" cases that emerge from the flow. Do not rely on memory or chat-only notes.
-## Step 8: Record And Fix Real Bugs
-- Document each meaningful issue you find.
-- Fix the issue when the remediation is clear and the chosen mode includes fixes.
-- If the behavior is ambiguous, call out the product decision instead of bluffing a fix.
-- Update docs when the verification exposes stale assumptions about how the feature works.
-- Update the break log entry for each attempted action with what happened and whether the feature survived.
-- Require a short observed-result note for every executed item. "Worked" is too weak; capture what actually happened.
-- Save screenshots for the key broken, risky, or fixed states as you go.
-Do not stop at the first bug.
-## Step 9: Repeat Until The Feature Resists Abuse
-After fixes:
-- rerun the relevant happy path
-- rerun the break attempts that previously failed
-- rerun directly related attacks
-- rerun neighboring attacks that touch the same step, state transition, or failure surface
-- verify the fix did not create a new inconsistent state
-- keep adding and executing new "What if...?" items until the requested target coverage is reached
-The skill is not done when the feature only works once. It is done when the feature behaves predictably under sloppy real-world use.
-## Step 10: Report Truthfully
-Summarize:
-- the path to the break log markdown file
-- how many attack items were recorded and exercised
-- how coverage was distributed across steps and categories
-- which screenshots you captured and what each one shows
-- what break attempts you tried
-- which issues you found
-- what you fixed
-- a short systemic-risks summary describing recurring weakness patterns, not just individual bugs
-- what still looks risky or was not exercised

package/templates/.agents/skills/break-it-qa/agents/openai.yaml DELETED Viewed

@@ -1,4 +0,0 @@
-interface:
-  display_name: "Break-It QA"
-  short_description: "Try to break a feature through the UI"
-  default_prompt: "Use $break-it-qa to verify this user-facing feature by trying to break it through the browser with invalid inputs, wrong action order, refreshes, back navigation, repeated clicks, and other adversarial interactions, then fix clear issues and repeat."

package/templates/.agents/skills/conversation-retrospective/SKILL.md DELETED Viewed

@@ -1,147 +0,0 @@
----
-name: conversation-retrospective
-description: Harvest durable knowledge, user feedback, skill lessons, and repeated workflow patterns from the active conversation into the repo's existing memory system. Use when the user asks to save what was learned, write down what changed, capture lessons from this thread, update docs or handoff state without more prompting, improve skills that were used or exposed gaps, or record new skill ideas based on repetitive work in the live conversation. Do not use this for generic planning, broad docs audits, or digging through archived session history unless the user explicitly asks for that.
----
-# Conversation Retrospective
-Use this skill to harvest the active conversation into the repo's existing memory system.
-This skill works from the live conversation already in context. Do not go hunting through archived session files unless the user explicitly asks for that.
-This is a closeout and distillation workflow, not a generic planning pass or a broad docs audit.
-## When Not To Use This Skill
-- Skip it for generic planning or implementation design; use the planning workflow for that.
-- Skip it for broad docs audits that are not driven by what happened in this conversation.
-- Skip it when the user wants archived history analysis rather than the live thread; only dig into old sessions if they explicitly ask.
-- Skip it when there is nothing durable to preserve and no skill or workflow lesson to capture.
-## Read First
-Before persisting anything:
-1. Read the repo's main agent guidance and project-context files
-2. Read the repo's current durable memory surfaces, such as docs, workspace/handoff files, trackers, decision logs, or knowledge files
-3. Read the exact docs, notes, and skill files that the conversation touched
-Do not assume the repo uses Waypoint. Adapt to the memory structure that already exists.
-## Step 1: Distill Durable Knowledge
-Review the current conversation and separate:
-- durable project knowledge
-- live execution state
-- transient chatter
-- direct user feedback, corrections, complaints, and preferences
-Persist without asking follow-up questions when the correct destination is clear.
-Treat explicit user feedback as a high-priority signal. If the user corrected the approach, rejected a behavior, called out friction, or stated a standing preference, prefer preserving that over the agent's earlier assumptions.
-Write durable knowledge to the smallest truthful home the repo already uses:
-- the main docs or knowledge layer for architecture, behavior, decisions, debugging knowledge, and reusable operating guidance
-- the repo's plans layer for durable implementation, rollout, migration, or investigation plans
-- the repo's standing guidance file for durable project context or long-lived working rules
-- the repo's live handoff or workspace file for current state, blockers, and immediate next steps
-- the repo's tracker or execution-log layer when the conversation created or materially changed a long-running workstream
-If the repo uses doc metadata such as `last_updated`, refresh it when needed.
-If the repo has no obvious durable home but the need is clear, create the smallest coherent doc or note that fits the surrounding patterns instead of leaving the learning only in chat.
-Do not leave important truths only in chat.
-## Step 2: Improve Existing Skills
-Identify which skills were actually used in this conversation, or which existing skills clearly should have covered the workflow but left avoidable gaps.
-For each used or clearly relevant skill, explicitly decide whether it:
-- succeeded
-- partially succeeded
-- failed
-Base that judgment on the actual conversation, especially:
-- direct user feedback
-- whether the skill helped complete the task
-- whether the agent had to work around missing guidance
-- whether concrete errors, dead ends, or repeated corrections happened while using it
-Distinguish between:
-- a skill problem
-- an execution mistake by the agent
-- an external/tooling failure
-- a one-off user preference that should not be generalized
-Only change the skill when the problem is truly in the skill guidance.
-For each affected skill:
-- read the existing skill before editing it
-- update only reusable guidance, not one-off transcript details
-- add missing guardrails, path hints, failure modes, error-handling guidance, decision rules, or references that would have made the conversation easier to complete
-- keep `SKILL.md` concise; prefer targeted structural improvements over turning the skill into a diary
-If the environment has both a source-of-truth skill and one or more mirrored or installed copies, update the source-of-truth version and any copies the user expects to stay in sync.
-Do not assume there is only one skill location, and do not assume there are many.
-## Step 3: Propose New Skills
-When the conversation revealed repetitive work that existing skills do not cover well:
-- do not silently scaffold a new skill unless the user asked for implementation
-- record the proposal in the repo's existing docs or idea-capture layer
-If there is no obvious place for durable skill proposals, create a small doc such as `skill-ideas.md` in the repo's normal docs area.
-Each proposal should include:
-- the repeated workflow or problem
-- likely trigger phrases
-- expected outputs or side effects
-- why existing skills were insufficient
-Skip this doc when there is no real new-skill candidate.
-## Step 4: Refresh Repo Memory
-After changing docs, handoff state, trackers, or skills:
-- run whatever repo-local refresh or index step the project uses, if one exists
-- otherwise make sure the edited memory surfaces are internally consistent and discoverable
-Do not invent a refresh command when the repo does not have one.
-## Step 5: Report
-Summarize:
-- what durable knowledge you saved and where
-- which skills you evaluated and whether they succeeded, partially succeeded, or failed
-- which skills you improved
-- which concrete errors, failure modes, or repeated friction points you captured
-- which new skill ideas you recorded, if any
-- what you intentionally left unpersisted because it was transient
-If no substantive persistence changes were needed, say that explicitly instead of inventing updates.
-## Gotchas
-- Do not turn this skill into a transcript dump. Persist only durable knowledge, live state, or reusable lessons.
-- Do not scatter the same learning across multiple files. Pick the smallest truthful home the repo already uses.
-- Do not blame a skill for a problem that was really an execution mistake or an external tool failure.
-- Do not preserve one-off user phrasing or temporary frustration as if it were standing repo policy unless the user clearly framed it that way.
-- Do not go hunting through archived session files just because the live thread feels incomplete. This skill should work from the current conversation unless the user explicitly broadens the scope.
-## Keep This Skill Sharp
-- After meaningful retrospectives, add new gotchas when the same persistence mistake, memory-placement mistake, or skill-triage mistake keeps recurring.
-- Tighten the description if the skill misses real prompts like "save what we learned here" or fires on requests that are really planning or docs-audit work.
-- If the same kind of durable learning keeps needing a custom destination, add that routing guidance to the skill instead of leaving the decision to be rediscovered in chat.

package/templates/.agents/skills/conversation-retrospective/agents/openai.yaml DELETED Viewed

@@ -1,4 +0,0 @@
-interface:
-  display_name: "Conversation Retrospective"
-  short_description: "Harvest the live conversation into repo memory"
-  default_prompt: "Use $conversation-retrospective to preserve the durable lessons, repo-memory updates, and skill learnings from this live conversation."

package/templates/.agents/skills/docs-sync/SKILL.md DELETED Viewed

@@ -1,78 +0,0 @@
----
-name: docs-sync
-description: Audit routed docs against the actual codebase and shipped behavior. Use when the user asks to sync docs, when docs may be stale after implementation work, before pushing or opening a PR, when routes, contracts, config, commands, or shipped behavior changed, or when Codex should find missing, incorrect, outdated, or broken documentation and then update or flag the exact gaps. Do not use this for vendor-doc ingestion, repo-memory cleanup, or broad code review that is not specifically about docs drift.
----
-# Docs Sync
-Use this skill to keep repo docs aligned with reality.
-This is not a vendor-doc ingestion skill and not a workspace-cleanup skill. It owns one job: compare the codebase and shipped behavior against routed docs, then fix or flag the mismatches.
-## When Not To Use This Skill
-- Skip it for importing or summarizing upstream vendor docs. Link to the real source instead of copying it into the repo.
-- Skip it for workspace compression or tracker cleanup. This skill is about docs drift, not handoff hygiene.
-- Skip it for broad code review that is not specifically about docs-to-reality mismatches.
-- Skip it when the user only wants a new durable plan or architecture note; use the planning or normal docs-writing flow in that case.
-## Read First
-Before auditing docs:
-1. Read `.waypoint/SOUL.md`
-2. Read `.waypoint/agent-operating-manual.md`
-3. Read `.waypoint/WORKSPACE.md`
-4. Read `.waypoint/context/MANIFEST.md`
-5. Read every file listed in that manifest
-6. Read the routed docs for the area under review
-## Step 1: Compare Docs To Reality
-Audit the real code, routes, contracts, config surfaces, and user-visible behavior against `.waypoint/docs/`.
-Look for:
-- missing docs for real shipped behavior
-- stale docs that describe removed or changed behavior
-- broken routing links or docs index gaps
-- examples or commands that no longer work
-- contract/schema/config docs that no longer match the code
-- docs that still describe future work as if it is already shipped
-## Step 2: Fix Or Flag
-- Update the docs when the correct wording is clear.
-- Add the smallest routed doc needed when behavior exists but is undocumented.
-- Remove or reframe stale claims instead of letting them linger.
-- If a mismatch is real but the correct doc shape is unclear, flag it explicitly instead of bluffing.
-Do not mirror vendor reference docs into the repo. Link to the real upstream docs when that is the right source of truth.
-## Step 3: Refresh Routing
-After changing routed docs:
-- Run `node .waypoint/scripts/prepare-context.mjs` so the docs index and generated context match the updated docs.
-## Step 4: Report The Sync Result
-Summarize:
-- what docs were stale or missing
-- what you updated
-- what still needs a decision, if anything
-## Gotchas
-- Do not trust docs-to-docs consistency alone. The source of truth is the shipped code and behavior, not whether two markdown files agree with each other.
-- Do not leave stale future-tense claims behind after a feature ships or is cut. Docs drift often shows up as roadmap language that quietly became false.
-- Do not update prose without checking commands, routes, config names, and examples. Small copied snippets are often where docs rot first.
-- Do not invent certainty when the right doc shape is unclear. Flag the mismatch instead of bluffing a final answer.
-- After touching routed docs, always refresh the generated docs/context layer so the repo’s index and bootstrap bundle match the new reality.
-## Keep This Skill Sharp
-- After meaningful runs, add new gotchas when the same docs-drift pattern, broken example shape, or stale-claim mistake keeps recurring.
-- Tighten the description if the skill misses real prompts like "sync the docs" or fires on requests that are really about repo-memory cleanup instead.
-- If the skill starts needing detailed provider-specific or command-heavy guidance, move that detail into references instead of bloating the hub file.

package/templates/.agents/skills/docs-sync/agents/openai.yaml DELETED Viewed

@@ -1,4 +0,0 @@
-interface:
-  display_name: "Docs Sync"
-  short_description: "Audit docs against the real codebase"
-  default_prompt: "Use $docs-sync to audit routed docs against the actual codebase and shipped behavior, then update or flag any missing, incorrect, or outdated documentation."