npm - @sireai/optimus - Versions diffs - 0.1.40 → 0.1.43 - Mend

@sireai/optimus 0.1.40 → 0.1.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (85) hide show

package/dist/task-environment/orchestration/task-runtime-policy.js ADDED Viewed

@@ -0,0 +1,38 @@
+/**
+ * Runtime policy table, keyed by task type.
+ *
+ * Each entry carries the execution binding, the artifact contract, the
+ * repository / repo-memory / publication / patch-artifact flags, and the
+ * result files expected for that task type. Only `bugfix` and `pm` are
+ * registered here.
+ */
+const TASK_RUNTIME_POLICIES = {
+    bugfix: {
+        taskType: "bugfix",
+        executionBinding: "repo_bound",
+        artifactContract: "patch_result",
+        requiresRepository: true,
+        requiresRepoMemory: true,
+        supportsPublication: true,
+        supportsPatchArtifact: true,
+        primaryResultFile: "result.md",
+        expectedPrimaryArtifacts: ["result.md", "patch.diff"]
+    },
+    pm: {
+        taskType: "pm",
+        executionBinding: "artifact_only",
+        artifactContract: "prototype_result",
+        requiresRepository: false,
+        requiresRepoMemory: false,
+        supportsPublication: false,
+        supportsPatchArtifact: false,
+        primaryResultFile: "result.md",
+        expectedPrimaryArtifacts: ["result.md"]
+    }
+};
+export function getTaskRuntimePolicy(taskType) {
+    const policy = TASK_RUNTIME_POLICIES[taskType];
+    if (!policy) {
+        throw new Error(`No runtime policy registered for taskType ${taskType}.`);
+    }
+    return policy;
+}
+export function taskRequiresRepository(taskType) {
+    return getTaskRuntimePolicy(taskType).requiresRepository;
+}
+export function taskSupportsPublication(taskType) {
+    return getTaskRuntimePolicy(taskType).supportsPublication;
+}
+//# sourceMappingURL=task-runtime-policy.js.map

package/dist/task-environment/orchestration/task-runtime-policy.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"task-runtime-policy.js","sourceRoot":"","sources":["../../../src/task-environment/orchestration/task-runtime-policy.ts"],"names":[],"mappings":"AAEA,MAAM,qBAAqB,GAAsC;IAC/D,MAAM,EAAE;QACN,QAAQ,EAAE,QAAQ;QAClB,gBAAgB,EAAE,YAAY;QAC9B,gBAAgB,EAAE,cAAc;QAChC,kBAAkB,EAAE,IAAI;QACxB,kBAAkB,EAAE,IAAI;QACxB,mBAAmB,EAAE,IAAI;QACzB,qBAAqB,EAAE,IAAI;QAC3B,iBAAiB,EAAE,WAAW;QAC9B,wBAAwB,EAAE,CAAC,WAAW,EAAE,YAAY,CAAC;KACtD;IACD,EAAE,EAAE;QACF,QAAQ,EAAE,IAAI;QACd,gBAAgB,EAAE,eAAe;QACjC,gBAAgB,EAAE,kBAAkB;QACpC,kBAAkB,EAAE,KAAK;QACzB,kBAAkB,EAAE,KAAK;QACzB,mBAAmB,EAAE,KAAK;QAC1B,qBAAqB,EAAE,KAAK;QAC5B,iBAAiB,EAAE,WAAW;QAC9B,wBAAwB,EAAE,CAAC,WAAW,CAAC;KACxC;CACF,CAAC;AAEF,MAAM,UAAU,oBAAoB,CAAC,QAAgB;IACnD,MAAM,MAAM,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IAC/C,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,IAAI,KAAK,CAAC,6CAA6C,QAAQ,GAAG,CAAC,CAAC;IAC5E,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,sBAAsB,CAAC,QAAgB;IACrD,OAAO,oBAAoB,CAAC,QAAQ,CAAC,CAAC,kBAAkB,CAAC;AAC3D,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,QAAgB;IACtD,OAAO,oBAAoB,CAAC,QAAQ,CAAC,CAAC,mBAAmB,CAAC;AAC5D,CAAC"}

package/dist/task-environment/result-paths.d.ts ADDED Viewed

@@ -0,0 +1,9 @@
+/**
+ * Directory locations consulted when resolving a task result path.
+ */
+interface ResultPathResolutionContext {
+    /** Task root directory; one of the bases a relative result path may be resolved against. */
+    taskRootDir: string;
+    addresses: {
+        /** Artifact directory; the default base for relative result paths. */
+        artifactDir: string;
+        /** Workspace directory; the other candidate base besides `taskRootDir`. */
+        workspaceDir: string;
+    };
+}
+/**
+ * Resolves `resultPath` to an absolute path using the directories in `context`.
+ *
+ * @param resultPath Path reported for a task result, absolute or relative.
+ * @param context Task root, workspace, and artifact directories to resolve against.
+ * @returns The resolved absolute path.
+ */
+export declare function resolveTaskResultPath(resultPath: string, context: ResultPathResolutionContext): string;
+export {};

package/dist/task-environment/result-paths.js ADDED Viewed

@@ -0,0 +1,36 @@
+import { isAbsolute, relative, resolve, sep } from "node:path";
+function normalizePathLike(value) {
+    return value.replaceAll("\\", "/").replace(/^\.\/+/u, "").replace(/\/+/gu, "/").replace(/\/$/u, "");
+}
+function isWithinBaseDir(baseDir, targetPath) {
+    const relativePath = relative(resolve(baseDir), resolve(targetPath));
+    return relativePath === ""
+        || (!relativePath.startsWith(`..${sep}`) && relativePath !== "..");
+}
+function startsWithPathPrefix(candidatePath, prefixPath) {
+    const normalizedCandidate = normalizePathLike(candidatePath);
+    const normalizedPrefix = normalizePathLike(prefixPath);
+    return normalizedCandidate === normalizedPrefix
+        || normalizedCandidate.startsWith(`${normalizedPrefix}/`);
+}
+export function resolveTaskResultPath(resultPath, context) {
+    const trimmedResultPath = resultPath.trim();
+    if (trimmedResultPath.startsWith("/")) {
+        return resolve(trimmedResultPath);
+    }
+    const artifactDir = resolve(context.addresses.artifactDir);
+    const taskRootDir = resolve(context.taskRootDir);
+    const workspaceDir = resolve(context.addresses.workspaceDir);
+    const candidates = [];
+    for (const baseDir of [taskRootDir, workspaceDir]) {
+        const artifactPathFromBase = relative(baseDir, artifactDir);
+        if (artifactPathFromBase.length > 0
+            && isWithinBaseDir(baseDir, artifactDir)
+            && startsWithPathPrefix(trimmedResultPath, artifactPathFromBase)) {
+            candidates.push(resolve(baseDir, trimmedResultPath));
+        }
+    }
+    candidates.push(resolve(artifactDir, trimmedResultPath));
+    return candidates[0] ?? resolve(artifactDir, trimmedResultPath);
+}
+//# sourceMappingURL=result-paths.js.map

package/dist/task-environment/result-paths.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"result-paths.js","sourceRoot":"","sources":["../../src/task-environment/result-paths.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,GAAG,EAAE,MAAM,WAAW,CAAC;AAUnD,SAAS,iBAAiB,CAAC,KAAa;IACtC,OAAO,KAAK,CAAC,UAAU,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;AACtG,CAAC;AAED,SAAS,eAAe,CAAC,OAAe,EAAE,UAAkB;IAC1D,MAAM,YAAY,GAAG,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC;IACrE,OAAO,YAAY,KAAK,EAAE;WACrB,CAAC,CAAC,YAAY,CAAC,UAAU,CAAC,KAAK,GAAG,EAAE,CAAC,IAAI,YAAY,KAAK,IAAI,CAAC,CAAC;AACvE,CAAC;AAED,SAAS,oBAAoB,CAAC,aAAqB,EAAE,UAAkB;IACrE,MAAM,mBAAmB,GAAG,iBAAiB,CAAC,aAAa,CAAC,CAAC;IAC7D,MAAM,gBAAgB,GAAG,iBAAiB,CAAC,UAAU,CAAC,CAAC;IACvD,OAAO,mBAAmB,KAAK,gBAAgB;WAC1C,mBAAmB,CAAC,UAAU,CAAC,GAAG,gBAAgB,GAAG,CAAC,CAAC;AAC9D,CAAC;AAED,MAAM,UAAU,qBAAqB,CACnC,UAAkB,EAClB,OAAoC;IAEpC,MAAM,iBAAiB,GAAG,UAAU,CAAC,IAAI,EAAE,CAAC;IAC5C,IAAI,iBAAiB,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;QACtC,OAAO,OAAO,CAAC,iBAAiB,CAAC,CAAC;IACpC,CAAC;IAED,MAAM,WAAW,GAAG,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IAC3D,MAAM,WAAW,GAAG,OAAO,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;IACjD,MAAM,YAAY,GAAG,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;IAC7D,MAAM,UAAU,GAAa,EAAE,CAAC;IAEhC,KAAK,MAAM,OAAO,IAAI,CAAC,WAAW,EAAE,YAAY,CAAC,EAAE,CAAC;QAClD,MAAM,oBAAoB,GAAG,QAAQ,CAAC,OAAO,EAAE,WAAW,CAAC,CAAC;QAC5D,IACE,oBAAoB,CAAC,MAAM,GAAG,CAAC;eAC5B,eAAe,CAAC,OAAO,EAAE,WAAW,CAAC;eACrC,oBAAoB,CAAC,iBAAiB,EAAE,oBAAoB,CAAC,EAChE,CAAC;YACD,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,iBAAiB,CAAC,CAAC,CAAC;QACvD,CAAC;IACH,CAAC;IAED,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,iBAAiB,CAAC,CAAC,CAAC;IAEzD,OAAO,UAAU,CAAC,CAAC,CAAC,IAAI,OAAO,CAAC,WAAW,EAAE,iBAAiB,CAAC,CAAC;AAClE,CAAC"}

package/dist/types.d.ts CHANGED Viewed

@@ -106,6 +106,28 @@ export interface RuntimeEventContent {
     branch?: string;
     metadata?: Record<string, unknown>;
 }
+/**
+ * One reference item attached to a task input.
+ * Only `type` and `title` are required; every other field is optional.
+ */
+export interface TaskInputReferenceMaterial {
+    /** Kind of material; one of the listed literal values. */
+    type: "doc" | "image" | "link" | "note" | "whiteboard" | "sheet" | "bitable" | "file";
+    title: string;
+    content?: string;
+    url?: string;
+    token?: string;
+    mimeType?: string;
+    sourceType?: string;
+}
+/** Alias of `TaskInputReferenceMaterial` under a PM-specific name. */
+export type PmTaskInputReferenceMaterial = TaskInputReferenceMaterial;
+/**
+ * Structured input for a PM task (presumably the `pm` task type; confirm against callers).
+ * Only `requirementDocument` is required; every other field is optional.
+ */
+export interface PmTaskInput {
+    requirementDocument: string;
+    productGoal?: string;
+    targetUser?: string;
+    coreFlow?: string;
+    prototypeScope?: string;
+    /** Target platform; one of the listed literal values. */
+    platform?: "web" | "mobile_web" | "ios" | "android" | "desktop" | "responsive";
+    constraints?: string[];
+    referenceMaterials?: TaskInputReferenceMaterial[];
+    styleDirection?: string;
+    changeNotes?: string;
+}
 export interface FeishuAssigneeIdentity {
     displayName?: string;
     login?: string;
@@ -123,6 +145,19 @@ export interface PollingCheckpoint {
     skippedCount?: number;
 }
 export type RepositoryExecutionMode = "copy" | "inplace";
+/** Execution binding of a task type: either "repo_bound" or "artifact_only". */
+export type TaskExecutionBinding = "repo_bound" | "artifact_only";
+/** Artifact contract of a task type: "patch_result", "prototype_result", or "analysis_result". */
+export type TaskArtifactContract = "patch_result" | "prototype_result" | "analysis_result";
+/**
+ * Per-task-type runtime policy: execution binding, artifact contract,
+ * capability flags, and expected result files. All fields are required.
+ */
+export interface TaskRuntimePolicy {
+    taskType: string;
+    executionBinding: TaskExecutionBinding;
+    artifactContract: TaskArtifactContract;
+    requiresRepository: boolean;
+    requiresRepoMemory: boolean;
+    supportsPublication: boolean;
+    supportsPatchArtifact: boolean;
+    /** File name of the primary result, e.g. "result.md". */
+    primaryResultFile: string;
+    /** File names of the artifacts expected, e.g. ["result.md", "patch.diff"]. */
+    expectedPrimaryArtifacts: string[];
+}
 export interface RepositoryRoot {
     path: string;
     alias?: string;
@@ -205,7 +240,7 @@ export interface TriageDecisionRejected {
     missingInfo?: string[];
 }
 export type TriageDecision = TriageDecisionAccepted | TriageDecisionRejected;
-export type TaskArtifactKind = "result_md" | "patch_diff" | "publication_plan" | "verification" | "review_packet" | "log" | "attachment";
+export type TaskArtifactKind = "result_md" | "patch_diff" | "prototype_html" | "publication_plan" | "verification" | "review_packet" | "log" | "attachment";
 export interface TaskArtifact {
     kind?: TaskArtifactKind;
     path: string;
@@ -298,6 +333,8 @@ export interface TaskDeliveryBundle {
         keyEvidence?: string;
         recommendedAction?: string;
         analysisDocUrl?: string;
+        prototypePreviewUrl?: string;
+        prototypeDownloadUrl?: string;
         risk?: string;
         nextAction?: string;
     };
@@ -305,6 +342,7 @@ export interface TaskDeliveryBundle {
     artifacts: {
         resultMd?: string;
         patchDiff?: string;
+        prototypeHtml?: string;
         extras: string[];
     };
     publication?: TaskPublicationStatus;
@@ -549,6 +587,7 @@ export interface RepositoryGuidanceContext {
 }
 export interface TaskExecutionContext {
     taskRootDir: string;
+    runtimePolicy: TaskRuntimePolicy;
     addresses: import("./task-environment/execution-addresses.js").ExecutionAddresses;
     sandboxMode: CodexSandboxMode;
     approvalPolicy: CodexApprovalPolicy;
@@ -669,6 +708,11 @@ export interface OptimusConfig {
             webhook?: string;
             webhooks?: string[];
             secret?: string;
+            defaultRecipient?: {
+                displayName?: string;
+                email?: string;
+                openId?: string;
+            };
             userMappings?: Record<string, {
                 openId: string;
                 displayName?: string;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@sireai/optimus",
-  "version": "0.1.40",
+  "version": "0.1.43",
   "description": "Optimus Codex-native background task runtime and harness scaffolding.",
   "repository": {
     "type": "git",

package/task-harnesses/bugfix/ACCEPT.md CHANGED Viewed

@@ -2,9 +2,10 @@
 ## Decision target
 Route real software defect tasks into the `bugfix` harness.
-Triage must decide:
+Triage decides only:
 1. task type fit
 2. execution admission
 The runner, not triage, decides whether final closure is fix or analysis.
 ## Task type fit
@@ -29,7 +30,7 @@ Accept into execution only when all are true:
   - named page, feature, module, or flow
 ## Still acceptable with partial information
-Still acceptable when:
+Accept when:
 - root cause is unknown
 - reproduction is incomplete
 - logs, stack traces, screenshots, or tests are missing

package/task-harnesses/bugfix/CONSTRAINTS.md CHANGED Viewed

@@ -8,7 +8,7 @@
 - If available evidence contains any file whose basename includes `hprof`, do not skip heap-dump analysis before concluding a memory leak.
 - Do not prefer screenshot-only or description-only leak reasoning over available HPROF evidence.
-## Patch rules
+## Patch safety
 - Change code only after reasoning through module boundaries, call chains, state flow, and upstream/downstream impact.
 - Do not modify code that is not directly relevant to the reported problem. If wider edits are required, keep a direct causal link to the fix.
 - Prefer clear, robust, maintainable fixes. Avoid brute-force guards, broad fallbacks, excessive branching, or temporary-looking patches when a cleaner repair is available.
@@ -17,8 +17,13 @@
 - Important code changes must include useful comments about intent, key decisions, boundary handling, or risk. Do not add comments that only restate obvious behavior.
 - If code changed, describe what changed, why, affected scope, and validation.
 - If code did not change, explain why patching is not yet justified.
-- Before generating or delivering a patch, self-review the actual diff for regressions, boundary issues, compatibility risk, and unnecessary changes. Fix findings first.
-- Before delivery, self-review for new errors, regressions, boundary issues, compatibility issues, and obvious code smell. Fix newly introduced problems before closing.
+- Before delivery, self-review the actual diff for regressions, boundary issues, compatibility risk, unnecessary change, and obvious code smell. Fix findings first.
+- Builder self-review is not a substitute for an explicit reviewer subagent when the review loop is required by the standard.
+- Do not let a patch pass independent review if it deepens, spreads, or hides a known pre-existing issue, even when that issue was not introduced by the current task.
+- Do not widen a patch only to chase elegance or theoretical perfection when a lower-risk credible repair already exists.
+- If every repair path has tradeoffs, prefer the one with smaller blast radius, lower regression probability, and easier rollback.
+- Do not treat reviewer suggestions as mandatory code changes when following them would enlarge scope, reduce validation confidence, or make rollback meaningfully harder.
+- Do not keep revising only to satisfy successive reviewer findings if the patch is becoming broader, more coupled, or less testable than the current best candidate.
 ## Memory rules
 - Before solving a repo task, load repo memory for the current task type and repository. If missing, create a minimal reusable memory first, then continue.
@@ -33,7 +38,7 @@
 - If repo memory conflicts with current repository facts, commands, or validation evidence, trust current evidence and update the memory before finishing.
 ## Stop conditions
-Stop automatic patching and close as analysis if any are true:
+Close as analysis instead of auto-patching if any are true:
 - no credible root-cause judgment can be formed
 - input is too incomplete to define a stable change target
 - required environment, account, device, repository, or external access is missing
@@ -49,3 +54,4 @@ Stop automatic patching and close as analysis if any are true:
 - expanding problem definition or change scope without evidence
 - conclusion-only output without supporting evidence
 - skipping self-review before delivering code changes
+- turning a contained fix into a broader rewrite just to remove every residual reviewer concern

package/task-harnesses/bugfix/EVOLUTION.md CHANGED Viewed

@@ -4,13 +4,7 @@
 Reflect only to improve future `bugfix` tasks. Do not summarize the current case for its own sake.
 Focus on reusable experience that improves speed, accuracy, stability, or token cost.
-Highest-value targets:
-- shortcuts discovered only after repeated trial and error
-- signals that can reduce search cost earlier
-- lower-cost validation paths that should have been tried first
-- project-specific but reusable bugfix workflows
-- repeated dead ends future tasks should avoid
+- Highest-value gains: shorter search paths, stronger earlier signals, cheaper validation choices, reusable repo workflows, repeated dead-end avoidance.
 ## When to reflect
 Reflect only after the main task reaches a normal closure.
@@ -51,7 +45,7 @@ Create or update a skill only when all of the following are true:
 Prefer no skill change over weak skill change. Do not create or update a skill merely because reflection was requested.
 ## Good candidates
-Strong candidates include:
+Strong candidates:
 - a better entry point discovered after reading many irrelevant files
 - a shorter call-chain inspection order discovered after multiple false starts
 - a cheaper validation path discovered after expensive but low-yield validation

package/task-harnesses/bugfix/ROLE.md CHANGED Viewed

@@ -3,23 +3,19 @@
 ## Identity
 You are a `Bugfix Engineer` executing an already accepted `bugfix` task inside a real engineering repository.
-## Ownership
-- Drive the current defect to a trustworthy closure.
-- Stay focused on the defect, the target repository, and the current task package.
-- Produce a result that runtime can manage, humans can review, and downstream workflow can consume.
+## Core responsibility
+- drive one accepted defect to a trustworthy closure
+- stay anchored to the defect, repository facts, and current task package
+- produce a result runtime can consume and humans can review
+- close through evidence, not confidence language
 ## Closure target
-Prefer one of two endings:
-1. Fix closure: credible analysis, minimum necessary code changes, and a reviewable result.
-2. Analysis closure: credible analysis plus a clear explanation of why a safe, trustworthy patch cannot yet be claimed.
+- `Fix closure`: credible analysis, minimum necessary code changes, reviewable validation
+- `Analysis closure`: credible analysis plus a clear reason a trustworthy patch cannot yet be claimed
 ## Scope
 Handle accepted defects in code, config, scripts, build logic, or tests when the task can advance through repository reading, command execution, code change, and evidence.
-Typical cases:
-- application code, scripts, configuration, build logic, or tests
-- crashes, runtime errors, incorrect behavior, state bugs, and boundary-condition defects
 ## Evidence priority
 - If available evidence contains any file whose basename includes `hprof`, analyze that heap dump before claiming a memory-leak root cause.
 - Treat generated heap-analysis artifacts as primary evidence for memory-retention conclusions.

package/task-harnesses/bugfix/STANDARD.md CHANGED Viewed

@@ -8,6 +8,22 @@
 - `Check`: validate through reproduction, tests, scenarios, logs, output comparison, build, or code evidence.
 - `Act`: close as fix or analysis and write one reviewable result file.
+## Review loop
+- Run an explicit reviewer subagent after the main fix/check pass when any are true:
+  - code changed
+  - closure relies heavily on `V1` or `V2`
+  - the call chain, blast radius, or risk surface is non-trivial
+- The reviewer subagent is a judge, not a builder. It must not rewrite the patch directly.
+- Reviewer findings do not automatically justify a larger patch. Treat every revise step as a new risk decision, not as mandatory scope expansion.
+- Maximum review rounds: 3 total.
+- Stop early when:
+  - the reviewer approves closure, or
+  - another revise-and-review pass is unlikely to improve trustworthiness materially
+- Stop and downgrade instead of revising when the next candidate change would materially expand blast radius, weaken rollback safety, or rest on meaningfully lower-confidence reasoning than the current patch does.
+- If the final reviewer verdict still finds material gaps after the maximum rounds, downgrade closure instead of looping further.
+- The builder must read the latest reviewer output before revising or closing.
 ## Patch gate
 - Patch only when both root-cause judgment and validation path are credible.
@@ -54,11 +70,58 @@ Never overstate:
 - Close as fix only when analysis, code changes, validation evidence, and residual-risk understanding are credible.
 - Close as analysis when information, environment, reproduction, or validation is insufficient for a trustworthy patch claim.
+- Prefer the current lower-risk repair candidate over a broader reviewer-driven rewrite when the broader rewrite would make the patch harder to reason about, validate, or roll back.
 - If code changed but fix validation stayed at `V2` or `V1`, describe it as a repair candidate, not a verified fix.
 - If the issue is interaction, crash, device, integration, or resource related and fix validation stayed at `V2`, state what stronger environment or tooling was missing.
 - If build or test failed for unrelated reasons, report the stage, failure reason, and why it is treated as noise or a pre-existing blocker.
 - If only `V1` evidence exists, do not submit a formal verified-fix claim; close as analysis unless a repair candidate is still justified.
 - Analysis closure must still provide root-cause judgment, fix direction, and either targeted local guidance or a module-level strategy.
+- When the review loop ran, final closure must not overstate the last reviewer verdict.
+- The reviewer verdict can block, downgrade, or confirm closure, but it does not raise the validation grade by itself.
+## Reviewer subagent standard
+- Reviewer input should include at minimum:
+  - accepted bugfix task input
+  - strongest root-cause judgment
+  - changed files or `patch.diff`
+  - strongest validation evidence and its limits
+  - remaining blockers, residual risks, and downgrade reasons when present
+  - previous reviewer findings and builder revisions for later rounds
+- Reviewer output should classify findings as:
+  - `Must Fix Before Close`
+  - `Risk Accepted`
+  - `Open Question`
+- Each later review round should also include:
+  - what the builder changed
+  - what the builder intentionally did not change and why
+- When the builder declines a suggested revision, it should state whether the reason is blast radius, weaker validation posture, added complexity, or lack of stronger causal evidence.
+- The reviewer subagent should evaluate the patch in this order:
+  - whether the patch actually addresses the judged root cause instead of only suppressing the symptom
+  - whether the change may introduce upstream/downstream side effects, stability regressions, performance regressions, compatibility issues, or neighbor-path breakage
+  - whether the change worsens any known pre-existing weakness even if that weakness was not introduced by this task
+  - whether the chosen repair is the lowest-risk credible option when every available fix path has tradeoffs
+  - whether the patch preserves or improves performance, simplicity, maintainability, and design clarity when multiple credible fixes exist
+- The reviewer should prefer downgrade over further churn when a follow-up patch would mainly trade one honest residual risk for a larger or harder-to-verify patch.
+- Reviewer expectations for tradeoff judgment:
+  - do not require an unrealistic zero-risk answer when all options have cost
+  - if every credible fix leaves some downside, prefer the option with smaller blast radius, lower regression probability, easier rollback, and clearer reasoning
+  - a pre-existing issue that is not caused by this patch does not have to be fixed now, but the patch must not deepen, spread, or hide it
+- Reviewer expectations for code quality:
+  - on top of correctness, prefer cleaner boundaries, lower complexity, and better performance when that does not expand risk disproportionately
+  - elegance is a tie-breaker after correctness and risk control, not a justification for widening the patch unnecessarily
+- `Must Fix Before Close` examples:
+  - the patch does not actually repair the judged root cause
+  - the change introduces meaningful side effects, compatibility regressions, or neighbor-path risk
+  - validation is materially overstated relative to what actually ran
+  - the patch worsens a known pre-existing weakness
+- `Risk Accepted` examples:
+  - the patch is credible, but some residual risk remains and is already disclosed honestly
+  - all repair paths have tradeoffs, and the chosen one is the smallest credible risk
+  - a reviewer-found weakness exists, but the next fix path would increase patch risk more than it would increase trustworthiness
+- `Open Question` examples:
+  - stronger validation needs missing environment, device, account, traffic, or data
+  - a broader architectural cleanup may exist, but it is outside safe single-task scope
 ## Runtime contract
@@ -91,6 +154,7 @@ Never overstate:
 - Always generate `result.md` on normal completion.
 - If code changed, runtime should also emit `patch.diff`.
+- Generate `review-log.md` whenever the reviewer loop ran.
 - If `patch.diff` exists, `Closure Level` must not be `Analysis Only`.
 - If `patch.diff` exists, Patch Closure Mode is mandatory.
 - If available evidence contains any file whose basename includes `hprof`, state whether the dump was analyzed and identify the strongest file used.
@@ -177,6 +241,23 @@ At minimum, `result.md` must include:
 - fix strategy when validation is insufficient
 - validation method, steps, actual results, and unverified items
 - residual risk and next step
+- when the review loop ran, keep detailed per-round reviewer findings in `review-log.md`, not in the main result body
+## `review-log.md` contract
+- Purpose: preserve the independent bugfix reviewer loop as an audit trail.
+- Create only when the reviewer loop ran.
+- Keep it task-private; do not rely on it as the primary delivery result.
+- Each round entry should include:
+  - round number
+  - reviewer verdict
+  - `Must Fix Before Close`
+  - `Risk Accepted`
+  - `Open Question`
+  - builder action
+- Keep findings dense and patch-specific.
+- Record what changed between rounds rather than repeating the full patch summary.
+- Final closure should match the last reviewer verdict without overstating certainty.
 ## Patch Closure Mode

package/task-harnesses/pm/ACCEPT.md ADDED Viewed

@@ -0,0 +1,66 @@
+# ACCEPT
+Routes requirement-to-prototype work into the `pm` harness.
+## Decision target
+Triage decides only:
+1. task type fit
+2. execution admission
+The runner decides final closure: `Prototype Complete`, `Prototype Partial`, or `Analysis Only`.
+## Task type fit
+Classify as `pm` only when all are true:
+- the request is to turn requirement input into an interactive HTML prototype
+- the expected output is a prototype artifact, not production code
+- the task centers on flow, structure, interaction, or state presentation
+- the prototype can be derived from requirement input without real system implementation
+Do not classify as `pm` when any are true:
+- the request is only strategy discussion or product advice
+- the request is for production implementation
+- the request is only visual design refinement with no requirement-to-prototype goal
+- the request is only PRD writing or requirement analysis with no interactive output
+- the request is a bugfix, code-change, or repository task
+## Execution admission
+Accept when all are true:
+- a usable `requirement_document` exists
+- at least one concrete goal exists
+- at least one concrete flow, page path, or interaction path exists or is clearly derivable
+- the prototype scope is bounded enough for one task
+- the task does not depend on repository coupling or production-system integration
+## Still acceptable with partial information
+Accept if:
+- some states, copy, rules, or edge cases are missing
+- the main objective and at least one core flow are clear
+- missing detail can be surfaced as assumptions instead of hidden invention
+## Reject when execution context is insufficient
+Reject when any are true:
+- there is no usable requirement basis
+- there is no concrete scenario or flow to prototype
+- multiple unrelated areas are mixed with no bounded scope
+- the input is too abstract to determine user behavior
+- trustworthy prototyping would require heavy invention
+- the request actually expects real implementation
+## Missing information labels
+Use the smallest set that explains rejection:
+- `requirement_document`
+- `product_goal`
+- `target_user`
+- `core_flow`
+- `prototype_scope`
+- `constraints`
+## Event scope
+- `problem.discovered`
+- `task.submitted_manually`
+## Triage guidance
+- judge requirement quality, not keyword presence
+- prefer one clear prototype objective over broad redesign asks
+- separate task-type fit from execution readiness
+- accept requirement-driven prototype work, not open-ended consulting

package/task-harnesses/pm/CONSTRAINTS.md ADDED Viewed

@@ -0,0 +1,60 @@
+# CONSTRAINTS
+Defines non-negotiable PM execution rules.
+## Source truth
+- the source requirement document is the primary truth source
+- helper summaries or prior artifacts must not replace source reading
+- keep confirmed facts, assumptions, and open questions separate
+- surface missing or conflicting input explicitly
+## Fidelity and representation
+- preserve explicit product names, labels, enums, ordering, defaults, formulas, limits, scope boundaries, examples, empty/error states, and exclusions
+- do not rename, broaden, normalize, or merge source facts in ways that change product meaning without disclosure
+- before building UI, extract explicit labels, enum sets, ordering, defaults, formulas, limits, scope, exclusions, and open questions
+- assign exactly one representation mode to each critical rule:
+  - `Represented Interactively`
+  - `Represented via Annotation`
+  - `Downgraded / Simulated`
+  - `Not Represented`
+- if a source fact is omitted, merged, normalized, or replaced, declare it in `result.md`; if it changes review understanding, also anchor it in the prototype and export it in `annotations.json`
+- when fidelity and prototype convenience conflict, preserve the source fact or declare the deviation explicitly
+- annotations may supplement core flow coverage, but must not replace it
+- do not present simulated or inferred detail as confirmed requirement
+- if trustworthy prototyping would require heavy invention, stop at `Analysis Only`
+## Review discipline
+- prototype for review, not production deployment
+- the first screen should read primarily as product UI, not as a prototype console
+- static output alone is insufficient unless closure is `Analysis Only`
+- independent reviewer subagent judgment is required before claiming `Prototype Complete`
+- the reviewer is a judge, not a builder
+- maximum review rounds: 3 total
+- record each round number, verdict, key gaps, and builder action in a task-private `review-log.md` under `artifactDir`
+- each later round must re-check the full accepted surface for regressions, not only the previous point fixes
+- before re-review, visually inspect every core panel that carries accepted-scope meaning
+- do not fix one area by making another panel blank, near-blank, visually invisible, or materially thinner in meaning
+- do not respond to reviewer pressure by inflating scope, adding speculative screens, or increasing prototype chrome when that makes the accepted scope harder to inspect
+- prefer `Prototype Partial` over a noisier, less truthful, or more invented prototype assembled only to clear late review comments
+## Annotation discipline
+- bind annotations to a concrete UI target, state, or transition whenever possible
+- use highlighting or connector lines only when readability improves
+- annotate rule meaning, implementation risk, or unresolved behavior, not trivial visual facts
+- do not dump raw PRD excerpts into annotations
+- keep reviewer-facing copy human-readable
+- `annotations.json` must match the actual annotation layer in `prototype.html`
+## Forbidden
+- fake backend integration
+- invented product direction with no source basis
+- claiming certainty that does not exist
+- decoration-first output that hides product meaning
+- claiming outputs that were not actually created under `artifactDir`
+- using annotations to hide missing core screens, states, or transitions
+- presenting simulated behavior as faithfully implemented
+- marking `Prototype Complete` when key rules remain materially weak, merged, or downgraded
+- treating builder self-review as a substitute for an independent reviewer subagent verdict
+- fixing a prior reviewer finding by introducing a new blank, near-blank, or materially weakened core panel
+- treating retained titles, labels, or container chrome as sufficient when the actual intended content expression has disappeared
+- adding speculative flows, exaggerated data breadth, or decorative complexity only to satisfy reviewer expectations rather than requirement truth

package/task-harnesses/pm/CONTEXT.md ADDED Viewed

@@ -0,0 +1,50 @@
+# CONTEXT
+Defines the minimum product model the PM harness must construct before prototyping.
+## Working model
+- this is a document-first, artifact-only task
+- the source requirement document is authoritative
+- helper summaries and prior artifacts are secondary aids, not truth
+- build a minimal product model before building UI
+## Required product model
+### Goal and scope
+- product goal
+- target user
+- bounded prototype scope
+- explicit non-goals
+### Flow and state
+- entry point
+- core actions and transitions
+- success, empty, error, gated, and branching states that change understanding
+### Rule model
+- thresholds, limits, ordering, gating, permissions, formulas, frequency limits, and role boundaries
+- rules that must be interactive
+- rules that must be annotated
+- rules that remain simulated or unresolved
+### Source fact model
+- explicit labels and names
+- explicit enum sets and ordering
+- explicit example entities
+- explicit defaults, selected states, formulas, limits, inclusions, and exclusions
+### Annotation model
+- rule meaning not faithfully expressible through lightweight interaction
+- one primary target per annotation whenever possible
+- truth layer: `confirmed`, `simulated`, `open_question`
+## Artifact model
+- `prototype.html` carries the interactive review surface
+- `result.md` carries rule supplements and implementation-critical notes
+- `annotations.json` carries the structured export of anchored annotations
+- the Feishu result document is only a delivery portal that points to the source link and the artifact set
+## Priority
+- preserve requirement meaning first
+- preserve flow clarity second
+- improve visual coherence third