deepwork-0.4.0-py3-none-any.whl → deepwork-0.7.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepwork/__init__.py +1 -1
- deepwork/cli/hook.py +3 -4
- deepwork/cli/install.py +70 -117
- deepwork/cli/main.py +2 -2
- deepwork/cli/serve.py +133 -0
- deepwork/cli/sync.py +93 -58
- deepwork/core/adapters.py +91 -98
- deepwork/core/generator.py +19 -386
- deepwork/core/hooks_syncer.py +1 -1
- deepwork/core/parser.py +270 -1
- deepwork/hooks/README.md +0 -44
- deepwork/hooks/__init__.py +3 -6
- deepwork/hooks/check_version.sh +54 -21
- deepwork/mcp/__init__.py +23 -0
- deepwork/mcp/quality_gate.py +347 -0
- deepwork/mcp/schemas.py +263 -0
- deepwork/mcp/server.py +253 -0
- deepwork/mcp/state.py +422 -0
- deepwork/mcp/tools.py +394 -0
- deepwork/schemas/job.schema.json +347 -0
- deepwork/schemas/job_schema.py +27 -239
- deepwork/standard_jobs/deepwork_jobs/doc_specs/job_spec.md +9 -15
- deepwork/standard_jobs/deepwork_jobs/job.yml +146 -46
- deepwork/standard_jobs/deepwork_jobs/steps/define.md +100 -33
- deepwork/standard_jobs/deepwork_jobs/steps/errata.md +154 -0
- deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md +207 -0
- deepwork/standard_jobs/deepwork_jobs/steps/fix_settings.md +177 -0
- deepwork/standard_jobs/deepwork_jobs/steps/implement.md +22 -138
- deepwork/standard_jobs/deepwork_jobs/steps/iterate.md +221 -0
- deepwork/standard_jobs/deepwork_jobs/steps/learn.md +2 -26
- deepwork/standard_jobs/deepwork_jobs/steps/test.md +154 -0
- deepwork/standard_jobs/deepwork_jobs/templates/job.yml.template +2 -0
- deepwork/templates/claude/AGENTS.md +38 -0
- deepwork/templates/claude/settings.json +16 -0
- deepwork/templates/claude/skill-deepwork.md.jinja +37 -0
- deepwork/templates/gemini/skill-deepwork.md.jinja +37 -0
- deepwork-0.7.0.dist-info/METADATA +317 -0
- deepwork-0.7.0.dist-info/RECORD +64 -0
- deepwork/cli/rules.py +0 -32
- deepwork/core/command_executor.py +0 -190
- deepwork/core/pattern_matcher.py +0 -271
- deepwork/core/rules_parser.py +0 -559
- deepwork/core/rules_queue.py +0 -321
- deepwork/hooks/rules_check.py +0 -759
- deepwork/schemas/rules_schema.py +0 -135
- deepwork/standard_jobs/deepwork_jobs/steps/review_job_spec.md +0 -208
- deepwork/standard_jobs/deepwork_jobs/templates/doc_spec.md.example +0 -86
- deepwork/standard_jobs/deepwork_rules/hooks/capture_prompt_work_tree.sh +0 -38
- deepwork/standard_jobs/deepwork_rules/hooks/global_hooks.yml +0 -8
- deepwork/standard_jobs/deepwork_rules/hooks/user_prompt_submit.sh +0 -16
- deepwork/standard_jobs/deepwork_rules/job.yml +0 -49
- deepwork/standard_jobs/deepwork_rules/rules/.gitkeep +0 -13
- deepwork/standard_jobs/deepwork_rules/rules/api-documentation-sync.md.example +0 -10
- deepwork/standard_jobs/deepwork_rules/rules/readme-documentation.md.example +0 -10
- deepwork/standard_jobs/deepwork_rules/rules/security-review.md.example +0 -11
- deepwork/standard_jobs/deepwork_rules/rules/skill-md-validation.md +0 -46
- deepwork/standard_jobs/deepwork_rules/rules/source-test-pairing.md.example +0 -13
- deepwork/standard_jobs/deepwork_rules/steps/define.md +0 -249
- deepwork/templates/claude/skill-job-meta.md.jinja +0 -77
- deepwork/templates/claude/skill-job-step.md.jinja +0 -251
- deepwork/templates/gemini/skill-job-meta.toml.jinja +0 -76
- deepwork/templates/gemini/skill-job-step.toml.jinja +0 -162
- deepwork-0.4.0.dist-info/METADATA +0 -381
- deepwork-0.4.0.dist-info/RECORD +0 -71
- {deepwork-0.4.0.dist-info → deepwork-0.7.0.dist-info}/WHEEL +0 -0
- {deepwork-0.4.0.dist-info → deepwork-0.7.0.dist-info}/entry_points.txt +0 -0
- {deepwork-0.4.0.dist-info → deepwork-0.7.0.dist-info}/licenses/LICENSE.md +0 -0
deepwork/mcp/quality_gate.py ADDED
@@ -0,0 +1,347 @@
+"""Quality gate for evaluating step outputs.
+
+The quality gate invokes a review agent (via subprocess) to evaluate
+step outputs against quality criteria.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+from pathlib import Path
+from typing import Any
+
+import aiofiles
+
+from deepwork.mcp.schemas import QualityCriteriaResult, QualityGateResult
+
+# JSON Schema for quality gate response validation
+QUALITY_GATE_RESPONSE_SCHEMA: dict[str, Any] = {
+    "type": "object",
+    "required": ["passed", "feedback"],
+    "properties": {
+        "passed": {"type": "boolean"},
+        "feedback": {"type": "string"},
+        "criteria_results": {
+            "type": "array",
+            "items": {
+                "type": "object",
+                "required": ["criterion", "passed"],
+                "properties": {
+                    "criterion": {"type": "string"},
+                    "passed": {"type": "boolean"},
+                    "feedback": {"type": ["string", "null"]},
+                },
+            },
+        },
+    },
+}
+
+# File separator format: 20 dashes, filename, 20 dashes
+FILE_SEPARATOR = "-" * 20
+
+
+class QualityGateError(Exception):
+    """Exception raised for quality gate errors."""
+
+    pass
+
+
+class QualityGate:
+    """Evaluates step outputs against quality criteria.
+
+    Uses a subprocess to invoke a review agent (e.g., Claude CLI) that
+    evaluates outputs and returns structured feedback.
+
+    See doc/reference/calling_claude_in_print_mode.md for details on
+    proper CLI invocation with structured output.
+    """
+
+    def __init__(
+        self,
+        timeout: int = 120,
+        *,
+        _test_command: list[str] | None = None,
+    ):
+        """Initialize quality gate.
+
+        Args:
+            timeout: Timeout in seconds for review agent
+            _test_command: Internal testing only - override the subprocess command.
+                When set, skips adding --json-schema flag (test mock handles it).
+        """
+        self.timeout = timeout
+        self._test_command = _test_command
+
+    def _build_instructions(self, quality_criteria: list[str]) -> str:
+        """Build the system instructions for the review agent.
+
+        Args:
+            quality_criteria: List of quality criteria to evaluate
+
+        Returns:
+            System instructions string
+        """
+        criteria_list = "\n".join(f"- {c}" for c in quality_criteria)
+
+        return f"""You are a quality gate reviewer. Your job is to evaluate whether outputs meet the specified quality criteria.
+
+## Quality Criteria to Evaluate
+
+{criteria_list}
+
+## Response Format
+
+You must respond with JSON in this exact structure:
+```json
+{{
+  "passed": true/false,
+  "feedback": "Brief overall summary of evaluation",
+  "criteria_results": [
+    {{
+      "criterion": "The criterion text",
+      "passed": true/false,
+      "feedback": "Specific feedback for this criterion (null if passed)"
+    }}
+  ]
+}}
+```
+
+## Guidelines
+
+- Be strict but fair
+- Only mark a criterion as passed if it is clearly met
+- Provide specific, actionable feedback for failed criteria
+- The overall "passed" should be true only if ALL criteria pass"""
+
+    async def _build_payload(
+        self,
+        outputs: list[str],
+        project_root: Path,
+    ) -> str:
+        """Build the user prompt payload with file contents.
+
+        Args:
+            outputs: List of output file paths
+            project_root: Project root path for reading files
+
+        Returns:
+            Formatted payload with file contents
+        """
+        output_sections: list[str] = []
+
+        for output_path in outputs:
+            full_path = project_root / output_path
+            header = f"{FILE_SEPARATOR} {output_path} {FILE_SEPARATOR}"
+
+            if full_path.exists():
+                try:
+                    async with aiofiles.open(full_path, encoding="utf-8") as f:
+                        content = await f.read()
+                    output_sections.append(f"{header}\n{content}")
+                except Exception as e:
+                    output_sections.append(f"{header}\n[Error reading file: {e}]")
+            else:
+                output_sections.append(f"{header}\n[File not found]")
+
+        if not output_sections:
+            return "[No output files provided]"
+
+        return "\n\n".join(output_sections)
+
+    def _parse_response(self, response_text: str) -> QualityGateResult:
+        """Parse the review agent's response.
+
+        When using --print --output-format json --json-schema, Claude CLI returns
+        a wrapper object with the structured output in the 'structured_output' field.
+
+        Args:
+            response_text: Raw response from review agent (JSON wrapper)
+
+        Returns:
+            Parsed QualityGateResult
+
+        Raises:
+            QualityGateError: If response cannot be parsed
+        """
+        try:
+            wrapper = json.loads(response_text.strip())
+
+            # Check for errors in the wrapper
+            if wrapper.get("is_error"):
+                raise QualityGateError(
+                    f"Review agent returned error: {wrapper.get('result', 'Unknown error')}"
+                )
+
+            # Extract structured_output - this is where --json-schema puts the result
+            data = wrapper.get("structured_output")
+            if data is None:
+                raise QualityGateError(
+                    "Review agent response missing 'structured_output' field. "
+                    f"Response was: {response_text[:500]}..."
+                )
+
+            # Parse criteria results
+            criteria_results = [
+                QualityCriteriaResult(
+                    criterion=cr.get("criterion", ""),
+                    passed=cr.get("passed", False),
+                    feedback=cr.get("feedback"),
+                )
+                for cr in data.get("criteria_results", [])
+            ]
+
+            return QualityGateResult(
+                passed=data.get("passed", False),
+                feedback=data.get("feedback", "No feedback provided"),
+                criteria_results=criteria_results,
+            )
+
+        except json.JSONDecodeError as e:
+            raise QualityGateError(
+                f"Failed to parse review agent response as JSON: {e}\n"
+                f"Response was: {response_text[:500]}..."
+            ) from e
+        except (ValueError, KeyError) as e:
+            raise QualityGateError(
+                f"Failed to extract quality gate result: {e}\n"
+                f"Response was: {response_text[:500]}..."
+            ) from e
+
+    async def evaluate(
+        self,
+        quality_criteria: list[str],
+        outputs: list[str],
+        project_root: Path,
+    ) -> QualityGateResult:
+        """Evaluate step outputs against quality criteria.
+
+        Args:
+            quality_criteria: List of quality criteria to evaluate
+            outputs: List of output file paths
+            project_root: Project root path
+
+        Returns:
+            QualityGateResult with pass/fail and feedback
+
+        Raises:
+            QualityGateError: If evaluation fails
+        """
+        if not quality_criteria:
+            # No criteria = auto-pass
+            return QualityGateResult(
+                passed=True,
+                feedback="No quality criteria defined - auto-passing",
+                criteria_results=[],
+            )
+
+        # Build system instructions and payload separately
+        instructions = self._build_instructions(quality_criteria)
+        payload = await self._build_payload(outputs, project_root)
+
+        # Build command with proper flag ordering for Claude CLI
+        # See doc/reference/calling_claude_in_print_mode.md for details
+        #
+        # Key insight: flags must come BEFORE `-p --` because:
+        # - `-p` expects a prompt argument immediately after
+        # - `--` marks the end of flags, everything after is the prompt
+        # - When piping via stdin, we use `-p --` to read from stdin
+        if self._test_command:
+            # Testing mode: use provided command, add system prompt only
+            full_cmd = self._test_command + ["--system-prompt", instructions]
+        else:
+            # Production mode: use Claude CLI with proper flags
+            schema_json = json.dumps(QUALITY_GATE_RESPONSE_SCHEMA)
+            full_cmd = [
+                "claude",
+                "--print",  # Non-interactive mode
+                "--output-format",
+                "json",  # JSON output wrapper
+                "--system-prompt",
+                instructions,
+                "--json-schema",
+                schema_json,  # Structured output - result in 'structured_output' field
+                "-p",
+                "--",  # Read prompt from stdin
+            ]
+
+        try:
+            # Run review agent with payload piped via stdin
+            process = await asyncio.create_subprocess_exec(
+                *full_cmd,
+                stdin=asyncio.subprocess.PIPE,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE,
+                cwd=str(project_root),
+            )
+
+            try:
+                stdout, stderr = await asyncio.wait_for(
+                    process.communicate(input=payload.encode()),
+                    timeout=self.timeout,
+                )
+            except TimeoutError:
+                process.kill()
+                await process.wait()
+                raise QualityGateError(
+                    f"Review agent timed out after {self.timeout} seconds"
+                ) from None
+
+            if process.returncode != 0:
+                raise QualityGateError(
+                    f"Review agent failed with exit code {process.returncode}:\n"
+                    f"stderr: {stderr.decode()}"
+                )
+
+            return self._parse_response(stdout.decode())
+
+        except FileNotFoundError as e:
+            raise QualityGateError("Review agent command not found: claude") from e
+
+
+class MockQualityGate(QualityGate):
+    """Mock quality gate for testing.
+
+    Always passes unless configured otherwise.
+    """
+
+    def __init__(self, should_pass: bool = True, feedback: str = "Mock evaluation"):
+        """Initialize mock quality gate.
+
+        Args:
+            should_pass: Whether evaluations should pass
+            feedback: Feedback message to return
+        """
+        super().__init__()
+        self.should_pass = should_pass
+        self.feedback = feedback
+        self.evaluations: list[dict[str, Any]] = []
+
+    async def evaluate(
+        self,
+        quality_criteria: list[str],
+        outputs: list[str],
+        project_root: Path,
+    ) -> QualityGateResult:
+        """Mock evaluation - records call and returns configured result."""
+        self.evaluations.append(
+            {
+                "quality_criteria": quality_criteria,
+                "outputs": outputs,
+            }
+        )
+
+        criteria_results = [
+            QualityCriteriaResult(
+                criterion=c,
+                passed=self.should_pass,
+                feedback=None if self.should_pass else self.feedback,
+            )
+            for c in quality_criteria
+        ]
+
+        return QualityGateResult(
+            passed=self.should_pass,
+            feedback=self.feedback,
+            criteria_results=criteria_results,
+        )
deepwork/mcp/schemas.py ADDED
@@ -0,0 +1,263 @@
+"""Pydantic models for MCP tool inputs and outputs.
+
+IMPORTANT: If you modify any models in this file that affect the MCP tool
+interfaces (input models, output models, or their fields), you MUST also
+update the documentation in doc/mcp_interface.md to keep it in sync with
+the implementation.
+"""
+
+from enum import Enum
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+# =============================================================================
+# Enums
+# =============================================================================
+
+
+class StepStatus(str, Enum):
+    """Status returned from finished_step."""
+
+    NEEDS_WORK = "needs_work"
+    NEXT_STEP = "next_step"
+    WORKFLOW_COMPLETE = "workflow_complete"
+
+
+# =============================================================================
+# Workflow Info Models
+# NOTE: These models are returned by get_workflows tool.
+# Update doc/mcp_interface.md when modifying.
+# =============================================================================
+
+
+class StepInfo(BaseModel):
+    """Information about a single step."""
+
+    id: str = Field(description="Step identifier")
+    name: str = Field(description="Human-readable step name")
+    description: str = Field(description="What the step does")
+    dependencies: list[str] = Field(default_factory=list, description="Required prior steps")
+
+
+class ConcurrentStepGroup(BaseModel):
+    """A group of steps that can be executed concurrently."""
+
+    step_ids: list[str] = Field(description="Steps that run in parallel")
+    is_concurrent: bool = Field(default=True)
+
+
+class WorkflowStepEntryInfo(BaseModel):
+    """Information about a workflow step entry (sequential or concurrent)."""
+
+    step_ids: list[str] = Field(description="Step ID(s) in this entry")
+    is_concurrent: bool = Field(default=False, description="True if steps run in parallel")
+
+
+class WorkflowInfo(BaseModel):
+    """Information about a workflow."""
+
+    name: str = Field(description="Workflow identifier")
+    summary: str = Field(description="Short description of workflow")
+
+
+class JobInfo(BaseModel):
+    """Information about a job and its workflows."""
+
+    name: str = Field(description="Job identifier")
+    summary: str = Field(description="Short summary of the job")
+    description: str | None = Field(default=None, description="Full description")
+    workflows: list[WorkflowInfo] = Field(default_factory=list)
+
+
+# =============================================================================
+# Tool Input Models
+# NOTE: Changes to these models affect MCP tool parameters.
+# Update doc/mcp_interface.md when modifying.
+# =============================================================================
+
+
+class StartWorkflowInput(BaseModel):
+    """Input for start_workflow tool."""
+
+    goal: str = Field(description="What the user wants to accomplish")
+    job_name: str = Field(description="Name of the job")
+    workflow_name: str = Field(description="Name of the workflow within the job")
+    instance_id: str | None = Field(
+        default=None,
+        description="Optional identifier (e.g., 'acme', 'q1-2026')",
+    )
+
+
+class FinishedStepInput(BaseModel):
+    """Input for finished_step tool."""
+
+    outputs: list[str] = Field(description="List of output file paths created")
+    notes: str | None = Field(default=None, description="Optional notes about work done")
+    quality_review_override_reason: str | None = Field(
+        default=None,
+        description="If provided, skips the quality gate review. Must explain why the review is being bypassed.",
+    )
+
+
+class AbortWorkflowInput(BaseModel):
+    """Input for abort_workflow tool."""
+
+    explanation: str = Field(description="Explanation of why the workflow is being aborted")
+
+
+# =============================================================================
+# Quality Gate Models
+# =============================================================================
+
+
+class QualityCriteriaResult(BaseModel):
+    """Result for a single quality criterion."""
+
+    criterion: str = Field(description="The quality criterion text")
+    passed: bool = Field(description="Whether this criterion passed")
+    feedback: str | None = Field(default=None, description="Feedback if failed")
+
+
+class QualityGateResult(BaseModel):
+    """Result from quality gate evaluation."""
+
+    passed: bool = Field(description="Overall pass/fail")
+    feedback: str = Field(description="Summary feedback")
+    criteria_results: list[QualityCriteriaResult] = Field(
+        default_factory=list, description="Per-criterion results"
+    )
+
+
+# =============================================================================
+# Tool Output Models
+# NOTE: Changes to these models affect MCP tool return types.
+# Update doc/mcp_interface.md when modifying.
+# =============================================================================
+
+
+class ActiveStepInfo(BaseModel):
+    """Information about the step to begin working on."""
+
+    session_id: str = Field(description="Unique session identifier")
+    branch_name: str = Field(description="Git branch for this workflow instance")
+    step_id: str = Field(description="ID of the current step")
+    step_expected_outputs: list[str] = Field(description="Expected output files for this step")
+    step_quality_criteria: list[str] = Field(
+        default_factory=list, description="Criteria for step completion"
+    )
+    step_instructions: str = Field(description="Instructions for the step")
+
+
+class GetWorkflowsResponse(BaseModel):
+    """Response from get_workflows tool."""
+
+    jobs: list[JobInfo] = Field(description="List of all jobs with their workflows")
+
+
+class StackEntry(BaseModel):
+    """An entry in the workflow stack."""
+
+    workflow: str = Field(description="Workflow identifier (job_name/workflow_name)")
+    step: str = Field(description="Current step ID in this workflow")
+
+
+class StartWorkflowResponse(BaseModel):
+    """Response from start_workflow tool."""
+
+    begin_step: ActiveStepInfo = Field(description="Information about the first step to begin")
+    stack: list[StackEntry] = Field(
+        default_factory=list, description="Current workflow stack after starting"
+    )
+
+
+class FinishedStepResponse(BaseModel):
+    """Response from finished_step tool."""
+
+    status: StepStatus = Field(description="Result status")
+
+    # For needs_work status
+    feedback: str | None = Field(default=None, description="Feedback from quality gate")
+    failed_criteria: list[QualityCriteriaResult] | None = Field(
+        default=None, description="Failed quality criteria"
+    )
+
+    # For next_step status
+    begin_step: ActiveStepInfo | None = Field(
+        default=None, description="Information about the next step to begin"
+    )
+
+    # For workflow_complete status
+    summary: str | None = Field(default=None, description="Summary of completed workflow")
+    all_outputs: list[str] | None = Field(default=None, description="All outputs from all steps")
+
+    # Stack info (included in all responses)
+    stack: list[StackEntry] = Field(
+        default_factory=list, description="Current workflow stack after this operation"
+    )
+
+
+class AbortWorkflowResponse(BaseModel):
+    """Response from abort_workflow tool."""
+
+    aborted_workflow: str = Field(
+        description="The workflow that was aborted (job_name/workflow_name)"
+    )
+    aborted_step: str = Field(description="The step that was active when aborted")
+    explanation: str = Field(description="The explanation provided for aborting")
+    stack: list[StackEntry] = Field(
+        default_factory=list, description="Current workflow stack after abort"
+    )
+    resumed_workflow: str | None = Field(
+        default=None, description="The workflow now active (if any)"
+    )
+    resumed_step: str | None = Field(default=None, description="The step now active (if any)")
+
+
+# =============================================================================
+# Session State Models
+# =============================================================================
+
+
+class StepProgress(BaseModel):
+    """Progress for a single step in a workflow."""
+
+    step_id: str = Field(description="Step identifier")
+    started_at: str | None = Field(default=None, description="ISO timestamp when started")
+    completed_at: str | None = Field(default=None, description="ISO timestamp when completed")
+    outputs: list[str] = Field(default_factory=list, description="Output files created")
+    notes: str | None = Field(default=None, description="Notes from agent")
+    quality_attempts: int = Field(default=0, description="Number of quality gate attempts")
+
+
+class WorkflowSession(BaseModel):
+    """State for an active workflow session."""
+
+    session_id: str = Field(description="Unique session identifier")
+    job_name: str = Field(description="Name of the job")
+    workflow_name: str = Field(description="Name of the workflow")
+    instance_id: str | None = Field(default=None, description="Instance identifier")
+    goal: str = Field(description="User's goal for this workflow")
+    branch_name: str = Field(description="Git branch name")
+    current_step_id: str = Field(description="Current step in workflow")
+    current_entry_index: int = Field(
+        default=0, description="Index of current entry in step_entries"
+    )
+    step_progress: dict[str, StepProgress] = Field(
+        default_factory=dict, description="Progress for each step"
+    )
+    started_at: str = Field(description="ISO timestamp when session started")
+    completed_at: str | None = Field(default=None, description="ISO timestamp when completed")
+    status: str = Field(default="active", description="Session status: active, completed, aborted")
+    abort_reason: str | None = Field(
+        default=None, description="Explanation if workflow was aborted"
+    )
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for JSON serialization."""
+        return self.model_dump()
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "WorkflowSession":
+        """Create from dictionary."""
+        return cls.model_validate(data)