PyPI - deepwork - Versions diffs - 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl - Mend

deepwork 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

deepwork/cli/sync.py CHANGED Viewed

@@ -117,6 +117,7 @@ def sync_commands(project_path: Path) -> None:
     # Sync each platform
     generator = CommandGenerator()
     stats = {"platforms": 0, "commands": 0, "hooks": 0}
+    synced_adapters: list[AgentAdapter] = []
     for platform_name in platforms:
         try:
@@ -157,6 +158,7 @@ def sync_commands(project_path: Path) -> None:
                 console.print(f"    [red]✗[/red] Failed to sync hooks: {e}")
         stats["platforms"] += 1
+        synced_adapters.append(adapter)
     # Summary
     console.print()
@@ -174,3 +176,10 @@ def sync_commands(project_path: Path) -> None:
     console.print(table)
     console.print()
+    # Show reload instructions for each synced platform
+    if synced_adapters and stats["commands"] > 0:
+        console.print("[bold]To use the new commands:[/bold]")
+        for adapter in synced_adapters:
+            console.print(f"  [cyan]{adapter.display_name}:[/cyan] {adapter.reload_instructions}")
+        console.print()

deepwork/core/adapters.py CHANGED Viewed

@@ -57,6 +57,12 @@ class AgentAdapter(ABC):
     commands_dir: ClassVar[str] = "commands"
     command_template: ClassVar[str] = "command-job-step.md.jinja"
+    # Instructions for reloading commands after sync (shown to users)
+    # Subclasses should override with platform-specific instructions.
+    reload_instructions: ClassVar[str] = (
+        "Restart your AI assistant session to use the new commands."
+    )
     # Mapping from generic CommandLifecycleHook to platform-specific event names.
     # Subclasses should override this to provide platform-specific mappings.
     hook_name_mapping: ClassVar[dict[CommandLifecycleHook, str]] = {}
@@ -253,6 +259,12 @@ class ClaudeAdapter(AgentAdapter):
     display_name = "Claude Code"
     config_dir = ".claude"
+    # Claude Code doesn't have a reload command - must restart session
+    reload_instructions: ClassVar[str] = (
+        "Type 'exit' to leave your current session, then run "
+        "'claude --resume' (your history will be maintained)."
+    )
     # Claude Code uses PascalCase event names
     hook_name_mapping: ClassVar[dict[CommandLifecycleHook, str]] = {
         CommandLifecycleHook.AFTER_AGENT: "Stop",
@@ -333,6 +345,11 @@ class GeminiAdapter(AgentAdapter):
     config_dir = ".gemini"
     command_template = "command-job-step.toml.jinja"
+    # Gemini CLI can reload with /memory refresh
+    reload_instructions: ClassVar[str] = (
+        "Run '/memory refresh' to reload commands, or restart your Gemini CLI session."
+    )
     # Gemini CLI does NOT support command-level hooks
     # Hooks are global/project-level in settings.json, not per-command
     hook_name_mapping: ClassVar[dict[CommandLifecycleHook, str]] = {}

deepwork/core/policy_parser.py CHANGED Viewed

@@ -17,6 +17,11 @@ class PolicyParseError(Exception):
     pass
+# Valid compare_to values
+COMPARE_TO_VALUES = frozenset({"base", "default_tip", "prompt"})
+DEFAULT_COMPARE_TO = "base"
 @dataclass
 class Policy:
     """Represents a single policy definition."""
@@ -25,6 +30,7 @@ class Policy:
     triggers: list[str]  # Normalized to list
     safety: list[str] = field(default_factory=list)  # Normalized to list, empty if not specified
     instructions: str = ""  # Resolved content (either inline or from file)
+    compare_to: str = DEFAULT_COMPARE_TO  # What to compare against: base, default_tip, or prompt
     @classmethod
     def from_dict(cls, data: dict[str, Any], base_dir: Path | None = None) -> "Policy":
@@ -74,11 +80,15 @@ class Policy:
                 f"Policy '{data['name']}' must have either 'instructions' or 'instructions_file'"
             )
+        # Get compare_to (defaults to DEFAULT_COMPARE_TO)
+        compare_to = data.get("compare_to", DEFAULT_COMPARE_TO)
         return cls(
             name=data["name"],
             triggers=triggers,
             safety=safety,
             instructions=instructions,
+            compare_to=compare_to,
         )

deepwork/hooks/evaluate_policies.py CHANGED Viewed

@@ -6,12 +6,16 @@ should fire based on changed files and conversation context.
 Usage:
     python -m deepwork.hooks.evaluate_policies \
-        --policy-file .deepwork.policy.yml \
-        --changed-files "file1.py\nfile2.py"
+        --policy-file .deepwork.policy.yml
 The conversation context is read from stdin and checked for <promise> tags
 that indicate policies have already been addressed.
+Changed files are computed based on each policy's compare_to setting:
+- base: Compare to merge-base with default branch (default)
+- default_tip: Two-dot diff against default branch tip
+- prompt: Compare to state captured at prompt submission
 Output is JSON suitable for Claude Code Stop hooks:
     {"decision": "block", "reason": "..."}  # Block stop, policies need attention
     {}  # No policies fired, allow stop
@@ -20,16 +24,223 @@ Output is JSON suitable for Claude Code Stop hooks:
 import argparse
 import json
 import re
+import subprocess
 import sys
 from pathlib import Path
 from deepwork.core.policy_parser import (
+    Policy,
     PolicyParseError,
-    evaluate_policies,
+    evaluate_policy,
     parse_policy_file,
 )
+def get_default_branch() -> str:
+    """
+    Get the default branch name (main or master).
+    Returns:
+        Default branch name, or "main" if cannot be determined.
+    """
+    # Try to get the default branch from remote HEAD
+    try:
+        result = subprocess.run(
+            ["git", "symbolic-ref", "refs/remotes/origin/HEAD"],
+            capture_output=True,
+            text=True,
+            check=True,
+        )
+        # Output is like "refs/remotes/origin/main"
+        return result.stdout.strip().split("/")[-1]
+    except subprocess.CalledProcessError:
+        pass
+    # Try common default branch names
+    for branch in ["main", "master"]:
+        try:
+            subprocess.run(
+                ["git", "rev-parse", "--verify", f"origin/{branch}"],
+                capture_output=True,
+                check=True,
+            )
+            return branch
+        except subprocess.CalledProcessError:
+            continue
+    # Fall back to main
+    return "main"
+def get_changed_files_base() -> list[str]:
+    """
+    Get files changed relative to the base of the current branch.
+    This finds the merge-base between the current branch and the default branch,
+    then returns all files changed since that point.
+    Returns:
+        List of changed file paths.
+    """
+    default_branch = get_default_branch()
+    try:
+        # Get the merge-base (where current branch diverged from default)
+        result = subprocess.run(
+            ["git", "merge-base", "HEAD", f"origin/{default_branch}"],
+            capture_output=True,
+            text=True,
+            check=True,
+        )
+        merge_base = result.stdout.strip()
+        # Stage all changes so they appear in diff
+        subprocess.run(["git", "add", "-A"], capture_output=True, check=False)
+        # Get files changed since merge-base (including staged)
+        result = subprocess.run(
+            ["git", "diff", "--name-only", merge_base, "HEAD"],
+            capture_output=True,
+            text=True,
+            check=True,
+        )
+        committed_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
+        # Also get staged changes not yet committed
+        result = subprocess.run(
+            ["git", "diff", "--name-only", "--cached"],
+            capture_output=True,
+            text=True,
+            check=False,
+        )
+        staged_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
+        # Get untracked files
+        result = subprocess.run(
+            ["git", "ls-files", "--others", "--exclude-standard"],
+            capture_output=True,
+            text=True,
+            check=False,
+        )
+        untracked_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
+        all_files = committed_files | staged_files | untracked_files
+        return sorted([f for f in all_files if f])
+    except subprocess.CalledProcessError:
+        return []
+def get_changed_files_default_tip() -> list[str]:
+    """
+    Get files changed compared to the tip of the default branch.
+    This does a two-dot diff: what's different between HEAD and origin/default.
+    Returns:
+        List of changed file paths.
+    """
+    default_branch = get_default_branch()
+    try:
+        # Stage all changes so they appear in diff
+        subprocess.run(["git", "add", "-A"], capture_output=True, check=False)
+        # Two-dot diff against default branch tip
+        result = subprocess.run(
+            ["git", "diff", "--name-only", f"origin/{default_branch}..HEAD"],
+            capture_output=True,
+            text=True,
+            check=True,
+        )
+        committed_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
+        # Also get staged changes not yet committed
+        result = subprocess.run(
+            ["git", "diff", "--name-only", "--cached"],
+            capture_output=True,
+            text=True,
+            check=False,
+        )
+        staged_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
+        # Get untracked files
+        result = subprocess.run(
+            ["git", "ls-files", "--others", "--exclude-standard"],
+            capture_output=True,
+            text=True,
+            check=False,
+        )
+        untracked_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
+        all_files = committed_files | staged_files | untracked_files
+        return sorted([f for f in all_files if f])
+    except subprocess.CalledProcessError:
+        return []
+def get_changed_files_prompt() -> list[str]:
+    """
+    Get files changed since the prompt was submitted.
+    This compares against the baseline captured by capture_prompt_work_tree.sh.
+    Returns:
+        List of changed file paths.
+    """
+    baseline_path = Path(".deepwork/.last_work_tree")
+    try:
+        # Stage all changes so we can see them with --cached
+        subprocess.run(["git", "add", "-A"], capture_output=True, check=False)
+        # Get all staged files (includes what was just staged)
+        result = subprocess.run(
+            ["git", "diff", "--name-only", "--cached"],
+            capture_output=True,
+            text=True,
+            check=False,
+        )
+        current_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
+        current_files = {f for f in current_files if f}
+        if baseline_path.exists():
+            # Read baseline and find new files
+            baseline_files = set(baseline_path.read_text().strip().split("\n"))
+            baseline_files = {f for f in baseline_files if f}
+            # Return files that are in current but not in baseline
+            new_files = current_files - baseline_files
+            return sorted(new_files)
+        else:
+            # No baseline, return all current changes
+            return sorted(current_files)
+    except (subprocess.CalledProcessError, OSError):
+        return []
+def get_changed_files_for_mode(mode: str) -> list[str]:
+    """
+    Get changed files for a specific compare_to mode.
+    Args:
+        mode: One of 'base', 'default_tip', or 'prompt'
+    Returns:
+        List of changed file paths.
+    """
+    if mode == "base":
+        return get_changed_files_base()
+    elif mode == "default_tip":
+        return get_changed_files_default_tip()
+    elif mode == "prompt":
+        return get_changed_files_prompt()
+    else:
+        # Unknown mode, fall back to base
+        return get_changed_files_base()
 def extract_promise_tags(text: str) -> set[str]:
     """
     Extract policy names from <promise> tags in text.
@@ -87,23 +298,9 @@ def main() -> None:
         required=True,
         help="Path to .deepwork.policy.yml file",
     )
-    parser.add_argument(
-        "--changed-files",
-        type=str,
-        required=True,
-        help="Newline-separated list of changed files",
-    )
     args = parser.parse_args()
-    # Parse changed files (newline-separated)
-    changed_files = [f.strip() for f in args.changed_files.split("\n") if f.strip()]
-    if not changed_files:
-        # No files changed, nothing to evaluate
-        print("{}")
-        return
     # Check if policy file exists
     policy_path = Path(args.policy_file)
     if not policy_path.exists():
@@ -122,7 +319,7 @@ def main() -> None:
     # Extract promise tags from conversation
     promised_policies = extract_promise_tags(conversation_context)
-    # Parse and evaluate policies
+    # Parse policies
     try:
         policies = parse_policy_file(policy_path)
     except PolicyParseError as e:
@@ -136,8 +333,28 @@ def main() -> None:
         print("{}")
         return
-    # Evaluate which policies fire
-    fired_policies = evaluate_policies(policies, changed_files, promised_policies)
+    # Group policies by compare_to mode to minimize git calls
+    policies_by_mode: dict[str, list[Policy]] = {}
+    for policy in policies:
+        mode = policy.compare_to
+        if mode not in policies_by_mode:
+            policies_by_mode[mode] = []
+        policies_by_mode[mode].append(policy)
+    # Get changed files for each mode and evaluate policies
+    fired_policies: list[Policy] = []
+    for mode, mode_policies in policies_by_mode.items():
+        changed_files = get_changed_files_for_mode(mode)
+        if not changed_files:
+            continue
+        for policy in mode_policies:
+            # Skip if already promised
+            if policy.name in promised_policies:
+                continue
+            # Evaluate this policy
+            if evaluate_policy(policy, changed_files):
+                fired_policies.append(policy)
     if not fired_policies:
         # No policies fired

deepwork/schemas/policy_schema.py CHANGED Viewed

@@ -58,6 +58,16 @@ POLICY_SCHEMA: dict[str, Any] = {
                 "minLength": 1,
                 "description": "Path to a file containing instructions (alternative to inline instructions)",
             },
+            "compare_to": {
+                "type": "string",
+                "enum": ["base", "default_tip", "prompt"],
+                "description": (
+                    "What to compare against when detecting changed files. "
+                    "'base' (default) compares to the base of the current branch. "
+                    "'default_tip' compares to the tip of the default branch. "
+                    "'prompt' compares to the state at the start of the prompt."
+                ),
+            },
         },
         "oneOf": [
             {"required": ["instructions"]},

deepwork/standard_jobs/deepwork_jobs/job.yml CHANGED Viewed

@@ -1,20 +1,23 @@
 name: deepwork_jobs
-version: "0.1.0"
+version: "0.2.0"
 summary: "DeepWork job management commands"
 description: |
   Core commands for managing DeepWork jobs. These commands help you define new multi-step
-  workflows and refine existing ones.
+  workflows and learn from running them.
   The `define` command guides you through an interactive process to create a new job by
   asking detailed questions about your workflow, understanding each step's inputs and outputs,
   and generating all necessary files.
-  The `refine` command helps you modify existing jobs safely by understanding what you want
-  to change, validating the impact, and ensuring consistency across your workflow.
+  The `learn` command reflects on conversations where DeepWork jobs were run, identifies
+  confusion or inefficiencies, and improves job instructions. It also captures bespoke
+  learnings specific to the current run into AGENTS.md files in the working folder.
 changelog:
   - version: "0.1.0"
     changes: "Initial version"
+  - version: "0.2.0"
+    changes: "Replaced refine command with learn command for conversation-driven improvement"
 steps:
   - id: define
@@ -74,29 +77,31 @@ steps:
             If ANY criterion is not met, continue working to address it.
             If ALL criteria are satisfied, include `<promise>✓ Quality Criteria Met</promise>` in your response.
-  - id: refine
-    name: "Refine Existing Job"
-    description: "Modify an existing job definition"
-    instructions_file: steps/refine.md
+  - id: learn
+    name: "Learn from Job Execution"
+    description: "Reflect on conversation to improve job instructions and capture learnings"
+    instructions_file: steps/learn.md
     inputs:
       - name: job_name
-        description: "Name of the job to refine"
+        description: "Name of the job that was run (optional - will auto-detect from conversation)"
     outputs:
-      - job.yml
+      - learning_summary.md
     dependencies: []
     hooks:
       after_agent:
         - prompt: |
-            Verify the refinement meets ALL quality criteria before completing:
+            Verify the learning process meets ALL quality criteria before completing:
-            1. **Job Consistency**: Do the changes maintain overall job consistency?
-            2. **Valid Dependencies**: Are all step dependencies logically valid (no circular refs)?
-            3. **Semantic Versioning**: Was the version bumped appropriately (major/minor/patch)?
-            4. **Changelog Updated**: Is the changelog updated with a description of changes?
-            5. **User Understanding**: Does the user understand the impact of the changes?
-            6. **Breaking Changes**: Were any breaking changes clearly communicated?
-            7. **Files Updated**: Are all affected files (job.yml, step files) updated?
-            8. **Sync Complete**: Has `deepwork sync` been run to regenerate commands?
+            1. **Conversation Analyzed**: Did you review the conversation for DeepWork job executions?
+            2. **Confusion Identified**: Did you identify points of confusion, errors, or inefficiencies?
+            3. **Instructions Improved**: Were job instructions updated to address identified issues?
+            4. **Instructions Concise**: Are instructions free of redundancy and unnecessary verbosity?
+            5. **Shared Content Extracted**: Is lengthy/duplicated content extracted into referenced files?
+            6. **Bespoke Learnings Captured**: Were run-specific learnings added to AGENTS.md?
+            7. **File References Used**: Do AGENTS.md entries reference other files where appropriate?
+            8. **Working Folder Correct**: Is AGENTS.md in the correct working folder for the job?
+            9. **Generalizable Separated**: Are generalizable improvements in instructions, not AGENTS.md?
+            10. **Sync Complete**: Has `deepwork sync` been run if instructions were modified?
             If ANY criterion is not met, continue working to address it.
             If ALL criteria are satisfied, include `<promise>✓ Quality Criteria Met</promise>` in your response.

deepwork/standard_jobs/deepwork_jobs/steps/define.md CHANGED Viewed

@@ -62,6 +62,12 @@ For each major phase they mentioned, ask detailed questions:
 **Note**: You're gathering this information to understand what instructions will be needed, but you won't create the instruction files yet - that happens in the `implement` step.
+### Capability Considerations
+When defining steps, identify any that require specialized tools:
+**Browser Automation**: If any step involves web scraping, form filling, interactive browsing, UI testing, or research requiring website visits, ask the user what browser tools they have available. For Claude Code users, **Claude in Chrome** (Anthropic's browser extension) has been tested with DeepWork and is recommended for new users. Don't assume a default—confirm the tool before designing browser-dependent steps.
 ### Step 3: Validate the Workflow
 After gathering information about all steps:

deepwork/standard_jobs/deepwork_jobs/steps/implement.md CHANGED Viewed

@@ -144,6 +144,12 @@ If a step in the job.yml has `stop_hooks` defined, the generated instruction fil
 This alignment ensures the AI agent knows exactly what will be validated and can self-check before completing.
+### Using Supplementary Reference Files
+Step instructions can include additional `.md` files in the `steps/` directory for detailed examples, templates, or reference material. Reference them using the full path from the project root.
+See `.deepwork/jobs/deepwork_jobs/steps/supplemental_file_references.md` for detailed documentation and examples.
 ### Step 4: Verify job.yml Location
 Verify that `job.yml` is in the correct location at `.deepwork/jobs/[job_name]/job.yml`. The define step should have created it there. If for some reason it's not there, you may need to create or move it.
@@ -161,11 +167,9 @@ This will:
 - Generate slash-commands for each step
 - Make the commands available in `.claude/commands/` (or appropriate platform directory)
-### Step 6: Reload Commands
+### Step 6: Relay Reload Instructions
-Instruct the user to reload commands in their current session:
-- Run `/reload` command (if available)
-- Or restart the Claude session
+After running `deepwork sync`, look at the "To use the new commands" section in the output. **Relay these exact reload instructions to the user** so they know how to pick up the new commands. Don't just reference the sync output - tell them directly what they need to do (e.g., "Type 'exit' then run 'claude --resume'" for Claude Code, or "Run '/memory refresh'" for Gemini CLI).
 ### Step 7: Consider Policies for the New Job
@@ -391,7 +395,7 @@ After running `deepwork sync`, the following slash-commands are now available:
 ## Next Steps
-1. **Reload commands**: Run `/reload` or restart your Claude session
+1. **Reload commands**: [Include the specific reload instructions from the `deepwork sync` output here]
 2. **Start the workflow**: Run `/[job_name].[first_step_id]` to begin
 3. **Test the job**: Try executing the first step to ensure everything works
@@ -418,7 +422,7 @@ Before marking this step complete, ensure:
 - [ ] Each instruction file is complete and actionable
 - [ ] `deepwork sync` executed successfully
 - [ ] Commands generated in platform directory
-- [ ] User informed of next steps (reload commands)
+- [ ] User informed to follow reload instructions from `deepwork sync`
 - [ ] implementation_summary.md created
 - [ ] Considered whether policies would benefit this job (Step 7)
 - [ ] If policies suggested, offered to run `/deepwork_policy.define`

deepwork 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

deepwork 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl