PyPI - bioguider - Versions diffs - 0.2.52__py3-none-any.whl - Mend

bioguider 0.2.52__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (84) hide show

bioguider/__init__.py +0 -0
bioguider/agents/__init__.py +0 -0
bioguider/agents/agent_task.py +92 -0
bioguider/agents/agent_tools.py +176 -0
bioguider/agents/agent_utils.py +504 -0
bioguider/agents/collection_execute_step.py +182 -0
bioguider/agents/collection_observe_step.py +125 -0
bioguider/agents/collection_plan_step.py +156 -0
bioguider/agents/collection_task.py +184 -0
bioguider/agents/collection_task_utils.py +142 -0
bioguider/agents/common_agent.py +137 -0
bioguider/agents/common_agent_2step.py +215 -0
bioguider/agents/common_conversation.py +61 -0
bioguider/agents/common_step.py +85 -0
bioguider/agents/consistency_collection_step.py +102 -0
bioguider/agents/consistency_evaluation_task.py +57 -0
bioguider/agents/consistency_evaluation_task_utils.py +14 -0
bioguider/agents/consistency_observe_step.py +110 -0
bioguider/agents/consistency_query_step.py +77 -0
bioguider/agents/dockergeneration_execute_step.py +186 -0
bioguider/agents/dockergeneration_observe_step.py +154 -0
bioguider/agents/dockergeneration_plan_step.py +158 -0
bioguider/agents/dockergeneration_task.py +158 -0
bioguider/agents/dockergeneration_task_utils.py +220 -0
bioguider/agents/evaluation_installation_task.py +270 -0
bioguider/agents/evaluation_readme_task.py +767 -0
bioguider/agents/evaluation_submission_requirements_task.py +172 -0
bioguider/agents/evaluation_task.py +206 -0
bioguider/agents/evaluation_tutorial_task.py +169 -0
bioguider/agents/evaluation_tutorial_task_prompts.py +187 -0
bioguider/agents/evaluation_userguide_prompts.py +179 -0
bioguider/agents/evaluation_userguide_task.py +154 -0
bioguider/agents/evaluation_utils.py +127 -0
bioguider/agents/identification_execute_step.py +181 -0
bioguider/agents/identification_observe_step.py +104 -0
bioguider/agents/identification_plan_step.py +140 -0
bioguider/agents/identification_task.py +270 -0
bioguider/agents/identification_task_utils.py +22 -0
bioguider/agents/peo_common_step.py +64 -0
bioguider/agents/prompt_utils.py +253 -0
bioguider/agents/python_ast_repl_tool.py +69 -0
bioguider/agents/rag_collection_task.py +130 -0
bioguider/conversation.py +67 -0
bioguider/database/code_structure_db.py +500 -0
bioguider/database/summarized_file_db.py +146 -0
bioguider/generation/__init__.py +39 -0
bioguider/generation/benchmark_metrics.py +610 -0
bioguider/generation/change_planner.py +189 -0
bioguider/generation/document_renderer.py +157 -0
bioguider/generation/llm_cleaner.py +67 -0
bioguider/generation/llm_content_generator.py +1128 -0
bioguider/generation/llm_injector.py +809 -0
bioguider/generation/models.py +85 -0
bioguider/generation/output_manager.py +74 -0
bioguider/generation/repo_reader.py +37 -0
bioguider/generation/report_loader.py +166 -0
bioguider/generation/style_analyzer.py +36 -0
bioguider/generation/suggestion_extractor.py +436 -0
bioguider/generation/test_metrics.py +189 -0
bioguider/managers/benchmark_manager.py +785 -0
bioguider/managers/evaluation_manager.py +215 -0
bioguider/managers/generation_manager.py +686 -0
bioguider/managers/generation_test_manager.py +107 -0
bioguider/managers/generation_test_manager_v2.py +525 -0
bioguider/rag/__init__.py +0 -0
bioguider/rag/config.py +117 -0
bioguider/rag/data_pipeline.py +651 -0
bioguider/rag/embedder.py +24 -0
bioguider/rag/rag.py +138 -0
bioguider/settings.py +103 -0
bioguider/utils/code_structure_builder.py +59 -0
bioguider/utils/constants.py +135 -0
bioguider/utils/default.gitignore +140 -0
bioguider/utils/file_utils.py +215 -0
bioguider/utils/gitignore_checker.py +175 -0
bioguider/utils/notebook_utils.py +117 -0
bioguider/utils/pyphen_utils.py +73 -0
bioguider/utils/python_file_handler.py +65 -0
bioguider/utils/r_file_handler.py +551 -0
bioguider/utils/utils.py +163 -0
bioguider-0.2.52.dist-info/LICENSE +21 -0
bioguider-0.2.52.dist-info/METADATA +51 -0
bioguider-0.2.52.dist-info/RECORD +84 -0
bioguider-0.2.52.dist-info/WHEEL +4 -0

bioguider/generation/models.py ADDED Viewed

@@ -0,0 +1,85 @@
+from __future__ import annotations
+from typing import Optional, List, Dict, Any
+from pydantic import BaseModel, Field
+class EvaluationReport(BaseModel):
+    """Evaluation results for a repository, one ``*_evaluation``/``*_files`` pair per area.
+    Every field is optional and defaults to ``None``. Each ``*_evaluation``
+    field is a free-form ``Dict[str, Any]`` and each ``*_files`` field is a
+    list of strings (presumably file paths - confirm against the producer).
+    """
+    # Report metadata.
+    timestamp: Optional[str] = None
+    repo_url: Optional[str] = None
+    # Installation evaluation and its associated files.
+    installation_evaluation: Optional[Dict[str, Any]] = None
+    installation_files: Optional[List[str]] = None
+    # README evaluation and its associated files.
+    readme_evaluation: Optional[Dict[str, Any]] = None
+    readme_files: Optional[List[str]] = None
+    # Optional: rich user guide evaluation content and any explicitly listed files
+    userguide_evaluation: Optional[Dict[str, Any]] = None
+    userguide_files: Optional[List[str]] = None
+    # Optional: tutorial evaluation content and any explicitly listed files
+    tutorial_evaluation: Optional[Dict[str, Any]] = None
+    tutorial_files: Optional[List[str]] = None
+    # Submission-requirements evaluation and its associated files.
+    submission_requirements_evaluation: Optional[Dict[str, Any]] = None
+    submission_requirements_files: Optional[List[str]] = None
+class SuggestionItem(BaseModel):
+    id: str
+    category: str
+    severity: str = Field(default="should_fix")
+    source: Dict[str, str] = Field(default_factory=dict)
+    target_files: List[str] = Field(default_factory=list)
+    action: str
+    anchor_hint: Optional[str] = None
+    content_guidance: Optional[str] = None
+class StyleProfile(BaseModel):
+    heading_style: str = Field(default="#")
+    list_style: str = Field(default="-")
+    code_fence_style: str = Field(default="```")
+    tone_markers: List[str] = Field(default_factory=list)
+    link_style: str = Field(default="inline")
+class PlannedEdit(BaseModel):
+    file_path: str
+    edit_type: str
+    anchor: Dict[str, str] = Field(default_factory=dict)
+    content_template: str
+    rationale: str
+    minimal_diff: bool = Field(default=True)
+    suggestion_id: Optional[str] = None
+class DocumentPlan(BaseModel):
+    """A set of planned edits for one repository path together with a style profile.
+    ``repo_path`` and ``style_profile`` are required; ``planned_edits``
+    defaults to a fresh empty list.
+    """
+    repo_path: str
+    style_profile: StyleProfile
+    # default_factory gives every instance its own list.
+    planned_edits: List[PlannedEdit] = Field(default_factory=list)
+class OutputArtifact(BaseModel):
+    """Record of one written output file.
+    The three string fields are required; ``diff_stats`` maps stat names to
+    integer counts and defaults to a fresh empty dict.
+    """
+    # Path of the written file (presumably relative to the output directory - confirm with the writer).
+    dest_rel_path: str
+    # Path of the file it was derived from (presumably relative to the repository - confirm).
+    original_rel_path: str
+    # Free-text summary of the change.
+    change_summary: str
+    diff_stats: Dict[str, int] = Field(default_factory=dict)
+class GenerationManifest(BaseModel):
+    """Manifest for a generation run: suggestions, planned edits, artifacts and skipped items.
+    Every field is optional: the three string fields default to ``None`` and
+    the four list fields each default to a fresh empty list.
+    """
+    # Run metadata.
+    repo_url: Optional[str] = None
+    report_path: Optional[str] = None
+    output_dir: Optional[str] = None
+    # Collected records; default_factory gives every instance its own list.
+    suggestions: List[SuggestionItem] = Field(default_factory=list)
+    planned_edits: List[PlannedEdit] = Field(default_factory=list)
+    artifacts: List[OutputArtifact] = Field(default_factory=list)
+    # Plain strings naming skipped items (exact meaning is set by the caller - confirm).
+    skipped: List[str] = Field(default_factory=list)
+class GenerationReport(BaseModel):
+    """Report for a generation run made of free-form section dictionaries.
+    ``repo_url`` and ``output_dir`` default to ``None``; ``sections`` defaults
+    to a fresh empty list of ``Dict[str, Any]`` entries.
+    """
+    repo_url: Optional[str] = None
+    output_dir: Optional[str] = None
+    # No schema is imposed on individual sections here.
+    sections: List[Dict[str, Any]] = Field(default_factory=list)

bioguider/generation/output_manager.py ADDED Viewed

@@ -0,0 +1,74 @@
+from __future__ import annotations
+import os
+import json
+from datetime import datetime
+from pathlib import Path
+from typing import Dict, List, Optional, Tuple
+from .models import OutputArtifact, GenerationManifest, PlannedEdit
+class OutputManager:
+    def __init__(self, base_outputs_dir: Optional[str] = None):
+        self.base_outputs_dir = base_outputs_dir or "outputs"
+    def prepare_output_dir(self, repo_url_or_name: str) -> str:
+        repo_name = self._extract_repo_name(repo_url_or_name)
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        out_dir = os.path.join(self.base_outputs_dir, f"{repo_name}", timestamp)
+        os.makedirs(out_dir, exist_ok=True)
+        return out_dir
+    def get_latest_output_dir(self, repo_url_or_name: str) -> str:
+        repo_name = self._extract_repo_name(repo_url_or_name)
+        out_dir = Path(self.base_outputs_dir, f"{repo_name}")
+        latest_tm = datetime.min
+        if not out_dir.exists():
+            return None
+        for f in out_dir.iterdir():
+            if not f.is_dir():
+                continue
+            tm = f.name.split("/")[-1]
+            if not tm.isdigit():
+                continue
+            tm = datetime.strptime(tm, "%Y%m%d_%H%M%S")
+            if tm > latest_tm:
+                latest_tm = tm
+                latest_dir = f.name
+        return latest_dir
+    def _extract_repo_name(self, url_or_name: str) -> str:
+        name = url_or_name.rstrip("/")
+        if "/" in name:
+            name = name.split("/")[-1]
+        name = name.replace(".git", "")
+        return name
+    def write_files(self, output_dir: str, files: Dict[str, str], diff_stats_by_file: Dict[str, dict] | None = None) -> List[OutputArtifact]:
+        artifacts: List[OutputArtifact] = []
+        for rel_path, content in files.items():
+            dest = os.path.join(output_dir, rel_path)
+            os.makedirs(os.path.dirname(dest), exist_ok=True)
+            with open(dest, "w", encoding="utf-8") as fobj:
+                fobj.write(content)
+            artifacts.append(OutputArtifact(
+                dest_rel_path=rel_path,
+                original_rel_path=rel_path,
+                change_summary="revised document",
+                diff_stats=(diff_stats_by_file or {}).get(rel_path, {})
+            ))
+        return artifacts
+    def write_manifest(
+        self,
+        output_dir: str,
+        manifest: GenerationManifest,
+    ) -> str:
+        dest = os.path.join(output_dir, "manifest.json")
+        with open(dest, "w", encoding="utf-8") as fobj:
+            json.dump(manifest.model_dump(), fobj, indent=2)
+        return dest

bioguider/generation/repo_reader.py ADDED Viewed

@@ -0,0 +1,37 @@
+from __future__ import annotations
+import os
+from typing import Dict, Optional, List, Tuple
+class RepoReader:
+    def __init__(self, repo_path: str, gitignore_path: Optional[str] = None):
+        self.repo_path = repo_path
+        self.gitignore_path = gitignore_path
+    def read_files(self, rel_paths: List[str]) -> Tuple[Dict[str, str], List[str]]:
+        contents: Dict[str, str] = {}
+        missing: List[str] = []
+        for rel in rel_paths:
+            abs_path = os.path.join(self.repo_path, rel)
+            if not os.path.isfile(abs_path):
+                missing.append(rel)
+                continue
+            try:
+                with open(abs_path, "r", encoding="utf-8") as fobj:
+                    contents[rel] = fobj.read()
+            except Exception:
+                missing.append(rel)
+        return contents, missing
+    def read_default_targets(self) -> Tuple[Dict[str, str], List[str]]:
+        # Common targets we may need to modify
+        candidates = [
+            "README.md",
+            "README.rst",
+            "vignettes/install.Rmd",
+            "vignettes/install_v5.Rmd",
+        ]
+        return self.read_files(candidates)

bioguider/generation/report_loader.py ADDED Viewed

@@ -0,0 +1,166 @@
+from __future__ import annotations
+import json
+from typing import Tuple, Dict, Any
+from .models import EvaluationReport
+class EvaluationReportLoader:
+    def _parse_bool(self, token: str) -> Any:
+        if token == "True":
+            return True
+        if token == "False":
+            return False
+        return token
+    def _split_args(self, s: str) -> Dict[str, Any]:
+        # Split a function-like argument list into a dict, respecting quotes
+        args: Dict[str, Any] = {}
+        current = ""
+        parts = []
+        in_single = False
+        in_double = False
+        for ch in s:
+            if ch == "'" and not in_double:
+                in_single = not in_single
+                current += ch
+                continue
+            if ch == '"' and not in_single:
+                in_double = not in_double
+                current += ch
+                continue
+            if ch == "," and not in_single and not in_double:
+                parts.append(current.strip())
+                current = ""
+            else:
+                current += ch
+        if current.strip():
+            parts.append(current.strip())
+        for p in parts:
+            if not p:
+                continue
+            if "=" not in p:
+                continue
+            k, v = p.split("=", 1)
+            k = k.strip()
+            v = v.strip()
+            if (v.startswith("'") and v.endswith("'")) or (v.startswith('"') and v.endswith('"')):
+                v = v[1:-1]
+            else:
+                # try bool/int
+                if v in ("True", "False"):
+                    v = self._parse_bool(v)
+                else:
+                    try:
+                        v = int(v)
+                    except Exception:
+                        pass
+            args[k] = v
+        return args
+    def _parse_structured_block(self, text: str, key: str) -> Dict[str, Any] | None:
+        # Extract key=ClassName(arg1=val1, ...) and parse args
+        marker = f"{key}="
+        idx = text.find(marker)
+        if idx == -1:
+            return None
+        rest = text[idx + len(marker) :]
+        # find first '('
+        pidx = rest.find("(")
+        if pidx == -1:
+            return None
+        rest = rest[pidx + 1 :]
+        # find matching ')'
+        depth = 1
+        collected = ""
+        for ch in rest:
+            if ch == "(":
+                depth += 1
+            elif ch == ")":
+                depth -= 1
+                if depth == 0:
+                    break
+            collected += ch
+        if not collected:
+            return None
+        return self._split_args(collected)
+    def _parse_submission_eval_str(self, text: str) -> Dict[str, Any]:
+        # Parse space-separated key=value pairs
+        out: Dict[str, Any] = {}
+        for token in text.strip().split():
+            if "=" not in token:
+                continue
+            k, v = token.split("=", 1)
+            v = v.strip()
+            if v in ("True", "False"):
+                out[k] = True if v == "True" else False
+            else:
+                out[k] = v
+        return out
+    def load(self, report_path: str) -> Tuple[EvaluationReport, str]:
+        with open(report_path, "r", encoding="utf-8") as fobj:
+            raw = json.load(fobj)
+        # Normalize nested stringified fields if any
+        def normalize(obj):
+            if isinstance(obj, str):
+                s = obj.strip()
+                if (s.startswith("{") and s.endswith("}")) or (s.startswith("[") and s.endswith("]")):
+                    try:
+                        return json.loads(s)
+                    except Exception:
+                        return obj
+                return obj
+            if isinstance(obj, dict):
+                return {k: normalize(v) for k, v in obj.items()}
+            if isinstance(obj, list):
+                return [normalize(v) for v in obj]
+            return obj
+        normalized = normalize(raw)
+        # Special handling for stringified evaluation fields
+        inst_eval = normalized.get("installation")
+        if isinstance(inst_eval, str):
+            normalized["installation_evaluation"] = {
+                "structured_evaluation": self._parse_structured_block(inst_eval["evaluation"], "structured_evaluation"),
+            }
+        else:
+            normalized["installation_evaluation"] = inst_eval["evaluation"]
+            normalized["installation_files"] = inst_eval["files"]
+        readme_eval = normalized.get("readme")
+        if isinstance(readme_eval["evaluations"], dict):
+            fixed: Dict[str, Any] = {}
+            for fname, val in readme_eval.items():
+                if isinstance(val, str):
+                    fixed[fname] = {
+                        "structured_evaluation": self._parse_structured_block(val, "structured_evaluation"),
+                    }
+                else:
+                    fixed[fname] = val
+            normalized["readme_evaluation"] = fixed
+            normalized["readme_files"] = readme_eval["files"]
+        userguide_eval = normalized.get("userguide")
+        if isinstance(userguide_eval["evaluation"], dict):
+            normalized["userguide_evaluation"] = userguide_eval["evaluation"]
+            normalized["userguide_files"] = userguide_eval["files"]
+        # Tutorial evaluation handling
+        tutorial_eval = normalized.get("tutorial")
+        if tutorial_eval and isinstance(tutorial_eval.get("evaluation"), dict):
+            normalized["tutorial_evaluation"] = tutorial_eval["evaluation"]
+            normalized["tutorial_files"] = tutorial_eval["files"]
+        # userguide_eval = normalized.get("userguide")
+        # if isinstance(userguide_eval, str):
+        #     normalized["userguide_evaluation"] = self._parse_structured_block(userguide_eval["evaluation"], "structured_evaluation")
+        report = EvaluationReport(**normalized)
+        return report, report_path

bioguider/generation/style_analyzer.py ADDED Viewed

@@ -0,0 +1,36 @@
+from __future__ import annotations
+from typing import Dict
+from .models import StyleProfile
+class StyleAnalyzer:
+    def analyze(self, files: Dict[str, str]) -> StyleProfile:
+        profile = StyleProfile()
+        # Infer heading style: prefer README
+        readme = None
+        for name in ("README.md", "README.rst"):
+            if name in files:
+                readme = files[name]
+                break
+        sample = readme or next(iter(files.values()), "")
+        if "\n# " in sample or sample.startswith("# "):
+            profile.heading_style = "#"
+        elif "\n## " in sample:
+            profile.heading_style = "#"
+        else:
+            profile.heading_style = "#"
+        # List style
+        if "\n- " in sample:
+            profile.list_style = "-"
+        elif "\n* " in sample:
+            profile.list_style = "*"
+        # Tone markers (heuristic): keep minimal
+        profile.tone_markers = ["concise", "neutral"]
+        return profile