bioguider-0.2.52-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84):
  1. bioguider/__init__.py +0 -0
  2. bioguider/agents/__init__.py +0 -0
  3. bioguider/agents/agent_task.py +92 -0
  4. bioguider/agents/agent_tools.py +176 -0
  5. bioguider/agents/agent_utils.py +504 -0
  6. bioguider/agents/collection_execute_step.py +182 -0
  7. bioguider/agents/collection_observe_step.py +125 -0
  8. bioguider/agents/collection_plan_step.py +156 -0
  9. bioguider/agents/collection_task.py +184 -0
  10. bioguider/agents/collection_task_utils.py +142 -0
  11. bioguider/agents/common_agent.py +137 -0
  12. bioguider/agents/common_agent_2step.py +215 -0
  13. bioguider/agents/common_conversation.py +61 -0
  14. bioguider/agents/common_step.py +85 -0
  15. bioguider/agents/consistency_collection_step.py +102 -0
  16. bioguider/agents/consistency_evaluation_task.py +57 -0
  17. bioguider/agents/consistency_evaluation_task_utils.py +14 -0
  18. bioguider/agents/consistency_observe_step.py +110 -0
  19. bioguider/agents/consistency_query_step.py +77 -0
  20. bioguider/agents/dockergeneration_execute_step.py +186 -0
  21. bioguider/agents/dockergeneration_observe_step.py +154 -0
  22. bioguider/agents/dockergeneration_plan_step.py +158 -0
  23. bioguider/agents/dockergeneration_task.py +158 -0
  24. bioguider/agents/dockergeneration_task_utils.py +220 -0
  25. bioguider/agents/evaluation_installation_task.py +270 -0
  26. bioguider/agents/evaluation_readme_task.py +767 -0
  27. bioguider/agents/evaluation_submission_requirements_task.py +172 -0
  28. bioguider/agents/evaluation_task.py +206 -0
  29. bioguider/agents/evaluation_tutorial_task.py +169 -0
  30. bioguider/agents/evaluation_tutorial_task_prompts.py +187 -0
  31. bioguider/agents/evaluation_userguide_prompts.py +179 -0
  32. bioguider/agents/evaluation_userguide_task.py +154 -0
  33. bioguider/agents/evaluation_utils.py +127 -0
  34. bioguider/agents/identification_execute_step.py +181 -0
  35. bioguider/agents/identification_observe_step.py +104 -0
  36. bioguider/agents/identification_plan_step.py +140 -0
  37. bioguider/agents/identification_task.py +270 -0
  38. bioguider/agents/identification_task_utils.py +22 -0
  39. bioguider/agents/peo_common_step.py +64 -0
  40. bioguider/agents/prompt_utils.py +253 -0
  41. bioguider/agents/python_ast_repl_tool.py +69 -0
  42. bioguider/agents/rag_collection_task.py +130 -0
  43. bioguider/conversation.py +67 -0
  44. bioguider/database/code_structure_db.py +500 -0
  45. bioguider/database/summarized_file_db.py +146 -0
  46. bioguider/generation/__init__.py +39 -0
  47. bioguider/generation/benchmark_metrics.py +610 -0
  48. bioguider/generation/change_planner.py +189 -0
  49. bioguider/generation/document_renderer.py +157 -0
  50. bioguider/generation/llm_cleaner.py +67 -0
  51. bioguider/generation/llm_content_generator.py +1128 -0
  52. bioguider/generation/llm_injector.py +809 -0
  53. bioguider/generation/models.py +85 -0
  54. bioguider/generation/output_manager.py +74 -0
  55. bioguider/generation/repo_reader.py +37 -0
  56. bioguider/generation/report_loader.py +166 -0
  57. bioguider/generation/style_analyzer.py +36 -0
  58. bioguider/generation/suggestion_extractor.py +436 -0
  59. bioguider/generation/test_metrics.py +189 -0
  60. bioguider/managers/benchmark_manager.py +785 -0
  61. bioguider/managers/evaluation_manager.py +215 -0
  62. bioguider/managers/generation_manager.py +686 -0
  63. bioguider/managers/generation_test_manager.py +107 -0
  64. bioguider/managers/generation_test_manager_v2.py +525 -0
  65. bioguider/rag/__init__.py +0 -0
  66. bioguider/rag/config.py +117 -0
  67. bioguider/rag/data_pipeline.py +651 -0
  68. bioguider/rag/embedder.py +24 -0
  69. bioguider/rag/rag.py +138 -0
  70. bioguider/settings.py +103 -0
  71. bioguider/utils/code_structure_builder.py +59 -0
  72. bioguider/utils/constants.py +135 -0
  73. bioguider/utils/default.gitignore +140 -0
  74. bioguider/utils/file_utils.py +215 -0
  75. bioguider/utils/gitignore_checker.py +175 -0
  76. bioguider/utils/notebook_utils.py +117 -0
  77. bioguider/utils/pyphen_utils.py +73 -0
  78. bioguider/utils/python_file_handler.py +65 -0
  79. bioguider/utils/r_file_handler.py +551 -0
  80. bioguider/utils/utils.py +163 -0
  81. bioguider-0.2.52.dist-info/LICENSE +21 -0
  82. bioguider-0.2.52.dist-info/METADATA +51 -0
  83. bioguider-0.2.52.dist-info/RECORD +84 -0
  84. bioguider-0.2.52.dist-info/WHEEL +4 -0
@@ -0,0 +1,85 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional, List, Dict, Any
4
+ from pydantic import BaseModel, Field
5
+
6
+
7
class EvaluationReport(BaseModel):
    # Evaluation report as consumed by the generation code. Each documentation
    # section contributes an optional "<section>_evaluation" mapping and an
    # optional "<section>_files" list; every field defaults to None.
    timestamp: Optional[str] = None
    repo_url: Optional[str] = None

    # Installation section
    installation_evaluation: Optional[Dict[str, Any]] = None
    installation_files: Optional[List[str]] = None

    # README section
    readme_evaluation: Optional[Dict[str, Any]] = None
    readme_files: Optional[List[str]] = None

    # User guide section: rich evaluation content plus any explicitly listed files
    userguide_evaluation: Optional[Dict[str, Any]] = None
    userguide_files: Optional[List[str]] = None

    # Tutorial section: evaluation content plus any explicitly listed files
    tutorial_evaluation: Optional[Dict[str, Any]] = None
    tutorial_files: Optional[List[str]] = None

    # Submission-requirements section
    submission_requirements_evaluation: Optional[Dict[str, Any]] = None
    submission_requirements_files: Optional[List[str]] = None
27
+
28
+
29
class SuggestionItem(BaseModel):
    # One suggestion record. `id`, `category` and `action` are required; the
    # remaining fields fall back to the defaults below.
    id: str
    category: str
    severity: str = "should_fix"
    source: Dict[str, str] = Field(default_factory=dict)
    target_files: List[str] = Field(default_factory=list)
    action: str
    anchor_hint: Optional[str] = None
    content_guidance: Optional[str] = None
38
+
39
+
40
class StyleProfile(BaseModel):
    # Markup conventions of a document set. The defaults are "#" headings,
    # "-" bullets, triple-backtick code fences and "inline" links.
    heading_style: str = "#"
    list_style: str = "-"
    code_fence_style: str = "```"
    tone_markers: List[str] = Field(default_factory=list)
    link_style: str = "inline"
46
+
47
+
48
class PlannedEdit(BaseModel):
    # A single planned change to one file. `file_path`, `edit_type`,
    # `content_template` and `rationale` are required.
    file_path: str
    edit_type: str
    anchor: Dict[str, str] = Field(default_factory=dict)
    content_template: str
    rationale: str
    minimal_diff: bool = True
    # Optional back-reference; presumably a SuggestionItem.id (confirm with callers).
    suggestion_id: Optional[str] = None
56
+
57
+
58
class DocumentPlan(BaseModel):
    # Bundle of a repository path, its style profile and the planned edits
    # (an empty list unless supplied).
    repo_path: str
    style_profile: StyleProfile
    planned_edits: List[PlannedEdit] = Field(default_factory=list)
62
+
63
+
64
class OutputArtifact(BaseModel):
    # Record of one written file: destination and original relative paths,
    # a change summary, and integer diff statistics (empty by default).
    dest_rel_path: str
    original_rel_path: str
    change_summary: str
    diff_stats: Dict[str, int] = Field(default_factory=dict)
69
+
70
+
71
class GenerationManifest(BaseModel):
    # Manifest of a generation run: where it came from, where it wrote to,
    # and the suggestions, edits, artifacts and skipped entries it tracked.
    # Every list starts empty; every path/URL is optional.
    repo_url: Optional[str] = None
    report_path: Optional[str] = None
    output_dir: Optional[str] = None
    suggestions: List[SuggestionItem] = Field(default_factory=list)
    planned_edits: List[PlannedEdit] = Field(default_factory=list)
    artifacts: List[OutputArtifact] = Field(default_factory=list)
    skipped: List[str] = Field(default_factory=list)
79
+
80
class GenerationReport(BaseModel):
    # Report of a generation run as a list of free-form section dictionaries
    # (empty unless supplied), plus optional repository URL and output directory.
    repo_url: Optional[str] = None
    output_dir: Optional[str] = None
    sections: List[Dict[str, Any]] = Field(default_factory=list)
84
+
85
+
@@ -0,0 +1,74 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import json
5
+ from datetime import datetime
6
+ from pathlib import Path
7
+ from typing import Dict, List, Optional, Tuple
8
+
9
+ from .models import OutputArtifact, GenerationManifest, PlannedEdit
10
+
11
+
12
class OutputManager:
    """Create timestamped output directories and write generated files into them.

    Directories are laid out as ``<base_outputs_dir>/<repo_name>/<YYYYmmdd_HHMMSS>``.
    """

    # Directory-name format shared by prepare_output_dir and get_latest_output_dir.
    _TIMESTAMP_FORMAT = "%Y%m%d_%H%M%S"

    def __init__(self, base_outputs_dir: Optional[str] = None):
        """Remember the root output directory; falls back to ``"outputs"``."""
        self.base_outputs_dir = base_outputs_dir or "outputs"

    def prepare_output_dir(self, repo_url_or_name: str) -> str:
        """Create and return a new timestamped output directory for the repository.

        Args:
            repo_url_or_name: Repository URL or bare repository name.

        Returns:
            Path of the created directory (it may already exist if two calls
            land in the same second).
        """
        repo_name = self._extract_repo_name(repo_url_or_name)
        timestamp = datetime.now().strftime(self._TIMESTAMP_FORMAT)
        out_dir = os.path.join(self.base_outputs_dir, repo_name, timestamp)
        os.makedirs(out_dir, exist_ok=True)
        return out_dir

    def get_latest_output_dir(self, repo_url_or_name: str) -> Optional[str]:
        """Return the name of the most recent timestamped directory for the repository.

        Args:
            repo_url_or_name: Repository URL or bare repository name.

        Returns:
            The directory *name* (the timestamp, not a full path) of the newest
            run, or ``None`` when the repository has no output directory or no
            sub-directory named with a valid timestamp.
        """
        repo_dir = Path(self.base_outputs_dir, self._extract_repo_name(repo_url_or_name))
        if not repo_dir.is_dir():
            return None

        latest_tm = None
        latest_dir = None
        for entry in repo_dir.iterdir():
            if not entry.is_dir():
                continue
            # Let strptime decide what is a timestamp: the names contain an
            # underscore, so a str.isdigit() pre-check would reject all of them.
            try:
                tm = datetime.strptime(entry.name, self._TIMESTAMP_FORMAT)
            except ValueError:
                continue
            if latest_tm is None or tm > latest_tm:
                latest_tm = tm
                latest_dir = entry.name
        return latest_dir

    def _extract_repo_name(self, url_or_name: str) -> str:
        """Return the last path segment of a URL or name, without a ``.git`` suffix."""
        name = url_or_name.rstrip("/")
        if "/" in name:
            name = name.split("/")[-1]
        # Strip only a trailing ".git"; replacing every occurrence would mangle
        # names such as "my.github.io".
        if name.endswith(".git"):
            name = name[: -len(".git")]
        return name

    def write_files(self, output_dir: str, files: Dict[str, str], diff_stats_by_file: Dict[str, dict] | None = None) -> List[OutputArtifact]:
        """Write each file under ``output_dir`` and describe what was written.

        Args:
            output_dir: Directory the relative paths are resolved against.
            files: Mapping of relative path to text content (written as UTF-8).
            diff_stats_by_file: Optional per-path statistics copied into the
                matching artifact; paths without an entry get an empty dict.

        Returns:
            One ``OutputArtifact`` per written file, in iteration order of ``files``.
        """
        stats_by_file = diff_stats_by_file or {}
        artifacts: List[OutputArtifact] = []
        for rel_path, content in files.items():
            # NOTE(review): rel_path is joined unchecked, so an absolute path or
            # one containing ".." would be written outside output_dir.
            dest = os.path.join(output_dir, rel_path)
            parent = os.path.dirname(dest)
            if parent:
                # os.makedirs("") raises, which happens for a bare file name
                # combined with an empty output_dir.
                os.makedirs(parent, exist_ok=True)
            with open(dest, "w", encoding="utf-8") as fobj:
                fobj.write(content)
            artifacts.append(OutputArtifact(
                dest_rel_path=rel_path,
                original_rel_path=rel_path,
                change_summary="revised document",
                diff_stats=stats_by_file.get(rel_path, {}),
            ))
        return artifacts

    def write_manifest(
        self,
        output_dir: str,
        manifest: GenerationManifest,
    ) -> str:
        """Serialize ``manifest`` to ``<output_dir>/manifest.json`` and return that path."""
        dest = os.path.join(output_dir, "manifest.json")
        with open(dest, "w", encoding="utf-8") as fobj:
            json.dump(manifest.model_dump(), fobj, indent=2)
        return dest
73
+
74
+
@@ -0,0 +1,37 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from typing import Dict, Optional, List, Tuple
5
+
6
+
7
class RepoReader:
    """Read UTF-8 text files from a repository directory by relative path."""

    def __init__(self, repo_path: str, gitignore_path: Optional[str] = None):
        """Store the repository root and an optional gitignore path.

        ``gitignore_path`` is only stored; no method of this class reads it.
        """
        self.repo_path = repo_path
        self.gitignore_path = gitignore_path

    def read_files(self, rel_paths: List[str]) -> Tuple[Dict[str, str], List[str]]:
        """Read the given repo-relative paths.

        Args:
            rel_paths: Paths relative to ``repo_path``.

        Returns:
            ``(contents, missing)``: ``contents`` maps each readable path to its
            text, and ``missing`` lists, in input order, the paths that are not
            regular files or could not be read or decoded as UTF-8.
        """
        contents: Dict[str, str] = {}
        missing: List[str] = []
        for rel in rel_paths:
            abs_path = os.path.join(self.repo_path, rel)
            if not os.path.isfile(abs_path):
                missing.append(rel)
                continue
            try:
                with open(abs_path, "r", encoding="utf-8") as fobj:
                    contents[rel] = fobj.read()
            except (OSError, UnicodeDecodeError):
                # Unreadable or non-UTF-8 files are reported as missing rather
                # than aborting the batch; programming errors still propagate.
                missing.append(rel)
        return contents, missing

    def read_default_targets(self) -> Tuple[Dict[str, str], List[str]]:
        """Read the fixed list of commonly modified documentation files.

        Returns:
            The same ``(contents, missing)`` pair as ``read_files``.
        """
        # Common targets we may need to modify
        candidates = [
            "README.md",
            "README.rst",
            "vignettes/install.Rmd",
            "vignettes/install_v5.Rmd",
        ]
        return self.read_files(candidates)
36
+
37
+
@@ -0,0 +1,166 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from typing import Tuple, Dict, Any
5
+
6
+ from .models import EvaluationReport
7
+
8
+
9
class EvaluationReportLoader:
    """Load an evaluation report JSON file and adapt it to ``EvaluationReport``.

    Besides plain JSON, the loader handles two stringified encodings: JSON
    documents embedded as string values, and ``key=ClassName(arg=value, ...)``
    text blocks.
    """

    def _parse_bool(self, token: str) -> Any:
        """Map ``"True"``/``"False"`` to booleans; return any other token unchanged."""
        if token == "True":
            return True
        if token == "False":
            return False
        return token

    @staticmethod
    def _iter_chars(text: str):
        """Yield ``(char, quoted)`` for every character of ``text``.

        ``quoted`` is True for characters that belong to a single- or
        double-quoted literal, delimiters included. A backslash inside a literal
        escapes the next character, so an escaped quote does not end it.
        """
        quote = ""
        escaped = False
        for ch in text:
            if quote:
                yield ch, True
                if escaped:
                    escaped = False
                elif ch == "\\":
                    escaped = True
                elif ch == quote:
                    quote = ""
            elif ch in "'\"":
                quote = ch
                yield ch, True
            else:
                yield ch, False

    def _split_args(self, s: str) -> Dict[str, Any]:
        """Parse a function-like argument list (``a=1, b='x'``) into a dict.

        Commas inside quoted literals or inside nested ``()``, ``[]`` or ``{}``
        do not separate arguments. Quoted values lose their surrounding quotes,
        ``True``/``False`` become booleans, integer-looking values become
        ``int``; everything else stays a string. Parts without ``=`` are dropped.
        """
        parts = []
        current = []
        depth = 0
        for ch, quoted in self._iter_chars(s):
            if not quoted:
                if ch in "([{":
                    depth += 1
                elif ch in ")]}":
                    # Never go negative on a stray closer.
                    depth = max(depth - 1, 0)
                elif ch == "," and depth == 0:
                    parts.append("".join(current).strip())
                    current = []
                    continue
            current.append(ch)
        parts.append("".join(current).strip())

        args: Dict[str, Any] = {}
        for part in parts:
            if "=" not in part:
                continue
            k, v = part.split("=", 1)
            k = k.strip()
            v = v.strip()
            if (v.startswith("'") and v.endswith("'")) or (v.startswith('"') and v.endswith('"')):
                v = v[1:-1]
            elif v in ("True", "False"):
                v = self._parse_bool(v)
            else:
                try:
                    v = int(v)
                except ValueError:
                    pass  # not an integer: keep the raw string
            args[k] = v
        return args

    def _parse_structured_block(self, text: str, key: str) -> Dict[str, Any] | None:
        """Extract ``key=ClassName(arg1=val1, ...)`` from ``text`` and parse its arguments.

        Returns:
            The parsed arguments, or ``None`` when the marker or an opening
            parenthesis is not found or the argument list is empty.
        """
        marker = f"{key}="
        idx = text.find(marker)
        if idx == -1:
            return None
        rest = text[idx + len(marker):]
        pidx = rest.find("(")
        if pidx == -1:
            return None
        rest = rest[pidx + 1:]

        # Collect up to the matching ')'. Parentheses inside quoted literals are
        # text, not structure, so they must not affect the depth.
        depth = 1
        collected = []
        for ch, quoted in self._iter_chars(rest):
            if not quoted:
                if ch == "(":
                    depth += 1
                elif ch == ")":
                    depth -= 1
                    if depth == 0:
                        break
            collected.append(ch)
        if not collected:
            return None
        return self._split_args("".join(collected))

    def _parse_submission_eval_str(self, text: str) -> Dict[str, Any]:
        """Parse whitespace-separated ``key=value`` tokens; ``True``/``False`` become booleans."""
        out: Dict[str, Any] = {}
        for token in text.strip().split():
            if "=" not in token:
                continue
            k, v = token.split("=", 1)
            v = v.strip()
            out[k] = (v == "True") if v in ("True", "False") else v
        return out

    def _decode_embedded_json(self, obj: Any) -> Any:
        """Recursively replace strings that hold a JSON object/array with the decoded value.

        Strings that merely look like JSON but fail to decode are returned
        unchanged. Decoded values are not processed again.
        """
        if isinstance(obj, str):
            s = obj.strip()
            if (s.startswith("{") and s.endswith("}")) or (s.startswith("[") and s.endswith("]")):
                try:
                    return json.loads(s)
                except ValueError:
                    return obj
            return obj
        if isinstance(obj, dict):
            return {k: self._decode_embedded_json(v) for k, v in obj.items()}
        if isinstance(obj, list):
            return [self._decode_embedded_json(v) for v in obj]
        return obj

    def _wrap_structured(self, text: str) -> Dict[str, Any]:
        """Parse a stringified evaluation into ``{"structured_evaluation": ...}``."""
        return {
            "structured_evaluation": self._parse_structured_block(text, "structured_evaluation"),
        }

    def _map_sections(self, normalized: Dict[str, Any]) -> Dict[str, Any]:
        """Add the ``<section>_evaluation`` / ``<section>_files`` keys to ``normalized``.

        Reads the ``installation``, ``readme``, ``userguide`` and ``tutorial``
        sections. Absent or malformed sections are skipped, leaving the
        corresponding keys unset. The dict is modified in place and returned.

        Raises:
            ValueError: If ``normalized`` is not a dict.
        """
        if not isinstance(normalized, dict):
            raise ValueError("evaluation report must be a JSON object")

        inst_eval = normalized.get("installation")
        if isinstance(inst_eval, str):
            # The whole section is a stringified evaluation.
            normalized["installation_evaluation"] = self._wrap_structured(inst_eval)
        elif isinstance(inst_eval, dict):
            evaluation = inst_eval.get("evaluation")
            if isinstance(evaluation, str):
                evaluation = self._wrap_structured(evaluation)
            normalized["installation_evaluation"] = evaluation
            normalized["installation_files"] = inst_eval.get("files")

        readme_eval = normalized.get("readme")
        if isinstance(readme_eval, dict) and isinstance(readme_eval.get("evaluations"), dict):
            # NOTE(review): this walks the whole section (its "evaluations" and
            # "files" entries), not the per-file mapping under "evaluations".
            # That looks unintended, but the resulting shape is kept because
            # consumers of readme_evaluation are not visible here - confirm
            # before changing.
            normalized["readme_evaluation"] = {
                name: self._wrap_structured(val) if isinstance(val, str) else val
                for name, val in readme_eval.items()
            }
            normalized["readme_files"] = readme_eval.get("files")

        userguide_eval = normalized.get("userguide")
        if isinstance(userguide_eval, dict) and isinstance(userguide_eval.get("evaluation"), dict):
            normalized["userguide_evaluation"] = userguide_eval["evaluation"]
            normalized["userguide_files"] = userguide_eval.get("files")

        tutorial_eval = normalized.get("tutorial")
        if isinstance(tutorial_eval, dict) and isinstance(tutorial_eval.get("evaluation"), dict):
            normalized["tutorial_evaluation"] = tutorial_eval["evaluation"]
            normalized["tutorial_files"] = tutorial_eval.get("files")

        return normalized

    def load(self, report_path: str) -> Tuple[EvaluationReport, str]:
        """Read the JSON report at ``report_path`` and build an ``EvaluationReport``.

        Args:
            report_path: Path of a UTF-8 JSON file.

        Returns:
            ``(report, report_path)``.

        Raises:
            ValueError: If the file is not valid JSON or its top level is not an object.
        """
        with open(report_path, "r", encoding="utf-8") as fobj:
            raw = json.load(fobj)

        normalized = self._map_sections(self._decode_embedded_json(raw))
        report = EvaluationReport(**normalized)
        return report, report_path
165
+
166
+
@@ -0,0 +1,36 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Dict
4
+
5
+ from .models import StyleProfile
6
+
7
+
8
class StyleAnalyzer:
    """Derive a ``StyleProfile`` from document text using simple substring checks."""

    def analyze(self, files: Dict[str, str]) -> StyleProfile:
        """Build a style profile from one representative document.

        Args:
            files: Mapping of file name to text content.

        Returns:
            A ``StyleProfile`` whose list style reflects the inspected text and
            whose tone markers are the fixed pair ``["concise", "neutral"]``.
        """
        result = StyleProfile()

        # Representative text: README.md, then README.rst; if neither is present
        # (or the one found is empty) fall back to the first file, else "".
        readme_text = next(
            (files[candidate] for candidate in ("README.md", "README.rst") if candidate in files),
            None,
        )
        text = readme_text or next(iter(files.values()), "")

        # Heading style: every detection outcome maps to "#".
        result.heading_style = "#"

        # List style: the first bullet marker found after a newline wins, with
        # "-" checked before "*"; otherwise the profile default is kept.
        for bullet in ("-", "*"):
            if f"\n{bullet} " in text:
                result.list_style = bullet
                break

        # Tone markers (heuristic): keep minimal
        result.tone_markers = ["concise", "neutral"]
        return result
35
+
36
+