bioguider 0.2.5__tar.gz → 0.2.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bioguider might be problematic. Click here for more details.

Files changed (48) hide show
  1. {bioguider-0.2.5 → bioguider-0.2.7}/PKG-INFO +2 -1
  2. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/agent_tools.py +10 -1
  3. bioguider-0.2.7/bioguider/agents/evaluation_installation_task.py +160 -0
  4. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/evaluation_task.py +11 -4
  5. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/prompt_utils.py +1 -1
  6. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/managers/evaluation_manager.py +25 -0
  7. {bioguider-0.2.5 → bioguider-0.2.7}/pyproject.toml +2 -1
  8. {bioguider-0.2.5 → bioguider-0.2.7}/LICENSE +0 -0
  9. {bioguider-0.2.5 → bioguider-0.2.7}/README.md +0 -0
  10. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/__init__.py +0 -0
  11. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/__init__.py +0 -0
  12. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/agent_task.py +0 -0
  13. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/agent_utils.py +0 -0
  14. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/collection_execute_step.py +0 -0
  15. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/collection_observe_step.py +0 -0
  16. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/collection_plan_step.py +0 -0
  17. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/collection_task.py +0 -0
  18. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/collection_task_utils.py +0 -0
  19. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/common_agent.py +0 -0
  20. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/common_agent_2step.py +0 -0
  21. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/common_step.py +0 -0
  22. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/dockergeneration_execute_step.py +0 -0
  23. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/dockergeneration_observe_step.py +0 -0
  24. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/dockergeneration_plan_step.py +0 -0
  25. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/dockergeneration_task.py +0 -0
  26. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/dockergeneration_task_utils.py +0 -0
  27. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/identification_execute_step.py +0 -0
  28. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/identification_observe_step.py +0 -0
  29. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/identification_plan_step.py +0 -0
  30. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/identification_task.py +0 -0
  31. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/identification_task_utils.py +0 -0
  32. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/peo_common_step.py +0 -0
  33. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/python_ast_repl_tool.py +0 -0
  34. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/rag_collection_task.py +0 -0
  35. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/conversation.py +0 -0
  36. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/database/summarized_file_db.py +0 -0
  37. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/rag/__init__.py +0 -0
  38. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/rag/config.py +0 -0
  39. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/rag/data_pipeline.py +0 -0
  40. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/rag/embedder.py +0 -0
  41. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/rag/rag.py +0 -0
  42. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/settings.py +0 -0
  43. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/utils/constants.py +0 -0
  44. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/utils/default.gitignore +0 -0
  45. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/utils/file_utils.py +0 -0
  46. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/utils/gitignore_checker.py +0 -0
  47. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/utils/pyphen_utils.py +0 -0
  48. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/utils/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: bioguider
3
- Version: 0.2.5
3
+ Version: 0.2.7
4
4
  Summary: An AI-Powered package to help biomedical developers to generate clear documentation
5
5
  License: MIT
6
6
  Author: Cankun Wang
@@ -28,6 +28,7 @@ Requires-Dist: langchain-experimental (>=0.3.4,<0.4.0)
28
28
  Requires-Dist: langchain-google-genai (>=2.1.4,<3.0.0)
29
29
  Requires-Dist: langchain-openai (>=0.3.8,<0.4.0)
30
30
  Requires-Dist: langgraph (>=0.3.11,<0.4.0)
31
+ Requires-Dist: markdownify (>=1.1.0,<2.0.0)
31
32
  Requires-Dist: nanoid (>=2.0.0,<3.0.0)
32
33
  Requires-Dist: pydantic (>=2.10.6,<3.0.0)
33
34
  Requires-Dist: pydantic-settings (>=2.8.1,<3.0.0)
@@ -1,9 +1,11 @@
1
1
  import os
2
2
  from typing import Callable
3
+ from markdownify import markdownify as md
3
4
  from langchain_openai.chat_models.base import BaseChatOpenAI
4
5
  from bioguider.database.summarized_file_db import SummarizedFilesDb
5
6
  from bioguider.utils.file_utils import get_file_type
6
7
  from bioguider.agents.agent_utils import read_directory, read_file, summarize_file
8
+ from bioguider.rag.data_pipeline import count_tokens
7
9
 
8
10
  class agent_tool:
9
11
  def __init__(
@@ -39,7 +41,14 @@ Returns:
39
41
  file_path = os.path.join(self.repo_path, file_path)
40
42
  if not os.path.isfile(file_path):
41
43
  return None
42
- return read_file(file_path)
44
+ content = read_file(file_path)
45
+ if file_path.endswith(".html") or file_path.endswith(".htm"):
46
+ content = md(content, escape_underscores=False)
47
+ tokens = count_tokens(content)
48
+ MAX_TOKENS = os.environ.get('OPENAI_MAX_INPUT_TOKENS', 102400)
49
+ if tokens > int(MAX_TOKENS):
50
+ content = content[:100000]
51
+ return content
43
52
 
44
53
  class summarize_file_tool(agent_tool):
45
54
  """ read and summarize the file
@@ -0,0 +1,160 @@
1
+ import os
2
+ from pathlib import Path
3
+ import logging
4
+ from typing import Callable, Optional
5
+ from abc import ABC, abstractmethod
6
+ from langchain.prompts import ChatPromptTemplate
7
+ from langchain_openai.chat_models.base import BaseChatOpenAI
8
+ from pydantic import BaseModel, Field
9
+ from markdownify import markdownify as md
10
+
11
+ from bioguider.agents.agent_utils import read_file
12
+ from bioguider.utils.constants import DEFAULT_TOKEN_USAGE, ProjectMetadata
13
+ from bioguider.rag.data_pipeline import count_tokens
14
+ from .common_agent_2step import CommonAgentTwoSteps, CommonAgentTwoChainSteps
15
+ from .common_agent import CommonConversation
16
+ from ..utils.pyphen_utils import PyphenReadability
17
+ from ..utils.gitignore_checker import GitignoreChecker
18
+ from .evaluation_task import EvaluationTask
19
+ from .agent_utils import read_file
20
+
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+ EVALUATION_INSTALLATION_SYSTEM_PROMPT = """
25
+ You are an expert in evaluating the quality of **installation instructions** in software repositories.
26
+ Your task is to analyze the provided content of installation-related files and generate a **comprehensive, structured quality report**.
27
+
28
+ ---
29
+
30
+ ### Evaluation Criteria
31
+
32
+ Please assess the installation information using the following criteria. For each, provide a concise evaluation and specific feedback:
33
+
34
+ 1. **Ease of Access**
35
+ * Is the installation information clearly presented and easy to locate within the repository?
36
+ * Is it included in a top-level README, a dedicated INSTALL.md file, or other accessible locations?
37
+
38
+ 2. **Clarity of Dependency Specification**
39
+ * Are all software and library dependencies clearly listed?
40
+ * Are installation methods (e.g., `pip`, `conda`, `apt`) for those dependencies explicitly provided?
41
+
42
+ 3. **Hardware Requirements**
43
+ * Does the documentation specify hardware needs (e.g., GPU, memory, OS) if relevant?
44
+
45
+ 4. **Step-by-Step Installation Guide**
46
+ * Is there a clear, ordered set of instructions for installing the software?
47
+ * Are example commands or configuration steps provided to help users follow along?
48
+
49
+ ---
50
+
51
+ ### Output Format
52
+
53
+ Your response **must exactly follow** the structure below:
54
+
55
+ ```
56
+ **FinalAnswer**
57
+ **Overall Score:** [Poor / Fair / Good / Excellent]
58
+ **Ease of Access:** <your comments>
59
+ **Clarity of Dependency Specification:** <your comments>
60
+ **Hardware Requirements:** <your comments>
61
+ **Installation Guide:** <your comments>
62
+ ```
63
+
64
+ ---
65
+
66
+ ### Installation Files Provided:
67
+ {installation_file_contents}
68
+
69
+ """
70
+
71
+ class EvaluationInstallationResult(BaseModel):
72
+ ease_of_access: Optional[str]=Field(description="Is the installation information easy to access")
73
+ score: Optional[str]=Field(description="An overall score, could be Poor, Fair, Good or Excellent")
74
+ clarity_of_dependency: Optional[str]=Field(description="Are all dependencies clearly listed")
75
+ hardware_requirements: Optional[str]=Field(description="Are all hardware requirements clearly specified")
76
+ installation_guide: Optional[str]=Field(description="Is there a clear, ordered set of instructions for installing the software")
77
+
78
+ EvaluationInstallationResultSchema = {
79
+ "title": "EvaluationREADMEResult",
80
+ "type": "object",
81
+ "properties": {
82
+ "ease_of_access": {
83
+ "anyOf": [{"type": "string"}, {"type": "null"}],
84
+ "description": "Is the installation information easy to access",
85
+ "title": "Ease of Access"
86
+ },
87
+ "score": {
88
+ "anyOf": [{"type": "string"}, {"type": "null"}],
89
+ "description": "An overall score, could be Poor, Fair, Good or Excellent",
90
+ "title": "Score"
91
+ },
92
+ "clarity_of_dependency": {
93
+ "anyOf": [{"type": "string"}, {"type": "null"}],
94
+ "description": "Are all dependencies clearly listed",
95
+ "title": "Clarity of Dependency",
96
+ },
97
+ "hardware_requirements": {
98
+ "anyOf": [{"type": "string"}, {"type": "null"}],
99
+ "description": "Are all hardware requirements clearly specified",
100
+ "title": "Hardware Requirements"
101
+ },
102
+ "installation_guide": {
103
+ "anyOf": [{"type": "string"}, {"type": "null"}],
104
+ "description": "Is there a clear, ordered set of instructions for installing the software",
105
+ "title": "Installation Guide"
106
+ }
107
+ },
108
+ "required": ["ease_of_access", "score", "clarity_of_dependency", "hardware_requirements", "installation_guide"]
109
+ }
110
+
111
+ class EvaluationInstallationTask(EvaluationTask):
112
+ def __init__(
113
+ self,
114
+ llm,
115
+ repo_path,
116
+ gitignore_path,
117
+ meta_data = None,
118
+ step_callback = None,
119
+ ):
120
+ super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback)
121
+
122
+ def _evaluate(self, files: list[str] | None = None):
123
+ if files is None or len(files) == 0:
124
+ return None
125
+
126
+ files_content = ""
127
+ MAX_TOKENS = os.environ.get("OPENAI_MAX_INPUT_TOKENS", 102400)
128
+ for f in files:
129
+ if f.endswith(".html") or f.endswith(".htm"):
130
+ html = read_file(os.path.join(self.repo_path, f))
131
+ content = md(html, escape_underscores=False)
132
+ else:
133
+ content = read_file(os.path.join(self.repo_path, f))
134
+ if count_tokens(content) > int(MAX_TOKENS):
135
+ content = content[:100000]
136
+ files_content += f"""
137
+ {f} content:
138
+ {content}
139
+
140
+ """
141
+ system_prompt = ChatPromptTemplate.from_template(EVALUATION_INSTALLATION_SYSTEM_PROMPT).format(
142
+ installation_file_contents=files_content
143
+ )
144
+ agent = CommonAgentTwoChainSteps(llm=self.llm)
145
+ res, _, token_usage, reasoning_process = agent.go(
146
+ system_prompt=system_prompt,
147
+ instruction_prompt="Before arriving at the conclusion, clearly explain your reasoning step by step. Now, let's begin the evaluation.",
148
+ schema=EvaluationInstallationResultSchema,
149
+ )
150
+ res = EvaluationInstallationResult(**res)
151
+ evaluation = {
152
+ "score": res.score,
153
+ "ease_of_access": res.ease_of_access,
154
+ "hardware_requirements": res.hardware_requirements,
155
+ "clarity_of_dependency": res.clarity_of_dependency,
156
+ "installation_guide": res.installation_guide,
157
+ "reasoning_process": reasoning_process,
158
+ }
159
+ return evaluation, token_usage
160
+
@@ -76,7 +76,7 @@ For each criterion below, provide a brief assessment followed by specific, actio
76
76
  * Project-Level README: Yes / No
77
77
  * **Score:** <number from 0 to 100>
78
78
  * **Key Strengths**: <brief summary of the README's strongest points in 2-3 sentences>
79
- * **Improvement Suggestions:**
79
+ * **Overall Improvement Suggestions:**
80
80
  - "Original text snippet 1" - Improving comment 1
81
81
  - "Original text snippet 2" - Improving comment 2
82
82
  - ...
@@ -124,7 +124,12 @@ For each criterion below, provide a brief assessment followed by specific, actio
124
124
  **Final Answer**
125
125
  The final answer **must exactly match** the following format:
126
126
  * Project-Level README: Yes / No
127
- * Overall Assessment: provide a final, overall assessment of the README file's quality, summarizing the key strengths and areas for improvement.
127
+ * **Score:** <number from 0 to 100>
128
+ * **Key Strengths**: <brief summary of the README's strongest points in 2-3 sentences>
129
+ * **Overall Improvement Suggestions:**
130
+ - "Original text snippet 1" - Improving comment 1
131
+ - "Original text snippet 2" - Improving comment 2
132
+ - ...
128
133
  ---
129
134
 
130
135
  ### **README path:**
@@ -243,7 +248,9 @@ class EvaluationREADMETask(EvaluationTask):
243
248
  readme_evaluations[readme_file] = {
244
249
  "evaluation": {
245
250
  "project_level": "/" in readme_file,
246
- "overall_assessment": f"{readme_file} is an empty file."
251
+ "score": 0,
252
+ "key_strengths": f"{readme_file} is an empty file.",
253
+ "overall_improvement_suggestions": f"{readme_file} is an empty file.",
247
254
  },
248
255
  "reasoning_process": f"{readme_file} is an empty file.",
249
256
  }
@@ -277,7 +284,7 @@ class EvaluationREADMETask(EvaluationTask):
277
284
  "project_level": response.project_level,
278
285
  "score": response.score,
279
286
  "key_strengths": response.key_strengths,
280
- "overall_assessment": response.overall_improvement_suggestions,
287
+ "overall_improvement_suggestions": response.overall_improvement_suggestions,
281
288
  },
282
289
  "reasoning_process": reasoning_process
283
290
  }
@@ -165,7 +165,7 @@ If **any one** of these is present, the document should be classified as Install
165
165
  "important_instructions": """- Give priority to analyzing README file that contain installation instructions and the files whose names include **"install"** or **"setup"**.
166
166
  - If multiple files are found, select the most comprehensive one that covers the installation process.
167
167
  - The total number of collected files should **not exceed 3**.
168
- - Make sure to include **only one installation instruction file**, selecting the most comprehensive and representative one.
168
+ - Identify and select **no more than three** installation instruction files; choose the most comprehensive and representative ones.
169
169
  """
170
170
  },
171
171
  "License": {
@@ -10,6 +10,7 @@ from ..rag.rag import RAG
10
10
  from ..utils.file_utils import parse_repo_url
11
11
  from ..database.summarized_file_db import SummarizedFilesDb
12
12
  from ..agents.evaluation_task import EvaluationREADMETask
13
+ from ..agents.evaluation_installation_task import EvaluationInstallationTask
13
14
  from ..agents.collection_task import CollectionTask
14
15
 
15
16
  class EvaluationManager:
@@ -84,6 +85,30 @@ class EvaluationManager:
84
85
  if s is None or 'final_answer' not in s:
85
86
  return None
86
87
 
88
+ def evaluate_installation(self):
89
+ task = CollectionTask(
90
+ llm=self.llm,
91
+ step_callback=self.step_callback,
92
+ )
93
+ task.compile(
94
+ repo_path=self.rag.repo_dir,
95
+ gitignore_path=Path(self.rag.repo_dir, ".gitignore"),
96
+ db=self.summary_file_db,
97
+ goal_item=CollectionGoalItemEnum.Installation.name,
98
+ )
99
+ files = task.collect()
100
+ if files is None or len(files) == 0:
101
+ return None
102
+ evaluation_task = EvaluationInstallationTask(
103
+ llm=self.llm,
104
+ repo_path=self.rag.repo_dir,
105
+ gitignore_path=Path(self.rag.repo_dir, ".gitignore"),
106
+ meta_data=self.project_metadata,
107
+ step_callback=self.step_callback,
108
+ )
109
+ evaluation = evaluation_task.evaluate(files)
110
+ return evaluation, files
111
+
87
112
  def _find_readme_files(self) -> list[str]:
88
113
  """
89
114
  Search for a README file in the repository directory.
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "bioguider"
3
- version = "0.2.5"
3
+ version = "0.2.7"
4
4
  description = "An AI-Powered package to help biomedical developers to generate clear documentation"
5
5
  authors = [
6
6
  "Cankun Wang <Cankun.Wang@osumc.edu>",
@@ -45,6 +45,7 @@ faiss-cpu = "^1.11.0"
45
45
  binaryornot = "^0.4.4"
46
46
  textstat = "^0.7.6"
47
47
  pyphen = "^0.17.2"
48
+ markdownify = "^1.1.0"
48
49
 
49
50
 
50
51
  [tool.poetry.group.dev.dependencies]
File without changes
File without changes