bioguider 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bioguider might be problematic. Click here for more details.

Files changed (47) hide show
  1. bioguider/__init__.py +0 -0
  2. bioguider/agents/__init__.py +0 -0
  3. bioguider/agents/agent_task.py +88 -0
  4. bioguider/agents/agent_tools.py +147 -0
  5. bioguider/agents/agent_utils.py +357 -0
  6. bioguider/agents/collection_execute_step.py +180 -0
  7. bioguider/agents/collection_observe_step.py +113 -0
  8. bioguider/agents/collection_plan_step.py +154 -0
  9. bioguider/agents/collection_task.py +179 -0
  10. bioguider/agents/collection_task_utils.py +109 -0
  11. bioguider/agents/common_agent.py +159 -0
  12. bioguider/agents/common_agent_2step.py +126 -0
  13. bioguider/agents/common_step.py +85 -0
  14. bioguider/agents/dockergeneration_execute_step.py +186 -0
  15. bioguider/agents/dockergeneration_observe_step.py +153 -0
  16. bioguider/agents/dockergeneration_plan_step.py +158 -0
  17. bioguider/agents/dockergeneration_task.py +158 -0
  18. bioguider/agents/dockergeneration_task_utils.py +220 -0
  19. bioguider/agents/evaluation_task.py +269 -0
  20. bioguider/agents/identification_execute_step.py +179 -0
  21. bioguider/agents/identification_observe_step.py +92 -0
  22. bioguider/agents/identification_plan_step.py +135 -0
  23. bioguider/agents/identification_task.py +220 -0
  24. bioguider/agents/identification_task_utils.py +18 -0
  25. bioguider/agents/peo_common_step.py +64 -0
  26. bioguider/agents/prompt_utils.py +190 -0
  27. bioguider/agents/python_ast_repl_tool.py +69 -0
  28. bioguider/agents/rag_collection_task.py +130 -0
  29. bioguider/conversation.py +67 -0
  30. bioguider/database/summarized_file_db.py +140 -0
  31. bioguider/managers/evaluation_manager.py +108 -0
  32. bioguider/rag/__init__.py +0 -0
  33. bioguider/rag/config.py +117 -0
  34. bioguider/rag/data_pipeline.py +648 -0
  35. bioguider/rag/embedder.py +24 -0
  36. bioguider/rag/rag.py +134 -0
  37. bioguider/settings.py +103 -0
  38. bioguider/utils/constants.py +40 -0
  39. bioguider/utils/default.gitignore +140 -0
  40. bioguider/utils/file_utils.py +126 -0
  41. bioguider/utils/gitignore_checker.py +175 -0
  42. bioguider/utils/pyphen_utils.py +73 -0
  43. bioguider/utils/utils.py +27 -0
  44. bioguider-0.2.3.dist-info/LICENSE +21 -0
  45. bioguider-0.2.3.dist-info/METADATA +44 -0
  46. bioguider-0.2.3.dist-info/RECORD +47 -0
  47. bioguider-0.2.3.dist-info/WHEEL +4 -0
@@ -0,0 +1,180 @@
1
+ import logging
2
+ from langchain_openai.chat_models.base import BaseChatOpenAI
3
+ from langchain.tools import BaseTool
4
+ from langchain_core.prompts import ChatPromptTemplate, StringPromptTemplate
5
+ from langchain.agents import create_react_agent, AgentExecutor
6
+ from langchain_community.callbacks.openai_info import OpenAICallbackHandler
7
+
8
+ from bioguider.utils.constants import DEFAULT_TOKEN_USAGE
9
+ from bioguider.agents.agent_utils import (
10
+ CustomPromptTemplate,
11
+ CustomOutputParser,
12
+ )
13
+ from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
14
+ from bioguider.agents.peo_common_step import PEOCommonStep, PEOWorkflowState
15
+ from bioguider.agents.collection_task_utils import CollectionWorkflowState
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
# ReAct-style system prompt passed as `template=` to CustomPromptTemplate in
# CollectionExecuteStep._execute_directly.
# Single-brace fields ({tools}, {tool_names}, {plan_actions}, {agent_scratchpad},
# {input}) are the template placeholders. Double-brace fields such as
# {{tool_name}} are presumably escapes that render as literal braces; confirm
# against CustomPromptTemplate's formatting rules.
# NOTE(review): "input" is used below but is not listed in the step's
# input_variables; verify CustomPromptTemplate tolerates that.
+ COLLECTION_EXECUTION_SYSTEM_PROMPT = """---
20
+
21
+ You are an expert Python developer.
22
+ You are given a **plan** and must complete it strictly using Python code and the available tools.
23
+
24
+ ---
25
+
26
+ ### **Available Tools**
27
+ {tools}
28
+
29
+ ---
30
+
31
+ ### **Your Task**
32
+ Follow the given plan step by step using the exact format below:
33
+
34
+ ```
35
+ Thought: Describe what you are thinking or planning to do next.
36
+ Action: The tool you are going to use (must be one of: {tool_names})
37
+ Action Input: The input to the selected action
38
+ Observation: The result returned by the action
39
+ ```
40
+
41
+ You may repeat the **Thought → Action → Action Input → Observation** loop as needed.
42
+
43
+ Once all steps in the plan have been executed, output all the results using this format:
44
+
45
+ ```
46
+ Thought: I have completed the plan.
47
+ Final Answer:
48
+ Action: {{tool_name}}
49
+ Action Input: {{input1}}
50
+ Action Observation: {{Observation1}}
51
+ ---
52
+ Action: {{tool_name}}
53
+ Action Input: {{input2}}
54
+ Action Observation: {{Observation2}}
55
+ ---
56
+ ...
57
+ ```
58
+
59
+ ---
60
+
61
+ ### **Example**
62
+ ```
63
+ Action: summarize_file_tool
64
+ Action Input: README.md
65
+ Observation: # BioGuider\nBioGuider is a Python package for bioinformatics.\n...
66
+ ...
67
+ Final Answer:
68
+ Action: summarize_file_tool
69
+ Action Input: README.md
70
+ Action Observation: # BioGuider\nBioGuider is a Python package for bioinformatics.\n...
71
+ ---
72
+ Action: check_file_related_tool
73
+ Action Input: pyproject.toml
74
+ Action Observation: Yes, the file is related to the project.
75
+ ---
76
+ ...
77
+ ```
78
+
79
+ ---
80
+
81
+ ### **Important Notes**
82
+
83
+ - You must strictly follow the provided plan.
84
+ - **Do not take any additional or alternative actions**, even if:
85
+ - No relevant result is found
86
+ - The file content is missing, empty, or irrelevant
87
+ - If no information is found in a step, simply proceed to the next action in the plan without improvising.
88
+ - Only use the tools specified in the plan actions. No independent decisions or extra steps are allowed.
89
+
90
+ ---
91
+
92
+ ### **Plan**
93
+ {plan_actions}
94
+
95
+ ### **Actions Already Taken**
96
+ {agent_scratchpad}
97
+
98
+ ---
99
+
100
+ {input}
101
+
102
+ ---
103
+ """
104
+
105
class CollectionExecuteStep(PEOCommonStep):
    """Execute step of the collection Plan -> Execute -> Observe workflow.

    Runs the plan stored in ``state["plan_actions"]`` through a LangChain
    ReAct agent restricted to ``custom_tools`` and stores the text that
    follows the agent's "Final Answer" marker in ``state["step_output"]``.
    """

    _FINAL_ANSWER_MARKER = "Final Answer"
    # Residue that may directly follow the marker depending on how the model
    # decorated it: "**Final Answer:**", "**Final Answer**:",
    # "**Final Answer**" or plain "Final Answer:". Longest forms come first so
    # that the full decoration is removed in one go.
    _MARKER_RESIDUE = (":**", "**:", "**", ":")
    _NO_OUTPUT_MESSAGE = "Error: No output found in the response."

    def __init__(
        self,
        llm: BaseChatOpenAI,
        repo_path: str,
        repo_structure: str,
        gitignore_path: str,
        custom_tools: list[BaseTool] | None = None,
    ):
        """Store the repository context and the tools the agent may call.

        Args:
            llm: Chat model that drives the ReAct agent.
            repo_path: Path of the repository being analysed.
            repo_structure: Textual listing of the repository structure.
            gitignore_path: Path of the gitignore file associated with the repo.
            custom_tools: Tools exposed to the agent; ``None`` means no tools.
        """
        super().__init__(llm)
        self.step_name = "Collection Execution Step"
        self.repo_path = repo_path
        self.repo_structure = repo_structure
        self.gitignore_path = gitignore_path
        self.custom_tools = custom_tools if custom_tools is not None else []

    @classmethod
    def _extract_final_answer(cls, output: str) -> str:
        """Return the text after the last "Final Answer" marker in *output*.

        The previous implementation tested for ``"**Final Answer**"`` but split
        on ``"**Final Answer:**"``; the two never matched each other, and the
        plain-marker fallback left markdown residue (``"** ..."``) at the start
        of the answer. Here the marker is located once and any colon/bold
        decoration that immediately follows it is removed.

        When the marker is absent, *output* is returned unchanged.
        """
        _, marker, tail = output.rpartition(cls._FINAL_ANSWER_MARKER)
        if not marker:
            return output
        tail = tail.lstrip()
        for residue in cls._MARKER_RESIDUE:
            if tail.startswith(residue):
                tail = tail[len(residue):]
                break
        return tail.strip()

    def _execute_directly(self, state: PEOWorkflowState):
        """Run the ReAct agent on the current plan.

        Args:
            state: Workflow state; ``state["plan_actions"]`` is read and
                ``state["step_output"]`` is written.

        Returns:
            A ``(state, token_usage)`` tuple. ``token_usage`` is
            ``DEFAULT_TOKEN_USAGE`` overlaid with the attributes of the
            OpenAI callback handler used for this run.
        """
        plan_actions = state["plan_actions"]
        prompt = CustomPromptTemplate(
            template=COLLECTION_EXECUTION_SYSTEM_PROMPT,
            tools=self.custom_tools,
            plan_actions=plan_actions,
            input_variables=[
                "tools", "tool_names", "agent_scratchpad",
                "intermediate_steps", "plan_actions",
            ],
        )
        output_parser = CustomOutputParser()
        agent = create_react_agent(
            llm=self.llm,
            tools=self.custom_tools,
            prompt=prompt,
            output_parser=output_parser,
            stop_sequence=["\nObservation:"],
        )
        callback_handler = OpenAICallbackHandler()
        agent_executor = AgentExecutor(
            agent=agent,
            tools=self.custom_tools,
            max_iterations=10,
        )
        response = agent_executor.invoke(
            input={"plan_actions": plan_actions, "input": "Now, let's begin."},
            config={
                "callbacks": [callback_handler],
                "recursion_limit": 20,
            },
        )

        # Parse the response: keep only what follows the final-answer marker.
        if "output" in response:
            step_output = self._extract_final_answer(response["output"])
        else:
            logger.error("No output found in the response.")
            step_output = self._NO_OUTPUT_MESSAGE
        self._print_step(state, step_output=step_output)
        state["step_output"] = step_output

        # The callback handler's attributes hold the usage counters collected
        # during the run; defaults fill in any key it does not provide.
        token_usage = {**DEFAULT_TOKEN_USAGE, **vars(callback_handler)}

        return state, token_usage
@@ -0,0 +1,113 @@
1
+
2
+ from typing import Callable
3
+ from langchain.tools import BaseTool
4
+ from langchain_openai.chat_models.base import BaseChatOpenAI
5
+ from langchain_core.prompts import ChatPromptTemplate
6
+ from bioguider.agents.agent_utils import ObservationResult
7
+ from bioguider.agents.collection_task_utils import CollectionWorkflowState
8
+ from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
9
+ from bioguider.agents.peo_common_step import PEOCommonStep
10
+ from bioguider.agents.prompt_utils import COLLECTION_GOAL, COLLECTION_PROMPTS
11
+
12
+
13
# System prompt for the observation step. CollectionObserveStep._build_prompt
# wraps it with ChatPromptTemplate.from_template and fills the placeholders
# {goal_item_desc}, {related_file_description}, {repo_structure},
# {intermediate_output} and {important_instructions}.
# The double-brace JSON example ({{"final_answer": [...]}}) is presumably
# escaped so it renders with literal braces; confirm against the template format.
+ COLLECTION_OBSERVE_SYSTEM_PROMPT = """You are an expert software developer and technical documentation analyst.
14
+ {goal_item_desc}
15
+
16
+ {related_file_description}
17
+ ---
18
+
19
+ ### **Repository Structure**
20
+ Here is the 2-level file structure of the repository (`f` = file, `d` = directory, `l` - symlink, `u` - unknown):
21
+ {repo_structure}
22
+ ---
23
+
24
+ ### **Intermediate Output**
25
+ {intermediate_output}
26
+ ---
27
+
28
+ ### **Instructions**
29
+ 1. Your goal is to identify files that are relevant to the **goal item**.
30
+ 2. Carefully review the **Goal**, **Repository Structure**, and **Intermediate Output**.
31
+ 3. If you believe **all relevant files** have been collected:
32
+
33
+ * Proceed with the following format:
34
+
35
+ * Provide your reasoning under **Analysis**
36
+ * Then list all relevant files and folders under **FinalAnswer**
37
+ * Be sure to include the **full relative paths** with respect to the repository root.
38
+ * Your answer **must follow this exact format** (note: no JSON code block, no additional comments):
39
+
40
+ ```
41
+ **Analysis**: your analysis here
42
+ **FinalAnswer**: {{"final_answer": ["path/to/file1", "path/to/file2", ...]}}
43
+ ```
44
+ 4. If you believe **more files still need to be collected**:
45
+ * Provide your reasoning under **Thoughts**:
46
+
47
+ ```
48
+ **Thoughts**: your explanation here
49
+ ```
50
+
51
+ 5. Important instructions:
52
+ {important_instructions}
53
+ Be precise and support your reasoning with evidence from the input.
54
+ ---
55
+
56
+ ### Notes
57
+ - We are collecting information over multiple rounds, your thoughts and the output of this step will be persisted, so please **do not rush to provide a Final Answer**.
58
+ If you find the current information insufficient, share your thoughts instead—we’ll continue with the next round accordingly.
59
+ """
60
+
61
class CollectionObserveStep(PEOCommonStep):
    """Observation step of the collection workflow.

    Asks the model to judge, from the repository structure and the
    intermediate output of earlier steps, whether every relevant file has
    been collected, and records its answer, analysis and thoughts in the
    workflow state.
    """

    def __init__(
        self,
        llm: BaseChatOpenAI,
        repo_path: str,
        repo_structure: str,
        gitignore_path: str,
    ):
        """Remember the repository context used when building the prompt."""
        super().__init__(llm=llm)
        self.repo_path = repo_path
        self.repo_structure = repo_structure
        self.gitignore_path = gitignore_path
        self.step_name = "Collection Observation Step"

    def _build_prompt(self, state):
        """Render the observation system prompt for ``state["goal_item"]``."""
        collection_item = COLLECTION_PROMPTS[state["goal_item"]]
        goal_template = ChatPromptTemplate.from_template(COLLECTION_GOAL)
        goal_item_desc = goal_template.format(goal_item=collection_item["goal_item"])
        intermediate_output = self._build_intermediate_steps(state)
        observe_template = ChatPromptTemplate.from_template(COLLECTION_OBSERVE_SYSTEM_PROMPT)

        # Use the item's own instructions only when present and non-empty.
        if (
            "important_instructions" in collection_item
            and len(collection_item["important_instructions"]) != 0
        ):
            important_instructions = collection_item["important_instructions"]
        else:
            important_instructions = "N/A"

        return observe_template.format(
            goal_item_desc=goal_item_desc,
            related_file_description=collection_item["related_file_description"],
            repo_structure=self.repo_structure,
            intermediate_output=intermediate_output,
            important_instructions=important_instructions,
        )

    def _execute_directly(self, state: CollectionWorkflowState):
        """Query the model once and copy its verdict into *state*.

        Writes ``final_answer``, ``step_analysis`` and ``step_thoughts`` and
        returns ``(state, token_usage)``.
        """
        two_step_agent = CommonAgentTwoSteps(llm=self.llm)
        res, _, token_usage, reasoning_process = two_step_agent.go(
            system_prompt=self._build_prompt(state),
            instruction_prompt="Let's begin thinking.",
            schema=ObservationResult,
        )

        state["final_answer"] = res.FinalAnswer
        state["step_analysis"] = res.Analysis
        state["step_thoughts"] = res.Thoughts

        self._print_step(
            state,
            step_output=f"**Observation Reasoning Process**\n{reasoning_process}",
        )
        # A falsy final answer is reported as None in the progress message.
        shown_answer = res.FinalAnswer if res.FinalAnswer else None
        self._print_step(
            state,
            step_output=f"Final Answer: {shown_answer}\nAnalysis: {res.Analysis}\nThoughts: {res.Thoughts}",
        )
        return state, token_usage
@@ -0,0 +1,154 @@
1
+
2
+ from langchain_openai.chat_models.base import BaseChatOpenAI
3
+ from langchain.tools import BaseTool
4
+ from langchain_core.prompts import ChatPromptTemplate, StringPromptTemplate
5
+ from bioguider.agents.agent_utils import (
6
+ convert_plan_to_string,
7
+ get_tool_names_and_descriptions,
8
+ PlanAgentResultJsonSchema,
9
+ PlanAgentResult,
10
+ )
11
+ from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
12
+ from bioguider.agents.peo_common_step import PEOCommonStep
13
+ from bioguider.agents.collection_task_utils import CollectionWorkflowState
14
+ from bioguider.agents.prompt_utils import COLLECTION_GOAL, COLLECTION_PROMPTS
15
+
16
# Prompt template for the planning step, built once at import time with
# ChatPromptTemplate.from_template. CollectionPlanStep._prepare_system_prompt
# formats it with: goal, related_file_description, repo_structure, tools,
# intermediate_steps, intermediate_analysis, intermediate_thoughts, tool_names
# and important_instructions.
# NOTE(review): the instructions name tools as `summarize_file`, `read_file`,
# `read_directory` and `python_repl`; verify these match the names actually
# registered for the tools passed to the step.
+ COLLECTION_PLAN_SYSTEM_PROMPT = ChatPromptTemplate.from_template("""### **Goal**
17
+ You are an expert developer specializing in the biomedical domain.
18
+ **{goal}**
19
+
20
+ {related_file_description}
21
+ ---
22
+
23
+ ### **Repository File Structure**
24
+ Below is the 2-level file structure of the repository (`f` = file, `d` = directory, `l` - symlink, `u` - unknown):
25
+ {repo_structure}
26
+
27
+ ---
28
+
29
+ ### **Function Tools**
30
+ You have access to the following function tools:
31
+ {tools}
32
+
33
+ ---
34
+
35
+ ### **Intermediate Steps**
36
+ Here are the results from previous steps:
37
+ {intermediate_steps}
38
+
39
+ ---
40
+
41
+ ### **Intermediate Thoughts**
42
+ - **Analysis**: {intermediate_analysis}
43
+ - **Thoughts**: {intermediate_thoughts}
44
+
45
+ ---
46
+
47
+ ### **Instructions**
48
+
49
+ 1. We will iterate through multiple **Plan -> Execution -> Observation** loops as needed.
50
+ - All variables and tool outputs are **persisted across rounds**, so you can build on prior results.
51
+ - Develop your plan **incrementally**, and reflect on intermediate observations before proceeding.
52
+ - Limit each step to **one or two actions** — avoid trying to complete everything in a single step.
53
+
54
+ 2. Your task is to collect all files that are relevant to the goal.
55
+ - Start by using the `summarize_file` tool to inspect file content quickly.
56
+ - If needed, follow up with the `read_file` tool for full content extraction.
57
+
58
+ 3. You may use the `read_directory` tool to explore directory contents, but avoid using it in the first step unless necessary.
59
+
60
+ 4. You may use the `python_repl` tool to execute Python code, but this should **also be avoided in the first step**.
61
+
62
+ ---
63
+
64
+ ### **Important Instructions**
65
+ {important_instructions}
66
+
67
+ ### **Output Format**
68
+ Your plan should be returned as a sequence of steps in the following format:
69
+
70
+ Step: <tool name> # Tool name must be one of {tool_names}
71
+ Step Input: <file or directory name>
72
+
73
+ Step: <tool name>
74
+ Step Input: <file or directory name>
75
+ ...
76
+ """)
77
+
78
class CollectionPlanStep(PEOCommonStep):
    """Plan step of the collection Plan -> Execute -> Observe workflow.

    Builds the planning prompt from the goal item, the repository structure,
    the available tools and the results of earlier rounds, asks the model for
    a plan and stores it (as a string) in ``state["plan_actions"]``.
    """

    def __init__(
        self,
        llm: BaseChatOpenAI,
        repo_path: str,
        repo_structure: str,
        gitignore_path: str,
        custom_tools: list[BaseTool] | None = None,
    ):
        """Store the repository context and the tools the plan may reference.

        Args:
            llm: Chat model used to produce the plan.
            repo_path: Path of the repository being analysed.
            repo_structure: Textual listing of the repository structure.
            gitignore_path: Path of the gitignore file associated with the repo.
            custom_tools: Tools the plan may use; ``None`` means no tools.
        """
        super().__init__(llm)
        self.step_name = "Collection Plan Step"
        self.repo_path = repo_path
        self.repo_structure = repo_structure
        self.gitignore_path = gitignore_path
        self.custom_tools = custom_tools if custom_tools is not None else []

    def _prepare_system_prompt(self, state: CollectionWorkflowState) -> str:
        """Render the planning prompt and report the intermediate results.

        Args:
            state: Workflow state; ``state["goal_item"]`` selects the entry of
                ``COLLECTION_PROMPTS`` that describes the goal.

        Returns:
            The formatted system prompt.
        """
        collection_item = COLLECTION_PROMPTS[state["goal_item"]]
        intermediate_steps = self._build_intermediate_steps(state)
        step_analysis, step_thoughts = self._build_intermediate_analysis_and_thoughts(state)
        goal = ChatPromptTemplate.from_template(COLLECTION_GOAL).format(
            goal_item=collection_item["goal_item"],
        )
        related_file_description = collection_item["related_file_description"]
        # Fall back to "N/A" when the item has no (or empty) extra instructions.
        if (
            "important_instructions" not in collection_item
            or len(collection_item["important_instructions"]) == 0
        ):
            important_instructions = "N/A"
        else:
            important_instructions = collection_item["important_instructions"]
        tool_names, tools_desc = get_tool_names_and_descriptions(self.custom_tools)
        system_prompt = COLLECTION_PLAN_SYSTEM_PROMPT.format(
            goal=goal,
            related_file_description=related_file_description,
            repo_structure=self.repo_structure,
            tools=tools_desc,
            intermediate_steps=intermediate_steps,
            intermediate_analysis=step_analysis,
            intermediate_thoughts=step_thoughts,
            tool_names=tool_names,
            important_instructions=important_instructions,
        )
        self._print_step(
            state,
            step_output="**Intermediate Step Output**\n" + intermediate_steps,
        )
        # This message must be an f-string: without the prefix the literal
        # text "{step_analysis}" / "{step_thoughts}" was printed.
        self._print_step(
            state,
            step_output=f"**Intermediate Step Analysis**\n{step_analysis}\n**Intermediate Step Thoughts**\n{step_thoughts}",
        )
        return system_prompt

    def _execute_directly(self, state: CollectionWorkflowState):
        """Ask the model for a plan and store it in the workflow state.

        Returns:
            A ``(state, token_usage)`` tuple; ``state["plan_actions"]`` holds
            the plan converted to a string.
        """
        system_prompt = self._prepare_system_prompt(state)
        agent = CommonAgentTwoSteps(llm=self.llm)
        res, _, token_usage, reasoning_process = agent.go(
            system_prompt=system_prompt,
            instruction_prompt="Now, let's begin the collection plan step.",
            schema=PlanAgentResultJsonSchema,
        )
        # The prompt above has already consumed the previous round's results,
        # so the per-step fields are reset before the new plan is recorded.
        PEOCommonStep._reset_step_state(state)
        res = PlanAgentResult(**res)
        self._print_step(state, step_output=f"**Reasoning Process**\n{reasoning_process}")
        self._print_step(state, step_output=f"**Plan**\n{str(res.actions)}")
        state["plan_actions"] = convert_plan_to_string(res)

        return state, token_usage
147
+
148
+
149
+
150
+
151
+
152
+
153
+
154
+
@@ -0,0 +1,179 @@
1
+
2
+ import os
3
+ import logging
4
+ import re
5
+ import json
6
+ from pydantic import BaseModel, Field
7
+ from typing import Callable, List, Optional, TypedDict, Union
8
+ from langchain_core.prompts import ChatPromptTemplate, StringPromptTemplate
9
+ from langchain_core.messages import SystemMessage, HumanMessage
10
+ from langchain_openai.chat_models.base import BaseChatOpenAI
11
+ from langchain.tools import StructuredTool, Tool, tool, BaseTool
12
+ from langchain.agents import (
13
+ initialize_agent,
14
+ AgentType,
15
+ AgentOutputParser,
16
+ create_react_agent,
17
+ AgentExecutor,
18
+ )
19
+ from langchain.schema import (
20
+ AgentFinish,
21
+ AgentAction,
22
+ )
23
+ from langgraph.graph import StateGraph, START, END
24
+
25
+ from bioguider.database.summarized_file_db import SummarizedFilesDb
26
+ from bioguider.utils.file_utils import get_file_type
27
+ from bioguider.agents.agent_utils import read_directory
28
+ from bioguider.agents.collection_task_utils import (
29
+ RELATED_FILE_GOAL_ITEM,
30
+ CollectionWorkflowState,
31
+ check_file_related_tool,
32
+ )
33
+ from bioguider.agents.common_agent import CommonAgent
34
+ from bioguider.agents.agent_tools import (
35
+ read_directory_tool,
36
+ summarize_file_tool,
37
+ read_file_tool,
38
+ )
39
+ from bioguider.agents.peo_common_step import PEOCommonStep
40
+ from bioguider.agents.prompt_utils import COLLECTION_PROMPTS
41
+ from bioguider.agents.python_ast_repl_tool import CustomPythonAstREPLTool
42
+ from bioguider.agents.agent_task import AgentTask
43
+ from bioguider.agents.collection_plan_step import CollectionPlanStep
44
+ from bioguider.agents.collection_execute_step import CollectionExecuteStep
45
+ from bioguider.agents.collection_observe_step import CollectionObserveStep
46
+
47
+ logger = logging.getLogger(__name__)
48
+
49
class CollectionTask(AgentTask):
    """Collect the repository files relevant to one goal item.

    Wires a plan step, an execute step and an observe step into a LangGraph
    state graph that loops until the observe step produces a final answer.
    """

    def __init__(
        self,
        llm: BaseChatOpenAI,
        step_callback: Callable | None = None
    ):
        """Create an un-compiled task; ``_compile`` fills in the repo details.

        Args:
            llm: Chat model shared by all steps and LLM-backed tools.
            step_callback: Optional callback forwarded to the base task and to
                the tools that report progress.
        """
        super().__init__(llm, step_callback)
        self.repo_path: str | None = None
        self.gitignore_path: str | None = None
        self.repo_structure: str | None = None
        self.goal_item: str | None = None
        self.steps: list[PEOCommonStep] = []
        # Project tool objects; only their class name, docstring and ``run``
        # method are used below, so no narrower type is assumed here.
        self.tools: list[object] | None = None
        self.custom_tools: list[BaseTool] | None = None

    def _initialize(self):
        """Build the repository listing, the tools and the three steps.

        Raises:
            ValueError: If ``repo_path`` is unset or does not exist.
        """
        # A missing path previously surfaced as a TypeError from os.path.exists.
        if self.repo_path is None or not os.path.exists(self.repo_path):
            raise ValueError(f"Repository path {self.repo_path} does not exist.")

        # One "<path> - <type>" line per entry returned by read_directory.
        files = read_directory(self.repo_path, os.path.join(self.repo_path, ".gitignore"))
        self.repo_structure = "".join(
            f"{f} - {get_file_type(os.path.join(self.repo_path, f)).value}\n"
            for f in files
        )

        collection_item = COLLECTION_PROMPTS[self.goal_item]
        related_file_goal_item_desc = ChatPromptTemplate.from_template(RELATED_FILE_GOAL_ITEM).format(
            goal_item=collection_item["goal_item"],
            related_file_description=collection_item["related_file_description"],
        )
        self.tools = [
            read_directory_tool(repo_path=self.repo_path),
            summarize_file_tool(
                llm=self.llm,
                repo_path=self.repo_path,
                output_callback=self.step_callback,
                db=self.summary_file_db,
            ),
            read_file_tool(repo_path=self.repo_path),
            check_file_related_tool(
                llm=self.llm,
                repo_path=self.repo_path,
                goal_item_desc=related_file_goal_item_desc,
                output_callback=self.step_callback,
            ),
        ]
        # Expose each project tool to LangChain under its class name, with the
        # class docstring as the description shown to the model.
        self.custom_tools = [
            Tool(
                name=wrapped.__class__.__name__,
                func=wrapped.run,
                description=wrapped.__class__.__doc__,
            )
            for wrapped in self.tools
        ]
        self.custom_tools.append(CustomPythonAstREPLTool())
        self.steps = [
            CollectionPlanStep(
                llm=self.llm,
                repo_path=self.repo_path,
                repo_structure=self.repo_structure,
                gitignore_path=self.gitignore_path,
                custom_tools=self.custom_tools,
            ),
            CollectionExecuteStep(
                llm=self.llm,
                repo_path=self.repo_path,
                repo_structure=self.repo_structure,
                gitignore_path=self.gitignore_path,
                custom_tools=self.custom_tools,
            ),
            CollectionObserveStep(
                llm=self.llm,
                repo_path=self.repo_path,
                repo_structure=self.repo_structure,
                gitignore_path=self.gitignore_path,
            ),
        ]

    def _compile(self, repo_path: str, gitignore_path: str, **kwargs):
        """Initialise the task for *repo_path* and compile the workflow graph.

        Args:
            repo_path: Path of the repository to analyse.
            gitignore_path: Path of the gitignore file handed to the steps.
            **kwargs: ``goal_item`` selects the ``COLLECTION_PROMPTS`` entry.
        """
        self.repo_path = repo_path
        self.gitignore_path = gitignore_path
        self.goal_item = kwargs.get("goal_item")
        self._initialize()

        def check_observe_step(state):
            # Stop once the observe step has produced a final answer;
            # otherwise start another plan/execute/observe round.
            if "final_answer" in state and state["final_answer"] is not None:
                self._print_step(step_name="Final Answer")
                self._print_step(step_output=state["final_answer"])
                return END
            return "plan_step"

        graph = StateGraph(CollectionWorkflowState)
        graph.add_node("plan_step", self.steps[0].execute)
        graph.add_node("execute_step", self.steps[1].execute)
        graph.add_node("observe_step", self.steps[2].execute)
        graph.add_edge(START, "plan_step")
        graph.add_edge("plan_step", "execute_step")
        graph.add_edge("execute_step", "observe_step")
        # path_map is documented as a dict or a list of node names; the
        # original passed a set, which is neither.
        graph.add_conditional_edges("observe_step", check_observe_step, ["plan_step", END])

        self.graph = graph.compile()

    def collect(self) -> list[str] | None:
        """Run the workflow and return the collected file paths.

        The final answer is expected to be a JSON object whose
        ``"final_answer"`` member is a path string or a list of paths.

        Returns:
            The list of paths, or ``None`` when the workflow produced no final
            answer or the answer could not be parsed.
        """
        s = self._go_graph({"goal_item": self.goal_item})
        if s is None or "final_answer" not in s or s["final_answer"] is None:
            return None

        try:
            # .strip() is inside the try so that a non-string answer is
            # logged and reported as None instead of raising.
            result = s["final_answer"].strip()
            json_obj = json.loads(result)
            result = json_obj["final_answer"]
        except json.JSONDecodeError:
            logger.error(f"Final answer is not a valid JSON: {result}")
            return None
        except Exception as e:
            # Previously the whole workflow state was returned here, which
            # violated the declared return type.
            logger.error(str(e))
            return None

        if isinstance(result, str):
            return [result.strip()]
        if isinstance(result, list):
            return result
        logger.error(f"Final answer is not a valid JSON list or string: {result}")
        return None
172
+
173
+
174
+
175
+
176
+
177
+
178
+
179
+