codeboarding 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agents/__init__.py +0 -0
- agents/abstraction_agent.py +150 -0
- agents/agent.py +467 -0
- agents/agent_responses.py +363 -0
- agents/cluster_methods_mixin.py +281 -0
- agents/constants.py +13 -0
- agents/dependency_discovery.py +159 -0
- agents/details_agent.py +174 -0
- agents/llm_config.py +309 -0
- agents/meta_agent.py +105 -0
- agents/planner_agent.py +105 -0
- agents/prompts/__init__.py +85 -0
- agents/prompts/abstract_prompt_factory.py +63 -0
- agents/prompts/claude_prompts.py +381 -0
- agents/prompts/deepseek_prompts.py +389 -0
- agents/prompts/gemini_flash_prompts.py +362 -0
- agents/prompts/glm_prompts.py +407 -0
- agents/prompts/gpt_prompts.py +470 -0
- agents/prompts/kimi_prompts.py +400 -0
- agents/prompts/prompt_factory.py +179 -0
- agents/tools/__init__.py +8 -0
- agents/tools/base.py +96 -0
- agents/tools/get_external_deps.py +47 -0
- agents/tools/get_method_invocations.py +47 -0
- agents/tools/read_cfg.py +60 -0
- agents/tools/read_docs.py +132 -0
- agents/tools/read_file.py +90 -0
- agents/tools/read_file_structure.py +156 -0
- agents/tools/read_git_diff.py +131 -0
- agents/tools/read_packages.py +60 -0
- agents/tools/read_source.py +105 -0
- agents/tools/read_structure.py +49 -0
- agents/tools/toolkit.py +119 -0
- agents/validation.py +383 -0
- caching/__init__.py +4 -0
- caching/cache.py +29 -0
- caching/meta_cache.py +227 -0
- codeboarding-0.9.0.dist-info/METADATA +223 -0
- codeboarding-0.9.0.dist-info/RECORD +126 -0
- codeboarding-0.9.0.dist-info/WHEEL +5 -0
- codeboarding-0.9.0.dist-info/entry_points.txt +3 -0
- codeboarding-0.9.0.dist-info/licenses/LICENSE +21 -0
- codeboarding-0.9.0.dist-info/top_level.txt +18 -0
- core/__init__.py +101 -0
- core/plugin_loader.py +46 -0
- core/protocols.py +27 -0
- core/registry.py +46 -0
- diagram_analysis/__init__.py +4 -0
- diagram_analysis/analysis_json.py +346 -0
- diagram_analysis/diagram_generator.py +486 -0
- diagram_analysis/file_coverage.py +212 -0
- diagram_analysis/incremental/__init__.py +63 -0
- diagram_analysis/incremental/component_checker.py +236 -0
- diagram_analysis/incremental/file_manager.py +217 -0
- diagram_analysis/incremental/impact_analyzer.py +238 -0
- diagram_analysis/incremental/io_utils.py +281 -0
- diagram_analysis/incremental/models.py +72 -0
- diagram_analysis/incremental/path_patching.py +164 -0
- diagram_analysis/incremental/reexpansion.py +166 -0
- diagram_analysis/incremental/scoped_analysis.py +227 -0
- diagram_analysis/incremental/updater.py +464 -0
- diagram_analysis/incremental/validation.py +48 -0
- diagram_analysis/manifest.py +152 -0
- diagram_analysis/version.py +6 -0
- duckdb_crud.py +125 -0
- github_action.py +172 -0
- health/__init__.py +3 -0
- health/checks/__init__.py +11 -0
- health/checks/circular_deps.py +48 -0
- health/checks/cohesion.py +93 -0
- health/checks/coupling.py +140 -0
- health/checks/function_size.py +85 -0
- health/checks/god_class.py +167 -0
- health/checks/inheritance.py +104 -0
- health/checks/instability.py +77 -0
- health/checks/unused_code_diagnostics.py +338 -0
- health/config.py +172 -0
- health/constants.py +19 -0
- health/models.py +186 -0
- health/runner.py +236 -0
- install.py +518 -0
- logging_config.py +105 -0
- main.py +529 -0
- monitoring/__init__.py +12 -0
- monitoring/callbacks.py +163 -0
- monitoring/context.py +158 -0
- monitoring/mixin.py +16 -0
- monitoring/paths.py +47 -0
- monitoring/stats.py +50 -0
- monitoring/writers.py +172 -0
- output_generators/__init__.py +0 -0
- output_generators/html.py +163 -0
- output_generators/html_template.py +382 -0
- output_generators/markdown.py +140 -0
- output_generators/mdx.py +171 -0
- output_generators/sphinx.py +175 -0
- repo_utils/__init__.py +277 -0
- repo_utils/change_detector.py +289 -0
- repo_utils/errors.py +6 -0
- repo_utils/git_diff.py +74 -0
- repo_utils/ignore.py +341 -0
- static_analyzer/__init__.py +335 -0
- static_analyzer/analysis_cache.py +699 -0
- static_analyzer/analysis_result.py +269 -0
- static_analyzer/cluster_change_analyzer.py +391 -0
- static_analyzer/cluster_helpers.py +79 -0
- static_analyzer/constants.py +166 -0
- static_analyzer/git_diff_analyzer.py +224 -0
- static_analyzer/graph.py +746 -0
- static_analyzer/incremental_orchestrator.py +671 -0
- static_analyzer/java_config_scanner.py +232 -0
- static_analyzer/java_utils.py +227 -0
- static_analyzer/lsp_client/__init__.py +12 -0
- static_analyzer/lsp_client/client.py +1642 -0
- static_analyzer/lsp_client/diagnostics.py +62 -0
- static_analyzer/lsp_client/java_client.py +517 -0
- static_analyzer/lsp_client/language_settings.py +97 -0
- static_analyzer/lsp_client/typescript_client.py +235 -0
- static_analyzer/programming_language.py +152 -0
- static_analyzer/reference_resolve_mixin.py +166 -0
- static_analyzer/scanner.py +95 -0
- static_analyzer/typescript_config_scanner.py +54 -0
- tool_registry.py +433 -0
- user_config.py +134 -0
- utils.py +56 -0
- vscode_constants.py +124 -0
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Optional
|
|
3
|
+
from langchain_core.tools import ArgsSchema
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
from agents.tools.base import BaseRepoTool
|
|
6
|
+
from agents.dependency_discovery import discover_dependency_files
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ExternalDepsInput(BaseModel):
    """Empty input schema: the ExternalDepsTool takes no arguments."""

    pass
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ExternalDepsTool(BaseRepoTool):
    """Tool that lists dependency-manifest files discovered in the repository."""

    name: str = "readExternalDeps"
    # FIX: the first fragment previously lacked a trailing space, so the
    # rendered description read "...dependency files.Returns a list...".
    description: str = (
        "Scans the current repository to find common dependency files. "
        "Returns a list of file paths that can be examined with the readFile tool."
    )
    args_schema: Optional[ArgsSchema] = ExternalDepsInput
    return_direct: bool = False

    def _run(self) -> str:
        """Discover dependency files and return a human-readable summary.

        Returns:
            A numbered list of discovered dependency files with instructions
            on how to read each one, or an explanatory message when none
            were found.
        """
        logger.info("[ExternalDeps Tool] Searching for dependency files")

        discovered = discover_dependency_files(self.repo_dir, self.ignore_manager)

        if not discovered:
            logger.warning("[ExternalDeps Tool] No dependency files found in the repository.")
            return "No dependency files found in this repository. Searched for common files like requirements.txt, pyproject.toml, setup.py, environment.yml, Pipfile, etc."

        summary = f"Found {len(discovered)} dependency file(s):\n\n"

        for i, item in enumerate(discovered, 1):
            # Present paths relative to the repo root so they match what
            # the readFile tool expects as input.
            relative_path = item.path.relative_to(self.repo_dir)
            summary += f'{i}. {relative_path}\n To read this file: Use the readFile tool with file_path="{relative_path}" and line_number=0\n\n'

        logger.info(
            f"[ExternalDeps Tool] Found {len(discovered)} dependency file(s): "
            f"{', '.join(str(d.path.relative_to(self.repo_dir)) for d in discovered)}"
        )

        return summary
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Optional
|
|
3
|
+
from langchain_core.tools import ArgsSchema
|
|
4
|
+
from pydantic import BaseModel, Field
|
|
5
|
+
from agents.tools.base import BaseRepoTool
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class MethodInvocationsInput(BaseModel):
    """Input schema for MethodInvocationsTool."""

    # Fully qualified method name to look up in the CFG edge list.
    method: str = Field(description="The name of the method for which to retrieve its immediate callees and calls.")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class MethodInvocationsTool(BaseRepoTool):
    """Tool that reports the direct callers and callees of a named method."""

    name: str = "getMethodInvocationsTool"
    # FIX: the previous description was copy-pasted from the CFG tool and
    # claimed the tool returned the full project CFG with "No input arguments
    # required" — it actually requires a method name and returns only that
    # method's direct call relationships, so the agent was being misled.
    description: str = (
        "Retrieves the immediate call relationships of a specific method. "
        "Requires the fully qualified method name as input. "
        "Returns which methods the given method calls and which methods call it, "
        "derived from the project's control flow graph. "
        "Use after reviewing the CFG to drill into a single method's interactions."
    )
    args_schema: Optional[ArgsSchema] = MethodInvocationsInput

    def _run(self, method: str) -> str:
        """Return the direct callers and callees of *method*.

        Args:
            method: Fully qualified name of the method to look up.

        Returns:
            One line per call relationship across all analyzed languages, or
            an explanatory message when no static analysis data or no
            matching edges exist.
        """
        if not self.static_analysis:
            return "No static analysis data available."

        results = ""
        for lang in self.static_analysis.get_languages():
            # Walk every CFG edge of this language and keep those where the
            # requested method appears as the caller or the callee.
            cfg = self.static_analysis.get_cfg(lang)
            for edge in cfg.edges:
                if edge.src_node.fully_qualified_name == method:
                    results += (
                        f"Method {edge.src_node.fully_qualified_name} is calling {edge.dst_node.fully_qualified_name}\n"
                    )
                if edge.dst_node.fully_qualified_name == method:
                    results += f"Method {edge.dst_node.fully_qualified_name} is called by {edge.src_node.fully_qualified_name}\n"
        if results:
            return results.strip()
        # If no results found, return a message indicating no calls or callees
        logger.warning(f"[MethodInvocationsTool] No method invocations found for {method}.")
        return f"No method invocations found for the {method}. Try reading the source with the `getSourceCode` tool for full details."
|
agents/tools/read_cfg.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from agents.agent_responses import Component
|
|
3
|
+
from agents.tools.base import BaseRepoTool
|
|
4
|
+
|
|
5
|
+
logger = logging.getLogger(__name__)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class GetCFGTool(BaseRepoTool):
    """Tool that renders the project's control flow graph(s) for the agent."""

    name: str = "getControlFlowGraph"
    description: str = (
        "Retrieves complete project control flow graph (CFG) showing all method calls. "
        "Primary analysis tool - use this first to understand project execution flow. "
        "Provides graphical representation of function/method relationships. "
        "Essential data - analyze this output thoroughly before using other tools. "
        "No input arguments required."
    )

    def _run(self) -> str:
        """Return the CFG of every analyzed language as an LLM-readable string.

        Returns:
            Concatenated per-language CFG text, or an explanatory message when
            no static analysis data or no CFGs are available.
        """
        if not self.static_analysis:
            return "No static analysis data available."
        result_str = ""
        for lang in self.static_analysis.get_languages():
            cfg = self.static_analysis.get_cfg(lang)
            # FIX: check for a missing CFG *before* touching its attributes —
            # the log line previously dereferenced cfg.nodes/cfg.edges ahead
            # of the None check and would raise AttributeError.
            if cfg is None:
                logger.warning(f"[CFG Tool] No control flow graph found for {lang}.")
                continue
            logger.info(
                f"[CFG Tool] Reading control flow graph for {lang}, nodes: {len(cfg.nodes)}, edges: {len(cfg.edges)}"
            )
            result_str += f"Control flow graph for {lang}:\n{cfg.llm_str()}\n"
        if not result_str:
            # Use the module logger consistently instead of the root logger.
            logger.error("[CFG Tool] No control flow graph data available.")
            return "No control flow graph data available. Ensure static analysis was performed correctly."
        return result_str

    def component_cfg(self, component: Component) -> str:
        """Return the CFG restricted to the files assigned to *component*.

        Args:
            component: Component whose ``assigned_files`` list defines which
                CFG nodes are kept.

        Returns:
            The filtered per-language CFG text, or an explanatory message
            when no nodes belong to the component.
        """
        if not self.static_analysis:
            return "No static analysis data available."
        items = 0
        result = f"Control flow graph for {component.name}:\n"
        for lang in self.static_analysis.get_languages():
            logger.info(f"[CFG Tool] Filtering CFG for component {component.name} in {lang}")
            cfg = self.static_analysis.get_cfg(lang)
            if cfg is None:
                logger.warning(f"[CFG Tool] No control flow graph found for {lang}.")
                continue
            # FIX: rebuild the skip list per language — previously it
            # accumulated across languages, inflating the skip count and
            # corrupting (even negating) the `items` tally below.
            skip_nodes = [node for node in cfg.nodes.values() if node.file_path not in component.assigned_files]
            result += f"{lang}:\n{cfg.llm_str(skip_nodes=skip_nodes)}\n"
            items += len(cfg.nodes) - len(skip_nodes)

        logger.info(f"[CFG Tool] Filtering CFG for component {component.name}, items found: {items}")
        if items == 0:
            return "No control flow graph data available for this component. Ensure static analysis was performed correctly or the component has valid source code references."
        return result
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from langchain_core.tools import ArgsSchema
|
|
4
|
+
from pydantic import BaseModel, Field
|
|
5
|
+
from agents.tools.base import BaseRepoTool
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ReadDocsFile(BaseModel):
    """Input for ReadDocsTool."""

    # Relative path of the documentation file; None triggers the README
    # fallback lookup in ReadDocsTool._run.
    file_path: str | None = Field(
        None,
        description="Path to the documentation file to read, use relative paths from the root of the project. If not provided, will read README.md",
    )
    # 0-based line the returned content window is centered on.
    line_number: int | None = Field(
        0, description="Line number to focus on. The tool will return content centered around this line."
    )
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ReadDocsTool(BaseRepoTool):
    """Tool that reads project documentation files, returning a fixed-size
    window of content centered on a requested line."""

    name: str = "readDocs"
    description: str = (
        "Reads project documentation files (README, .md, .rst, .txt). "
        "Use early in analysis to understand project purpose and architecture. "
        "Defaults to README.md if no path specified. "
        "Provides project understanding without code analysis. "
        "Focus on architecture sections, not detailed API documentation."
    )
    args_schema: ArgsSchema | None = ReadDocsFile
    return_direct: bool = False
    # Maximum number of lines returned per call.
    LINES_TO_RETURN: int = 300

    @property
    def cached_files(self) -> list[Path]:
        """
        Returns documentation files from the cached file list, sorted
        shallowest-path-first so the best (closest to root) match wins.
        """
        files = self.context.get_files()
        patterns = (".md", ".rst", ".txt", ".html")
        doc_files = []
        for path in files:
            if path.suffix.lower() in patterns:
                # Maintain additional test exclusion if needed, though get_files already handles ignore_manager
                if "tests" in path.parts or "test" in path.name.lower():
                    continue
                doc_files.append(path)
        return sorted(doc_files, key=lambda x: len(x.parts))

    def _run(self, file_path: str | None = None, line_number: int = 0) -> str:
        """
        Read a documentation file and return up to LINES_TO_RETURN numbered
        lines centered around *line_number*.

        Args:
            file_path: Relative doc path; None falls back to the README.
            line_number: 0-based line to center the window on.

        Returns:
            The numbered content window plus a list of other available
            documentation files, or an error/fallback message.
        """
        if file_path is None:
            file_path = "README"
        file_path_obj = Path(file_path)

        # Resolve the requested path against the cached doc-file list.
        read_file: Path | None = None
        if self.cached_files:
            for cached_file in self.cached_files:
                if self.is_subsequence(file_path_obj, cached_file):
                    read_file = cached_file
                    break

        if read_file is None:
            # Special-case a missing README: list alternatives instead of
            # reporting a plain lookup error.
            if file_path_obj.stem.lower() == "readme":
                if self.cached_files and self.repo_dir:
                    available_files = [str(f.relative_to(self.repo_dir)) for f in self.cached_files]
                    if not available_files:
                        return "No documentation files found in this repository."
                    return "README not found. Available documentation files:\n\n" + "\n".join(
                        f"- {f}" for f in available_files
                    )
                else:
                    return "No documentation files found in this repository."

            if self.cached_files and self.repo_dir:
                files_str = "\n".join([str(f.relative_to(self.repo_dir)) for f in self.cached_files])
            else:
                files_str = "No files available"
            return (
                f"Error: The specified file '{file_path_obj}' was not found. "
                f"Available documentation files:\n{files_str}"
            )

        try:
            with open(read_file, "r", encoding="utf-8") as file:
                logger.info(f"[ReadDocs Tool] Reading file {read_file} around line {line_number}")
                lines = file.readlines()
        except Exception as e:
            return f"Error reading file {file_path_obj}: {str(e)}"

        total_lines = len(lines)
        if line_number < 0 or line_number >= total_lines:
            if total_lines == 0:
                return f"File {file_path_obj} is empty."
            return f"Error: Line number {line_number} is out of range (0-{total_lines - 1})"

        # Compute the window bounds; near the top of the file the window is
        # anchored at line 0 rather than centered.
        if line_number < self.LINES_TO_RETURN // 2:
            start_line = 0
            end_line = min(total_lines, self.LINES_TO_RETURN)
        else:
            start_line = max(0, line_number - (self.LINES_TO_RETURN // 2))
            end_line = min(total_lines, start_line + self.LINES_TO_RETURN)
            # Near the end of the file, shift the window up so a full
            # LINES_TO_RETURN window is still returned when possible.
            if end_line - start_line < self.LINES_TO_RETURN and start_line > 0:
                potential_start = max(0, total_lines - self.LINES_TO_RETURN)
                if potential_start < start_line:
                    start_line = potential_start

        selected_lines = lines[start_line:end_line]
        # Prefix each line with its 0-based index, right-aligned to 4 chars.
        numbered_lines = [f"{i + start_line:4}:{line}" for i, line in enumerate(selected_lines)]
        content = "".join(numbered_lines)

        file_info = f"File: {file_path_obj}\n"
        if total_lines > self.LINES_TO_RETURN:
            file_info += f"Lines {start_line}-{end_line - 1} (centered around line {line_number}, total lines: {total_lines})\n\n"
        else:
            file_info += f"Full content ({total_lines} lines):\n\n"

        if self.cached_files:
            other_files = [f for f in self.cached_files if f != read_file]
        else:
            other_files = []
        result = file_info + content

        # Append the remaining doc files so the agent can continue exploring.
        if other_files and self.repo_dir is not None:
            relative_files = [str(f.relative_to(self.repo_dir)) for f in other_files]
            result += "\n\n--- Other Available Documentation Files ---\n"
            result += "\n".join(f"- {f}" for f in relative_files)

        return result
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from langchain_core.tools import ArgsSchema
|
|
4
|
+
from pydantic import BaseModel, Field
|
|
5
|
+
from agents.tools.base import BaseRepoTool
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ReadFileInput(BaseModel):
    """Input for ReadFileTool."""

    # Relative path (from the repo root) of the source file to read.
    file_path: str = Field(
        ..., description="Path to the file to read, use relative paths from the root of the project. "
    )
    # 0-based line the returned content window is centered on.
    line_number: int = Field(..., description="Line number to focus on")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ReadFileTool(BaseRepoTool):
    """Tool that returns a window of source-file content centered on a line."""

    name: str = "readFile"
    description: str = (
        "Reads specific file content around a target line number. "
        "Use only when specific implementation details are needed that CFG cannot provide. "
        "Returns 300 lines centered on the requested line. "
        "Avoid exploratory reading - use only when you know exactly what to examine."
    )
    args_schema: ArgsSchema | None = ReadFileInput
    return_direct: bool = False
    # Size of the content window; named constant replaces the previous
    # hard-coded 300/150, matching ReadDocsTool's convention.
    LINES_TO_RETURN: int = 300

    @property
    def cached_files(self) -> list[Path]:
        """Indexed source files, sorted shallowest-path-first so the best
        (closest to root) match wins during path resolution."""
        files = self.context.get_files()
        return sorted(files, key=lambda x: len(x.parts))

    def _run(self, file_path: str, line_number: int) -> str:
        """Read *file_path* and return a numbered window around *line_number*.

        Args:
            file_path: Relative path of the file, resolved against the
                cached file index.
            line_number: 0-based line to center the window on.

        Returns:
            The numbered content window, or an error message when the file
            is unknown, unreadable, empty, or the line is out of range.
        """
        logger.info(f"[ReadFile Tool] Reading file {file_path} around line {line_number}")

        file_path_obj = Path(file_path)
        read_file: Path | None = None
        if self.cached_files:
            for cached_file in self.cached_files:
                if self.is_subsequence(file_path_obj, cached_file):
                    read_file = cached_file
                    break

        common_prefix = str(self.repo_dir) if self.repo_dir else ""
        if read_file is None:
            if self.cached_files and self.repo_dir:
                # Only suggest files with the same extension to keep the
                # error message short and relevant.
                files_str = "\n".join(
                    [str(f.relative_to(self.repo_dir)) for f in self.cached_files if f.suffix == file_path_obj.suffix]
                )
            else:
                files_str = "No files cached"
            logger.error(f"[ReadFile Tool] File {file_path} not found in cached files.")
            return (
                f"Error: The specified file '{file_path}' was not found in the indexed source files. "
                f"Please ensure the path is correct and points to an existing file: {common_prefix}/\n{files_str}."
            )

        # FIX: guard the read like ReadDocsTool does — previously an
        # unreadable or non-UTF-8 file raised out of the tool.
        try:
            with open(read_file, "r", encoding="utf-8") as file:
                lines = file.readlines()
        except Exception as e:
            return f"Error reading file {file_path_obj}: {str(e)}"

        total_lines = len(lines)
        # FIX: an empty file previously produced "range (0--1)" nonsense.
        if total_lines == 0:
            return f"File {file_path_obj} is empty."
        if line_number < 0 or line_number >= total_lines:
            logger.error(f"[ReadFile Tool] Line number {line_number} is out of range. Total lines: {total_lines}")
            return f"Error: Line number {line_number} is out of range (0-{total_lines - 1})"

        # Compute the window bounds; near the top of the file the window is
        # anchored at line 0 rather than centered.
        window = self.LINES_TO_RETURN
        half = window // 2
        if line_number < half:
            start_line = 0
            end_line = min(total_lines, window)
        else:
            start_line = max(0, line_number - half)
            end_line = min(total_lines, start_line + window)
            # Near the end of the file, shift the window up so a full window
            # is still returned when possible.
            if end_line - start_line < window and start_line > 0:
                potential_start = max(0, total_lines - window)
                if potential_start < start_line:
                    start_line = potential_start

        selected_lines = lines[start_line:end_line]
        # Prefix each line with its 1-based number, right-aligned to 4 chars.
        numbered_lines = [f"{i + 1 + start_line:4}:{line}" for i, line in enumerate(selected_lines)]
        content = "".join(numbered_lines)
        logger.info(f"[ReadFile Tool] Successfully read {len(selected_lines)} lines from {file_path} ")
        return (
            f"File: {file_path}\nLines {start_line}-{end_line - 1} (centered around line {line_number}):\n\n{content}"
        )
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from langchain_core.tools import ArgsSchema
|
|
4
|
+
from pydantic import BaseModel, Field
|
|
5
|
+
|
|
6
|
+
from agents.constants import FileStructureConfig
|
|
7
|
+
from agents.tools.base import BaseRepoTool
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DirInput(BaseModel):
    """Input for FileStructureTool."""

    # Relative directory to render; '.' (the default) means the repo root.
    dir: str | None = Field(
        default=".",
        description=(
            "Relative path to the directory whose file structure should be retrieved. "
            "Defaults to the project root if not specified (i.e., use '.' for root)."
        ),
    )
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class FileStructureTool(BaseRepoTool):
    """Tool that renders a directory tree for the repo root or a subdirectory."""

    name: str = "getFileStructure"
    description: str = (
        "Returns project directory structure as a tree. "
        "Use only when project layout is unclear from existing context. "
        "Most effective for understanding overall project organization. "
        "Avoid recursive calls - use once for high-level structure understanding."
    )
    args_schema: ArgsSchema | None = DirInput
    return_direct: bool = False

    @property
    def cached_dirs(self) -> list[Path]:
        """Known directories, sorted shallowest-first for is_subsequence matching."""
        dirs = self.context.get_directories()
        # Ensure they are sorted by depth for is_subsequence logic
        return sorted(dirs, key=lambda x: len(x.parts))

    def _render_tree(self, label: str, base: Path) -> str:
        """Render *base* as a tree, shrinking depth until it fits MAX_LINES.

        FIX: the initial render previously omitted ``max_lines`` (so
        get_tree_string used its default of 100) while the retry loops
        compared against FileStructureConfig.MAX_LINES in one branch and a
        hard-coded 50000 in the other. All calls and comparisons now use the
        single FileStructureConfig.MAX_LINES cap.
        """
        max_depth = 10
        tree_lines = get_tree_string(
            base,
            max_depth=max_depth,
            max_lines=FileStructureConfig.MAX_LINES,
            ignore_manager=self.ignore_manager,
        )

        # If we hit the line limit, try again with progressively lower depths
        while len(tree_lines) >= FileStructureConfig.MAX_LINES and max_depth > 1:
            max_depth -= 1
            tree_lines = get_tree_string(
                base,
                max_depth=max_depth,
                max_lines=FileStructureConfig.MAX_LINES,
                ignore_manager=self.ignore_manager,
            )

        tree_structure = "\n".join(tree_lines)
        depth_info = f" (limited to depth {max_depth})" if max_depth < 10 else ""
        return f"The file tree for {label}{depth_info} is:\n{tree_structure}"

    def _run(self, dir: str | None = None) -> str:
        """Return the tree for *dir*; '.' renders the repository root.

        Args:
            dir: Relative directory path, resolved against the cached
                directory index.

        Returns:
            The rendered tree, or an error message when the directory is
            unknown or no directory was specified.
        """
        if dir == "." and self.repo_dir:
            return self._render_tree(dir, self.repo_dir)

        if not dir:
            return "Error: No directory specified."

        dir_path = Path(dir)
        searching_dir: Path | None = None
        if self.cached_dirs:
            for d in self.cached_dirs:
                # check if dir is a subdirectory of the cached directory
                if self.is_subsequence(dir_path, d):
                    logger.info(f"[File Structure Tool] Found directory {d}")
                    searching_dir = d
                    break

        if searching_dir is None:
            # Retry with the leading path component stripped (e.g. a
            # repo-name prefix the agent may have included).
            dir_path = Path(*dir_path.parts[1:])
            for d in self.cached_dirs:
                # check if dir is a subdirectory of the cached directory
                if self.is_subsequence(dir_path, d):
                    logger.info(f"[File Structure Tool] Found directory {d}")
                    searching_dir = d
                    break

        if searching_dir is None:
            logger.error(f"[File Structure Tool] Directory {dir} not found in cached directories.")
            cached_str = ", ".join([str(d) for d in self.cached_dirs]) if self.cached_dirs else "None"
            return f"Error: The specified directory does not exist or is empty. Available directories are: {cached_str}"

        logger.info(f"[File Structure Tool] Reading file structure for {searching_dir}")
        return self._render_tree(dir, searching_dir)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def get_tree_string(
    startpath: Path,
    indent: str = "",
    max_depth: float = float("inf"),
    current_depth: int = 0,
    max_lines: int = 100,
    ignore_manager=None,
) -> list[str]:
    """Recursively build a tree view of *startpath* as a list of lines.

    Recursion depth is capped by *max_depth* and total output by *max_lines*;
    entries rejected by *ignore_manager* are omitted. Unreadable or missing
    directories yield a single error line instead of raising.
    """
    # Past the depth cap: this level contributes nothing.
    if current_depth > max_depth:
        return []

    def _keep(p: Path) -> bool:
        return not (ignore_manager and ignore_manager.should_ignore(p))

    try:
        children = sorted(filter(_keep, startpath.iterdir()))
    except (PermissionError, FileNotFoundError):
        return [indent + "└── [Error reading directory]"]

    lines: list[str] = []
    last_index = len(children) - 1
    for idx, child in enumerate(children):
        # Bail out with a truncation marker once the budget is spent.
        if len(lines) >= max_lines:
            lines.append(indent + "└── [Output truncated due to size limits]")
            return lines

        is_last = idx == last_index
        lines.append(indent + ("└── " if is_last else "├── ") + child.name)

        if child.is_dir():
            lines.extend(
                get_tree_string(
                    child,
                    indent + (" " if is_last else "│ "),
                    max_depth,
                    current_depth + 1,
                    max_lines - len(lines),
                    ignore_manager=ignore_manager,
                )
            )

            # Re-check the budget after descending into the subtree.
            if len(lines) >= max_lines:
                sentinel = indent + "└── [Output truncated due to size limits]"
                if lines[-1] != sentinel:
                    lines.append(sentinel)
                return lines

    return lines
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from langchain_core.tools import ArgsSchema
|
|
3
|
+
from pydantic import BaseModel, Field
|
|
4
|
+
from agents.tools.base import BaseRepoTool
|
|
5
|
+
from repo_utils.git_diff import FileChange
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ReadDiffInput(BaseModel):
    """Input for ReadDiffTool."""

    # Relative path of the changed file whose diff should be shown.
    file_path: str = Field(
        ..., description="Path to the file to read diff for, use relative paths from the root of the project"
    )
    # 1-based position within the combined diff used for pagination.
    line_number: int = Field(
        default=1,
        description="Line number to focus on within the diff (1-based). For large diffs, this allows viewing different sections. Default is 1 to start from the beginning.",
    )
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ReadDiffTool(BaseRepoTool):
    """Tool that shows the additions/deletions recorded for a changed file,
    paginated in 100-line windows."""

    name: str = "readDiffFile"
    description: str = (
        "Reads the diff for a specified file and returns the changes made (additions and deletions). "
        "This tool shows what lines were added (+) and removed (-) in the file. "
        "For large diffs, it shows up to 100 lines around the specified line_number. "
        "If the diff is truncated, you can call this tool again with a different line_number to see other sections."
    )
    args_schema: ArgsSchema | None = ReadDiffInput
    return_direct: bool = False
    # Changes to serve, injected at construction time via the `diffs` kwarg.
    diffs: list[FileChange] | None = None

    def __init__(self, **kwargs):
        # Allow passing diffs via kwargs
        diffs = kwargs.pop("diffs", None)
        super().__init__(**kwargs)
        self.diffs = diffs

    def _run(self, file_path: str, line_number: int = 1) -> str:
        """Return a paginated view of the diff recorded for *file_path*.

        Args:
            file_path: Relative path of the changed file; matched exactly or
                by suffix against the recorded filenames.
            line_number: 1-based position within the combined diff to center
                the 100-line window on.

        Returns:
            A header plus a numbered window of removed (-) and added (+)
            lines, or an error message when no diff data matches.
        """
        logger.info(f"[ReadDiff Tool] Reading diff for file {file_path} around line {line_number}")

        if not self.diffs:
            return "Error: No diff information available."

        # Find the matching file change
        matching_change = None
        for change in self.diffs:
            if change.filename == file_path or change.filename.endswith(file_path):
                matching_change = change
                break

        if matching_change is None:
            # Provide helpful error message with available files
            available_files = [change.filename for change in self.diffs]
            files_str = "\n".join(available_files) if available_files else "No files with changes found"
            return f"Error: No diff found for file '{file_path}'. Available files with changes:\n{files_str}"

        # Format the diff output
        result = [
            f"File: {matching_change.filename}",
            f"Total additions: {matching_change.additions}, Total deletions: {matching_change.deletions}",
            "",
        ]

        # Combine all diff lines for pagination
        all_diff_lines = []

        # Add deletions with prefixes
        for line in matching_change.removed_lines:
            all_diff_lines.append(f"- {line}")

        # Add additions with prefixes
        for line in matching_change.added_lines:
            all_diff_lines.append(f"+ {line}")

        total_diff_lines = len(all_diff_lines)
        if total_diff_lines == 0:
            result.append(
                "No detailed line changes available (file may have been moved, renamed, or had binary changes)"
            )
            return "\n".join(result)

        # Handle pagination similar to ReadFileTool
        max_lines_to_show = 100
        line_number = max(1, line_number)  # Ensure line_number is at least 1

        if line_number > total_diff_lines:
            result.append(f"Error: Line number {line_number} is out of range (1-{total_diff_lines})")
            return "\n".join(result)

        # Calculate start and end line numbers
        if line_number <= 50:
            start_line = 0
            end_line = min(total_diff_lines, max_lines_to_show)
        else:
            # Center around the specified line number
            start_line = max(0, line_number - 51)  # -51 because line_number is 1-based
            end_line = min(total_diff_lines, start_line + max_lines_to_show)

            # Near the end of the diff, shift the window up so a full
            # 100-line window is still returned when possible.
            if end_line - start_line < max_lines_to_show and start_line > 0:
                potential_start = max(0, total_diff_lines - max_lines_to_show)
                if potential_start < start_line:
                    start_line = potential_start

        # Extract the lines to display
        displayed_lines = all_diff_lines[start_line:end_line]

        result.append(f"=== DIFF CONTENT (Lines {start_line + 1}-{end_line} of {total_diff_lines}) ===")
        for i, line in enumerate(displayed_lines):
            result.append(f"{start_line + i + 1:4}: {line}")

        # Add truncation notice if needed
        if total_diff_lines > max_lines_to_show:
            if end_line < total_diff_lines:
                result.append("")
                result.append(
                    f"*** DIFF TRUNCATED: Showing lines {start_line + 1}-{end_line} of {total_diff_lines} total diff lines ***"
                )
                result.append(f"To see more, call this tool again with line_number > {end_line}")
            elif start_line > 0:
                result.append("")
                result.append(
                    f"*** DIFF TRUNCATED: Showing lines {start_line + 1}-{end_line} of {total_diff_lines} total diff lines ***"
                )
                result.append(f"To see earlier content, call this tool again with a smaller line_number")

        return "\n".join(result)
|