codeboarding 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agents/__init__.py +0 -0
- agents/abstraction_agent.py +150 -0
- agents/agent.py +467 -0
- agents/agent_responses.py +363 -0
- agents/cluster_methods_mixin.py +281 -0
- agents/constants.py +13 -0
- agents/dependency_discovery.py +159 -0
- agents/details_agent.py +174 -0
- agents/llm_config.py +309 -0
- agents/meta_agent.py +105 -0
- agents/planner_agent.py +105 -0
- agents/prompts/__init__.py +85 -0
- agents/prompts/abstract_prompt_factory.py +63 -0
- agents/prompts/claude_prompts.py +381 -0
- agents/prompts/deepseek_prompts.py +389 -0
- agents/prompts/gemini_flash_prompts.py +362 -0
- agents/prompts/glm_prompts.py +407 -0
- agents/prompts/gpt_prompts.py +470 -0
- agents/prompts/kimi_prompts.py +400 -0
- agents/prompts/prompt_factory.py +179 -0
- agents/tools/__init__.py +8 -0
- agents/tools/base.py +96 -0
- agents/tools/get_external_deps.py +47 -0
- agents/tools/get_method_invocations.py +47 -0
- agents/tools/read_cfg.py +60 -0
- agents/tools/read_docs.py +132 -0
- agents/tools/read_file.py +90 -0
- agents/tools/read_file_structure.py +156 -0
- agents/tools/read_git_diff.py +131 -0
- agents/tools/read_packages.py +60 -0
- agents/tools/read_source.py +105 -0
- agents/tools/read_structure.py +49 -0
- agents/tools/toolkit.py +119 -0
- agents/validation.py +383 -0
- caching/__init__.py +4 -0
- caching/cache.py +29 -0
- caching/meta_cache.py +227 -0
- codeboarding-0.9.0.dist-info/METADATA +223 -0
- codeboarding-0.9.0.dist-info/RECORD +126 -0
- codeboarding-0.9.0.dist-info/WHEEL +5 -0
- codeboarding-0.9.0.dist-info/entry_points.txt +3 -0
- codeboarding-0.9.0.dist-info/licenses/LICENSE +21 -0
- codeboarding-0.9.0.dist-info/top_level.txt +18 -0
- core/__init__.py +101 -0
- core/plugin_loader.py +46 -0
- core/protocols.py +27 -0
- core/registry.py +46 -0
- diagram_analysis/__init__.py +4 -0
- diagram_analysis/analysis_json.py +346 -0
- diagram_analysis/diagram_generator.py +486 -0
- diagram_analysis/file_coverage.py +212 -0
- diagram_analysis/incremental/__init__.py +63 -0
- diagram_analysis/incremental/component_checker.py +236 -0
- diagram_analysis/incremental/file_manager.py +217 -0
- diagram_analysis/incremental/impact_analyzer.py +238 -0
- diagram_analysis/incremental/io_utils.py +281 -0
- diagram_analysis/incremental/models.py +72 -0
- diagram_analysis/incremental/path_patching.py +164 -0
- diagram_analysis/incremental/reexpansion.py +166 -0
- diagram_analysis/incremental/scoped_analysis.py +227 -0
- diagram_analysis/incremental/updater.py +464 -0
- diagram_analysis/incremental/validation.py +48 -0
- diagram_analysis/manifest.py +152 -0
- diagram_analysis/version.py +6 -0
- duckdb_crud.py +125 -0
- github_action.py +172 -0
- health/__init__.py +3 -0
- health/checks/__init__.py +11 -0
- health/checks/circular_deps.py +48 -0
- health/checks/cohesion.py +93 -0
- health/checks/coupling.py +140 -0
- health/checks/function_size.py +85 -0
- health/checks/god_class.py +167 -0
- health/checks/inheritance.py +104 -0
- health/checks/instability.py +77 -0
- health/checks/unused_code_diagnostics.py +338 -0
- health/config.py +172 -0
- health/constants.py +19 -0
- health/models.py +186 -0
- health/runner.py +236 -0
- install.py +518 -0
- logging_config.py +105 -0
- main.py +529 -0
- monitoring/__init__.py +12 -0
- monitoring/callbacks.py +163 -0
- monitoring/context.py +158 -0
- monitoring/mixin.py +16 -0
- monitoring/paths.py +47 -0
- monitoring/stats.py +50 -0
- monitoring/writers.py +172 -0
- output_generators/__init__.py +0 -0
- output_generators/html.py +163 -0
- output_generators/html_template.py +382 -0
- output_generators/markdown.py +140 -0
- output_generators/mdx.py +171 -0
- output_generators/sphinx.py +175 -0
- repo_utils/__init__.py +277 -0
- repo_utils/change_detector.py +289 -0
- repo_utils/errors.py +6 -0
- repo_utils/git_diff.py +74 -0
- repo_utils/ignore.py +341 -0
- static_analyzer/__init__.py +335 -0
- static_analyzer/analysis_cache.py +699 -0
- static_analyzer/analysis_result.py +269 -0
- static_analyzer/cluster_change_analyzer.py +391 -0
- static_analyzer/cluster_helpers.py +79 -0
- static_analyzer/constants.py +166 -0
- static_analyzer/git_diff_analyzer.py +224 -0
- static_analyzer/graph.py +746 -0
- static_analyzer/incremental_orchestrator.py +671 -0
- static_analyzer/java_config_scanner.py +232 -0
- static_analyzer/java_utils.py +227 -0
- static_analyzer/lsp_client/__init__.py +12 -0
- static_analyzer/lsp_client/client.py +1642 -0
- static_analyzer/lsp_client/diagnostics.py +62 -0
- static_analyzer/lsp_client/java_client.py +517 -0
- static_analyzer/lsp_client/language_settings.py +97 -0
- static_analyzer/lsp_client/typescript_client.py +235 -0
- static_analyzer/programming_language.py +152 -0
- static_analyzer/reference_resolve_mixin.py +166 -0
- static_analyzer/scanner.py +95 -0
- static_analyzer/typescript_config_scanner.py +54 -0
- tool_registry.py +433 -0
- user_config.py +134 -0
- utils.py +56 -0
- vscode_constants.py +124 -0
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
import time
|
|
4
|
+
|
|
5
|
+
from .client import LSPClient
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TypeScriptClient(LSPClient):
    """
    TypeScript/JavaScript-specific Language Server Protocol client.
    Extends the base LSPClient with TypeScript-specific functionality.

    What this subclass adds (all visible below):
    - a pre-start check that node_modules / package.json exist (log-only),
    - an LSP ``initialize`` handshake with TS-oriented capabilities,
    - project bootstrapping by opening a few sample files so tsserver
      loads the project, then closing them again,
    - a ``workspace/symbol`` probe to validate the project actually loaded.
    """

    def handle_notification(self, method: str, params: dict) -> None:
        """
        Handle notifications from the TypeScript language server.

        TypeScript language server notifications are not needed for our analysis,
        so this method intentionally does nothing. The base implementation is sufficient.

        Args:
            method: The LSP notification method name
            params: The notification parameters
        """
        # TypeScript LSP server notifications are not needed for static analysis
        # The server handles project loading internally without requiring client tracking
        pass

    def start(self) -> None:
        """Starts the language server, after checking project dependencies."""
        # Log whether node_modules/package.json are present.
        # NOTE: this only warns — nothing is installed automatically.
        self._ensure_dependencies()

        # Call parent start method
        super().start()

    def _ensure_dependencies(self) -> None:
        """Log whether node_modules (and package.json) exist; never installs anything."""
        node_modules_path = self.project_path / "node_modules"

        if node_modules_path.exists():
            logger.info(f"node_modules found at: {node_modules_path}")
            return

        logger.warning(f"node_modules not found in {self.project_path}")

        # Check if package.json exists
        package_json = self.project_path / "package.json"
        if not package_json.exists():
            logger.warning(f"package.json not found in {self.project_path}.")
            return

    def _initialize(self) -> None:
        """Performs the LSP initialization handshake.

        Raises:
            RuntimeError: if the server's ``initialize`` response contains an error.
        """
        logger.info(f"Initializing connection for {self.language_id}...")
        params = {
            "processId": os.getpid(),
            "rootUri": self.project_path.as_uri(),
            "capabilities": {
                "textDocument": {
                    "callHierarchy": {"dynamicRegistration": True},
                    "documentSymbol": {"hierarchicalDocumentSymbolSupport": True},
                    "typeHierarchy": {"dynamicRegistration": True},
                    "references": {"dynamicRegistration": True},
                    "semanticTokens": {"dynamicRegistration": True},
                },
                "workspace": {
                    "configuration": True,
                    "workspaceFolders": True,
                    "didChangeConfiguration": {"dynamicRegistration": True},
                },
            },
            # Top-level "workspace" key (distinct from capabilities.workspace).
            "workspace": {"applyEdit": True, "workspaceEdit": {"documentChanges": True}},
        }

        # Allow subclasses to customize initialization parameters
        params = self._customize_initialization_params(params)

        init_id = self._send_request("initialize", params)
        # Use longer timeout for initialization as it may involve full workspace indexing
        response = self._wait_for_response(init_id, timeout=360)

        if "error" in response:
            raise RuntimeError(f"Initialization failed: {response['error']}")

        logger.info("Initialization successful.")
        self._send_notification("initialized", {})

        # Allow subclasses to perform post-initialization setup
        self._configure_typescript_workspace()

    def _customize_initialization_params(self, params: dict) -> dict:
        """Add TypeScript-specific initialization parameters (mutates and returns params)."""
        params["workspaceFolders"] = [{"uri": self.project_path.as_uri(), "name": self.project_path.name}]

        params["initializationOptions"] = {
            "preferences": {"includeCompletionsForModuleExports": True, "includeCompletionsWithSnippetText": True},
            "tsserver": {"logVerbosity": "off"},  # Reduce noise in logs
        }

        return params

    def _configure_typescript_workspace(self) -> None:
        """Send TypeScript-specific workspace configuration after initialization.

        Best-effort: any failure is logged as a warning and swallowed so that
        a mis-configured workspace does not abort client startup.
        """
        try:
            # Check if we have TypeScript/JavaScript files
            ts_files = self._find_typescript_files()

            if not ts_files:
                logger.warning(f"No TypeScript/JavaScript files found in {self.project_path}")
                return

            logger.info(f"Found {len(ts_files)} TypeScript/JavaScript files")

            # Notify workspace folders change
            self._send_notification(
                "workspace/didChangeWorkspaceFolders",
                {
                    "event": {
                        "added": [{"uri": self.project_path.as_uri(), "name": self.project_path.name}],
                        "removed": [],
                    }
                },
            )

            # Process configuration files
            config_found = self._process_config_files()

            # Bootstrap project by opening sample files
            self._bootstrap_project(ts_files, config_found)

        except Exception as e:
            logger.warning(f"Failed to configure TypeScript workspace: {e}")

    def _find_typescript_files(self) -> list:
        """Find all TypeScript/JavaScript files in the project (filtered to source files)."""
        all_files = []
        for pattern in ["*.ts", "*.tsx", "*.js", "*.jsx"]:
            all_files.extend(list(self.project_path.rglob(pattern)))

        # filter_src_files is provided by the LSPClient base class.
        return self.filter_src_files(all_files)

    def _process_config_files(self) -> bool:
        """Process TypeScript configuration files and return True if any found.

        For each existing config file a didChangeWatchedFiles notification is
        sent so the server (re)reads its project configuration.
        """
        config_files = [
            self.project_path / "tsconfig.json",
            self.project_path / "jsconfig.json",
            self.project_path / "package.json",
        ]

        config_found = False
        for config_path in config_files:
            if config_path.exists():
                logger.info(f"Found configuration file: {config_path}")
                config_found = True
                self._send_notification(
                    "workspace/didChangeWatchedFiles",
                    {"changes": [{"uri": config_path.as_uri(), "type": 1}]},  # Created/Changed
                )

        return config_found

    def _bootstrap_project(self, ts_files: list, config_found: bool) -> None:
        """Bootstrap the TypeScript project by opening (up to 3) sample files.

        tsserver only loads a project once at least one document is open, so a
        few files are opened, the server is given time to index, the project is
        validated, and the bootstrap files are closed again.
        """
        logger.info("Opening sample files to bootstrap TypeScript project...")
        # Files are already filtered in _find_typescript_files
        sample_files = ts_files[:3]

        # Open bootstrap files
        for file_path in sample_files:
            try:
                content = file_path.read_text(encoding="utf-8")
                file_uri = file_path.as_uri()
                self._send_notification(
                    "textDocument/didOpen",
                    {"textDocument": {"uri": file_uri, "languageId": self.language_id, "version": 1, "text": content}},
                )
                logger.debug(f"Opened bootstrap file: {file_path}")
            except Exception as e:
                # Unreadable file (encoding, permissions) — skip it, others may work.
                logger.debug(f"Could not open bootstrap file {file_path}: {e}")

        # Wait for project initialization; allow extra time when no config file
        # was found, since the server then has to infer the project itself.
        wait_time = 5 if config_found else 8
        logger.info(f"Waiting {wait_time}s for TypeScript server to initialize project...")
        time.sleep(wait_time)

        # Validate and close bootstrap files
        if self._validate_typescript_project():
            logger.info("TypeScript project successfully loaded!")
        else:
            logger.warning("TypeScript project still not loaded, but continuing...")

        self._close_bootstrap_files(sample_files)

    def _close_bootstrap_files(self, sample_files: list) -> None:
        """Close bootstrap files that were opened for project initialization (best-effort)."""
        for file_path in sample_files:
            try:
                self._send_notification("textDocument/didClose", {"textDocument": {"uri": file_path.as_uri()}})
            except Exception:
                # Closing is cosmetic; a failure here must not abort anything.
                pass

    def _prepare_for_analysis(self) -> None:
        """TypeScript-specific preparation before analysis (extra settle time + validation)."""
        logger.info("Waiting additional time for TypeScript server to fully initialize...")
        time.sleep(2)

        if not self._validate_typescript_project():
            logger.warning("TypeScript project not properly loaded. Analysis may be limited.")

    def _validate_typescript_project(self) -> bool:
        """Validate that the TypeScript server has a project loaded.

        Sends a throwaway ``workspace/symbol`` query; a "No Project" error in
        the response is the telltale sign that tsserver has not loaded the
        project. Other errors are treated as non-fatal (returns True).
        """
        try:
            logger.debug("Validating TypeScript project is loaded...")
            params = {"query": "test"}
            req_id = self._send_request("workspace/symbol", params)
            response = self._wait_for_response(req_id)

            if "error" in response:
                error_msg = response["error"]
                if "No Project" in str(error_msg):
                    logger.error("TypeScript server reports 'No Project' - project not properly loaded")
                    return False
                else:
                    # Any other error may still mean the project is usable.
                    logger.warning(f"workspace/symbol test failed but may work: {error_msg}")
                    return True

            logger.debug("TypeScript project validation successful")
            return True

        except Exception as e:
            logger.error(f"Failed to validate TypeScript project: {e}")
            return False
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel, Field
|
|
5
|
+
|
|
6
|
+
from static_analyzer.constants import Language
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class LanguageConfig(BaseModel):
    """Common base for per-language configuration objects.

    Instances are frozen (immutable) so a configuration can be shared
    between components without risk of accidental mutation.
    """

    # Pydantic v2 model settings: freeze instances after construction.
    model_config = dict(frozen=True)
|
16
|
+
|
|
17
|
+
class JavaConfig(LanguageConfig):
    """Configuration specific to the Java toolchain (JDTLS location)."""

    # Root directory of the JDTLS installation used to launch the Java LSP server.
    jdtls_root: Path = Field(description="Path to the JDTLS (Java Language Server) installation directory")
|
22
|
+
|
|
23
|
+
class ProgrammingLanguage:
    """One language detected in a repository, plus what is needed to drive
    an LSP server for it (launch command, server key, file suffixes)."""

    def __init__(
        self,
        language: str,
        size: int,
        percentage: float,
        suffixes: list[str],
        server_commands: list[str] | None = None,
        lsp_server_key: str | None = None,
        language_specific_config: LanguageConfig | None = None,
    ):
        self.language = language
        self.size = size
        self.percentage = percentage
        self.suffixes = suffixes
        self.server_commands = server_commands
        # Related languages (e.g. JS, TSX, JSX -> typescript) may share one
        # LSP server key; default to the lowercased language name.
        self.lsp_server_key = lsp_server_key or language.lower()
        # Optional language-specific settings (e.g. JavaConfig for Java).
        self.language_specific_config = language_specific_config

    def get_suffix_pattern(self) -> list[str]:
        """Return glob patterns (one per suffix) suitable for Path.rglob()."""
        if not self.suffixes:
            return ["*"]
        return [f"*.{sfx.lstrip('.')}" for sfx in self.suffixes]

    def get_language_id(self) -> str:
        """LSP language identifier derived from the display name."""
        return self.language.lower().replace(" ", "_")

    def get_server_parameters(self) -> list[str]:
        """Return the command used to launch the LSP server.

        Raises:
            ValueError: when no server command is configured for this language.
        """
        if self.server_commands:
            return self.server_commands
        raise ValueError(
            f"No server commands defined for {self.language}. "
            "Please ensure the language is supported and has server commands defined."
        )

    def is_supported_lang(self) -> bool:
        """True when a launch command is known for this language."""
        return self.server_commands is not None

    def __hash__(self):
        # Identity follows the LSP server key, mirroring __eq__.
        return hash(self.lsp_server_key)

    def __eq__(self, other):
        return isinstance(other, ProgrammingLanguage) and self.lsp_server_key == other.lsp_server_key

    def __str__(self):
        return f"ProgrammingLanguage(language={self.language}, lsp_server_key={self.lsp_server_key}, size={self.size}, percentage={self.percentage:.2f}%, suffixes={self.suffixes})"
|
77
|
+
|
|
78
|
+
class ProgrammingLanguageBuilder:
    """Builder to create ProgrammingLanguage instances from tokei output with greedy LSP matching."""

    def __init__(self, lsp_configs: dict):
        self.lsp_configs = lsp_configs
        # Reverse index: normalized file extension (with leading dot) -> LSP config key.
        self._extension_to_lsp: dict[str, str] = {}
        for lsp_server_key, config in lsp_configs.items():
            for ext in config.get("file_extensions", []):
                dotted = ext if ext.startswith(".") else f".{ext}"
                self._extension_to_lsp[dotted] = lsp_server_key

    def _find_lsp_server_key(self, tokei_language: str, file_suffixes: set[str]) -> str | None:
        """Resolve the LSP config key for a tokei language.

        Tries a direct (case-insensitive) name match against lsp_configs first,
        then falls back to matching any of the reported file suffixes against
        the extension index.

        Args:
            tokei_language: Language name from tokei output (e.g., "JavaScript", "TSX")
            file_suffixes: Set of file suffixes from tokei reports

        Returns:
            LSP config key if found, None otherwise
        """
        direct_key = tokei_language.lower()
        if direct_key in self.lsp_configs:
            return direct_key

        for suffix in file_suffixes:
            dotted = suffix if suffix.startswith(".") else f".{suffix}"
            matched = self._extension_to_lsp.get(dotted)
            if matched is not None:
                return matched

        return None

    def build(
        self,
        tokei_language: str,
        code_count: int,
        percentage: float,
        file_suffixes: set[str],
    ) -> ProgrammingLanguage:
        """Assemble a ProgrammingLanguage from tokei stats plus any matching LSP config."""
        lsp_server_key = self._find_lsp_server_key(tokei_language, file_suffixes)

        server_commands: list | None = None
        config_suffixes: set[str] = set()
        language_specific_config: LanguageConfig | None = None

        if lsp_server_key and lsp_server_key in self.lsp_configs:
            config = self.lsp_configs[lsp_server_key]
            server_commands = config.get("command")
            config_suffixes = set(config.get("file_extensions", []))

            # Attach language-specific config where the LSP server needs one.
            if lsp_server_key == Language.JAVA and "jdtls_root" in config:
                language_specific_config = JavaConfig(jdtls_root=Path(config["jdtls_root"]))

        # Suffixes come from both tokei's reports and the server config.
        return ProgrammingLanguage(
            language=tokei_language,
            size=code_count,
            percentage=percentage,
            suffixes=list(file_suffixes | config_suffixes),
            server_commands=server_commands,
            lsp_server_key=lsp_server_key,
            language_specific_config=language_specific_config,
        )

    def get_supported_extensions(self) -> set[str]:
        """All file extensions covered by the configured LSP servers."""
        return set(self._extension_to_lsp.keys())
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from agents.agent_responses import AnalysisInsights
|
|
8
|
+
from static_analyzer.analysis_result import StaticAnalysisResults
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ReferenceResolverMixin:
    """Mixin that repairs source-code references on an AnalysisInsights.

    Each unresolved reference is run through three strategies in order —
    exact symbol match, loose match, then file-path heuristics. References
    that stay unresolved are pruned from the analysis, and surviving paths
    are converted to repo-relative form.
    """

    # Provided by the Agent base class this mixin is combined with.
    _parse_invoke: Any

    def __init__(self, repo_dir: Path, static_analysis: StaticAnalysisResults):
        # Repository root used to join and relativize reference paths.
        self.repo_dir = repo_dir
        # Static-analysis index queried for exact/loose symbol lookups.
        self.static_analysis = static_analysis

    def fix_source_code_reference_lines(self, analysis: AnalysisInsights) -> AnalysisInsights:
        """Resolve every component reference to an existing file, drop the
        failures, and return the analysis with repo-relative paths."""
        logger.info(f"Fixing source code reference lines for the analysis: {analysis.llm_str()}")
        for component in analysis.components:
            for reference in component.key_entities:
                # Check if the file is already resolved
                if reference.reference_file is not None and os.path.exists(reference.reference_file):
                    continue

                self._resolve_single_reference(reference, component.assigned_files)

        # Remove unresolved references
        self._remove_unresolved_references(analysis)

        return self._relative_paths(analysis)

    def _resolve_single_reference(self, reference, file_candidates: list[str] | None = None) -> None:
        """Orchestrates different resolution strategies for a single reference.

        Mutates ``reference`` in place on success; leaves it untouched on
        failure (cleanup happens later in _remove_unresolved_references).
        """
        assert self.static_analysis is not None, "static_analysis required for reference resolution"
        # Normalize path-style names (a/b/c) into dotted qualified names (a.b.c).
        qname = reference.qualified_name.replace(os.sep, ".")

        for lang in self.static_analysis.get_languages():
            # Try exact match first
            if self._try_exact_match(reference, qname, lang):
                return

            # Try loose matching
            if self._try_loose_match(reference, qname, lang):
                return

            # Try file path resolution
            if self._try_file_path_resolution(reference, qname, lang, file_candidates):
                return

        # No resolution found - will be cleaned up later
        logger.warning(f"[Reference Resolution] Could not resolve reference {reference.qualified_name} in any language")

    def _try_exact_match(self, reference, qname, lang) -> bool:
        """Attempts exact reference matching against the static-analysis index."""
        try:
            node = self.static_analysis.get_reference(lang, qname)
            reference.reference_file = node.file_path
            reference.reference_start_line = node.line_start + 1  # match 1 based indexing
            reference.reference_end_line = node.line_end + 1  # match 1 based indexing
            reference.qualified_name = qname
            logger.info(
                f"[Reference Resolution] Matched {reference.qualified_name} in {lang} at {reference.reference_file}"
            )
            return True
        # NOTE(review): catching FileExistsError is unusual — presumably
        # get_reference raises it for ambiguous/duplicate symbols; confirm.
        except (ValueError, FileExistsError) as e:
            logger.warning(f"[Reference Resolution] Exact match failed for {reference.qualified_name} in {lang}: {e}")
            return False

    def _try_loose_match(self, reference, qname, lang):
        """Attempts loose reference matching.

        Returns True on success, False when the lookup raises; when the
        lookup succeeds but yields no node it falls through and implicitly
        returns None (falsy), which callers treat the same as False.
        """
        try:
            _, node = self.static_analysis.get_loose_reference(lang, qname)
            if node is not None:
                reference.reference_file = node.file_path
                reference.reference_start_line = node.line_start + 1
                reference.reference_end_line = node.line_end + 1
                reference.qualified_name = qname
                logger.info(
                    f"[Reference Resolution] Loosely matched {reference.qualified_name} in {lang} at {reference.reference_file}"
                )
                return True
        except Exception as e:
            logger.warning(f"[Reference Resolution] Loose match failed for {qname} in {lang}: {e}")
            return False

    def _try_file_path_resolution(self, reference, qname, lang, file_candidates: list[str] | None = None) -> bool:
        """Attempts to resolve reference through file path matching."""
        # First try existing reference file path
        if self._try_existing_reference_file(reference, lang):
            return True

        # Then try qualified name as file path
        return self._try_qualified_name_as_path(reference, qname, lang, file_candidates)

    def _try_existing_reference_file(self, reference, lang) -> bool:
        """Tries to resolve using existing reference file path (joined to repo_dir).

        Side effect: a relative reference_file that does not exist under
        repo_dir is reset to None so later cleanup can discard it.
        """
        if (reference.reference_file is not None) and (not Path(reference.reference_file).is_absolute()):
            joined_path = os.path.join(self.repo_dir, reference.reference_file)
            if os.path.exists(joined_path):
                reference.reference_file = joined_path
                logger.info(
                    f"[Reference Resolution] File path matched for {reference.qualified_name} in {lang} at {reference.reference_file}"
                )
                return True
            else:
                reference.reference_file = None
        return False

    def _try_qualified_name_as_path(self, reference, qname, lang, file_candidates: list[str] | None = None) -> bool:
        """Tries to resolve qualified name as various file path patterns."""
        file_path = qname.replace(".", os.sep)  # Get file path
        full_path = os.path.join(self.repo_dir, file_path)
        # Re-join the last path component with a dot: ".../pkg/Symbol" -> ".../pkg.Symbol";
        # presumably this targets a symbol living inside file "pkg" — confirm.
        file_ref = ".".join(full_path.rsplit(os.sep, 1))
        extra_paths = file_candidates or []
        # NOTE(review): the suffixed candidates use the *relative* file_path,
        # so these existence checks depend on the current working directory
        # being the repo root — verify against callers.
        paths = [full_path, f"{file_path}.py", f"{file_path}.ts", f"{file_path}.tsx", file_ref, *extra_paths]

        for path in paths:
            if os.path.exists(path):
                reference.reference_file = str(path)
                logger.info(
                    f"[Reference Resolution] Path matched for {reference.qualified_name} in {lang} at {reference.reference_file}"
                )
                return True
        return False

    def _remove_unresolved_references(self, analysis: AnalysisInsights) -> None:
        """Remove references and assigned files that couldn't be resolved to existing files."""
        for component in analysis.components:
            # Remove unresolved key_entities
            original_ref_count = len(component.key_entities)
            component.key_entities = [
                ref
                for ref in component.key_entities
                if ref.reference_file is not None and os.path.exists(ref.reference_file)
            ]
            removed_ref_count = original_ref_count - len(component.key_entities)
            if removed_ref_count > 0:
                logger.info(
                    f"[Reference Resolution] Removed {removed_ref_count} unresolved reference(s) "
                    f"from component '{component.name}'"
                )

            # Remove unresolved assigned_files (accepted if found under repo_dir OR as-is)
            original_file_count = len(component.assigned_files)
            component.assigned_files = [
                f
                for f in component.assigned_files
                if os.path.exists(os.path.join(self.repo_dir, f)) or os.path.exists(f)
            ]
            removed_file_count = original_file_count - len(component.assigned_files)
            if removed_file_count > 0:
                logger.info(
                    f"[Reference Resolution] Removed {removed_file_count} unresolved assigned file(s) "
                    f"from component '{component.name}'"
                )

    def _relative_paths(self, analysis: AnalysisInsights) -> AnalysisInsights:
        """Convert all reference file paths to relative paths (relative to repo_dir)."""
        for component in analysis.components:
            for reference in component.key_entities:
                # Only paths under the repo root are relativized; others are kept verbatim.
                if reference.reference_file and reference.reference_file.startswith(str(self.repo_dir)):
                    reference.reference_file = os.path.relpath(reference.reference_file, self.repo_dir)
        return analysis
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
import subprocess
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Set
|
|
6
|
+
|
|
7
|
+
from static_analyzer.programming_language import ProgrammingLanguage, ProgrammingLanguageBuilder
|
|
8
|
+
from utils import get_config
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ProjectScanner:
    """Detects the programming languages used in a repository by running Tokei
    and mapping its per-language statistics onto ProgrammingLanguage objects."""

    def __init__(self, repo_location: Path):
        # Root of the repository to scan.
        self.repo_location = repo_location
        # Populated by scan(): every text file path reported by Tokei.
        self.all_text_files: list[str] = []

    def scan(self) -> list[ProgrammingLanguage]:
        """
        Scan the repository using Tokei and return parsed results.

        Also populates self.all_text_files with all text file paths found by Tokei.

        Returns:
            list[ProgrammingLanguage]: technologies with their sizes, percentages, and suffixes

        Raises:
            subprocess.CalledProcessError: if the tokei command exits non-zero (check=True).
            json.JSONDecodeError: if tokei's stdout is not valid JSON.
        """
        commands = get_config("tools")["tokei"]["command"]
        result = subprocess.run(commands, cwd=self.repo_location, capture_output=True, text=True, check=True)

        server_config = get_config("lsp_servers")
        builder = ProgrammingLanguageBuilder(server_config)

        # Parse Tokei JSON output
        tokei_data = json.loads(result.stdout)

        # Compute total code count
        total_code = tokei_data.get("Total", {}).get("code", 0)
        if not total_code:
            logger.warning("No total code count found in Tokei output")
            return []

        programming_languages: list[ProgrammingLanguage] = []
        all_files: list[str] = []
        for technology, stats in tokei_data.items():
            if technology == "Total":
                continue

            # Collect ALL text file paths from Tokei for file coverage,
            # including languages with code_count == 0 (e.g. Markdown is 100% comments)
            report_names = [report["name"] for report in stats.get("reports", [])]
            all_files.extend(report_names)

            code_count = stats.get("code", 0)
            if code_count == 0:
                continue

            percentage = code_count / total_code * 100

            # Extract suffixes from all of this language's reports in one pass
            # (previously one _extract_suffixes call per single-element list).
            suffixes = self._extract_suffixes(report_names)

            pl = builder.build(
                tokei_language=technology,
                code_count=code_count,
                percentage=percentage,
                file_suffixes=suffixes,
            )

            logger.debug(f"Found: {pl}")
            # Ignore languages accounting for less than 1% of the total code.
            if pl.percentage >= 1:
                programming_languages.append(pl)

        self.all_text_files = all_files
        return programming_languages

    @staticmethod
    def _extract_suffixes(files: list[str]) -> Set[str]:
        """
        Extract unique file suffixes from a list of files.

        Args:
            files (list[str]): list of file paths

        Returns:
            Set[str]: Unique file extensions/suffixes (empty suffixes are skipped)
        """
        return {suffix for file_path in files if (suffix := Path(file_path).suffix)}