codeboarding 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. agents/__init__.py +0 -0
  2. agents/abstraction_agent.py +150 -0
  3. agents/agent.py +467 -0
  4. agents/agent_responses.py +363 -0
  5. agents/cluster_methods_mixin.py +281 -0
  6. agents/constants.py +13 -0
  7. agents/dependency_discovery.py +159 -0
  8. agents/details_agent.py +174 -0
  9. agents/llm_config.py +309 -0
  10. agents/meta_agent.py +105 -0
  11. agents/planner_agent.py +105 -0
  12. agents/prompts/__init__.py +85 -0
  13. agents/prompts/abstract_prompt_factory.py +63 -0
  14. agents/prompts/claude_prompts.py +381 -0
  15. agents/prompts/deepseek_prompts.py +389 -0
  16. agents/prompts/gemini_flash_prompts.py +362 -0
  17. agents/prompts/glm_prompts.py +407 -0
  18. agents/prompts/gpt_prompts.py +470 -0
  19. agents/prompts/kimi_prompts.py +400 -0
  20. agents/prompts/prompt_factory.py +179 -0
  21. agents/tools/__init__.py +8 -0
  22. agents/tools/base.py +96 -0
  23. agents/tools/get_external_deps.py +47 -0
  24. agents/tools/get_method_invocations.py +47 -0
  25. agents/tools/read_cfg.py +60 -0
  26. agents/tools/read_docs.py +132 -0
  27. agents/tools/read_file.py +90 -0
  28. agents/tools/read_file_structure.py +156 -0
  29. agents/tools/read_git_diff.py +131 -0
  30. agents/tools/read_packages.py +60 -0
  31. agents/tools/read_source.py +105 -0
  32. agents/tools/read_structure.py +49 -0
  33. agents/tools/toolkit.py +119 -0
  34. agents/validation.py +383 -0
  35. caching/__init__.py +4 -0
  36. caching/cache.py +29 -0
  37. caching/meta_cache.py +227 -0
  38. codeboarding-0.9.0.dist-info/METADATA +223 -0
  39. codeboarding-0.9.0.dist-info/RECORD +126 -0
  40. codeboarding-0.9.0.dist-info/WHEEL +5 -0
  41. codeboarding-0.9.0.dist-info/entry_points.txt +3 -0
  42. codeboarding-0.9.0.dist-info/licenses/LICENSE +21 -0
  43. codeboarding-0.9.0.dist-info/top_level.txt +18 -0
  44. core/__init__.py +101 -0
  45. core/plugin_loader.py +46 -0
  46. core/protocols.py +27 -0
  47. core/registry.py +46 -0
  48. diagram_analysis/__init__.py +4 -0
  49. diagram_analysis/analysis_json.py +346 -0
  50. diagram_analysis/diagram_generator.py +486 -0
  51. diagram_analysis/file_coverage.py +212 -0
  52. diagram_analysis/incremental/__init__.py +63 -0
  53. diagram_analysis/incremental/component_checker.py +236 -0
  54. diagram_analysis/incremental/file_manager.py +217 -0
  55. diagram_analysis/incremental/impact_analyzer.py +238 -0
  56. diagram_analysis/incremental/io_utils.py +281 -0
  57. diagram_analysis/incremental/models.py +72 -0
  58. diagram_analysis/incremental/path_patching.py +164 -0
  59. diagram_analysis/incremental/reexpansion.py +166 -0
  60. diagram_analysis/incremental/scoped_analysis.py +227 -0
  61. diagram_analysis/incremental/updater.py +464 -0
  62. diagram_analysis/incremental/validation.py +48 -0
  63. diagram_analysis/manifest.py +152 -0
  64. diagram_analysis/version.py +6 -0
  65. duckdb_crud.py +125 -0
  66. github_action.py +172 -0
  67. health/__init__.py +3 -0
  68. health/checks/__init__.py +11 -0
  69. health/checks/circular_deps.py +48 -0
  70. health/checks/cohesion.py +93 -0
  71. health/checks/coupling.py +140 -0
  72. health/checks/function_size.py +85 -0
  73. health/checks/god_class.py +167 -0
  74. health/checks/inheritance.py +104 -0
  75. health/checks/instability.py +77 -0
  76. health/checks/unused_code_diagnostics.py +338 -0
  77. health/config.py +172 -0
  78. health/constants.py +19 -0
  79. health/models.py +186 -0
  80. health/runner.py +236 -0
  81. install.py +518 -0
  82. logging_config.py +105 -0
  83. main.py +529 -0
  84. monitoring/__init__.py +12 -0
  85. monitoring/callbacks.py +163 -0
  86. monitoring/context.py +158 -0
  87. monitoring/mixin.py +16 -0
  88. monitoring/paths.py +47 -0
  89. monitoring/stats.py +50 -0
  90. monitoring/writers.py +172 -0
  91. output_generators/__init__.py +0 -0
  92. output_generators/html.py +163 -0
  93. output_generators/html_template.py +382 -0
  94. output_generators/markdown.py +140 -0
  95. output_generators/mdx.py +171 -0
  96. output_generators/sphinx.py +175 -0
  97. repo_utils/__init__.py +277 -0
  98. repo_utils/change_detector.py +289 -0
  99. repo_utils/errors.py +6 -0
  100. repo_utils/git_diff.py +74 -0
  101. repo_utils/ignore.py +341 -0
  102. static_analyzer/__init__.py +335 -0
  103. static_analyzer/analysis_cache.py +699 -0
  104. static_analyzer/analysis_result.py +269 -0
  105. static_analyzer/cluster_change_analyzer.py +391 -0
  106. static_analyzer/cluster_helpers.py +79 -0
  107. static_analyzer/constants.py +166 -0
  108. static_analyzer/git_diff_analyzer.py +224 -0
  109. static_analyzer/graph.py +746 -0
  110. static_analyzer/incremental_orchestrator.py +671 -0
  111. static_analyzer/java_config_scanner.py +232 -0
  112. static_analyzer/java_utils.py +227 -0
  113. static_analyzer/lsp_client/__init__.py +12 -0
  114. static_analyzer/lsp_client/client.py +1642 -0
  115. static_analyzer/lsp_client/diagnostics.py +62 -0
  116. static_analyzer/lsp_client/java_client.py +517 -0
  117. static_analyzer/lsp_client/language_settings.py +97 -0
  118. static_analyzer/lsp_client/typescript_client.py +235 -0
  119. static_analyzer/programming_language.py +152 -0
  120. static_analyzer/reference_resolve_mixin.py +166 -0
  121. static_analyzer/scanner.py +95 -0
  122. static_analyzer/typescript_config_scanner.py +54 -0
  123. tool_registry.py +433 -0
  124. user_config.py +134 -0
  125. utils.py +56 -0
  126. vscode_constants.py +124 -0
@@ -0,0 +1,235 @@
1
+ import logging
2
+ import os
3
+ import time
4
+
5
+ from .client import LSPClient
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
class TypeScriptClient(LSPClient):
    """
    TypeScript/JavaScript-specific Language Server Protocol client.

    Extends the base LSPClient with TypeScript-specific behavior: a dependency
    check before start, a custom initialize handshake, and a workspace
    "bootstrap" step that opens a few files so tsserver loads the project.
    """

    def handle_notification(self, method: str, params: dict):
        """
        Handle notifications from the TypeScript language server.

        TypeScript language server notifications are not needed for our analysis,
        so this method intentionally does nothing. The base implementation is sufficient.

        Args:
            method: The LSP notification method name
            params: The notification parameters
        """
        # TypeScript LSP server notifications are not needed for static analysis
        # The server handles project loading internally without requiring client tracking
        pass

    def start(self):
        """Starts the language server, checking node dependencies first."""
        # Warn early when node_modules is missing, before the server boots.
        self._ensure_dependencies()

        # Call parent start method
        super().start()

    def _ensure_dependencies(self):
        """Check whether node_modules exists and log a warning if it does not."""
        node_modules_path = self.project_path / "node_modules"

        if node_modules_path.exists():
            logger.info(f"node_modules found at: {node_modules_path}")
            return

        logger.warning(f"node_modules not found in {self.project_path}")

        # Check if package.json exists — without it, installing dependencies
        # would not even be possible for this project.
        package_json = self.project_path / "package.json"
        if not package_json.exists():
            logger.warning(f"package.json not found in {self.project_path}.")
            return

    def _initialize(self):
        """Performs the LSP initialization handshake.

        Sends the `initialize` request with the capabilities this analyzer
        needs (call/type hierarchy, document symbols, references, semantic
        tokens), then `initialized`, then TypeScript-specific workspace setup.

        Raises:
            RuntimeError: If the server returns an error for `initialize`.
        """
        logger.info(f"Initializing connection for {self.language_id}...")
        params = {
            "processId": os.getpid(),
            "rootUri": self.project_path.as_uri(),
            "capabilities": {
                "textDocument": {
                    "callHierarchy": {"dynamicRegistration": True},
                    "documentSymbol": {"hierarchicalDocumentSymbolSupport": True},
                    "typeHierarchy": {"dynamicRegistration": True},
                    "references": {"dynamicRegistration": True},
                    "semanticTokens": {"dynamicRegistration": True},
                },
                "workspace": {
                    "configuration": True,
                    "workspaceFolders": True,
                    "didChangeConfiguration": {"dynamicRegistration": True},
                },
            },
            # NOTE(review): a top-level "workspace" member is not part of the
            # standard InitializeParams — confirm the server expects it here.
            "workspace": {"applyEdit": True, "workspaceEdit": {"documentChanges": True}},
        }

        # Allow subclasses to customize initialization parameters
        params = self._customize_initialization_params(params)

        init_id = self._send_request("initialize", params)
        # Use longer timeout for initialization as it may involve full workspace indexing
        response = self._wait_for_response(init_id, timeout=360)

        if "error" in response:
            raise RuntimeError(f"Initialization failed: {response['error']}")

        logger.info("Initialization successful.")
        self._send_notification("initialized", {})

        # TypeScript-specific post-initialization setup (workspace config + bootstrap).
        self._configure_typescript_workspace()

    def _customize_initialization_params(self, params: dict) -> dict:
        """Add TypeScript-specific initialization parameters (workspace folders,
        tsserver preferences) and return the modified dict."""
        params["workspaceFolders"] = [{"uri": self.project_path.as_uri(), "name": self.project_path.name}]

        params["initializationOptions"] = {
            "preferences": {"includeCompletionsForModuleExports": True, "includeCompletionsWithSnippetText": True},
            "tsserver": {"logVerbosity": "off"},  # Reduce noise in logs
        }

        return params

    def _configure_typescript_workspace(self):
        """Send TypeScript-specific workspace configuration after initialization.

        Best-effort: any failure is logged and swallowed so the client can
        still attempt analysis with whatever the server managed to load.
        """
        try:
            # Check if we have TypeScript/JavaScript files
            ts_files = self._find_typescript_files()

            if not ts_files:
                logger.warning(f"No TypeScript/JavaScript files found in {self.project_path}")
                return

            logger.info(f"Found {len(ts_files)} TypeScript/JavaScript files")

            # Notify workspace folders change
            self._send_notification(
                "workspace/didChangeWorkspaceFolders",
                {
                    "event": {
                        "added": [{"uri": self.project_path.as_uri(), "name": self.project_path.name}],
                        "removed": [],
                    }
                },
            )

            # Process configuration files
            config_found = self._process_config_files()

            # Bootstrap project by opening sample files
            self._bootstrap_project(ts_files, config_found)

        except Exception as e:
            logger.warning(f"Failed to configure TypeScript workspace: {e}")

    def _find_typescript_files(self) -> list:
        """Find all TypeScript/JavaScript files in the project."""
        all_files = []
        for pattern in ["*.ts", "*.tsx", "*.js", "*.jsx"]:
            all_files.extend(list(self.project_path.rglob(pattern)))

        # filter_src_files is provided by the base LSPClient — presumably it
        # drops vendored/generated paths (e.g. node_modules); confirm there.
        return self.filter_src_files(all_files)

    def _process_config_files(self) -> bool:
        """Process TypeScript configuration files and return True if any found.

        For each existing config file, a didChangeWatchedFiles notification is
        sent so the server picks it up.
        """
        config_files = [
            self.project_path / "tsconfig.json",
            self.project_path / "jsconfig.json",
            self.project_path / "package.json",
        ]

        config_found = False
        for config_path in config_files:
            if config_path.exists():
                logger.info(f"Found configuration file: {config_path}")
                config_found = True
                self._send_notification(
                    "workspace/didChangeWatchedFiles",
                    {"changes": [{"uri": config_path.as_uri(), "type": 1}]},  # Created/Changed
                )

        return config_found

    def _bootstrap_project(self, ts_files: list, config_found: bool):
        """Bootstrap TypeScript project by opening files.

        Opens up to three source files, waits for the server to index, checks
        the project loaded, then closes the files again.

        Args:
            ts_files: Candidate source files (already filtered).
            config_found: Whether a tsconfig/jsconfig/package.json was found;
                when absent the wait is longer since the server must infer
                the project structure.
        """
        logger.info("Opening sample files to bootstrap TypeScript project...")
        # Files are already filtered in _find_typescript_files
        sample_files = ts_files[:3]

        # Open bootstrap files
        for file_path in sample_files:
            try:
                content = file_path.read_text(encoding="utf-8")
                file_uri = file_path.as_uri()
                self._send_notification(
                    "textDocument/didOpen",
                    {"textDocument": {"uri": file_uri, "languageId": self.language_id, "version": 1, "text": content}},
                )
                logger.debug(f"Opened bootstrap file: {file_path}")
            except Exception as e:
                # A single unreadable file should not abort bootstrapping.
                logger.debug(f"Could not open bootstrap file {file_path}: {e}")

        # Wait for project initialization
        wait_time = 5 if config_found else 8
        logger.info(f"Waiting {wait_time}s for TypeScript server to initialize project...")
        time.sleep(wait_time)

        # Validate and close bootstrap files
        if self._validate_typescript_project():
            logger.info("TypeScript project successfully loaded!")
        else:
            logger.warning("TypeScript project still not loaded, but continuing...")

        self._close_bootstrap_files(sample_files)

    def _close_bootstrap_files(self, sample_files: list):
        """Close bootstrap files that were opened for project initialization."""
        for file_path in sample_files:
            try:
                self._send_notification("textDocument/didClose", {"textDocument": {"uri": file_path.as_uri()}})
            except Exception:
                # Best-effort close; a failure here is harmless.
                pass

    def _prepare_for_analysis(self):
        """TypeScript-specific preparation before analysis."""
        logger.info("Waiting additional time for TypeScript server to fully initialize...")
        time.sleep(2)

        if not self._validate_typescript_project():
            logger.warning("TypeScript project not properly loaded. Analysis may be limited.")

    def _validate_typescript_project(self) -> bool:
        """Validate that TypeScript server has a project loaded.

        Sends a cheap workspace/symbol probe; a "No Project" error response
        means the server has not loaded the project.

        Returns:
            bool: False only on a "No Project" error or a transport failure;
            other probe errors are treated as non-fatal and return True.
        """
        try:
            logger.debug("Validating TypeScript project is loaded...")
            params = {"query": "test"}
            req_id = self._send_request("workspace/symbol", params)
            response = self._wait_for_response(req_id)

            if "error" in response:
                error_msg = response["error"]
                if "No Project" in str(error_msg):
                    logger.error("TypeScript server reports 'No Project' - project not properly loaded")
                    return False
                else:
                    logger.warning(f"workspace/symbol test failed but may work: {error_msg}")
                    return True

            logger.debug("TypeScript project validation successful")
            return True

        except Exception as e:
            logger.error(f"Failed to validate TypeScript project: {e}")
            return False
@@ -0,0 +1,152 @@
1
+ import logging
2
+ from pathlib import Path
3
+
4
+ from pydantic import BaseModel, Field
5
+
6
+ from static_analyzer.constants import Language
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
class LanguageConfig(BaseModel):
    """Base configuration class for language-specific settings.

    Subclasses (e.g. JavaConfig) add the fields a particular language needs.
    """

    model_config = {"frozen": True}  # Make configs immutable
+
16
+
17
class JavaConfig(LanguageConfig):
    """Java-specific configuration."""

    # Needed to launch the Eclipse JDT language server for Java analysis.
    jdtls_root: Path = Field(description="Path to the JDTLS (Java Language Server) installation directory")
+
22
+
23
class ProgrammingLanguage:
    """A programming language detected in a repository, plus its LSP wiring.

    Equality and hashing are keyed on ``lsp_server_key`` so that related
    languages served by one language server (e.g. JS/TSX/JSX -> "typescript")
    collapse to a single entry in sets and dicts.
    """

    def __init__(
        self,
        language: str,
        size: int,
        percentage: float,
        suffixes: list[str],
        server_commands: list[str] | None = None,
        lsp_server_key: str | None = None,
        language_specific_config: LanguageConfig | None = None,
    ):
        """
        Args:
            language: Display name as reported by tokei (e.g. "TypeScript").
            size: Lines of code counted for this language.
            percentage: Share of the repository's total code, in percent.
            suffixes: File suffixes associated with the language.
            server_commands: Command line to launch the LSP server, if supported.
            lsp_server_key: Key grouping related languages onto one language
                server; defaults to ``language.lower()``.
            language_specific_config: Optional per-language config
                (e.g. JavaConfig for Java).
        """
        self.language = language
        self.size = size
        self.percentage = percentage
        self.suffixes = suffixes
        self.server_commands = server_commands
        # group related languages (e.g., JS, TSX, JSX -> typescript) to the same language server
        self.lsp_server_key = lsp_server_key or language.lower()
        # Store language-specific configuration (e.g., JavaConfig for Java)
        self.language_specific_config = language_specific_config

    def get_suffix_pattern(self) -> list[str]:
        """Generate and return glob patterns for the file suffixes, to use in .rglob(pattern)"""
        if not self.suffixes:
            return ["*"]
        # One "*.<ext>" glob per suffix (NOT a '|'-joined regex, despite what an
        # earlier comment claimed); lstrip('.') accepts both ".py" and "py".
        return [f"*.{suffix.lstrip('.')}" for suffix in self.suffixes]

    def get_language_id(self) -> str:
        # id for the language, used in LSP server
        return self.language.lower().replace(" ", "_")

    def get_server_parameters(self) -> list[str]:
        """Return the LSP server launch command.

        Raises:
            ValueError: If no server commands are configured for this language.
        """
        if not self.server_commands:
            raise ValueError(
                f"No server commands defined for {self.language}. "
                "Please ensure the language is supported and has server commands defined."
            )
        return self.server_commands

    def is_supported_lang(self) -> bool:
        """True when an LSP server command is configured for this language."""
        return self.server_commands is not None

    def __hash__(self):
        return hash(self.lsp_server_key)

    def __eq__(self, other):
        if not isinstance(other, ProgrammingLanguage):
            # Defer to the other operand's comparison instead of forcing False.
            return NotImplemented
        return self.lsp_server_key == other.lsp_server_key

    def __str__(self):
        return f"ProgrammingLanguage(language={self.language}, lsp_server_key={self.lsp_server_key}, size={self.size}, percentage={self.percentage:.2f}%, suffixes={self.suffixes})"
76
+
77
+
78
class ProgrammingLanguageBuilder:
    """Builder to create ProgrammingLanguage instances from tokei output with greedy LSP matching."""

    def __init__(self, lsp_configs: dict):
        self.lsp_configs = lsp_configs
        # Reverse index: normalized (always-dotted) extension -> LSP config key.
        # Later configs overwrite earlier ones on extension collisions.
        self._extension_to_lsp: dict[str, str] = {
            (ext if ext.startswith(".") else f".{ext}"): server_key
            for server_key, cfg in lsp_configs.items()
            for ext in cfg.get("file_extensions", [])
        }

    def _find_lsp_server_key(self, tokei_language: str, file_suffixes: set[str]) -> str | None:
        """
        Find the LSP config key for a tokei language by matching file extensions.

        A direct (lowercased) match against the config keys is tried first;
        otherwise any of the reported file suffixes is looked up in the
        extension index.

        Args:
            tokei_language: Language name from tokei output (e.g., "JavaScript", "TSX")
            file_suffixes: Set of file suffixes from tokei reports

        Returns:
            LSP config key if found, None otherwise
        """
        direct_key = tokei_language.lower()
        if direct_key in self.lsp_configs:
            return direct_key

        for raw_suffix in file_suffixes:
            dotted = raw_suffix if raw_suffix.startswith(".") else f".{raw_suffix}"
            matched = self._extension_to_lsp.get(dotted)
            if matched is not None:
                return matched

        return None

    def build(
        self,
        tokei_language: str,
        code_count: int,
        percentage: float,
        file_suffixes: set[str],
    ) -> ProgrammingLanguage:
        """Create a ProgrammingLanguage, enriched with LSP config data when a
        matching server configuration exists."""
        lsp_server_key = self._find_lsp_server_key(tokei_language, file_suffixes)

        server_commands: list | None = None
        config_suffixes: set[str] = set()
        language_specific_config: LanguageConfig | None = None

        if lsp_server_key and lsp_server_key in self.lsp_configs:
            matched_config = self.lsp_configs[lsp_server_key]
            server_commands = matched_config.get("command")
            config_suffixes = set(matched_config.get("file_extensions", []))

            # Java carries extra configuration: the JDTLS install location.
            if lsp_server_key == Language.JAVA and "jdtls_root" in matched_config:
                language_specific_config = JavaConfig(jdtls_root=Path(matched_config["jdtls_root"]))

        return ProgrammingLanguage(
            language=tokei_language,
            size=code_count,
            percentage=percentage,
            # Union of suffixes observed by tokei and those declared in the config.
            suffixes=list(file_suffixes | config_suffixes),
            server_commands=server_commands,
            lsp_server_key=lsp_server_key,
            language_specific_config=language_specific_config,
        )

    def get_supported_extensions(self) -> set[str]:
        """All file extensions covered by the configured LSP servers (dotted form)."""
        return set(self._extension_to_lsp)
@@ -0,0 +1,166 @@
1
+ import logging
2
+ import logging
3
+ import os
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+ from agents.agent_responses import AnalysisInsights
8
+ from static_analyzer.analysis_result import StaticAnalysisResults
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class ReferenceResolverMixin:
    """
    Mixin that resolves LLM-produced qualified-name references to concrete
    files and line ranges using static-analysis results, prunes anything that
    cannot be resolved, and relativizes the surviving paths.
    """

    # Provided by the Agent base class this mixin is combined with.
    _parse_invoke: Any

    def __init__(self, repo_dir: Path, static_analysis: StaticAnalysisResults):
        # Repository root; used to absolutize candidate paths and to
        # relativize resolved paths at the end.
        self.repo_dir = repo_dir
        self.static_analysis = static_analysis

    def fix_source_code_reference_lines(self, analysis: AnalysisInsights):
        """Resolve each component reference to an existing file, drop what
        cannot be resolved, and return the analysis with repo-relative paths."""
        logger.info(f"Fixing source code reference lines for the analysis: {analysis.llm_str()}")
        for component in analysis.components:
            for reference in component.key_entities:
                # Check if the file is already resolved
                if reference.reference_file is not None and os.path.exists(reference.reference_file):
                    continue

                self._resolve_single_reference(reference, component.assigned_files)

        # Remove unresolved references
        self._remove_unresolved_references(analysis)

        return self._relative_paths(analysis)

    def _resolve_single_reference(self, reference, file_candidates: list[str] | None = None):
        """Orchestrates different resolution strategies for a single reference.

        Per language, tries: exact symbol match, loose symbol match, then
        filesystem-path heuristics. Stops at the first success; an unresolved
        reference is left as-is and removed later.
        """
        assert self.static_analysis is not None, "static_analysis required for reference resolution"
        # Normalize path-style names ("pkg/mod") into dotted qualified names.
        qname = reference.qualified_name.replace(os.sep, ".")

        for lang in self.static_analysis.get_languages():
            # Try exact match first
            if self._try_exact_match(reference, qname, lang):
                return

            # Try loose matching
            if self._try_loose_match(reference, qname, lang):
                return

            # Try file path resolution
            if self._try_file_path_resolution(reference, qname, lang, file_candidates):
                return

        # No resolution found - will be cleaned up later
        logger.warning(f"[Reference Resolution] Could not resolve reference {reference.qualified_name} in any language")

    def _try_exact_match(self, reference, qname, lang) -> bool:
        """Attempts exact reference matching; mutates `reference` in place and
        returns True on success."""
        try:
            node = self.static_analysis.get_reference(lang, qname)
            reference.reference_file = node.file_path
            reference.reference_start_line = node.line_start + 1  # match 1 based indexing
            reference.reference_end_line = node.line_end + 1  # match 1 based indexing
            reference.qualified_name = qname
            logger.info(
                f"[Reference Resolution] Matched {reference.qualified_name} in {lang} at {reference.reference_file}"
            )
            return True
        # NOTE(review): FileExistsError is an unusual failure mode here — confirm
        # get_reference actually raises it (FileNotFoundError may have been intended).
        except (ValueError, FileExistsError) as e:
            logger.warning(f"[Reference Resolution] Exact match failed for {reference.qualified_name} in {lang}: {e}")
            return False

    def _try_loose_match(self, reference, qname, lang) -> bool:
        """Attempts loose reference matching; returns True when a node was found."""
        try:
            _, node = self.static_analysis.get_loose_reference(lang, qname)
            if node is not None:
                reference.reference_file = node.file_path
                reference.reference_start_line = node.line_start + 1
                reference.reference_end_line = node.line_end + 1
                reference.qualified_name = qname
                logger.info(
                    f"[Reference Resolution] Loosely matched {reference.qualified_name} in {lang} at {reference.reference_file}"
                )
                return True
        except Exception as e:
            logger.warning(f"[Reference Resolution] Loose match failed for {qname} in {lang}: {e}")
        return False

    def _try_file_path_resolution(self, reference, qname, lang, file_candidates: list[str] | None = None) -> bool:
        """Attempts to resolve reference through file path matching."""
        # First try existing reference file path
        if self._try_existing_reference_file(reference, lang):
            return True

        # Then try qualified name as file path
        return self._try_qualified_name_as_path(reference, qname, lang, file_candidates)

    def _try_existing_reference_file(self, reference, lang) -> bool:
        """Tries to resolve using the reference's existing (relative) file path,
        joined onto the repo root. Clears the path when it does not exist."""
        if (reference.reference_file is not None) and (not Path(reference.reference_file).is_absolute()):
            joined_path = os.path.join(self.repo_dir, reference.reference_file)
            if os.path.exists(joined_path):
                reference.reference_file = joined_path
                logger.info(
                    f"[Reference Resolution] File path matched for {reference.qualified_name} in {lang} at {reference.reference_file}"
                )
                return True
            else:
                # Stale/invalid path: clear it so later cleanup removes the reference.
                reference.reference_file = None
        return False

    def _try_qualified_name_as_path(self, reference, qname, lang, file_candidates: list[str] | None = None) -> bool:
        """Tries to resolve qualified name as various file path patterns."""
        file_path = qname.replace(".", os.sep)  # Get file path
        full_path = os.path.join(self.repo_dir, file_path)
        # Re-dot the last separator: "repo/pkg/mod" -> "repo/pkg.mod"
        # (treats the final component as an extension-like suffix).
        file_ref = ".".join(full_path.rsplit(os.sep, 1))
        extra_paths = file_candidates or []
        # NOTE(review): the "{file_path}.py/.ts/.tsx" candidates are relative
        # paths, so os.path.exists resolves them against the current working
        # directory, not repo_dir — confirm this is intended.
        paths = [full_path, f"{file_path}.py", f"{file_path}.ts", f"{file_path}.tsx", file_ref, *extra_paths]

        for path in paths:
            if os.path.exists(path):
                reference.reference_file = str(path)
                logger.info(
                    f"[Reference Resolution] Path matched for {reference.qualified_name} in {lang} at {reference.reference_file}"
                )
                return True
        return False

    def _remove_unresolved_references(self, analysis: AnalysisInsights):
        """Remove references and assigned files that couldn't be resolved to existing files."""
        for component in analysis.components:
            # Remove unresolved key_entities
            original_ref_count = len(component.key_entities)
            component.key_entities = [
                ref
                for ref in component.key_entities
                if ref.reference_file is not None and os.path.exists(ref.reference_file)
            ]
            removed_ref_count = original_ref_count - len(component.key_entities)
            if removed_ref_count > 0:
                logger.info(
                    f"[Reference Resolution] Removed {removed_ref_count} unresolved reference(s) "
                    f"from component '{component.name}'"
                )

            # Remove unresolved assigned_files (accepting repo-relative or absolute paths)
            original_file_count = len(component.assigned_files)
            component.assigned_files = [
                f
                for f in component.assigned_files
                if os.path.exists(os.path.join(self.repo_dir, f)) or os.path.exists(f)
            ]
            removed_file_count = original_file_count - len(component.assigned_files)
            if removed_file_count > 0:
                logger.info(
                    f"[Reference Resolution] Removed {removed_file_count} unresolved assigned file(s) "
                    f"from component '{component.name}'"
                )

    def _relative_paths(self, analysis: AnalysisInsights):
        """Convert all reference file paths to relative paths."""
        for component in analysis.components:
            for reference in component.key_entities:
                # Only paths under the repo root are relativized; others are kept as-is.
                if reference.reference_file and reference.reference_file.startswith(str(self.repo_dir)):
                    reference.reference_file = os.path.relpath(reference.reference_file, self.repo_dir)
        return analysis
@@ -0,0 +1,95 @@
1
+ import json
2
+ import logging
3
+ import subprocess
4
+ from pathlib import Path
5
+ from typing import Set
6
+
7
+ from static_analyzer.programming_language import ProgrammingLanguage, ProgrammingLanguageBuilder
8
+ from utils import get_config
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class ProjectScanner:
    """Scans a repository with tokei to detect its programming languages."""

    def __init__(self, repo_location: Path):
        """
        Args:
            repo_location: Root directory of the repository to scan.
        """
        self.repo_location = repo_location
        # All text file paths reported by tokei; populated by scan().
        self.all_text_files: list[str] = []

    def scan(self) -> list[ProgrammingLanguage]:
        """
        Scan the repository using Tokei and return parsed results.

        Also populates self.all_text_files with all text file paths found by Tokei.

        Returns:
            list[ProgrammingLanguage]: technologies with their sizes, percentages, and suffixes

        Raises:
            subprocess.CalledProcessError: If the tokei command exits non-zero.
            json.JSONDecodeError: If tokei output is not valid JSON.
        """
        commands = get_config("tools")["tokei"]["command"]
        result = subprocess.run(commands, cwd=self.repo_location, capture_output=True, text=True, check=True)

        server_config = get_config("lsp_servers")
        builder = ProgrammingLanguageBuilder(server_config)

        # Parse Tokei JSON output
        tokei_data = json.loads(result.stdout)

        # Total code count is the denominator for per-language percentages.
        total_code = tokei_data.get("Total", {}).get("code", 0)
        if not total_code:
            logger.warning("No total code count found in Tokei output")
            return []

        programming_languages: list[ProgrammingLanguage] = []
        all_files: list[str] = []
        for technology, stats in tokei_data.items():
            if technology == "Total":
                continue

            # Collect ALL text file paths from Tokei for file coverage,
            # including languages with code_count == 0 (e.g. Markdown is 100% comments)
            report_names = [report["name"] for report in stats.get("reports", [])]
            all_files.extend(report_names)

            code_count = stats.get("code", 0)
            if code_count == 0:
                continue

            percentage = code_count / total_code * 100

            # Extract suffixes in one pass over all report names (previously the
            # helper was invoked once per file with a single-element list).
            suffixes = self._extract_suffixes(report_names)

            pl = builder.build(
                tokei_language=technology,
                code_count=code_count,
                percentage=percentage,
                file_suffixes=suffixes,
            )

            logger.debug(f"Found: {pl}")
            # Ignore languages making up less than 1% of the codebase.
            if pl.percentage >= 1:
                programming_languages.append(pl)

        self.all_text_files = all_files
        return programming_languages

    @staticmethod
    def _extract_suffixes(files: list[str]) -> Set[str]:
        """
        Extract unique file suffixes from a list of files.

        Args:
            files (list[str]): list of file paths

        Returns:
            Set[str]: Unique file extensions/suffixes (empty suffixes are dropped)
        """
        return {suffix for file_path in files if (suffix := Path(file_path).suffix)}