codeboarding-0.9.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agents/__init__.py +0 -0
- agents/abstraction_agent.py +150 -0
- agents/agent.py +467 -0
- agents/agent_responses.py +363 -0
- agents/cluster_methods_mixin.py +281 -0
- agents/constants.py +13 -0
- agents/dependency_discovery.py +159 -0
- agents/details_agent.py +174 -0
- agents/llm_config.py +309 -0
- agents/meta_agent.py +105 -0
- agents/planner_agent.py +105 -0
- agents/prompts/__init__.py +85 -0
- agents/prompts/abstract_prompt_factory.py +63 -0
- agents/prompts/claude_prompts.py +381 -0
- agents/prompts/deepseek_prompts.py +389 -0
- agents/prompts/gemini_flash_prompts.py +362 -0
- agents/prompts/glm_prompts.py +407 -0
- agents/prompts/gpt_prompts.py +470 -0
- agents/prompts/kimi_prompts.py +400 -0
- agents/prompts/prompt_factory.py +179 -0
- agents/tools/__init__.py +8 -0
- agents/tools/base.py +96 -0
- agents/tools/get_external_deps.py +47 -0
- agents/tools/get_method_invocations.py +47 -0
- agents/tools/read_cfg.py +60 -0
- agents/tools/read_docs.py +132 -0
- agents/tools/read_file.py +90 -0
- agents/tools/read_file_structure.py +156 -0
- agents/tools/read_git_diff.py +131 -0
- agents/tools/read_packages.py +60 -0
- agents/tools/read_source.py +105 -0
- agents/tools/read_structure.py +49 -0
- agents/tools/toolkit.py +119 -0
- agents/validation.py +383 -0
- caching/__init__.py +4 -0
- caching/cache.py +29 -0
- caching/meta_cache.py +227 -0
- codeboarding-0.9.0.dist-info/METADATA +223 -0
- codeboarding-0.9.0.dist-info/RECORD +126 -0
- codeboarding-0.9.0.dist-info/WHEEL +5 -0
- codeboarding-0.9.0.dist-info/entry_points.txt +3 -0
- codeboarding-0.9.0.dist-info/licenses/LICENSE +21 -0
- codeboarding-0.9.0.dist-info/top_level.txt +18 -0
- core/__init__.py +101 -0
- core/plugin_loader.py +46 -0
- core/protocols.py +27 -0
- core/registry.py +46 -0
- diagram_analysis/__init__.py +4 -0
- diagram_analysis/analysis_json.py +346 -0
- diagram_analysis/diagram_generator.py +486 -0
- diagram_analysis/file_coverage.py +212 -0
- diagram_analysis/incremental/__init__.py +63 -0
- diagram_analysis/incremental/component_checker.py +236 -0
- diagram_analysis/incremental/file_manager.py +217 -0
- diagram_analysis/incremental/impact_analyzer.py +238 -0
- diagram_analysis/incremental/io_utils.py +281 -0
- diagram_analysis/incremental/models.py +72 -0
- diagram_analysis/incremental/path_patching.py +164 -0
- diagram_analysis/incremental/reexpansion.py +166 -0
- diagram_analysis/incremental/scoped_analysis.py +227 -0
- diagram_analysis/incremental/updater.py +464 -0
- diagram_analysis/incremental/validation.py +48 -0
- diagram_analysis/manifest.py +152 -0
- diagram_analysis/version.py +6 -0
- duckdb_crud.py +125 -0
- github_action.py +172 -0
- health/__init__.py +3 -0
- health/checks/__init__.py +11 -0
- health/checks/circular_deps.py +48 -0
- health/checks/cohesion.py +93 -0
- health/checks/coupling.py +140 -0
- health/checks/function_size.py +85 -0
- health/checks/god_class.py +167 -0
- health/checks/inheritance.py +104 -0
- health/checks/instability.py +77 -0
- health/checks/unused_code_diagnostics.py +338 -0
- health/config.py +172 -0
- health/constants.py +19 -0
- health/models.py +186 -0
- health/runner.py +236 -0
- install.py +518 -0
- logging_config.py +105 -0
- main.py +529 -0
- monitoring/__init__.py +12 -0
- monitoring/callbacks.py +163 -0
- monitoring/context.py +158 -0
- monitoring/mixin.py +16 -0
- monitoring/paths.py +47 -0
- monitoring/stats.py +50 -0
- monitoring/writers.py +172 -0
- output_generators/__init__.py +0 -0
- output_generators/html.py +163 -0
- output_generators/html_template.py +382 -0
- output_generators/markdown.py +140 -0
- output_generators/mdx.py +171 -0
- output_generators/sphinx.py +175 -0
- repo_utils/__init__.py +277 -0
- repo_utils/change_detector.py +289 -0
- repo_utils/errors.py +6 -0
- repo_utils/git_diff.py +74 -0
- repo_utils/ignore.py +341 -0
- static_analyzer/__init__.py +335 -0
- static_analyzer/analysis_cache.py +699 -0
- static_analyzer/analysis_result.py +269 -0
- static_analyzer/cluster_change_analyzer.py +391 -0
- static_analyzer/cluster_helpers.py +79 -0
- static_analyzer/constants.py +166 -0
- static_analyzer/git_diff_analyzer.py +224 -0
- static_analyzer/graph.py +746 -0
- static_analyzer/incremental_orchestrator.py +671 -0
- static_analyzer/java_config_scanner.py +232 -0
- static_analyzer/java_utils.py +227 -0
- static_analyzer/lsp_client/__init__.py +12 -0
- static_analyzer/lsp_client/client.py +1642 -0
- static_analyzer/lsp_client/diagnostics.py +62 -0
- static_analyzer/lsp_client/java_client.py +517 -0
- static_analyzer/lsp_client/language_settings.py +97 -0
- static_analyzer/lsp_client/typescript_client.py +235 -0
- static_analyzer/programming_language.py +152 -0
- static_analyzer/reference_resolve_mixin.py +166 -0
- static_analyzer/scanner.py +95 -0
- static_analyzer/typescript_config_scanner.py +54 -0
- tool_registry.py +433 -0
- user_config.py +134 -0
- utils.py +56 -0
- vscode_constants.py +124 -0
main.py
ADDED
@@ -0,0 +1,529 @@
import argparse
import json
import logging
import os
import shutil
from pathlib import Path

import requests
from tqdm import tqdm

from agents.llm_config import configure_models, validate_api_key_provided
from user_config import ensure_config_template, load_user_config
from core import get_registries, load_plugins
from diagram_analysis import DiagramGenerator
from diagram_analysis.analysis_json import build_id_to_name_map, parse_unified_analysis
from diagram_analysis.incremental.io_utils import load_full_analysis, save_sub_analysis
from logging_config import setup_logging
from monitoring import monitor_execution
from monitoring.paths import generate_run_id, get_monitoring_run_dir
from output_generators.markdown import generate_markdown_file
from repo_utils import (
    clone_repository,
    get_branch,
    get_repo_name,
    store_token,
    upload_onboarding_materials,
)
from repo_utils.ignore import initialize_codeboardingignore
from utils import (
    create_temp_repo_folder,
    monitoring_enabled,
    remove_temp_repo_folder,
    sanitize,
)
from vscode_constants import update_config

logger = logging.getLogger(__name__)


def onboarding_materials_exist(project_name: str) -> bool:
    generated_repo_url = f"https://github.com/CodeBoarding/GeneratedOnBoardings/tree/main/{project_name}"
    response = requests.get(generated_repo_url)
    if response.status_code == 200:
        logger.info(f"Repository has already been generated, please check {generated_repo_url}")
        return True
    return False


def generate_analysis(
    repo_name: str,
    repo_path: Path,
    output_dir: Path,
    depth_level: int = 1,
    run_id: str | None = None,
    monitoring_enabled: bool = False,
    force_full: bool = False,
) -> list[Path]:
    generator = DiagramGenerator(
        repo_location=repo_path,
        temp_folder=output_dir,
        repo_name=repo_name,
        output_dir=output_dir,
        depth_level=depth_level,
        run_id=run_id,
        monitoring_enabled=monitoring_enabled,
    )
    generator.force_full_analysis = force_full
    generated_files = generator.generate_analysis()
    return [Path(path) for path in generated_files]


def generate_markdown_docs(
    repo_name: str,
    repo_path: Path,
    repo_url: str,
    analysis_files: list[Path],
    output_dir: Path,
    demo_mode: bool = False,
):
    target_branch = get_branch(repo_path)
    repo_ref = f"{repo_url}/blob/{target_branch}/"

    # Load the single unified analysis.json
    analysis_path = analysis_files[0]
    with open(analysis_path, "r") as f:
        data = json.load(f)

    root_analysis, sub_analyses = parse_unified_analysis(data)

    # Generate markdown for root analysis
    root_expanded = set(sub_analyses.keys())
    generate_markdown_file(
        "on_boarding",
        root_analysis,
        repo_name,
        repo_ref=repo_ref,
        expanded_components=root_expanded,
        temp_dir=output_dir,
        demo=demo_mode,
    )

    # Build id-to-name mapping across all levels for file naming
    id_to_name = build_id_to_name_map(root_analysis, sub_analyses)

    # Generate markdown for each sub-analysis
    for comp_id, sub_analysis in sub_analyses.items():
        sub_expanded = {c.component_id for c in sub_analysis.components if c.component_id in sub_analyses}
        comp_name = id_to_name.get(comp_id, comp_id)
        fname = sanitize(comp_name)
        generate_markdown_file(
            fname,
            sub_analysis,
            repo_name,
            repo_ref=repo_ref,
            expanded_components=sub_expanded,
            temp_dir=output_dir,
            demo=demo_mode,
        )


def partial_update(
    repo_path: Path,
    output_dir: Path,
    project_name: str,
    component_id: str,
    depth_level: int = 1,
):
    """
    Update a specific component in an existing analysis.
    """
    generator = DiagramGenerator(
        repo_location=repo_path,
        temp_folder=output_dir,
        repo_name=project_name,
        output_dir=output_dir,
        depth_level=depth_level,
    )
    generator.pre_analysis()

    # Load the full unified analysis (root + all sub-analyses)
    full_analysis = load_full_analysis(output_dir)
    if full_analysis is None:
        logger.error(f"No analysis.json found in '{output_dir}'. Please ensure the file exists.")
        return

    root_analysis, sub_analyses = full_analysis

    # Search root components first, then all nested sub-analysis components
    component_to_analyze = None
    for component in root_analysis.components:
        if component.component_id == component_id:
            logger.info(f"Updating analysis for component: {component.name}")
            component_to_analyze = component
            break
    if component_to_analyze is None:
        for sub_analysis in sub_analyses.values():
            for component in sub_analysis.components:
                if component.component_id == component_id:
                    logger.info(f"Updating analysis for component: {component.name}")
                    component_to_analyze = component
                    break
            if component_to_analyze is not None:
                break

    if component_to_analyze is None:
        logger.error(f"Component with ID '{component_id}' not found in analysis")
        return

    comp_id, sub_analysis, new_components = generator.process_component(component_to_analyze)

    if sub_analysis:
        save_sub_analysis(sub_analysis, output_dir, component_id)
        logger.info(f"Updated component '{component_id}' in analysis.json")
    else:
        logger.error(f"Failed to generate sub-analysis for component '{component_id}'")


def generate_docs_remote(
    repo_url: str,
    temp_repo_folder: Path,
    local_dev: bool = False,
    run_id: str | None = None,
    monitoring_enabled: bool = False,
):
    """
    Clone a git repo and generate documentation (backward compatibility wrapper used by local_app).
    """
    process_remote_repository(
        repo_url=repo_url,
        output_dir=temp_repo_folder,
        depth_level=int(os.getenv("DIAGRAM_DEPTH_LEVEL", "1")),
        upload=not local_dev,  # Only upload if not in local dev mode
        cache_check=True,
        run_id=run_id,
        monitoring_enabled=monitoring_enabled,
    )


def process_remote_repository(
    repo_url: str,
    output_dir: Path | None = None,
    depth_level: int = 1,
    upload: bool = False,
    cache_check: bool = True,
    run_id: str | None = None,
    monitoring_enabled: bool = False,
):
    """
    Process a remote repository by cloning and generating documentation.
    """
    repo_root = Path("repos")

    repo_name = get_repo_name(repo_url)

    # Check cache if enabled
    if cache_check and onboarding_materials_exist(repo_name):
        logger.info(f"Cache hit for '{repo_name}', skipping documentation generation.")
        return

    # Clone repository
    repo_name = clone_repository(repo_url, repo_root)
    repo_path = repo_root / repo_name

    temp_folder = create_temp_repo_folder()

    try:
        analysis_files = generate_analysis(
            repo_name=repo_name,
            repo_path=repo_path,
            output_dir=temp_folder,
            depth_level=depth_level,
            run_id=run_id,
            monitoring_enabled=monitoring_enabled,
        )

        # Generate markdown documentation for remote repo
        generate_markdown_docs(
            repo_name=repo_name,
            repo_path=repo_path,
            repo_url=repo_url,
            analysis_files=analysis_files,
            output_dir=temp_folder,
            demo_mode=True,
        )

        # Copy files to output directory if specified
        if output_dir:
            copy_files(temp_folder, output_dir)

        # Upload if requested
        if upload:
            upload_onboarding_materials(repo_name, temp_folder, "results")
    finally:
        remove_temp_repo_folder(str(temp_folder))


def process_local_repository(
    repo_path: Path,
    output_dir: Path,
    project_name: str,
    depth_level: int = 1,
    component_id: str | None = None,
    monitoring_enabled: bool = False,
    incremental: bool = False,
    force_full: bool = False,
):
    # Handle partial updates
    if component_id:
        partial_update(
            repo_path=repo_path,
            output_dir=output_dir,
            project_name=project_name,
            component_id=component_id,
            depth_level=depth_level,
        )
        return

    # Use smart incremental analysis if requested
    if incremental and not force_full:
        generator = DiagramGenerator(
            repo_location=repo_path,
            temp_folder=output_dir,
            repo_name=project_name,
            output_dir=output_dir,
            depth_level=depth_level,
            monitoring_enabled=monitoring_enabled,
        )
        generator.force_full_analysis = force_full

        # Try incremental first, fall back to full
        result = generator.generate_analysis_smart()
        if result:
            logger.info(f"Incremental analysis completed: {len(result)} files")
            return

    # Full analysis (local repo - no markdown generation)
    generate_analysis(
        repo_name=project_name,
        repo_path=repo_path,
        output_dir=output_dir,
        depth_level=depth_level,
        monitoring_enabled=monitoring_enabled,
        force_full=force_full,
    )


def copy_files(temp_folder: Path, output_dir: Path):
    """Copy all markdown and JSON files from temp folder to output directory."""
    # Copy markdown files
    markdown_files = list(temp_folder.glob("*.md"))
    # Copy JSON files
    json_files = list(temp_folder.glob("*.json"))

    all_files = markdown_files + json_files

    if not all_files:
        logger.warning(f"No markdown or JSON files found in {temp_folder}")
        return

    for file in all_files:
        dest_file = output_dir / file.name
        shutil.copy2(file, dest_file)
        logger.info(f"Copied {file.name} to {dest_file}")


def validate_arguments(args, parser, is_local: bool):
    # Ensure mutual exclusivity between remote and local runs
    has_remote_repos = bool(getattr(args, "repositories", None))
    has_local_repo = args.local is not None

    if has_remote_repos == has_local_repo:
        parser.error("Provide either one or more remote repositories or --local, but not both.")

    # Validate partial update arguments
    if args.partial_component_id and not is_local:
        parser.error("--partial-component-id only works with local repositories")


def define_cli_arguments(parser: argparse.ArgumentParser):
    """
    Adds all command-line arguments and groups to the ArgumentParser.
    """
    parser.add_argument(
        "repositories",
        nargs="*",
        help="One or more Git repository URLs to generate documentation for",
    )
    parser.add_argument("--local", type=Path, help="Path to a local repository")

    # Partial update options
    parser.add_argument(
        "--partial-component-id",
        type=str,
        help="Component ID to update (for partial updates only)",
    )

    # Binary/tool configuration
    parser.add_argument(
        "--binary-location",
        type=Path,
        help="Path to the binary directory for language servers (overrides ~/.codeboarding/servers/)",
    )

    # Analysis options
    parser.add_argument(
        "--depth-level",
        type=int,
        default=1,
        help="Depth level for diagram generation (default: 1)",
    )
    parser.add_argument(
        "--upload",
        action="store_true",
        help="Upload onboarding materials to GeneratedOnBoardings repo (remote repos only)",
    )
    parser.add_argument("--enable-monitoring", action="store_true", help="Enable monitoring")

    # Incremental update options
    parser.add_argument(
        "--full",
        action="store_true",
        help="Force full reanalysis, skipping incremental update detection",
    )
    parser.add_argument(
        "--incremental",
        action="store_true",
        help="Use smart incremental updates (tries incremental first, falls back to full)",
    )


def main():
    """Main entry point for the unified CodeBoarding CLI."""
    parser = argparse.ArgumentParser(
        description="Generate onboarding documentation for Git repositories (local or remote)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Local repository (output written to <repo>/.codeboarding/)
  codeboarding --local /path/to/repo

  # Local repository with custom depth level
  codeboarding --local /path/to/repo --depth-level 2

  # Remote repository (cloned to cwd/<repo_name>/, output to cwd/<repo_name>/.codeboarding/)
  codeboarding https://github.com/user/repo

  # Partial update (update single component by ID)
  codeboarding --local /path/to/repo --partial-component-id "a3f2b1c4d5e6f789"

  # Incremental update (smart - detects changes automatically)
  codeboarding --local /path/to/repo --incremental

  # Force full reanalysis (skip incremental detection)
  codeboarding --local /path/to/repo --full

  # Use custom binary location (e.g. VS Code extension)
  codeboarding --local /path/to/repo --binary-location /path/to/binaries
""",
    )
    define_cli_arguments(parser)

    args = parser.parse_args()

    # Validate interdependent arguments
    is_local = args.local is not None
    validate_arguments(args, parser, is_local)

    # Derive output directory from repo path
    if is_local:
        output_dir = args.local.resolve() / ".codeboarding"
    else:
        # Remote: will be set per-repo inside the loop below
        output_dir = None

    # Setup logging
    setup_logging(log_dir=output_dir)
    logger.info("Starting CodeBoarding documentation generation...")

    # Ensure ~/.codeboarding/config.toml exists (writes template on first run)
    ensure_config_template()

    # Load ~/.codeboarding/config.toml: inject provider keys into env and store model overrides
    user_cfg = load_user_config()
    user_cfg.apply_to_env()
    configure_models(agent_model=user_cfg.llm.agent_model, parsing_model=user_cfg.llm.parsing_model)

    # Validate that an LLM provider key is configured before doing any heavy work
    try:
        validate_api_key_provided()
    except ValueError as e:
        logger.error(str(e))
        raise SystemExit(1)

    load_plugins(get_registries())

    if args.binary_location:
        update_config(args.binary_location)
    else:
        from tool_registry import ensure_tools, needs_install

        if needs_install():
            logger.info("First run: downloading language server binaries to ~/.codeboarding/servers/ ...")
            ensure_tools(auto_install_npm=True)

    should_monitor = args.enable_monitoring or monitoring_enabled()

    if is_local:
        output_dir.mkdir(parents=True, exist_ok=True)
        initialize_codeboardingignore(output_dir)

        # Derive project name from the repo directory name
        project_name = args.local.resolve().name

        process_local_repository(
            repo_path=args.local,
            output_dir=output_dir,
            project_name=project_name,
            depth_level=args.depth_level,
            component_id=args.partial_component_id,
            monitoring_enabled=should_monitor,
            incremental=args.incremental,
            force_full=args.full,
        )
        logger.info(f"Documentation generated successfully in {output_dir}")
    else:
        if args.repositories:
            if args.upload:
                try:
                    store_token()
                except Exception as e:
                    logger.warning(f"Could not store GitHub token: {e}")

            for repo in tqdm(args.repositories, desc="Generating docs for repos"):
                repo_name = get_repo_name(repo)
                # Clone to cwd/<repo_name>/, output to cwd/<repo_name>/.codeboarding/
                repo_output_dir = Path.cwd() / repo_name / ".codeboarding"
                repo_output_dir.mkdir(parents=True, exist_ok=True)
                initialize_codeboardingignore(repo_output_dir)

                run_id = generate_run_id(repo_name)
                monitoring_dir = get_monitoring_run_dir(run_id, create=should_monitor)

                with monitor_execution(
                    run_id=run_id,
                    output_dir=str(monitoring_dir),
                    enabled=should_monitor,
                ) as mon:
                    mon.step(f"processing_{repo_name}")

                    try:
                        process_remote_repository(
                            repo_url=repo,
                            output_dir=repo_output_dir,
                            depth_level=args.depth_level,
                            upload=args.upload,
                            run_id=run_id,
                            monitoring_enabled=should_monitor,
                        )
                    except Exception as e:
                        logger.error(f"Failed to process repository {repo}: {e}")
                        continue

            logger.info("All repositories processed successfully!")
        else:
            logger.error("No repositories specified")


if __name__ == "__main__":
    main()
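Beyond the CLI, the same entry points can be called directly. The following is a minimal, illustrative sketch (not part of the package) of invoking process_local_repository from Python; note that main() also performs config, plugin, and LLM-key setup (ensure_config_template, load_user_config, configure_models, load_plugins) that a real caller would need to replicate.

from pathlib import Path

from main import process_local_repository  # module shipped at the wheel root

repo = Path("/path/to/repo")        # assumed local checkout
out = repo / ".codeboarding"        # mirrors the CLI's output convention
out.mkdir(parents=True, exist_ok=True)

process_local_repository(
    repo_path=repo,
    output_dir=out,
    project_name=repo.name,         # main() derives the name the same way
    depth_level=2,
    incremental=True,               # try incremental first, fall back to full
)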
monitoring/__init__.py
ADDED
@@ -0,0 +1,12 @@
"""
Monitoring package for tracking LLM usage, tool calls, and static analysis metrics.

Usage:
    from monitoring import RunStats, MonitoringCallback, StreamingStatsWriter
    from monitoring import monitor_execution, trace, current_step
"""

from .stats import RunStats
from .callbacks import MonitoringCallback
from .writers import StreamingStatsWriter
from .context import monitor_execution, trace, current_step
monitoring/callbacks.py
ADDED
@@ -0,0 +1,163 @@
import json
import logging
import time
from typing import Any, Mapping, MutableMapping, cast
from uuid import UUID

from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.outputs import LLMResult

from monitoring.stats import RunStats, current_stats
from monitoring.context import current_step

logger = logging.getLogger("monitoring")


class MonitoringCallback(BaseCallbackHandler):
    """
    Captures LLM events, tags them with the current step, and updates stats.
    """

    def __init__(self, stats_container: RunStats | None = None, log_results: bool = True):
        # runtime bookkeeping
        self._tool_start_times: dict[str, float] = {}  # run_id -> start_time
        self._tool_names: dict[str, str] = {}  # run_id -> tool_name
        self._stats_container = stats_container
        self.log_results = log_results

    @property
    def model_name(self) -> str | None:
        return self.stats.model_name

    @model_name.setter
    def model_name(self, value: str | None) -> None:
        with self.stats._lock:
            self.stats.model_name = value

    @property
    def stats(self) -> RunStats:
        if self._stats_container:
            return self._stats_container
        return current_stats.get()

    def on_llm_end(self, response: LLMResult, **_kwargs: Any) -> None:
        step_name = current_step.get()

        # Extract usage
        usage = self._extract_usage(response)

        if not usage:
            return

        # Update State
        with self.stats._lock:
            self.stats.total_tokens += usage.get("total_tokens", 0)
            self.stats.input_tokens += usage.get("input_tokens", 0)
            self.stats.output_tokens += usage.get("output_tokens", 0)

        # Log Event
        if self.log_results:
            model = self.model_name or "unknown"
            # IMPORTANT: Do not change this log line format. Any change must be approved by IVAN.
            logger.info(f"Token Usage: step={step_name} model={model} usage={json.dumps(usage)}")

    def on_tool_start(self, serialized: dict[str, Any], input_str: str, **kwargs: Any) -> None:
        run_id_any = kwargs.get("run_id")
        run_id: str | None = str(run_id_any) if run_id_any else None
        tool_name = (
            serialized.get("name")
            or serialized.get("id")
            or serialized.get("lc_namespace", ["tool"])[-1]
            or "unknown_tool"
        )
        with self.stats._lock:
            self.stats.tool_counts[tool_name] += 1

        now = time.time()
        if run_id:
            self._tool_start_times[run_id] = now
            self._tool_names[run_id] = tool_name

    def on_tool_end(self, output: Any, **kwargs: Any) -> None:
        run_id_any = kwargs.get("run_id")
        run_id: str | None = str(run_id_any) if run_id_any else None
        if run_id and run_id in self._tool_start_times:
            start = self._tool_start_times.pop(run_id)
            tool_name = self._tool_names.pop(run_id, "unknown_tool")
            latency = int((time.time() - start) * 1000)
            with self.stats._lock:
                self.stats.tool_latency_ms[tool_name].append(latency)

    def on_tool_error(
        self, error: BaseException, *, run_id: UUID, parent_run_id: UUID | None = None, **kwargs: Any
    ) -> Any:
        tool_name = "unknown_tool"
        run_id_str = str(run_id)
        if run_id_str in self._tool_names:
            tool_name = self._tool_names[run_id_str]
        with self.stats._lock:
            self.stats.tool_errors[tool_name] += 1

        # Clean up any in-flight timing
        if run_id_str in self._tool_start_times:
            self._tool_start_times.pop(run_id_str, None)
            self._tool_names.pop(run_id_str, None)

    def _extract_usage(self, response: LLMResult) -> dict[str, int]:
        def _coerce_int(value: Any) -> int:
            try:
                return int(value)
            except (TypeError, ValueError):
                return 0

        def _extract_usage_from_mapping(mapping: Mapping[str, Any]) -> dict[str, int]:
            # Handle both prompt/completion and input/output styles
            prompt = mapping.get("prompt_tokens", mapping.get("input_tokens", 0))
            completion = mapping.get("completion_tokens", mapping.get("output_tokens", 0))
            total = mapping.get("total_tokens", mapping.get("total_token_count", None))

            prompt_i = _coerce_int(prompt)
            completion_i = _coerce_int(completion)

            if total is None:
                total_i = prompt_i + completion_i
            else:
                total_i = _coerce_int(total)

            return {
                "input_tokens": prompt_i,
                "output_tokens": completion_i,
                "total_tokens": total_i,
            }

        usage_mapping: MutableMapping[str, Any] = {}

        # 1) Try llm_output
        llm_output = response.llm_output or {}
        if "token_usage" in llm_output:
            raw = cast(Mapping[str, Any], llm_output.get("token_usage") or {})
            usage_mapping = dict(raw)
        elif "usage" in llm_output:
            raw = cast(Mapping[str, Any], llm_output.get("usage") or {})
            usage_mapping = dict(raw)

        # 2) Fallback to first generation's message metadata
        if not usage_mapping and response.generations:
            first_gen = response.generations[0][0]
            message = getattr(first_gen, "message", None) or getattr(first_gen, "text", None)
            meta: Mapping[str, Any] = getattr(message, "response_metadata", {}) or {}
            usage_meta: Mapping[str, Any] = getattr(message, "usage_metadata", {}) or {}

            if "token_usage" in meta:
                raw = cast(Mapping[str, Any], meta.get("token_usage") or {})
                usage_mapping = dict(raw)
            elif "usage" in meta:
                raw = cast(Mapping[str, Any], meta.get("usage") or {})
                usage_mapping = dict(raw)
            elif usage_meta:
                usage_mapping = dict(usage_meta)

        if not usage_mapping:
            return {}

        return _extract_usage_from_mapping(usage_mapping)
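Since MonitoringCallback is a standard langchain_core BaseCallbackHandler, it can be attached to any LangChain runnable through the callbacks config. A minimal sketch follows; the ChatOpenAI model choice and the assumption that RunStats is default-constructible are illustrative, not taken from the package.

from langchain_openai import ChatOpenAI  # assumed provider; any chat model works

from monitoring import MonitoringCallback
from monitoring.stats import RunStats

stats = RunStats()  # assumes RunStats has usable defaults
handler = MonitoringCallback(stats_container=stats, log_results=True)

model = ChatOpenAI(model="gpt-4o-mini")  # hypothetical model choice
model.invoke("Summarize this repository.", config={"callbacks": [handler]})

# Token counters are accumulated by on_llm_end()
print(stats.input_tokens, stats.output_tokens, stats.total_tokens)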