skill_seekers-2.7.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. skill_seekers/__init__.py +22 -0
  2. skill_seekers/cli/__init__.py +39 -0
  3. skill_seekers/cli/adaptors/__init__.py +120 -0
  4. skill_seekers/cli/adaptors/base.py +221 -0
  5. skill_seekers/cli/adaptors/claude.py +485 -0
  6. skill_seekers/cli/adaptors/gemini.py +453 -0
  7. skill_seekers/cli/adaptors/markdown.py +269 -0
  8. skill_seekers/cli/adaptors/openai.py +503 -0
  9. skill_seekers/cli/ai_enhancer.py +310 -0
  10. skill_seekers/cli/api_reference_builder.py +373 -0
  11. skill_seekers/cli/architectural_pattern_detector.py +525 -0
  12. skill_seekers/cli/code_analyzer.py +1462 -0
  13. skill_seekers/cli/codebase_scraper.py +1225 -0
  14. skill_seekers/cli/config_command.py +563 -0
  15. skill_seekers/cli/config_enhancer.py +431 -0
  16. skill_seekers/cli/config_extractor.py +871 -0
  17. skill_seekers/cli/config_manager.py +452 -0
  18. skill_seekers/cli/config_validator.py +394 -0
  19. skill_seekers/cli/conflict_detector.py +528 -0
  20. skill_seekers/cli/constants.py +72 -0
  21. skill_seekers/cli/dependency_analyzer.py +757 -0
  22. skill_seekers/cli/doc_scraper.py +2332 -0
  23. skill_seekers/cli/enhance_skill.py +488 -0
  24. skill_seekers/cli/enhance_skill_local.py +1096 -0
  25. skill_seekers/cli/enhance_status.py +194 -0
  26. skill_seekers/cli/estimate_pages.py +433 -0
  27. skill_seekers/cli/generate_router.py +1209 -0
  28. skill_seekers/cli/github_fetcher.py +534 -0
  29. skill_seekers/cli/github_scraper.py +1466 -0
  30. skill_seekers/cli/guide_enhancer.py +723 -0
  31. skill_seekers/cli/how_to_guide_builder.py +1267 -0
  32. skill_seekers/cli/install_agent.py +461 -0
  33. skill_seekers/cli/install_skill.py +178 -0
  34. skill_seekers/cli/language_detector.py +614 -0
  35. skill_seekers/cli/llms_txt_detector.py +60 -0
  36. skill_seekers/cli/llms_txt_downloader.py +104 -0
  37. skill_seekers/cli/llms_txt_parser.py +150 -0
  38. skill_seekers/cli/main.py +558 -0
  39. skill_seekers/cli/markdown_cleaner.py +132 -0
  40. skill_seekers/cli/merge_sources.py +806 -0
  41. skill_seekers/cli/package_multi.py +77 -0
  42. skill_seekers/cli/package_skill.py +241 -0
  43. skill_seekers/cli/pattern_recognizer.py +1825 -0
  44. skill_seekers/cli/pdf_extractor_poc.py +1166 -0
  45. skill_seekers/cli/pdf_scraper.py +617 -0
  46. skill_seekers/cli/quality_checker.py +519 -0
  47. skill_seekers/cli/rate_limit_handler.py +438 -0
  48. skill_seekers/cli/resume_command.py +160 -0
  49. skill_seekers/cli/run_tests.py +230 -0
  50. skill_seekers/cli/setup_wizard.py +93 -0
  51. skill_seekers/cli/split_config.py +390 -0
  52. skill_seekers/cli/swift_patterns.py +560 -0
  53. skill_seekers/cli/test_example_extractor.py +1081 -0
  54. skill_seekers/cli/test_unified_simple.py +179 -0
  55. skill_seekers/cli/unified_codebase_analyzer.py +572 -0
  56. skill_seekers/cli/unified_scraper.py +932 -0
  57. skill_seekers/cli/unified_skill_builder.py +1605 -0
  58. skill_seekers/cli/upload_skill.py +162 -0
  59. skill_seekers/cli/utils.py +432 -0
  60. skill_seekers/mcp/__init__.py +33 -0
  61. skill_seekers/mcp/agent_detector.py +316 -0
  62. skill_seekers/mcp/git_repo.py +273 -0
  63. skill_seekers/mcp/server.py +231 -0
  64. skill_seekers/mcp/server_fastmcp.py +1249 -0
  65. skill_seekers/mcp/server_legacy.py +2302 -0
  66. skill_seekers/mcp/source_manager.py +285 -0
  67. skill_seekers/mcp/tools/__init__.py +115 -0
  68. skill_seekers/mcp/tools/config_tools.py +251 -0
  69. skill_seekers/mcp/tools/packaging_tools.py +826 -0
  70. skill_seekers/mcp/tools/scraping_tools.py +842 -0
  71. skill_seekers/mcp/tools/source_tools.py +828 -0
  72. skill_seekers/mcp/tools/splitting_tools.py +212 -0
  73. skill_seekers/py.typed +0 -0
  74. skill_seekers-2.7.3.dist-info/METADATA +2027 -0
  75. skill_seekers-2.7.3.dist-info/RECORD +79 -0
  76. skill_seekers-2.7.3.dist-info/WHEEL +5 -0
  77. skill_seekers-2.7.3.dist-info/entry_points.txt +19 -0
  78. skill_seekers-2.7.3.dist-info/licenses/LICENSE +21 -0
  79. skill_seekers-2.7.3.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1225 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Codebase Scraper CLI Tool
4
+
5
+ Standalone tool for analyzing local codebases without GitHub API.
6
+ Extracts code signatures, comments, and optionally generates API documentation.
7
+
8
+ Usage:
9
+ codebase-scraper --directory /path/to/repo --output output/codebase/
10
+ codebase-scraper --directory . --depth deep --languages Python,JavaScript
11
+ codebase-scraper --directory /path/to/repo --build-api-reference
12
+
13
+ Features:
14
+ - File tree walking with .gitignore support
15
+ - Multi-language code analysis (9 languages: Python, JavaScript/TypeScript, C/C++, C#, Go, Rust, Java, Ruby, PHP)
16
+ - API reference generation
17
+ - Comment extraction
18
+ - Dependency graph analysis
19
+ - Configurable depth levels
20
+
21
+ Credits:
22
+ - Language parsing patterns inspired by official language specifications
23
+ - NetworkX for dependency graph analysis: https://networkx.org/
24
+ - pathspec for .gitignore support: https://pypi.org/project/pathspec/
25
+ """
26
+
27
import argparse
import fnmatch
import json
import logging
import os
import sys
from pathlib import Path
from typing import Any
34
+
35
+ # Add parent directory to path for imports
36
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
37
+
38
+ from skill_seekers.cli.api_reference_builder import APIReferenceBuilder
39
+ from skill_seekers.cli.code_analyzer import CodeAnalyzer
40
+ from skill_seekers.cli.config_extractor import ConfigExtractor
41
+ from skill_seekers.cli.dependency_analyzer import DependencyAnalyzer
42
+
43
# Try to import pathspec for .gitignore support
# pathspec is an optional dependency: when missing, .gitignore filtering is
# disabled (see load_gitignore) rather than failing the whole tool.
try:
    import pathspec

    PATHSPEC_AVAILABLE = True
except ImportError:
    PATHSPEC_AVAILABLE = False
50
+
51
# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)


# Language extension mapping
# Maps lowercase file extensions to the language label used throughout this
# module (detect_language, language filtering, and per-language statistics).
# NOTE(review): ".h" is attributed to C++ rather than C — confirm this bias
# is intentional for mixed C/C++ codebases.
LANGUAGE_EXTENSIONS = {
    ".py": "Python",
    ".js": "JavaScript",
    ".jsx": "JavaScript",
    ".ts": "TypeScript",
    ".tsx": "TypeScript",
    ".cpp": "C++",
    ".cc": "C++",
    ".cxx": "C++",
    ".h": "C++",
    ".hpp": "C++",
    ".hxx": "C++",
    ".c": "C",
    ".cs": "C#",
    ".go": "Go",
    ".rs": "Rust",
    ".java": "Java",
    ".rb": "Ruby",
    ".php": "PHP",
}

# Default directories to exclude
# NOTE(review): "*.egg-info" is a glob pattern while every other entry is a
# literal directory name — verify that the exclusion check supports glob
# matching, otherwise this entry never matches anything.
DEFAULT_EXCLUDED_DIRS = {
    "node_modules",
    "venv",
    "__pycache__",
    ".git",
    ".svn",
    ".hg",
    "build",
    "dist",
    "target",
    ".pytest_cache",
    ".tox",
    ".mypy_cache",
    "htmlcov",
    "coverage",
    ".coverage",
    ".eggs",
    "*.egg-info",
    ".idea",
    ".vscode",
    ".vs",
    "__pypackages__",
}
102
+
103
+
104
def detect_language(file_path: Path) -> str:
    """Detect the programming language of *file_path* from its extension.

    Args:
        file_path: Path to a source file.

    Returns:
        Language name from LANGUAGE_EXTENSIONS, or ``"Unknown"`` when the
        extension is not recognized.
    """
    # Extensions are stored lowercase in the mapping, so normalize first.
    return LANGUAGE_EXTENSIONS.get(file_path.suffix.lower(), "Unknown")
116
+
117
+
118
def load_gitignore(directory: Path) -> "pathspec.PathSpec | None":
    """
    Load .gitignore file and create pathspec matcher.

    The return annotation is deliberately a string: ``pathspec`` is an
    optional dependency (see the guarded import at module top), and an
    eagerly-evaluated ``pathspec.PathSpec | None`` annotation would raise
    NameError at import time whenever pathspec is not installed.

    Args:
        directory: Root directory to search for .gitignore

    Returns:
        PathSpec object if .gitignore found, None otherwise (also None when
        pathspec is unavailable or the file cannot be parsed).
    """
    if not PATHSPEC_AVAILABLE:
        logger.warning("pathspec not installed - .gitignore support disabled")
        logger.warning("Install with: pip install pathspec")
        return None

    gitignore_path = directory / ".gitignore"
    if not gitignore_path.exists():
        logger.debug(f"No .gitignore found in {directory}")
        return None

    try:
        with open(gitignore_path, encoding="utf-8") as f:
            # "gitwildmatch" interprets the lines with git's wildcard rules.
            spec = pathspec.PathSpec.from_lines("gitwildmatch", f)
        logger.info(f"Loaded .gitignore from {gitignore_path}")
        return spec
    except Exception as e:
        # Best-effort: a malformed .gitignore disables filtering, not the run.
        logger.warning(f"Failed to load .gitignore: {e}")
        return None
146
+
147
+
148
def should_exclude_dir(dir_name: str, excluded_dirs: set) -> bool:
    """
    Check if directory should be excluded from analysis.

    Matches either a literal directory name or a glob-style pattern.
    DEFAULT_EXCLUDED_DIRS contains the pattern ``"*.egg-info"``, which a
    plain set-membership test could never match, so patterns are checked
    with fnmatch as a fallback.

    Args:
        dir_name: Directory name
        excluded_dirs: Set of directory names (or glob patterns) to exclude

    Returns:
        True if directory should be excluded
    """
    # Fast path: exact name match.
    if dir_name in excluded_dirs:
        return True
    # Slow path: glob patterns such as "*.egg-info".
    return any(fnmatch.fnmatch(dir_name, pattern) for pattern in excluded_dirs)
160
+
161
+
162
def walk_directory(
    root: Path,
    patterns: list[str] | None = None,
    gitignore_spec: "pathspec.PathSpec | None" = None,
    excluded_dirs: set | None = None,
) -> list[Path]:
    """
    Walk directory tree and collect source files.

    The ``gitignore_spec`` annotation is a string on purpose: ``pathspec``
    is an optional dependency, and an eager ``pathspec.PathSpec | None``
    annotation would raise NameError at module import time whenever
    pathspec is not installed.

    Args:
        root: Root directory to walk
        patterns: Optional file patterns to include (e.g., ['*.py', '*.js'])
        gitignore_spec: Optional PathSpec object for .gitignore rules
        excluded_dirs: Set of directory names to exclude

    Returns:
        Sorted list of source file paths (only extensions known to
        LANGUAGE_EXTENSIONS)
    """
    if excluded_dirs is None:
        excluded_dirs = DEFAULT_EXCLUDED_DIRS

    files = []
    root = Path(root).resolve()

    for dirpath, dirnames, filenames in os.walk(root):
        current_dir = Path(dirpath)

        # Filter out excluded directories (in-place so os.walk prunes them)
        dirnames[:] = [d for d in dirnames if not should_exclude_dir(d, excluded_dirs)]

        for filename in filenames:
            file_path = current_dir / filename

            # Check .gitignore rules
            if gitignore_spec:
                try:
                    rel_path = file_path.relative_to(root)
                    if gitignore_spec.match_file(str(rel_path)):
                        logger.debug(f"Skipping (gitignore): {rel_path}")
                        continue
                except ValueError:
                    # File is outside root, skip it
                    continue

            # Check file extension
            if file_path.suffix.lower() not in LANGUAGE_EXTENSIONS:
                continue

            # Check file patterns if provided
            if patterns and not any(file_path.match(pattern) for pattern in patterns):
                continue

            files.append(file_path)

    return sorted(files)
217
+
218
+
219
def analyze_codebase(
    directory: Path,
    output_dir: Path,
    depth: str = "deep",
    languages: list[str] | None = None,
    file_patterns: list[str] | None = None,
    build_api_reference: bool = True,
    extract_comments: bool = True,
    build_dependency_graph: bool = True,
    detect_patterns: bool = True,
    extract_test_examples: bool = True,
    build_how_to_guides: bool = True,
    extract_config_patterns: bool = True,
    enhance_with_ai: bool = True,
    ai_mode: str = "auto",
) -> dict[str, Any]:
    """
    Analyze local codebase and extract code knowledge.

    Args:
        directory: Directory to analyze
        output_dir: Output directory for results
        depth: Analysis depth (surface, deep, full)
        languages: Optional list of languages to analyze
        file_patterns: Optional file patterns to include
        build_api_reference: Generate API reference markdown
        extract_comments: Extract inline comments
        build_dependency_graph: Generate dependency graph and detect circular dependencies
        detect_patterns: Detect design patterns (Singleton, Factory, Observer, etc.)
        extract_test_examples: Extract usage examples from test files
        build_how_to_guides: Build how-to guides from workflow examples (C3.3)
        extract_config_patterns: Extract configuration patterns from config files (C3.4)
        enhance_with_ai: Enhance patterns and examples with AI analysis (C3.6)
        ai_mode: AI enhancement mode for how-to guides (auto, api, local, none)

    Returns:
        Analysis results dictionary

    NOTE(review): ``extract_comments`` is accepted but never read anywhere in
    this function body — confirm whether it is forwarded implicitly via
    CodeAnalyzer's ``depth`` or is simply dead.
    """
    # Resolve directory to absolute path to avoid relative_to() errors
    directory = Path(directory).resolve()

    logger.info(f"Analyzing codebase: {directory}")
    logger.info(f"Depth: {depth}")

    # Create output directory
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Load .gitignore
    gitignore_spec = load_gitignore(directory)

    # Walk directory tree
    logger.info("Scanning directory tree...")
    files = walk_directory(directory, patterns=file_patterns, gitignore_spec=gitignore_spec)

    logger.info(f"Found {len(files)} source files")

    # Filter by language if specified
    if languages:
        language_set = set(languages)
        files = [f for f in files if detect_language(f) in language_set]
        logger.info(f"Filtered to {len(files)} files for languages: {', '.join(languages)}")

    # Initialize code analyzer
    analyzer = CodeAnalyzer(depth=depth)

    # Analyze each file
    results = {"files": []}
    analyzed_count = 0

    for file_path in files:
        try:
            content = file_path.read_text(encoding="utf-8", errors="ignore")
            language = detect_language(file_path)

            if language == "Unknown":
                continue

            # Analyze file
            analysis = analyzer.analyze_file(str(file_path), content, language)

            # Only include files with actual analysis results
            if analysis and (analysis.get("classes") or analysis.get("functions")):
                results["files"].append(
                    {
                        "file": str(file_path.relative_to(directory)),
                        "language": language,
                        **analysis,
                    }
                )
                # Counts only files that contributed classes/functions, so the
                # progress message below can trail len(files).
                analyzed_count += 1

                if analyzed_count % 10 == 0:
                    logger.info(f"Analyzed {analyzed_count}/{len(files)} files...")

        except Exception as e:
            # Best-effort per file: one unreadable file must not abort the run.
            logger.warning(f"Error analyzing {file_path}: {e}")
            continue

    logger.info(f"✅ Successfully analyzed {analyzed_count} files")

    # Save results
    output_json = output_dir / "code_analysis.json"
    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)

    logger.info(f"📁 Saved analysis to: {output_json}")

    # Build API reference if requested
    if build_api_reference and results["files"]:
        logger.info("Building API reference documentation...")
        builder = APIReferenceBuilder(results)
        api_output_dir = output_dir / "api_reference"
        generated_files = builder.build_reference(api_output_dir)
        logger.info(f"✅ Generated {len(generated_files)} API reference files")
        logger.info(f"📁 API reference: {api_output_dir}")

    # Build dependency graph if requested (C2.6)
    if build_dependency_graph:
        logger.info("Building dependency graph...")
        dep_analyzer = DependencyAnalyzer()

        # Analyze dependencies for all files (re-reads each file from disk)
        for file_path in files:
            try:
                content = file_path.read_text(encoding="utf-8", errors="ignore")
                language = detect_language(file_path)

                if language != "Unknown":
                    # Use relative path from directory for better graph readability
                    rel_path = str(file_path.relative_to(directory))
                    dep_analyzer.analyze_file(rel_path, content, language)
            except Exception as e:
                logger.warning(f"Error analyzing dependencies for {file_path}: {e}")
                continue

        # Build the graph
        # NOTE(review): the returned graph object is never used afterwards;
        # build_graph() presumably also mutates dep_analyzer state — confirm.
        graph = dep_analyzer.build_graph()

        # Detect circular dependencies
        cycles = dep_analyzer.detect_cycles()
        if cycles:
            logger.warning(f"⚠️ Found {len(cycles)} circular dependencies:")
            for i, cycle in enumerate(cycles[:5], 1):  # Show first 5
                cycle_str = " → ".join(cycle) + f" → {cycle[0]}"
                logger.warning(f" {i}. {cycle_str}")
            if len(cycles) > 5:
                logger.warning(f" ... and {len(cycles) - 5} more")
        else:
            logger.info("✅ No circular dependencies found")

        # Save dependency graph data
        dep_output_dir = output_dir / "dependencies"
        dep_output_dir.mkdir(parents=True, exist_ok=True)

        # Export as JSON
        dep_json = dep_output_dir / "dependency_graph.json"
        with open(dep_json, "w", encoding="utf-8") as f:
            json.dump(dep_analyzer.export_json(), f, indent=2)
        logger.info(f"📁 Saved dependency graph: {dep_json}")

        # Export as Mermaid diagram
        mermaid_file = dep_output_dir / "dependency_graph.mmd"
        mermaid_file.write_text(dep_analyzer.export_mermaid())
        logger.info(f"📁 Saved Mermaid diagram: {mermaid_file}")

        # Save statistics
        stats = dep_analyzer.get_statistics()
        stats_file = dep_output_dir / "statistics.json"
        with open(stats_file, "w", encoding="utf-8") as f:
            json.dump(stats, f, indent=2)
        logger.info(
            f"📊 Statistics: {stats['total_files']} files, "
            f"{stats['total_dependencies']} dependencies, "
            f"{stats['circular_dependencies']} cycles"
        )

        # Try to export as DOT (requires pydot)
        try:
            dot_file = dep_output_dir / "dependency_graph.dot"
            dep_analyzer.export_dot(str(dot_file))
        except Exception:
            pass  # pydot not installed, skip DOT export

    # Detect design patterns if requested (C3.1)
    if detect_patterns:
        logger.info("Detecting design patterns...")
        # Imported lazily so the feature can be toggled off without the cost.
        from skill_seekers.cli.pattern_recognizer import PatternRecognizer

        pattern_recognizer = PatternRecognizer(depth=depth, enhance_with_ai=enhance_with_ai)
        pattern_results = []

        for file_path in files:
            try:
                content = file_path.read_text(encoding="utf-8", errors="ignore")
                language = detect_language(file_path)

                if language != "Unknown":
                    report = pattern_recognizer.analyze_file(str(file_path), content, language)

                    if report.patterns:
                        pattern_results.append(report.to_dict())
            except Exception as e:
                logger.warning(f"Pattern detection failed for {file_path}: {e}")
                continue

        # Save pattern results
        if pattern_results:
            pattern_output = output_dir / "patterns"
            pattern_output.mkdir(parents=True, exist_ok=True)

            pattern_json = pattern_output / "detected_patterns.json"
            with open(pattern_json, "w", encoding="utf-8") as f:
                json.dump(pattern_results, f, indent=2)

            total_patterns = sum(len(r["patterns"]) for r in pattern_results)
            logger.info(f"✅ Detected {total_patterns} patterns in {len(pattern_results)} files")
            logger.info(f"📁 Saved to: {pattern_json}")
        else:
            logger.info("No design patterns detected")

    # Extract test examples if requested (C3.2)
    if extract_test_examples:
        logger.info("Extracting usage examples from test files...")
        from skill_seekers.cli.test_example_extractor import TestExampleExtractor

        # Create extractor
        test_extractor = TestExampleExtractor(
            min_confidence=0.5,
            max_per_file=10,
            languages=languages,
            enhance_with_ai=enhance_with_ai,
        )

        # Extract examples from directory
        try:
            example_report = test_extractor.extract_from_directory(directory, recursive=True)

            if example_report.total_examples > 0:
                # Save results
                examples_output = output_dir / "test_examples"
                examples_output.mkdir(parents=True, exist_ok=True)

                # Save as JSON
                examples_json = examples_output / "test_examples.json"
                with open(examples_json, "w", encoding="utf-8") as f:
                    json.dump(example_report.to_dict(), f, indent=2)

                # Save as Markdown
                examples_md = examples_output / "test_examples.md"
                examples_md.write_text(example_report.to_markdown(), encoding="utf-8")

                logger.info(
                    f"✅ Extracted {example_report.total_examples} test examples "
                    f"({example_report.high_value_count} high-value)"
                )
                logger.info(f"📁 Saved to: {examples_output}")
            else:
                logger.info("No test examples extracted")

        except Exception as e:
            logger.warning(f"Test example extraction failed: {e}")
            # Keeps example_report defined for the how-to-guide stage below.
            example_report = None

    # Build how-to guides from workflow examples (C3.3)
    if build_how_to_guides and extract_test_examples:
        logger.info("Building how-to guides from workflow examples...")
        try:
            from skill_seekers.cli.how_to_guide_builder import HowToGuideBuilder

            # Create guide builder
            guide_builder = HowToGuideBuilder(enhance_with_ai=enhance_with_ai)

            # Build guides from workflow examples
            tutorials_dir = output_dir / "tutorials"

            # Get workflow examples from the example_report if available
            # (locals() check guards against extraction having been skipped)
            if (
                "example_report" in locals()
                and example_report
                and example_report.total_examples > 0
            ):
                # Convert example_report to list of dicts for processing
                examples_list = example_report.to_dict().get("examples", [])

                guide_collection = guide_builder.build_guides_from_examples(
                    examples_list,
                    grouping_strategy="ai-tutorial-group",
                    output_dir=tutorials_dir,
                    enhance_with_ai=enhance_with_ai,
                    ai_mode=ai_mode,
                )

                if guide_collection and guide_collection.total_guides > 0:
                    # Save collection summary
                    collection_json = tutorials_dir / "guide_collection.json"
                    with open(collection_json, "w", encoding="utf-8") as f:
                        json.dump(guide_collection.to_dict(), f, indent=2)

                    logger.info(f"✅ Built {guide_collection.total_guides} how-to guides")
                    logger.info(f"📁 Saved to: {tutorials_dir}")
                else:
                    logger.info("No how-to guides generated (insufficient workflow examples)")
            else:
                logger.info("No workflow examples available for guide generation")

        except Exception as e:
            logger.warning(f"How-to guide building failed: {e}")

    # Extract configuration patterns (C3.4)
    if extract_config_patterns:
        logger.info("Extracting configuration patterns...")
        try:
            config_extractor = ConfigExtractor()

            # Extract config patterns from directory
            extraction_result = config_extractor.extract_from_directory(directory)

            if extraction_result.config_files:
                # Convert to dict for enhancement
                result_dict = config_extractor.to_dict(extraction_result)

                # AI Enhancement (if enabled)
                if enhance_with_ai and ai_mode != "none":
                    try:
                        from skill_seekers.cli.config_enhancer import ConfigEnhancer

                        logger.info(f"🤖 Enhancing config analysis with AI (mode: {ai_mode})...")
                        enhancer = ConfigEnhancer(mode=ai_mode)
                        result_dict = enhancer.enhance_config_result(result_dict)
                        logger.info("✅ AI enhancement complete")
                    except Exception as e:
                        # Enhancement is optional; fall back to raw extraction.
                        logger.warning(f"⚠️ Config AI enhancement failed: {e}")

                # Save results
                config_output = output_dir / "config_patterns"
                config_output.mkdir(parents=True, exist_ok=True)

                # Save as JSON
                config_json = config_output / "config_patterns.json"
                with open(config_json, "w", encoding="utf-8") as f:
                    json.dump(result_dict, f, indent=2)

                # Save as Markdown (basic - AI enhancements in JSON only for now)
                config_md = config_output / "config_patterns.md"
                config_md.write_text(extraction_result.to_markdown(), encoding="utf-8")

                # Count total settings across all files
                total_settings = sum(len(cf.settings) for cf in extraction_result.config_files)
                total_patterns = sum(len(cf.patterns) for cf in extraction_result.config_files)

                logger.info(
                    f"✅ Extracted {len(extraction_result.config_files)} config files "
                    f"with {total_settings} settings and {total_patterns} detected patterns"
                )

                if "ai_enhancements" in result_dict:
                    insights = result_dict["ai_enhancements"].get("overall_insights", {})
                    if insights.get("security_issues_found"):
                        logger.info(
                            f"🔐 Security issues found: {insights['security_issues_found']}"
                        )

                logger.info(f"📁 Saved to: {config_output}")
            else:
                logger.info("No configuration files found")

        except Exception as e:
            logger.warning(f"Config pattern extraction failed: {e}")

    # Detect architectural patterns (C3.7)
    # Always run this - it provides high-level overview
    logger.info("Analyzing architectural patterns...")
    from skill_seekers.cli.architectural_pattern_detector import ArchitecturalPatternDetector

    arch_detector = ArchitecturalPatternDetector(enhance_with_ai=enhance_with_ai)
    arch_report = arch_detector.analyze(directory, results["files"])

    if arch_report.patterns:
        arch_output = output_dir / "architecture"
        arch_output.mkdir(parents=True, exist_ok=True)

        # Save as JSON
        arch_json = arch_output / "architectural_patterns.json"
        with open(arch_json, "w", encoding="utf-8") as f:
            json.dump(arch_report.to_dict(), f, indent=2)

        logger.info(f"🏗️ Detected {len(arch_report.patterns)} architectural patterns")
        for pattern in arch_report.patterns:
            logger.info(f" - {pattern.pattern_name} (confidence: {pattern.confidence:.2f})")
        logger.info(f"📁 Saved to: {arch_json}")
    else:
        logger.info("No clear architectural patterns detected")

    # Generate SKILL.md and references/ directory
    logger.info("Generating SKILL.md and references...")
    _generate_skill_md(
        output_dir=output_dir,
        directory=directory,
        results=results,
        depth=depth,
        build_api_reference=build_api_reference,
        build_dependency_graph=build_dependency_graph,
        detect_patterns=detect_patterns,
        extract_test_examples=extract_test_examples,
        extract_config_patterns=extract_config_patterns,
    )

    return results
628
+
629
+
630
def _generate_skill_md(
    output_dir: Path,
    directory: Path,
    results: dict[str, Any],
    depth: str,
    build_api_reference: bool,
    build_dependency_graph: bool,
    detect_patterns: bool,
    extract_test_examples: bool,
    extract_config_patterns: bool,
):
    """
    Generate rich SKILL.md from codebase analysis results.

    Creates a 300+ line skill file with:
    - Front matter (name, description)
    - Repository info (path, languages, file count)
    - When to Use section
    - Quick Reference (patterns, languages, stats)
    - Code Examples (from test files)
    - API Reference (from code analysis)
    - Architecture Overview
    - Configuration Patterns
    - Available References

    Side effects: writes SKILL.md into output_dir and then calls
    _generate_references(output_dir).
    """
    repo_name = directory.name

    # Generate skill name (lowercase, hyphens only, max 64 chars)
    skill_name = repo_name.lower().replace("_", "-").replace(" ", "-")[:64]

    # Generate description
    description = f"Local codebase analysis for {repo_name}"

    # Count files by language
    language_stats = _get_language_stats(results.get("files", []))
    total_files = len(results.get("files", []))

    # Start building content (markdown template; lines intentionally flush-left
    # so the generated file has no leading indentation)
    skill_content = f"""---
name: {skill_name}
description: {description}
---

# {repo_name} Codebase

## Description

Local codebase analysis and documentation generated from code analysis.

**Path:** `{directory}`
**Files Analyzed:** {total_files}
**Languages:** {", ".join(language_stats.keys())}
**Analysis Depth:** {depth}

## When to Use This Skill

Use this skill when you need to:
- Understand the codebase architecture and design patterns
- Find implementation examples and usage patterns
- Review API documentation extracted from code
- Check configuration patterns and best practices
- Explore test examples and real-world usage
- Navigate the codebase structure efficiently

## ⚡ Quick Reference

### Codebase Statistics

"""

    # Language breakdown
    skill_content += "**Languages:**\n"
    for lang, count in sorted(language_stats.items(), key=lambda x: x[1], reverse=True):
        percentage = (count / total_files * 100) if total_files > 0 else 0
        skill_content += f"- **{lang}**: {count} files ({percentage:.1f}%)\n"
    skill_content += "\n"

    # Analysis features performed
    skill_content += "**Analysis Performed:**\n"
    if build_api_reference:
        skill_content += "- ✅ API Reference (C2.5)\n"
    if build_dependency_graph:
        skill_content += "- ✅ Dependency Graph (C2.6)\n"
    if detect_patterns:
        skill_content += "- ✅ Design Patterns (C3.1)\n"
    if extract_test_examples:
        skill_content += "- ✅ Test Examples (C3.2)\n"
    if extract_config_patterns:
        skill_content += "- ✅ Configuration Patterns (C3.4)\n"
    # Architectural analysis always runs, hence unconditional.
    skill_content += "- ✅ Architectural Analysis (C3.7)\n\n"

    # Add design patterns if available
    if detect_patterns:
        patterns_content = _format_patterns_section(output_dir)
        if patterns_content:
            skill_content += patterns_content

    # Add code examples if available
    if extract_test_examples:
        examples_content = _format_examples_section(output_dir)
        if examples_content:
            skill_content += examples_content

    # Add API reference if available
    if build_api_reference:
        api_content = _format_api_section(output_dir)
        if api_content:
            skill_content += api_content

    # Add architecture if available
    arch_content = _format_architecture_section(output_dir)
    if arch_content:
        skill_content += arch_content

    # Add configuration patterns if available
    if extract_config_patterns:
        config_content = _format_config_section(output_dir)
        if config_content:
            skill_content += config_content

    # Available references (each bullet emitted only if the artifact exists)
    skill_content += "## 📚 Available References\n\n"
    skill_content += "This skill includes detailed reference documentation:\n\n"

    refs_added = False
    if build_api_reference and (output_dir / "api_reference").exists():
        skill_content += (
            "- **API Reference**: `references/api_reference/` - Complete API documentation\n"
        )
        refs_added = True
    if build_dependency_graph and (output_dir / "dependencies").exists():
        skill_content += (
            "- **Dependencies**: `references/dependencies/` - Dependency graph and analysis\n"
        )
        refs_added = True
    if detect_patterns and (output_dir / "patterns").exists():
        skill_content += "- **Patterns**: `references/patterns/` - Detected design patterns\n"
        refs_added = True
    if extract_test_examples and (output_dir / "test_examples").exists():
        skill_content += "- **Examples**: `references/test_examples/` - Usage examples from tests\n"
        refs_added = True
    if extract_config_patterns and (output_dir / "config_patterns").exists():
        skill_content += (
            "- **Configuration**: `references/config_patterns/` - Configuration patterns\n"
        )
        refs_added = True
    if (output_dir / "architecture").exists():
        skill_content += "- **Architecture**: `references/architecture/` - Architectural patterns\n"
        refs_added = True

    if not refs_added:
        skill_content += "No additional references generated (analysis features disabled).\n"

    skill_content += "\n"

    # Footer
    skill_content += "---\n\n"
    skill_content += "**Generated by Skill Seeker** | Codebase Analyzer with C3.x Analysis\n"

    # Write SKILL.md
    skill_path = output_dir / "SKILL.md"
    skill_path.write_text(skill_content, encoding="utf-8")

    line_count = len(skill_content.split("\n"))
    logger.info(f"✅ Generated SKILL.md: {skill_path} ({line_count} lines)")

    # Generate references/ directory structure
    _generate_references(output_dir)
798
+
799
+
800
def _get_language_stats(files: list[dict]) -> dict[str, int]:
    """Tally analyzed files per programming language.

    Args:
        files: Per-file analysis dicts, each carrying a ``language`` key.

    Returns:
        Mapping of language name to file count; files whose language
        could not be determined ("Unknown") are excluded.
    """
    counts: dict[str, int] = {}
    for entry in files:
        language = entry.get("language", "Unknown")
        if language == "Unknown":
            continue  # skip files the analyzer could not classify
        counts[language] = counts.get(language, 0) + 1
    return counts
809
+
810
+
811
def _format_patterns_section(output_dir: Path) -> str:
    """Format the design-patterns summary from patterns/detected_patterns.json.

    Reads the C3.1 pattern-detection output, keeps only high-confidence
    detections (> 0.7), deduplicates per (class, pattern type) keeping the
    highest confidence, and renders a markdown summary of the top 5 types.

    Args:
        output_dir: Analysis output directory containing ``patterns/``.

    Returns:
        Markdown section text, or "" when no usable pattern data exists.
    """
    patterns_file = output_dir / "patterns" / "detected_patterns.json"
    if not patterns_file.exists():
        return ""

    try:
        with open(patterns_file, encoding="utf-8") as f:
            patterns_data = json.load(f)
    except Exception:
        # Best-effort: an unreadable/corrupt file just omits the section.
        return ""

    if not patterns_data:
        return ""

    # Deduplicate by (class, pattern type), keeping the highest confidence.
    by_class = {}
    for pattern_file in patterns_data:
        for pattern in pattern_file.get("patterns", []):
            ptype = pattern.get("pattern_type", "Unknown")
            cls = pattern.get("class_name", "")
            confidence = pattern.get("confidence", 0)

            # Skip low confidence
            if confidence < 0.7:
                continue

            key = f"{cls}:{ptype}"
            if key not in by_class or by_class[key]["confidence"] < confidence:
                by_class[key] = pattern

    # Count by type AFTER deduplication. Previously the count was bumped once
    # per occurrence inside the dedup loop, so duplicate detections of the
    # same class inflated per-type counts and disagreed with len(by_class).
    pattern_counts = {}
    for pattern in by_class.values():
        ptype = pattern.get("pattern_type", "Unknown")
        pattern_counts[ptype] = pattern_counts.get(ptype, 0) + 1

    if not pattern_counts:
        return ""

    content = "### 🎨 Design Patterns Detected\n\n"
    content += "*From C3.1 codebase analysis (confidence > 0.7)*\n\n"

    # Top 5 pattern types
    for ptype, count in sorted(pattern_counts.items(), key=lambda x: x[1], reverse=True)[:5]:
        content += f"- **{ptype}**: {count} instances\n"

    content += f"\n*Total: {len(by_class)} high-confidence patterns*\n\n"
    content += "*See `references/patterns/` for complete pattern analysis*\n\n"
    return content
861
+
862
+
863
def _format_examples_section(output_dir: Path) -> str:
    """Render the code-examples section from test_examples/test_examples.json.

    Prefers examples whose complexity score exceeds 0.7; when none qualify,
    falls back to the first ten examples. Emits at most ten fenced code
    blocks ordered by descending complexity.

    Args:
        output_dir: Analysis output directory containing ``test_examples/``.

    Returns:
        Markdown section text, or "" when no example data is available.
    """
    examples_file = output_dir / "test_examples" / "test_examples.json"
    if not examples_file.exists():
        return ""

    try:
        with open(examples_file, encoding="utf-8") as f:
            examples_data = json.load(f)
    except Exception:
        # Unreadable/corrupt JSON simply drops the section.
        return ""

    examples = examples_data.get("examples", [])
    if not examples:
        return ""

    # Prefer complex (high-value) examples; otherwise fall back to any.
    selected = [ex for ex in examples if ex.get("complexity_score", 0) > 0.7]
    if not selected:
        selected = examples[:10]
    if not selected:
        return ""

    parts = [
        "## 📝 Code Examples\n\n",
        "*High-quality examples extracted from test files (C3.2)*\n\n",
    ]

    ranked = sorted(selected, key=lambda ex: ex.get("complexity_score", 0), reverse=True)
    for ex in ranked[:10]:
        description = ex.get("description", "Example")
        fence_lang = ex.get("language", "python").lower()
        snippet = ex.get("code", "")
        score = ex.get("complexity_score", 0)

        parts.append(f"**{description}** (complexity: {score:.2f})\n\n")
        parts.append(f"```{fence_lang}\n{snippet}\n```\n\n")

    parts.append("*See `references/test_examples/` for all extracted examples*\n\n")
    return "".join(parts)
904
+
905
+
906
def _format_api_section(output_dir: Path) -> str:
    """Build a short API-reference preview section for SKILL.md.

    Shows the first 500 characters of ``api_reference/api_reference.md``
    (with an ellipsis when truncated) and points readers at the full docs.

    Args:
        output_dir: Analysis output directory containing ``api_reference/``.

    Returns:
        Markdown section text, or "" when the reference file is missing
        or unreadable.
    """
    api_md = output_dir / "api_reference" / "api_reference.md"
    # A single existence check on the file suffices: the file cannot exist
    # without its parent directory existing.
    if not api_md.exists():
        return ""

    try:
        full_text = api_md.read_text(encoding="utf-8")
    except Exception:
        return ""

    # Truncated preview with ellipsis marker when content continues.
    preview = full_text[:500] + ("..." if len(full_text) > 500 else "")

    return (
        "## 🔧 API Reference\n\n"
        "*Extracted from codebase analysis (C2.5)*\n\n"
        + preview
        + "\n\n"
        "*See `references/api_reference/` for complete API documentation*\n\n"
    )
931
+
932
+
933
def _format_architecture_section(output_dir: Path) -> str:
    """Render the architecture overview from architectural_patterns.json.

    Lists up to five detected architectural patterns with their confidence
    score and up to three supporting indicators each.

    Args:
        output_dir: Analysis output directory containing ``architecture/``.

    Returns:
        Markdown section text, or "" when no architectural data exists.
    """
    arch_file = output_dir / "architecture" / "architectural_patterns.json"
    if not arch_file.exists():
        return ""

    try:
        arch_data = json.loads(arch_file.read_text(encoding="utf-8"))
    except Exception:
        # Any read/parse failure silently omits the section.
        return ""

    patterns = arch_data.get("patterns", [])
    if not patterns:
        return ""

    parts = [
        "## 🏗️ Architecture Overview\n\n",
        "*From C3.7 architectural analysis*\n\n",
        "**Detected Architectural Patterns:**\n\n",
    ]

    for pattern in patterns[:5]:
        label = pattern.get("pattern_name", "Unknown")
        score = pattern.get("confidence", 0)
        parts.append(f"- **{label}** (confidence: {score:.2f})\n")

        indicators = pattern.get("indicators", [])
        if indicators:
            parts.append(f"  - Indicators: {', '.join(indicators[:3])}\n")

    parts.append(f"\n*Total: {len(patterns)} architectural patterns detected*\n\n")
    parts.append("*See `references/architecture/` for complete architectural analysis*\n\n")
    return "".join(parts)
965
+
966
+
967
def _format_config_section(output_dir: Path) -> str:
    """Summarize configuration-pattern analysis for SKILL.md.

    Reports how many config files were analyzed, the total settings and
    detected patterns across them, and a breakdown by configuration type.

    Args:
        output_dir: Analysis output directory containing ``config_patterns/``.

    Returns:
        Markdown section text, or "" when no configuration data exists.
    """
    config_file = output_dir / "config_patterns" / "config_patterns.json"
    if not config_file.exists():
        return ""

    try:
        with open(config_file, encoding="utf-8") as fh:
            data = json.load(fh)
    except Exception:
        return ""

    config_files = data.get("config_files", [])
    if not config_files:
        return ""

    setting_total = sum(len(cf.get("settings", [])) for cf in config_files)
    pattern_total = sum(len(cf.get("patterns", [])) for cf in config_files)

    section = (
        "## ⚙️ Configuration Patterns\n\n"
        "*From C3.4 configuration analysis*\n\n"
        f"**Configuration Files Analyzed:** {len(config_files)}\n"
        f"**Total Settings:** {setting_total}\n"
        f"**Patterns Detected:** {pattern_total}\n\n"
    )

    # Breakdown of config files by detected type, most common first.
    type_counts = {}
    for cf in config_files:
        kind = cf.get("config_type", "unknown")
        type_counts[kind] = type_counts.get(kind, 0) + 1

    if type_counts:
        section += "**Configuration Types:**\n"
        for kind, count in sorted(type_counts.items(), key=lambda item: item[1], reverse=True):
            section += f"- {kind}: {count} files\n"
        section += "\n"

    section += "*See `references/config_patterns/` for detailed configuration analysis*\n\n"
    return section
1006
+
1007
+
1008
def _generate_references(output_dir: Path):
    """
    Generate the references/ directory structure from analysis output.

    Copies each existing analysis output directory (api_reference,
    dependencies, patterns, ...) into ``references/`` so the skill is
    self-contained and portable (copies, not symlinks). Any stale target
    directory is replaced.

    Args:
        output_dir: Root analysis output directory.
    """
    # Single import here instead of the previous duplicate imports executed
    # on every loop iteration.
    import shutil

    references_dir = output_dir / "references"
    references_dir.mkdir(exist_ok=True)

    # Map analysis directories to reference names
    mappings = {
        "api_reference": "api_reference",
        "dependencies": "dependencies",
        "patterns": "patterns",
        "test_examples": "test_examples",
        "tutorials": "tutorials",
        "config_patterns": "config_patterns",
        "architecture": "architecture",
    }

    for source, target in mappings.items():
        source_dir = output_dir / source
        target_dir = references_dir / target

        if source_dir.exists() and source_dir.is_dir():
            # Copy directory to references/ (not symlink, for portability);
            # remove any previous copy first so copytree can't collide.
            if target_dir.exists():
                shutil.rmtree(target_dir)
            shutil.copytree(source_dir, target_dir)
            logger.debug(f"Copied {source} → references/{target}")

    logger.info(f"✅ Generated references directory: {references_dir}")
1045
+
1046
+
1047
def main():
    """Command-line interface for codebase analysis.

    Parses CLI flags, validates the target directory, runs
    analyze_codebase() with the requested feature set, and prints a
    summary. Returns a process exit code: 0 on success, 1 on failure,
    130 when interrupted.
    """
    parser = argparse.ArgumentParser(
        description="Analyze local codebases and extract code knowledge",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # NOTE(review): some epilog examples still show the deprecated
        # --build-* flags; consider refreshing them to the --skip-* form.
        epilog="""
Examples:
  # Analyze current directory
  codebase-scraper --directory . --output output/codebase/

  # Deep analysis with API reference and dependency graph
  codebase-scraper --directory /path/to/repo --depth deep --build-api-reference --build-dependency-graph

  # Analyze only Python and JavaScript
  codebase-scraper --directory . --languages Python,JavaScript

  # Use file patterns
  codebase-scraper --directory . --file-patterns "*.py,src/**/*.js"

  # Full analysis with all features (default)
  codebase-scraper --directory . --depth deep

  # Surface analysis (fast, skip all analysis features)
  codebase-scraper --directory . --depth surface --skip-api-reference --skip-dependency-graph --skip-patterns --skip-test-examples

  # Skip specific features
  codebase-scraper --directory . --skip-patterns --skip-test-examples
        """,
    )

    parser.add_argument("--directory", required=True, help="Directory to analyze")
    parser.add_argument(
        "--output", default="output/codebase/", help="Output directory (default: output/codebase/)"
    )
    parser.add_argument(
        "--depth",
        choices=["surface", "deep", "full"],
        default="deep",
        help="Analysis depth (default: deep)",
    )
    parser.add_argument(
        "--languages", help="Comma-separated languages to analyze (e.g., Python,JavaScript,C++)"
    )
    parser.add_argument(
        "--file-patterns", help="Comma-separated file patterns (e.g., *.py,src/**/*.js)"
    )
    # All analysis features default ON; each --skip-* flag turns one off.
    parser.add_argument(
        "--skip-api-reference",
        action="store_true",
        default=False,
        help="Skip API reference markdown documentation generation (default: enabled)",
    )
    parser.add_argument(
        "--skip-dependency-graph",
        action="store_true",
        default=False,
        help="Skip dependency graph and circular dependency detection (default: enabled)",
    )
    parser.add_argument(
        "--skip-patterns",
        action="store_true",
        default=False,
        help="Skip design pattern detection (Singleton, Factory, Observer, etc.) (default: enabled)",
    )
    parser.add_argument(
        "--skip-test-examples",
        action="store_true",
        default=False,
        help="Skip test example extraction (instantiation, method calls, configs, etc.) (default: enabled)",
    )
    parser.add_argument(
        "--skip-how-to-guides",
        action="store_true",
        default=False,
        help="Skip how-to guide generation from workflow examples (default: enabled)",
    )
    parser.add_argument(
        "--skip-config-patterns",
        action="store_true",
        default=False,
        help="Skip configuration pattern extraction from config files (JSON, YAML, TOML, ENV, etc.) (default: enabled)",
    )
    parser.add_argument(
        "--ai-mode",
        choices=["auto", "api", "local", "none"],
        default="auto",
        help="AI enhancement mode for how-to guides: auto (detect best), api (Claude API), local (Claude Code CLI), none (disable) (default: auto)",
    )
    parser.add_argument("--no-comments", action="store_true", help="Skip comment extraction")
    parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")

    # Check for deprecated flags. These are scanned in raw sys.argv because
    # they are no longer registered with argparse; the warning fires before
    # parse_args() below (which presumably rejects them as unrecognized —
    # TODO confirm intended UX).
    deprecated_flags = {
        "--build-api-reference": "--skip-api-reference",
        "--build-dependency-graph": "--skip-dependency-graph",
        "--detect-patterns": "--skip-patterns",
        "--extract-test-examples": "--skip-test-examples",
        "--build-how-to-guides": "--skip-how-to-guides",
        "--extract-config-patterns": "--skip-config-patterns",
    }

    for old_flag, new_flag in deprecated_flags.items():
        if old_flag in sys.argv:
            logger.warning(
                f"⚠️ DEPRECATED: {old_flag} is deprecated. "
                f"All features are now enabled by default. "
                f"Use {new_flag} to disable this feature."
            )

    args = parser.parse_args()

    # Set logging level
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Validate directory before doing any work; fail fast with exit code 1.
    directory = Path(args.directory)
    if not directory.exists():
        logger.error(f"Directory not found: {directory}")
        return 1

    if not directory.is_dir():
        logger.error(f"Not a directory: {directory}")
        return 1

    # Parse comma-separated languages into a list (None = all languages).
    languages = None
    if args.languages:
        languages = [lang.strip() for lang in args.languages.split(",")]

    # Parse comma-separated file patterns (None = default patterns).
    file_patterns = None
    if args.file_patterns:
        file_patterns = [p.strip() for p in args.file_patterns.split(",")]

    # Analyze codebase; each --skip-* flag is negated into the positive
    # feature toggle that analyze_codebase() expects.
    try:
        results = analyze_codebase(
            directory=directory,
            output_dir=Path(args.output),
            depth=args.depth,
            languages=languages,
            file_patterns=file_patterns,
            build_api_reference=not args.skip_api_reference,
            extract_comments=not args.no_comments,
            build_dependency_graph=not args.skip_dependency_graph,
            detect_patterns=not args.skip_patterns,
            extract_test_examples=not args.skip_test_examples,
            build_how_to_guides=not args.skip_how_to_guides,
            extract_config_patterns=not args.skip_config_patterns,
            enhance_with_ai=True,  # Auto-disables if no API key present
            ai_mode=args.ai_mode,  # NEW: AI enhancement mode for how-to guides
        )

        # Print summary
        print(f"\n{'=' * 60}")
        print("CODEBASE ANALYSIS COMPLETE")
        print(f"{'=' * 60}")
        print(f"Files analyzed: {len(results['files'])}")
        print(f"Output directory: {args.output}")
        if not args.skip_api_reference:
            print(f"API reference: {Path(args.output) / 'api_reference'}")
        print(f"{'=' * 60}\n")

        return 0

    except KeyboardInterrupt:
        # 130 = conventional exit status for SIGINT (128 + 2).
        logger.error("\nAnalysis interrupted by user")
        return 130
    except Exception as e:
        # Top-level boundary: log, dump traceback for debugging, exit 1.
        logger.error(f"Analysis failed: {e}")
        import traceback

        traceback.print_exc()
        return 1
1222
+
1223
+
1224
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    sys.exit(main())