skill-seekers 2.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. skill_seekers/__init__.py +22 -0
  2. skill_seekers/cli/__init__.py +39 -0
  3. skill_seekers/cli/adaptors/__init__.py +120 -0
  4. skill_seekers/cli/adaptors/base.py +221 -0
  5. skill_seekers/cli/adaptors/claude.py +485 -0
  6. skill_seekers/cli/adaptors/gemini.py +453 -0
  7. skill_seekers/cli/adaptors/markdown.py +269 -0
  8. skill_seekers/cli/adaptors/openai.py +503 -0
  9. skill_seekers/cli/ai_enhancer.py +310 -0
  10. skill_seekers/cli/api_reference_builder.py +373 -0
  11. skill_seekers/cli/architectural_pattern_detector.py +525 -0
  12. skill_seekers/cli/code_analyzer.py +1462 -0
  13. skill_seekers/cli/codebase_scraper.py +1225 -0
  14. skill_seekers/cli/config_command.py +563 -0
  15. skill_seekers/cli/config_enhancer.py +431 -0
  16. skill_seekers/cli/config_extractor.py +871 -0
  17. skill_seekers/cli/config_manager.py +452 -0
  18. skill_seekers/cli/config_validator.py +394 -0
  19. skill_seekers/cli/conflict_detector.py +528 -0
  20. skill_seekers/cli/constants.py +72 -0
  21. skill_seekers/cli/dependency_analyzer.py +757 -0
  22. skill_seekers/cli/doc_scraper.py +2332 -0
  23. skill_seekers/cli/enhance_skill.py +488 -0
  24. skill_seekers/cli/enhance_skill_local.py +1096 -0
  25. skill_seekers/cli/enhance_status.py +194 -0
  26. skill_seekers/cli/estimate_pages.py +433 -0
  27. skill_seekers/cli/generate_router.py +1209 -0
  28. skill_seekers/cli/github_fetcher.py +534 -0
  29. skill_seekers/cli/github_scraper.py +1466 -0
  30. skill_seekers/cli/guide_enhancer.py +723 -0
  31. skill_seekers/cli/how_to_guide_builder.py +1267 -0
  32. skill_seekers/cli/install_agent.py +461 -0
  33. skill_seekers/cli/install_skill.py +178 -0
  34. skill_seekers/cli/language_detector.py +614 -0
  35. skill_seekers/cli/llms_txt_detector.py +60 -0
  36. skill_seekers/cli/llms_txt_downloader.py +104 -0
  37. skill_seekers/cli/llms_txt_parser.py +150 -0
  38. skill_seekers/cli/main.py +558 -0
  39. skill_seekers/cli/markdown_cleaner.py +132 -0
  40. skill_seekers/cli/merge_sources.py +806 -0
  41. skill_seekers/cli/package_multi.py +77 -0
  42. skill_seekers/cli/package_skill.py +241 -0
  43. skill_seekers/cli/pattern_recognizer.py +1825 -0
  44. skill_seekers/cli/pdf_extractor_poc.py +1166 -0
  45. skill_seekers/cli/pdf_scraper.py +617 -0
  46. skill_seekers/cli/quality_checker.py +519 -0
  47. skill_seekers/cli/rate_limit_handler.py +438 -0
  48. skill_seekers/cli/resume_command.py +160 -0
  49. skill_seekers/cli/run_tests.py +230 -0
  50. skill_seekers/cli/setup_wizard.py +93 -0
  51. skill_seekers/cli/split_config.py +390 -0
  52. skill_seekers/cli/swift_patterns.py +560 -0
  53. skill_seekers/cli/test_example_extractor.py +1081 -0
  54. skill_seekers/cli/test_unified_simple.py +179 -0
  55. skill_seekers/cli/unified_codebase_analyzer.py +572 -0
  56. skill_seekers/cli/unified_scraper.py +932 -0
  57. skill_seekers/cli/unified_skill_builder.py +1605 -0
  58. skill_seekers/cli/upload_skill.py +162 -0
  59. skill_seekers/cli/utils.py +432 -0
  60. skill_seekers/mcp/__init__.py +33 -0
  61. skill_seekers/mcp/agent_detector.py +316 -0
  62. skill_seekers/mcp/git_repo.py +273 -0
  63. skill_seekers/mcp/server.py +231 -0
  64. skill_seekers/mcp/server_fastmcp.py +1249 -0
  65. skill_seekers/mcp/server_legacy.py +2302 -0
  66. skill_seekers/mcp/source_manager.py +285 -0
  67. skill_seekers/mcp/tools/__init__.py +115 -0
  68. skill_seekers/mcp/tools/config_tools.py +251 -0
  69. skill_seekers/mcp/tools/packaging_tools.py +826 -0
  70. skill_seekers/mcp/tools/scraping_tools.py +842 -0
  71. skill_seekers/mcp/tools/source_tools.py +828 -0
  72. skill_seekers/mcp/tools/splitting_tools.py +212 -0
  73. skill_seekers/py.typed +0 -0
  74. skill_seekers-2.7.3.dist-info/METADATA +2027 -0
  75. skill_seekers-2.7.3.dist-info/RECORD +79 -0
  76. skill_seekers-2.7.3.dist-info/WHEEL +5 -0
  77. skill_seekers-2.7.3.dist-info/entry_points.txt +19 -0
  78. skill_seekers-2.7.3.dist-info/licenses/LICENSE +21 -0
  79. skill_seekers-2.7.3.dist-info/top_level.txt +1 -0
@@ -0,0 +1,871 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Configuration Pattern Extraction (C3.4)
4
+
5
+ Extracts configuration patterns from actual config files in the codebase.
6
+ Supports JSON, YAML, TOML, ENV, INI, Python config modules, and more.
7
+
8
+ This is different from C3.2 which extracts config examples from test code.
9
+ C3.4 focuses on documenting the actual project configuration.
10
+ """
11
+
12
+ import ast
13
+ import json
14
+ import logging
15
+ import re
16
+ from dataclasses import dataclass, field
17
+ from pathlib import Path
18
+ from typing import Any, Literal
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+ # Optional dependencies
23
+ try:
24
+ import yaml
25
+
26
+ YAML_AVAILABLE = True
27
+ except ImportError:
28
+ YAML_AVAILABLE = False
29
+ logger.debug("PyYAML not available - YAML parsing will be limited")
30
+
31
+ try:
32
+ import tomli as toml_lib
33
+
34
+ TOML_AVAILABLE = True
35
+ except ImportError:
36
+ try:
37
+ import toml as toml_lib # noqa: F401
38
+
39
+ TOML_AVAILABLE = True
40
+ except ImportError:
41
+ toml_lib = None
42
+ TOML_AVAILABLE = False
43
+ logger.debug("toml/tomli not available - TOML parsing disabled")
44
+
45
+
46
+ @dataclass
47
+ class ConfigSetting:
48
+ """Individual configuration setting"""
49
+
50
+ key: str
51
+ value: Any
52
+ value_type: str # 'string', 'integer', 'boolean', 'array', 'object', 'null'
53
+ default_value: Any | None = None
54
+ required: bool = False
55
+ env_var: str | None = None
56
+ description: str = ""
57
+ validation: dict[str, Any] = field(default_factory=dict)
58
+ nested_path: list[str] = field(default_factory=list) # For nested configs
59
+
60
+
61
@dataclass
class ConfigFile:
    """A configuration file discovered in the codebase and its parsed contents.

    Attributes:
        file_path: Path to the file as it was found on disk.
        relative_path: Path relative to the directory that was scanned.
        config_type: Format of the file; decides which parser handles it.
        purpose: Inferred purpose label (database, api, logging, ...).
        settings: Settings extracted from the file.
        patterns: Names of the configuration patterns detected in the file.
        raw_content: Text of the file once it has been read, otherwise None.
        parse_errors: Messages describing problems met while parsing.
    """

    file_path: str
    relative_path: str
    config_type: Literal[
        "json", "yaml", "toml", "env", "ini",
        "python", "javascript", "dockerfile", "docker-compose",
    ]
    purpose: str
    # Each instance gets its own fresh list for the collection fields below.
    settings: list[ConfigSetting] = field(default_factory=list)
    patterns: list[str] = field(default_factory=list)
    raw_content: str | None = None
    parse_errors: list[str] = field(default_factory=list)
83
+
84
+
85
@dataclass
class ConfigExtractionResult:
    """Aggregated outcome of a configuration extraction run.

    Attributes:
        config_files: The processed configuration files.
        total_files: Number of configuration files found.
        total_settings: Number of settings extracted across all files.
        detected_patterns: Mapping of pattern name to the files showing it.
        errors: Messages for files that could not be processed.
    """

    config_files: list[ConfigFile] = field(default_factory=list)
    total_files: int = 0
    total_settings: int = 0
    detected_patterns: dict[str, list[str]] = field(default_factory=dict)
    errors: list[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return the result as a plain dictionary suitable for JSON output."""
        files_payload = []
        for config in self.config_files:
            settings_payload = []
            for setting in config.settings:
                settings_payload.append(
                    {
                        "key": setting.key,
                        "value": setting.value,
                        "type": setting.value_type,
                        "env_var": setting.env_var,
                        "description": setting.description,
                    }
                )
            files_payload.append(
                {
                    "file_path": config.file_path,
                    "relative_path": config.relative_path,
                    "type": config.config_type,
                    "purpose": config.purpose,
                    "patterns": config.patterns,
                    "settings_count": len(config.settings),
                    "settings": settings_payload,
                    "parse_errors": config.parse_errors,
                }
            )

        return {
            "total_files": self.total_files,
            "total_settings": self.total_settings,
            "detected_patterns": self.detected_patterns,
            "config_files": files_payload,
            "errors": self.errors,
        }

    def to_markdown(self) -> str:
        """Render the result as a Markdown report and return it as a string."""
        # detected_patterns may be a dict or a list; joining a dict iterates
        # its keys, so a single join covers both shapes.
        found = self.detected_patterns
        patterns_str = ", ".join(found) if found else "None"

        chunks = [
            "# Configuration Extraction Report\n\n",
            f"**Total Files:** {self.total_files}\n",
            f"**Total Settings:** {self.total_settings}\n",
            f"**Detected Patterns:** {patterns_str}\n\n",
        ]

        if self.config_files:
            chunks.append("## Configuration Files\n\n")
            for config in self.config_files:
                chunks.append(f"### {config.relative_path}\n\n")
                chunks.append(f"- **Type:** {config.config_type}\n")
                chunks.append(f"- **Purpose:** {config.purpose}\n")
                chunks.append(f"- **Settings:** {len(config.settings)}\n")
                if config.patterns:
                    chunks.append(f"- **Patterns:** {', '.join(config.patterns)}\n")
                if config.parse_errors:
                    chunks.append(f"- **Errors:** {len(config.parse_errors)}\n")
                chunks.append("\n")

        if self.errors:
            chunks.append("## Errors\n\n")
            chunks.extend(f"- {message}\n" for message in self.errors)

        return "".join(chunks)
161
+
162
+
163
class ConfigFileDetector:
    """Locate configuration files under a directory and classify them.

    Classification is driven by ``CONFIG_PATTERNS``: a file belongs to the
    first config type whose exact names or glob patterns match its filename.
    Matching is case-insensitive. Directories listed in ``SKIP_DIRS`` are
    ignored when they occur *below* the directory being scanned.
    """

    # Per config type: glob patterns and exact filenames that identify it.
    CONFIG_PATTERNS = {
        "json": {
            "patterns": ["*.json", "package.json", "tsconfig.json", "jsconfig.json"],
            "names": [
                "config.json",
                "settings.json",
                "app.json",
                ".eslintrc.json",
                ".prettierrc.json",
            ],
        },
        "yaml": {
            "patterns": ["*.yaml", "*.yml"],
            "names": [
                "config.yml",
                "settings.yml",
                ".travis.yml",
                ".gitlab-ci.yml",
                "docker-compose.yml",
            ],
        },
        "toml": {
            "patterns": ["*.toml"],
            "names": ["pyproject.toml", "Cargo.toml", "config.toml"],
        },
        "env": {
            "patterns": [".env*", "*.env"],
            "names": [".env", ".env.example", ".env.local", ".env.production"],
        },
        "ini": {
            "patterns": ["*.ini", "*.cfg"],
            "names": ["config.ini", "setup.cfg", "tox.ini"],
        },
        "python": {
            "patterns": [],
            "names": ["settings.py", "config.py", "configuration.py", "constants.py"],
        },
        "javascript": {
            "patterns": ["*.config.js", "*.config.ts"],
            "names": [
                "config.js",
                "next.config.js",
                "vue.config.js",
                "webpack.config.js",
            ],
        },
        "dockerfile": {
            "patterns": ["Dockerfile*"],
            "names": ["Dockerfile", "Dockerfile.dev", "Dockerfile.prod"],
        },
        "docker-compose": {
            "patterns": ["docker-compose*.yml", "docker-compose*.yaml"],
            "names": ["docker-compose.yml", "docker-compose.yaml"],
        },
    }

    # Directory names (or glob patterns for them) that are never searched.
    SKIP_DIRS = {
        "node_modules",
        "venv",
        "env",
        ".venv",
        "__pycache__",
        ".git",
        "build",
        "dist",
        ".tox",
        ".mypy_cache",
        ".pytest_cache",
        "htmlcov",
        "coverage",
        ".eggs",
        "*.egg-info",
    }

    # Types whose files would also match a broader type (docker-compose files
    # match the generic "*.yml" / "*.yaml" patterns). They are tested first;
    # otherwise the broader type always wins and they can never be returned.
    _SPECIFIC_TYPES = ("docker-compose",)

    def find_config_files(self, directory: Path, max_files: int = 100) -> list[ConfigFile]:
        """
        Find all configuration files in directory.

        Args:
            directory: Root directory to search
            max_files: Maximum number of config files to find

        Returns:
            List of ConfigFile objects (settings not yet parsed)
        """
        config_files = []

        for file_path in self._walk_directory(directory):
            if len(config_files) >= max_files:
                logger.info(f"Reached max_files limit ({max_files})")
                break

            config_type = self._detect_config_type(file_path)
            if not config_type:
                continue

            relative_path = str(file_path.relative_to(directory))
            config_files.append(
                ConfigFile(
                    file_path=str(file_path),
                    relative_path=relative_path,
                    config_type=config_type,
                    purpose=self._infer_purpose(file_path, config_type),
                )
            )
            logger.debug(f"Found {config_type} config: {relative_path}")

        logger.info(f"Found {len(config_files)} configuration files")
        return config_files

    def _walk_directory(self, directory: Path):
        """Yield every file below ``directory`` that is not in an excluded directory.

        Only the directory components *relative to* ``directory`` are compared
        with ``SKIP_DIRS``. Checking the absolute path would discard every
        file of a project that merely lives under e.g. ``/home/me/build/``.
        Entries of ``SKIP_DIRS`` are treated as glob patterns so that
        ``*.egg-info`` actually matches ``foo.egg-info``.
        """
        from fnmatch import fnmatchcase

        for item in directory.rglob("*"):
            if item.is_dir():
                continue

            # Drop the final component: it is the file itself, not a directory.
            parent_parts = item.relative_to(directory).parts[:-1]
            if any(
                fnmatchcase(part, skipped)
                for part in parent_parts
                for skipped in self.SKIP_DIRS
            ):
                continue

            yield item

    def _detection_order(self) -> list[str]:
        """Return the config types in the order they must be tested."""
        specific = [name for name in self._SPECIFIC_TYPES if name in self.CONFIG_PATTERNS]
        general = [name for name in self.CONFIG_PATTERNS if name not in specific]
        return specific + general

    def _detect_config_type(self, file_path: Path) -> str | None:
        """Return the config type matching the file's name, or None.

        Both exact names and glob patterns are compared case-insensitively,
        so entries such as ``Dockerfile`` or ``Cargo.toml`` match regardless
        of how the file is capitalised on disk.
        """
        from fnmatch import fnmatchcase

        filename = file_path.name.lower()

        for config_type in self._detection_order():
            spec = self.CONFIG_PATTERNS[config_type]

            if any(filename == name.lower() for name in spec["names"]):
                return config_type

            if any(fnmatchcase(filename, pattern.lower()) for pattern in spec["patterns"]):
                return config_type

        return None

    def _infer_purpose(self, file_path: Path, _config_type: str) -> str:
        """Infer configuration purpose from file path and name.

        The checks run in a fixed order and the first hit wins. The keyword
        checks are plain substring tests against the lowercased path string,
        so short keywords such as "db", "ci" or "log" also match when they
        occur inside longer, unrelated path components.
        """
        path_lower = str(file_path).lower()
        filename = file_path.name.lower()

        def path_mentions(*words: str) -> bool:
            return any(word in path_lower for word in words)

        # Database configs
        if path_mentions("database", "db", "postgres", "mysql", "mongo"):
            return "database_configuration"

        # API configs
        if path_mentions("api", "rest", "graphql", "endpoint"):
            return "api_configuration"

        # Logging configs
        if path_mentions("log", "logger", "logging"):
            return "logging_configuration"

        # Docker configs
        if "docker" in filename:
            return "docker_configuration"

        # CI/CD configs
        if path_mentions(".travis", ".gitlab", ".github", "ci", "cd"):
            return "ci_cd_configuration"

        # Package configs
        if filename in ("package.json", "pyproject.toml", "cargo.toml"):
            return "package_configuration"

        # TypeScript/JavaScript configs
        if filename in ("tsconfig.json", "jsconfig.json"):
            return "typescript_configuration"

        # Framework configs
        if any(marker in filename for marker in ("next.config", "vue.config", "webpack.config")):
            return "framework_configuration"

        # Environment configs
        if ".env" in filename:
            return "environment_configuration"

        return "general_configuration"
350
+
351
+
352
class ConfigParser:
    """Parse the supported configuration file formats into ConfigSetting objects.

    ``parse_config_file`` is the entry point: it reads the file, dispatches
    on ``config_type`` and records every failure in ``parse_errors`` instead
    of raising. YAML and TOML support depend on the optional libraries probed
    at module import time (``YAML_AVAILABLE`` / ``TOML_AVAILABLE``).
    """

    # config_type -> name of the method that handles it.
    _PARSERS = {
        "json": "_parse_json",
        "yaml": "_parse_yaml",
        "toml": "_parse_toml",
        "env": "_parse_env",
        "ini": "_parse_ini",
        "python": "_parse_python_config",
        "javascript": "_parse_javascript_config",
        "dockerfile": "_parse_dockerfile",
        "docker-compose": "_parse_yaml",  # Docker compose is YAML
    }

    # KEY=VALUE line of a dotenv file. Keys may be lower case, the value may
    # be empty (typical for .env.example) and a shell "export " prefix is ok.
    _ENV_LINE_RE = re.compile(r"(?:export\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*=\s*(.*)")

    # "const|let|var NAME =" (or ":") followed by a simple literal.
    _JS_PREFIX = r"\b(?:const|let|var)\s+(\w+)\s*[:=]\s*"
    _JS_STRING_RE = re.compile(_JS_PREFIX + r"([\"'])(.*?)\2")
    _JS_NUMBER_RE = re.compile(_JS_PREFIX + r"(-?\d+(?:\.\d+)?)\b")
    _JS_BOOLEAN_RE = re.compile(_JS_PREFIX + r"(true|false)\b")

    def parse_config_file(self, config_file: ConfigFile) -> ConfigFile:
        """
        Parse configuration file and extract settings.

        Args:
            config_file: ConfigFile object to parse

        Returns:
            The same ConfigFile, with raw_content, settings and parse_errors
            populated. Errors are logged and recorded, never raised.
        """
        try:
            # utf-8-sig reads plain UTF-8 unchanged but drops a leading BOM,
            # which would otherwise make json.loads / ast.parse fail.
            with open(config_file.file_path, encoding="utf-8-sig") as f:
                config_file.raw_content = f.read()

            parser_name = self._PARSERS.get(config_file.config_type)
            if parser_name is not None:
                getattr(self, parser_name)(config_file)

        except Exception as e:
            # Per-file boundary: one unreadable file must not abort the run.
            error_msg = f"Error parsing {config_file.relative_path}: {str(e)}"
            logger.warning(error_msg)
            config_file.parse_errors.append(error_msg)

        return config_file

    def _parse_json(self, config_file: ConfigFile):
        """Parse JSON configuration; non-object documents yield no settings."""
        try:
            data = json.loads(config_file.raw_content)
        except json.JSONDecodeError as e:
            config_file.parse_errors.append(f"JSON parse error: {str(e)}")
            return

        # A top-level array or scalar has no keys to extract.
        if isinstance(data, dict):
            self._extract_settings_from_dict(data, config_file)

    def _parse_yaml(self, config_file: ConfigFile):
        """Parse YAML configuration (also used for docker-compose files)."""
        if not YAML_AVAILABLE:
            config_file.parse_errors.append("PyYAML not installed")
            return

        try:
            data = yaml.safe_load(config_file.raw_content)
        except yaml.YAMLError as e:
            config_file.parse_errors.append(f"YAML parse error: {str(e)}")
            return

        if isinstance(data, dict):
            self._extract_settings_from_dict(data, config_file)

    def _parse_toml(self, config_file: ConfigFile):
        """Parse TOML configuration with whichever TOML library was imported."""
        if not TOML_AVAILABLE:
            config_file.parse_errors.append("toml/tomli not installed")
            return

        try:
            data = toml_lib.loads(config_file.raw_content)
            self._extract_settings_from_dict(data, config_file)
        except Exception as e:
            # tomli and toml raise different exception types; record either.
            config_file.parse_errors.append(f"TOML parse error: {str(e)}")

    def _split_env_line(self, line: str) -> tuple[str, str] | None:
        """Split one dotenv line into ``(key, value)``.

        Returns None for blank lines, comments and lines that are not an
        assignment. One pair of matching surrounding quotes is removed from
        the value.
        """
        stripped = line.strip()
        if not stripped or stripped.startswith("#"):
            return None

        match = self._ENV_LINE_RE.match(stripped)
        if match is None:
            return None

        key, value = match.groups()
        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]
        return key, value

    def _parse_env(self, config_file: ConfigFile):
        """Parse .env file; a comment directly above a key is its description."""
        lines = config_file.raw_content.split("\n")

        for index, line in enumerate(lines):
            pair = self._split_env_line(line)
            if pair is None:
                continue

            key, value = pair
            config_file.settings.append(
                ConfigSetting(
                    key=key,
                    value=value,
                    value_type=self._infer_type(value),
                    env_var=key,
                    description=self._extract_env_description(lines, index),
                )
            )

    def _read_ini_items(self, content: str) -> list[tuple[str, str, str]]:
        """Return ``(section, key, value)`` for every option in INI text.

        Interpolation is disabled because values such as logging formats
        (``%(asctime)s``) are not interpolation references and would raise
        when read. ``strict=False`` tolerates duplicated sections/options.
        """
        import configparser

        reader = configparser.ConfigParser(interpolation=None, strict=False)
        reader.read_string(content)
        return [
            (section, key, value)
            for section in reader.sections()
            for key, value in reader[section].items()
        ]

    def _parse_ini(self, config_file: ConfigFile):
        """Parse INI configuration; keys are reported as ``section.option``."""
        import configparser

        try:
            items = self._read_ini_items(config_file.raw_content)
        except configparser.Error as e:
            config_file.parse_errors.append(f"INI parse error: {str(e)}")
            return

        for section, key, value in items:
            config_file.settings.append(
                ConfigSetting(
                    key=f"{section}.{key}",
                    value=value,
                    value_type=self._infer_type(value),
                    nested_path=[section, key],
                )
            )

    def _parse_python_config(self, config_file: ConfigFile):
        """Parse Python configuration module without executing it.

        Every single-target assignment to a non-underscore name found
        anywhere in the syntax tree is reported, provided its value is a
        literal that ``ast.literal_eval`` can evaluate.
        """
        try:
            tree = ast.parse(config_file.raw_content)
        except SyntaxError as e:
            config_file.parse_errors.append(f"Python parse error: {str(e)}")
            return

        for node in ast.walk(tree):
            if not (isinstance(node, ast.Assign) and len(node.targets) == 1):
                continue
            target = node.targets[0]
            # Skip tuple/attribute targets and private variables.
            if not isinstance(target, ast.Name) or target.id.startswith("_"):
                continue

            try:
                value = ast.literal_eval(node.value)
            except (ValueError, TypeError):
                # Not a literal (function call, name reference, ...): skip it.
                continue

            config_file.settings.append(
                ConfigSetting(
                    key=target.id,
                    value=value,
                    value_type=self._infer_type(value),
                    description=self._extract_python_docstring(node),
                )
            )

    def _find_js_assignments(self, content: str) -> list[tuple[str, Any]]:
        """Return ``(name, value)`` for simple literal assignments in JS/TS text.

        Strings are reported first, then numbers, then booleans. Numbers and
        booleans are converted to Python values so their inferred type is
        'integer' / 'number' / 'boolean' rather than 'string'.
        """
        found: list[tuple[str, Any]] = []

        for match in self._JS_STRING_RE.finditer(content):
            found.append((match.group(1), match.group(3)))

        for match in self._JS_NUMBER_RE.finditer(content):
            literal = match.group(2)
            found.append((match.group(1), float(literal) if "." in literal else int(literal)))

        for match in self._JS_BOOLEAN_RE.finditer(content):
            found.append((match.group(1), match.group(2) == "true"))

        return found

    def _parse_javascript_config(self, config_file: ConfigFile):
        """Parse JavaScript/TypeScript config (basic regex-based extraction)."""
        for key, value in self._find_js_assignments(config_file.raw_content):
            config_file.settings.append(
                ConfigSetting(key=key, value=value, value_type=self._infer_type(value))
            )

    def _parse_dockerfile(self, config_file: ConfigFile):
        """Parse Dockerfile configuration: ENV and ARG instructions only."""
        for raw_line in config_file.raw_content.split("\n"):
            line = raw_line.strip()

            if line.startswith("ENV "):
                declaration = line[4:].strip()
                if not declaration:
                    continue

                if "=" in declaration.split(None, 1)[0]:
                    # ENV KEY=value
                    key, value = declaration.split("=", 1)
                else:
                    # Legacy single-variable form: ENV KEY value
                    pieces = declaration.split(None, 1)
                    if len(pieces) != 2:
                        continue
                    key, value = pieces

                name = key.strip()
                config_file.settings.append(
                    ConfigSetting(
                        key=name,
                        value=value.strip(),
                        value_type="string",
                        env_var=name,
                    )
                )

            elif line.startswith("ARG "):
                # ARG NAME or ARG NAME=default; no default is reported as None.
                name, has_default, default = line[4:].partition("=")
                config_file.settings.append(
                    ConfigSetting(
                        key=name.strip(),
                        value=default.strip() if has_default else None,
                        value_type="string",
                    )
                )

    def _extract_settings_from_dict(
        self, data: dict, config_file: ConfigFile, parent_path: list[str] | None = None
    ):
        """Recursively flatten a mapping into settings with dotted keys.

        Keys are converted to ``str``: YAML produces non-string keys (for
        example ``on:`` loads as ``True`` and ``80:`` as an int), which would
        otherwise break the dotted-path join and later key comparisons.
        """
        prefix = [] if parent_path is None else parent_path

        for raw_key, value in data.items():
            path = [*prefix, str(raw_key)]
            if isinstance(value, dict):
                self._extract_settings_from_dict(value, config_file, path)
            else:
                config_file.settings.append(
                    ConfigSetting(
                        key=".".join(path),
                        value=value,
                        value_type=self._infer_type(value),
                        nested_path=path,
                    )
                )

    def _infer_type(self, value: Any) -> str:
        """Map a Python value to a coarse type label."""
        if value is None:
            return "null"
        # bool must be tested before int because bool is a subclass of int.
        if isinstance(value, bool):
            return "boolean"
        if isinstance(value, int):
            return "integer"
        if isinstance(value, float):
            return "number"
        if isinstance(value, (list, tuple, set, frozenset)):
            return "array"
        if isinstance(value, dict):
            return "object"
        return "string"

    def _extract_env_description(self, lines: list[str], line_index: int) -> str:
        """Return the comment text on the line directly above ``line_index``.

        Returns an empty string when there is no previous line or it is not
        a ``#`` comment.
        """
        if line_index > 0:
            prev_line = lines[line_index - 1].strip()
            if prev_line.startswith("#"):
                return prev_line[1:].strip()
        return ""

    def _extract_python_docstring(self, _node: ast.AST) -> str:
        """Extract docstring/comment for Python node (placeholder: always "")."""
        # This is simplified - real implementation would need more context
        return ""
606
+
607
+
608
class ConfigPatternDetector:
    """Detect common configuration patterns from the keys of a config file.

    A pattern is reported when at least ``min_match`` of its keys occur among
    the file's setting names. Both the full (possibly dotted) key and its
    last path component are considered, so ``database.host`` counts as
    ``host``; without that, nested settings and every INI setting (always
    ``section.option``) could never match.
    """

    # Known configuration patterns: characteristic keys and match threshold.
    KNOWN_PATTERNS = {
        "database_config": {
            "keys": [
                "host",
                "port",
                "database",
                "user",
                "username",
                "password",
                "db_name",
            ],
            "min_match": 3,
        },
        "api_config": {
            "keys": [
                "base_url",
                "api_key",
                "api_secret",
                "timeout",
                "retry",
                "endpoint",
            ],
            "min_match": 2,
        },
        "logging_config": {
            "keys": ["level", "format", "handler", "file", "console", "log_level"],
            "min_match": 2,
        },
        "cache_config": {
            "keys": ["backend", "ttl", "timeout", "max_size", "redis", "memcached"],
            "min_match": 2,
        },
        "email_config": {
            "keys": ["smtp_host", "smtp_port", "email", "from_email", "mail_server"],
            "min_match": 2,
        },
        "auth_config": {
            "keys": ["secret_key", "jwt_secret", "token", "oauth", "authentication"],
            "min_match": 1,
        },
        "server_config": {
            "keys": ["host", "port", "bind", "workers", "threads"],
            "min_match": 2,
        },
    }

    def detect_patterns(self, config_file: ConfigFile) -> list[str]:
        """
        Detect which patterns this config file matches.

        Args:
            config_file: ConfigFile with settings extracted

        Returns:
            List of detected pattern names, in KNOWN_PATTERNS order
        """
        detected = []

        for pattern_name, matches in self._matching_patterns(self._candidate_keys(config_file)):
            detected.append(pattern_name)
            logger.debug(
                f"Detected {pattern_name} in {config_file.relative_path} ({matches} matches)"
            )

        return detected

    def _candidate_keys(self, config_file: ConfigFile) -> set[str]:
        """Collect the lowercased names under which each setting can match.

        For every setting this is its full key plus its leaf name: the last
        element of ``nested_path`` when present, otherwise the part of the
        key after the final dot. Keys are passed through ``str`` so a
        non-string key cannot raise here.
        """
        names: set[str] = set()

        for setting in config_file.settings:
            full_key = str(setting.key)
            names.add(full_key.lower())

            nested = setting.nested_path
            leaf = str(nested[-1]) if nested else full_key.rsplit(".", 1)[-1]
            names.add(leaf.lower())

        return names

    def _matching_patterns(self, setting_keys: set[str]) -> list[tuple[str, int]]:
        """Return ``(pattern_name, match_count)`` for patterns meeting their threshold."""
        hits = []

        for pattern_name, pattern_def in self.KNOWN_PATTERNS.items():
            pattern_keys = {key.lower() for key in pattern_def["keys"]}
            matches = len(setting_keys & pattern_keys)
            if matches >= pattern_def["min_match"]:
                hits.append((pattern_name, matches))

        return hits
688
+
689
+
690
class ConfigExtractor:
    """Main configuration extraction orchestrator.

    Wires together file detection, per-file parsing and pattern detection and
    collects the outcome in a ConfigExtractionResult.
    """

    def __init__(self):
        self.detector = ConfigFileDetector()
        self.parser = ConfigParser()
        self.pattern_detector = ConfigPatternDetector()

    def extract_from_directory(
        self, directory: Path, max_files: int = 100
    ) -> ConfigExtractionResult:
        """
        Extract configuration patterns from directory.

        Args:
            directory: Root directory to analyze
            max_files: Maximum config files to process

        Returns:
            ConfigExtractionResult with all findings. ``total_files`` counts
            the files found; a file whose processing raised is listed in
            ``errors`` and left out of ``config_files``.
        """
        result = ConfigExtractionResult()

        logger.info(f"Extracting configuration patterns from: {directory}")

        # Step 1: Find config files
        config_files = self.detector.find_config_files(directory, max_files)
        result.total_files = len(config_files)

        if not config_files:
            logger.warning("No configuration files found")
            return result

        for config_file in config_files:
            try:
                # Step 2: Parse the file (parse problems land in parse_errors)
                parsed = self.parser.parse_config_file(config_file)

                # Step 3: Detect patterns and index them by pattern name
                parsed.patterns = self.pattern_detector.detect_patterns(parsed)
                for pattern in parsed.patterns:
                    result.detected_patterns.setdefault(pattern, []).append(parsed.relative_path)

                result.config_files.append(parsed)
                result.total_settings += len(parsed.settings)

            except Exception as e:
                # Per-file boundary: record the failure and keep going.
                error_msg = f"Error processing {config_file.relative_path}: {str(e)}"
                logger.error(error_msg)
                result.errors.append(error_msg)

        logger.info(
            f"Extracted {result.total_settings} settings from {result.total_files} config files"
        )
        logger.info(f"Detected patterns: {list(result.detected_patterns.keys())}")

        return result

    def to_dict(self, result: ConfigExtractionResult) -> dict:
        """Convert result to dictionary for JSON output.

        Delegates to ``ConfigExtractionResult.to_dict`` so there is a single
        definition of the serialized layout.
        """
        return result.to_dict()
783
+
784
+
785
def main():
    """CLI entry point for config extraction.

    Parses the command line, runs the extraction, optionally passes the
    result through the AI enhancer, then writes the JSON to ``--output`` (or
    prints it) followed by a short summary on stdout.
    """
    import argparse

    def json_fallback(obj):
        """Serialize values JSON cannot represent natively.

        Parsed configs can contain such values: sets or bytes from Python
        config modules, dates from YAML/TOML. Without a fallback json.dump
        raises TypeError and the whole report is lost.
        """
        if isinstance(obj, (set, frozenset)):
            # Sort by repr for a deterministic order even with mixed types.
            return sorted(obj, key=repr)
        if isinstance(obj, bytes):
            return obj.decode("utf-8", errors="replace")
        return str(obj)

    parser = argparse.ArgumentParser(
        description="Extract configuration patterns from codebase with optional AI enhancement"
    )
    parser.add_argument("directory", type=Path, help="Directory to analyze")
    parser.add_argument("--output", "-o", type=Path, help="Output JSON file")
    parser.add_argument(
        "--max-files", type=int, default=100, help="Maximum config files to process"
    )
    parser.add_argument(
        "--enhance",
        action="store_true",
        help="Enhance with AI analysis (API mode, requires ANTHROPIC_API_KEY)",
    )
    parser.add_argument(
        "--enhance-local",
        action="store_true",
        help="Enhance with AI analysis (LOCAL mode, uses Claude Code CLI)",
    )
    parser.add_argument(
        "--ai-mode",
        choices=["auto", "api", "local", "none"],
        default="none",
        help="AI enhancement mode: auto (detect), api (Claude API), local (Claude Code CLI), none (disable)",
    )

    args = parser.parse_args()

    # Setup logging
    logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")

    # Extract
    extractor = ConfigExtractor()
    result = extractor.extract_from_directory(args.directory, args.max_files)

    # Convert to dict
    output_dict = extractor.to_dict(result)

    # AI Enhancement (if requested). The shortcut flags take precedence over
    # --ai-mode, and --enhance wins over --enhance-local.
    enhance_mode = args.ai_mode
    if args.enhance:
        enhance_mode = "api"
    elif args.enhance_local:
        enhance_mode = "local"

    if enhance_mode != "none":
        try:
            from skill_seekers.cli.config_enhancer import ConfigEnhancer

            logger.info(f"🤖 Starting AI enhancement (mode: {enhance_mode})...")
            enhancer = ConfigEnhancer(mode=enhance_mode)
            output_dict = enhancer.enhance_config_result(output_dict)
            logger.info("✅ AI enhancement complete")
        except ImportError:
            logger.warning("⚠️ ConfigEnhancer not available, skipping enhancement")
        except Exception as e:
            # Enhancement is best-effort: fall back to the plain result.
            logger.error(f"❌ AI enhancement failed: {e}")

    # Output
    if args.output:
        with open(args.output, "w", encoding="utf-8") as f:
            json.dump(output_dict, f, indent=2, default=json_fallback)
        print(f"✅ Saved config extraction results to: {args.output}")
    else:
        print(json.dumps(output_dict, indent=2, default=json_fallback))

    # Summary
    print("\n📊 Summary:")
    print(f" Config files found: {result.total_files}")
    print(f" Total settings: {result.total_settings}")
    print(f" Detected patterns: {', '.join(result.detected_patterns.keys()) or 'None'}")

    if "ai_enhancements" in output_dict:
        print(f" ✨ AI enhancements: Yes ({enhance_mode} mode)")
        insights = output_dict["ai_enhancements"].get("overall_insights", {})
        if insights.get("security_issues_found"):
            print(f" 🔐 Security issues found: {insights['security_issues_found']}")

    if result.errors:
        print(f"\n⚠️ Errors: {len(result.errors)}")
869
+
870
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()