skill-seekers 2.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skill_seekers/__init__.py +22 -0
- skill_seekers/cli/__init__.py +39 -0
- skill_seekers/cli/adaptors/__init__.py +120 -0
- skill_seekers/cli/adaptors/base.py +221 -0
- skill_seekers/cli/adaptors/claude.py +485 -0
- skill_seekers/cli/adaptors/gemini.py +453 -0
- skill_seekers/cli/adaptors/markdown.py +269 -0
- skill_seekers/cli/adaptors/openai.py +503 -0
- skill_seekers/cli/ai_enhancer.py +310 -0
- skill_seekers/cli/api_reference_builder.py +373 -0
- skill_seekers/cli/architectural_pattern_detector.py +525 -0
- skill_seekers/cli/code_analyzer.py +1462 -0
- skill_seekers/cli/codebase_scraper.py +1225 -0
- skill_seekers/cli/config_command.py +563 -0
- skill_seekers/cli/config_enhancer.py +431 -0
- skill_seekers/cli/config_extractor.py +871 -0
- skill_seekers/cli/config_manager.py +452 -0
- skill_seekers/cli/config_validator.py +394 -0
- skill_seekers/cli/conflict_detector.py +528 -0
- skill_seekers/cli/constants.py +72 -0
- skill_seekers/cli/dependency_analyzer.py +757 -0
- skill_seekers/cli/doc_scraper.py +2332 -0
- skill_seekers/cli/enhance_skill.py +488 -0
- skill_seekers/cli/enhance_skill_local.py +1096 -0
- skill_seekers/cli/enhance_status.py +194 -0
- skill_seekers/cli/estimate_pages.py +433 -0
- skill_seekers/cli/generate_router.py +1209 -0
- skill_seekers/cli/github_fetcher.py +534 -0
- skill_seekers/cli/github_scraper.py +1466 -0
- skill_seekers/cli/guide_enhancer.py +723 -0
- skill_seekers/cli/how_to_guide_builder.py +1267 -0
- skill_seekers/cli/install_agent.py +461 -0
- skill_seekers/cli/install_skill.py +178 -0
- skill_seekers/cli/language_detector.py +614 -0
- skill_seekers/cli/llms_txt_detector.py +60 -0
- skill_seekers/cli/llms_txt_downloader.py +104 -0
- skill_seekers/cli/llms_txt_parser.py +150 -0
- skill_seekers/cli/main.py +558 -0
- skill_seekers/cli/markdown_cleaner.py +132 -0
- skill_seekers/cli/merge_sources.py +806 -0
- skill_seekers/cli/package_multi.py +77 -0
- skill_seekers/cli/package_skill.py +241 -0
- skill_seekers/cli/pattern_recognizer.py +1825 -0
- skill_seekers/cli/pdf_extractor_poc.py +1166 -0
- skill_seekers/cli/pdf_scraper.py +617 -0
- skill_seekers/cli/quality_checker.py +519 -0
- skill_seekers/cli/rate_limit_handler.py +438 -0
- skill_seekers/cli/resume_command.py +160 -0
- skill_seekers/cli/run_tests.py +230 -0
- skill_seekers/cli/setup_wizard.py +93 -0
- skill_seekers/cli/split_config.py +390 -0
- skill_seekers/cli/swift_patterns.py +560 -0
- skill_seekers/cli/test_example_extractor.py +1081 -0
- skill_seekers/cli/test_unified_simple.py +179 -0
- skill_seekers/cli/unified_codebase_analyzer.py +572 -0
- skill_seekers/cli/unified_scraper.py +932 -0
- skill_seekers/cli/unified_skill_builder.py +1605 -0
- skill_seekers/cli/upload_skill.py +162 -0
- skill_seekers/cli/utils.py +432 -0
- skill_seekers/mcp/__init__.py +33 -0
- skill_seekers/mcp/agent_detector.py +316 -0
- skill_seekers/mcp/git_repo.py +273 -0
- skill_seekers/mcp/server.py +231 -0
- skill_seekers/mcp/server_fastmcp.py +1249 -0
- skill_seekers/mcp/server_legacy.py +2302 -0
- skill_seekers/mcp/source_manager.py +285 -0
- skill_seekers/mcp/tools/__init__.py +115 -0
- skill_seekers/mcp/tools/config_tools.py +251 -0
- skill_seekers/mcp/tools/packaging_tools.py +826 -0
- skill_seekers/mcp/tools/scraping_tools.py +842 -0
- skill_seekers/mcp/tools/source_tools.py +828 -0
- skill_seekers/mcp/tools/splitting_tools.py +212 -0
- skill_seekers/py.typed +0 -0
- skill_seekers-2.7.3.dist-info/METADATA +2027 -0
- skill_seekers-2.7.3.dist-info/RECORD +79 -0
- skill_seekers-2.7.3.dist-info/WHEEL +5 -0
- skill_seekers-2.7.3.dist-info/entry_points.txt +19 -0
- skill_seekers-2.7.3.dist-info/licenses/LICENSE +21 -0
- skill_seekers-2.7.3.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,871 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Configuration Pattern Extraction (C3.4)
|
|
4
|
+
|
|
5
|
+
Extracts configuration patterns from actual config files in the codebase.
|
|
6
|
+
Supports JSON, YAML, TOML, ENV, INI, Python config modules, and more.
|
|
7
|
+
|
|
8
|
+
This is different from C3.2 which extracts config examples from test code.
|
|
9
|
+
C3.4 focuses on documenting the actual project configuration.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import ast
import fnmatch
import json
import logging
import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Literal
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
# Optional dependencies
|
|
23
|
+
try:
|
|
24
|
+
import yaml
|
|
25
|
+
|
|
26
|
+
YAML_AVAILABLE = True
|
|
27
|
+
except ImportError:
|
|
28
|
+
YAML_AVAILABLE = False
|
|
29
|
+
logger.debug("PyYAML not available - YAML parsing will be limited")
|
|
30
|
+
|
|
31
|
+
try:
|
|
32
|
+
import tomli as toml_lib
|
|
33
|
+
|
|
34
|
+
TOML_AVAILABLE = True
|
|
35
|
+
except ImportError:
|
|
36
|
+
try:
|
|
37
|
+
import toml as toml_lib # noqa: F401
|
|
38
|
+
|
|
39
|
+
TOML_AVAILABLE = True
|
|
40
|
+
except ImportError:
|
|
41
|
+
toml_lib = None
|
|
42
|
+
TOML_AVAILABLE = False
|
|
43
|
+
logger.debug("toml/tomli not available - TOML parsing disabled")
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
class ConfigSetting:
    """Individual configuration setting.

    One key/value pair extracted from a configuration file, plus light
    metadata used when rendering JSON/markdown reports.
    """

    # Setting name; for nested mappings this is the dotted path
    # (e.g. "database.host"), for INI files "section.key".
    key: str
    # Parsed value; type depends on the source format (.env and Dockerfile
    # values are always kept as strings).
    value: Any
    value_type: str  # 'string', 'integer', 'boolean', 'array', 'object', 'null'
    # Documented default, if known (not set by the built-in parsers).
    default_value: Any | None = None
    # Whether the setting is mandatory (not set by the built-in parsers).
    required: bool = False
    # Backing environment variable (.env keys, Dockerfile ENV names).
    env_var: str | None = None
    # Human-readable description (e.g. the comment above a .env entry).
    description: str = ""
    # Validation constraints, if any.
    validation: dict[str, Any] = field(default_factory=dict)
    nested_path: list[str] = field(default_factory=list)  # For nested configs
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@dataclass
class ConfigFile:
    """Represents a configuration file.

    Created by ConfigFileDetector (path/type/purpose), then populated by
    ConfigParser (settings, raw_content, parse_errors) and
    ConfigPatternDetector (patterns).
    """

    # Path to the file as discovered on disk.
    file_path: str
    # Path relative to the scanned root; used in reports and logs.
    relative_path: str
    config_type: Literal[
        "json",
        "yaml",
        "toml",
        "env",
        "ini",
        "python",
        "javascript",
        "dockerfile",
        "docker-compose",
    ]
    purpose: str  # Inferred purpose: database, api, logging, etc.
    # Settings extracted by ConfigParser.parse_config_file.
    settings: list[ConfigSetting] = field(default_factory=list)
    # Pattern names detected by ConfigPatternDetector (e.g. "database_config").
    patterns: list[str] = field(default_factory=list)
    # Full file text; loaded by ConfigParser.parse_config_file.
    raw_content: str | None = None
    # Non-fatal parse problems encountered for this file.
    parse_errors: list[str] = field(default_factory=list)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@dataclass
class ConfigExtractionResult:
    """Aggregate result of a configuration-extraction run.

    Holds every parsed ConfigFile plus run-level counters and the mapping
    of detected pattern names to the files that exhibit them.
    """

    config_files: list[ConfigFile] = field(default_factory=list)
    total_files: int = 0
    total_settings: int = 0
    detected_patterns: dict[str, list[str]] = field(default_factory=dict)  # pattern -> files
    errors: list[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Convert result to dictionary for JSON output"""

        def _setting_entry(item):
            # Flatten one setting into a JSON-friendly record.
            return {
                "key": item.key,
                "value": item.value,
                "type": item.value_type,
                "env_var": item.env_var,
                "description": item.description,
            }

        def _file_entry(entry):
            # Flatten one config file, including all of its settings.
            return {
                "file_path": entry.file_path,
                "relative_path": entry.relative_path,
                "type": entry.config_type,
                "purpose": entry.purpose,
                "patterns": entry.patterns,
                "settings_count": len(entry.settings),
                "settings": [_setting_entry(item) for item in entry.settings],
                "parse_errors": entry.parse_errors,
            }

        return {
            "total_files": self.total_files,
            "total_settings": self.total_settings,
            "detected_patterns": self.detected_patterns,
            "config_files": [_file_entry(entry) for entry in self.config_files],
            "errors": self.errors,
        }

    def to_markdown(self) -> str:
        """Generate markdown report of extraction results"""
        parts = [
            "# Configuration Extraction Report\n\n",
            f"**Total Files:** {self.total_files}\n",
            f"**Total Settings:** {self.total_settings}\n",
        ]

        # detected_patterns is normally a dict (pattern -> files), but a
        # plain list of names is tolerated as well.
        if not self.detected_patterns:
            patterns_str = "None"
        elif isinstance(self.detected_patterns, dict):
            patterns_str = ", ".join(self.detected_patterns.keys())
        else:
            patterns_str = ", ".join(self.detected_patterns)
        parts.append(f"**Detected Patterns:** {patterns_str}\n\n")

        if self.config_files:
            parts.append("## Configuration Files\n\n")
            for entry in self.config_files:
                parts.append(f"### {entry.relative_path}\n\n")
                parts.append(f"- **Type:** {entry.config_type}\n")
                parts.append(f"- **Purpose:** {entry.purpose}\n")
                parts.append(f"- **Settings:** {len(entry.settings)}\n")
                if entry.patterns:
                    parts.append(f"- **Patterns:** {', '.join(entry.patterns)}\n")
                if entry.parse_errors:
                    parts.append(f"- **Errors:** {len(entry.parse_errors)}\n")
                parts.append("\n")

        if self.errors:
            parts.append("## Errors\n\n")
            parts.extend(f"- {error}\n" for error in self.errors)

        return "".join(parts)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
class ConfigFileDetector:
    """Detect configuration files in codebase.

    Walks a directory tree (skipping vendored/build directories) and
    classifies each file against CONFIG_PATTERNS by exact filename or
    glob pattern, also inferring a coarse purpose for each match.
    """

    # Config file patterns by type: "names" are exact filenames (matched
    # case-insensitively), "patterns" are glob patterns for Path.match.
    CONFIG_PATTERNS = {
        "json": {
            "patterns": ["*.json", "package.json", "tsconfig.json", "jsconfig.json"],
            "names": [
                "config.json",
                "settings.json",
                "app.json",
                ".eslintrc.json",
                ".prettierrc.json",
            ],
        },
        "yaml": {
            "patterns": ["*.yaml", "*.yml"],
            "names": [
                "config.yml",
                "settings.yml",
                ".travis.yml",
                ".gitlab-ci.yml",
                "docker-compose.yml",
            ],
        },
        "toml": {
            "patterns": ["*.toml"],
            "names": ["pyproject.toml", "Cargo.toml", "config.toml"],
        },
        "env": {
            "patterns": [".env*", "*.env"],
            "names": [".env", ".env.example", ".env.local", ".env.production"],
        },
        "ini": {
            "patterns": ["*.ini", "*.cfg"],
            "names": ["config.ini", "setup.cfg", "tox.ini"],
        },
        "python": {
            "patterns": [],
            "names": ["settings.py", "config.py", "configuration.py", "constants.py"],
        },
        "javascript": {
            "patterns": ["*.config.js", "*.config.ts"],
            "names": [
                "config.js",
                "next.config.js",
                "vue.config.js",
                "webpack.config.js",
            ],
        },
        "dockerfile": {
            "patterns": ["Dockerfile*"],
            "names": ["Dockerfile", "Dockerfile.dev", "Dockerfile.prod"],
        },
        "docker-compose": {
            "patterns": ["docker-compose*.yml", "docker-compose*.yaml"],
            "names": ["docker-compose.yml", "docker-compose.yaml"],
        },
    }

    # Order in which config types are tested.  Specific types must come
    # before generic ones: yaml's "*.yml" would otherwise claim
    # docker-compose files, making the "docker-compose" type unreachable.
    # Must list every key of CONFIG_PATTERNS exactly once.
    DETECTION_ORDER = (
        "docker-compose",
        "dockerfile",
        "env",
        "toml",
        "ini",
        "python",
        "javascript",
        "json",
        "yaml",
    )

    # Directories to skip.  Entries may be glob patterns ("*.egg-info")
    # or literal directory names; both are matched with fnmatchcase.
    SKIP_DIRS = {
        "node_modules",
        "venv",
        "env",
        ".venv",
        "__pycache__",
        ".git",
        "build",
        "dist",
        ".tox",
        ".mypy_cache",
        ".pytest_cache",
        "htmlcov",
        "coverage",
        ".eggs",
        "*.egg-info",
    }

    def find_config_files(self, directory: Path, max_files: int = 100) -> list[ConfigFile]:
        """
        Find all configuration files in directory.

        Args:
            directory: Root directory to search
            max_files: Maximum number of config files to find

        Returns:
            List of ConfigFile objects (settings not yet parsed)
        """
        config_files = []
        found_count = 0

        for file_path in self._walk_directory(directory):
            if found_count >= max_files:
                logger.info(f"Reached max_files limit ({max_files})")
                break

            config_type = self._detect_config_type(file_path)
            if config_type:
                relative_path = str(file_path.relative_to(directory))
                config_file = ConfigFile(
                    file_path=str(file_path),
                    relative_path=relative_path,
                    config_type=config_type,
                    purpose=self._infer_purpose(file_path, config_type),
                )
                config_files.append(config_file)
                found_count += 1
                logger.debug(f"Found {config_type} config: {relative_path}")

        logger.info(f"Found {len(config_files)} configuration files")
        return config_files

    def _walk_directory(self, directory: Path):
        """Yield files under directory, skipping excluded directories."""
        for item in directory.rglob("*"):
            # Skip directories themselves; only files are yielded
            if item.is_dir():
                continue

            # Skip anything whose path contains an excluded component.
            # fnmatchcase lets glob entries like "*.egg-info" match too
            # (a plain `in` test could never match those).
            if any(
                fnmatch.fnmatchcase(part, skip_dir)
                for part in item.parts
                for skip_dir in self.SKIP_DIRS
            ):
                continue

            yield item

    def _detect_config_type(self, file_path: Path) -> str | None:
        """Detect configuration file type, or None if not a config file.

        Types are tried in DETECTION_ORDER so that specific types
        (docker-compose, dockerfile) win over generic glob matches.
        """
        filename = file_path.name.lower()

        for config_type in self.DETECTION_ORDER:
            patterns = self.CONFIG_PATTERNS[config_type]

            # Exact name matches; compare lowercased on both sides so
            # mixed-case declared names ("Dockerfile", "Cargo.toml") hit.
            if any(filename == name.lower() for name in patterns["names"]):
                return config_type

            # Glob pattern matches against the path
            for pattern in patterns["patterns"]:
                if file_path.match(pattern):
                    return config_type

        return None

    def _infer_purpose(self, file_path: Path, _config_type: str) -> str:
        """Infer configuration purpose from file path and name.

        Purely heuristic keyword matching; _config_type is currently
        unused but kept for interface stability.
        """
        path_lower = str(file_path).lower()
        filename = file_path.name.lower()

        # Database configs
        if any(word in path_lower for word in ["database", "db", "postgres", "mysql", "mongo"]):
            return "database_configuration"

        # API configs
        if any(word in path_lower for word in ["api", "rest", "graphql", "endpoint"]):
            return "api_configuration"

        # Logging configs
        if any(word in path_lower for word in ["log", "logger", "logging"]):
            return "logging_configuration"

        # Docker configs
        if "docker" in filename:
            return "docker_configuration"

        # CI/CD configs
        if any(word in path_lower for word in [".travis", ".gitlab", ".github", "ci", "cd"]):
            return "ci_cd_configuration"

        # Package configs
        if filename in ["package.json", "pyproject.toml", "cargo.toml"]:
            return "package_configuration"

        # TypeScript/JavaScript configs
        if filename in ["tsconfig.json", "jsconfig.json"]:
            return "typescript_configuration"

        # Framework configs
        if "next.config" in filename or "vue.config" in filename or "webpack.config" in filename:
            return "framework_configuration"

        # Environment configs
        if ".env" in filename:
            return "environment_configuration"

        # Default
        return "general_configuration"
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
class ConfigParser:
    """Parse different configuration file formats.

    parse_config_file() reads the file from disk and dispatches to a
    format-specific ``_parse_*`` method; each of those appends
    ConfigSetting objects to ``config_file.settings`` and records
    failures in ``config_file.parse_errors`` instead of raising.
    """

    def parse_config_file(self, config_file: ConfigFile) -> ConfigFile:
        """
        Parse configuration file and extract settings.

        Args:
            config_file: ConfigFile object to parse

        Returns:
            Updated ConfigFile with settings populated
        """
        try:
            # Read file content
            with open(config_file.file_path, encoding="utf-8") as f:
                config_file.raw_content = f.read()

            # Parse based on type
            if config_file.config_type == "json":
                self._parse_json(config_file)
            elif config_file.config_type == "yaml":
                self._parse_yaml(config_file)
            elif config_file.config_type == "toml":
                self._parse_toml(config_file)
            elif config_file.config_type == "env":
                self._parse_env(config_file)
            elif config_file.config_type == "ini":
                self._parse_ini(config_file)
            elif config_file.config_type == "python":
                self._parse_python_config(config_file)
            elif config_file.config_type == "javascript":
                self._parse_javascript_config(config_file)
            elif config_file.config_type == "dockerfile":
                self._parse_dockerfile(config_file)
            elif config_file.config_type == "docker-compose":
                self._parse_yaml(config_file)  # Docker compose is YAML

        except Exception as e:
            # Broad catch so one unreadable/broken file cannot abort a whole
            # extraction run; the error is kept on the ConfigFile itself.
            error_msg = f"Error parsing {config_file.relative_path}: {str(e)}"
            logger.warning(error_msg)
            config_file.parse_errors.append(error_msg)

        return config_file

    def _parse_json(self, config_file: ConfigFile):
        """Parse JSON configuration from raw_content."""
        try:
            data = json.loads(config_file.raw_content)
            self._extract_settings_from_dict(data, config_file)
        except json.JSONDecodeError as e:
            config_file.parse_errors.append(f"JSON parse error: {str(e)}")

    def _parse_yaml(self, config_file: ConfigFile):
        """Parse YAML configuration (requires PyYAML)."""
        if not YAML_AVAILABLE:
            config_file.parse_errors.append("PyYAML not installed")
            return

        try:
            data = yaml.safe_load(config_file.raw_content)
            # Non-mapping documents (scalars, lists) carry no named settings
            if isinstance(data, dict):
                self._extract_settings_from_dict(data, config_file)
        except yaml.YAMLError as e:
            config_file.parse_errors.append(f"YAML parse error: {str(e)}")

    def _parse_toml(self, config_file: ConfigFile):
        """Parse TOML configuration (requires tomli or toml)."""
        if not TOML_AVAILABLE:
            config_file.parse_errors.append("toml/tomli not installed")
            return

        try:
            data = toml_lib.loads(config_file.raw_content)
            self._extract_settings_from_dict(data, config_file)
        except Exception as e:
            config_file.parse_errors.append(f"TOML parse error: {str(e)}")

    def _parse_env(self, config_file: ConfigFile):
        """Parse .env file (only UPPER_SNAKE_CASE keys are recognized)."""
        lines = config_file.raw_content.split("\n")

        for line_num, line in enumerate(lines, 1):
            line = line.strip()

            # Skip comments and empty lines
            if not line or line.startswith("#"):
                continue

            # Parse KEY=VALUE
            match = re.match(r"([A-Z_][A-Z0-9_]*)\s*=\s*(.+)", line)
            if match:
                key, value = match.groups()
                # Strip surrounding quotes, if any; values stay strings
                value = value.strip().strip('"').strip("'")

                setting = ConfigSetting(
                    key=key,
                    value=value,
                    value_type=self._infer_type(value),
                    env_var=key,
                    # line_num is 1-based, so line_num - 1 is this line's
                    # 0-based index; the helper looks at the line above it.
                    description=self._extract_env_description(lines, line_num - 1),
                )
                config_file.settings.append(setting)

    def _parse_ini(self, config_file: ConfigFile):
        """Parse INI configuration; keys are emitted as "section.key"."""
        import configparser

        try:
            parser = configparser.ConfigParser()
            parser.read_string(config_file.raw_content)

            for section in parser.sections():
                for key, value in parser[section].items():
                    setting = ConfigSetting(
                        key=f"{section}.{key}",
                        value=value,
                        value_type=self._infer_type(value),
                        nested_path=[section, key],
                    )
                    config_file.settings.append(setting)
        except Exception as e:
            config_file.parse_errors.append(f"INI parse error: {str(e)}")

    def _parse_python_config(self, config_file: ConfigFile):
        """Parse Python configuration module via the AST (no code is executed)."""
        try:
            tree = ast.parse(config_file.raw_content)

            for node in ast.walk(tree):
                # Get variable name and skip private variables
                # (single-target simple assignments, at any nesting level)
                if (
                    isinstance(node, ast.Assign)
                    and len(node.targets) == 1
                    and isinstance(node.targets[0], ast.Name)
                    and not node.targets[0].id.startswith("_")
                ):
                    key = node.targets[0].id

                    # Extract value; literal_eval only accepts constant
                    # expressions, so calls/attribute lookups are skipped
                    try:
                        value = ast.literal_eval(node.value)
                        setting = ConfigSetting(
                            key=key,
                            value=value,
                            value_type=self._infer_type(value),
                            description=self._extract_python_docstring(node),
                        )
                        config_file.settings.append(setting)
                    except (ValueError, TypeError):
                        # Can't evaluate complex expressions
                        pass

        except SyntaxError as e:
            config_file.parse_errors.append(f"Python parse error: {str(e)}")

    def _parse_javascript_config(self, config_file: ConfigFile):
        """Parse JavaScript/TypeScript config (basic extraction)"""
        # Simple regex-based extraction for common patterns
        patterns = [
            r'(?:const|let|var)\s+(\w+)\s*[:=]\s*(["\'])(.*?)\2',  # String values
            r"(?:const|let|var)\s+(\w+)\s*[:=]\s*(\d+)",  # Number values
            r"(?:const|let|var)\s+(\w+)\s*[:=]\s*(true|false)",  # Boolean values
        ]

        for pattern in patterns:
            for match in re.finditer(pattern, config_file.raw_content):
                if len(match.groups()) >= 2:
                    key = match.group(1)
                    # The string pattern has 3 groups (quote char in group 2,
                    # value in group 3); the others keep the value in group 2.
                    value = match.group(3) if len(match.groups()) > 2 else match.group(2)

                    setting = ConfigSetting(
                        key=key, value=value, value_type=self._infer_type(value)
                    )
                    config_file.settings.append(setting)

    def _parse_dockerfile(self, config_file: ConfigFile):
        """Parse Dockerfile configuration (ENV and ARG directives only)."""
        lines = config_file.raw_content.split("\n")

        for line in lines:
            line = line.strip()

            # Extract ENV variables (only the "ENV KEY=VALUE" form)
            if line.startswith("ENV "):
                parts = line[4:].split("=", 1)
                if len(parts) == 2:
                    key, value = parts
                    setting = ConfigSetting(
                        key=key.strip(),
                        value=value.strip(),
                        value_type="string",
                        env_var=key.strip(),
                    )
                    config_file.settings.append(setting)

            # Extract ARG variables (default value optional)
            elif line.startswith("ARG "):
                parts = line[4:].split("=", 1)
                key = parts[0].strip()
                value = parts[1].strip() if len(parts) == 2 else None

                setting = ConfigSetting(key=key, value=value, value_type="string")
                config_file.settings.append(setting)

    def _extract_settings_from_dict(
        self, data: dict, config_file: ConfigFile, parent_path: list[str] | None = None
    ):
        """Recursively extract settings from dictionary"""
        if parent_path is None:
            parent_path = []

        for key, value in data.items():
            if isinstance(value, dict):
                # Recurse into nested dicts
                self._extract_settings_from_dict(value, config_file, parent_path + [key])
            else:
                setting = ConfigSetting(
                    # Dotted key for nested values, e.g. "database.host"
                    key=".".join(parent_path + [key]) if parent_path else key,
                    value=value,
                    value_type=self._infer_type(value),
                    nested_path=parent_path + [key],
                )
                config_file.settings.append(setting)

    def _infer_type(self, value: Any) -> str:
        """Infer value type"""
        if value is None:
            return "null"
        # bool must be tested before int: bool is a subclass of int
        elif isinstance(value, bool):
            return "boolean"
        elif isinstance(value, int):
            return "integer"
        elif isinstance(value, float):
            return "number"
        elif isinstance(value, (list, tuple)):
            return "array"
        elif isinstance(value, dict):
            return "object"
        else:
            return "string"

    def _extract_env_description(self, lines: list[str], line_index: int) -> str:
        """Extract description from comment above env variable.

        line_index is the 0-based index of the KEY=VALUE line itself;
        the line directly above it is used if it is a "#" comment.
        """
        if line_index > 0:
            prev_line = lines[line_index - 1].strip()
            if prev_line.startswith("#"):
                return prev_line[1:].strip()
        return ""

    def _extract_python_docstring(self, _node: ast.AST) -> str:
        """Extract docstring/comment for Python node"""
        # This is simplified - real implementation would need more context
        return ""
|
|
606
|
+
|
|
607
|
+
|
|
608
|
+
class ConfigPatternDetector:
    """Detect common configuration patterns.

    A pattern is reported when at least "min_match" of its known keys
    appear (case-insensitively) among a file's setting keys.
    """

    # Known configuration patterns
    KNOWN_PATTERNS = {
        "database_config": {
            "keys": [
                "host",
                "port",
                "database",
                "user",
                "username",
                "password",
                "db_name",
            ],
            "min_match": 3,
        },
        "api_config": {
            "keys": [
                "base_url",
                "api_key",
                "api_secret",
                "timeout",
                "retry",
                "endpoint",
            ],
            "min_match": 2,
        },
        "logging_config": {
            "keys": ["level", "format", "handler", "file", "console", "log_level"],
            "min_match": 2,
        },
        "cache_config": {
            "keys": ["backend", "ttl", "timeout", "max_size", "redis", "memcached"],
            "min_match": 2,
        },
        "email_config": {
            "keys": ["smtp_host", "smtp_port", "email", "from_email", "mail_server"],
            "min_match": 2,
        },
        "auth_config": {
            "keys": ["secret_key", "jwt_secret", "token", "oauth", "authentication"],
            "min_match": 1,
        },
        "server_config": {
            "keys": ["host", "port", "bind", "workers", "threads"],
            "min_match": 2,
        },
    }

    def detect_patterns(self, config_file: ConfigFile) -> list[str]:
        """
        Detect which patterns this config file matches.

        Args:
            config_file: ConfigFile with settings extracted

        Returns:
            List of detected pattern names
        """
        detected = []

        # Lowercased key set for case-insensitive matching
        present = {setting.key.lower() for setting in config_file.settings}

        for pattern_name, definition in self.KNOWN_PATTERNS.items():
            # Intersect with the pattern's (lowercased) vocabulary
            candidates = {candidate.lower() for candidate in definition["keys"]}
            matches = len(present & candidates)

            if matches >= definition["min_match"]:
                detected.append(pattern_name)
                logger.debug(
                    f"Detected {pattern_name} in {config_file.relative_path} ({matches} matches)"
                )

        return detected
|
|
688
|
+
|
|
689
|
+
|
|
690
|
+
class ConfigExtractor:
|
|
691
|
+
"""Main configuration extraction orchestrator"""
|
|
692
|
+
|
|
693
|
+
def __init__(self):
|
|
694
|
+
self.detector = ConfigFileDetector()
|
|
695
|
+
self.parser = ConfigParser()
|
|
696
|
+
self.pattern_detector = ConfigPatternDetector()
|
|
697
|
+
|
|
698
|
+
def extract_from_directory(
|
|
699
|
+
self, directory: Path, max_files: int = 100
|
|
700
|
+
) -> ConfigExtractionResult:
|
|
701
|
+
"""
|
|
702
|
+
Extract configuration patterns from directory.
|
|
703
|
+
|
|
704
|
+
Args:
|
|
705
|
+
directory: Root directory to analyze
|
|
706
|
+
max_files: Maximum config files to process
|
|
707
|
+
|
|
708
|
+
Returns:
|
|
709
|
+
ConfigExtractionResult with all findings
|
|
710
|
+
"""
|
|
711
|
+
result = ConfigExtractionResult()
|
|
712
|
+
|
|
713
|
+
logger.info(f"Extracting configuration patterns from: {directory}")
|
|
714
|
+
|
|
715
|
+
# Step 1: Find config files
|
|
716
|
+
config_files = self.detector.find_config_files(directory, max_files)
|
|
717
|
+
result.total_files = len(config_files)
|
|
718
|
+
|
|
719
|
+
if not config_files:
|
|
720
|
+
logger.warning("No configuration files found")
|
|
721
|
+
return result
|
|
722
|
+
|
|
723
|
+
# Step 2: Parse each config file
|
|
724
|
+
for config_file in config_files:
|
|
725
|
+
try:
|
|
726
|
+
parsed = self.parser.parse_config_file(config_file)
|
|
727
|
+
|
|
728
|
+
# Step 3: Detect patterns
|
|
729
|
+
patterns = self.pattern_detector.detect_patterns(parsed)
|
|
730
|
+
parsed.patterns = patterns
|
|
731
|
+
|
|
732
|
+
# Track patterns
|
|
733
|
+
for pattern in patterns:
|
|
734
|
+
if pattern not in result.detected_patterns:
|
|
735
|
+
result.detected_patterns[pattern] = []
|
|
736
|
+
result.detected_patterns[pattern].append(parsed.relative_path)
|
|
737
|
+
|
|
738
|
+
result.config_files.append(parsed)
|
|
739
|
+
result.total_settings += len(parsed.settings)
|
|
740
|
+
|
|
741
|
+
except Exception as e:
|
|
742
|
+
error_msg = f"Error processing {config_file.relative_path}: {str(e)}"
|
|
743
|
+
logger.error(error_msg)
|
|
744
|
+
result.errors.append(error_msg)
|
|
745
|
+
|
|
746
|
+
logger.info(
|
|
747
|
+
f"Extracted {result.total_settings} settings from {result.total_files} config files"
|
|
748
|
+
)
|
|
749
|
+
logger.info(f"Detected patterns: {list(result.detected_patterns.keys())}")
|
|
750
|
+
|
|
751
|
+
return result
|
|
752
|
+
|
|
753
|
+
def to_dict(self, result: ConfigExtractionResult) -> dict:
    """Serialize a ConfigExtractionResult into a JSON-friendly dictionary.

    Args:
        result: Extraction result produced by extract_from_directory().

    Returns:
        Dict mirroring the result's totals, detected patterns,
        per-file details (including per-setting metadata), and errors.
    """
    serialized_files = []
    for cf in result.config_files:
        # Flatten each ConfigSetting into a plain dict.
        settings_payload = []
        for setting in cf.settings:
            settings_payload.append(
                {
                    "key": setting.key,
                    "value": setting.value,
                    "type": setting.value_type,
                    "env_var": setting.env_var,
                    "description": setting.description,
                }
            )
        serialized_files.append(
            {
                "file_path": cf.file_path,
                "relative_path": cf.relative_path,
                "type": cf.config_type,
                "purpose": cf.purpose,
                "patterns": cf.patterns,
                "settings_count": len(cf.settings),
                "settings": settings_payload,
                "parse_errors": cf.parse_errors,
            }
        )
    return {
        "total_files": result.total_files,
        "total_settings": result.total_settings,
        "detected_patterns": result.detected_patterns,
        "config_files": serialized_files,
        "errors": result.errors,
    }
|
|
783
|
+
|
|
784
|
+
|
|
785
|
+
def main():
    """CLI entry point for config extraction.

    Parses command-line arguments, extracts configuration patterns from the
    given directory, optionally runs AI enhancement (API or local mode), and
    writes the result as JSON to a file or stdout, followed by a short
    human-readable summary.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Extract configuration patterns from codebase with optional AI enhancement"
    )
    parser.add_argument("directory", type=Path, help="Directory to analyze")
    parser.add_argument("--output", "-o", type=Path, help="Output JSON file")
    parser.add_argument(
        "--max-files", type=int, default=100, help="Maximum config files to process"
    )
    parser.add_argument(
        "--enhance",
        action="store_true",
        help="Enhance with AI analysis (API mode, requires ANTHROPIC_API_KEY)",
    )
    parser.add_argument(
        "--enhance-local",
        action="store_true",
        help="Enhance with AI analysis (LOCAL mode, uses Claude Code CLI)",
    )
    parser.add_argument(
        "--ai-mode",
        choices=["auto", "api", "local", "none"],
        default="none",
        help="AI enhancement mode: auto (detect), api (Claude API), local (Claude Code CLI), none (disable)",
    )

    args = parser.parse_args()

    # Setup logging
    logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")

    # Extract
    extractor = ConfigExtractor()
    result = extractor.extract_from_directory(args.directory, args.max_files)

    # Convert to dict
    output_dict = extractor.to_dict(result)

    # AI Enhancement (if requested). The --enhance / --enhance-local
    # shortcut flags take precedence over --ai-mode.
    enhance_mode = args.ai_mode
    if args.enhance:
        enhance_mode = "api"
    elif args.enhance_local:
        enhance_mode = "local"

    if enhance_mode != "none":
        try:
            # Imported lazily so base extraction works even when the
            # enhancer (or its dependencies) is unavailable.
            from skill_seekers.cli.config_enhancer import ConfigEnhancer

            logger.info(f"🤖 Starting AI enhancement (mode: {enhance_mode})...")
            enhancer = ConfigEnhancer(mode=enhance_mode)
            output_dict = enhancer.enhance_config_result(output_dict)
            # BUG FIX: this log message was a garbled/mis-encoded string
            # literal broken across two lines; restored as one literal.
            logger.info("✅ AI enhancement complete")
        except ImportError:
            logger.warning("⚠️ ConfigEnhancer not available, skipping enhancement")
        except Exception as e:
            # Enhancement is best-effort: log and fall through with the
            # unenhanced result rather than aborting the whole run.
            logger.error(f"❌ AI enhancement failed: {e}")

    # Output
    if args.output:
        # BUG FIX: write with explicit UTF-8 so emoji/non-ASCII config
        # values don't fail on platforms with a non-UTF-8 default encoding.
        with open(args.output, "w", encoding="utf-8") as f:
            json.dump(output_dict, f, indent=2)
        print(f"✅ Saved config extraction results to: {args.output}")
    else:
        print(json.dumps(output_dict, indent=2))

    # Summary
    print("\n📊 Summary:")
    print(f" Config files found: {result.total_files}")
    print(f" Total settings: {result.total_settings}")
    print(f" Detected patterns: {', '.join(result.detected_patterns.keys()) or 'None'}")

    if "ai_enhancements" in output_dict:
        print(f" ✨ AI enhancements: Yes ({enhance_mode} mode)")
        insights = output_dict["ai_enhancements"].get("overall_insights", {})
        if insights.get("security_issues_found"):
            print(f" 🔒 Security issues found: {insights['security_issues_found']}")

    if result.errors:
        print(f"\n⚠️ Errors: {len(result.errors)}")
|
|
868
|
+
|
|
869
|
+
|
|
870
|
+
# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
|