skill_seekers-2.7.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skill_seekers/__init__.py +22 -0
- skill_seekers/cli/__init__.py +39 -0
- skill_seekers/cli/adaptors/__init__.py +120 -0
- skill_seekers/cli/adaptors/base.py +221 -0
- skill_seekers/cli/adaptors/claude.py +485 -0
- skill_seekers/cli/adaptors/gemini.py +453 -0
- skill_seekers/cli/adaptors/markdown.py +269 -0
- skill_seekers/cli/adaptors/openai.py +503 -0
- skill_seekers/cli/ai_enhancer.py +310 -0
- skill_seekers/cli/api_reference_builder.py +373 -0
- skill_seekers/cli/architectural_pattern_detector.py +525 -0
- skill_seekers/cli/code_analyzer.py +1462 -0
- skill_seekers/cli/codebase_scraper.py +1225 -0
- skill_seekers/cli/config_command.py +563 -0
- skill_seekers/cli/config_enhancer.py +431 -0
- skill_seekers/cli/config_extractor.py +871 -0
- skill_seekers/cli/config_manager.py +452 -0
- skill_seekers/cli/config_validator.py +394 -0
- skill_seekers/cli/conflict_detector.py +528 -0
- skill_seekers/cli/constants.py +72 -0
- skill_seekers/cli/dependency_analyzer.py +757 -0
- skill_seekers/cli/doc_scraper.py +2332 -0
- skill_seekers/cli/enhance_skill.py +488 -0
- skill_seekers/cli/enhance_skill_local.py +1096 -0
- skill_seekers/cli/enhance_status.py +194 -0
- skill_seekers/cli/estimate_pages.py +433 -0
- skill_seekers/cli/generate_router.py +1209 -0
- skill_seekers/cli/github_fetcher.py +534 -0
- skill_seekers/cli/github_scraper.py +1466 -0
- skill_seekers/cli/guide_enhancer.py +723 -0
- skill_seekers/cli/how_to_guide_builder.py +1267 -0
- skill_seekers/cli/install_agent.py +461 -0
- skill_seekers/cli/install_skill.py +178 -0
- skill_seekers/cli/language_detector.py +614 -0
- skill_seekers/cli/llms_txt_detector.py +60 -0
- skill_seekers/cli/llms_txt_downloader.py +104 -0
- skill_seekers/cli/llms_txt_parser.py +150 -0
- skill_seekers/cli/main.py +558 -0
- skill_seekers/cli/markdown_cleaner.py +132 -0
- skill_seekers/cli/merge_sources.py +806 -0
- skill_seekers/cli/package_multi.py +77 -0
- skill_seekers/cli/package_skill.py +241 -0
- skill_seekers/cli/pattern_recognizer.py +1825 -0
- skill_seekers/cli/pdf_extractor_poc.py +1166 -0
- skill_seekers/cli/pdf_scraper.py +617 -0
- skill_seekers/cli/quality_checker.py +519 -0
- skill_seekers/cli/rate_limit_handler.py +438 -0
- skill_seekers/cli/resume_command.py +160 -0
- skill_seekers/cli/run_tests.py +230 -0
- skill_seekers/cli/setup_wizard.py +93 -0
- skill_seekers/cli/split_config.py +390 -0
- skill_seekers/cli/swift_patterns.py +560 -0
- skill_seekers/cli/test_example_extractor.py +1081 -0
- skill_seekers/cli/test_unified_simple.py +179 -0
- skill_seekers/cli/unified_codebase_analyzer.py +572 -0
- skill_seekers/cli/unified_scraper.py +932 -0
- skill_seekers/cli/unified_skill_builder.py +1605 -0
- skill_seekers/cli/upload_skill.py +162 -0
- skill_seekers/cli/utils.py +432 -0
- skill_seekers/mcp/__init__.py +33 -0
- skill_seekers/mcp/agent_detector.py +316 -0
- skill_seekers/mcp/git_repo.py +273 -0
- skill_seekers/mcp/server.py +231 -0
- skill_seekers/mcp/server_fastmcp.py +1249 -0
- skill_seekers/mcp/server_legacy.py +2302 -0
- skill_seekers/mcp/source_manager.py +285 -0
- skill_seekers/mcp/tools/__init__.py +115 -0
- skill_seekers/mcp/tools/config_tools.py +251 -0
- skill_seekers/mcp/tools/packaging_tools.py +826 -0
- skill_seekers/mcp/tools/scraping_tools.py +842 -0
- skill_seekers/mcp/tools/source_tools.py +828 -0
- skill_seekers/mcp/tools/splitting_tools.py +212 -0
- skill_seekers/py.typed +0 -0
- skill_seekers-2.7.3.dist-info/METADATA +2027 -0
- skill_seekers-2.7.3.dist-info/RECORD +79 -0
- skill_seekers-2.7.3.dist-info/WHEEL +5 -0
- skill_seekers-2.7.3.dist-info/entry_points.txt +19 -0
- skill_seekers-2.7.3.dist-info/licenses/LICENSE +21 -0
- skill_seekers-2.7.3.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1605 @@
#!/usr/bin/env python3
"""
Unified Skill Builder

Generates final skill structure from merged multi-source data:
- SKILL.md with merged APIs and conflict warnings
- references/ with organized content by source
- Inline conflict markers (⚠️)
- Separate conflicts summary section

Supports mixed sources (documentation, GitHub, PDF) and highlights
discrepancies transparently.
"""

import json
import logging
import os
import shutil
from pathlib import Path

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class UnifiedSkillBuilder:
    """
    Builds unified skill from multi-source data.
    """

    def __init__(
        self,
        config: dict,
        scraped_data: dict,
        merged_data: dict | None = None,
        conflicts: list | None = None,
        cache_dir: str | None = None,
    ):
        """
        Initialize skill builder.

        Args:
            config: Unified config dict
            scraped_data: Dict of scraped data by source type
            merged_data: Merged API data (if conflicts were resolved)
            conflicts: List of detected conflicts
            cache_dir: Optional cache directory for intermediate files
        """
        self.config = config
        self.scraped_data = scraped_data
        self.merged_data = merged_data
        self.conflicts = conflicts or []
        self.cache_dir = cache_dir

        self.name = config["name"]
        self.description = config["description"]
        self.skill_dir = f"output/{self.name}"

        # Create directories
        os.makedirs(self.skill_dir, exist_ok=True)
        os.makedirs(f"{self.skill_dir}/references", exist_ok=True)
        os.makedirs(f"{self.skill_dir}/scripts", exist_ok=True)
        os.makedirs(f"{self.skill_dir}/assets", exist_ok=True)

    def build(self):
        """Build complete skill structure."""
        logger.info(f"Building unified skill: {self.name}")

        # Generate main SKILL.md
        self._generate_skill_md()

        # Generate reference files by source
        self._generate_references()

        # Generate conflicts report (if any)
        if self.conflicts:
            self._generate_conflicts_report()

        logger.info(f"✅ Unified skill built: {self.skill_dir}/")
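
    # --- Usage sketch (illustrative; not part of the packaged file) ---
    # The config keys ("name", "description", "sources") and the scraped_data
    # layout match what this class reads; the concrete values are hypothetical.
    #
    #   config = {
    #       "name": "httpx",
    #       "description": "Next-generation HTTP client",
    #       "sources": [{"type": "documentation", "base_url": "https://example.org/docs"}],
    #   }
    #   builder = UnifiedSkillBuilder(config, scraped_data={"documentation": []})
    #   builder.build()  # writes output/httpx/SKILL.md plus references/, scripts/, assets/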

    def _load_source_skill_mds(self) -> dict[str, str]:
        """Load standalone SKILL.md files from each source.

        Returns:
            Dict mapping source type to SKILL.md content
            e.g., {'documentation': '...', 'github': '...', 'pdf': '...'}
        """
        skill_mds = {}

        # Determine base directory for source SKILL.md files
        sources_dir = Path(self.cache_dir) / "sources" if self.cache_dir else Path("output")

        # Load documentation SKILL.md
        docs_skill_path = sources_dir / f"{self.name}_docs" / "SKILL.md"
        if docs_skill_path.exists():
            try:
                skill_mds["documentation"] = docs_skill_path.read_text(encoding="utf-8")
                logger.debug(
                    f"Loaded documentation SKILL.md ({len(skill_mds['documentation'])} chars)"
                )
            except OSError as e:
                logger.warning(f"Failed to read documentation SKILL.md: {e}")

        # Load ALL GitHub sources (multi-source support)
        github_sources = []
        for github_dir in sources_dir.glob(f"{self.name}_github_*"):
            github_skill_path = github_dir / "SKILL.md"
            if github_skill_path.exists():
                try:
                    content = github_skill_path.read_text(encoding="utf-8")
                    github_sources.append(content)
                    logger.debug(
                        f"Loaded GitHub SKILL.md from {github_dir.name} ({len(content)} chars)"
                    )
                except OSError as e:
                    logger.warning(f"Failed to read GitHub SKILL.md from {github_dir.name}: {e}")

        if github_sources:
            # Concatenate all GitHub sources with separator
            skill_mds["github"] = "\n\n---\n\n".join(github_sources)
            logger.debug(f"Combined {len(github_sources)} GitHub SKILL.md files")

        # Load ALL PDF sources (multi-source support)
        pdf_sources = []
        for pdf_dir in sources_dir.glob(f"{self.name}_pdf_*"):
            pdf_skill_path = pdf_dir / "SKILL.md"
            if pdf_skill_path.exists():
                try:
                    content = pdf_skill_path.read_text(encoding="utf-8")
                    pdf_sources.append(content)
                    logger.debug(f"Loaded PDF SKILL.md from {pdf_dir.name} ({len(content)} chars)")
                except OSError as e:
                    logger.warning(f"Failed to read PDF SKILL.md from {pdf_dir.name}: {e}")

        if pdf_sources:
            # Concatenate all PDF sources with separator
            skill_mds["pdf"] = "\n\n---\n\n".join(pdf_sources)
            logger.debug(f"Combined {len(pdf_sources)} PDF SKILL.md files")

        logger.info(f"Loaded {len(skill_mds)} source SKILL.md files")
        return skill_mds
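
    # On-disk layout consumed by the loader above (per the path and glob
    # expressions; the concrete suffixes are illustrative):
    #
    #   <cache_dir>/sources/
    #       <name>_docs/SKILL.md
    #       <name>_github_<repo>/SKILL.md   # every "{name}_github_*" match
    #       <name>_pdf_<doc>/SKILL.md       # every "{name}_pdf_*" match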

    def _parse_skill_md_sections(self, skill_md: str) -> dict[str, str]:
        """Parse SKILL.md into sections by ## headers.

        Args:
            skill_md: Full SKILL.md content

        Returns:
            Dict mapping section name to content
            e.g., {'When to Use': '...', 'Quick Reference': '...'}
        """
        sections = {}
        current_section = None
        current_content = []

        lines = skill_md.split("\n")

        for line in lines:
            # Detect section header (## Header)
            if line.startswith("## "):
                # Save previous section
                if current_section:
                    sections[current_section] = "\n".join(current_content).strip()

                # Start new section
                current_section = line[3:].strip()
                # Remove emoji and markdown formatting
                current_section = current_section.split("](")[0]  # Remove links
                for emoji in [
                    "📚",
                    "🏗️",
                    "⚠️",
                    "🔧",
                    "📖",
                    "💡",
                    "🎯",
                    "🚀",
                    "📝",
                    "⚙️",
                    "🧪",
                    "🔍",
                    "🗂️",
                    "📊",
                    "⚡",
                ]:
                    current_section = current_section.replace(emoji, "").strip()
                current_content = []
            elif current_section:
                # Accumulate content for current section
                current_content.append(line)

        # Save last section
        if current_section and current_content:
            sections[current_section] = "\n".join(current_content).strip()

        logger.debug(f"Parsed {len(sections)} sections from SKILL.md")
        return sections
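
    # Behavior sketch for the parser above (hypothetical input):
    #
    #   _parse_skill_md_sections("intro\n## 🎯 Quick Reference\nuse X\n## Notes\netc.")
    #   -> {"Quick Reference": "use X", "Notes": "etc."}
    #
    # Text before the first "## " header is discarded, and the listed emoji
    # are stripped from headers before they become dict keys.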

    def _synthesize_docs_github(self, skill_mds: dict[str, str]) -> str:
        """Synthesize documentation + GitHub sources with weighted merge.

        Strategy:
        - Start with docs frontmatter and intro
        - Add GitHub metadata (stars, topics, language stats)
        - Merge "When to Use" from both sources
        - Merge "Quick Reference" from both sources
        - Include GitHub-specific sections (patterns, architecture)
        - Merge code examples (prioritize GitHub real usage)
        - Include Known Issues from GitHub
        - Fix placeholder text (httpx_docs → httpx)

        Args:
            skill_mds: Dict with 'documentation' and 'github' keys

        Returns:
            Synthesized SKILL.md content
        """
        docs_sections = self._parse_skill_md_sections(skill_mds.get("documentation", ""))
        github_sections = self._parse_skill_md_sections(skill_mds.get("github", ""))

        # Extract GitHub metadata from full content
        _github_full = skill_mds.get("github", "")

        # Start with YAML frontmatter
        skill_name = self.name.lower().replace("_", "-").replace(" ", "-")[:64]
        desc = self.description[:1024] if len(self.description) > 1024 else self.description

        content = f"""---
name: {skill_name}
description: {desc}
---

# {self.name.title()}

{self.description}

## 📚 Sources

This skill synthesizes knowledge from multiple sources:

- ✅ **Official Documentation**: {self.config.get("sources", [{}])[0].get("base_url", "N/A")}
- ✅ **GitHub Repository**: {[s for s in self.config.get("sources", []) if s.get("type") == "github"][0].get("repo", "N/A") if [s for s in self.config.get("sources", []) if s.get("type") == "github"] else "N/A"}

"""

        # Add GitHub Description and Metadata if present
        if "Description" in github_sections:
            content += "## 📦 About\n\n"
            content += github_sections["Description"] + "\n\n"

        # Add Repository Info from GitHub
        if "Repository Info" in github_sections:
            content += "### Repository Info\n\n"
            content += github_sections["Repository Info"] + "\n\n"

        # Add Language stats from GitHub
        if "Languages" in github_sections:
            content += "### Languages\n\n"
            content += github_sections["Languages"] + "\n\n"

        content += "## 💡 When to Use This Skill\n\n"

        # Merge "When to Use" sections - Fix placeholder text
        when_to_use_added = False
        for key in ["When to Use This Skill", "When to Use"]:
            if key in docs_sections:
                # Fix placeholder text: httpx_docs → httpx
                when_content = docs_sections[key].replace("httpx_docs", self.name)
                when_content = when_content.replace("httpx_github", self.name)
                content += when_content + "\n\n"
                when_to_use_added = True
                break

        if "When to Use This Skill" in github_sections:
            if when_to_use_added:
                content += "**From repository analysis:**\n\n"
            content += github_sections["When to Use This Skill"] + "\n\n"

        # Quick Reference: Merge from both sources
        content += "## 🎯 Quick Reference\n\n"

        if "Quick Reference" in docs_sections:
            content += "**From Documentation:**\n\n"
            content += docs_sections["Quick Reference"] + "\n\n"

        if "Quick Reference" in github_sections:
            # Include GitHub's Quick Reference (contains design patterns summary)
            logger.info(
                f"DEBUG: Including GitHub Quick Reference ({len(github_sections['Quick Reference'])} chars)"
            )
            content += github_sections["Quick Reference"] + "\n\n"
        else:
            logger.warning("DEBUG: GitHub Quick Reference section NOT FOUND!")

        # Design Patterns (GitHub only - C3.1 analysis)
        if "Design Patterns Detected" in github_sections:
            content += "### Design Patterns Detected\n\n"
            content += "*From C3.1 codebase analysis (confidence > 0.7)*\n\n"
            content += github_sections["Design Patterns Detected"] + "\n\n"

        # Code Examples: Prefer GitHub (real usage)
        content += "## 🧪 Code Examples\n\n"

        if "Code Examples" in github_sections:
            content += "**From Repository Tests:**\n\n"
            # Note: GitHub section already includes "*High-quality examples from codebase (C3.2)*" label
            content += github_sections["Code Examples"] + "\n\n"
        elif "Usage Examples" in github_sections:
            content += "**From Repository:**\n\n"
            content += github_sections["Usage Examples"] + "\n\n"

        if "Example Code Patterns" in docs_sections:
            content += "**From Documentation:**\n\n"
            content += docs_sections["Example Code Patterns"] + "\n\n"

        # API Reference: Include from both sources
        if "API Reference" in docs_sections or "API Reference" in github_sections:
            content += "## 🔧 API Reference\n\n"

            if "API Reference" in github_sections:
                # Note: GitHub section already includes "*Extracted from codebase analysis (C2.5)*" label
                content += github_sections["API Reference"] + "\n\n"

            if "API Reference" in docs_sections:
                content += "**Official API Documentation:**\n\n"
                content += docs_sections["API Reference"] + "\n\n"

        # Known Issues: GitHub only
        if "Known Issues" in github_sections:
            content += "## ⚠️ Known Issues\n\n"
            content += "*Recent issues from GitHub*\n\n"
            content += github_sections["Known Issues"] + "\n\n"

        # Recent Releases: GitHub only (include subsection if present)
        if "Recent Releases" in github_sections:
            # Recent Releases might be a subsection within Known Issues
            # Check if it's standalone
            releases_content = github_sections["Recent Releases"]
            if releases_content.strip() and not releases_content.startswith("###"):
                content += "### Recent Releases\n"
                content += releases_content + "\n\n"

        # Reference documentation
        content += "## 📚 Reference Documentation\n\n"
        content += "Organized by source:\n\n"
        content += "- [Documentation](references/documentation/)\n"
        content += "- [GitHub](references/github/)\n"
        content += "- [Codebase Analysis](references/codebase_analysis/ARCHITECTURE.md)\n\n"

        # Footer
        content += "---\n\n"
        content += (
            "*Synthesized from official documentation and codebase analysis by Skill Seekers*\n"
        )

        return content
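
    # Skeleton of the synthesized document above (abridged; which sections
    # appear depends on the headers found in the parsed sources):
    #
    #   ---
    #   name: <skill-name>
    #   description: <description>
    #   ---
    #   # <Name>
    #   ## 📚 Sources
    #   ## 📦 About                  (GitHub "Description", if present)
    #   ## 💡 When to Use This Skill
    #   ## 🎯 Quick Reference
    #   ## 🧪 Code Examples
    #   ## 🔧 API Reference          (if either source provides one)
    #   ## ⚠️ Known Issues           (GitHub only)
    #   ## 📚 Reference Documentation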

    def _synthesize_docs_github_pdf(self, skill_mds: dict[str, str]) -> str:
        """Synthesize all three sources: documentation + GitHub + PDF.

        Strategy:
        - Start with docs+github synthesis
        - Insert PDF chapters after Quick Reference
        - Add PDF key concepts as supplementary section

        Args:
            skill_mds: Dict with 'documentation', 'github', and 'pdf' keys

        Returns:
            Synthesized SKILL.md content
        """
        # Start with docs+github synthesis
        base_content = self._synthesize_docs_github(skill_mds)
        pdf_sections = self._parse_skill_md_sections(skill_mds.get("pdf", ""))

        # Find insertion point after Quick Reference
        lines = base_content.split("\n")
        insertion_index = -1

        for i, line in enumerate(lines):
            if line.startswith("## 🧪 Code Examples") or line.startswith("## 🔧 API Reference"):
                insertion_index = i
                break

        if insertion_index == -1:
            # Fallback: insert before Reference Documentation
            for i, line in enumerate(lines):
                if line.startswith("## 📚 Reference Documentation"):
                    insertion_index = i
                    break

        # Build PDF section
        pdf_content_lines = []

        # Add Chapter Overview
        if "Chapter Overview" in pdf_sections:
            pdf_content_lines.append("## 📄 PDF Documentation Structure\n")
            pdf_content_lines.append("*From PDF analysis*\n")
            pdf_content_lines.append(pdf_sections["Chapter Overview"])
            pdf_content_lines.append("\n")

        # Add Key Concepts
        if "Key Concepts" in pdf_sections:
            pdf_content_lines.append("## 📖 Key Concepts\n")
            pdf_content_lines.append("*Extracted from PDF headings*\n")
            pdf_content_lines.append(pdf_sections["Key Concepts"])
            pdf_content_lines.append("\n")

        # Insert PDF content
        if pdf_content_lines and insertion_index != -1:
            lines[insertion_index:insertion_index] = pdf_content_lines
        elif pdf_content_lines:
            # Append at end before footer
            footer_index = -1
            for i, line in enumerate(lines):
                if line.startswith("---") and i > len(lines) - 5:
                    footer_index = i
                    break
            if footer_index != -1:
                lines[footer_index:footer_index] = pdf_content_lines

        # Update reference documentation to include PDF
        final_content = "\n".join(lines)
        final_content = final_content.replace(
            "- [Codebase Analysis](references/codebase_analysis/ARCHITECTURE.md)\n",
            "- [Codebase Analysis](references/codebase_analysis/ARCHITECTURE.md)\n- [PDF Documentation](references/pdf/)\n",
        )

        return final_content
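
    # The insertion above relies on Python's slice assignment: assigning a
    # list to an empty slice splices it in without overwriting neighbours.
    #
    #   lines = ["a", "b", "c"]
    #   lines[1:1] = ["x", "y"]   # -> ["a", "x", "y", "b", "c"]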

    def _generate_skill_md(self):
        """Generate main SKILL.md file using synthesis formulas.

        Strategy:
        1. Try to load standalone SKILL.md from each source
        2. If found, use synthesis formulas for rich content
        3. If not found, fall back to legacy minimal generation
        """
        skill_path = os.path.join(self.skill_dir, "SKILL.md")

        # Try to load source SKILL.md files
        skill_mds = self._load_source_skill_mds()

        # Determine synthesis strategy based on available sources
        has_docs = "documentation" in skill_mds
        has_github = "github" in skill_mds
        has_pdf = "pdf" in skill_mds

        content = None

        # Apply appropriate synthesis formula
        if has_docs and has_github and has_pdf:
            logger.info("Synthesizing: documentation + GitHub + PDF")
            content = self._synthesize_docs_github_pdf(skill_mds)

        elif has_docs and has_github:
            logger.info("Synthesizing: documentation + GitHub")
            content = self._synthesize_docs_github(skill_mds)

        elif has_docs and has_pdf:
            logger.info("Synthesizing: documentation + PDF")
            content = self._synthesize_docs_pdf(skill_mds)

        elif has_github and has_pdf:
            logger.info("Synthesizing: GitHub + PDF")
            content = self._synthesize_github_pdf(skill_mds)

        elif has_docs:
            logger.info("Using documentation SKILL.md as-is")
            content = skill_mds["documentation"]

        elif has_github:
            logger.info("Using GitHub SKILL.md as-is")
            content = skill_mds["github"]

        elif has_pdf:
            logger.info("Using PDF SKILL.md as-is")
            content = skill_mds["pdf"]

        # Fallback: generate minimal SKILL.md (legacy behavior)
        if not content:
            logger.warning("No source SKILL.md files found, generating minimal SKILL.md (legacy)")
            content = self._generate_minimal_skill_md()

        # Write final content
        with open(skill_path, "w", encoding="utf-8") as f:
            f.write(content)

        logger.info(f"Created SKILL.md ({len(content)} chars, ~{len(content.split())} words)")
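
    # Dispatch summary for the branch ladder above:
    #
    #   docs + github + pdf -> _synthesize_docs_github_pdf
    #   docs + github       -> _synthesize_docs_github
    #   docs + pdf          -> _synthesize_docs_pdf
    #   github + pdf        -> _synthesize_github_pdf
    #   single source       -> that source's SKILL.md verbatim
    #   nothing loaded      -> _generate_minimal_skill_md (legacy fallback)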

    def _synthesize_docs_pdf(self, skill_mds: dict[str, str]) -> str:
        """Synthesize documentation + PDF sources.

        Strategy:
        - Start with docs SKILL.md
        - Insert PDF chapters and key concepts as supplementary sections

        Args:
            skill_mds: Dict with 'documentation' and 'pdf' keys

        Returns:
            Synthesized SKILL.md content
        """
        docs_content = skill_mds["documentation"]
        pdf_sections = self._parse_skill_md_sections(skill_mds["pdf"])

        lines = docs_content.split("\n")
        insertion_index = -1

        # Find insertion point before Reference Documentation
        for i, line in enumerate(lines):
            if line.startswith("## 📚 Reference") or line.startswith("## Reference"):
                insertion_index = i
                break

        # Build PDF sections
        pdf_content_lines = []

        if "Chapter Overview" in pdf_sections:
            pdf_content_lines.append("## 📄 PDF Documentation Structure\n")
            pdf_content_lines.append("*From PDF analysis*\n")
            pdf_content_lines.append(pdf_sections["Chapter Overview"])
            pdf_content_lines.append("\n")

        if "Key Concepts" in pdf_sections:
            pdf_content_lines.append("## 📖 Key Concepts\n")
            pdf_content_lines.append("*Extracted from PDF headings*\n")
            pdf_content_lines.append(pdf_sections["Key Concepts"])
            pdf_content_lines.append("\n")

        # Insert PDF content
        if pdf_content_lines and insertion_index != -1:
            lines[insertion_index:insertion_index] = pdf_content_lines

        return "\n".join(lines)

    def _synthesize_github_pdf(self, skill_mds: dict[str, str]) -> str:
        """Synthesize GitHub + PDF sources.

        Strategy:
        - Start with GitHub SKILL.md (has C3.x analysis)
        - Add PDF documentation structure as supplementary section

        Args:
            skill_mds: Dict with 'github' and 'pdf' keys

        Returns:
            Synthesized SKILL.md content
        """
        github_content = skill_mds["github"]
        pdf_sections = self._parse_skill_md_sections(skill_mds["pdf"])

        lines = github_content.split("\n")
        insertion_index = -1

        # Find insertion point before Reference Documentation
        for i, line in enumerate(lines):
            if line.startswith("## 📚 Reference") or line.startswith("## Reference"):
                insertion_index = i
                break

        # Build PDF sections
        pdf_content_lines = []

        if "Chapter Overview" in pdf_sections:
            pdf_content_lines.append("## 📄 PDF Documentation Structure\n")
            pdf_content_lines.append("*From PDF analysis*\n")
            pdf_content_lines.append(pdf_sections["Chapter Overview"])
            pdf_content_lines.append("\n")

        # Insert PDF content
        if pdf_content_lines and insertion_index != -1:
            lines[insertion_index:insertion_index] = pdf_content_lines

        return "\n".join(lines)

    def _generate_minimal_skill_md(self) -> str:
        """Generate minimal SKILL.md (legacy fallback behavior).

        Used when no source SKILL.md files are available.
        """
        skill_name = self.name.lower().replace("_", "-").replace(" ", "-")[:64]
        desc = self.description[:1024] if len(self.description) > 1024 else self.description

        content = f"""---
name: {skill_name}
description: {desc}
---

# {self.name.title()}

{self.description}

## 📚 Sources

This skill combines knowledge from multiple sources:

"""

        # List sources
        for source in self.config.get("sources", []):
            source_type = source["type"]
            if source_type == "documentation":
                content += f"- ✅ **Documentation**: {source.get('base_url', 'N/A')}\n"
                content += f"  - Pages: {source.get('max_pages', 'unlimited')}\n"
            elif source_type == "github":
                content += f"- ✅ **GitHub Repository**: {source.get('repo', 'N/A')}\n"
                content += f"  - Code Analysis: {source.get('code_analysis_depth', 'surface')}\n"
                content += f"  - Issues: {source.get('max_issues', 0)}\n"
            elif source_type == "pdf":
                content += f"- ✅ **PDF Document**: {source.get('path', 'N/A')}\n"

        # C3.x Architecture & Code Analysis section (if available)
        github_data = self.scraped_data.get("github", {}).get("data", {})
        if github_data.get("c3_analysis"):
            content += self._format_c3_summary_section(github_data["c3_analysis"])

        # Data quality section
        if self.conflicts:
            content += "\n## ⚠️ Data Quality\n\n"
            content += f"**{len(self.conflicts)} conflicts detected** between sources.\n\n"

            # Count by type
            by_type = {}
            for conflict in self.conflicts:
                ctype = (
                    conflict.type if hasattr(conflict, "type") else conflict.get("type", "unknown")
                )
                by_type[ctype] = by_type.get(ctype, 0) + 1

            content += "**Conflict Breakdown:**\n"
            for ctype, count in by_type.items():
                content += f"- {ctype}: {count}\n"

            content += "\nSee `references/conflicts.md` for detailed conflict information.\n"

        # Merged API section (if available)
        if self.merged_data:
            content += self._format_merged_apis()

        # Quick reference from each source
        content += "\n## 📚 Reference Documentation\n\n"
        content += "Organized by source:\n\n"

        for source in self.config.get("sources", []):
            source_type = source["type"]
            content += f"- [{source_type.title()}](references/{source_type}/)\n"

        # When to use this skill
        content += "\n## 💡 When to Use This Skill\n\n"
        content += "Use this skill when you need to:\n"
        content += f"- Understand how to use {self.name}\n"
        content += "- Look up API documentation\n"
        content += "- Find usage examples\n"

        if "github" in self.scraped_data:
            content += "- Check for known issues or recent changes\n"
            content += "- Review release history\n"

        content += "\n---\n\n"
        content += "*Generated by Skill Seeker's unified multi-source scraper*\n"

        return content

    def _format_merged_apis(self) -> str:
        """Format merged APIs section with inline conflict warnings."""
        if not self.merged_data:
            return ""

        content = "\n## 🔧 API Reference\n\n"
        content += "*Merged from documentation and code analysis*\n\n"

        apis = self.merged_data.get("apis", {})

        if not apis:
            return content + "*No APIs to display*\n"

        # Group APIs by status
        matched = {k: v for k, v in apis.items() if v.get("status") == "matched"}
        conflicts = {k: v for k, v in apis.items() if v.get("status") == "conflict"}
        docs_only = {k: v for k, v in apis.items() if v.get("status") == "docs_only"}
        code_only = {k: v for k, v in apis.items() if v.get("status") == "code_only"}

        # Show matched APIs first
        if matched:
            content += "### ✅ Verified APIs\n\n"
            content += "*Documentation and code agree*\n\n"
            for _api_name, api_data in list(matched.items())[:10]:  # Limit to first 10
                content += self._format_api_entry(api_data, inline_conflict=False)

        # Show conflicting APIs with warnings
        if conflicts:
            content += "\n### ⚠️ APIs with Conflicts\n\n"
            content += "*Documentation and code differ*\n\n"
            for _api_name, api_data in list(conflicts.items())[:10]:
                content += self._format_api_entry(api_data, inline_conflict=True)

        # Show undocumented APIs
        if code_only:
            content += "\n### 💻 Undocumented APIs\n\n"
            content += f"*Found in code but not in documentation ({len(code_only)} total)*\n\n"
            for _api_name, api_data in list(code_only.items())[:5]:
                content += self._format_api_entry(api_data, inline_conflict=False)

        # Show removed/missing APIs
        if docs_only:
            content += "\n### 📄 Documentation-Only APIs\n\n"
            content += f"*Documented but not found in code ({len(docs_only)} total)*\n\n"
            for _api_name, api_data in list(docs_only.items())[:5]:
                content += self._format_api_entry(api_data, inline_conflict=False)

        content += "\n*See references/api/ for complete API documentation*\n"

        return content
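
    # Shape of merged_data consumed above (field names follow the .get()
    # calls; the values are illustrative):
    #
    #   {"apis": {"get": {
    #       "name": "get",
    #       "status": "matched",   # or "conflict" / "docs_only" / "code_only"
    #       "merged_signature": "get(url, params=None)",
    #       "merged_description": "...",
    #       "source": "docs+code",
    #   }}}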

    def _format_api_entry(self, api_data: dict, inline_conflict: bool = False) -> str:
        """Format a single API entry."""
        name = api_data.get("name", "Unknown")
        signature = api_data.get("merged_signature", name)
        description = api_data.get("merged_description", "")
        warning = api_data.get("warning", "")

        entry = f"#### `{signature}`\n\n"

        if description:
            entry += f"{description}\n\n"

        # Add inline conflict warning
        if inline_conflict and warning:
            entry += f"⚠️ **Conflict**: {warning}\n\n"

        # Show both versions if available
        conflict = api_data.get("conflict", {})
        if conflict:
            docs_info = conflict.get("docs_info")
            code_info = conflict.get("code_info")

            if docs_info and code_info:
                entry += "**Documentation says:**\n"
                entry += f"```\n{docs_info.get('raw_signature', 'N/A')}\n```\n\n"
                entry += "**Code implementation:**\n"
                entry += f"```\n{self._format_code_signature(code_info)}\n```\n\n"

        # Add source info
        source = api_data.get("source", "unknown")
        entry += f"*Source: {source}*\n\n"

        entry += "---\n\n"

        return entry

    def _format_code_signature(self, code_info: dict) -> str:
        """Format code signature for display."""
        name = code_info.get("name", "")
        params = code_info.get("parameters", [])
        return_type = code_info.get("return_type")

        param_strs = []
        for param in params:
            param_str = param.get("name", "")
            if param.get("type_hint"):
                param_str += f": {param['type_hint']}"
            if param.get("default"):
                param_str += f" = {param['default']}"
            param_strs.append(param_str)

        sig = f"{name}({', '.join(param_strs)})"
        if return_type:
            sig += f" -> {return_type}"

        return sig
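
    # Worked example for the signature formatter above (hypothetical
    # code_info):
    #
    #   _format_code_signature({
    #       "name": "get",
    #       "parameters": [
    #           {"name": "url", "type_hint": "str"},
    #           {"name": "timeout", "type_hint": "float", "default": "5.0"},
    #       ],
    #       "return_type": "Response",
    #   })
    #   -> "get(url: str, timeout: float = 5.0) -> Response"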

    def _generate_references(self):
        """Generate reference files organized by source."""
        logger.info("Generating reference files...")

        # Generate references for each source type (now lists)
        docs_list = self.scraped_data.get("documentation", [])
        if docs_list:
            self._generate_docs_references(docs_list)

        github_list = self.scraped_data.get("github", [])
        if github_list:
            self._generate_github_references(github_list)

        pdf_list = self.scraped_data.get("pdf", [])
        if pdf_list:
            self._generate_pdf_references(pdf_list)

        # Generate merged API reference if available
        if self.merged_data:
            self._generate_merged_api_reference()

        # Generate C3.x codebase analysis references if available (multi-source)
        github_list = self.scraped_data.get("github", [])
        for github_source in github_list:
            github_data = github_source.get("data", {})
            if github_data.get("c3_analysis"):
                repo_id = github_source.get("repo_id", "unknown")
                self._generate_c3_analysis_references(repo_id=repo_id)
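
    # Reference tree produced by the helpers below (<source_id>/<repo_id>
    # stand in for real identifiers):
    #
    #   output/<name>/references/
    #       documentation/index.md, documentation/<source_id>/...
    #       github/index.md, github/<repo_id>/{index.md, README.md, issues.md, releases.md}
    #       pdf/index.md
    #       api/merged_api.md
    #       codebase_analysis/<repo_id>/ARCHITECTURE.md (+ patterns/, examples/, ...)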

    def _generate_docs_references(self, docs_list: list[dict]):
        """Generate references from multiple documentation sources."""
        # Skip if no documentation sources
        if not docs_list:
            return

        docs_dir = os.path.join(self.skill_dir, "references", "documentation")
        os.makedirs(docs_dir, exist_ok=True)

        all_copied_files: list[str] = []

        # Process each documentation source
        for i, doc_source in enumerate(docs_list):
            source_id = doc_source.get("source_id", f"source_{i}")
            base_url = doc_source.get("base_url", "Unknown")
            refs_dir = doc_source.get("refs_dir", "")

            # Create subdirectory for this source
            source_dir = os.path.join(docs_dir, source_id)
            os.makedirs(source_dir, exist_ok=True)

            copied_files: list[str] = []

            if refs_dir and os.path.isdir(refs_dir):
                for entry in sorted(os.listdir(refs_dir)):
                    src_path = os.path.join(refs_dir, entry)
                    dst_path = os.path.join(source_dir, entry)
                    if not os.path.isfile(src_path):
                        continue
                    shutil.copy2(src_path, dst_path)
                    copied_files.append(entry)

            # Create index for this source
            source_index_path = os.path.join(source_dir, "index.md")
            with open(source_index_path, "w", encoding="utf-8") as f:
                f.write(f"# Documentation: {source_id}\n\n")
                f.write(f"**Source**: {base_url}\n\n")
                f.write(f"**Pages**: {doc_source.get('total_pages', 'N/A')}\n\n")

                if copied_files:
                    files_no_index = [p for p in copied_files if p.lower() != "index.md"]
                    f.write("## Files\n\n")
                    for filename in files_no_index:
                        f.write(f"- [{filename}]({filename})\n")
                else:
                    f.write("No reference files available.\n")

            all_copied_files.extend(copied_files)

        # Create main index
        index_path = os.path.join(docs_dir, "index.md")
        with open(index_path, "w", encoding="utf-8") as f:
            f.write("# Documentation References\n\n")
            f.write(f"Combined from {len(docs_list)} documentation sources.\n\n")

            f.write("## Sources\n\n")
            for doc_source in docs_list:
                source_id = doc_source.get("source_id", "unknown")
                base_url = doc_source.get("base_url", "Unknown")
                total_pages = doc_source.get("total_pages", "N/A")
                f.write(
                    f"- [{source_id}]({source_id}/index.md) - {base_url} ({total_pages} pages)\n"
                )

        logger.info(f"Created documentation references ({len(docs_list)} sources)")

    def _generate_github_references(self, github_list: list[dict]):
        """Generate references from multiple GitHub sources."""
        # Skip if no GitHub sources
        if not github_list:
            return

        github_dir = os.path.join(self.skill_dir, "references", "github")
        os.makedirs(github_dir, exist_ok=True)

        # Process each GitHub source
        for i, github_source in enumerate(github_list):
            repo = github_source.get("repo", f"repo_{i}")
            repo_id = github_source.get("repo_id", repo.replace("/", "_"))
            github_data = github_source.get("data", {})

            # Create subdirectory for this repo
            repo_dir = os.path.join(github_dir, repo_id)
            os.makedirs(repo_dir, exist_ok=True)

            # Create README reference
            if github_data.get("readme"):
                readme_path = os.path.join(repo_dir, "README.md")
                with open(readme_path, "w", encoding="utf-8") as f:
                    f.write(f"# Repository README: {repo}\n\n")
                    f.write(github_data["readme"])

            # Create issues reference
            if github_data.get("issues"):
                issues_path = os.path.join(repo_dir, "issues.md")
                with open(issues_path, "w", encoding="utf-8") as f:
                    f.write(f"# GitHub Issues: {repo}\n\n")
                    f.write(f"{len(github_data['issues'])} recent issues.\n\n")

                    for issue in github_data["issues"][:20]:
                        f.write(f"## #{issue['number']}: {issue['title']}\n\n")
                        f.write(f"**State**: {issue['state']}\n")
                        if issue.get("labels"):
                            f.write(f"**Labels**: {', '.join(issue['labels'])}\n")
                        f.write(f"**URL**: {issue.get('url', 'N/A')}\n\n")

            # Create releases reference
            if github_data.get("releases"):
                releases_path = os.path.join(repo_dir, "releases.md")
                with open(releases_path, "w", encoding="utf-8") as f:
                    f.write(f"# Releases: {repo}\n\n")

                    for release in github_data["releases"][:10]:
                        f.write(f"## {release['tag_name']}: {release.get('name', 'N/A')}\n\n")
                        f.write(f"**Published**: {release.get('published_at', 'N/A')[:10]}\n\n")
                        if release.get("body"):
                            f.write(release["body"][:500])
                            f.write("\n\n")

            # Create index for this repo
            repo_index_path = os.path.join(repo_dir, "index.md")
            repo_info = github_data.get("repo_info", {})
            with open(repo_index_path, "w", encoding="utf-8") as f:
                f.write(f"# GitHub: {repo}\n\n")
                f.write(f"**Stars**: {repo_info.get('stars', 'N/A')}\n")
                f.write(f"**Language**: {repo_info.get('language', 'N/A')}\n")
                f.write(f"**Issues**: {len(github_data.get('issues', []))}\n")
                f.write(f"**Releases**: {len(github_data.get('releases', []))}\n\n")
                f.write("## Files\n\n")
                f.write("- [README.md](README.md)\n")
                if github_data.get("issues"):
                    f.write("- [issues.md](issues.md)\n")
                if github_data.get("releases"):
                    f.write("- [releases.md](releases.md)\n")

        # Create main index
        index_path = os.path.join(github_dir, "index.md")
        with open(index_path, "w", encoding="utf-8") as f:
            f.write("# GitHub References\n\n")
            f.write(f"Combined from {len(github_list)} GitHub repositories.\n\n")

            f.write("## Repositories\n\n")
            for github_source in github_list:
                repo = github_source.get("repo", "unknown")
                repo_id = github_source.get("repo_id", repo.replace("/", "_"))
                github_data = github_source.get("data", {})
                repo_info = github_data.get("repo_info", {})
                stars = repo_info.get("stars", "N/A")
                f.write(f"- [{repo}]({repo_id}/index.md) - {stars} stars\n")

        logger.info(f"Created GitHub references ({len(github_list)} repos)")

    def _generate_pdf_references(self, pdf_list: list[dict]):
        """Generate references from PDF sources."""
        # Skip if no PDF sources
        if not pdf_list:
            return

        pdf_dir = os.path.join(self.skill_dir, "references", "pdf")
        os.makedirs(pdf_dir, exist_ok=True)

        # Create index
        index_path = os.path.join(pdf_dir, "index.md")
        with open(index_path, "w", encoding="utf-8") as f:
            f.write("# PDF Documentation\n\n")
            f.write(f"Reference from {len(pdf_list)} PDF document(s).\n\n")

        logger.info(f"Created PDF references ({len(pdf_list)} sources)")

    def _generate_merged_api_reference(self):
        """Generate merged API reference file."""
        api_dir = os.path.join(self.skill_dir, "references", "api")
        os.makedirs(api_dir, exist_ok=True)

        api_path = os.path.join(api_dir, "merged_api.md")

        with open(api_path, "w") as f:
            f.write("# Merged API Reference\n\n")
            f.write("*Combined from documentation and code analysis*\n\n")

            apis = self.merged_data.get("apis", {})

            for api_name in sorted(apis.keys()):
                api_data = apis[api_name]
                entry = self._format_api_entry(api_data, inline_conflict=True)
                f.write(entry)

        logger.info(f"Created merged API reference ({len(apis)} APIs)")

    def _generate_c3_analysis_references(self, repo_id: str = "github"):
        """Generate codebase analysis references (C3.5) for a specific GitHub source.

        Args:
            repo_id: Repository identifier (e.g., 'encode_httpx') for multi-source support
        """
        # Find the correct github_source from the list
        github_list = self.scraped_data.get("github", [])
        github_source = None
        for source in github_list:
            if source.get("repo_id") == repo_id:
                github_source = source
                break

        if not github_source:
            logger.warning(f"GitHub source with repo_id '{repo_id}' not found")
            return

        github_data = github_source.get("data", {})
        c3_data = github_data.get("c3_analysis")

        if not c3_data:
            return

        # Create unique directory per repo for multi-source support
        c3_dir = os.path.join(self.skill_dir, "references", "codebase_analysis", repo_id)
        os.makedirs(c3_dir, exist_ok=True)

        logger.info("Generating C3.x codebase analysis references...")

        # Generate ARCHITECTURE.md (main deliverable)
        self._generate_architecture_overview(c3_dir, c3_data, github_data)

        # Generate subdirectories for each C3.x component
        self._generate_pattern_references(c3_dir, c3_data.get("patterns"))
        self._generate_example_references(c3_dir, c3_data.get("test_examples"))
        self._generate_guide_references(c3_dir, c3_data.get("how_to_guides"))
        self._generate_config_references(c3_dir, c3_data.get("config_patterns"))
        self._copy_architecture_details(c3_dir, c3_data.get("architecture"))

        logger.info("✅ Created codebase analysis references")
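
    # Shape of the c3_analysis blob consumed above and rendered below (keys
    # per the .get() calls; nested contents are illustrative):
    #
    #   {"patterns": [{"file_path": "...", "patterns": [...]}, ...],
    #    "test_examples": {"total_examples": 42, "high_value_count": 7, "examples": [...]},
    #    "how_to_guides": {"guides": [...]},
    #    "config_patterns": {"config_files": [...], "ai_enhancements": {...}},
    #    "architecture": {"patterns": [...], "languages": {...},
    #                     "frameworks_detected": [...], "directory_structure": {...}}}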
|
|
1033
|
+
def _generate_architecture_overview(self, c3_dir: str, c3_data: dict, github_data: dict):
|
|
1034
|
+
"""Generate comprehensive ARCHITECTURE.md (C3.5 main deliverable)."""
|
|
1035
|
+
arch_path = os.path.join(c3_dir, "ARCHITECTURE.md")
|
|
1036
|
+
|
|
1037
|
+
with open(arch_path, "w", encoding="utf-8") as f:
|
|
1038
|
+
f.write(f"# {self.name.title()} Architecture Overview\n\n")
|
|
1039
|
+
f.write("*Generated from C3.x automated codebase analysis*\n\n")
|
|
1040
|
+
|
|
1041
|
+
# Section 1: Overview
|
|
1042
|
+
f.write("## 1. Overview\n\n")
|
|
1043
|
+
f.write(f"{self.description}\n\n")
|
|
1044
|
+
|
|
1045
|
+
# Section 2: Architectural Patterns (C3.7)
|
|
1046
|
+
if c3_data.get("architecture"):
|
|
1047
|
+
arch = c3_data["architecture"]
|
|
1048
|
+
patterns = arch.get("patterns", [])
|
|
1049
|
+
if patterns:
|
|
1050
|
+
f.write("## 2. Architectural Patterns\n\n")
|
|
1051
|
+
f.write("*Detected architectural patterns from codebase structure*\n\n")
|
|
1052
|
+
for pattern in patterns[:5]: # Top 5 patterns
|
|
1053
|
+
f.write(f"### {pattern['pattern_name']}\n\n")
|
|
1054
|
+
f.write(f"- **Confidence**: {pattern['confidence']:.2f}\n")
|
|
1055
|
+
if pattern.get("framework"):
|
|
1056
|
+
f.write(f"- **Framework**: {pattern['framework']}\n")
|
|
1057
|
+
if pattern.get("evidence"):
|
|
1058
|
+
f.write(f"- **Evidence**: {', '.join(pattern['evidence'][:3])}\n")
|
|
1059
|
+
f.write("\n")
|
|
1060
|
+
|
|
1061
|
+
# Section 3: Technology Stack
|
|
1062
|
+
f.write("## 3. Technology Stack\n\n")
|
|
1063
|
+
|
|
1064
|
+
# Try to get languages from C3.7 architecture analysis first
|
|
1065
|
+
languages = {}
|
|
1066
|
+
if c3_data.get("architecture"):
|
|
1067
|
+
languages = c3_data["architecture"].get("languages", {})
|
|
1068
|
+
|
|
1069
|
+
# If no languages from C3.7, try to get from GitHub data
|
|
1070
|
+
# github_data already available from method scope
|
|
1071
|
+
if not languages and github_data.get("languages"):
|
|
1072
|
+
# GitHub data has languages as list, convert to dict with count 1
|
|
1073
|
+
languages = dict.fromkeys(github_data["languages"], 1)
|
|
1074
|
+
|
|
1075
|
+
if languages:
|
|
1076
|
+
f.write("**Languages Detected**:\n")
|
|
1077
|
+
for lang, count in sorted(languages.items(), key=lambda x: x[1], reverse=True)[:5]:
|
|
1078
|
+
if isinstance(count, int):
|
|
1079
|
+
f.write(f"- {lang}: {count} files\n")
|
|
1080
|
+
else:
|
|
1081
|
+
f.write(f"- {lang}\n")
|
|
1082
|
+
f.write("\n")
|
|
1083
|
+
|
|
1084
|
+
# Add frameworks if available
|
|
1085
|
+
if c3_data.get("architecture"):
|
|
1086
|
+
frameworks = c3_data["architecture"].get("frameworks_detected", [])
|
|
1087
|
+
if frameworks:
|
|
1088
|
+
f.write("**Frameworks & Libraries**:\n")
|
|
1089
|
+
for fw in frameworks[:10]:
|
|
1090
|
+
f.write(f"- {fw}\n")
|
|
1091
|
+
f.write("\n")
|
|
1092
|
+
|
|
1093
|
+
if not languages and not (
|
|
1094
|
+
c3_data.get("architecture") and c3_data["architecture"].get("frameworks_detected")
|
|
1095
|
+
):
|
|
1096
|
+
f.write("*Technology stack analysis not available*\n\n")
|
|
1097
|
+
|
|
1098
|
+
# Section 4: Design Patterns (C3.1)
|
|
1099
|
+
if c3_data.get("patterns"):
|
|
1100
|
+
f.write("## 4. Design Patterns\n\n")
|
|
1101
|
+
f.write("*Classic design patterns identified in the codebase*\n\n")
|
|
1102
|
+
|
|
1103
|
+
# Summarize pattern types
|
|
1104
|
+
pattern_summary = {}
|
|
1105
|
+
for file_data in c3_data["patterns"]:
|
|
1106
|
+
for pattern in file_data.get("patterns", []):
|
|
1107
|
+
ptype = pattern["pattern_type"]
|
|
1108
|
+
pattern_summary[ptype] = pattern_summary.get(ptype, 0) + 1
|
|
1109
|
+
|
|
1110
|
+
if pattern_summary:
|
|
1111
|
+
for ptype, count in sorted(
|
|
1112
|
+
pattern_summary.items(), key=lambda x: x[1], reverse=True
|
|
1113
|
+
):
|
|
1114
|
+
f.write(f"- **{ptype}**: {count} instance(s)\n")
|
|
1115
|
+
f.write(
|
|
1116
|
+
"\nš See `references/codebase_analysis/patterns/` for detailed analysis.\n\n"
|
|
1117
|
+
)
|
|
1118
|
+
else:
|
|
1119
|
+
f.write("*No design patterns detected.*\n\n")
|
|
1120
|
+
|
|
1121
|
+
# Section 5: Configuration Overview (C3.4)
|
|
1122
|
+
if c3_data.get("config_patterns"):
|
|
1123
|
+
f.write("## 5. Configuration Overview\n\n")
|
|
1124
|
+
config = c3_data["config_patterns"]
|
|
1125
|
+
config_files = config.get("config_files", [])
|
|
1126
|
+
|
|
1127
|
+
if config_files:
|
|
1128
|
+
f.write(f"**{len(config_files)} configuration file(s) detected**:\n\n")
|
|
1129
|
+
for cf in config_files[:10]: # Top 10
|
|
1130
|
+
f.write(f"- **`{cf['relative_path']}`**: {cf['type']}\n")
|
|
1131
|
+
if cf.get("purpose"):
|
|
1132
|
+
f.write(f" - Purpose: {cf['purpose']}\n")
|
|
1133
|
+
|
|
1134
|
+
                # Add security warnings if available
                if config.get("ai_enhancements"):
                    insights = config["ai_enhancements"].get("overall_insights", {})
                    security_issues = insights.get("security_issues_found", 0)
                    if security_issues > 0:
                        f.write(
                            f"\n⚠️ **Security Alert**: {security_issues} potential security issue(s) found in configurations.\n"
                        )
                        if insights.get("recommended_actions"):
                            f.write("\n**Recommended Actions**:\n")
                            for action in insights["recommended_actions"][:5]:
                                f.write(f"- {action}\n")
                f.write(
                    "\n📚 See `references/codebase_analysis/configuration/` for details.\n\n"
                )
            else:
                f.write("*No configuration files detected.*\n\n")

            # Section 6: Common Workflows (C3.3)
            if c3_data.get("how_to_guides"):
                f.write("## 6. Common Workflows\n\n")
                guides = c3_data["how_to_guides"].get("guides", [])

                if guides:
                    f.write(f"**{len(guides)} how-to guide(s) extracted from codebase**:\n\n")
                    for guide in guides[:10]:  # Top 10
                        f.write(f"- {guide.get('title', 'Untitled Guide')}\n")
                    f.write(
                        "\n📚 See `references/codebase_analysis/guides/` for detailed tutorials.\n\n"
                    )
                else:
                    f.write("*No workflow guides extracted.*\n\n")

            # Section 7: Usage Examples (C3.2)
            if c3_data.get("test_examples"):
                f.write("## 7. Usage Examples\n\n")
                examples = c3_data["test_examples"]
                total = examples.get("total_examples", 0)
                high_value = examples.get("high_value_count", 0)

                if total > 0:
                    f.write(f"**{total} usage example(s) extracted from tests**:\n")
                    f.write(f"- High-value examples: {high_value}\n")

                    # Category breakdown
                    if examples.get("examples_by_category"):
                        f.write("\n**By Category**:\n")
                        for cat, count in sorted(
                            examples["examples_by_category"].items(),
                            key=lambda x: x[1],
                            reverse=True,
                        ):
                            f.write(f"- {cat}: {count}\n")

                    f.write(
                        "\n📚 See `references/codebase_analysis/examples/` for code samples.\n\n"
                    )
                else:
                    f.write("*No test examples extracted.*\n\n")

            # Section 8: Entry Points & Directory Structure
            f.write("## 8. Entry Points & Directory Structure\n\n")
            f.write("*Analysis based on codebase organization*\n\n")

            if c3_data.get("architecture"):
                dir_struct = c3_data["architecture"].get("directory_structure", {})
                if dir_struct:
                    f.write("**Main Directories**:\n")
                    for dir_name, file_count in sorted(
                        dir_struct.items(), key=lambda x: x[1], reverse=True
                    )[:15]:
                        f.write(f"- `{dir_name}/`: {file_count} file(s)\n")
                    f.write("\n")

            # Footer
            f.write("---\n\n")
            f.write(
                "*This architecture overview was automatically generated by C3.x codebase analysis.*\n"
            )
            f.write("*Last updated: skill build time*\n")

        logger.info("✅ Created ARCHITECTURE.md")

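    # Hedged illustration (not part of the package source): with one extracted
    # guide and a small directory map, the writes above produce ARCHITECTURE.md
    # content roughly like the following; the guide title and directory name
    # are invented for the sketch:
    #
    #   ## 6. Common Workflows
    #   **1 how-to guide(s) extracted from codebase**:
    #   - How to configure the scraper
    #
    #   ## 8. Entry Points & Directory Structure
    #   **Main Directories**:
    #   - `src/`: 42 file(s)
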
    def _generate_pattern_references(self, c3_dir: str, patterns_data: list):
        """Generate design pattern references (C3.1)."""
        if not patterns_data:
            return

        patterns_dir = os.path.join(c3_dir, "patterns")
        os.makedirs(patterns_dir, exist_ok=True)

        # Save JSON data
        json_path = os.path.join(patterns_dir, "detected_patterns.json")
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(patterns_data, f, indent=2, ensure_ascii=False)

        # Create summary markdown
        md_path = os.path.join(patterns_dir, "index.md")
        with open(md_path, "w", encoding="utf-8") as f:
            f.write("# Design Patterns\n\n")
            f.write("*Detected patterns from C3.1 analysis*\n\n")

            # patterns_data is a list of per-file pattern records
            for file_data in patterns_data:
                patterns = file_data.get("patterns", [])
                if patterns:
                    f.write(f"## {file_data['file_path']}\n\n")
                    for p in patterns:
                        f.write(f"### {p['pattern_type']}\n\n")
                        if p.get("class_name"):
                            f.write(f"- **Class**: `{p['class_name']}`\n")
                        if p.get("confidence"):
                            f.write(f"- **Confidence**: {p['confidence']:.2f}\n")
                        if p.get("indicators"):
                            f.write(f"- **Indicators**: {', '.join(p['indicators'][:3])}\n")
                        f.write("\n")

        logger.info(f"  ✓ Design patterns: {len(patterns_data)} files")

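    # Hedged sketch of the patterns_data shape consumed above; the keys match
    # what the method reads, while the concrete values are invented:
    #
    #   patterns_data = [
    #       {
    #           "file_path": "src/factories.py",        # hypothetical path
    #           "patterns": [
    #               {
    #                   "pattern_type": "Factory",      # hypothetical pattern
    #                   "class_name": "WidgetFactory",
    #                   "confidence": 0.85,
    #                   "indicators": ["create_ prefix", "returns subclass"],
    #               }
    #           ],
    #       }
    #   ]
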
    def _generate_example_references(self, c3_dir: str, examples_data: dict):
        """Generate test example references (C3.2)."""
        if not examples_data:
            return

        examples_dir = os.path.join(c3_dir, "examples")
        os.makedirs(examples_dir, exist_ok=True)

        # Save JSON data
        json_path = os.path.join(examples_dir, "test_examples.json")
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(examples_data, f, indent=2, ensure_ascii=False)

        # Create summary markdown
        md_path = os.path.join(examples_dir, "index.md")
        with open(md_path, "w", encoding="utf-8") as f:
            f.write("# Usage Examples\n\n")
            f.write("*Extracted from test files (C3.2)*\n\n")

            total = examples_data.get("total_examples", 0)
            high_value = examples_data.get("high_value_count", 0)

            f.write(f"**Total Examples**: {total}\n")
            f.write(f"**High-Value Examples**: {high_value}\n\n")

            # List high-value examples
            examples = examples_data.get("examples", [])
            high_value_examples = [e for e in examples if e.get("confidence", 0) > 0.7]

            if high_value_examples:
                f.write("## High-Value Examples\n\n")
                for ex in high_value_examples[:20]:  # Top 20
                    f.write(f"### {ex.get('description', 'Example')}\n\n")
                    f.write(f"- **Category**: {ex.get('category', 'unknown')}\n")
                    f.write(f"- **Confidence**: {ex.get('confidence', 0):.2f}\n")
                    f.write(f"- **File**: `{ex.get('file_path', 'N/A')}`\n")
                    if ex.get("code_snippet"):
                        f.write(f"\n```python\n{ex['code_snippet'][:300]}\n```\n")
                    f.write("\n")

        logger.info(f"  ✓ Test examples: {total} total, {high_value} high-value")

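    # Hedged sketch of the examples_data structure consumed above; only the
    # key names are taken from the code, the values are illustrative:
    #
    #   examples_data = {
    #       "total_examples": 12,
    #       "high_value_count": 4,
    #       "examples": [
    #           {
    #               "description": "Create a client",       # hypothetical
    #               "category": "initialization",
    #               "confidence": 0.9,                      # > 0.7 counts as high-value
    #               "file_path": "tests/test_client.py",
    #               "code_snippet": "client = Client()",
    #           }
    #       ],
    #   }
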
    def _generate_guide_references(self, c3_dir: str, guides_data: dict):
        """Generate how-to guide references (C3.3)."""
        if not guides_data:
            return

        guides_dir = os.path.join(c3_dir, "guides")
        os.makedirs(guides_dir, exist_ok=True)

        # Save JSON collection data
        json_path = os.path.join(guides_dir, "guide_collection.json")
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(guides_data, f, indent=2, ensure_ascii=False)

        guides = guides_data.get("guides", [])

        # Create index
        md_path = os.path.join(guides_dir, "index.md")
        with open(md_path, "w", encoding="utf-8") as f:
            f.write("# How-To Guides\n\n")
            f.write("*Workflow tutorials extracted from codebase (C3.3)*\n\n")

            f.write(f"**Total Guides**: {len(guides)}\n\n")

            if guides:
                f.write("## Available Guides\n\n")
                for guide in guides:
                    f.write(
                        f"- [{guide.get('title', 'Untitled')}](guide_{guide.get('id', 'unknown')}.md)\n"
                    )
                f.write("\n")

        # Save individual guide markdown files
        for guide in guides:
            guide_id = guide.get("id", "unknown")
            guide_path = os.path.join(guides_dir, f"guide_{guide_id}.md")

            with open(guide_path, "w", encoding="utf-8") as f:
                f.write(f"# {guide.get('title', 'Untitled Guide')}\n\n")

                if guide.get("description"):
                    f.write(f"{guide['description']}\n\n")

                steps = guide.get("steps", [])
                if steps:
                    f.write("## Steps\n\n")
                    for i, step in enumerate(steps, 1):
                        f.write(f"### {i}. {step.get('action', 'Step')}\n\n")
                        if step.get("code_example"):
                            lang = step.get("language", "python")
                            f.write(f"```{lang}\n{step['code_example']}\n```\n\n")
                        if step.get("explanation"):
                            f.write(f"{step['explanation']}\n\n")

        logger.info(f"  ✓ How-to guides: {len(guides)}")

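    # Hedged sketch of guides_data, inferred from the keys read above; the
    # values are invented examples:
    #
    #   guides_data = {
    #       "guides": [
    #           {
    #               "id": "setup",                      # hypothetical
    #               "title": "Set up the project",
    #               "description": "Steps to get a working environment.",
    #               "steps": [
    #                   {
    #                       "action": "Install dependencies",
    #                       "language": "bash",
    #                       "code_example": "pip install -e .",
    #                       "explanation": "Installs the package locally.",
    #                   }
    #               ],
    #           }
    #       ]
    #   }
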
    def _generate_config_references(self, c3_dir: str, config_data: dict):
        """Generate configuration pattern references (C3.4)."""
        if not config_data:
            return

        config_dir = os.path.join(c3_dir, "configuration")
        os.makedirs(config_dir, exist_ok=True)

        # Save JSON data
        json_path = os.path.join(config_dir, "config_patterns.json")
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(config_data, f, indent=2, ensure_ascii=False)

        # Create summary markdown
        md_path = os.path.join(config_dir, "index.md")
        config_files = config_data.get("config_files", [])

        with open(md_path, "w", encoding="utf-8") as f:
            f.write("# Configuration Patterns\n\n")
            f.write("*Detected configuration files (C3.4)*\n\n")

            f.write(f"**Total Config Files**: {len(config_files)}\n\n")

            if config_files:
                f.write("## Configuration Files\n\n")
                for cf in config_files:
                    f.write(f"### `{cf['relative_path']}`\n\n")
                    f.write(f"- **Type**: {cf['type']}\n")
                    f.write(f"- **Purpose**: {cf.get('purpose', 'N/A')}\n")
                    f.write(f"- **Settings**: {len(cf.get('settings', []))}\n")

                    # Show AI enhancements if available
                    if cf.get("ai_enhancement"):
                        enh = cf["ai_enhancement"]
                        if enh.get("security_concern"):
                            f.write(f"- **Security**: {enh['security_concern']}\n")
                        if enh.get("best_practice"):
                            f.write(f"- **Best Practice**: {enh['best_practice']}\n")

                    f.write("\n")

            # Overall insights
            if config_data.get("ai_enhancements"):
                insights = config_data["ai_enhancements"].get("overall_insights", {})
                if insights:
                    f.write("## Overall Insights\n\n")
                    if insights.get("security_issues_found"):
                        f.write(
                            f"⚠️ **Security Issues**: {insights['security_issues_found']}\n\n"
                        )
                    if insights.get("recommended_actions"):
                        f.write("**Recommended Actions**:\n")
                        for action in insights["recommended_actions"]:
                            f.write(f"- {action}\n")
                        f.write("\n")

        logger.info(f"  ✓ Configuration files: {len(config_files)}")

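    # Hedged sketch of config_data; the keys mirror those accessed above and
    # the values are made up:
    #
    #   config_data = {
    #       "config_files": [
    #           {
    #               "relative_path": ".env.example",    # hypothetical
    #               "type": "env",
    #               "purpose": "Runtime secrets template",
    #               "settings": ["API_KEY", "DEBUG"],
    #               "ai_enhancement": {
    #                   "security_concern": "API key committed in plain text",
    #                   "best_practice": "Load secrets from the environment",
    #               },
    #           }
    #       ],
    #       "ai_enhancements": {
    #           "overall_insights": {
    #               "security_issues_found": 1,
    #               "recommended_actions": ["Rotate the exposed API key"],
    #           }
    #       },
    #   }
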
    def _copy_architecture_details(self, c3_dir: str, arch_data: dict):
        """Copy architectural pattern JSON details (C3.7)."""
        if not arch_data:
            return

        arch_dir = os.path.join(c3_dir, "architecture_details")
        os.makedirs(arch_dir, exist_ok=True)

        # Save full JSON data
        json_path = os.path.join(arch_dir, "architectural_patterns.json")
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(arch_data, f, indent=2, ensure_ascii=False)

        # Create summary markdown
        md_path = os.path.join(arch_dir, "index.md")
        with open(md_path, "w", encoding="utf-8") as f:
            f.write("# Architectural Patterns (Detailed)\n\n")
            f.write("*Comprehensive architectural analysis (C3.7)*\n\n")

            patterns = arch_data.get("patterns", [])
            if patterns:
                f.write("## Detected Patterns\n\n")
                for p in patterns:
                    f.write(f"### {p['pattern_name']}\n\n")
                    f.write(f"- **Confidence**: {p['confidence']:.2f}\n")
                    if p.get("framework"):
                        f.write(f"- **Framework**: {p['framework']}\n")
                    if p.get("evidence"):
                        f.write("- **Evidence**:\n")
                        for e in p["evidence"][:5]:
                            f.write(f"  - {e}\n")
                    f.write("\n")

        logger.info(f"  ✓ Architectural details: {len(patterns)} patterns")

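    # Hedged sketch of arch_data for _copy_architecture_details; only the key
    # names come from the code above, the values are illustrative:
    #
    #   arch_data = {
    #       "patterns": [
    #           {
    #               "pattern_name": "MVC",              # hypothetical
    #               "confidence": 0.9,
    #               "framework": "Django",              # hypothetical
    #               "evidence": ["models/ package", "views.py modules"],
    #           }
    #       ]
    #   }
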
    def _format_c3_summary_section(self, c3_data: dict) -> str:
        """Format C3.x analysis summary for SKILL.md."""
        content = "\n## 🏗️ Architecture & Code Analysis\n\n"
        content += "*This skill includes comprehensive codebase analysis*\n\n"

        # Add architectural pattern summary
        if c3_data.get("architecture"):
            patterns = c3_data["architecture"].get("patterns", [])
            if patterns:
                top_pattern = patterns[0]
                content += f"**Primary Architecture**: {top_pattern['pattern_name']}"
                if top_pattern.get("framework"):
                    content += f" ({top_pattern['framework']})"
                content += f" - Confidence: {top_pattern['confidence']:.0%}\n\n"

        # Add design patterns summary
        if c3_data.get("patterns"):
            total_patterns = sum(len(f.get("patterns", [])) for f in c3_data["patterns"])
            if total_patterns > 0:
                content += f"**Design Patterns**: {total_patterns} detected\n"

                # Show top 3 pattern types
                pattern_summary = {}
                for file_data in c3_data["patterns"]:
                    for pattern in file_data.get("patterns", []):
                        ptype = pattern["pattern_type"]
                        pattern_summary[ptype] = pattern_summary.get(ptype, 0) + 1

                top_patterns = sorted(pattern_summary.items(), key=lambda x: x[1], reverse=True)[:3]
                if top_patterns:
                    content += (
                        f"- Top patterns: {', '.join([f'{p[0]} ({p[1]})' for p in top_patterns])}\n"
                    )
                content += "\n"

        # Add test examples summary
        if c3_data.get("test_examples"):
            total = c3_data["test_examples"].get("total_examples", 0)
            high_value = c3_data["test_examples"].get("high_value_count", 0)
            if total > 0:
                content += f"**Usage Examples**: {total} extracted from tests ({high_value} high-value)\n\n"

        # Add how-to guides summary
        if c3_data.get("how_to_guides"):
            guide_count = len(c3_data["how_to_guides"].get("guides", []))
            if guide_count > 0:
                content += f"**How-To Guides**: {guide_count} workflow tutorials\n\n"

        # Add configuration summary
        if c3_data.get("config_patterns"):
            config_files = c3_data["config_patterns"].get("config_files", [])
            if config_files:
                content += f"**Configuration Files**: {len(config_files)} analyzed\n"

                # Add security warning if present
                if c3_data["config_patterns"].get("ai_enhancements"):
                    insights = c3_data["config_patterns"]["ai_enhancements"].get(
                        "overall_insights", {}
                    )
                    security_issues = insights.get("security_issues_found", 0)
                    if security_issues > 0:
                        content += f"- ⚠️ **Security Alert**: {security_issues} issue(s) detected\n"
                content += "\n"

        # Add link to ARCHITECTURE.md
        content += "📚 **See** `references/codebase_analysis/ARCHITECTURE.md` for complete architectural overview.\n\n"

        return content

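    # Hedged example of the Markdown _format_c3_summary_section returns for a
    # populated c3_data; the figures and names are invented:
    #
    #   ## 🏗️ Architecture & Code Analysis
    #   *This skill includes comprehensive codebase analysis*
    #
    #   **Primary Architecture**: MVC (Django) - Confidence: 90%
    #   **Design Patterns**: 7 detected
    #   - Top patterns: Singleton (3), Factory (2), Observer (2)
    #
    #   📚 **See** `references/codebase_analysis/ARCHITECTURE.md` for complete architectural overview.
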
    def _generate_conflicts_report(self):
        """Generate detailed conflicts report."""

        def field(conflict, name, default=None):
            # Conflict entries may be objects or plain dicts; read either safely.
            if hasattr(conflict, name):
                return getattr(conflict, name)
            return conflict.get(name, default)

        conflicts_path = os.path.join(self.skill_dir, "references", "conflicts.md")

        with open(conflicts_path, "w", encoding="utf-8") as f:
            f.write("# Conflict Report\n\n")
            f.write(f"Found **{len(self.conflicts)}** conflicts between sources.\n\n")

            # Group by severity
            high = [c for c in self.conflicts if field(c, "severity") == "high"]
            medium = [c for c in self.conflicts if field(c, "severity") == "medium"]
            low = [c for c in self.conflicts if field(c, "severity") == "low"]

            f.write("## Severity Breakdown\n\n")
            f.write(f"- 🔴 **High**: {len(high)} (action required)\n")
            f.write(f"- 🟡 **Medium**: {len(medium)} (review recommended)\n")
            f.write(f"- 🟢 **Low**: {len(low)} (informational)\n\n")

            # List high severity conflicts
            if high:
                f.write("## 🔴 High Severity\n\n")
                f.write("*These conflicts require immediate attention*\n\n")

                for conflict in high:
                    api_name = field(conflict, "api_name", "Unknown")
                    diff = field(conflict, "difference", "N/A")

                    f.write(f"### {api_name}\n\n")
                    f.write(f"**Issue**: {diff}\n\n")

            # List medium severity
            if medium:
                f.write("## 🟡 Medium Severity\n\n")

                for conflict in medium[:20]:  # Limit to 20
                    api_name = field(conflict, "api_name", "Unknown")
                    diff = field(conflict, "difference", "N/A")

                    f.write(f"### {api_name}\n\n")
                    f.write(f"{diff}\n\n")

        logger.info("Created conflicts report")


if __name__ == "__main__":
    # Test with mock data
    import sys

    if len(sys.argv) < 2:
        print("Usage: python unified_skill_builder.py <config.json>")
        sys.exit(1)

    config_path = sys.argv[1]

    with open(config_path) as f:
        config = json.load(f)

    # Mock scraped data
    scraped_data = {
        "github": {"data": {"readme": "# Test Repository", "issues": [], "releases": []}}
    }

    builder = UnifiedSkillBuilder(config, scraped_data)
    builder.build()

    print(f"\n✅ Test skill built in: output/{config['name']}/")