skill_seekers-2.7.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skill_seekers/__init__.py +22 -0
- skill_seekers/cli/__init__.py +39 -0
- skill_seekers/cli/adaptors/__init__.py +120 -0
- skill_seekers/cli/adaptors/base.py +221 -0
- skill_seekers/cli/adaptors/claude.py +485 -0
- skill_seekers/cli/adaptors/gemini.py +453 -0
- skill_seekers/cli/adaptors/markdown.py +269 -0
- skill_seekers/cli/adaptors/openai.py +503 -0
- skill_seekers/cli/ai_enhancer.py +310 -0
- skill_seekers/cli/api_reference_builder.py +373 -0
- skill_seekers/cli/architectural_pattern_detector.py +525 -0
- skill_seekers/cli/code_analyzer.py +1462 -0
- skill_seekers/cli/codebase_scraper.py +1225 -0
- skill_seekers/cli/config_command.py +563 -0
- skill_seekers/cli/config_enhancer.py +431 -0
- skill_seekers/cli/config_extractor.py +871 -0
- skill_seekers/cli/config_manager.py +452 -0
- skill_seekers/cli/config_validator.py +394 -0
- skill_seekers/cli/conflict_detector.py +528 -0
- skill_seekers/cli/constants.py +72 -0
- skill_seekers/cli/dependency_analyzer.py +757 -0
- skill_seekers/cli/doc_scraper.py +2332 -0
- skill_seekers/cli/enhance_skill.py +488 -0
- skill_seekers/cli/enhance_skill_local.py +1096 -0
- skill_seekers/cli/enhance_status.py +194 -0
- skill_seekers/cli/estimate_pages.py +433 -0
- skill_seekers/cli/generate_router.py +1209 -0
- skill_seekers/cli/github_fetcher.py +534 -0
- skill_seekers/cli/github_scraper.py +1466 -0
- skill_seekers/cli/guide_enhancer.py +723 -0
- skill_seekers/cli/how_to_guide_builder.py +1267 -0
- skill_seekers/cli/install_agent.py +461 -0
- skill_seekers/cli/install_skill.py +178 -0
- skill_seekers/cli/language_detector.py +614 -0
- skill_seekers/cli/llms_txt_detector.py +60 -0
- skill_seekers/cli/llms_txt_downloader.py +104 -0
- skill_seekers/cli/llms_txt_parser.py +150 -0
- skill_seekers/cli/main.py +558 -0
- skill_seekers/cli/markdown_cleaner.py +132 -0
- skill_seekers/cli/merge_sources.py +806 -0
- skill_seekers/cli/package_multi.py +77 -0
- skill_seekers/cli/package_skill.py +241 -0
- skill_seekers/cli/pattern_recognizer.py +1825 -0
- skill_seekers/cli/pdf_extractor_poc.py +1166 -0
- skill_seekers/cli/pdf_scraper.py +617 -0
- skill_seekers/cli/quality_checker.py +519 -0
- skill_seekers/cli/rate_limit_handler.py +438 -0
- skill_seekers/cli/resume_command.py +160 -0
- skill_seekers/cli/run_tests.py +230 -0
- skill_seekers/cli/setup_wizard.py +93 -0
- skill_seekers/cli/split_config.py +390 -0
- skill_seekers/cli/swift_patterns.py +560 -0
- skill_seekers/cli/test_example_extractor.py +1081 -0
- skill_seekers/cli/test_unified_simple.py +179 -0
- skill_seekers/cli/unified_codebase_analyzer.py +572 -0
- skill_seekers/cli/unified_scraper.py +932 -0
- skill_seekers/cli/unified_skill_builder.py +1605 -0
- skill_seekers/cli/upload_skill.py +162 -0
- skill_seekers/cli/utils.py +432 -0
- skill_seekers/mcp/__init__.py +33 -0
- skill_seekers/mcp/agent_detector.py +316 -0
- skill_seekers/mcp/git_repo.py +273 -0
- skill_seekers/mcp/server.py +231 -0
- skill_seekers/mcp/server_fastmcp.py +1249 -0
- skill_seekers/mcp/server_legacy.py +2302 -0
- skill_seekers/mcp/source_manager.py +285 -0
- skill_seekers/mcp/tools/__init__.py +115 -0
- skill_seekers/mcp/tools/config_tools.py +251 -0
- skill_seekers/mcp/tools/packaging_tools.py +826 -0
- skill_seekers/mcp/tools/scraping_tools.py +842 -0
- skill_seekers/mcp/tools/source_tools.py +828 -0
- skill_seekers/mcp/tools/splitting_tools.py +212 -0
- skill_seekers/py.typed +0 -0
- skill_seekers-2.7.3.dist-info/METADATA +2027 -0
- skill_seekers-2.7.3.dist-info/RECORD +79 -0
- skill_seekers-2.7.3.dist-info/WHEEL +5 -0
- skill_seekers-2.7.3.dist-info/entry_points.txt +19 -0
- skill_seekers-2.7.3.dist-info/licenses/LICENSE +21 -0
- skill_seekers-2.7.3.dist-info/top_level.txt +1 -0
@@ -0,0 +1,806 @@

#!/usr/bin/env python3
"""
Source Merger for Multi-Source Skills

Merges documentation and code data intelligently with GitHub insights:
- Rule-based merge: Fast, deterministic rules
- Claude-enhanced merge: AI-powered reconciliation

Handles conflicts and creates unified API reference with GitHub metadata.

Multi-layer architecture (Phase 3):
- Layer 1: C3.x code (ground truth)
- Layer 2: HTML docs (official intent)
- Layer 3: GitHub docs (README/CONTRIBUTING)
- Layer 4: GitHub insights (issues)
"""

import json
import logging
import os
import subprocess
import tempfile
from typing import Any, Optional

from .conflict_detector import Conflict, ConflictDetector

# Import three-stream data classes (Phase 1)
try:
    from .github_fetcher import CodeStream, DocsStream, InsightsStream, ThreeStreamData
except ImportError:
    # Fallback if github_fetcher not available
    ThreeStreamData = None
    CodeStream = None
    DocsStream = None
    InsightsStream = None

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def categorize_issues_by_topic(
    problems: list[dict], solutions: list[dict], topics: list[str]
) -> dict[str, list[dict]]:
    """
    Categorize GitHub issues by topic keywords.

    Args:
        problems: List of common problems (open issues with 5+ comments)
        solutions: List of known solutions (closed issues with comments)
        topics: List of topic keywords to match against

    Returns:
        Dict mapping topic to relevant issues
    """
    categorized = {topic: [] for topic in topics}
    categorized["other"] = []

    all_issues = problems + solutions

    for issue in all_issues:
        # Get searchable text
        title = issue.get("title", "").lower()
        labels = [label.lower() for label in issue.get("labels", [])]
        text = f"{title} {' '.join(labels)}"

        # Find best matching topic
        matched_topic = None
        max_matches = 0

        for topic in topics:
            # Count keyword matches
            topic_keywords = topic.lower().split()
            matches = sum(1 for keyword in topic_keywords if keyword in text)

            if matches > max_matches:
                max_matches = matches
                matched_topic = topic

        # Categorize by best match or 'other'
        if matched_topic and max_matches > 0:
            categorized[matched_topic].append(issue)
        else:
            categorized["other"].append(issue)

    # Remove empty categories
    return {k: v for k, v in categorized.items() if v}


def generate_hybrid_content(
    api_data: dict,
    github_docs: dict | None,
    github_insights: dict | None,
    conflicts: list[Conflict],
) -> dict[str, Any]:
    """
    Generate hybrid content combining API data with GitHub context.

    Args:
        api_data: Merged API data
        github_docs: GitHub docs stream (README, CONTRIBUTING, docs/*.md)
        github_insights: GitHub insights stream (metadata, issues, labels)
        conflicts: List of detected conflicts

    Returns:
        Hybrid content dict with enriched API reference
    """
    hybrid = {"api_reference": api_data, "github_context": {}}

    # Add GitHub documentation layer
    if github_docs:
        hybrid["github_context"]["docs"] = {
            "readme": github_docs.get("readme"),
            "contributing": github_docs.get("contributing"),
            "docs_files_count": len(github_docs.get("docs_files", [])),
        }

    # Add GitHub insights layer
    if github_insights:
        metadata = github_insights.get("metadata", {})
        hybrid["github_context"]["metadata"] = {
            "stars": metadata.get("stars", 0),
            "forks": metadata.get("forks", 0),
            "language": metadata.get("language", "Unknown"),
            "description": metadata.get("description", ""),
        }

        # Add issue insights
        common_problems = github_insights.get("common_problems", [])
        known_solutions = github_insights.get("known_solutions", [])

        hybrid["github_context"]["issues"] = {
            "common_problems_count": len(common_problems),
            "known_solutions_count": len(known_solutions),
            "top_problems": common_problems[:5],  # Top 5 most-discussed
            "top_solutions": known_solutions[:5],
        }

        hybrid["github_context"]["top_labels"] = github_insights.get("top_labels", [])

    # Add conflict summary
    hybrid["conflict_summary"] = {
        "total_conflicts": len(conflicts),
        "by_type": {},
        "by_severity": {},
    }

    for conflict in conflicts:
        # Count by type
        conflict_type = conflict.type
        hybrid["conflict_summary"]["by_type"][conflict_type] = (
            hybrid["conflict_summary"]["by_type"].get(conflict_type, 0) + 1
        )

        # Count by severity
        severity = conflict.severity
        hybrid["conflict_summary"]["by_severity"][severity] = (
            hybrid["conflict_summary"]["by_severity"].get(severity, 0) + 1
        )

    # Add GitHub issue links for relevant APIs
    if github_insights:
        hybrid["issue_links"] = _match_issues_to_apis(
            api_data.get("apis", {}),
            github_insights.get("common_problems", []),
            github_insights.get("known_solutions", []),
        )

    return hybrid


def _match_issues_to_apis(
    apis: dict[str, dict], problems: list[dict], solutions: list[dict]
) -> dict[str, list[dict]]:
    """
    Match GitHub issues to specific APIs by keyword matching.

    Args:
        apis: Dict of API data keyed by name
        problems: List of common problems
        solutions: List of known solutions

    Returns:
        Dict mapping API names to relevant issues
    """
    issue_links = {}
    all_issues = problems + solutions

    for api_name in apis:
        # Extract searchable keywords from API name
        api_keywords = api_name.lower().replace("_", " ").split(".")

        matched_issues = []
        for issue in all_issues:
            title = issue.get("title", "").lower()
            labels = [label.lower() for label in issue.get("labels", [])]
            text = f"{title} {' '.join(labels)}"

            # Check if any API keyword appears in issue
            if any(keyword in text for keyword in api_keywords):
                matched_issues.append(
                    {
                        "number": issue.get("number"),
                        "title": issue.get("title"),
                        "state": issue.get("state"),
                        "comments": issue.get("comments"),
                    }
                )

        if matched_issues:
            issue_links[api_name] = matched_issues

    return issue_links


class RuleBasedMerger:
    """
    Rule-based API merger using deterministic rules with GitHub insights.

    Multi-layer architecture (Phase 3):
    - Layer 1: C3.x code (ground truth)
    - Layer 2: HTML docs (official intent)
    - Layer 3: GitHub docs (README/CONTRIBUTING)
    - Layer 4: GitHub insights (issues)

    Rules:
    1. If API only in docs → Include with [DOCS_ONLY] tag
    2. If API only in code → Include with [UNDOCUMENTED] tag
    3. If both match perfectly → Include normally
    4. If conflict → Include both versions with [CONFLICT] tag, prefer code signature
    """

    def __init__(
        self,
        docs_data: dict,
        github_data: dict,
        conflicts: list[Conflict],
        github_streams: Optional["ThreeStreamData"] = None,
    ):
        """
        Initialize rule-based merger with GitHub streams support.

        Args:
            docs_data: Documentation scraper data (Layer 2: HTML docs)
            github_data: GitHub scraper data (Layer 1: C3.x code)
            conflicts: List of detected conflicts
            github_streams: Optional ThreeStreamData with docs and insights (Layers 3-4)
        """
        self.docs_data = docs_data
        self.github_data = github_data
        self.conflicts = conflicts
        self.github_streams = github_streams

        # Build conflict index for fast lookup
        self.conflict_index = {c.api_name: c for c in conflicts}

        # Extract APIs from both sources
        detector = ConflictDetector(docs_data, github_data)
        self.docs_apis = detector.docs_apis
        self.code_apis = detector.code_apis

        # Extract GitHub streams if available
        self.github_docs = None
        self.github_insights = None
        if github_streams:
            # Layer 3: GitHub docs
            if github_streams.docs_stream:
                self.github_docs = {
                    "readme": github_streams.docs_stream.readme,
                    "contributing": github_streams.docs_stream.contributing,
                    "docs_files": github_streams.docs_stream.docs_files,
                }

            # Layer 4: GitHub insights
            if github_streams.insights_stream:
                self.github_insights = {
                    "metadata": github_streams.insights_stream.metadata,
                    "common_problems": github_streams.insights_stream.common_problems,
                    "known_solutions": github_streams.insights_stream.known_solutions,
                    "top_labels": github_streams.insights_stream.top_labels,
                }

    def merge_all(self) -> dict[str, Any]:
        """
        Merge all APIs using rule-based logic with GitHub insights (Phase 3).

        Returns:
            Dict containing merged API data with hybrid content
        """
        logger.info("Starting rule-based merge with GitHub streams...")

        merged_apis = {}

        # Get all unique API names
        all_api_names = set(self.docs_apis.keys()) | set(self.code_apis.keys())

        for api_name in sorted(all_api_names):
            merged_api = self._merge_single_api(api_name)
            merged_apis[api_name] = merged_api

        logger.info(f"Merged {len(merged_apis)} APIs")

        # Build base result
        merged_data = {
            "merge_mode": "rule-based",
            "apis": merged_apis,
            "summary": {
                "total_apis": len(merged_apis),
                "docs_only": sum(1 for api in merged_apis.values() if api["status"] == "docs_only"),
                "code_only": sum(1 for api in merged_apis.values() if api["status"] == "code_only"),
                "matched": sum(1 for api in merged_apis.values() if api["status"] == "matched"),
                "conflict": sum(1 for api in merged_apis.values() if api["status"] == "conflict"),
            },
        }

        # Generate hybrid content if GitHub streams available (Phase 3)
        if self.github_streams:
            logger.info("Generating hybrid content with GitHub insights...")
            hybrid_content = generate_hybrid_content(
                api_data=merged_data,
                github_docs=self.github_docs,
                github_insights=self.github_insights,
                conflicts=self.conflicts,
            )

            # Merge hybrid content into result
            merged_data["github_context"] = hybrid_content.get("github_context", {})
            merged_data["conflict_summary"] = hybrid_content.get("conflict_summary", {})
            merged_data["issue_links"] = hybrid_content.get("issue_links", {})

            logger.info(
                f"Added GitHub context: {len(self.github_insights.get('common_problems', []))} problems, "
                f"{len(self.github_insights.get('known_solutions', []))} solutions"
            )

        return merged_data

    def _merge_single_api(self, api_name: str) -> dict[str, Any]:
        """
        Merge a single API using rules.

        Args:
            api_name: Name of the API to merge

        Returns:
            Merged API dict
        """
        in_docs = api_name in self.docs_apis
        in_code = api_name in self.code_apis
        has_conflict = api_name in self.conflict_index

        # Rule 1: Only in docs
        if in_docs and not in_code:
            conflict = self.conflict_index.get(api_name)
            return {
                "name": api_name,
                "status": "docs_only",
                "source": "documentation",
                "data": self.docs_apis[api_name],
                "warning": "This API is documented but not found in codebase",
                "conflict": conflict.__dict__ if conflict else None,
            }

        # Rule 2: Only in code
        if in_code and not in_docs:
            is_private = api_name.startswith("_")
            conflict = self.conflict_index.get(api_name)
            return {
                "name": api_name,
                "status": "code_only",
                "source": "code",
                "data": self.code_apis[api_name],
                "warning": "This API exists in code but is not documented"
                if not is_private
                else "Internal/private API",
                "conflict": conflict.__dict__ if conflict else None,
            }

        # Both exist - check for conflicts
        docs_info = self.docs_apis[api_name]
        code_info = self.code_apis[api_name]

        # Rule 3: Both match perfectly (no conflict)
        if not has_conflict:
            return {
                "name": api_name,
                "status": "matched",
                "source": "both",
                "docs_data": docs_info,
                "code_data": code_info,
                "merged_signature": self._create_merged_signature(code_info, docs_info),
                "merged_description": docs_info.get("docstring") or code_info.get("docstring"),
            }

        # Rule 4: Conflict exists - prefer code signature, keep docs description
        conflict = self.conflict_index[api_name]

        return {
            "name": api_name,
            "status": "conflict",
            "source": "both",
            "docs_data": docs_info,
            "code_data": code_info,
            "conflict": conflict.__dict__,
            "resolution": "prefer_code_signature",
            "merged_signature": self._create_merged_signature(code_info, docs_info),
            "merged_description": docs_info.get("docstring") or code_info.get("docstring"),
            "warning": conflict.difference,
        }

    def _create_merged_signature(self, code_info: dict, docs_info: dict) -> str:
        """
        Create merged signature preferring code data.

        Args:
            code_info: API info from code
            docs_info: API info from docs

        Returns:
            Merged signature string
        """
        name = code_info.get("name", docs_info.get("name"))
        params = code_info.get("parameters", docs_info.get("parameters", []))
        return_type = code_info.get("return_type", docs_info.get("return_type"))

        # Build parameter string
        param_strs = []
        for param in params:
            param_str = param["name"]
            if param.get("type_hint"):
                param_str += f": {param['type_hint']}"
            if param.get("default"):
                param_str += f" = {param['default']}"
            param_strs.append(param_str)

        signature = f"{name}({', '.join(param_strs)})"

        if return_type:
            signature += f" -> {return_type}"

        return signature


class ClaudeEnhancedMerger:
    """
    Claude-enhanced API merger using local Claude Code with GitHub insights.

    Opens Claude Code in a new terminal to intelligently reconcile conflicts.
    Uses the same approach as enhance_skill_local.py.

    Multi-layer architecture (Phase 3):
    - Layer 1: C3.x code (ground truth)
    - Layer 2: HTML docs (official intent)
    - Layer 3: GitHub docs (README/CONTRIBUTING)
    - Layer 4: GitHub insights (issues)
    """

    def __init__(
        self,
        docs_data: dict,
        github_data: dict,
        conflicts: list[Conflict],
        github_streams: Optional["ThreeStreamData"] = None,
    ):
        """
        Initialize Claude-enhanced merger with GitHub streams support.

        Args:
            docs_data: Documentation scraper data (Layer 2: HTML docs)
            github_data: GitHub scraper data (Layer 1: C3.x code)
            conflicts: List of detected conflicts
            github_streams: Optional ThreeStreamData with docs and insights (Layers 3-4)
        """
        self.docs_data = docs_data
        self.github_data = github_data
        self.conflicts = conflicts
        self.github_streams = github_streams

        # First do rule-based merge as baseline
        self.rule_merger = RuleBasedMerger(docs_data, github_data, conflicts, github_streams)

    def merge_all(self) -> dict[str, Any]:
        """
        Merge all APIs using Claude enhancement.

        Returns:
            Dict containing merged API data
        """
        logger.info("Starting Claude-enhanced merge...")

        # Create temporary workspace
        workspace_dir = self._create_workspace()

        # Launch Claude Code for enhancement
        logger.info("Launching Claude Code for intelligent merging...")
        logger.info("Claude will analyze conflicts and create reconciled API reference")

        try:
            self._launch_claude_merge(workspace_dir)

            # Read enhanced results
            merged_data = self._read_merged_results(workspace_dir)

            logger.info("Claude-enhanced merge complete")
            return merged_data

        except Exception as e:
            logger.error(f"Claude enhancement failed: {e}")
            logger.info("Falling back to rule-based merge")
            return self.rule_merger.merge_all()

    def _create_workspace(self) -> str:
        """
        Create temporary workspace with merge context.

        Returns:
            Path to workspace directory
        """
        workspace = tempfile.mkdtemp(prefix="skill_merge_")
        logger.info(f"Created merge workspace: {workspace}")

        # Write context files for Claude
        self._write_context_files(workspace)

        return workspace

    def _write_context_files(self, workspace: str):
        """Write context files for Claude to analyze."""

        # 1. Write conflicts summary
        conflicts_file = os.path.join(workspace, "conflicts.json")
        with open(conflicts_file, "w") as f:
            json.dump(
                {
                    "conflicts": [c.__dict__ for c in self.conflicts],
                    "summary": {
                        "total": len(self.conflicts),
                        "by_type": self._count_by_field("type"),
                        "by_severity": self._count_by_field("severity"),
                    },
                },
                f,
                indent=2,
            )

        # 2. Write documentation APIs
        docs_apis_file = os.path.join(workspace, "docs_apis.json")
        detector = ConflictDetector(self.docs_data, self.github_data)
        with open(docs_apis_file, "w") as f:
            json.dump(detector.docs_apis, f, indent=2)

        # 3. Write code APIs
        code_apis_file = os.path.join(workspace, "code_apis.json")
        with open(code_apis_file, "w") as f:
            json.dump(detector.code_apis, f, indent=2)

        # 4. Write merge instructions for Claude
        instructions = """# API Merge Task

You are merging API documentation from two sources:
1. Official documentation (user-facing)
2. Source code analysis (implementation reality)

## Context Files:
- `conflicts.json` - All detected conflicts between sources
- `docs_apis.json` - APIs from documentation
- `code_apis.json` - APIs from source code

## Your Task:
For each conflict, reconcile the differences intelligently:

1. **Prefer code signatures as source of truth**
   - Use actual parameter names, types, defaults from code
   - Code is what actually runs, docs might be outdated

2. **Keep documentation descriptions**
   - Docs are user-friendly, code comments might be technical
   - Keep the docs' explanation of what the API does

3. **Add implementation notes for discrepancies**
   - If docs differ from code, explain the difference
   - Example: "⚠️ The `snap` parameter exists in code but is not documented"

4. **Flag missing APIs clearly**
   - Missing in docs → Add [UNDOCUMENTED] tag
   - Missing in code → Add [REMOVED] or [DOCS_ERROR] tag

5. **Create unified API reference**
   - One definitive signature per API
   - Clear warnings about conflicts
   - Implementation notes where helpful

## Output Format:
Create `merged_apis.json` with this structure:

```json
{
  "apis": {
    "API.name": {
      "signature": "final_signature_here",
      "parameters": [...],
      "return_type": "type",
      "description": "user-friendly description",
      "implementation_notes": "Any discrepancies or warnings",
      "source": "both|docs_only|code_only",
      "confidence": "high|medium|low"
    }
  }
}
```

Take your time to analyze each conflict carefully. The goal is to create the most accurate and helpful API reference possible.
"""

        instructions_file = os.path.join(workspace, "MERGE_INSTRUCTIONS.md")
        with open(instructions_file, "w") as f:
            f.write(instructions)

        logger.info(f"Wrote context files to {workspace}")

    def _count_by_field(self, field: str) -> dict[str, int]:
        """Count conflicts by a specific field."""
        counts = {}
        for conflict in self.conflicts:
            value = getattr(conflict, field)
            counts[value] = counts.get(value, 0) + 1
        return counts

    def _launch_claude_merge(self, workspace: str):
        """
        Launch Claude Code to perform merge.

        Similar to enhance_skill_local.py approach.
        """
        # Create a script that Claude will execute
        script_path = os.path.join(workspace, "merge_script.sh")

        script_content = f"""#!/bin/bash
# Automatic merge script for Claude Code

cd "{workspace}"

echo "📊 Analyzing conflicts..."
cat conflicts.json | head -20

echo ""
echo "📖 Documentation APIs: $(cat docs_apis.json | grep -c '\\"name\\"')"
echo "💻 Code APIs: $(cat code_apis.json | grep -c '\\"name\\"')"
echo ""
echo "Please review the conflicts and create merged_apis.json"
echo "Follow the instructions in MERGE_INSTRUCTIONS.md"
echo ""
echo "When done, save merged_apis.json and close this terminal."

# Wait for user to complete merge
read -p "Press Enter when merge is complete..."
"""

        with open(script_path, "w") as f:
            f.write(script_content)

        os.chmod(script_path, 0o755)

        # Open new terminal with Claude Code
        # Try different terminal emulators
        terminals = [
            ["x-terminal-emulator", "-e"],
            ["gnome-terminal", "--"],
            ["xterm", "-e"],
            ["konsole", "-e"],
        ]

        for terminal_cmd in terminals:
            try:
                cmd = terminal_cmd + ["bash", script_path]
                subprocess.Popen(cmd)
                logger.info(f"Opened terminal with {terminal_cmd[0]}")
                break
            except FileNotFoundError:
                continue

        # Wait for merge to complete
        merged_file = os.path.join(workspace, "merged_apis.json")
        logger.info(f"Waiting for merged results at: {merged_file}")
        logger.info("Close the terminal when done to continue...")

        # Poll for file existence
        import time

        timeout = 3600  # 1 hour max
        elapsed = 0
        while not os.path.exists(merged_file) and elapsed < timeout:
            time.sleep(5)
            elapsed += 5

        if not os.path.exists(merged_file):
            raise TimeoutError("Claude merge timed out after 1 hour")

    def _read_merged_results(self, workspace: str) -> dict[str, Any]:
        """Read merged results from workspace."""
        merged_file = os.path.join(workspace, "merged_apis.json")

        if not os.path.exists(merged_file):
            raise FileNotFoundError(f"Merged results not found: {merged_file}")

        with open(merged_file) as f:
            merged_data = json.load(f)

        return {"merge_mode": "claude-enhanced", **merged_data}


def merge_sources(
    docs_data_path: str,
    github_data_path: str,
    output_path: str,
    mode: str = "rule-based",
    github_streams: Optional["ThreeStreamData"] = None,
) -> dict[str, Any]:
    """
    Merge documentation and GitHub data with optional GitHub streams (Phase 3).

    Multi-layer architecture:
    - Layer 1: C3.x code (ground truth)
    - Layer 2: HTML docs (official intent)
    - Layer 3: GitHub docs (README/CONTRIBUTING) - from github_streams
    - Layer 4: GitHub insights (issues) - from github_streams

    Args:
        docs_data_path: Path to documentation data JSON
        github_data_path: Path to GitHub data JSON
        output_path: Path to save merged output
        mode: 'rule-based' or 'claude-enhanced'
        github_streams: Optional ThreeStreamData with docs and insights

    Returns:
        Merged data dict with hybrid content
    """
    # Load data
    with open(docs_data_path) as f:
        docs_data = json.load(f)

    with open(github_data_path) as f:
        github_data = json.load(f)

    # Detect conflicts
    detector = ConflictDetector(docs_data, github_data)
    conflicts = detector.detect_all_conflicts()

    logger.info(f"Detected {len(conflicts)} conflicts")

    # Log GitHub streams availability
    if github_streams:
        logger.info("GitHub streams available for multi-layer merge")
        if github_streams.docs_stream:
            logger.info(
                f"  - Docs stream: README, {len(github_streams.docs_stream.docs_files)} docs files"
            )
        if github_streams.insights_stream:
            problems = len(github_streams.insights_stream.common_problems)
            solutions = len(github_streams.insights_stream.known_solutions)
            logger.info(f"  - Insights stream: {problems} problems, {solutions} solutions")

    # Merge based on mode
    if mode == "claude-enhanced":
        merger = ClaudeEnhancedMerger(docs_data, github_data, conflicts, github_streams)
    else:
        merger = RuleBasedMerger(docs_data, github_data, conflicts, github_streams)

    merged_data = merger.merge_all()

    # Save merged data
    with open(output_path, "w") as f:
        json.dump(merged_data, f, indent=2, ensure_ascii=False)

    logger.info(f"Merged data saved to: {output_path}")

    return merged_data


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Merge documentation and code sources")
    parser.add_argument("docs_data", help="Path to documentation data JSON")
    parser.add_argument("github_data", help="Path to GitHub data JSON")
    parser.add_argument("--output", "-o", default="merged_data.json", help="Output file path")
    parser.add_argument(
        "--mode",
        "-m",
        choices=["rule-based", "claude-enhanced"],
        default="rule-based",
        help="Merge mode",
    )

    args = parser.parse_args()

    merged = merge_sources(args.docs_data, args.github_data, args.output, args.mode)

    # Print summary
    summary = merged.get("summary", {})
    print(f"\n✅ Merge complete ({merged.get('merge_mode')})")
    print(f"   Total APIs: {summary.get('total_apis', 0)}")
    print(f"   Matched: {summary.get('matched', 0)}")
    print(f"   Docs only: {summary.get('docs_only', 0)}")
    print(f"   Code only: {summary.get('code_only', 0)}")
    print(f"   Conflicts: {summary.get('conflict', 0)}")
    print(f"\n📄 Saved to: {args.output}")