skill-seekers 2.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. skill_seekers/__init__.py +22 -0
  2. skill_seekers/cli/__init__.py +39 -0
  3. skill_seekers/cli/adaptors/__init__.py +120 -0
  4. skill_seekers/cli/adaptors/base.py +221 -0
  5. skill_seekers/cli/adaptors/claude.py +485 -0
  6. skill_seekers/cli/adaptors/gemini.py +453 -0
  7. skill_seekers/cli/adaptors/markdown.py +269 -0
  8. skill_seekers/cli/adaptors/openai.py +503 -0
  9. skill_seekers/cli/ai_enhancer.py +310 -0
  10. skill_seekers/cli/api_reference_builder.py +373 -0
  11. skill_seekers/cli/architectural_pattern_detector.py +525 -0
  12. skill_seekers/cli/code_analyzer.py +1462 -0
  13. skill_seekers/cli/codebase_scraper.py +1225 -0
  14. skill_seekers/cli/config_command.py +563 -0
  15. skill_seekers/cli/config_enhancer.py +431 -0
  16. skill_seekers/cli/config_extractor.py +871 -0
  17. skill_seekers/cli/config_manager.py +452 -0
  18. skill_seekers/cli/config_validator.py +394 -0
  19. skill_seekers/cli/conflict_detector.py +528 -0
  20. skill_seekers/cli/constants.py +72 -0
  21. skill_seekers/cli/dependency_analyzer.py +757 -0
  22. skill_seekers/cli/doc_scraper.py +2332 -0
  23. skill_seekers/cli/enhance_skill.py +488 -0
  24. skill_seekers/cli/enhance_skill_local.py +1096 -0
  25. skill_seekers/cli/enhance_status.py +194 -0
  26. skill_seekers/cli/estimate_pages.py +433 -0
  27. skill_seekers/cli/generate_router.py +1209 -0
  28. skill_seekers/cli/github_fetcher.py +534 -0
  29. skill_seekers/cli/github_scraper.py +1466 -0
  30. skill_seekers/cli/guide_enhancer.py +723 -0
  31. skill_seekers/cli/how_to_guide_builder.py +1267 -0
  32. skill_seekers/cli/install_agent.py +461 -0
  33. skill_seekers/cli/install_skill.py +178 -0
  34. skill_seekers/cli/language_detector.py +614 -0
  35. skill_seekers/cli/llms_txt_detector.py +60 -0
  36. skill_seekers/cli/llms_txt_downloader.py +104 -0
  37. skill_seekers/cli/llms_txt_parser.py +150 -0
  38. skill_seekers/cli/main.py +558 -0
  39. skill_seekers/cli/markdown_cleaner.py +132 -0
  40. skill_seekers/cli/merge_sources.py +806 -0
  41. skill_seekers/cli/package_multi.py +77 -0
  42. skill_seekers/cli/package_skill.py +241 -0
  43. skill_seekers/cli/pattern_recognizer.py +1825 -0
  44. skill_seekers/cli/pdf_extractor_poc.py +1166 -0
  45. skill_seekers/cli/pdf_scraper.py +617 -0
  46. skill_seekers/cli/quality_checker.py +519 -0
  47. skill_seekers/cli/rate_limit_handler.py +438 -0
  48. skill_seekers/cli/resume_command.py +160 -0
  49. skill_seekers/cli/run_tests.py +230 -0
  50. skill_seekers/cli/setup_wizard.py +93 -0
  51. skill_seekers/cli/split_config.py +390 -0
  52. skill_seekers/cli/swift_patterns.py +560 -0
  53. skill_seekers/cli/test_example_extractor.py +1081 -0
  54. skill_seekers/cli/test_unified_simple.py +179 -0
  55. skill_seekers/cli/unified_codebase_analyzer.py +572 -0
  56. skill_seekers/cli/unified_scraper.py +932 -0
  57. skill_seekers/cli/unified_skill_builder.py +1605 -0
  58. skill_seekers/cli/upload_skill.py +162 -0
  59. skill_seekers/cli/utils.py +432 -0
  60. skill_seekers/mcp/__init__.py +33 -0
  61. skill_seekers/mcp/agent_detector.py +316 -0
  62. skill_seekers/mcp/git_repo.py +273 -0
  63. skill_seekers/mcp/server.py +231 -0
  64. skill_seekers/mcp/server_fastmcp.py +1249 -0
  65. skill_seekers/mcp/server_legacy.py +2302 -0
  66. skill_seekers/mcp/source_manager.py +285 -0
  67. skill_seekers/mcp/tools/__init__.py +115 -0
  68. skill_seekers/mcp/tools/config_tools.py +251 -0
  69. skill_seekers/mcp/tools/packaging_tools.py +826 -0
  70. skill_seekers/mcp/tools/scraping_tools.py +842 -0
  71. skill_seekers/mcp/tools/source_tools.py +828 -0
  72. skill_seekers/mcp/tools/splitting_tools.py +212 -0
  73. skill_seekers/py.typed +0 -0
  74. skill_seekers-2.7.3.dist-info/METADATA +2027 -0
  75. skill_seekers-2.7.3.dist-info/RECORD +79 -0
  76. skill_seekers-2.7.3.dist-info/WHEEL +5 -0
  77. skill_seekers-2.7.3.dist-info/entry_points.txt +19 -0
  78. skill_seekers-2.7.3.dist-info/licenses/LICENSE +21 -0
  79. skill_seekers-2.7.3.dist-info/top_level.txt +1 -0
@@ -0,0 +1,806 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Source Merger for Multi-Source Skills
4
+
5
+ Merges documentation and code data intelligently with GitHub insights:
6
+ - Rule-based merge: Fast, deterministic rules
7
+ - Claude-enhanced merge: AI-powered reconciliation
8
+
9
+ Handles conflicts and creates unified API reference with GitHub metadata.
10
+
11
+ Multi-layer architecture (Phase 3):
12
+ - Layer 1: C3.x code (ground truth)
13
+ - Layer 2: HTML docs (official intent)
14
+ - Layer 3: GitHub docs (README/CONTRIBUTING)
15
+ - Layer 4: GitHub insights (issues)
16
+ """
17
+
18
+ import json
19
+ import logging
20
+ import os
21
+ import subprocess
22
+ import tempfile
23
+ from typing import Any, Optional
24
+
25
+ from .conflict_detector import Conflict, ConflictDetector
26
+
27
+ # Import three-stream data classes (Phase 1)
28
+ try:
29
+ from .github_fetcher import CodeStream, DocsStream, InsightsStream, ThreeStreamData
30
+ except ImportError:
31
+ # Fallback if github_fetcher not available
32
+ ThreeStreamData = None
33
+ CodeStream = None
34
+ DocsStream = None
35
+ InsightsStream = None
36
+
37
+ logging.basicConfig(level=logging.INFO)
38
+ logger = logging.getLogger(__name__)
39
+
40
+
41
def categorize_issues_by_topic(
    problems: list[dict], solutions: list[dict], topics: list[str]
) -> dict[str, list[dict]]:
    """
    Group GitHub issues under the best-matching topic keyword phrase.

    Each issue's title and labels are scanned for the words of every topic;
    the topic with the highest word-hit count wins (earlier topics win ties).
    Issues matching no topic land in an "other" bucket.

    Args:
        problems: List of common problems (open issues with 5+ comments)
        solutions: List of known solutions (closed issues with comments)
        topics: List of topic keyword phrases to match against

    Returns:
        Dict mapping topic to relevant issues; empty categories are dropped.
    """
    buckets: dict[str, list[dict]] = {topic: [] for topic in topics}
    buckets["other"] = []

    for issue in problems + solutions:
        # One lowercase haystack built from the title plus all labels.
        title = issue.get("title", "").lower()
        labels = [lbl.lower() for lbl in issue.get("labels", [])]
        haystack = f"{title} {' '.join(labels)}"

        best_topic, best_score = None, 0
        for topic in topics:
            # Score = number of the topic's words found in the haystack.
            score = sum(word in haystack for word in topic.lower().split())
            if score > best_score:
                best_topic, best_score = topic, score

        target = best_topic if best_topic and best_score > 0 else "other"
        buckets[target].append(issue)

    # Drop topics that collected nothing.
    return {topic: issues for topic, issues in buckets.items() if issues}
87
+
88
+
89
def generate_hybrid_content(
    api_data: dict,
    github_docs: dict | None,
    github_insights: dict | None,
    conflicts: list[Conflict],
) -> dict[str, Any]:
    """
    Build the hybrid payload: merged API data plus GitHub context.

    Args:
        api_data: Merged API data
        github_docs: GitHub docs stream (README, CONTRIBUTING, docs/*.md)
        github_insights: GitHub insights stream (metadata, issues, labels)
        conflicts: List of detected conflicts

    Returns:
        Dict with the API reference enriched by GitHub docs, insights,
        a conflict summary, and per-API issue links.
    """
    context: dict[str, Any] = {}
    result: dict[str, Any] = {"api_reference": api_data, "github_context": context}

    # Layer 3: repository documentation.
    if github_docs:
        context["docs"] = {
            "readme": github_docs.get("readme"),
            "contributing": github_docs.get("contributing"),
            "docs_files_count": len(github_docs.get("docs_files", [])),
        }

    # Layer 4: community insights (repo metadata + issue activity).
    if github_insights:
        meta = github_insights.get("metadata", {})
        context["metadata"] = {
            "stars": meta.get("stars", 0),
            "forks": meta.get("forks", 0),
            "language": meta.get("language", "Unknown"),
            "description": meta.get("description", ""),
        }

        problems = github_insights.get("common_problems", [])
        solutions = github_insights.get("known_solutions", [])
        context["issues"] = {
            "common_problems_count": len(problems),
            "known_solutions_count": len(solutions),
            "top_problems": problems[:5],  # Top 5 most-discussed
            "top_solutions": solutions[:5],
        }
        context["top_labels"] = github_insights.get("top_labels", [])

    # Tally conflicts along both axes for a quick overview.
    by_type: dict[str, int] = {}
    by_severity: dict[str, int] = {}
    for item in conflicts:
        by_type[item.type] = by_type.get(item.type, 0) + 1
        by_severity[item.severity] = by_severity.get(item.severity, 0) + 1
    result["conflict_summary"] = {
        "total_conflicts": len(conflicts),
        "by_type": by_type,
        "by_severity": by_severity,
    }

    # Cross-link issues to the APIs they mention.
    if github_insights:
        result["issue_links"] = _match_issues_to_apis(
            api_data.get("apis", {}),
            github_insights.get("common_problems", []),
            github_insights.get("known_solutions", []),
        )

    return result
169
+
170
+
171
+ def _match_issues_to_apis(
172
+ apis: dict[str, dict], problems: list[dict], solutions: list[dict]
173
+ ) -> dict[str, list[dict]]:
174
+ """
175
+ Match GitHub issues to specific APIs by keyword matching.
176
+
177
+ Args:
178
+ apis: Dict of API data keyed by name
179
+ problems: List of common problems
180
+ solutions: List of known solutions
181
+
182
+ Returns:
183
+ Dict mapping API names to relevant issues
184
+ """
185
+ issue_links = {}
186
+ all_issues = problems + solutions
187
+
188
+ for api_name in apis:
189
+ # Extract searchable keywords from API name
190
+ api_keywords = api_name.lower().replace("_", " ").split(".")
191
+
192
+ matched_issues = []
193
+ for issue in all_issues:
194
+ title = issue.get("title", "").lower()
195
+ labels = [label.lower() for label in issue.get("labels", [])]
196
+ text = f"{title} {' '.join(labels)}"
197
+
198
+ # Check if any API keyword appears in issue
199
+ if any(keyword in text for keyword in api_keywords):
200
+ matched_issues.append(
201
+ {
202
+ "number": issue.get("number"),
203
+ "title": issue.get("title"),
204
+ "state": issue.get("state"),
205
+ "comments": issue.get("comments"),
206
+ }
207
+ )
208
+
209
+ if matched_issues:
210
+ issue_links[api_name] = matched_issues
211
+
212
+ return issue_links
213
+
214
+
215
class RuleBasedMerger:
    """
    Deterministic docs/code API merger enriched with GitHub insights.

    Multi-layer architecture (Phase 3):
    - Layer 1: C3.x code (ground truth)
    - Layer 2: HTML docs (official intent)
    - Layer 3: GitHub docs (README/CONTRIBUTING)
    - Layer 4: GitHub insights (issues)

    Merge rules:
    1. API only in docs -> include with [DOCS_ONLY] tag
    2. API only in code -> include with [UNDOCUMENTED] tag
    3. Present in both, no conflict -> include normally
    4. Present in both, conflict -> include both versions with [CONFLICT]
       tag, prefer code signature
    """

    def __init__(
        self,
        docs_data: dict,
        github_data: dict,
        conflicts: list[Conflict],
        github_streams: Optional["ThreeStreamData"] = None,
    ):
        """
        Initialize rule-based merger with GitHub streams support.

        Args:
            docs_data: Documentation scraper data (Layer 2: HTML docs)
            github_data: GitHub scraper data (Layer 1: C3.x code)
            conflicts: List of detected conflicts
            github_streams: Optional ThreeStreamData with docs and insights (Layers 3-4)
        """
        self.docs_data = docs_data
        self.github_data = github_data
        self.conflicts = conflicts
        self.github_streams = github_streams

        # Fast conflict lookup keyed by API name.
        self.conflict_index = {conflict.api_name: conflict for conflict in conflicts}

        # Reuse the detector's API extraction for both sources.
        detector = ConflictDetector(docs_data, github_data)
        self.docs_apis = detector.docs_apis
        self.code_apis = detector.code_apis

        # Unpack optional GitHub streams into plain dicts (Layers 3-4).
        self.github_docs = None
        self.github_insights = None
        if github_streams:
            docs_stream = github_streams.docs_stream
            if docs_stream:
                # Layer 3: GitHub docs
                self.github_docs = {
                    "readme": docs_stream.readme,
                    "contributing": docs_stream.contributing,
                    "docs_files": docs_stream.docs_files,
                }
            insights_stream = github_streams.insights_stream
            if insights_stream:
                # Layer 4: GitHub insights
                self.github_insights = {
                    "metadata": insights_stream.metadata,
                    "common_problems": insights_stream.common_problems,
                    "known_solutions": insights_stream.known_solutions,
                    "top_labels": insights_stream.top_labels,
                }

    def merge_all(self) -> dict[str, Any]:
        """
        Merge every API from both sources using the rule set.

        Returns:
            Dict containing merged API data, a status summary, and (when
            GitHub streams are available) hybrid GitHub context.
        """
        logger.info("Starting rule-based merge with GitHub streams...")

        # Merge the union of API names from both sources, sorted for
        # deterministic output order.
        merged_apis = {
            name: self._merge_single_api(name)
            for name in sorted(set(self.docs_apis) | set(self.code_apis))
        }

        logger.info(f"Merged {len(merged_apis)} APIs")

        def count(status: str) -> int:
            # Number of merged APIs that ended up with the given status.
            return sum(1 for api in merged_apis.values() if api["status"] == status)

        merged_data = {
            "merge_mode": "rule-based",
            "apis": merged_apis,
            "summary": {
                "total_apis": len(merged_apis),
                "docs_only": count("docs_only"),
                "code_only": count("code_only"),
                "matched": count("matched"),
                "conflict": count("conflict"),
            },
        }

        # Phase 3: fold in GitHub docs/insights when streams were provided.
        if self.github_streams:
            logger.info("Generating hybrid content with GitHub insights...")
            hybrid_content = generate_hybrid_content(
                api_data=merged_data,
                github_docs=self.github_docs,
                github_insights=self.github_insights,
                conflicts=self.conflicts,
            )

            # Lift the hybrid sections into the top-level result.
            merged_data["github_context"] = hybrid_content.get("github_context", {})
            merged_data["conflict_summary"] = hybrid_content.get("conflict_summary", {})
            merged_data["issue_links"] = hybrid_content.get("issue_links", {})

            logger.info(
                f"Added GitHub context: {len(self.github_insights.get('common_problems', []))} problems, "
                f"{len(self.github_insights.get('known_solutions', []))} solutions"
            )

        return merged_data

    def _merge_single_api(self, api_name: str) -> dict[str, Any]:
        """
        Apply the merge rules to a single API.

        Args:
            api_name: Name of the API to merge

        Returns:
            Merged API dict with a "status" of docs_only, code_only,
            matched, or conflict.
        """
        in_docs = api_name in self.docs_apis
        in_code = api_name in self.code_apis
        has_conflict = api_name in self.conflict_index

        # Rule 1: documented but missing from the codebase.
        if in_docs and not in_code:
            conflict = self.conflict_index.get(api_name)
            return {
                "name": api_name,
                "status": "docs_only",
                "source": "documentation",
                "data": self.docs_apis[api_name],
                "warning": "This API is documented but not found in codebase",
                "conflict": conflict.__dict__ if conflict else None,
            }

        # Rule 2: implemented but never documented.
        if in_code and not in_docs:
            # Leading underscore marks a private API; soften the warning.
            if api_name.startswith("_"):
                warning = "Internal/private API"
            else:
                warning = "This API exists in code but is not documented"
            conflict = self.conflict_index.get(api_name)
            return {
                "name": api_name,
                "status": "code_only",
                "source": "code",
                "data": self.code_apis[api_name],
                "warning": warning,
                "conflict": conflict.__dict__ if conflict else None,
            }

        # Present in both sources.
        docs_info = self.docs_apis[api_name]
        code_info = self.code_apis[api_name]
        description = docs_info.get("docstring") or code_info.get("docstring")

        # Rule 3: both agree - merge cleanly.
        if not has_conflict:
            return {
                "name": api_name,
                "status": "matched",
                "source": "both",
                "docs_data": docs_info,
                "code_data": code_info,
                "merged_signature": self._create_merged_signature(code_info, docs_info),
                "merged_description": description,
            }

        # Rule 4: conflicting definitions - trust the code signature but
        # keep the friendlier docs description.
        conflict = self.conflict_index[api_name]
        return {
            "name": api_name,
            "status": "conflict",
            "source": "both",
            "docs_data": docs_info,
            "code_data": code_info,
            "conflict": conflict.__dict__,
            "resolution": "prefer_code_signature",
            "merged_signature": self._create_merged_signature(code_info, docs_info),
            "merged_description": description,
            "warning": conflict.difference,
        }

    def _create_merged_signature(self, code_info: dict, docs_info: dict) -> str:
        """
        Build a signature string, preferring code data over docs data.

        Args:
            code_info: API info from code
            docs_info: API info from docs

        Returns:
            Signature like "name(a: int, b = 1) -> str".
        """
        name = code_info.get("name", docs_info.get("name"))
        params = code_info.get("parameters", docs_info.get("parameters", []))
        return_type = code_info.get("return_type", docs_info.get("return_type"))

        # Render each parameter as "name[: type][ = default]".
        rendered = []
        for param in params:
            piece = param["name"]
            type_hint = param.get("type_hint")
            if type_hint:
                piece = f"{piece}: {type_hint}"
            default = param.get("default")
            if default:
                piece = f"{piece} = {default}"
            rendered.append(piece)

        signature = f"{name}({', '.join(rendered)})"
        return f"{signature} -> {return_type}" if return_type else signature
441
+
442
+
443
class ClaudeEnhancedMerger:
    """
    Claude-enhanced API merger using local Claude Code with GitHub insights.

    Opens Claude Code in a new terminal to intelligently reconcile conflicts.
    Uses the same approach as enhance_skill_local.py.

    Multi-layer architecture (Phase 3):
    - Layer 1: C3.x code (ground truth)
    - Layer 2: HTML docs (official intent)
    - Layer 3: GitHub docs (README/CONTRIBUTING)
    - Layer 4: GitHub insights (issues)
    """

    def __init__(
        self,
        docs_data: dict,
        github_data: dict,
        conflicts: list[Conflict],
        github_streams: Optional["ThreeStreamData"] = None,
    ):
        """
        Initialize Claude-enhanced merger with GitHub streams support.

        Args:
            docs_data: Documentation scraper data (Layer 2: HTML docs)
            github_data: GitHub scraper data (Layer 1: C3.x code)
            conflicts: List of detected conflicts
            github_streams: Optional ThreeStreamData with docs and insights (Layers 3-4)
        """
        self.docs_data = docs_data
        self.github_data = github_data
        self.conflicts = conflicts
        self.github_streams = github_streams

        # First do rule-based merge as baseline (also the fallback path).
        self.rule_merger = RuleBasedMerger(docs_data, github_data, conflicts, github_streams)

    def merge_all(self) -> dict[str, Any]:
        """
        Merge all APIs using Claude enhancement.

        Falls back to the rule-based merge on any failure (no terminal
        emulator available, timeout, missing/invalid output file).

        Returns:
            Dict containing merged API data
        """
        logger.info("Starting Claude-enhanced merge...")

        # Create temporary workspace with context files for Claude.
        workspace_dir = self._create_workspace()

        # Launch Claude Code for enhancement
        logger.info("Launching Claude Code for intelligent merging...")
        logger.info("Claude will analyze conflicts and create reconciled API reference")

        try:
            self._launch_claude_merge(workspace_dir)

            # Read enhanced results
            merged_data = self._read_merged_results(workspace_dir)

            logger.info("Claude-enhanced merge complete")
            return merged_data

        except Exception as e:
            logger.error(f"Claude enhancement failed: {e}")
            logger.info("Falling back to rule-based merge")
            return self.rule_merger.merge_all()

    def _create_workspace(self) -> str:
        """
        Create temporary workspace with merge context.

        Returns:
            Path to workspace directory
        """
        workspace = tempfile.mkdtemp(prefix="skill_merge_")
        logger.info(f"Created merge workspace: {workspace}")

        # Write context files for Claude
        self._write_context_files(workspace)

        return workspace

    def _write_context_files(self, workspace: str):
        """Write conflicts, per-source APIs, and merge instructions for Claude."""

        # 1. Write conflicts summary
        conflicts_file = os.path.join(workspace, "conflicts.json")
        with open(conflicts_file, "w") as f:
            json.dump(
                {
                    "conflicts": [c.__dict__ for c in self.conflicts],
                    "summary": {
                        "total": len(self.conflicts),
                        "by_type": self._count_by_field("type"),
                        "by_severity": self._count_by_field("severity"),
                    },
                },
                f,
                indent=2,
            )

        # 2. Write documentation APIs
        docs_apis_file = os.path.join(workspace, "docs_apis.json")
        detector = ConflictDetector(self.docs_data, self.github_data)
        with open(docs_apis_file, "w") as f:
            json.dump(detector.docs_apis, f, indent=2)

        # 3. Write code APIs
        code_apis_file = os.path.join(workspace, "code_apis.json")
        with open(code_apis_file, "w") as f:
            json.dump(detector.code_apis, f, indent=2)

        # 4. Write merge instructions for Claude
        instructions = """# API Merge Task

You are merging API documentation from two sources:
1. Official documentation (user-facing)
2. Source code analysis (implementation reality)

## Context Files:
- `conflicts.json` - All detected conflicts between sources
- `docs_apis.json` - APIs from documentation
- `code_apis.json` - APIs from source code

## Your Task:
For each conflict, reconcile the differences intelligently:

1. **Prefer code signatures as source of truth**
   - Use actual parameter names, types, defaults from code
   - Code is what actually runs, docs might be outdated

2. **Keep documentation descriptions**
   - Docs are user-friendly, code comments might be technical
   - Keep the docs' explanation of what the API does

3. **Add implementation notes for discrepancies**
   - If docs differ from code, explain the difference
   - Example: "⚠️ The `snap` parameter exists in code but is not documented"

4. **Flag missing APIs clearly**
   - Missing in docs → Add [UNDOCUMENTED] tag
   - Missing in code → Add [REMOVED] or [DOCS_ERROR] tag

5. **Create unified API reference**
   - One definitive signature per API
   - Clear warnings about conflicts
   - Implementation notes where helpful

## Output Format:
Create `merged_apis.json` with this structure:

```json
{
  "apis": {
    "API.name": {
      "signature": "final_signature_here",
      "parameters": [...],
      "return_type": "type",
      "description": "user-friendly description",
      "implementation_notes": "Any discrepancies or warnings",
      "source": "both|docs_only|code_only",
      "confidence": "high|medium|low"
    }
  }
}
```

Take your time to analyze each conflict carefully. The goal is to create the most accurate and helpful API reference possible.
"""

        instructions_file = os.path.join(workspace, "MERGE_INSTRUCTIONS.md")
        with open(instructions_file, "w") as f:
            f.write(instructions)

        logger.info(f"Wrote context files to {workspace}")

    def _count_by_field(self, field: str) -> dict[str, int]:
        """Count conflicts by a specific field (e.g. "type" or "severity")."""
        counts = {}
        for conflict in self.conflicts:
            value = getattr(conflict, field)
            counts[value] = counts.get(value, 0) + 1
        return counts

    def _launch_claude_merge(self, workspace: str):
        """
        Launch Claude Code to perform merge.

        Similar to enhance_skill_local.py approach.

        Raises:
            RuntimeError: If no terminal emulator could be launched.
            TimeoutError: If merged_apis.json does not appear within 1 hour.
        """
        import time

        # Create a script that Claude will execute
        script_path = os.path.join(workspace, "merge_script.sh")

        script_content = f"""#!/bin/bash
# Automatic merge script for Claude Code

cd "{workspace}"

echo "📊 Analyzing conflicts..."
cat conflicts.json | head -20

echo ""
echo "📖 Documentation APIs: $(cat docs_apis.json | grep -c '\"name\"')"
echo "💻 Code APIs: $(cat code_apis.json | grep -c '\"name\"')"
echo ""
echo "Please review the conflicts and create merged_apis.json"
echo "Follow the instructions in MERGE_INSTRUCTIONS.md"
echo ""
echo "When done, save merged_apis.json and close this terminal."

# Wait for user to complete merge
read -p "Press Enter when merge is complete..."
"""

        with open(script_path, "w") as f:
            f.write(script_content)

        os.chmod(script_path, 0o755)

        # Open new terminal with Claude Code.
        # Try different terminal emulators in preference order.
        terminals = [
            ["x-terminal-emulator", "-e"],
            ["gnome-terminal", "--"],
            ["xterm", "-e"],
            ["konsole", "-e"],
        ]

        launched = False
        for terminal_cmd in terminals:
            try:
                cmd = terminal_cmd + ["bash", script_path]
                subprocess.Popen(cmd)
                logger.info(f"Opened terminal with {terminal_cmd[0]}")
                launched = True
                break
            except FileNotFoundError:
                continue

        if not launched:
            # Bug fix: previously this fell through silently when no terminal
            # emulator was installed, then polled for up to an hour for a file
            # that could never be created. Raising here lets merge_all() fall
            # back to the rule-based merge immediately.
            raise RuntimeError(
                "No terminal emulator found (tried x-terminal-emulator, "
                "gnome-terminal, xterm, konsole); cannot launch Claude merge"
            )

        # Wait for merge to complete
        merged_file = os.path.join(workspace, "merged_apis.json")
        logger.info(f"Waiting for merged results at: {merged_file}")
        logger.info("Close the terminal when done to continue...")

        # Poll for file existence
        timeout = 3600  # 1 hour max
        elapsed = 0
        while not os.path.exists(merged_file) and elapsed < timeout:
            time.sleep(5)
            elapsed += 5

        if not os.path.exists(merged_file):
            raise TimeoutError("Claude merge timed out after 1 hour")

    def _read_merged_results(self, workspace: str) -> dict[str, Any]:
        """Read merged results from workspace.

        Raises:
            FileNotFoundError: If merged_apis.json was not produced.
        """
        merged_file = os.path.join(workspace, "merged_apis.json")

        if not os.path.exists(merged_file):
            raise FileNotFoundError(f"Merged results not found: {merged_file}")

        with open(merged_file) as f:
            merged_data = json.load(f)

        # Tag the result with the merge mode used.
        return {"merge_mode": "claude-enhanced", **merged_data}
709
+
710
+
711
def merge_sources(
    docs_data_path: str,
    github_data_path: str,
    output_path: str,
    mode: str = "rule-based",
    github_streams: Optional["ThreeStreamData"] = None,
) -> dict[str, Any]:
    """
    Merge documentation and GitHub data with optional GitHub streams (Phase 3).

    Multi-layer architecture:
    - Layer 1: C3.x code (ground truth)
    - Layer 2: HTML docs (official intent)
    - Layer 3: GitHub docs (README/CONTRIBUTING) - from github_streams
    - Layer 4: GitHub insights (issues) - from github_streams

    Args:
        docs_data_path: Path to documentation data JSON
        github_data_path: Path to GitHub data JSON
        output_path: Path to save merged output
        mode: 'rule-based' or 'claude-enhanced'
        github_streams: Optional ThreeStreamData with docs and insights

    Returns:
        Merged data dict with hybrid content
    """
    # Load both source payloads.
    with open(docs_data_path) as handle:
        docs_data = json.load(handle)
    with open(github_data_path) as handle:
        github_data = json.load(handle)

    # Find docs-vs-code disagreements up front.
    conflicts = ConflictDetector(docs_data, github_data).detect_all_conflicts()
    logger.info(f"Detected {len(conflicts)} conflicts")

    # Report which optional GitHub layers are present.
    if github_streams:
        logger.info("GitHub streams available for multi-layer merge")
        if github_streams.docs_stream:
            logger.info(
                f" - Docs stream: README, {len(github_streams.docs_stream.docs_files)} docs files"
            )
        if github_streams.insights_stream:
            problems = len(github_streams.insights_stream.common_problems)
            solutions = len(github_streams.insights_stream.known_solutions)
            logger.info(f" - Insights stream: {problems} problems, {solutions} solutions")

    # Pick the merger implementation for the requested mode.
    merger_cls = ClaudeEnhancedMerger if mode == "claude-enhanced" else RuleBasedMerger
    merger = merger_cls(docs_data, github_data, conflicts, github_streams)
    merged_data = merger.merge_all()

    # Persist the merged result.
    with open(output_path, "w") as handle:
        json.dump(merged_data, handle, indent=2, ensure_ascii=False)

    logger.info(f"Merged data saved to: {output_path}")

    return merged_data
777
+
778
+
779
if __name__ == "__main__":
    import argparse

    # Command-line entry point: merge two JSON sources and print a summary.
    parser = argparse.ArgumentParser(description="Merge documentation and code sources")
    parser.add_argument("docs_data", help="Path to documentation data JSON")
    parser.add_argument("github_data", help="Path to GitHub data JSON")
    parser.add_argument("--output", "-o", default="merged_data.json", help="Output file path")
    parser.add_argument(
        "--mode",
        "-m",
        choices=["rule-based", "claude-enhanced"],
        default="rule-based",
        help="Merge mode",
    )
    args = parser.parse_args()

    merged = merge_sources(args.docs_data, args.github_data, args.output, args.mode)

    # Print summary of merge statuses.
    stats = merged.get("summary", {})
    print(f"\n✅ Merge complete ({merged.get('merge_mode')})")
    for label, key in [
        ("Total APIs", "total_apis"),
        ("Matched", "matched"),
        ("Docs only", "docs_only"),
        ("Code only", "code_only"),
        ("Conflicts", "conflict"),
    ]:
        print(f" {label}: {stats.get(key, 0)}")
    print(f"\n📄 Saved to: {args.output}")