skill-seekers 2.7.3 (skill_seekers-2.7.3-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. skill_seekers/__init__.py +22 -0
  2. skill_seekers/cli/__init__.py +39 -0
  3. skill_seekers/cli/adaptors/__init__.py +120 -0
  4. skill_seekers/cli/adaptors/base.py +221 -0
  5. skill_seekers/cli/adaptors/claude.py +485 -0
  6. skill_seekers/cli/adaptors/gemini.py +453 -0
  7. skill_seekers/cli/adaptors/markdown.py +269 -0
  8. skill_seekers/cli/adaptors/openai.py +503 -0
  9. skill_seekers/cli/ai_enhancer.py +310 -0
  10. skill_seekers/cli/api_reference_builder.py +373 -0
  11. skill_seekers/cli/architectural_pattern_detector.py +525 -0
  12. skill_seekers/cli/code_analyzer.py +1462 -0
  13. skill_seekers/cli/codebase_scraper.py +1225 -0
  14. skill_seekers/cli/config_command.py +563 -0
  15. skill_seekers/cli/config_enhancer.py +431 -0
  16. skill_seekers/cli/config_extractor.py +871 -0
  17. skill_seekers/cli/config_manager.py +452 -0
  18. skill_seekers/cli/config_validator.py +394 -0
  19. skill_seekers/cli/conflict_detector.py +528 -0
  20. skill_seekers/cli/constants.py +72 -0
  21. skill_seekers/cli/dependency_analyzer.py +757 -0
  22. skill_seekers/cli/doc_scraper.py +2332 -0
  23. skill_seekers/cli/enhance_skill.py +488 -0
  24. skill_seekers/cli/enhance_skill_local.py +1096 -0
  25. skill_seekers/cli/enhance_status.py +194 -0
  26. skill_seekers/cli/estimate_pages.py +433 -0
  27. skill_seekers/cli/generate_router.py +1209 -0
  28. skill_seekers/cli/github_fetcher.py +534 -0
  29. skill_seekers/cli/github_scraper.py +1466 -0
  30. skill_seekers/cli/guide_enhancer.py +723 -0
  31. skill_seekers/cli/how_to_guide_builder.py +1267 -0
  32. skill_seekers/cli/install_agent.py +461 -0
  33. skill_seekers/cli/install_skill.py +178 -0
  34. skill_seekers/cli/language_detector.py +614 -0
  35. skill_seekers/cli/llms_txt_detector.py +60 -0
  36. skill_seekers/cli/llms_txt_downloader.py +104 -0
  37. skill_seekers/cli/llms_txt_parser.py +150 -0
  38. skill_seekers/cli/main.py +558 -0
  39. skill_seekers/cli/markdown_cleaner.py +132 -0
  40. skill_seekers/cli/merge_sources.py +806 -0
  41. skill_seekers/cli/package_multi.py +77 -0
  42. skill_seekers/cli/package_skill.py +241 -0
  43. skill_seekers/cli/pattern_recognizer.py +1825 -0
  44. skill_seekers/cli/pdf_extractor_poc.py +1166 -0
  45. skill_seekers/cli/pdf_scraper.py +617 -0
  46. skill_seekers/cli/quality_checker.py +519 -0
  47. skill_seekers/cli/rate_limit_handler.py +438 -0
  48. skill_seekers/cli/resume_command.py +160 -0
  49. skill_seekers/cli/run_tests.py +230 -0
  50. skill_seekers/cli/setup_wizard.py +93 -0
  51. skill_seekers/cli/split_config.py +390 -0
  52. skill_seekers/cli/swift_patterns.py +560 -0
  53. skill_seekers/cli/test_example_extractor.py +1081 -0
  54. skill_seekers/cli/test_unified_simple.py +179 -0
  55. skill_seekers/cli/unified_codebase_analyzer.py +572 -0
  56. skill_seekers/cli/unified_scraper.py +932 -0
  57. skill_seekers/cli/unified_skill_builder.py +1605 -0
  58. skill_seekers/cli/upload_skill.py +162 -0
  59. skill_seekers/cli/utils.py +432 -0
  60. skill_seekers/mcp/__init__.py +33 -0
  61. skill_seekers/mcp/agent_detector.py +316 -0
  62. skill_seekers/mcp/git_repo.py +273 -0
  63. skill_seekers/mcp/server.py +231 -0
  64. skill_seekers/mcp/server_fastmcp.py +1249 -0
  65. skill_seekers/mcp/server_legacy.py +2302 -0
  66. skill_seekers/mcp/source_manager.py +285 -0
  67. skill_seekers/mcp/tools/__init__.py +115 -0
  68. skill_seekers/mcp/tools/config_tools.py +251 -0
  69. skill_seekers/mcp/tools/packaging_tools.py +826 -0
  70. skill_seekers/mcp/tools/scraping_tools.py +842 -0
  71. skill_seekers/mcp/tools/source_tools.py +828 -0
  72. skill_seekers/mcp/tools/splitting_tools.py +212 -0
  73. skill_seekers/py.typed +0 -0
  74. skill_seekers-2.7.3.dist-info/METADATA +2027 -0
  75. skill_seekers-2.7.3.dist-info/RECORD +79 -0
  76. skill_seekers-2.7.3.dist-info/WHEEL +5 -0
  77. skill_seekers-2.7.3.dist-info/entry_points.txt +19 -0
  78. skill_seekers-2.7.3.dist-info/licenses/LICENSE +21 -0
  79. skill_seekers-2.7.3.dist-info/top_level.txt +1 -0
skill_seekers/cli/llms_txt_downloader.py
@@ -0,0 +1,104 @@
+ """ABOUTME: Downloads llms.txt files from documentation URLs with retry logic"""
+
+ import time
+
+ import requests
+
+
+ class LlmsTxtDownloader:
+     """Download llms.txt content from URLs with retry logic"""
+
+     def __init__(self, url: str, timeout: int = 30, max_retries: int = 3):
+         self.url = url
+         self.timeout = timeout
+         self.max_retries = max_retries
+
+     def get_proper_filename(self) -> str:
+         """
+         Extract filename from URL and convert .txt to .md
+
+         Returns:
+             Proper filename with .md extension
+
+         Examples:
+             https://hono.dev/llms-full.txt -> llms-full.md
+             https://hono.dev/llms.txt -> llms.md
+             https://hono.dev/llms-small.txt -> llms-small.md
+         """
+         # Extract filename from URL
+         from urllib.parse import urlparse
+
+         parsed = urlparse(self.url)
+         filename = parsed.path.split("/")[-1]
+
+         # Replace .txt with .md
+         if filename.endswith(".txt"):
+             filename = filename[:-4] + ".md"
+
+         return filename
+
+     def _is_markdown(self, content: str) -> bool:
+         """
+         Check if content looks like markdown (not HTML).
+
+         Returns:
+             True if content contains markdown patterns and is NOT HTML
+         """
+         # First, reject HTML content (common redirect trap)
+         content_start = content.strip()[:500].lower()
+         html_indicators = [
+             "<!doctype html",
+             "<html",
+             "<!doctype",
+             "<head>",
+             "<meta charset",
+         ]
+         if any(indicator in content_start for indicator in html_indicators):
+             return False
+
+         # Then check for markdown patterns
+         markdown_patterns = ["# ", "## ", "```", "- ", "* ", "`"]
+         return any(pattern in content for pattern in markdown_patterns)
+
+     def download(self) -> str | None:
+         """
+         Download llms.txt content with retry logic.
+
+         Returns:
+             String content or None if download fails
+         """
+         headers = {"User-Agent": "Skill-Seekers-llms.txt-Reader/1.0"}
+
+         for attempt in range(self.max_retries):
+             try:
+                 response = requests.get(self.url, headers=headers, timeout=self.timeout)
+                 response.raise_for_status()
+
+                 content = response.text
+
+                 # Validate content is not empty
+                 if len(content) < 100:
+                     print(f"⚠️ Content too short ({len(content)} chars), rejecting")
+                     return None
+
+                 # Validate content looks like markdown
+                 if not self._is_markdown(content):
+                     print("⚠️ Content doesn't look like markdown")
+                     return None
+
+                 return content
+
+             except requests.RequestException as e:
+                 if attempt < self.max_retries - 1:
+                     # Calculate exponential backoff delay: 1s, 2s, 4s, etc.
+                     delay = 2**attempt
+                     print(f"⚠️ Attempt {attempt + 1}/{self.max_retries} failed: {e}")
+                     print(f"   Retrying in {delay}s...")
+                     time.sleep(delay)
+                 else:
+                     print(
+                         f"❌ Failed to download {self.url} after {self.max_retries} attempts: {e}"
+                     )
+                     return None
+
+         return None
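
A minimal usage sketch for the downloader added above (illustrative only, not part of the package diff; it assumes the wheel is installed so `skill_seekers.cli.llms_txt_downloader` is importable, and reuses the hono.dev URL from the docstring examples):

    from skill_seekers.cli.llms_txt_downloader import LlmsTxtDownloader

    # download() returns the markdown text, or None on failure / HTML / short content
    downloader = LlmsTxtDownloader("https://hono.dev/llms.txt", timeout=30, max_retries=3)
    content = downloader.download()
    if content is not None:
        # get_proper_filename() maps llms.txt -> llms.md for saving as markdown
        with open(downloader.get_proper_filename(), "w", encoding="utf-8") as f:
            f.write(content)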
skill_seekers/cli/llms_txt_parser.py
@@ -0,0 +1,150 @@
+ """ABOUTME: Parses llms.txt markdown content into structured page data"""
+
+ import re
+ from urllib.parse import urljoin
+
+
+ class LlmsTxtParser:
+     """Parse llms.txt markdown content into page structures"""
+
+     def __init__(self, content: str, base_url: str = None):
+         self.content = content
+         self.base_url = base_url
+
+     def extract_urls(self) -> list[str]:
+         """
+         Extract all URLs from the llms.txt content.
+
+         Supports both markdown-style links [text](url) and bare URLs.
+         Resolves relative URLs using base_url if provided.
+         Filters out malformed URLs with invalid anchor patterns.
+
+         Returns:
+             List of unique, cleaned URLs found in the content.
+             Returns empty list if no valid URLs found.
+
+         Note:
+             - Markdown links: [Getting Started](./docs/guide.md)
+             - Bare URLs: https://example.com/api.md
+             - Relative paths resolved with base_url
+             - Invalid anchors (#section/path.md) are stripped
+         """
+         urls = set()
+
+         # Match markdown links: [text](url)
+         md_links = re.findall(r"\[([^\]]*)\]\(([^)]+)\)", self.content)
+         for _, url in md_links:
+             if url.startswith("http"):
+                 clean_url = self._clean_url(url)
+                 if clean_url:
+                     urls.add(clean_url)
+             elif self.base_url and not url.startswith("#"):
+                 clean_url = self._clean_url(urljoin(self.base_url, url))
+                 if clean_url:
+                     urls.add(clean_url)
+
+         # Match bare URLs
+         bare_urls = re.findall(r'https?://[^\s\)\]<>"\']+', self.content)
+         for url in bare_urls:
+             # Clean trailing punctuation
+             url = url.rstrip(".,;:")
+             clean_url = self._clean_url(url)
+             if clean_url:
+                 urls.add(clean_url)
+
+         return list(urls)
+
+     def _clean_url(self, url: str) -> str:
+         """
+         Clean and validate URL, removing invalid anchor patterns.
+
+         Detects and strips malformed anchors that contain path separators.
+         Valid: https://example.com/page.md#section
+         Invalid: https://example.com/page#section/index.html.md
+
+         Args:
+             url: URL to clean (absolute or relative)
+
+         Returns:
+             Cleaned URL with malformed anchors stripped.
+             Returns base URL if anchor contains '/' (malformed).
+             Returns original URL if anchor is valid or no anchor present.
+
+         Example:
+             >>> parser._clean_url("https://ex.com/page#sec/path.md")
+             "https://ex.com/page"
+             >>> parser._clean_url("https://ex.com/page.md#section")
+             "https://ex.com/page.md#section"
+         """
+         # Skip URLs with path after anchor (e.g., #section/index.html.md)
+         # These are malformed and return duplicate HTML content
+         if "#" in url:
+             anchor_pos = url.index("#")
+             after_anchor = url[anchor_pos + 1 :]
+             # If there's a path separator after anchor, it's invalid
+             if "/" in after_anchor:
+                 # Extract the base URL without the malformed anchor
+                 return url[:anchor_pos]
+         return url
+
+     def parse(self) -> list[dict]:
+         """
+         Parse markdown content into page structures.
+
+         Returns:
+             List of page dicts with title, content, code_samples, headings
+         """
+         pages = []
+
+         # Split by h1 headers (# Title)
+         sections = re.split(r"\n# ", self.content)
+
+         for section in sections:
+             if not section.strip():
+                 continue
+
+             # First line is title
+             lines = section.split("\n")
+             title = lines[0].strip("#").strip()
+
+             # Parse content
+             page = self._parse_section("\n".join(lines[1:]), title)
+             pages.append(page)
+
+         return pages
+
+     def _parse_section(self, content: str, title: str) -> dict:
+         """Parse a single section into page structure"""
+         page = {
+             "title": title,
+             "content": "",
+             "code_samples": [],
+             "headings": [],
+             "url": f"llms-txt#{title.lower().replace(' ', '-')}",
+             "links": [],
+         }
+
+         # Extract code blocks
+         code_blocks = re.findall(r"```(\w+)?\n(.*?)```", content, re.DOTALL)
+         for lang, code in code_blocks:
+             page["code_samples"].append({"code": code.strip(), "language": lang or "unknown"})
+
+         # Extract h2/h3 headings
+         headings = re.findall(r"^(#{2,3})\s+(.+)$", content, re.MULTILINE)
+         for level_markers, text in headings:
+             page["headings"].append(
+                 {
+                     "level": f"h{len(level_markers)}",
+                     "text": text.strip(),
+                     "id": text.lower().replace(" ", "-"),
+                 }
+             )
+
+         # Remove code blocks from content for plain text
+         content_no_code = re.sub(r"```.*?```", "", content, flags=re.DOTALL)
+
+         # Extract paragraphs
+         paragraphs = [p.strip() for p in content_no_code.split("\n\n") if len(p.strip()) > 20]
+         page["content"] = "\n\n".join(paragraphs)
+
+         return page
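
A minimal sketch combining the two modules above (illustrative only; the URL and base_url are examples taken from the docstrings, and the import paths follow the file list at the top of this diff):

    from skill_seekers.cli.llms_txt_downloader import LlmsTxtDownloader
    from skill_seekers.cli.llms_txt_parser import LlmsTxtParser

    content = LlmsTxtDownloader("https://hono.dev/llms.txt").download()
    if content:
        parser = LlmsTxtParser(content, base_url="https://hono.dev/")
        urls = parser.extract_urls()   # unique URLs, malformed anchors stripped
        pages = parser.parse()         # one dict per "# " section: title, content, code_samples, headings
        print(f"Found {len(urls)} URLs across {len(pages)} pages")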