PyPI - skill-seekers - Versions diffs - 2.7.3__py3-none-any.whl - Mend

skill-seekers 2.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (79) hide show

skill_seekers/__init__.py +22 -0
skill_seekers/cli/__init__.py +39 -0
skill_seekers/cli/adaptors/__init__.py +120 -0
skill_seekers/cli/adaptors/base.py +221 -0
skill_seekers/cli/adaptors/claude.py +485 -0
skill_seekers/cli/adaptors/gemini.py +453 -0
skill_seekers/cli/adaptors/markdown.py +269 -0
skill_seekers/cli/adaptors/openai.py +503 -0
skill_seekers/cli/ai_enhancer.py +310 -0
skill_seekers/cli/api_reference_builder.py +373 -0
skill_seekers/cli/architectural_pattern_detector.py +525 -0
skill_seekers/cli/code_analyzer.py +1462 -0
skill_seekers/cli/codebase_scraper.py +1225 -0
skill_seekers/cli/config_command.py +563 -0
skill_seekers/cli/config_enhancer.py +431 -0
skill_seekers/cli/config_extractor.py +871 -0
skill_seekers/cli/config_manager.py +452 -0
skill_seekers/cli/config_validator.py +394 -0
skill_seekers/cli/conflict_detector.py +528 -0
skill_seekers/cli/constants.py +72 -0
skill_seekers/cli/dependency_analyzer.py +757 -0
skill_seekers/cli/doc_scraper.py +2332 -0
skill_seekers/cli/enhance_skill.py +488 -0
skill_seekers/cli/enhance_skill_local.py +1096 -0
skill_seekers/cli/enhance_status.py +194 -0
skill_seekers/cli/estimate_pages.py +433 -0
skill_seekers/cli/generate_router.py +1209 -0
skill_seekers/cli/github_fetcher.py +534 -0
skill_seekers/cli/github_scraper.py +1466 -0
skill_seekers/cli/guide_enhancer.py +723 -0
skill_seekers/cli/how_to_guide_builder.py +1267 -0
skill_seekers/cli/install_agent.py +461 -0
skill_seekers/cli/install_skill.py +178 -0
skill_seekers/cli/language_detector.py +614 -0
skill_seekers/cli/llms_txt_detector.py +60 -0
skill_seekers/cli/llms_txt_downloader.py +104 -0
skill_seekers/cli/llms_txt_parser.py +150 -0
skill_seekers/cli/main.py +558 -0
skill_seekers/cli/markdown_cleaner.py +132 -0
skill_seekers/cli/merge_sources.py +806 -0
skill_seekers/cli/package_multi.py +77 -0
skill_seekers/cli/package_skill.py +241 -0
skill_seekers/cli/pattern_recognizer.py +1825 -0
skill_seekers/cli/pdf_extractor_poc.py +1166 -0
skill_seekers/cli/pdf_scraper.py +617 -0
skill_seekers/cli/quality_checker.py +519 -0
skill_seekers/cli/rate_limit_handler.py +438 -0
skill_seekers/cli/resume_command.py +160 -0
skill_seekers/cli/run_tests.py +230 -0
skill_seekers/cli/setup_wizard.py +93 -0
skill_seekers/cli/split_config.py +390 -0
skill_seekers/cli/swift_patterns.py +560 -0
skill_seekers/cli/test_example_extractor.py +1081 -0
skill_seekers/cli/test_unified_simple.py +179 -0
skill_seekers/cli/unified_codebase_analyzer.py +572 -0
skill_seekers/cli/unified_scraper.py +932 -0
skill_seekers/cli/unified_skill_builder.py +1605 -0
skill_seekers/cli/upload_skill.py +162 -0
skill_seekers/cli/utils.py +432 -0
skill_seekers/mcp/__init__.py +33 -0
skill_seekers/mcp/agent_detector.py +316 -0
skill_seekers/mcp/git_repo.py +273 -0
skill_seekers/mcp/server.py +231 -0
skill_seekers/mcp/server_fastmcp.py +1249 -0
skill_seekers/mcp/server_legacy.py +2302 -0
skill_seekers/mcp/source_manager.py +285 -0
skill_seekers/mcp/tools/__init__.py +115 -0
skill_seekers/mcp/tools/config_tools.py +251 -0
skill_seekers/mcp/tools/packaging_tools.py +826 -0
skill_seekers/mcp/tools/scraping_tools.py +842 -0
skill_seekers/mcp/tools/source_tools.py +828 -0
skill_seekers/mcp/tools/splitting_tools.py +212 -0
skill_seekers/py.typed +0 -0
skill_seekers-2.7.3.dist-info/METADATA +2027 -0
skill_seekers-2.7.3.dist-info/RECORD +79 -0
skill_seekers-2.7.3.dist-info/WHEEL +5 -0
skill_seekers-2.7.3.dist-info/entry_points.txt +19 -0
skill_seekers-2.7.3.dist-info/licenses/LICENSE +21 -0
skill_seekers-2.7.3.dist-info/top_level.txt +1 -0

skill_seekers/cli/rate_limit_handler.py ADDED Viewed

@@ -0,0 +1,438 @@
+"""
+Rate Limit Handler for GitHub API
+Handles GitHub API rate limits with smart strategies:
+- Upfront warnings about token status
+- Real-time countdown timers
+- Profile switching for multi-token setups
+- Progress auto-save on interruption
+- Non-interactive mode for CI/CD
+"""
+import sys
+import time
+from datetime import datetime
+from typing import Any
+import requests
+from .config_manager import get_config_manager
+class RateLimitError(Exception):
+    """Signals that a GitHub rate limit was hit and could not be handled."""
+class RateLimitHandler:
+    """
+    Handles GitHub API rate limits with multiple strategies.
+    The strategy is read from the config manager for the given token:
+    "fail" aborts, "switch" tries another profile, "wait" sleeps until the
+    reset; any other value prompts the user (interactive) or raises
+    RateLimitError (non-interactive). Handlers without a token always use
+    the "prompt" strategy.
+    Usage:
+        handler = RateLimitHandler(
+            token=github_token,
+            interactive=True,
+            profile_name="personal"
+        )
+        # Before starting
+        handler.check_upfront()
+        # Around requests
+        response = requests.get(url, headers=headers)
+        handler.check_response(response)
+    """
+    # GitHub signals primary and secondary rate limits with either status.
+    _RATE_LIMIT_STATUS_CODES = (403, 429)
+    def __init__(
+        self,
+        token: str | None = None,
+        interactive: bool = True,
+        profile_name: str | None = None,
+        auto_switch: bool = True,
+    ):
+        """
+        Initialize rate limit handler.
+        Args:
+            token: GitHub token (or None for unauthenticated)
+            interactive: Whether to show prompts (False for CI/CD)
+            profile_name: Name of the profile being used
+            auto_switch: Whether to auto-switch profiles when rate limited
+        """
+        self.token = token
+        self.interactive = interactive
+        self.profile_name = profile_name
+        self.config = get_config_manager()
+        # Global settings; auto-switching needs both the argument and the config flag
+        rate_limit_settings = self.config.config["rate_limit"]
+        self.auto_switch = auto_switch and rate_limit_settings["auto_switch_profiles"]
+        self.show_countdown = rate_limit_settings["show_countdown"]
+        self.default_timeout = rate_limit_settings["default_timeout_minutes"]
+        # Profile-specific settings are only looked up when a token is present
+        if token:
+            self.strategy = self.config.get_rate_limit_strategy(token)
+            self.timeout_minutes = self.config.get_timeout_minutes(token)
+        else:
+            self.strategy = "prompt"
+            self.timeout_minutes = self.default_timeout
+    def check_upfront(self) -> bool:
+        """
+        Check rate limit status before starting.
+        Shows non-intrusive warning if no token configured.
+        Returns:
+            True if check passed, False if should abort
+        """
+        if not self.token:
+            print("\n💡 Tip: GitHub API limit is 60 requests/hour without a token.")
+            print("   Set up a GitHub token for 5000 requests/hour:")
+            print("   $ skill-seekers config --github")
+            print()
+            if self.interactive:
+                response = input("Continue without token? [Y/n]: ").strip().lower()
+                if response in ["n", "no"]:
+                    print("\n✅ Run 'skill-seekers config --github' to set up a token.\n")
+                    return False
+            return True
+        # Only the status lookup is best-effort: a failed lookup must not block
+        # the job, but errors raised while *handling* an exhausted limit should
+        # not be reported as a failed lookup.
+        try:
+            rate_info = self.get_rate_limit_info()
+        except Exception as e:
+            print(f"⚠️  Could not check rate limit status: {e}")
+            print("   Proceeding anyway...")
+            return True
+        remaining = rate_info.get("remaining", 0)
+        limit = rate_info.get("limit", 5000)
+        if remaining == 0:
+            print(f"\n⚠️  Warning: GitHub rate limit already exhausted (0/{limit})")
+            reset_time = rate_info.get("reset_time")
+            if reset_time:
+                # Clamp so a reset time that has just passed never prints as negative
+                wait_minutes = max(0.0, (reset_time - datetime.now()).total_seconds()) / 60
+                print(f"   Resets in {int(wait_minutes)} minutes")
+            if self.interactive:
+                return self.handle_rate_limit(rate_info)
+            print("\n❌ Cannot proceed: Rate limit exhausted (non-interactive mode)\n")
+            return False
+        # Show friendly status
+        if remaining < 100:
+            print(f"⚠️  GitHub API: {remaining}/{limit} requests remaining")
+        else:
+            print(f"✅ GitHub API: {remaining}/{limit} requests available")
+        return True
+    def check_response(self, response: requests.Response) -> bool:
+        """
+        Check if response indicates rate limit and handle it.
+        Args:
+            response: requests.Response object
+        Returns:
+            True if the response is not rate limited or the limit was handled,
+            False if should abort
+        Raises:
+            RateLimitError: If rate limit cannot be handled
+        """
+        if response.status_code not in self._RATE_LIMIT_STATUS_CODES:
+            return True
+        # Guard only the body parsing. Wrapping the handler call as well would
+        # swallow the RateLimitError this method is documented to raise.
+        try:
+            error_data = response.json()
+        except ValueError:
+            return True  # Body is not JSON, so not a rate limit payload
+        message = error_data.get("message", "") if isinstance(error_data, dict) else ""
+        if "rate limit" not in str(message).lower():
+            return True  # Some other 403/429 (permissions, abuse block, ...)
+        rate_info = self.extract_rate_limit_info(response)
+        return self.handle_rate_limit(rate_info)
+    def extract_rate_limit_info(self, response: requests.Response) -> dict[str, Any]:
+        """
+        Extract rate limit information from response headers.
+        Missing or non-numeric header values are treated as 0.
+        Args:
+            response: requests.Response with rate limit headers
+        Returns:
+            Dict with keys "limit", "remaining", "reset_timestamp" and
+            "reset_time" (a naive local datetime, or None when no reset
+            timestamp is available)
+        """
+        headers = response.headers
+        def header_int(name: str) -> int:
+            # A malformed header must not crash rate limit handling
+            try:
+                return int(headers.get(name, 0))
+            except (TypeError, ValueError):
+                return 0
+        limit = header_int("X-RateLimit-Limit")
+        remaining = header_int("X-RateLimit-Remaining")
+        reset_timestamp = header_int("X-RateLimit-Reset")
+        reset_time = datetime.fromtimestamp(reset_timestamp) if reset_timestamp else None
+        return {
+            "limit": limit,
+            "remaining": remaining,
+            "reset_timestamp": reset_timestamp,
+            "reset_time": reset_time,
+        }
+    def get_rate_limit_info(self) -> dict[str, Any]:
+        """
+        Get current rate limit status from GitHub API.
+        Returns:
+            Dict with the same keys as extract_rate_limit_info()
+        Raises:
+            requests.RequestException: If the request fails or returns an error status
+        """
+        url = "https://api.github.com/rate_limit"
+        headers = {}
+        if self.token:
+            headers["Authorization"] = f"token {self.token}"
+        response = requests.get(url, headers=headers, timeout=5)
+        response.raise_for_status()
+        data = response.json()
+        # GitHub deprecated the top-level "rate" object in favour of
+        # resources.core; fall back to "rate" if "core" is absent.
+        core = (data.get("resources") or {}).get("core") or data.get("rate", {})
+        reset_timestamp = core.get("reset", 0)
+        reset_time = datetime.fromtimestamp(reset_timestamp) if reset_timestamp else None
+        return {
+            "limit": core.get("limit", 0),
+            "remaining": core.get("remaining", 0),
+            "reset_timestamp": reset_timestamp,
+            "reset_time": reset_time,
+        }
+    def handle_rate_limit(self, rate_info: dict[str, Any]) -> bool:
+        """
+        Handle rate limit based on strategy.
+        Args:
+            rate_info: Dict with rate limit information
+        Returns:
+            True if handled (can continue), False if should abort
+        Raises:
+            RateLimitError: If cannot handle in non-interactive mode
+        """
+        reset_time = rate_info.get("reset_time")
+        remaining = rate_info.get("remaining", 0)
+        limit = rate_info.get("limit", 0)
+        print("\n⚠️  GitHub Rate Limit Reached")
+        print(f"   Profile: {self.profile_name or 'default'}")
+        print(f"   Limit: {remaining}/{limit} requests")
+        if reset_time:
+            # Clamp at zero: the reset may already have passed by the time we get here
+            wait_seconds = max(0.0, (reset_time - datetime.now()).total_seconds())
+            wait_minutes = int(wait_seconds / 60)
+            print(f"   Resets at: {reset_time.strftime('%H:%M:%S')} ({wait_minutes} minutes)")
+        else:
+            wait_seconds = 0
+            wait_minutes = 0
+        print()
+        # Strategy-based handling
+        if self.strategy == "fail":
+            print("❌ Strategy: fail - Aborting immediately")
+            if not self.interactive:
+                raise RateLimitError("Rate limit exceeded (fail strategy)")
+            return False
+        if self.strategy == "switch" and self.auto_switch:
+            # Try switching to another profile
+            if self.try_switch_profile():
+                return True
+            print("⚠️  No alternative profiles available")
+            # Fall through to the prompt / non-interactive handling below
+        if self.strategy == "wait":
+            # Auto-wait with countdown
+            return self.wait_for_reset(wait_seconds, wait_minutes)
+        # Default: prompt user (if interactive)
+        if self.interactive:
+            return self.prompt_user_action(wait_seconds, wait_minutes)
+        # Non-interactive mode: fail
+        raise RateLimitError("Rate limit exceeded (non-interactive mode)")
+    def try_switch_profile(self) -> bool:
+        """
+        Try to switch to another GitHub profile.
+        On success self.token and self.profile_name point at the new profile;
+        on failure the previous token is restored.
+        Returns:
+            True if switched successfully, False otherwise
+        """
+        if not self.token:
+            return False
+        next_profile_data = self.config.get_next_profile(self.token)
+        if not next_profile_data:
+            return False
+        next_name, next_token = next_profile_data
+        print(f"🔄 Switching to profile: {next_name}")
+        # Swap the token temporarily so get_rate_limit_info() queries the candidate profile
+        old_token = self.token
+        self.token = next_token
+        try:
+            rate_info = self.get_rate_limit_info()
+        except Exception as e:
+            print(f"❌ Failed to switch profiles: {e}")
+            self.token = old_token  # Restore old token
+            return False
+        remaining = rate_info.get("remaining", 0)
+        limit = rate_info.get("limit", 0)
+        if remaining > 0:
+            print(f"✅ Profile '{next_name}' has {remaining}/{limit} requests available")
+            self.profile_name = next_name
+            return True
+        print(f"⚠️  Profile '{next_name}' also exhausted ({remaining}/{limit})")
+        self.token = old_token  # Restore old token
+        return False
+    def wait_for_reset(self, wait_seconds: float, wait_minutes: int) -> bool:
+        """
+        Wait for rate limit to reset with countdown.
+        Args:
+            wait_seconds: Seconds to wait
+            wait_minutes: Minutes to wait (for display and the timeout check)
+        Returns:
+            True if waited successfully, False if the wait exceeds the
+            configured timeout or the user interrupts it
+        """
+        # Check timeout
+        if wait_minutes > self.timeout_minutes:
+            print(f"⚠️  Wait time ({wait_minutes}m) exceeds timeout ({self.timeout_minutes}m)")
+            return False
+        if wait_seconds <= 0:
+            print("✅ Rate limit should be reset now")
+            return True
+        print(f"⏳ Waiting {wait_minutes} minutes for rate limit reset...")
+        print("   Press Ctrl+C to cancel\n")
+        try:
+            if self.show_countdown:
+                self.show_countdown_timer(wait_seconds)
+            else:
+                time.sleep(wait_seconds)
+            print("\n✅ Rate limit reset! Continuing...\n")
+            return True
+        except KeyboardInterrupt:
+            print("\n\n⏸️  Wait interrupted by user")
+            return False
+    def show_countdown_timer(self, total_seconds: float):
+        """
+        Show a live countdown timer.
+        Args:
+            total_seconds: Total seconds to count down
+        """
+        # Monotonic clock: the countdown measures a duration, so wall-clock
+        # adjustments during the wait must not stretch or shorten it.
+        deadline = time.monotonic() + total_seconds
+        while True:
+            left = deadline - time.monotonic()
+            if left <= 0:
+                break
+            minutes, seconds = divmod(int(left), 60)
+            # Print countdown on same line
+            sys.stdout.write(f"\r⏱️  Resuming in {minutes:02d}:{seconds:02d}...")
+            sys.stdout.flush()
+            # Never sleep past the deadline on the final tick
+            time.sleep(min(1.0, left))
+        sys.stdout.write("\r" + " " * 50 + "\r")  # Clear line
+        sys.stdout.flush()
+    def prompt_user_action(self, wait_seconds: float, wait_minutes: int) -> bool:
+        """
+        Prompt user for action when rate limited.
+        The switch option is only offered (and accepted) when the config
+        manager reports another profile for the current token.
+        Args:
+            wait_seconds: Seconds until reset
+            wait_minutes: Minutes until reset
+        Returns:
+            True if user chooses to continue, False to abort
+        """
+        # Check if profile switching is available
+        can_switch = bool(self.token and self.config.get_next_profile(self.token))
+        print("Options:")
+        print(f"  [w] Wait {wait_minutes} minutes (auto-continues)")
+        if can_switch:
+            print("  [s] Switch to another GitHub profile")
+        print("  [t] Set up new GitHub token")
+        print("  [c] Cancel")
+        print()
+        # Keep the prompt and the error hint in sync with the options actually listed
+        prompt_keys = "w/s/t/c" if can_switch else "w/t/c"
+        hint_keys = "w, s, t, or c" if can_switch else "w, t, or c"
+        while True:
+            choice = input(f"Select an option [{prompt_keys}]: ").strip().lower()
+            if choice == "w":
+                return self.wait_for_reset(wait_seconds, wait_minutes)
+            elif choice == "s" and can_switch:
+                if self.try_switch_profile():
+                    return True
+                print("⚠️  Profile switching failed. Choose another option.")
+            elif choice == "t":
+                print("\n💡 Opening GitHub token setup...")
+                print("   Run this command in another terminal:")
+                print("   $ skill-seekers config --github\n")
+                print("   Then restart your scraping job.\n")
+                return False
+            elif choice == "c":
+                print("\n⏸️  Operation cancelled by user\n")
+                return False
+            else:
+                print(f"❌ Invalid choice. Please enter {hint_keys}.")
+def create_github_headers(token: str | None = None) -> dict[str, str]:
+    """
+    Build the headers for a GitHub API request.
+    Args:
+        token: GitHub token; a falsy value (None or "") means unauthenticated
+    Returns:
+        A new dict holding an "Authorization" header when a token is given,
+        otherwise an empty dict
+    """
+    if not token:
+        return {}
+    return {"Authorization": f"token {token}"}

skill_seekers/cli/resume_command.py ADDED Viewed

@@ -0,0 +1,160 @@
+"""
+Resume Command for Skill Seekers
+Allows users to resume interrupted scraping jobs from saved progress.
+"""
+import argparse
+import sys
+from .config_manager import get_config_manager
+def list_resumable_jobs():
+    """Print each resumable job reported by the config manager, or a hint when there are none."""
+    resumable = get_config_manager().list_resumable_jobs()
+    if not resumable:
+        print("\n📦 No resumable jobs found.\n")
+        print("Jobs are automatically saved when:")
+        print("  • You interrupt a scraping operation (Ctrl+C)")
+        print("  • A rate limit is reached")
+        print("  • An error occurs during scraping\n")
+        return
+    print(f"\n📦 Resumable Jobs ({len(resumable)} available):\n")
+    for number, entry in enumerate(resumable, start=1):
+        # "job_id" is the only mandatory key; everything else has a fallback
+        identifier = entry["job_id"]
+        started_at = entry.get("started_at", "Unknown")
+        original_command = entry.get("command", "Unknown")
+        details = entry.get("progress", {})
+        updated_at = entry.get("last_updated", "Unknown")
+        print(f"{number}. Job ID: {identifier}")
+        print(f"   Started: {started_at}")
+        print(f"   Command: {original_command}")
+        if details:
+            current_phase = details.get("phase", "Unknown")
+            done = details.get("files_processed", 0)
+            total = details.get("files_total", 0)
+            print(f"   Progress: {current_phase}")
+            # Skip the file line when the total is zero or missing
+            if total > 0:
+                share = (done / total) * 100
+                print(f"   Files: {done}/{total} ({share:.1f}%)")
+        print(f"   Last updated: {updated_at}")
+        print()
+    print("To resume a job:")
+    print("  $ skill-seekers resume <job_id>\n")
+def resume_job(job_id: str):
+    """
+    Look up a saved job and tell the user how to continue it.
+    Resuming is not implemented for any job type yet, so every path prints
+    guidance and returns 1.
+    Args:
+        job_id: Identifier of the saved job to look up
+    Returns:
+        1 in all cases
+    """
+    manager = get_config_manager()
+    print(f"\n🔄 Resuming job: {job_id}\n")
+    saved = manager.load_progress(job_id)
+    if not saved:
+        print(f"❌ Job '{job_id}' not found or cannot be resumed.\n")
+        print("Use 'skill-seekers resume --list' to see available jobs.\n")
+        return 1
+    if not saved.get("can_resume", False):
+        print(f"❌ Job '{job_id}' is not marked as resumable.\n")
+        return 1
+    command = saved.get("command", "")
+    _job_config = saved.get("config", {})  # read but currently unused
+    checkpoint = saved.get("progress", {}).get("last_checkpoint")
+    print(f"Original command: {command}")
+    print(f"Last checkpoint: {checkpoint or 'Unknown'}")
+    print()
+    # (keyword, banner, notice), checked in this order; first keyword found in the command wins
+    job_kinds = (
+        (
+            "github",
+            "📌 Resuming GitHub scraping...",
+            "⚠️  Note: GitHub resume feature not yet implemented",
+        ),
+        (
+            "scrape",
+            "📌 Resuming documentation scraping...",
+            "⚠️  Note: Documentation scraping resume feature not yet implemented",
+        ),
+        (
+            "unified",
+            "📌 Resuming unified scraping...",
+            "⚠️  Note: Unified scraping resume feature not yet implemented",
+        ),
+    )
+    for keyword, banner, notice in job_kinds:
+        if keyword in command:
+            print(banner)
+            print(notice)
+            print("   You can re-run the original command - it will use cached data where available.\n")
+            print(f"   Command: {command}\n")
+            return 1
+    print("❌ Unknown job type. Cannot resume.\n")
+    return 1
+def clean_old_jobs():
+    """Ask the config manager to clean up old progress files and report the change in job count."""
+    manager = get_config_manager()
+    print("\n🧹 Cleaning up old progress files...\n")
+    # The number deleted is the difference in resumable jobs before and after cleanup
+    count_before = len(manager.list_resumable_jobs())
+    manager.cleanup_old_progress()
+    count_after = len(manager.list_resumable_jobs())
+    removed = count_before - count_after
+    print(f"✅ Deleted {removed} old job(s)" if removed > 0 else "✅ No old jobs to clean up")
+    # With nothing left, print just a blank line
+    print(f"📦 {count_after} job(s) remaining\n" if count_after > 0 else "")
+def main():
+    """
+    Parse the resume command's arguments and dispatch to the matching action.
+    Returns:
+        0 after --list or --clean, 1 when no job id is given, otherwise
+        whatever resume_job() returns
+    """
+    parser = argparse.ArgumentParser(description="Resume interrupted Skill Seekers jobs")
+    parser.add_argument("job_id", nargs="?", help="Job ID to resume")
+    parser.add_argument("--list", action="store_true", help="List all resumable jobs")
+    parser.add_argument("--clean", action="store_true", help="Clean up old progress files")
+    args = parser.parse_args()
+    # Flags take precedence over a positional job id; --list is checked before --clean
+    for requested, action in ((args.list, list_resumable_jobs), (args.clean, clean_old_jobs)):
+        if requested:
+            action()
+            return 0
+    if args.job_id:
+        return resume_job(args.job_id)
+    print("\n❌ Error: Job ID required or use --list to see available jobs\n")
+    parser.print_help()
+    return 1
+# Script entry point: exit the process with main()'s return value as the status code.
+if __name__ == "__main__":
+    sys.exit(main())