feedkit 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
feedkit/__init__.py ADDED
@@ -0,0 +1,8 @@
1
"""FeedKit — RSS/Atom feed collection with curated catalog."""

from feedkit.core import fetch_feed
from feedkit.catalog import search_catalog, get_catalog_stats
from feedkit.storage import FeedStore

# Public API re-exported at package level; keep in sync with the imports above.
__all__ = ["fetch_feed", "search_catalog", "get_catalog_stats", "FeedStore"]
__version__ = "0.1.0"
feedkit/__main__.py ADDED
@@ -0,0 +1,231 @@
1
+ """CLI entry point — python -m feedkit or `feedkit` command."""
2
+
3
+ import asyncio
4
+ import json
5
+ import sys
6
+
7
+ import click
8
+ from rich.console import Console
9
+ from rich.table import Table
10
+
11
+ from feedkit import __version__
12
+ from feedkit.catalog import get_catalog_stats, list_categories, search_catalog
13
+ from feedkit.core import collect, fetch_feed
14
+ from feedkit.storage import FeedStore
15
+
16
# Shared Rich console for all CLI output (tables and colored status lines).
console = Console()
17
+
18
+
19
def _get_store():
    """Return a fresh FeedStore instance for a single CLI command."""
    store = FeedStore()
    return store
21
+
22
+
23
@click.group()
@click.version_option(__version__, prog_name="feedkit")
def main():
    """FeedKit — RSS/Atom feed collection with curated catalog."""
    # Root command group; subcommands register themselves via @main.command().
27
+
28
+
29
@main.command()
@click.argument("query", default="")
@click.option("--category", "-c", default="", help="Filter by category")
@click.option("--language", "-l", default="", help="Filter by language (en, ko, ...)")
@click.option("--limit", "-n", default=20, help="Max results")
@click.option("--json-output", "-j", is_flag=True, help="JSON output")
def search(query, category, language, limit, json_output):
    """Search the built-in feed catalog."""
    matches = search_catalog(query, category=category, language=language, limit=limit)

    if json_output:
        # Machine-readable output for piping into other tools.
        payload = [
            {"url": m.url, "title": m.title, "category": m.category, "language": m.language}
            for m in matches
        ]
        click.echo(json.dumps(payload, ensure_ascii=False, indent=2))
        return

    table = Table(title=f"Catalog Search: {query or '(all)'} ({len(matches)} results)")
    table.add_column("Title", style="cyan", max_width=40)
    table.add_column("Category", style="green")
    table.add_column("Lang")
    table.add_column("URL", style="dim", max_width=50)
    for m in matches:
        table.add_row(m.title, m.category, m.language, m.url)
    console.print(table)
51
+
52
+
53
@main.command()
@click.argument("url")
@click.option("--category", "-c", default="", help="Category for this subscription")
@click.option("--title", "-t", default="", help="Title override")
def subscribe(url, category, title):
    """Subscribe to a feed."""
    store = _get_store()
    try:
        store.subscribe(url, title=title, category=category)
        console.print(f"[green]✓[/green] Subscribed to {url}")
    finally:
        # Always release the store, even if subscribe() raises —
        # the original leaked it on any exception.
        store.close()
63
+
64
+
65
@main.command()
@click.argument("url")
def unsubscribe(url):
    """Unsubscribe from a feed."""
    store = _get_store()
    try:
        store.unsubscribe(url)
        console.print(f"[yellow]✓[/yellow] Unsubscribed from {url}")
    finally:
        # Guarantee cleanup on failure; the original leaked the store on error.
        store.close()
73
+
74
+
75
@main.command("list")
def list_subs():
    """List all subscriptions."""
    store = _get_store()
    try:
        subs = store.list_subscriptions()

        if not subs:
            console.print("[dim]No subscriptions yet. Use `feedkit subscribe <url>` or `feedkit subscribe-catalog`.[/dim]")
        else:
            table = Table(title=f"Subscriptions ({len(subs)})")
            table.add_column("Title", style="cyan", max_width=35)
            table.add_column("Category", style="green")
            table.add_column("Fetched", justify="right")
            table.add_column("Errors", justify="right")
            for s in subs:
                # Fall back to a truncated URL when the feed has no title.
                table.add_row(s.title or s.feed_url[:35], s.category, str(s.fetch_count), str(s.error_count))
            console.print(table)
    finally:
        # Close the store even if listing/rendering raises (leak fix).
        store.close()
94
+
95
+
96
@main.command("subscribe-catalog")
@click.option("--category", "-c", required=True, help="Subscribe to all feeds in this category")
def subscribe_catalog(category):
    """Subscribe to all feeds in a catalog category."""
    feeds = search_catalog(category=category, limit=1000)
    if not feeds:
        console.print(f"[red]No feeds found in category '{category}'[/red]")
        return

    store = _get_store()
    try:
        for f in feeds:
            store.subscribe(f.url, title=f.title, category=f.category, language=f.language)
        console.print(f"[green]✓[/green] Subscribed to {len(feeds)} feeds in category '{category}'")
    finally:
        # Release the store even if a subscribe() call fails mid-loop (leak fix).
        store.close()
110
+
111
+
112
@main.command("collect")
@click.option("--category", "-c", default="", help="Only collect from this category")
@click.option("--concurrency", "-n", default=20, help="Max concurrent requests")
def collect_cmd(category, concurrency):
    """Collect articles from all subscribed feeds."""
    store = _get_store()
    try:
        sub_count = store.subscription_count()

        if sub_count == 0:
            console.print("[dim]No subscriptions. Use `feedkit subscribe` first.[/dim]")
            return

        console.print(f"Collecting from {sub_count} feeds...")

        try:
            result = asyncio.run(collect(store, category=category, concurrency=concurrency))
        except KeyboardInterrupt:
            # 130 = 128 + SIGINT, the conventional Ctrl-C exit status.
            sys.exit(130)

        console.print(f"[green]✓[/green] {result.feeds_ok}/{result.feeds_total} feeds OK, "
                      f"{result.new_articles} new articles, {result.duration_ms:.0f}ms")
        if result.errors:
            console.print(f"[yellow]{result.feeds_error} feeds failed[/yellow]")
    finally:
        # The original leaked the store on KeyboardInterrupt (sys.exit skipped
        # close()) and on any unexpected exception; finally guarantees cleanup.
        store.close()
138
+
139
+
140
@main.command()
@click.argument("query")
@click.option("--count", "-n", default=20, help="Max results")
def find(query, count):
    """Full-text search across collected articles."""
    store = _get_store()
    try:
        articles = store.search(query, count=count)

        if not articles:
            console.print("[dim]No matching articles found.[/dim]")
        else:
            table = Table(title=f"Search: '{query}' ({len(articles)} results)")
            table.add_column("Title", style="cyan", max_width=50)
            table.add_column("Published")
            table.add_column("URL", style="dim", max_width=50)
            for a in articles:
                table.add_row(a.title[:50], a.published or "", a.url[:50])
            console.print(table)
    finally:
        # Close the store even if search/rendering raises (leak fix).
        store.close()
160
+
161
+
162
@main.command()
@click.option("--count", "-n", default=20, help="Number of articles")
@click.option("--category", "-c", default="", help="Filter by category")
def latest(count, category):
    """Show latest collected articles."""
    store = _get_store()
    try:
        articles = store.get_latest(count=count, category=category)

        if not articles:
            console.print("[dim]No articles yet. Run `feedkit collect` first.[/dim]")
        else:
            table = Table(title=f"Latest Articles ({len(articles)})")
            table.add_column("Title", style="cyan", max_width=50)
            table.add_column("Published")
            table.add_column("Feed", style="dim", max_width=30)
            for a in articles:
                table.add_row(a.title[:50], a.published or "", a.feed_url[:30])
            console.print(table)
    finally:
        # Close the store even if the query or rendering raises (leak fix).
        store.close()
182
+
183
+
184
@main.command()
def stats():
    """Show catalog and subscription statistics."""
    # Catalog stats need no store and are printed first.
    cat_stats = get_catalog_stats()
    console.print(f"\n[bold]Catalog:[/bold] {cat_stats['total_feeds']} feeds")
    for cat, n in cat_stats["categories"].items():
        console.print(f" {cat}: {n}")

    store = _get_store()
    try:
        sub_count = store.subscription_count()
        art_count = store.article_count()
        console.print(f"\n[bold]Local:[/bold] {sub_count} subscriptions, {art_count} articles")
    finally:
        # Guarantee the store is released on failure (leak fix).
        store.close()
197
+
198
+
199
@main.command("categories")
def categories_cmd():
    """List available catalog categories, one per line."""
    for name in list_categories():
        console.print(f" {name}")
204
+
205
+
206
@main.command("import-opml")
@click.argument("path")
def import_opml_cmd(path):
    """Import feeds from an OPML file."""
    # Deferred import so OPML support is only loaded when used.
    from feedkit.opml import import_opml

    store = _get_store()
    try:
        count = import_opml(store, path)
        console.print(f"[green]✓[/green] Imported {count} feeds from {path}")
    finally:
        # Close the store even when the OPML file is missing/malformed (leak fix).
        store.close()
216
+
217
+
218
@main.command("export-opml")
@click.argument("path")
def export_opml_cmd(path):
    """Export subscriptions to an OPML file."""
    # Deferred import so OPML support is only loaded when used.
    from feedkit.opml import export_opml

    store = _get_store()
    try:
        count = export_opml(store, path)
        console.print(f"[green]✓[/green] Exported {count} feeds to {path}")
    finally:
        # Close the store even when writing the file fails (leak fix).
        store.close()
228
+
229
+
230
# Entry point for `python -m feedkit` and direct script execution.
if __name__ == "__main__":
    main()
feedkit/catalog.py ADDED
@@ -0,0 +1,104 @@
1
+ """Built-in curated feed catalog — 449 verified RSS/Atom feeds."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
# Curated catalog JSON bundled inside the package (feedkit/data/feeds.json).
_CATALOG_PATH = Path(__file__).parent / "data" / "feeds.json"
# Lazily-populated module-level cache; filled on first _load_catalog() call.
_catalog: list[dict] | None = None
14
+
15
+
16
@dataclass
class CatalogFeed:
    """A feed from the built-in catalog."""

    url: str          # feed URL (RSS/Atom endpoint)
    title: str        # human-readable feed title
    category: str     # top-level category, e.g. "technology"
    subcategory: str  # finer-grained grouping (may be empty)
    language: str     # language code, e.g. "en", "ko"
    domain: str       # site domain, used for substring search
26
+
27
+
28
def _load_catalog() -> list[dict]:
    """Load the bundled catalog, caching it at module level after first read."""
    global _catalog
    if _catalog is None:
        _catalog = json.loads(_CATALOG_PATH.read_text(encoding="utf-8"))
    return _catalog
34
+
35
+
36
def search_catalog(
    query: str = "",
    *,
    category: str = "",
    language: str = "",
    limit: int = 50,
) -> list[CatalogFeed]:
    """Search the built-in feed catalog.

    Args:
        query: Search by title or domain (case-insensitive substring match).
        category: Filter by category (e.g., "technology", "science", "finance").
        language: Filter by language (e.g., "en", "ko").
        limit: Maximum results to return; non-positive values return no results.
    """
    # Fix: the original checked the limit only AFTER appending, so limit=0
    # (or negative) still returned one result.
    if limit <= 0:
        return []

    catalog = _load_catalog()
    results: list[CatalogFeed] = []

    q = query.lower()
    for entry in catalog:
        if category and entry.get("category", "") != category:
            continue
        if language and entry.get("language", "") != language:
            continue
        if q:
            # Match against title, domain, or URL.
            haystacks = (
                entry.get("title", "").lower(),
                entry.get("domain", "").lower(),
                entry.get("url", "").lower(),
            )
            if not any(q in h for h in haystacks):
                continue

        results.append(CatalogFeed(
            url=entry["url"],
            title=entry.get("title", ""),
            category=entry.get("category", ""),
            subcategory=entry.get("subcategory", ""),
            language=entry.get("language", "en"),
            domain=entry.get("domain", ""),
        ))

        if len(results) >= limit:
            break

    return results
80
+
81
+
82
def get_catalog_stats() -> dict:
    """Get catalog statistics.

    Returns:
        Dict with the total feed count plus per-category and per-language
        counts, each sorted by key.
    """
    from collections import Counter

    catalog = _load_catalog()
    # Counter replaces the hand-rolled dict.get(k, 0) + 1 counting loops.
    categories = Counter(entry.get("category", "unknown") for entry in catalog)
    languages = Counter(entry.get("language", "unknown") for entry in catalog)

    return {
        "total_feeds": len(catalog),
        "categories": dict(sorted(categories.items())),
        "languages": dict(sorted(languages.items())),
    }
99
+
100
+
101
def list_categories() -> list[str]:
    """List all available categories."""
    names = set()
    for entry in _load_catalog():
        name = entry.get("category", "")
        if name:
            names.add(name)
    return sorted(names)
feedkit/core.py ADDED
@@ -0,0 +1,167 @@
1
+ """Core feed fetching logic — async HTTP + feedparser."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import logging
7
+ import time
8
+ from dataclasses import dataclass, field
9
+ from datetime import datetime
10
+
11
+ import feedparser
12
+ import httpx
13
+
14
+ from feedkit.storage import Article, FeedStore
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
DEFAULT_TIMEOUT = 15.0      # seconds per HTTP request
DEFAULT_CONCURRENCY = 20    # max simultaneous feed fetches during collect()
USER_AGENT = "FeedKit/0.1.0 (+https://github.com/QuartzUnit/feedkit)"
21
+
22
+
23
@dataclass
class FeedEntry:
    """A single entry from a feed."""

    title: str = ""               # entry headline
    url: str = ""                 # link to the article
    summary: str = ""             # HTML-stripped, truncated summary
    published: str | None = None  # ISO-8601 timestamp, or None if unparsable
    author: str = ""              # author name as given by the feed
    feed_url: str = ""            # URL of the feed this entry came from
    feed_title: str = ""          # title of the source feed
    guid: str = ""                # dedup key: entry id, else link, else title
35
+
36
+
37
@dataclass
class CollectResult:
    """Result of a bulk feed collection."""

    feeds_total: int = 0    # feeds attempted
    feeds_ok: int = 0       # feeds fetched and parsed successfully
    feeds_error: int = 0    # feeds that raised during fetch/parse
    new_articles: int = 0   # articles newly saved to the store
    duration_ms: float = 0.0  # wall-clock duration of the whole run
    # One {"feed_url": ..., "error": ...} dict per failed feed.
    errors: list[dict] = field(default_factory=list)
47
+
48
+
49
async def fetch_feed(
    url: str,
    *,
    count: int = 50,
    timeout: float = DEFAULT_TIMEOUT,
) -> list[FeedEntry]:
    """Fetch and parse a single RSS/Atom feed.

    Args:
        url: Feed URL.
        count: Maximum entries to return.
        timeout: HTTP timeout in seconds.

    Raises:
        httpx.HTTPStatusError: On non-2xx responses.
        httpx.HTTPError: On transport-level failures (DNS, timeout, ...).
    """
    headers = {"User-Agent": USER_AGENT}

    async with httpx.AsyncClient(timeout=httpx.Timeout(timeout), headers=headers, follow_redirects=True) as client:
        response = await client.get(url)
        response.raise_for_status()

    # Fix: pass raw bytes instead of response.text so feedparser applies its
    # own encoding detection (XML declaration, BOM). response.text relies on
    # the HTTP charset header / httpx's guess, which mis-decodes feeds whose
    # header disagrees with the XML declaration.
    parsed = feedparser.parse(response.content)
    feed_title = parsed.feed.get("title", "")
    entries: list[FeedEntry] = []

    for entry in parsed.entries[:count]:
        # Stable dedup key: prefer the feed-supplied id, then link, then title.
        guid = entry.get("id") or entry.get("link") or entry.get("title", "")

        published = None
        # published_parsed is a struct_time when feedparser parsed the date;
        # guard against malformed tuples all the same.
        if getattr(entry, "published_parsed", None):
            try:
                published = datetime(*entry.published_parsed[:6]).isoformat()
            except (ValueError, TypeError):
                pass

        entries.append(FeedEntry(
            title=entry.get("title", ""),
            url=entry.get("link", ""),
            summary=_clean_summary(entry.get("summary", "")),
            published=published,
            author=entry.get("author", ""),
            feed_url=url,
            feed_title=feed_title,
            guid=guid,
        ))

    return entries
93
+
94
+
95
async def collect(
    store: FeedStore,
    *,
    category: str = "",
    concurrency: int = DEFAULT_CONCURRENCY,
    timeout: float = DEFAULT_TIMEOUT,
    count_per_feed: int = 50,
) -> CollectResult:
    """Collect articles from all subscribed feeds.

    Args:
        store: FeedStore instance.
        category: Only collect from this category (empty = all).
        concurrency: Max concurrent HTTP requests.
        timeout: Per-feed HTTP timeout.
        count_per_feed: Max entries per feed.
    """
    started = time.monotonic()

    subscriptions = store.list_subscriptions()
    if category:
        subscriptions = [sub for sub in subscriptions if sub.category == category]

    result = CollectResult(feeds_total=len(subscriptions))
    gate = asyncio.Semaphore(concurrency)

    async def _fetch_one(feed_url: str) -> tuple[str, list[FeedEntry] | None, str]:
        # Bound concurrency; swallow per-feed failures into the tuple so one
        # bad feed cannot abort the whole gather().
        async with gate:
            try:
                entries = await fetch_feed(feed_url, count=count_per_feed, timeout=timeout)
            except Exception as exc:
                return feed_url, None, str(exc)
            return feed_url, entries, ""

    outcomes = await asyncio.gather(*(_fetch_one(sub.feed_url) for sub in subscriptions))

    for feed_url, entries, error in outcomes:
        if entries is None:
            result.feeds_error += 1
            result.errors.append({"feed_url": feed_url, "error": error})
            store.update_fetch_status(feed_url, success=False, error=error)
            continue

        result.feeds_ok += 1
        articles = [
            Article(
                guid=e.guid,
                feed_url=e.feed_url,
                title=e.title,
                url=e.url,
                summary=e.summary,
                author=e.author,
                published=e.published,
            )
            for e in entries
        ]
        # save_articles returns how many were actually new (deduped by guid
        # in the store, presumably — behavior defined by FeedStore).
        result.new_articles += store.save_articles(articles)
        store.update_fetch_status(feed_url, success=True)

    result.duration_ms = (time.monotonic() - started) * 1000
    return result
157
+
158
+
159
+ def _clean_summary(text: str, max_len: int = 500) -> str:
160
+ """Strip HTML tags and truncate summary."""
161
+ import re
162
+
163
+ clean = re.sub(r"<[^>]+>", "", text)
164
+ clean = re.sub(r"\s+", " ", clean).strip()
165
+ if len(clean) > max_len:
166
+ clean = clean[:max_len] + "..."
167
+ return clean