feedkit 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
feedkit-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 QuartzUnit
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
feedkit-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,120 @@
1
+ Metadata-Version: 2.4
2
+ Name: feedkit
3
+ Version: 0.1.0
4
+ Summary: RSS/Atom feed collection with 449 curated feeds. Python MCP server included.
5
+ Author: QuartzUnit
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/QuartzUnit/feedkit
8
+ Project-URL: Repository, https://github.com/QuartzUnit/feedkit
9
+ Project-URL: Issues, https://github.com/QuartzUnit/feedkit/issues
10
+ Keywords: rss,atom,feed,news,mcp,collection,catalog
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: Topic :: Internet :: WWW/HTTP
17
+ Requires-Python: >=3.11
18
+ Description-Content-Type: text/markdown
19
+ License-File: LICENSE
20
+ Requires-Dist: feedparser>=6.0
21
+ Requires-Dist: httpx>=0.27
22
+ Requires-Dist: click>=8.0
23
+ Requires-Dist: rich>=13.0
24
+ Provides-Extra: opml
25
+ Requires-Dist: listparser>=0.20; extra == "opml"
26
+ Provides-Extra: mcp
27
+ Requires-Dist: fastmcp>=2.0; extra == "mcp"
28
+ Provides-Extra: all
29
+ Requires-Dist: feedkit[mcp,opml]; extra == "all"
30
+ Provides-Extra: dev
31
+ Requires-Dist: pytest>=8.0; extra == "dev"
32
+ Requires-Dist: pytest-asyncio>=0.24; extra == "dev"
33
+ Requires-Dist: ruff>=0.9; extra == "dev"
34
+ Dynamic: license-file
35
+
36
+ # FeedKit
37
+
38
+ > RSS/Atom feed collection with 449 curated feeds. Python MCP server included.
39
+
40
+ ```python
41
+ from feedkit import search_catalog, fetch_feed, FeedStore
42
+
43
+ # Search the built-in catalog
44
+ feeds = search_catalog("cloudflare")
45
+
46
+ # Fetch a feed
47
+ entries = await fetch_feed("https://blog.cloudflare.com/rss/")
48
+ for entry in entries:
49
+ print(entry.title, entry.url)
50
+
51
+ # Subscribe and collect
52
+ store = FeedStore()
53
+ store.subscribe("https://blog.cloudflare.com/rss/", category="tech")
54
+ result = await collect(store) # async parallel fetch
55
+ print(f"{result.new_articles} new articles")
56
+ ```
57
+
58
+ ## Features
59
+
60
+ - **449 curated feeds** — tech blogs, academic, government, news, fact-check. All verified with 778K+ articles collected.
61
+ - **Async parallel collection** — fetch hundreds of feeds in under a minute
62
+ - **SQLite + FTS5** — local storage with full-text search, no server needed
63
+ - **OPML import/export** — interop with Feedly, Inoreader, NetNewsWire
64
+ - **MCP server** — 9 tools for Claude Code / MCP clients
65
+ - **Feed health monitoring** — track success rates and errors per feed
66
+
67
+ ## Install
68
+
69
+ ```bash
70
+ pip install feedkit
71
+ ```
72
+
73
+ ## CLI
74
+
75
+ ```bash
76
+ feedkit search aws # search catalog
77
+ feedkit search --category technology # by category
78
+ feedkit categories # list categories
79
+
80
+ feedkit subscribe https://example.com/rss # subscribe to a feed
81
+ feedkit subscribe-catalog -c technology # subscribe to entire category
82
+ feedkit list # list subscriptions
83
+
84
+ feedkit collect # fetch all subscriptions
85
+ feedkit latest # show latest articles
86
+ feedkit find "kubernetes deployment" # full-text search
87
+
88
+ feedkit import-opml subs.opml # import OPML
89
+ feedkit export-opml backup.opml # export OPML
90
+ feedkit stats # catalog + local stats
91
+ ```
92
+
93
+ ## MCP Server
94
+
95
+ ```bash
96
+ pip install "feedkit[mcp]"
97
+ feedkit-mcp # starts stdio MCP server
98
+ ```
99
+
100
+ **9 tools:** fetch_single_feed, search_feed_catalog, catalog_stats, collect_feeds, search_articles, get_latest_articles, subscribe_feed, unsubscribe_feed, list_subscriptions
101
+
102
+ ## Built-in Catalog
103
+
104
+ 449 verified feeds across 5 categories:
105
+
106
+ | Category | Feeds | Examples |
107
+ |----------|-------|---------|
108
+ | technology | 68 | AWS, Cloudflare, Stripe, Netflix, Spotify, Meta |
109
+ | science | 128 | NASA, PLOS, Harvard, Cambridge, BAIR, arXiv |
110
+ | finance | 114 | World Bank, BoC, RBA, financial news |
111
+ | society | 126 | JTBC, MBC, international news |
112
+ | academia | 13 | Research journals, university blogs |
113
+
114
+ All feeds verified working — collected 778K+ articles via daily automated collection.
115
+
116
+ ## License
117
+
118
+ [MIT](LICENSE)
119
+
120
+ <!-- mcp-name: io.github.ArkNill/feedkit -->
@@ -0,0 +1,85 @@
1
+ # FeedKit
2
+
3
+ > RSS/Atom feed collection with 449 curated feeds. Python MCP server included.
4
+
5
+ ```python
6
+ from feedkit import search_catalog, fetch_feed, FeedStore
7
+
8
+ # Search the built-in catalog
9
+ feeds = search_catalog("cloudflare")
10
+
11
+ # Fetch a feed
12
+ entries = await fetch_feed("https://blog.cloudflare.com/rss/")
13
+ for entry in entries:
14
+ print(entry.title, entry.url)
15
+
16
+ # Subscribe and collect
17
+ store = FeedStore()
18
+ store.subscribe("https://blog.cloudflare.com/rss/", category="tech")
19
+ result = await collect(store) # async parallel fetch
20
+ print(f"{result.new_articles} new articles")
21
+ ```
22
+
23
+ ## Features
24
+
25
+ - **449 curated feeds** — tech blogs, academic, government, news, fact-check. All verified with 778K+ articles collected.
26
+ - **Async parallel collection** — fetch hundreds of feeds in under a minute
27
+ - **SQLite + FTS5** — local storage with full-text search, no server needed
28
+ - **OPML import/export** — interop with Feedly, Inoreader, NetNewsWire
29
+ - **MCP server** — 9 tools for Claude Code / MCP clients
30
+ - **Feed health monitoring** — track success rates and errors per feed
31
+
32
+ ## Install
33
+
34
+ ```bash
35
+ pip install feedkit
36
+ ```
37
+
38
+ ## CLI
39
+
40
+ ```bash
41
+ feedkit search aws # search catalog
42
+ feedkit search --category technology # by category
43
+ feedkit categories # list categories
44
+
45
+ feedkit subscribe https://example.com/rss # subscribe to a feed
46
+ feedkit subscribe-catalog -c technology # subscribe to entire category
47
+ feedkit list # list subscriptions
48
+
49
+ feedkit collect # fetch all subscriptions
50
+ feedkit latest # show latest articles
51
+ feedkit find "kubernetes deployment" # full-text search
52
+
53
+ feedkit import-opml subs.opml # import OPML
54
+ feedkit export-opml backup.opml # export OPML
55
+ feedkit stats # catalog + local stats
56
+ ```
57
+
58
+ ## MCP Server
59
+
60
+ ```bash
61
+ pip install "feedkit[mcp]"
62
+ feedkit-mcp # starts stdio MCP server
63
+ ```
64
+
65
+ **9 tools:** fetch_single_feed, search_feed_catalog, catalog_stats, collect_feeds, search_articles, get_latest_articles, subscribe_feed, unsubscribe_feed, list_subscriptions
66
+
67
+ ## Built-in Catalog
68
+
69
+ 449 verified feeds across 5 categories:
70
+
71
+ | Category | Feeds | Examples |
72
+ |----------|-------|---------|
73
+ | technology | 68 | AWS, Cloudflare, Stripe, Netflix, Spotify, Meta |
74
+ | science | 128 | NASA, PLOS, Harvard, Cambridge, BAIR, arXiv |
75
+ | finance | 114 | World Bank, BoC, RBA, financial news |
76
+ | society | 126 | JTBC, MBC, international news |
77
+ | academia | 13 | Research journals, university blogs |
78
+
79
+ All feeds verified working — collected 778K+ articles via daily automated collection.
80
+
81
+ ## License
82
+
83
+ [MIT](LICENSE)
84
+
85
+ <!-- mcp-name: io.github.ArkNill/feedkit -->
@@ -0,0 +1,8 @@
1
+ """FeedKit — RSS/Atom feed collection with curated catalog."""
2
+
3
+ from feedkit.core import fetch_feed
4
+ from feedkit.catalog import search_catalog, get_catalog_stats
5
+ from feedkit.storage import FeedStore
6
+
7
+ __all__ = ["fetch_feed", "search_catalog", "get_catalog_stats", "FeedStore"]
8
+ __version__ = "0.1.0"
@@ -0,0 +1,231 @@
1
+ """CLI entry point — python -m feedkit or `feedkit` command."""
2
+
3
+ import asyncio
4
+ import json
5
+ import sys
6
+
7
+ import click
8
+ from rich.console import Console
9
+ from rich.table import Table
10
+
11
+ from feedkit import __version__
12
+ from feedkit.catalog import get_catalog_stats, list_categories, search_catalog
13
+ from feedkit.core import collect, fetch_feed
14
+ from feedkit.storage import FeedStore
15
+
16
+ console = Console()
17
+
18
+
19
def _get_store():
    """Open a FeedStore with default settings (shared helper for all CLI commands)."""
    return FeedStore()
21
+
22
+
23
# Root command group; subcommands register themselves via @main.command() below.
# NOTE: the docstring doubles as click's --help text, so it is user-facing.
@click.group()
@click.version_option(__version__, prog_name="feedkit")
def main():
    """FeedKit — RSS/Atom feed collection with curated catalog."""
27
+
28
+
29
@main.command()
@click.argument("query", default="")
@click.option("--category", "-c", default="", help="Filter by category")
@click.option("--language", "-l", default="", help="Filter by language (en, ko, ...)")
@click.option("--limit", "-n", default=20, help="Max results")
@click.option("--json-output", "-j", is_flag=True, help="JSON output")
def search(query, category, language, limit, json_output):
    """Search the built-in feed catalog."""
    matches = search_catalog(query, category=category, language=language, limit=limit)

    if json_output:
        # Machine-readable output: a flat list of plain dicts.
        payload = [
            {"url": m.url, "title": m.title, "category": m.category, "language": m.language}
            for m in matches
        ]
        click.echo(json.dumps(payload, ensure_ascii=False, indent=2))
        return

    # Human-readable output: a rich table.
    table = Table(title=f"Catalog Search: {query or '(all)'} ({len(matches)} results)")
    table.add_column("Title", style="cyan", max_width=40)
    table.add_column("Category", style="green")
    table.add_column("Lang")
    table.add_column("URL", style="dim", max_width=50)
    for m in matches:
        table.add_row(m.title, m.category, m.language, m.url)
    console.print(table)
51
+
52
+
53
@main.command()
@click.argument("url")
@click.option("--category", "-c", default="", help="Category for this subscription")
@click.option("--title", "-t", default="", help="Title override")
def subscribe(url, category, title):
    """Subscribe to a feed."""
    # Persist the subscription, report, then release the store handle.
    feed_store = _get_store()
    feed_store.subscribe(url, title=title, category=category)
    console.print(f"[green]✓[/green] Subscribed to {url}")
    feed_store.close()
63
+
64
+
65
@main.command()
@click.argument("url")
def unsubscribe(url):
    """Unsubscribe from a feed."""
    # Remove the subscription, report, then release the store handle.
    feed_store = _get_store()
    feed_store.unsubscribe(url)
    console.print(f"[yellow]✓[/yellow] Unsubscribed from {url}")
    feed_store.close()
73
+
74
+
75
@main.command("list")
def list_subs():
    """List all subscriptions."""
    feed_store = _get_store()
    subs = feed_store.list_subscriptions()

    if not subs:
        # Nothing to show — point the user at the subscribe commands.
        console.print("[dim]No subscriptions yet. Use `feedkit subscribe <url>` or `feedkit subscribe-catalog`.[/dim]")
        feed_store.close()
        return

    table = Table(title=f"Subscriptions ({len(subs)})")
    table.add_column("Title", style="cyan", max_width=35)
    table.add_column("Category", style="green")
    table.add_column("Fetched", justify="right")
    table.add_column("Errors", justify="right")
    for sub in subs:
        # Fall back to a truncated URL when the feed has no title.
        label = sub.title or sub.feed_url[:35]
        table.add_row(label, sub.category, str(sub.fetch_count), str(sub.error_count))
    console.print(table)

    feed_store.close()
94
+
95
+
96
@main.command("subscribe-catalog")
@click.option("--category", "-c", required=True, help="Subscribe to all feeds in this category")
def subscribe_catalog(category):
    """Subscribe to all feeds in a catalog category."""
    # Large limit so an entire category is returned in one call.
    feeds = search_catalog(category=category, limit=1000)
    if not feeds:
        console.print(f"[red]No feeds found in category '{category}'[/red]")
        return

    feed_store = _get_store()
    for feed in feeds:
        feed_store.subscribe(feed.url, title=feed.title, category=feed.category, language=feed.language)
    console.print(f"[green]✓[/green] Subscribed to {len(feeds)} feeds in category '{category}'")
    feed_store.close()
110
+
111
+
112
@main.command("collect")
@click.option("--category", "-c", default="", help="Only collect from this category")
@click.option("--concurrency", "-n", default=20, help="Max concurrent requests")
def collect_cmd(category, concurrency):
    """Collect articles from all subscribed feeds.

    Runs the async collector via asyncio.run and prints a summary line
    (feeds OK / new articles / duration) plus a failure count when any
    feed errored.
    """
    store = _get_store()
    try:
        sub_count = store.subscription_count()
        if sub_count == 0:
            console.print("[dim]No subscriptions. Use `feedkit subscribe` first.[/dim]")
            return

        console.print(f"Collecting from {sub_count} feeds...")

        try:
            result = asyncio.run(collect(store, category=category, concurrency=concurrency))
        except KeyboardInterrupt:
            # 130 = 128 + SIGINT, the conventional exit status for Ctrl-C.
            sys.exit(130)

        console.print(f"[green]✓[/green] {result.feeds_ok}/{result.feeds_total} feeds OK, "
                      f"{result.new_articles} new articles, {result.duration_ms:.0f}ms")
        if result.errors:
            console.print(f"[yellow]{result.feeds_error} feeds failed[/yellow]")
    finally:
        # BUGFIX: the store was previously left open when Ctrl-C triggered
        # sys.exit(130); close it on every exit path.
        store.close()
138
+
139
+
140
@main.command()
@click.argument("query")
@click.option("--count", "-n", default=20, help="Max results")
def find(query, count):
    """Full-text search across collected articles."""
    feed_store = _get_store()
    hits = feed_store.search(query, count=count)

    if not hits:
        console.print("[dim]No matching articles found.[/dim]")
        feed_store.close()
        return

    table = Table(title=f"Search: '{query}' ({len(hits)} results)")
    table.add_column("Title", style="cyan", max_width=50)
    table.add_column("Published")
    table.add_column("URL", style="dim", max_width=50)
    for hit in hits:
        # Truncate long fields so rows stay on one line.
        table.add_row(hit.title[:50], hit.published or "", hit.url[:50])
    console.print(table)

    feed_store.close()
160
+
161
+
162
@main.command()
@click.option("--count", "-n", default=20, help="Number of articles")
@click.option("--category", "-c", default="", help="Filter by category")
def latest(count, category):
    """Show latest collected articles."""
    feed_store = _get_store()
    recent = feed_store.get_latest(count=count, category=category)

    if not recent:
        console.print("[dim]No articles yet. Run `feedkit collect` first.[/dim]")
        feed_store.close()
        return

    table = Table(title=f"Latest Articles ({len(recent)})")
    table.add_column("Title", style="cyan", max_width=50)
    table.add_column("Published")
    table.add_column("Feed", style="dim", max_width=30)
    for article in recent:
        # Truncate long fields so rows stay on one line.
        table.add_row(article.title[:50], article.published or "", article.feed_url[:30])
    console.print(table)

    feed_store.close()
182
+
183
+
184
@main.command()
def stats():
    """Show catalog and subscription statistics."""
    # Built-in catalog numbers first.
    catalog_info = get_catalog_stats()
    console.print(f"\n[bold]Catalog:[/bold] {catalog_info['total_feeds']} feeds")
    for name, feed_count in catalog_info["categories"].items():
        console.print(f"  {name}: {feed_count}")

    # Then local subscription/article counts.
    feed_store = _get_store()
    subs = feed_store.subscription_count()
    articles = feed_store.article_count()
    console.print(f"\n[bold]Local:[/bold] {subs} subscriptions, {articles} articles")
    feed_store.close()
197
+
198
+
199
@main.command("categories")
def categories_cmd():
    """List available catalog categories."""
    # One indented category name per line.
    names = list_categories()
    for name in names:
        console.print(f"  {name}")
204
+
205
+
206
@main.command("import-opml")
@click.argument("path")
def import_opml_cmd(path):
    """Import feeds from an OPML file."""
    # Deferred import: the OPML dependency is an optional extra.
    from feedkit.opml import import_opml

    feed_store = _get_store()
    imported = import_opml(feed_store, path)
    console.print(f"[green]✓[/green] Imported {imported} feeds from {path}")
    feed_store.close()
216
+
217
+
218
@main.command("export-opml")
@click.argument("path")
def export_opml_cmd(path):
    """Export subscriptions to an OPML file."""
    # Deferred import: the OPML dependency is an optional extra.
    from feedkit.opml import export_opml

    feed_store = _get_store()
    exported = export_opml(feed_store, path)
    console.print(f"[green]✓[/green] Exported {exported} feeds to {path}")
    feed_store.close()
228
+
229
+
230
# Allow `python -m feedkit` / direct execution in addition to the console script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,104 @@
1
+ """Built-in curated feed catalog — 449 verified RSS/Atom feeds."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+ _CATALOG_PATH = Path(__file__).parent / "data" / "feeds.json"
13
+ _catalog: list[dict] | None = None
14
+
15
+
16
@dataclass
class CatalogFeed:
    """A feed from the built-in catalog."""

    url: str          # Feed endpoint URL (RSS or Atom)
    title: str        # Human-readable feed title
    category: str     # Top-level category, e.g. "technology"
    subcategory: str  # Finer-grained grouping within the category
    language: str     # Language code, e.g. "en", "ko"
    domain: str       # Publisher hostname; matched by search_catalog queries
26
+
27
+
28
def _load_catalog() -> list[dict]:
    """Load the bundled feeds.json catalog, memoized in a module global."""
    global _catalog
    if _catalog is not None:
        return _catalog
    _catalog = json.loads(_CATALOG_PATH.read_text(encoding="utf-8"))
    return _catalog
34
+
35
+
36
def search_catalog(
    query: str = "",
    *,
    category: str = "",
    language: str = "",
    limit: int = 50,
) -> list[CatalogFeed]:
    """Search the built-in feed catalog.

    Args:
        query: Search by title, domain, or URL (case-insensitive substring match).
        category: Filter by category (e.g., "technology", "science", "finance").
        language: Filter by language (e.g., "en", "ko").
        limit: Maximum results to return; non-positive values yield no results.

    Returns:
        Up to ``limit`` matching CatalogFeed entries, in catalog order.
    """
    # BUGFIX: the old loop appended before checking the limit, so limit=0
    # (or negative) returned one result instead of none.
    if limit <= 0:
        return []

    q = query.lower()
    results: list[CatalogFeed] = []
    for entry in _load_catalog():
        if category and entry.get("category", "") != category:
            continue
        if language and entry.get("language", "") != language:
            continue
        if q:
            # Substring match against title, domain, and URL.
            haystacks = (
                entry.get("title", "").lower(),
                entry.get("domain", "").lower(),
                entry.get("url", "").lower(),
            )
            if not any(q in hay for hay in haystacks):
                continue

        results.append(CatalogFeed(
            url=entry["url"],
            title=entry.get("title", ""),
            category=entry.get("category", ""),
            subcategory=entry.get("subcategory", ""),
            language=entry.get("language", "en"),
            domain=entry.get("domain", ""),
        ))
        if len(results) >= limit:
            break

    return results
80
+
81
+
82
def get_catalog_stats() -> dict:
    """Get catalog statistics."""
    catalog = _load_catalog()

    # Tally feeds per category and per language in a single pass.
    categories: dict[str, int] = {}
    languages: dict[str, int] = {}
    for entry in catalog:
        for tally, key in ((categories, "category"), (languages, "language")):
            value = entry.get(key, "unknown")
            tally[value] = tally.get(value, 0) + 1

    return {
        "total_feeds": len(catalog),
        "categories": dict(sorted(categories.items())),
        "languages": dict(sorted(languages.items())),
    }
99
+
100
+
101
def list_categories() -> list[str]:
    """List all available categories."""
    # Collect distinct non-empty category names, then sort.
    seen: set[str] = set()
    for entry in _load_catalog():
        name = entry.get("category", "")
        if name:
            seen.add(name)
    return sorted(seen)