feedkit 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
feedkit-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 QuartzUnit
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
feedkit-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,120 @@
1
+ Metadata-Version: 2.4
2
+ Name: feedkit
3
+ Version: 0.1.0
4
+ Summary: RSS/Atom feed collection with 449 curated feeds. Python MCP server included.
5
+ Author: QuartzUnit
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/QuartzUnit/feedkit
8
+ Project-URL: Repository, https://github.com/QuartzUnit/feedkit
9
+ Project-URL: Issues, https://github.com/QuartzUnit/feedkit/issues
10
+ Keywords: rss,atom,feed,news,mcp,collection,catalog
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: Topic :: Internet :: WWW/HTTP
17
+ Requires-Python: >=3.11
18
+ Description-Content-Type: text/markdown
19
+ License-File: LICENSE
20
+ Requires-Dist: feedparser>=6.0
21
+ Requires-Dist: httpx>=0.27
22
+ Requires-Dist: click>=8.0
23
+ Requires-Dist: rich>=13.0
24
+ Provides-Extra: opml
25
+ Requires-Dist: listparser>=0.20; extra == "opml"
26
+ Provides-Extra: mcp
27
+ Requires-Dist: fastmcp>=2.0; extra == "mcp"
28
+ Provides-Extra: all
29
+ Requires-Dist: feedkit[mcp,opml]; extra == "all"
30
+ Provides-Extra: dev
31
+ Requires-Dist: pytest>=8.0; extra == "dev"
32
+ Requires-Dist: pytest-asyncio>=0.24; extra == "dev"
33
+ Requires-Dist: ruff>=0.9; extra == "dev"
34
+ Dynamic: license-file
35
+
36
+ # FeedKit
37
+
38
+ > RSS/Atom feed collection with 449 curated feeds. Python MCP server included.
39
+
40
+ ```python
41
+ from feedkit import search_catalog, fetch_feed, FeedStore
42
+
43
+ # Search the built-in catalog
44
+ feeds = search_catalog("cloudflare")
45
+
46
+ # Fetch a feed
47
+ entries = await fetch_feed("https://blog.cloudflare.com/rss/")
48
+ for entry in entries:
49
+ print(entry.title, entry.url)
50
+
51
+ # Subscribe and collect
52
+ store = FeedStore()
53
+ store.subscribe("https://blog.cloudflare.com/rss/", category="tech")
54
+ result = await collect(store) # async parallel fetch
55
+ print(f"{result.new_articles} new articles")
56
+ ```
57
+
58
+ ## Features
59
+
60
+ - **449 curated feeds** — tech blogs, academic, government, news, fact-check. All verified with 778K+ articles collected.
61
+ - **Async parallel collection** — fetch hundreds of feeds in under a minute
62
+ - **SQLite + FTS5** — local storage with full-text search, no server needed
63
+ - **OPML import/export** — interop with Feedly, Inoreader, NetNewsWire
64
+ - **MCP server** — 9 tools for Claude Code / MCP clients
65
+ - **Feed health monitoring** — track success rates and errors per feed
66
+
67
+ ## Install
68
+
69
+ ```bash
70
+ pip install feedkit
71
+ ```
72
+
73
+ ## CLI
74
+
75
+ ```bash
76
+ feedkit search aws # search catalog
77
+ feedkit search --category technology # by category
78
+ feedkit categories # list categories
79
+
80
+ feedkit subscribe https://example.com/rss # subscribe to a feed
81
+ feedkit subscribe-catalog -c technology # subscribe to entire category
82
+ feedkit list # list subscriptions
83
+
84
+ feedkit collect # fetch all subscriptions
85
+ feedkit latest # show latest articles
86
+ feedkit find "kubernetes deployment" # full-text search
87
+
88
+ feedkit import-opml subs.opml # import OPML
89
+ feedkit export-opml backup.opml # export OPML
90
+ feedkit stats # catalog + local stats
91
+ ```
92
+
93
+ ## MCP Server
94
+
95
+ ```bash
96
+ pip install "feedkit[mcp]"
97
+ feedkit-mcp # starts stdio MCP server
98
+ ```
99
+
100
+ **9 tools:** fetch_single_feed, search_feed_catalog, catalog_stats, collect_feeds, search_articles, get_latest_articles, subscribe_feed, unsubscribe_feed, list_subscriptions
101
+
102
+ ## Built-in Catalog
103
+
104
+ 449 verified feeds across 5 categories:
105
+
106
+ | Category | Feeds | Examples |
107
+ |----------|-------|---------|
108
+ | technology | 68 | AWS, Cloudflare, Stripe, Netflix, Spotify, Meta |
109
+ | science | 128 | NASA, PLOS, Harvard, Cambridge, BAIR, arXiv |
110
+ | finance | 114 | World Bank, BoC, RBA, financial news |
111
+ | society | 126 | JTBC, MBC, international news |
112
+ | academia | 13 | Research journals, university blogs |
113
+
114
+ All feeds verified working — collected 778K+ articles via daily automated collection.
115
+
116
+ ## License
117
+
118
+ [MIT](LICENSE)
119
+
120
+ <!-- mcp-name: io.github.ArkNill/feedkit -->
@@ -0,0 +1,85 @@
1
+ # FeedKit
2
+
3
+ > RSS/Atom feed collection with 449 curated feeds. Python MCP server included.
4
+
5
+ ```python
6
+ from feedkit import search_catalog, fetch_feed, FeedStore
7
+
8
+ # Search the built-in catalog
9
+ feeds = search_catalog("cloudflare")
10
+
11
+ # Fetch a feed
12
+ entries = await fetch_feed("https://blog.cloudflare.com/rss/")
13
+ for entry in entries:
14
+ print(entry.title, entry.url)
15
+
16
+ # Subscribe and collect
17
+ store = FeedStore()
18
+ store.subscribe("https://blog.cloudflare.com/rss/", category="tech")
19
+ result = await collect(store) # async parallel fetch
20
+ print(f"{result.new_articles} new articles")
21
+ ```
22
+
23
+ ## Features
24
+
25
+ - **449 curated feeds** — tech blogs, academic, government, news, fact-check. All verified with 778K+ articles collected.
26
+ - **Async parallel collection** — fetch hundreds of feeds in under a minute
27
+ - **SQLite + FTS5** — local storage with full-text search, no server needed
28
+ - **OPML import/export** — interop with Feedly, Inoreader, NetNewsWire
29
+ - **MCP server** — 9 tools for Claude Code / MCP clients
30
+ - **Feed health monitoring** — track success rates and errors per feed
31
+
32
+ ## Install
33
+
34
+ ```bash
35
+ pip install feedkit
36
+ ```
37
+
38
+ ## CLI
39
+
40
+ ```bash
41
+ feedkit search aws # search catalog
42
+ feedkit search --category technology # by category
43
+ feedkit categories # list categories
44
+
45
+ feedkit subscribe https://example.com/rss # subscribe to a feed
46
+ feedkit subscribe-catalog -c technology # subscribe to entire category
47
+ feedkit list # list subscriptions
48
+
49
+ feedkit collect # fetch all subscriptions
50
+ feedkit latest # show latest articles
51
+ feedkit find "kubernetes deployment" # full-text search
52
+
53
+ feedkit import-opml subs.opml # import OPML
54
+ feedkit export-opml backup.opml # export OPML
55
+ feedkit stats # catalog + local stats
56
+ ```
57
+
58
+ ## MCP Server
59
+
60
+ ```bash
61
+ pip install "feedkit[mcp]"
62
+ feedkit-mcp # starts stdio MCP server
63
+ ```
64
+
65
+ **9 tools:** fetch_single_feed, search_feed_catalog, catalog_stats, collect_feeds, search_articles, get_latest_articles, subscribe_feed, unsubscribe_feed, list_subscriptions
66
+
67
+ ## Built-in Catalog
68
+
69
+ 449 verified feeds across 5 categories:
70
+
71
+ | Category | Feeds | Examples |
72
+ |----------|-------|---------|
73
+ | technology | 68 | AWS, Cloudflare, Stripe, Netflix, Spotify, Meta |
74
+ | science | 128 | NASA, PLOS, Harvard, Cambridge, BAIR, arXiv |
75
+ | finance | 114 | World Bank, BoC, RBA, financial news |
76
+ | society | 126 | JTBC, MBC, international news |
77
+ | academia | 13 | Research journals, university blogs |
78
+
79
+ All feeds verified working — collected 778K+ articles via daily automated collection.
80
+
81
+ ## License
82
+
83
+ [MIT](LICENSE)
84
+
85
+ <!-- mcp-name: io.github.ArkNill/feedkit -->
@@ -0,0 +1,8 @@
1
+ """FeedKit — RSS/Atom feed collection with curated catalog."""
2
+
3
+ from feedkit.core import fetch_feed
4
+ from feedkit.catalog import search_catalog, get_catalog_stats
5
+ from feedkit.storage import FeedStore
6
+
7
+ __all__ = ["fetch_feed", "search_catalog", "get_catalog_stats", "FeedStore"]
8
+ __version__ = "0.1.0"
@@ -0,0 +1,231 @@
1
+ """CLI entry point — python -m feedkit or `feedkit` command."""
2
+
3
+ import asyncio
4
+ import json
5
+ import sys
6
+
7
+ import click
8
+ from rich.console import Console
9
+ from rich.table import Table
10
+
11
+ from feedkit import __version__
12
+ from feedkit.catalog import get_catalog_stats, list_categories, search_catalog
13
+ from feedkit.core import collect, fetch_feed
14
+ from feedkit.storage import FeedStore
15
+
16
+ console = Console()
17
+
18
+
19
def _get_store():
    """Open a FeedStore with default settings (shared helper for all CLI commands)."""
    return FeedStore()
21
+
22
+
23
# Root command group; subcommands register themselves via @main.command() below.
# NOTE: the docstring doubles as click's --help text, so it is user-facing.
@click.group()
@click.version_option(__version__, prog_name="feedkit")
def main():
    """FeedKit — RSS/Atom feed collection with curated catalog."""
27
+
28
+
29
@main.command()
@click.argument("query", default="")
@click.option("--category", "-c", default="", help="Filter by category")
@click.option("--language", "-l", default="", help="Filter by language (en, ko, ...)")
@click.option("--limit", "-n", default=20, help="Max results")
@click.option("--json-output", "-j", is_flag=True, help="JSON output")
def search(query, category, language, limit, json_output):
    """Search the built-in feed catalog."""
    matches = search_catalog(query, category=category, language=language, limit=limit)

    if json_output:
        # Machine-readable output: a flat list of plain dicts.
        payload = [
            {"url": m.url, "title": m.title, "category": m.category, "language": m.language}
            for m in matches
        ]
        click.echo(json.dumps(payload, ensure_ascii=False, indent=2))
        return

    # Human-readable output: a rich table.
    table = Table(title=f"Catalog Search: {query or '(all)'} ({len(matches)} results)")
    table.add_column("Title", style="cyan", max_width=40)
    table.add_column("Category", style="green")
    table.add_column("Lang")
    table.add_column("URL", style="dim", max_width=50)
    for m in matches:
        table.add_row(m.title, m.category, m.language, m.url)
    console.print(table)
51
+
52
+
53
@main.command()
@click.argument("url")
@click.option("--category", "-c", default="", help="Category for this subscription")
@click.option("--title", "-t", default="", help="Title override")
def subscribe(url, category, title):
    """Subscribe to a feed."""
    # Persist the subscription, report, then release the store handle.
    feed_store = _get_store()
    feed_store.subscribe(url, title=title, category=category)
    console.print(f"[green]✓[/green] Subscribed to {url}")
    feed_store.close()
63
+
64
+
65
@main.command()
@click.argument("url")
def unsubscribe(url):
    """Unsubscribe from a feed."""
    # Remove the subscription, report, then release the store handle.
    feed_store = _get_store()
    feed_store.unsubscribe(url)
    console.print(f"[yellow]✓[/yellow] Unsubscribed from {url}")
    feed_store.close()
73
+
74
+
75
@main.command("list")
def list_subs():
    """List all subscriptions."""
    feed_store = _get_store()
    subs = feed_store.list_subscriptions()

    if not subs:
        # Nothing to show — point the user at the subscribe commands.
        console.print("[dim]No subscriptions yet. Use `feedkit subscribe <url>` or `feedkit subscribe-catalog`.[/dim]")
        feed_store.close()
        return

    table = Table(title=f"Subscriptions ({len(subs)})")
    table.add_column("Title", style="cyan", max_width=35)
    table.add_column("Category", style="green")
    table.add_column("Fetched", justify="right")
    table.add_column("Errors", justify="right")
    for sub in subs:
        # Fall back to a truncated URL when the feed has no title.
        label = sub.title or sub.feed_url[:35]
        table.add_row(label, sub.category, str(sub.fetch_count), str(sub.error_count))
    console.print(table)

    feed_store.close()
94
+
95
+
96
@main.command("subscribe-catalog")
@click.option("--category", "-c", required=True, help="Subscribe to all feeds in this category")
def subscribe_catalog(category):
    """Subscribe to all feeds in a catalog category."""
    # Large limit so an entire category is returned in one call.
    feeds = search_catalog(category=category, limit=1000)
    if not feeds:
        console.print(f"[red]No feeds found in category '{category}'[/red]")
        return

    feed_store = _get_store()
    for feed in feeds:
        feed_store.subscribe(feed.url, title=feed.title, category=feed.category, language=feed.language)
    console.print(f"[green]✓[/green] Subscribed to {len(feeds)} feeds in category '{category}'")
    feed_store.close()
110
+
111
+
112
@main.command("collect")
@click.option("--category", "-c", default="", help="Only collect from this category")
@click.option("--concurrency", "-n", default=20, help="Max concurrent requests")
def collect_cmd(category, concurrency):
    """Collect articles from all subscribed feeds.

    Runs the async collector via asyncio.run and prints a summary line
    (feeds OK / new articles / duration) plus a failure count when any
    feed errored.
    """
    store = _get_store()
    try:
        sub_count = store.subscription_count()
        if sub_count == 0:
            console.print("[dim]No subscriptions. Use `feedkit subscribe` first.[/dim]")
            return

        console.print(f"Collecting from {sub_count} feeds...")

        try:
            result = asyncio.run(collect(store, category=category, concurrency=concurrency))
        except KeyboardInterrupt:
            # 130 = 128 + SIGINT, the conventional exit status for Ctrl-C.
            sys.exit(130)

        console.print(f"[green]✓[/green] {result.feeds_ok}/{result.feeds_total} feeds OK, "
                      f"{result.new_articles} new articles, {result.duration_ms:.0f}ms")
        if result.errors:
            console.print(f"[yellow]{result.feeds_error} feeds failed[/yellow]")
    finally:
        # BUGFIX: the store was previously left open when Ctrl-C triggered
        # sys.exit(130); close it on every exit path.
        store.close()
138
+
139
+
140
@main.command()
@click.argument("query")
@click.option("--count", "-n", default=20, help="Max results")
def find(query, count):
    """Full-text search across collected articles."""
    feed_store = _get_store()
    hits = feed_store.search(query, count=count)

    if not hits:
        console.print("[dim]No matching articles found.[/dim]")
        feed_store.close()
        return

    table = Table(title=f"Search: '{query}' ({len(hits)} results)")
    table.add_column("Title", style="cyan", max_width=50)
    table.add_column("Published")
    table.add_column("URL", style="dim", max_width=50)
    for hit in hits:
        # Truncate long fields so rows stay on one line.
        table.add_row(hit.title[:50], hit.published or "", hit.url[:50])
    console.print(table)

    feed_store.close()
160
+
161
+
162
@main.command()
@click.option("--count", "-n", default=20, help="Number of articles")
@click.option("--category", "-c", default="", help="Filter by category")
def latest(count, category):
    """Show latest collected articles."""
    feed_store = _get_store()
    recent = feed_store.get_latest(count=count, category=category)

    if not recent:
        console.print("[dim]No articles yet. Run `feedkit collect` first.[/dim]")
        feed_store.close()
        return

    table = Table(title=f"Latest Articles ({len(recent)})")
    table.add_column("Title", style="cyan", max_width=50)
    table.add_column("Published")
    table.add_column("Feed", style="dim", max_width=30)
    for article in recent:
        # Truncate long fields so rows stay on one line.
        table.add_row(article.title[:50], article.published or "", article.feed_url[:30])
    console.print(table)

    feed_store.close()
182
+
183
+
184
@main.command()
def stats():
    """Show catalog and subscription statistics."""
    # Built-in catalog numbers first.
    catalog_info = get_catalog_stats()
    console.print(f"\n[bold]Catalog:[/bold] {catalog_info['total_feeds']} feeds")
    for name, feed_count in catalog_info["categories"].items():
        console.print(f"  {name}: {feed_count}")

    # Then local subscription/article counts.
    feed_store = _get_store()
    subs = feed_store.subscription_count()
    articles = feed_store.article_count()
    console.print(f"\n[bold]Local:[/bold] {subs} subscriptions, {articles} articles")
    feed_store.close()
197
+
198
+
199
@main.command("categories")
def categories_cmd():
    """List available catalog categories."""
    # One indented category name per line.
    names = list_categories()
    for name in names:
        console.print(f"  {name}")
204
+
205
+
206
@main.command("import-opml")
@click.argument("path")
def import_opml_cmd(path):
    """Import feeds from an OPML file."""
    # Deferred import: the OPML dependency is an optional extra.
    from feedkit.opml import import_opml

    feed_store = _get_store()
    imported = import_opml(feed_store, path)
    console.print(f"[green]✓[/green] Imported {imported} feeds from {path}")
    feed_store.close()
216
+
217
+
218
@main.command("export-opml")
@click.argument("path")
def export_opml_cmd(path):
    """Export subscriptions to an OPML file."""
    # Deferred import: the OPML dependency is an optional extra.
    from feedkit.opml import export_opml

    feed_store = _get_store()
    exported = export_opml(feed_store, path)
    console.print(f"[green]✓[/green] Exported {exported} feeds to {path}")
    feed_store.close()
228
+
229
+
230
# Allow `python -m feedkit` / direct execution in addition to the console script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,104 @@
1
+ """Built-in curated feed catalog — 449 verified RSS/Atom feeds."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+ _CATALOG_PATH = Path(__file__).parent / "data" / "feeds.json"
13
+ _catalog: list[dict] | None = None
14
+
15
+
16
@dataclass
class CatalogFeed:
    """A feed from the built-in catalog."""

    url: str          # Feed endpoint URL (RSS or Atom)
    title: str        # Human-readable feed title
    category: str     # Top-level category, e.g. "technology"
    subcategory: str  # Finer-grained grouping within the category
    language: str     # Language code, e.g. "en", "ko"
    domain: str       # Publisher hostname; matched by search_catalog queries
26
+
27
+
28
def _load_catalog() -> list[dict]:
    """Load the bundled feeds.json catalog, memoized in a module global."""
    global _catalog
    if _catalog is not None:
        return _catalog
    _catalog = json.loads(_CATALOG_PATH.read_text(encoding="utf-8"))
    return _catalog
34
+
35
+
36
def search_catalog(
    query: str = "",
    *,
    category: str = "",
    language: str = "",
    limit: int = 50,
) -> list[CatalogFeed]:
    """Search the built-in feed catalog.

    Args:
        query: Search by title, domain, or URL (case-insensitive substring match).
        category: Filter by category (e.g., "technology", "science", "finance").
        language: Filter by language (e.g., "en", "ko").
        limit: Maximum results to return; non-positive values yield no results.

    Returns:
        Up to ``limit`` matching CatalogFeed entries, in catalog order.
    """
    # BUGFIX: the old loop appended before checking the limit, so limit=0
    # (or negative) returned one result instead of none.
    if limit <= 0:
        return []

    q = query.lower()
    results: list[CatalogFeed] = []
    for entry in _load_catalog():
        if category and entry.get("category", "") != category:
            continue
        if language and entry.get("language", "") != language:
            continue
        if q:
            # Substring match against title, domain, and URL.
            haystacks = (
                entry.get("title", "").lower(),
                entry.get("domain", "").lower(),
                entry.get("url", "").lower(),
            )
            if not any(q in hay for hay in haystacks):
                continue

        results.append(CatalogFeed(
            url=entry["url"],
            title=entry.get("title", ""),
            category=entry.get("category", ""),
            subcategory=entry.get("subcategory", ""),
            language=entry.get("language", "en"),
            domain=entry.get("domain", ""),
        ))
        if len(results) >= limit:
            break

    return results
80
+
81
+
82
def get_catalog_stats() -> dict:
    """Get catalog statistics."""
    catalog = _load_catalog()

    # Tally feeds per category and per language in a single pass.
    categories: dict[str, int] = {}
    languages: dict[str, int] = {}
    for entry in catalog:
        for tally, key in ((categories, "category"), (languages, "language")):
            value = entry.get(key, "unknown")
            tally[value] = tally.get(value, 0) + 1

    return {
        "total_feeds": len(catalog),
        "categories": dict(sorted(categories.items())),
        "languages": dict(sorted(languages.items())),
    }
99
+
100
+
101
def list_categories() -> list[str]:
    """List all available categories."""
    # Collect distinct non-empty category names, then sort.
    seen: set[str] = set()
    for entry in _load_catalog():
        name = entry.get("category", "")
        if name:
            seen.add(name)
    return sorted(seen)