metatron-cli 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metatron/__init__.py +51 -0
- metatron/api.py +290 -0
- metatron/cli.py +221 -0
- metatron/config.py +187 -0
- metatron/db.py +357 -0
- metatron/dedup.py +210 -0
- metatron/fetcher.py +147 -0
- metatron/llm.py +270 -0
- metatron/normalize.py +141 -0
- metatron/poller.py +325 -0
- metatron_cli-0.2.1.dist-info/METADATA +174 -0
- metatron_cli-0.2.1.dist-info/RECORD +16 -0
- metatron_cli-0.2.1.dist-info/WHEEL +5 -0
- metatron_cli-0.2.1.dist-info/entry_points.txt +2 -0
- metatron_cli-0.2.1.dist-info/licenses/LICENSE +21 -0
- metatron_cli-0.2.1.dist-info/top_level.txt +1 -0
metatron/__init__.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""Metatron — multi-project RSS feed manager with cross-outlet deduplication."""
|
|
2
|
+
|
|
3
|
+
# Keep in sync with the distribution version (this wheel is metatron-cli 0.2.1);
# `metatron --version` reads this value.
__version__ = "0.2.1"
|
|
4
|
+
|
|
5
|
+
from metatron.config import (
|
|
6
|
+
ApiConfig,
|
|
7
|
+
ConfigError,
|
|
8
|
+
DatabaseConfig,
|
|
9
|
+
LlmConfig,
|
|
10
|
+
MetatronConfig,
|
|
11
|
+
PollerConfig,
|
|
12
|
+
)
|
|
13
|
+
from metatron.db import Database
|
|
14
|
+
from metatron.dedup import (
|
|
15
|
+
BatchPlan,
|
|
16
|
+
CheapDecision,
|
|
17
|
+
DedupConfig,
|
|
18
|
+
build_batch_plan,
|
|
19
|
+
cheap_decide,
|
|
20
|
+
run_batch,
|
|
21
|
+
)
|
|
22
|
+
from metatron.llm import BatchJudge, ClusterGroup, ClusterItem
|
|
23
|
+
from metatron.normalize import canonicalize_url, jaccard, normalize_title, tokenize
|
|
24
|
+
from metatron.poller import Poller, poll_feeds, refresh_project_now
|
|
25
|
+
|
|
26
|
+
# Public API of the package: every name listed here is imported at the top of
# this module from one of the metatron submodules (config, db, dedup, llm,
# normalize, poller), plus the package version string.
__all__ = [
    "__version__",
    "ApiConfig",
    "BatchJudge",
    "BatchPlan",
    "CheapDecision",
    "ClusterGroup",
    "ClusterItem",
    "ConfigError",
    "Database",
    "DatabaseConfig",
    "DedupConfig",
    "LlmConfig",
    "MetatronConfig",
    "Poller",
    "PollerConfig",
    "build_batch_plan",
    "canonicalize_url",
    "cheap_decide",
    "jaccard",
    "normalize_title",
    "poll_feeds",
    "refresh_project_now",
    "run_batch",
    "tokenize",
]
|
metatron/api.py
ADDED
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
"""FastAPI HTTP surface for Metatron.
|
|
2
|
+
|
|
3
|
+
One purpose: manage feeds, expose deduplicated articles. No curation, no
|
|
4
|
+
personality, no opinions about what's interesting — that's for the caller.
|
|
5
|
+
|
|
6
|
+
Endpoints:
|
|
7
|
+
POST /projects — create a project
|
|
8
|
+
GET /projects — list projects
|
|
9
|
+
DELETE /projects/{project_id} — delete a project (cascades)
|
|
10
|
+
POST /projects/{project_id}/feeds — add a feed to a project
|
|
11
|
+
GET /projects/{project_id}/feeds — list feeds for a project
|
|
12
|
+
DELETE /feeds/{feed_id} — remove a feed
|
|
13
|
+
GET /projects/{project_id}/articles — list deduped articles (?since=, ?limit=)
|
|
14
|
+
GET /articles/{article_id} — fetch one article with body + cluster members
|
|
15
|
+
POST /projects/{project_id}/refresh — force a synchronous poll of this project's feeds
|
|
16
|
+
GET /health — liveness probe (no auth)
|
|
17
|
+
|
|
18
|
+
Authentication: bearer token from config [api].api_token. When the token is
|
|
19
|
+
empty, the API is open (dev convenience; configure a token before exposing).
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import asyncio
|
|
25
|
+
import logging
|
|
26
|
+
from datetime import datetime, timezone
|
|
27
|
+
from typing import Any
|
|
28
|
+
|
|
29
|
+
from fastapi import Depends, FastAPI, Header, HTTPException, Path as PathParam, Query
|
|
30
|
+
from pydantic import BaseModel, Field
|
|
31
|
+
|
|
32
|
+
from metatron.config import MetatronConfig
|
|
33
|
+
from metatron.db import Database
|
|
34
|
+
from metatron.dedup import DedupConfig
|
|
35
|
+
from metatron.llm import BatchJudge
|
|
36
|
+
from metatron.poller import Poller, refresh_project_now
|
|
37
|
+
|
|
38
|
+
logger = logging.getLogger("metatron.api")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# ── request / response shapes ────────────────────────────────────────────
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# Request body for POST /projects.
class CreateProjectIn(BaseModel):
    # Required project name, validated to be 1..200 characters long.
    name: str = Field(..., min_length=1, max_length=200)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# Response shape for a project. Instances are built directly from the rows
# returned by db.create_project() / db.list_projects() via ProjectOut(**row).
class ProjectOut(BaseModel):
    id: str
    name: str
    # Creation timestamp as a string; exact format is whatever the database
    # layer stores — TODO confirm against metatron.db.
    created_at: str
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# Request body for POST /projects/{project_id}/feeds.
class AddFeedIn(BaseModel):
    # Feed URL; required and must be non-empty.
    url: str = Field(..., min_length=1)
    # Optional display name; the add_feed endpoint falls back to the URL's
    # host when this is missing or empty.
    name: str | None = Field(default=None)
    # Free-form category label; empty string when not supplied.
    category: str = Field(default="")
    # Polling interval in seconds; defaults to 1800.
    poll_interval_seconds: int = Field(default=1800)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# Response shape for a feed. Instances are produced by _feed_to_out() from a
# database row.
class FeedOut(BaseModel):
    id: str
    project_id: str
    url: str
    name: str
    category: str
    # Coerced with bool() from the stored value by _feed_to_out().
    enabled: bool
    poll_interval_seconds: int
    # Both may be None; presumably unset until the feed has been polled /
    # has failed at least once — verify against the poller.
    last_polled: str | None
    last_error: str | None
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# Response shape for one article in list responses. Instances are produced
# by _article_to_out() from a database row.
class ArticleOut(BaseModel):
    id: str
    project_id: str
    canonical_url: str
    source_url: str
    source: str
    title: str
    # Never None in responses: _article_to_out() substitutes "" for a
    # missing/empty summary.
    summary: str
    published: str | None
    fetched_at: str
    # None when the row carries no cluster id.
    cluster_id: str | None
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# Detail response for GET /articles/{article_id}: the list-view fields from
# ArticleOut plus the article body and the members of its cluster.
class ArticleDetailOut(ArticleOut):
    body: str
    # default_factory gives every instance its own list instead of relying on
    # a shared mutable literal as the class-level default.
    cluster_members: list[dict[str, Any]] = Field(default_factory=list)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# Response shape for POST /projects/{project_id}/refresh. Built from the
# stats mapping returned by refresh_project_now() via RefreshOut(**stats).
class RefreshOut(BaseModel):
    polled: int
    new_articles: int
    duplicates: int
    cluster_joins: int
    # Optional in the stats mapping; reported as 0 when absent.
    feed_errors: int = 0
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
# ── factory ─────────────────────────────────────────────────────────────
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def create_app(
    *,
    config: MetatronConfig,
    db: Database,
    judge: BatchJudge,
    poller: Poller | None = None,
) -> FastAPI:
    """Build the FastAPI app wired to the given dependencies.

    Args:
        config: Resolved configuration; ``config.api.api_token`` controls
            authentication and ``config.poller`` controls the background
            poller and the per-feed timeout used by on-demand refreshes.
        db: Database handle used by every endpoint.
        judge: LLM batch judge; passed to on-demand refreshes and reported
            by ``/health`` via ``judge.enabled``.
        poller: Optional background poller. When given (and enabled in the
            config) it is started on app startup and stopped on shutdown.

    Returns:
        The configured ``FastAPI`` application.
    """
    # Local imports keep the new dependencies inside this block.
    import secrets

    from metatron import __version__ as package_version

    # Report the real package version instead of a hard-coded literal that
    # drifts from the released distribution.
    app = FastAPI(title="Metatron", version=package_version)

    expected_token = config.api.api_token

    def require_auth(authorization: str | None = Header(default=None)) -> None:
        """Reject the request with 401 unless it carries the configured bearer token."""
        if not expected_token:
            # No token configured: the API is intentionally open (dev mode).
            return
        challenge = {"WWW-Authenticate": "Bearer"}
        scheme, separator, credentials = (authorization or "").partition(" ")
        # The auth scheme name is case-insensitive, so accept "bearer" too.
        if not separator or scheme.lower() != "bearer":
            raise HTTPException(
                status_code=401, detail="Missing bearer token", headers=challenge
            )
        token = credentials.strip()
        # Constant-time comparison so response timing does not leak how much
        # of the token matched. Compare bytes: compare_digest rejects
        # non-ASCII str arguments.
        if not secrets.compare_digest(
            token.encode("utf-8"), expected_token.encode("utf-8")
        ):
            raise HTTPException(
                status_code=401, detail="Invalid bearer token", headers=challenge
            )

    # NOTE(review): on_event is deprecated in recent FastAPI releases in favour
    # of lifespan handlers; kept as-is because the minimum supported FastAPI
    # version is not visible from this file.
    @app.on_event("startup")
    async def _startup() -> None:
        if poller is not None and config.poller.enabled:
            poller.start()

    @app.on_event("shutdown")
    async def _shutdown() -> None:
        if poller is not None:
            await poller.stop()

    @app.get("/health")
    async def health() -> dict[str, Any]:
        # Liveness probe; deliberately has no auth dependency.
        return {"status": "ok", "llm_enabled": judge.enabled}

    # ── projects ─────────────────────────────────────────────────────────
    @app.post(
        "/projects",
        response_model=ProjectOut,
        status_code=201,
        dependencies=[Depends(require_auth)],
    )
    async def create_project(payload: CreateProjectIn) -> ProjectOut:
        try:
            row = db.create_project(payload.name)
        except Exception as e:
            # Any failure from the database layer is reported as a conflict.
            # NOTE(review): presumably the expected failure is a duplicate
            # name — confirm and narrow the exception type if so.
            raise HTTPException(status_code=409, detail=f"Project create failed: {e}") from e
        return ProjectOut(**row)

    @app.get("/projects", response_model=list[ProjectOut], dependencies=[Depends(require_auth)])
    async def list_projects() -> list[ProjectOut]:
        return [ProjectOut(**r) for r in db.list_projects()]

    @app.delete("/projects/{project_id}", status_code=204, dependencies=[Depends(require_auth)])
    async def delete_project(project_id: str = PathParam(...)) -> None:
        if not db.delete_project(project_id):
            raise HTTPException(status_code=404, detail="Project not found")

    # ── feeds ────────────────────────────────────────────────────────────
    @app.post(
        "/projects/{project_id}/feeds",
        response_model=FeedOut,
        status_code=201,
        dependencies=[Depends(require_auth)],
    )
    async def add_feed(project_id: str, payload: AddFeedIn) -> FeedOut:
        if not db.get_project(project_id):
            raise HTTPException(status_code=404, detail="Project not found")
        # Fall back to the URL's host when the caller gives no display name.
        name = payload.name or _host_of(payload.url)
        try:
            row = db.add_feed(
                project_id=project_id,
                url=payload.url,
                name=name,
                category=payload.category,
                poll_interval_seconds=payload.poll_interval_seconds,
            )
        except Exception as e:
            raise HTTPException(status_code=409, detail=f"Feed add failed: {e}") from e
        return _feed_to_out(row)

    @app.get(
        "/projects/{project_id}/feeds",
        response_model=list[FeedOut],
        dependencies=[Depends(require_auth)],
    )
    async def list_feeds(project_id: str) -> list[FeedOut]:
        if not db.get_project(project_id):
            raise HTTPException(status_code=404, detail="Project not found")
        return [_feed_to_out(r) for r in db.list_feeds(project_id)]

    @app.delete("/feeds/{feed_id}", status_code=204, dependencies=[Depends(require_auth)])
    async def delete_feed(feed_id: str) -> None:
        if not db.delete_feed(feed_id):
            raise HTTPException(status_code=404, detail="Feed not found")

    # ── articles ─────────────────────────────────────────────────────────
    @app.get(
        "/projects/{project_id}/articles",
        response_model=list[ArticleOut],
        dependencies=[Depends(require_auth)],
    )
    async def list_articles(
        project_id: str,
        since: str | None = Query(default=None),
        limit: int = Query(default=50, ge=1, le=500),
    ) -> list[ArticleOut]:
        if not db.get_project(project_id):
            raise HTTPException(status_code=404, detail="Project not found")
        rows = db.list_articles(project_id, since=since, limit=limit, deduped=True)
        return [_article_to_out(r) for r in rows]

    @app.get(
        "/articles/{article_id}",
        response_model=ArticleDetailOut,
        dependencies=[Depends(require_auth)],
    )
    async def get_article(article_id: str) -> ArticleDetailOut:
        row = db.get_article(article_id)
        if not row:
            raise HTTPException(status_code=404, detail="Article not found")
        members: list[dict[str, Any]] = []
        if row.get("cluster_id"):
            members = db.article_cluster_members(row["cluster_id"])
        return ArticleDetailOut(
            **_article_to_out(row).model_dump(),
            # A stored None body must not reach the `body: str` field; treat
            # it like a missing one (same handling as `summary`).
            body=row.get("body") or "",
            cluster_members=members,
        )

    # ── on-demand refresh ────────────────────────────────────────────────
    @app.post(
        "/projects/{project_id}/refresh",
        response_model=RefreshOut,
        dependencies=[Depends(require_auth)],
    )
    async def refresh(project_id: str) -> RefreshOut:
        if not db.get_project(project_id):
            raise HTTPException(status_code=404, detail="Project not found")
        loop = asyncio.get_running_loop()
        # Run the blocking poll in the default executor so the event loop
        # stays responsive while feeds are fetched.
        # NOTE(review): a default DedupConfig() is used here rather than
        # anything taken from `config` — confirm that is intended.
        stats = await loop.run_in_executor(
            None,
            refresh_project_now,
            project_id,
            db,
            judge,
            DedupConfig(),
            config.poller.feed_timeout_seconds,
        )
        return RefreshOut(**stats)

    return app
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
# ── helpers ─────────────────────────────────────────────────────────────
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def _feed_to_out(row: dict[str, Any]) -> FeedOut:
    """Convert a feed row mapping into the ``FeedOut`` response model.

    ``id``, ``project_id``, ``url`` and ``name`` are required keys; the
    remaining keys are optional and fall back to the defaults below.
    """
    fields: dict[str, Any] = {
        "id": row["id"],
        "project_id": row["project_id"],
        "url": row["url"],
        "name": row["name"],
        # A stored None/empty category is normalised to "".
        "category": row.get("category", "") or "",
        "enabled": bool(row.get("enabled", 1)),
        "poll_interval_seconds": int(row.get("poll_interval_seconds", 1800)),
        "last_polled": row.get("last_polled"),
        "last_error": row.get("last_error"),
    }
    return FeedOut(**fields)
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def _article_to_out(row: dict[str, Any]) -> ArticleOut:
    """Convert an article row mapping into the ``ArticleOut`` response model.

    ``summary``, ``published`` and ``cluster_id`` are optional keys; every
    other key read here must be present in ``row``.
    """
    fields: dict[str, Any] = {
        "id": row["id"],
        "project_id": row["project_id"],
        "canonical_url": row["canonical_url"],
        "source_url": row["source_url"],
        "source": row["source"],
        "title": row["title"],
        # A stored None/empty summary is normalised to "".
        "summary": row.get("summary", "") or "",
        "published": row.get("published"),
        "fetched_at": row["fetched_at"],
        "cluster_id": row.get("cluster_id"),
    }
    return ArticleOut(**fields)
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def _host_of(url: str) -> str:
    """Return the host part of ``url`` without a leading ``www.``.

    Falls back to the input string itself when no host can be extracted,
    and returns the input unchanged when the URL cannot be parsed at all.
    """
    from urllib.parse import urlsplit

    try:
        hostname = urlsplit(url).hostname
    except ValueError:
        # Unparseable URL: hand the raw string back.
        return url
    label = hostname if hostname else url
    return label.removeprefix("www.")
|
metatron/cli.py
ADDED
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
"""Metatron CLI.
|
|
2
|
+
|
|
3
|
+
Subcommands:
|
|
4
|
+
metatron serve — run the HTTP API + background poller
|
|
5
|
+
metatron config init — write a starter config to ~/.config/metatron/config.toml
|
|
6
|
+
metatron config show — print the resolved config
|
|
7
|
+
metatron seed-feeds <project> <feeds.json>
|
|
8
|
+
— bulk-add feeds to a project from a JSON file
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import argparse
|
|
14
|
+
import json
|
|
15
|
+
import logging
|
|
16
|
+
import os
|
|
17
|
+
import sys
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
|
|
20
|
+
from metatron.config import (
|
|
21
|
+
DEFAULT_CONFIG_TEMPLATE,
|
|
22
|
+
ConfigError,
|
|
23
|
+
MetatronConfig,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger("metatron.cli")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _resolve_config_path() -> Path:
    """Return the config file path, honouring the METATRON_CONFIG override.

    A non-empty ``METATRON_CONFIG`` environment variable wins; otherwise the
    default location reported by ``MetatronConfig`` is used.
    """
    env_path = os.environ.get("METATRON_CONFIG")
    if env_path:
        return Path(env_path)
    return MetatronConfig.default_config_path()
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _cmd_config_init() -> int:
    """Write the starter config file unless one already exists.

    Returns:
        0 after writing the template, 1 when a file is already present at
        the resolved config path (nothing is overwritten).
    """
    target = _resolve_config_path()
    if not target.exists():
        # Create the containing directory tree on first use.
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(DEFAULT_CONFIG_TEMPLATE, encoding="utf-8")
        print(f"Wrote starter config to {target}")
        return 0
    print(f"Config already exists: {target}", file=sys.stderr)
    return 1
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _cmd_config_show() -> int:
    """Print a summary of the resolved configuration.

    Returns:
        0 on success, 1 when the configuration cannot be loaded.
    """
    path = _resolve_config_path()
    try:
        config = MetatronConfig.from_file()
    except ConfigError as e:
        print(f"Configuration error: {e}", file=sys.stderr)
        return 1
    # NOTE(review): the path printed below comes from _resolve_config_path(),
    # while from_file() is called without it — confirm from_file() applies the
    # same METATRON_CONFIG override, otherwise the two can disagree.
    report = [
        f"Config path: {path}",
        f"Exists: {path.exists()}",
        f"API host:port: {config.api.host}:{config.api.port}",
        # Only whether a token is set is shown, never the token itself.
        f"API token set: {'yes' if config.api.api_token else 'no (open)'}",
        f"LLM tiebreaker: {'enabled' if config.llm.enabled else 'disabled'}",
        f"LLM model: {config.llm.model} (via {config.llm.binary} CLI)",
        f"Poller enabled: {config.poller.enabled}",
        f"Poller tick: {config.poller.tick_seconds}s",
        f"Feed interval: {config.poller.default_feed_interval_seconds}s",
    ]
    for line in report:
        print(line)
    return 0
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _build_judge(config: MetatronConfig) -> "BatchJudge":
    """Create the batch judge described by ``config.llm``.

    When the LLM is enabled a regular ``BatchJudge`` is returned. Otherwise a
    subclass is returned whose ``enabled`` is always False and whose
    ``cluster`` always yields an empty list.
    """
    from metatron.llm import BatchJudge as _Judge

    if config.llm.enabled:
        return _Judge(
            model=config.llm.model,
            binary=config.llm.binary,
            idle_timeout=config.llm.idle_timeout_seconds,
        )

    class _Disabled(_Judge):
        # Stand-in used when the LLM tiebreaker is switched off.
        @property
        def enabled(self) -> bool:  # type: ignore[override]
            return False

        def cluster(self, items):  # type: ignore[override]
            return []

    return _Disabled(model=config.llm.model, binary=config.llm.binary)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _cmd_serve() -> int:
    """Run the HTTP API together with the background poller.

    Loads the configuration, opens the database, builds the judge and the
    poller, and serves the app with uvicorn until it exits.

    Returns:
        0 after uvicorn returns, 1 when the configuration cannot be loaded.
    """
    # Heavy/optional dependencies are imported lazily so the other
    # subcommands do not pay for them.
    import uvicorn

    from metatron.api import create_app
    from metatron.db import Database, default_db_path
    from metatron.poller import Poller

    try:
        config = MetatronConfig.from_file()
    except ConfigError as e:
        print(f"Configuration error: {e}", file=sys.stderr)
        return 1

    # An empty database path in the config means "use the default location".
    db_path = Path(config.database.path) if config.database.path else default_db_path()
    db = Database(path=db_path)
    judge = _build_judge(config)
    poller = Poller(
        db=db,
        judge=judge,
        tick_seconds=config.poller.tick_seconds,
        feed_timeout_seconds=config.poller.feed_timeout_seconds,
    )
    app = create_app(config=config, db=db, judge=judge, poller=poller)

    uvicorn.run(
        app,
        host=config.api.host,
        port=config.api.port,
        log_level="info",
    )
    return 0
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _extract_feed_entries(data: object) -> list:
    """Return the feed entries from parsed JSON: a bare list or ``{"feeds": [...]}``."""
    if isinstance(data, list):
        return data
    if isinstance(data, dict):
        entries = data.get("feeds", [])
        return entries if isinstance(entries, list) else []
    return []


def _cmd_seed_feeds(project_name: str, feeds_path: str) -> int:
    """Bulk-add feeds to a project from a JSON file.

    The file may contain either a list of feed objects or an object with a
    ``"feeds"`` list. Each feed object needs a ``url``; ``name``,
    ``category`` and ``poll_interval_seconds`` are optional. The project is
    created when no project with ``project_name`` exists yet.

    Args:
        project_name: Name of the target project.
        feeds_path: Path to the JSON file describing the feeds.

    Returns:
        0 once the feeds have been processed (individual failures are
        counted as skipped), 1 when the input file is missing, unreadable,
        not valid JSON or contains no feeds, or when the configuration
        cannot be loaded.
    """
    path = Path(feeds_path)
    if not path.exists():
        print(f"feeds file not found: {path}", file=sys.stderr)
        return 1
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        print(f"feeds file not valid JSON: {e}", file=sys.stderr)
        return 1
    except (OSError, UnicodeDecodeError) as e:
        print(f"feeds file not readable: {e}", file=sys.stderr)
        return 1

    # Accept both supported top-level shapes; calling .get() on a bare list
    # would raise AttributeError.
    feeds = _extract_feed_entries(data)
    if not feeds:
        print("no feeds found in input", file=sys.stderr)
        return 1

    # Imported only after the input has been validated, so bad input is
    # reported without touching the database layer.
    from metatron.db import Database, default_db_path

    try:
        config = MetatronConfig.from_file()
    except ConfigError as e:
        # Same reporting as the other subcommands instead of a traceback.
        print(f"Configuration error: {e}", file=sys.stderr)
        return 1
    db_path = Path(config.database.path) if config.database.path else default_db_path()
    db = Database(path=db_path)

    # Reuse an existing project with this name, otherwise create it.
    project = next(
        (p for p in db.list_projects() if p["name"] == project_name), None
    )
    if project is None:
        project = db.create_project(project_name)
    project_id = project["id"]

    default_interval = config.poller.default_feed_interval_seconds
    added = 0
    skipped = 0
    for feed in feeds:
        if not isinstance(feed, dict):
            # Entries must be JSON objects; anything else cannot carry a url.
            print(f" skip {feed!r}: not a JSON object", file=sys.stderr)
            skipped += 1
            continue
        url = feed.get("url")
        if not url:
            skipped += 1
            continue
        try:
            db.add_feed(
                project_id=project_id,
                url=url,
                name=feed.get("name") or url,
                category=feed.get("category", ""),
                poll_interval_seconds=int(
                    feed.get("poll_interval_seconds", default_interval)
                ),
            )
        except Exception as e:
            # Deliberately best-effort: one bad feed must not abort the batch.
            print(f" skip {url}: {e}", file=sys.stderr)
            skipped += 1
        else:
            added += 1
    print(f"Project: {project_name} ({project_id})")
    print(f"Added: {added}")
    print(f"Skipped: {skipped}")
    return 0
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def main(args: list[str] | None = None) -> int:
    """Parse the command line and dispatch to the matching subcommand.

    Args:
        args: Argument list to parse; ``None`` lets argparse read the
            process arguments.

    Returns:
        The exit code of the executed subcommand, or 0 after printing the
        help text when no subcommand was given.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    from metatron import __version__

    parser = argparse.ArgumentParser(
        description="Metatron — multi-project RSS feed manager with cross-outlet deduplication",
    )
    parser.add_argument("--version", action="version", version=f"metatron {__version__}")
    subparsers = parser.add_subparsers(dest="subcommand")

    subparsers.add_parser("serve", help="Run the HTTP API + background poller")

    config_parser = subparsers.add_parser("config", help="Config management")
    config_parser.add_argument("action", choices=["init", "show"])

    seed_parser = subparsers.add_parser("seed-feeds", help="Bulk-add feeds to a project from JSON")
    seed_parser.add_argument("project", help="Project name (created if missing)")
    seed_parser.add_argument("feeds_path", help="Path to a JSON file with feeds")

    opts = parser.parse_args(args)
    command = opts.subcommand

    if command == "serve":
        return _cmd_serve()
    if command == "seed-feeds":
        return _cmd_seed_feeds(opts.project, opts.feeds_path)
    if command == "config" and opts.action == "init":
        return _cmd_config_init()
    if command == "config" and opts.action == "show":
        return _cmd_config_show()

    # No (recognised) subcommand: show usage rather than failing.
    parser.print_help()
    return 0
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
# Script entry point: run the CLI and use its return value as the exit code.
if __name__ == "__main__":
    raise SystemExit(main())
|