android-watcher 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- android_watcher/__init__.py +10 -0
- android_watcher/catalog/__init__.py +32 -0
- android_watcher/catalog/catalog.toml +531 -0
- android_watcher/cli.py +161 -0
- android_watcher/config.py +262 -0
- android_watcher/detect/__init__.py +1 -0
- android_watcher/detect/_normalize.py +192 -0
- android_watcher/detect/android_sitemap.py +540 -0
- android_watcher/detect/base.py +14 -0
- android_watcher/detect/content.py +99 -0
- android_watcher/detect/feed.py +135 -0
- android_watcher/detect/sitemap.py +203 -0
- android_watcher/doctor.py +125 -0
- android_watcher/fetch.py +162 -0
- android_watcher/group.py +79 -0
- android_watcher/lock.py +32 -0
- android_watcher/models.py +156 -0
- android_watcher/notify/__init__.py +1 -0
- android_watcher/notify/base.py +21 -0
- android_watcher/notify/email.py +52 -0
- android_watcher/notify/html.py +114 -0
- android_watcher/notify/render.py +239 -0
- android_watcher/notify/slack.py +124 -0
- android_watcher/notify/telegram.py +46 -0
- android_watcher/rank.py +84 -0
- android_watcher/registry.py +38 -0
- android_watcher/run.py +283 -0
- android_watcher/schedule.py +488 -0
- android_watcher/seed/__init__.py +45 -0
- android_watcher/seed/seed.sql.gz +0 -0
- android_watcher/store.py +492 -0
- android_watcher/triage/__init__.py +1 -0
- android_watcher/triage/base.py +25 -0
- android_watcher/triage/claude_cli.py +185 -0
- android_watcher/triage/noop.py +24 -0
- android_watcher/tui/__init__.py +1 -0
- android_watcher/tui/app.py +163 -0
- android_watcher/tui/configio.py +215 -0
- android_watcher/tui/screens.py +927 -0
- android_watcher-1.0.0.dist-info/METADATA +310 -0
- android_watcher-1.0.0.dist-info/RECORD +44 -0
- android_watcher-1.0.0.dist-info/WHEEL +4 -0
- android_watcher-1.0.0.dist-info/entry_points.txt +2 -0
- android_watcher-1.0.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""Slack notifier: bot token (chat.postMessage + threaded file upload)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
|
|
7
|
+
import httpx
|
|
8
|
+
|
|
9
|
+
from android_watcher.config import Config
|
|
10
|
+
from android_watcher.models import Digest, DigestGroup
|
|
11
|
+
from android_watcher.notify.base import NOTIFIERS, NotifyError
|
|
12
|
+
from android_watcher.notify.html import render_html
|
|
13
|
+
from android_watcher.notify.render import render_slack
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _member_ids(groups: list[DigestGroup]) -> set[int]:
    """Collect the ids of all persisted member changes across ``groups``.

    Members whose ``id`` is ``None`` are skipped.
    """
    ids: set[int] = set()
    for group in groups:
        for member in group.members:
            if member.id is not None:
                ids.add(member.id)
    return ids
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _targets(raw: str) -> list[str]:
    """Split a comma-separated channel setting into trimmed, non-empty targets."""
    trimmed = (piece.strip() for piece in raw.split(","))
    return [piece for piece in trimmed if piece]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@NOTIFIERS.register("slack")
class SlackNotifier:
    """Slack delivery via a bot token.

    Each configured target receives the capped digest message through
    ``chat.postMessage``, followed by the full HTML digest page uploaded with
    Slack's external-upload flow (threaded under the message when possible).
    """

    name = "slack"

    def send(self, digest: Digest, config: Config) -> set[int]:
        """Deliver ``digest`` to every configured Slack target.

        Returns:
            The ids of the changes that reached Slack. A target whose HTML page
            upload succeeded contributes every group in the digest; a target
            where only the message landed contributes ``digest.message_groups()``.

        Raises:
            NotifyError: if the bot token or channel is not configured (this
                includes a channel setting with no usable target), or if
                posting the main message fails for any target.
        """
        sl = config.slack
        # A value such as "," or " " is truthy but yields no targets; treat it
        # as unconfigured instead of silently delivering nothing.
        targets = _targets(sl.channel) if sl.channel else []
        if not (sl.bot_token and targets):
            raise NotifyError("slack enabled but bot_token + channel not configured")
        payload = render_slack(digest, thread_page=True)
        delivered: set[int] = set()
        for target in targets:
            logger.info("slack: posting message to %s then uploading digest page", target)
            channel_id, ts = self._send_bot(sl.bot_token, target, payload)
            logger.info(
                "slack: message posted (channel_id=%s ts=%s); uploading HTML page",
                channel_id,
                ts,
            )
            uploaded = self._deliver_page(sl.bot_token, channel_id, ts, digest)
            if uploaded:
                # Full HTML reached the channel: all groups (incl. carried) are visible.
                delivered |= _member_ids(digest.groups)
            else:
                # Upload failed entirely; only the capped message reached Slack.
                delivered |= _member_ids(digest.message_groups())
        return delivered

    def _send_bot(self, bot_token: str, channel: str, payload: dict) -> tuple[str, str]:
        """Post the main message with ``chat.postMessage``.

        Returns:
            ``(channel_id, ts)`` taken from Slack's response.

        Raises:
            NotifyError: on transport/HTTP failure, a response body that is not
                JSON, an ``ok: false`` reply, or a reply missing ``channel``/``ts``.
        """
        try:
            resp = httpx.post(
                "https://slack.com/api/chat.postMessage",
                headers={"Authorization": f"Bearer {bot_token}"},
                json={"channel": channel, **payload},
                timeout=30.0,
            )
            resp.raise_for_status()
        except (httpx.HTTPStatusError, httpx.RequestError) as exc:
            raise NotifyError(f"slack send failed: {type(exc).__name__}") from exc
        try:
            body = resp.json()
        except ValueError as exc:
            # A 2xx with a non-JSON body (e.g. a proxy page) must surface as a
            # NotifyError so the caller's per-channel isolation still applies.
            raise NotifyError("slack chat.postMessage returned a non-JSON response") from exc
        if not body.get("ok"):
            raise NotifyError(f"slack chat.postMessage failed: {body.get('error')}")
        try:
            return body["channel"], body["ts"]
        except KeyError as exc:
            raise NotifyError(f"slack chat.postMessage response missing {exc}") from exc

    def _deliver_page(self, bot_token: str, channel_id: str, ts: str, digest: Digest) -> bool:
        """Deliver the full-digest HTML page. Try a threaded reply first; if that
        fails, fall back to a standalone message in the channel so the page still
        lands. Non-fatal: the main message is already delivered, so a total failure
        logs and returns False (carried changes then retry next run)."""
        data = render_html(digest).encode("utf-8")
        if self._upload_page(bot_token, channel_id, data, thread_ts=ts):
            return True
        logger.warning(
            "slack: threaded page upload failed; retrying as a standalone channel message"
        )
        if self._upload_page(bot_token, channel_id, data, thread_ts=None):
            return True
        logger.error("slack: digest page upload failed (thread and standalone); page not delivered")
        return False

    def _upload_page(
        self, bot_token: str, channel_id: str, data: bytes, *, thread_ts: str | None
    ) -> bool:
        """One external-upload sequence (getUploadURLExternal -> PUT -> complete).
        Posts into the thread when thread_ts is set, else as a new channel message.

        Returns True on success. Every expected failure (transport/HTTP error,
        ``ok: false``, missing response key, non-JSON body) is logged and
        reported as False instead of raising."""
        where = f"thread {thread_ts}" if thread_ts else "channel"
        try:
            headers = {"Authorization": f"Bearer {bot_token}"}
            r1 = httpx.get(
                "https://slack.com/api/files.getUploadURLExternal",
                headers=headers,
                params={"filename": "digest.html", "length": len(data)},
                timeout=30.0,
            )
            r1.raise_for_status()
            b1 = r1.json()
            if not b1.get("ok"):
                raise NotifyError(f"getUploadURLExternal: {b1.get('error')}")
            httpx.post(b1["upload_url"], content=data, timeout=30.0).raise_for_status()
            body: dict = {
                "files": [{"id": b1["file_id"], "title": "Android Watcher Digest"}],
                "channel_id": channel_id,
            }
            if thread_ts:
                body["thread_ts"] = thread_ts
            r2 = httpx.post(
                "https://slack.com/api/files.completeUploadExternal",
                headers=headers,
                json=body,
                timeout=30.0,
            )
            r2.raise_for_status()
            b2 = r2.json()
            if not b2.get("ok"):
                raise NotifyError(f"completeUploadExternal: {b2.get('error')}")
            logger.info("slack: digest page uploaded to %s", where)
            return True
        except (
            httpx.HTTPStatusError,
            httpx.RequestError,
            NotifyError,
            KeyError,
            ValueError,  # non-JSON body from .json(); keeps the upload non-fatal
        ) as exc:
            logger.warning("slack: digest page upload to %s failed: %s", where, exc)
            return False
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Telegram notifier — delivers digests via the Telegram Bot API."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import httpx
|
|
6
|
+
|
|
7
|
+
from android_watcher.config import Config
|
|
8
|
+
from android_watcher.models import Digest, NotifyError
|
|
9
|
+
from android_watcher.notify.base import NOTIFIERS
|
|
10
|
+
from android_watcher.notify.render import render_telegram
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _chat_ids(raw: str) -> list[str]:
    """Return the individual chat ids from a comma-separated setting.

    Surrounding whitespace is trimmed and empty entries are dropped.
    """
    ids: list[str] = []
    for piece in raw.split(","):
        cleaned = piece.strip()
        if cleaned:
            ids.append(cleaned)
    return ids
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@NOTIFIERS.register("telegram")
class TelegramNotifier:
    """Deliver the digest as an HTML-formatted message via the Telegram Bot API."""

    name = "telegram"

    def send(self, digest: Digest, config: Config) -> set[int]:
        """Send ``digest`` to every configured chat id.

        Returns:
            The ids of the changes in ``digest.message_groups()``, i.e. what the
            message conveyed.

        Raises:
            NotifyError: if the bot token or chat id is not configured, or if
                sending to any chat fails.
        """
        token = config.telegram.bot_token
        raw_chat_ids = config.telegram.chat_id
        chat_ids = _chat_ids(raw_chat_ids) if raw_chat_ids else []
        # Without this guard an empty chat list would skip the loop and still
        # report every change as delivered although nothing was sent.
        if not (token and chat_ids):
            raise NotifyError("telegram enabled but bot_token + chat_id not configured")
        text = render_telegram(digest)
        url = f"https://api.telegram.org/bot{token}/sendMessage"
        for chat_id in chat_ids:
            try:
                resp = httpx.post(
                    url,
                    json={
                        "chat_id": chat_id,
                        "text": text,
                        "parse_mode": "HTML",
                        "disable_web_page_preview": True,
                    },
                    timeout=30.0,
                )
                resp.raise_for_status()
            except httpx.HTTPStatusError as exc:
                # The HTTPStatusError message embeds the request URL, which
                # contains the bot token. Suppress the chain so a logged
                # traceback cannot leak it; status and body carry the detail.
                raise NotifyError(
                    f"telegram send failed: {exc.response.status_code} {exc.response.text[:200]}"
                ) from None
            except httpx.RequestError as exc:
                detail = exc.args[0] if exc.args else "request error"
                raise NotifyError(f"telegram send failed: {type(exc).__name__}: {detail}") from exc
        return {m.id for g in digest.message_groups() for m in g.members if m.id is not None}
|
android_watcher/rank.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""Rank a list of Changes into a Digest.
|
|
2
|
+
|
|
3
|
+
Scoring:
|
|
4
|
+
base = source.default_weight if nonzero, else CATEGORY_WEIGHTS[source.category]
|
|
5
|
+
(falls back to DEFAULT_CATEGORY_WEIGHT for unknown categories or missing sources)
|
|
6
|
+
score = base + config.sort override (source_id key takes precedence over category key)
|
|
7
|
+
|
|
8
|
+
Tie-break: detected_at DESC.
|
|
9
|
+
|
|
10
|
+
Groups are sorted globally by (score, members[0].detected_at) DESC; max_items caps
|
|
11
|
+
how many appear in the on-channel message vs. carried over.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from .config import Config
|
|
17
|
+
from .group import group_changes
|
|
18
|
+
from .models import Change, Digest, DigestGroup, Source
|
|
19
|
+
|
|
20
|
+
# Base score per source category, keyed by category id. Higher values sort first.
CATEGORY_WEIGHTS: dict[str, int] = {
    "platform-release": 100,
    "api-reference": 80,
    "tooling": 70,
    "guides": 50,
    "dev-blog": 40,
    "design": 30,
    "news": 20,
}
# Base score used for categories missing from CATEGORY_WEIGHTS and for changes
# whose source is unknown.
DEFAULT_CATEGORY_WEIGHT = 10

# Display order for category subheadings. Anything not listed falls to "Other".
# Each entry is (category id, heading label); "guides" and "dev-blog" are
# separate entries that carry the same label text.
CATEGORY_ORDER: list[tuple[str, str]] = [
    ("platform-release", "Platform & Releases"),
    ("api-reference", "API Reference"),
    ("tooling", "Developer Tooling"),
    ("guides", "Guides & Blog"),
    ("dev-blog", "Guides & Blog"),
    ("design", "Design"),
    ("news", "News"),
]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _score(change: Change, source: Source | None, config: Config) -> int:
    """Compute the ranking score for one change.

    The base is the source's own ``default_weight`` when it is non-zero, else
    the weight of the source's category (``DEFAULT_CATEGORY_WEIGHT`` if the
    category is unknown). A ``config.sort`` entry keyed by the change's
    ``source_id`` is added on top; only when no such entry exists is the entry
    keyed by the category used. A missing source scores
    ``DEFAULT_CATEGORY_WEIGHT`` with no override applied.
    """
    if source is None:
        # Unknown source_id (not in sources map): flat default, no override.
        return DEFAULT_CATEGORY_WEIGHT

    base = source.default_weight or CATEGORY_WEIGHTS.get(
        source.category, DEFAULT_CATEGORY_WEIGHT
    )

    overrides = config.sort
    adjustment = overrides.get(change.source_id)
    if adjustment is None:
        # No per-source entry: fall back to the per-category entry.
        adjustment = overrides.get(source.category)
    return base + (adjustment or 0)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def rank(changes: list[Change], sources: dict[str, Source], config: Config) -> Digest:
    """Build a ``Digest`` from the substantive changes.

    Only changes whose verdict is ``"substantive"`` are grouped. Groups are
    ordered by ``(score, first member's detected_at)`` descending, and the
    digest carries ``config.digest.max_items`` as its cap.
    """

    def sort_key(group: DigestGroup):
        return group.score, group.members[0].detected_at

    kept = []
    for change in changes:
        if change.verdict == "substantive":
            kept.append(change)
    groups = group_changes(kept, sources, config)
    groups.sort(key=sort_key, reverse=True)
    return Digest(groups=groups, max_items=config.digest.max_items)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def by_category(groups: list[DigestGroup]) -> list[tuple[str, str, list[DigestGroup]]]:
    """Bucket groups under category labels in CATEGORY_ORDER; preserve rank order.

    Returns a list of ``(category_id, label, groups)`` tuples. Known categories
    appear in ``CATEGORY_ORDER`` order and only when they have at least one
    group. Groups whose category is not listed are collected last under
    ``("other", "Other", ...)``. Within every bucket, including "Other", groups
    keep the relative order they had in ``groups``.
    """
    label_for = dict(CATEGORY_ORDER)
    buckets: dict[str, list[DigestGroup]] = {}
    # Collected in one pass over ``groups`` so that unknown categories stay
    # interleaved in rank order instead of being regrouped per category.
    other: list[DigestGroup] = []
    for g in groups:
        if g.category in label_for:
            buckets.setdefault(g.category, []).append(g)
        else:
            other.append(g)
    # dict preserves the first-seen key order of CATEGORY_ORDER.
    out: list[tuple[str, str, list[DigestGroup]]] = [
        (cid, label, buckets[cid]) for cid, label in label_for.items() if cid in buckets
    ]
    # Unknown categories last, under "Other".
    if other:
        out.append(("other", "Other", other))
    return out
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
from typing import Generic, TypeVar
|
|
5
|
+
|
|
6
|
+
T = TypeVar("T")


class Registry(Generic[T]):
    """Map names to implementation classes for one kind of plug-in.

    The registry holds CLASSES, never instances: ``@reg.register("feed")`` is
    applied to a class, ``reg.get("feed")`` hands that class back, and the
    caller creates the instance itself with no arguments
    (``DETECTORS.get("feed")()``).
    """

    def __init__(self, kind: str) -> None:
        self._items: dict[str, type[T]] = {}
        self.kind = kind

    def register(self, name: str) -> Callable[[type[T]], type[T]]:
        """Return a class decorator that stores the class under ``name``.

        The decorator raises ``ValueError`` if ``name`` is already taken.
        """

        def decorator(impl: type[T]) -> type[T]:
            taken = name in self._items
            if taken:
                raise ValueError(f"{self.kind} {name!r} is already registered")
            self._items[name] = impl
            return impl

        return decorator

    def get(self, name: str) -> type[T]:
        """Return the class registered as ``name``.

        Raises ``KeyError`` listing the available names when ``name`` is unknown.
        """
        if name not in self._items:
            avail = ", ".join(self.available()) or "(none registered)"
            # ``from None`` keeps any ambient exception context out of the traceback.
            raise KeyError(f"{self.kind} {name!r} not found; available: {avail}") from None
        return self._items[name]

    def available(self) -> list[str]:
        """Return the registered names in sorted order."""
        names = list(self._items)
        names.sort()
        return names
|
android_watcher/run.py
ADDED
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
"""The run_once pipeline: source resolution, detection, and orchestration.
|
|
2
|
+
|
|
3
|
+
The first section covers source selection and the isolated async detector
|
|
4
|
+
driver; the rest is the full ``run_once`` orchestration.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
import logging
|
|
11
|
+
import logging.handlers
|
|
12
|
+
import time
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
from android_watcher import __version__
|
|
17
|
+
from android_watcher.catalog import load_catalog
|
|
18
|
+
from android_watcher.config import Config, data_path, db_path, log_path
|
|
19
|
+
from android_watcher.detect.base import DETECTORS
|
|
20
|
+
from android_watcher.fetch import USER_AGENT, Fetcher
|
|
21
|
+
from android_watcher.lock import run_lock
|
|
22
|
+
from android_watcher.models import INTERVAL_DELTA, UTC, Change, Digest, Source
|
|
23
|
+
from android_watcher.notify.base import NOTIFIERS, NotifyError
|
|
24
|
+
from android_watcher.rank import rank
|
|
25
|
+
from android_watcher.seed import apply_seed_if_empty
|
|
26
|
+
from android_watcher.store import Store
|
|
27
|
+
from android_watcher.triage.base import TRIAGERS, TriageResult
|
|
28
|
+
from android_watcher.triage.claude_cli import MAX_TRIAGE_BATCH
|
|
29
|
+
|
|
30
|
+
log = logging.getLogger("android_watcher.run")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def configure_file_logging() -> str:
    """Route the package logger to a rotating log file and return its path.

    Safe to call repeatedly: if the package logger already has a rotating file
    handler, no further handler is added. The CLI calls this so manual and
    scheduled runs share one log; tests do not call it, because they drive
    run_once() directly and must not write to the user log dir.
    """
    path = log_path()
    Path(path).parent.mkdir(parents=True, exist_ok=True)
    pkg_logger = logging.getLogger("android_watcher")
    pkg_logger.setLevel(logging.INFO)
    for existing in pkg_logger.handlers:
        if isinstance(existing, logging.handlers.RotatingFileHandler):
            # Already attached by an earlier call.
            return path
    file_handler = logging.handlers.RotatingFileHandler(
        path, maxBytes=1_000_000, backupCount=3, encoding="utf-8"
    )
    line_format = logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
    file_handler.setFormatter(line_format)
    pkg_logger.addHandler(file_handler)
    return path
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def resolve_sources(config: Config) -> list[Source]:
    """Work out which sources this configuration watches.

    Catalog entries are candidates only when their own ``enabled`` flag is set.
    A non-empty ``enabled_source_ids`` narrows those candidates to the listed
    ids; an empty or absent selection leaves the catalog flags in charge and
    never means "watch nothing". Custom sources are always included and replace
    a catalog source that has the same id.
    """
    selected = config.enabled_source_ids
    by_id: dict[str, Source] = {}
    for entry in load_catalog():
        if not entry.enabled:
            continue
        if selected and entry.id not in selected:
            continue
        by_id[entry.id] = entry
    # Custom sources are applied last so they win on id collision.
    for custom in config.custom_sources:
        by_id[custom.id] = custom
    return list(by_id.values())
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
async def _detect_all(sources: list[Source], store: Store, fetcher: Fetcher) -> list[Change]:
    """Run the detector of every source in turn and gather the changes.

    A source whose detector raises is logged and skipped, so one bad source
    cannot abort the run. The result is the combined list of changes from the
    sources that completed.
    """
    collected: list[Change] = []
    count = len(sources)
    for position, src in enumerate(sources, start=1):
        log.info("detecting [%d/%d] %s (%s)", position, count, src.id, src.detector)
        started = time.monotonic()
        try:
            detector_cls = DETECTORS.get(src.detector)
            found = await detector_cls().detect(src, store, fetcher)
            collected.extend(found)
            log.info(" %s: %d change(s) in %.1fs", src.id, len(found), time.monotonic() - started)
        except Exception:  # isolation: one source must not abort the run
            log.exception("source %s failed after %.1fs", src.id, time.monotonic() - started)
    return collected
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _enabled_channels(config: Config) -> set[str]:
    """Return the names of the notification channels switched on in ``config``."""
    candidates = (
        ("email", config.email),
        ("slack", config.slack),
        ("telegram", config.telegram),
    )
    return {name for name, section in candidates if section.enabled}
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _source_index(config: Config) -> dict[str, Source]:
    """Index every catalog and custom source by id; custom wins on collision."""
    index: dict[str, Source] = {}
    for entry in load_catalog():
        index[entry.id] = entry
    for custom in config.custom_sources:
        index[custom.id] = custom
    return index
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _catch_up_due(store: Store, config: Config, force: bool) -> bool:
    """Decide whether this run should cover the current cycle.

    The run is due when it is forced, when no successful run is recorded, or
    when at least one schedule interval has elapsed since the last successful
    run. A schedule with no entry in ``INTERVAL_DELTA`` (cron) has no fixed
    delta and is always due; the native scheduler enforces cron timing.
    """
    previous = store.last_successful_run()
    if not force and previous is not None:
        interval = INTERVAL_DELTA.get(config.schedule.interval)
        if interval is not None:
            return datetime.now(UTC) - previous >= interval
    # Forced, never run before, or cron: always due.
    return True
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
async def _run_async(sources: list[Source], store: Store, fetcher: Fetcher) -> list[Change]:
    """Run detection for ``sources`` and close ``fetcher`` afterwards.

    The fetcher is closed whether detection returns or raises.
    """
    try:
        detected = await _detect_all(sources, store, fetcher)
    finally:
        await fetcher.close()
    return detected
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _triage_batched(triager, changes, ai_config, batch_size=MAX_TRIAGE_BATCH):
    """Triage ``changes`` in slices of ``batch_size`` and merge the results.

    The returned ``TriageResult`` concatenates the changes of every batch in
    order. Its ``tldr`` and ``unavailable`` fields each take the first
    non-``None`` value reported by a batch, or ``None`` if no batch set one.
    """
    merged: list = []
    first_tldr: str | None = None
    first_unavailable: str | None = None
    for start in range(0, len(changes), batch_size):
        outcome = triager.triage(changes[start : start + batch_size], ai_config)
        merged += outcome.changes
        first_tldr = outcome.tldr if first_tldr is None else first_tldr
        first_unavailable = (
            outcome.unavailable if first_unavailable is None else first_unavailable
        )
    return TriageResult(changes=merged, tldr=first_tldr, unavailable=first_unavailable)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _build_ledger_digest(
    store: Store,
    config: Config,
    channels: set[str],
    tldr: str | None,
    unavailable: str | None,
) -> Digest:
    """Rank the store's pending changes for ``channels`` into a digest.

    The digest is then annotated with the given ``tldr`` and ``unavailable``
    values and with the scan-scope figures (resolved source count and the
    store's snapshot count).
    """
    pending = store.changes_for_digest(channels)
    ledger_digest = rank(pending, _source_index(config), config)
    # intentionally unrendered — no TL;DR preamble in any channel output
    ledger_digest.tldr = tldr
    ledger_digest.ai_unavailable = unavailable
    # Scan-scope footer, shown in every delivered digest so the reader sees the
    # coverage: number of watched sources and number of pages under baseline.
    watched = resolve_sources(config)
    ledger_digest.sources_scanned = len(watched)
    ledger_digest.pages_watched = store.snapshot_count()
    return ledger_digest
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _deliver_into(store: Store, digest: Digest, config: Config, channels: set[str]) -> None:
    """Deliver ``digest`` on each channel inside one opened digest record.

    For every channel the notifier's returned change ids are recorded as
    delivered on that channel. A channel that raises ``NotifyError`` is logged
    and skipped, so it is retried on the next run. Afterwards
    ``store.supersede_older`` is called for each digest member that has an id,
    and the digest record is committed.
    """
    digest_id = store.open_digest()
    for channel in channels:
        try:
            notifier = NOTIFIERS.get(channel)()
            conveyed = notifier.send(digest, config)
        except NotifyError:
            log.exception("channel %s delivery failed; leaving for next run", channel)
            continue
        for change_id in conveyed:
            store.record_delivery(change_id, channel)
    # Supersede older undelivered rows for every (source_id, url) in the digest.
    persisted = (m for g in digest.groups for m in g.members if m.id is not None)
    for member in persisted:
        store.supersede_older(member.source_id, member.url, member.id)
    store.commit_digest(digest_id)
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def run_once(config: Config, *, force: bool = False, dry_run: bool = False) -> Digest:
    """Run the full detection-to-delivery pipeline once.

    The digest is built from the ledger (substantive changes not yet delivered
    to every enabled channel), never from this-run detections, so a prior run's
    undelivered backlog is always retried. ``dry_run`` previews that standing
    backlog without detecting, persisting, superseding, sending, or marking the
    run successful.

    Args:
        config: The loaded configuration (channels, schedule, AI mode, digest
            options, sources).
        force: Run even when the catch-up gate says the last successful run
            already covers this cycle.
        dry_run: Only build and return the digest from the existing ledger.

    Returns:
        An empty ``Digest`` when no channel is enabled or the catch-up gate is
        not due; otherwise the digest built from the ledger.
    """
    # NOTE(review): migrate() and the seed import below run before the run lock
    # is taken and also when dry_run is set — confirm that is intended.
    store = Store(db_path())
    store.migrate()
    # Fresh DB: import the shipped baseline seed so the first run diffs against it
    # instead of crawling every page. No-op once any snapshot exists, or if no
    # seed is bundled (the detectors then baseline fetch-free on first sight).
    seeded = apply_seed_if_empty(store)
    if seeded:
        log.info("imported baseline seed dated %s (%d snapshots)", seeded, store.snapshot_count())
    channels = _enabled_channels(config)
    with run_lock(data_path()):
        # Zero channels: nothing can be delivered. Do no reconcile/detect/triage/
        # send, but still advance the catch-up window on the live path so
        # re-enabling a channel later is not treated as "missed every cycle".
        if not channels:
            if not dry_run:
                store.mark_successful_run(datetime.now(UTC))
            return Digest(groups=[])

        # dry_run: render from the existing ledger only; mutate nothing.
        if dry_run:
            return _build_ledger_digest(store, config, channels, None, None)

        # Reconcile a crashed run: re-deliver the still-owed changes (per-channel
        # idempotent, so no resend), then commit the stale inflight digest.
        # Mirror the live-path empty-digest gate: only call _deliver_into when
        # there is something to send (or config.digest.empty == "send"), but
        # ALWAYS commit the inflight row so it does not recur on the next run.
        inflight = store.inflight_digest()
        if inflight is not None:
            recon = _build_ledger_digest(store, config, channels, None, None)
            recon_send_empty = recon.is_empty and config.digest.empty == "send"
            if not recon.is_empty or recon_send_empty:
                _deliver_into(store, recon, config, channels)
            store.commit_digest(inflight)

        # Catch-up gate: skip when the last successful run already covers this
        # cycle and we are not forced.
        if not _catch_up_due(store, config, force):
            return Digest(groups=[])

        sources = resolve_sources(config)
        log.info(
            "run starting: %d source(s), channels=%s, force=%s",
            len(sources),
            ",".join(sorted(channels)) or "none",
            force,
        )
        fetcher = Fetcher(store, user_agent=USER_AGENT.format(version=__version__))
        t_detect = time.monotonic()
        # Detection is async; _run_async also closes the fetcher when it finishes.
        changes = asyncio.run(_run_async(sources, store, fetcher))
        log.info("detection phase: %.1fs (%d changes)", time.monotonic() - t_detect, len(changes))
        # Persist every detection and keep the stored row id on the in-memory change.
        for change in changes:
            change.id = store.record_change(change)  # idempotent on (source,url,hash)

        # Triage is WRITE-ONCE: only rows whose verdict is still NULL. Re-detected
        # rows already carry a final verdict and must not be re-triaged.
        untriaged = [c for c in changes if c.verdict is None]
        # AI mode "off" is served by the triager registered as "noop".
        mode = config.ai.mode if config.ai.mode != "off" else "noop"
        t_triage = time.monotonic()
        result = _triage_batched(TRIAGERS.get(mode)(), untriaged, config.ai)
        log.info("triage phase: %.1fs (%d triaged)", time.monotonic() - t_triage, len(untriaged))
        # Store verdicts only for persisted changes that actually received one.
        for change in result.changes:
            if change.id is not None and change.verdict is not None:
                store.set_verdict(
                    change.id,
                    change.verdict,
                    change.description,
                    change.group_key,
                    change.group_summary,
                    change.group_title,
                )

        # Digest comes from the ledger (undelivered backlog), not this-run changes.
        digest = _build_ledger_digest(store, config, channels, result.tldr, result.unavailable)

        # Empty "nothing notable" digests go out at most once per catch-up window:
        # only here, where the gate was due and mark_successful_run (below) will
        # advance the window so the next empty run is not due. Non-empty digests
        # are idempotent via the delivery ledger regardless.
        send_empty = digest.is_empty and config.digest.empty == "send"
        if not digest.is_empty or send_empty:
            _deliver_into(store, digest, config, channels)

        store.mark_successful_run(datetime.now(UTC))
        log.info(
            "run finished: %d detected, %d in digest, delivered to %s",
            len(changes),
            digest.change_count(),
            ",".join(sorted(channels)) or "none",
        )
        return digest
|