android-watcher 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- android_watcher/__init__.py +10 -0
- android_watcher/catalog/__init__.py +32 -0
- android_watcher/catalog/catalog.toml +531 -0
- android_watcher/cli.py +161 -0
- android_watcher/config.py +262 -0
- android_watcher/detect/__init__.py +1 -0
- android_watcher/detect/_normalize.py +192 -0
- android_watcher/detect/android_sitemap.py +540 -0
- android_watcher/detect/base.py +14 -0
- android_watcher/detect/content.py +99 -0
- android_watcher/detect/feed.py +135 -0
- android_watcher/detect/sitemap.py +203 -0
- android_watcher/doctor.py +125 -0
- android_watcher/fetch.py +162 -0
- android_watcher/group.py +79 -0
- android_watcher/lock.py +32 -0
- android_watcher/models.py +156 -0
- android_watcher/notify/__init__.py +1 -0
- android_watcher/notify/base.py +21 -0
- android_watcher/notify/email.py +52 -0
- android_watcher/notify/html.py +114 -0
- android_watcher/notify/render.py +239 -0
- android_watcher/notify/slack.py +124 -0
- android_watcher/notify/telegram.py +46 -0
- android_watcher/rank.py +84 -0
- android_watcher/registry.py +38 -0
- android_watcher/run.py +283 -0
- android_watcher/schedule.py +488 -0
- android_watcher/seed/__init__.py +45 -0
- android_watcher/seed/seed.sql.gz +0 -0
- android_watcher/store.py +492 -0
- android_watcher/triage/__init__.py +1 -0
- android_watcher/triage/base.py +25 -0
- android_watcher/triage/claude_cli.py +185 -0
- android_watcher/triage/noop.py +24 -0
- android_watcher/tui/__init__.py +1 -0
- android_watcher/tui/app.py +163 -0
- android_watcher/tui/configio.py +215 -0
- android_watcher/tui/screens.py +927 -0
- android_watcher-1.0.0.dist-info/METADATA +310 -0
- android_watcher-1.0.0.dist-info/RECORD +44 -0
- android_watcher-1.0.0.dist-info/WHEEL +4 -0
- android_watcher-1.0.0.dist-info/entry_points.txt +2 -0
- android_watcher-1.0.0.dist-info/licenses/LICENSE +21 -0
android_watcher/store.py
ADDED
|
@@ -0,0 +1,492 @@
|
|
|
1
|
+
"""SQLite persistence layer.
|
|
2
|
+
|
|
3
|
+
Single synchronous connection per Store instance. All datetimes are stored as
|
|
4
|
+
ISO-8601 strings with UTC offset; the Store coerces naive datetimes to UTC at
|
|
5
|
+
the boundary so callers never need to worry about timezone hygiene.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import sqlite3
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
from datetime import UTC, datetime
|
|
13
|
+
|
|
14
|
+
from .models import Change, SignalType, Verdict
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class Snapshot:
    """Last recorded state of one URL that belongs to one source.

    The fields mirror the columns of the ``snapshots`` table. ``fetched_at``
    is the only one converted to a ``datetime`` when a row is loaded.
    """

    source_id: str  # first half of the (source_id, url) primary key
    url: str  # second half of the primary key
    signal_type: SignalType
    content_hash: str
    lastmod: str  # kept as text; the table defaults it to ''
    excerpt: str  # kept as text; the table defaults it to ''
    fetched_at: datetime
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _now_iso() -> str:
|
|
29
|
+
return datetime.now(UTC).isoformat()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _to_utc(value: datetime) -> datetime:
|
|
33
|
+
"""Coerce any datetime to UTC-aware at the Store boundary.
|
|
34
|
+
|
|
35
|
+
A naive value is assumed to be UTC; an aware value is converted to UTC.
|
|
36
|
+
"""
|
|
37
|
+
if value.tzinfo is None:
|
|
38
|
+
return value.replace(tzinfo=UTC)
|
|
39
|
+
return value.astimezone(UTC)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _parse_iso(value: str) -> datetime:
    """Parse an ISO-8601 string and return it as a UTC-aware datetime."""
    return _to_utc(datetime.fromisoformat(value))
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# Translation table that deletes C0 control characters from seed literals
# while keeping tab (0x09), newline (0x0A) and carriage return (0x0D).
# executescript() runs SQL as a C string, so an embedded NUL (or another
# control byte that slipped in from page text) truncates or rejects the script.
_CTRL_STRIP = dict.fromkeys(
    code for code in range(0x20) if code not in {0x09, 0x0A, 0x0D}
)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _sql_str(value: object) -> str:
    """Render a seed column value as a single-quoted SQL string literal.

    The value is passed through ``str()`` (every seed-table column is declared
    TEXT NOT NULL), control characters listed in ``_CTRL_STRIP`` are removed
    so executescript() can run the dump, and single quotes are doubled per
    SQL escaping rules.
    """
    cleaned = str(value).translate(_CTRL_STRIP)
    escaped = cleaned.replace("'", "''")
    return f"'{escaped}'"
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class Store:
|
|
64
|
+
"""Synchronous SQLite wrapper. Datetimes stored ISO-8601 UTC."""
|
|
65
|
+
|
|
66
|
+
def __init__(self, path: str) -> None:
    """Open the SQLite database at *path* and configure the connection.

    Rows are returned as ``sqlite3.Row`` so columns can be read by name, and
    foreign-key enforcement is switched on for this connection.
    """
    self.path = path
    connection = sqlite3.connect(path)
    connection.row_factory = sqlite3.Row
    self._conn = connection
    self._conn.execute("PRAGMA foreign_keys = ON")
|
|
71
|
+
|
|
72
|
+
def close(self) -> None:
    """Release the SQLite connection held by this store. Idempotent."""
    connection = self._conn
    connection.close()
|
|
75
|
+
|
|
76
|
+
def __enter__(self) -> Store:
    """Enter a ``with`` block; the store itself is the managed object."""
    return self
|
|
78
|
+
|
|
79
|
+
def __exit__(self, *exc: object) -> None:
    """Leave a ``with`` block by closing the store.

    Returns ``None``, so an exception raised inside the block propagates.
    """
    self.close()
|
|
81
|
+
|
|
82
|
+
def migrate(self) -> None:
    """Create every table and index if absent, then add the grouping columns.

    Safe to run repeatedly: all DDL uses ``IF NOT EXISTS``, and the three
    ``group_*`` columns on ``changes`` go through ``_add_column_if_missing``.
    """
    schema = """
        -- NOTE: spec section 6 keys snapshots by source_id only; we add a
        -- `url` column (PK (source_id, url)) because get_snapshot needs
        -- per-URL keying. Intentional deviation, documented in Interfaces.
        CREATE TABLE IF NOT EXISTS snapshots (
            source_id TEXT NOT NULL,
            url TEXT NOT NULL,
            signal_type TEXT NOT NULL,
            content_hash TEXT NOT NULL,
            lastmod TEXT NOT NULL DEFAULT '',
            excerpt TEXT NOT NULL DEFAULT '',
            fetched_at TEXT NOT NULL,
            PRIMARY KEY (source_id, url)
        );

        CREATE TABLE IF NOT EXISTS changes (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            source_id TEXT NOT NULL,
            detected_at TEXT NOT NULL,
            url TEXT NOT NULL,
            change_kind TEXT NOT NULL,
            title TEXT NOT NULL DEFAULT '',
            raw_diff TEXT NOT NULL DEFAULT '',
            description TEXT,
            verdict TEXT,
            fetched_hash TEXT NOT NULL DEFAULT '',
            -- supersede_older sets this to 1 so older undelivered rows for a
            -- (source_id, url) neither deliver nor recount in the digest.
            superseded INTEGER NOT NULL DEFAULT 0,
            group_key TEXT,
            group_summary TEXT,
            group_title TEXT
        );

        -- Idempotency key for record_change: re-detecting the same content
        -- hash for a url must not duplicate rows or reset its verdict.
        CREATE UNIQUE INDEX IF NOT EXISTS ux_changes_identity
            ON changes (source_id, url, fetched_hash);

        CREATE TABLE IF NOT EXISTS deliveries (
            change_id INTEGER NOT NULL,
            channel TEXT NOT NULL,
            sent_at TEXT NOT NULL,
            PRIMARY KEY (change_id, channel)
        );

        CREATE TABLE IF NOT EXISTS digests (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            created_at TEXT NOT NULL,
            committed_at TEXT
        );

        CREATE TABLE IF NOT EXISTS seen_feed_items (
            source_id TEXT NOT NULL,
            item_id TEXT NOT NULL,
            content_hash TEXT NOT NULL,
            PRIMARY KEY (source_id, item_id)
        );

        CREATE TABLE IF NOT EXISTS http_cache (
            url TEXT PRIMARY KEY,
            etag TEXT NOT NULL DEFAULT '',
            last_modified TEXT NOT NULL DEFAULT '',
            fetched_at TEXT NOT NULL
        );

        CREATE TABLE IF NOT EXISTS run_state (
            key TEXT PRIMARY KEY,
            value TEXT NOT NULL
        );
        """
    self._conn.executescript(schema)
    self._conn.commit()
    # NOTE(review): presumably covers databases whose `changes` table predates
    # the grouping columns, since CREATE TABLE IF NOT EXISTS leaves an
    # existing table untouched.
    for column in ("group_key", "group_summary", "group_title"):
        self._add_column_if_missing("changes", column, "TEXT")
|
|
160
|
+
|
|
161
|
+
def _add_column_if_missing(self, table: str, column: str, decl: str) -> None:
    """Add *column* (declared as *decl*) to *table* unless it already exists.

    All three arguments are interpolated straight into the SQL text, so they
    must be trusted identifiers and never user-supplied input.
    """
    info = self._conn.execute(f"PRAGMA table_info({table})").fetchall()
    existing = {entry["name"] for entry in info}
    if column in existing:
        return
    self._conn.execute(f"ALTER TABLE {table} ADD COLUMN {column} {decl}")
    self._conn.commit()
|
|
166
|
+
|
|
167
|
+
# snapshots ----------------------------------------------------------
|
|
168
|
+
|
|
169
|
+
def snapshot_count(self) -> int:
    """Return the number of baseline snapshot rows (0 => a fresh, unseeded DB)."""
    cursor = self._conn.execute("SELECT COUNT(*) AS n FROM snapshots")
    record = cursor.fetchone()
    return int(record["n"])
|
|
173
|
+
|
|
174
|
+
def source_has_snapshots(self, source_id: str) -> bool:
    """Report whether *source_id* already has at least one snapshot row.

    A never-seen URL counts as genuinely 'new' only once its source has been
    baselined, so the first run / seed import does not flood every URL as new.
    """
    cursor = self._conn.execute(
        "SELECT 1 FROM snapshots WHERE source_id = ? LIMIT 1", (source_id,)
    )
    return cursor.fetchone() is not None
|
|
182
|
+
|
|
183
|
+
def get_snapshot(self, source_id: str, url: str) -> Snapshot | None:
    """Load the snapshot stored for (source_id, url), or ``None`` if absent.

    Text columns are copied as stored; ``fetched_at`` is parsed back into a
    UTC-aware datetime.
    """
    record = self._conn.execute(
        "SELECT * FROM snapshots WHERE source_id = ? AND url = ?",
        (source_id, url),
    ).fetchone()
    if record is None:
        return None
    text_columns = (
        "source_id",
        "url",
        "signal_type",
        "content_hash",
        "lastmod",
        "excerpt",
    )
    fields = {name: record[name] for name in text_columns}
    return Snapshot(**fields, fetched_at=_parse_iso(record["fetched_at"]))
|
|
199
|
+
|
|
200
|
+
def upsert_snapshot(
    self,
    source_id: str,
    url: str,
    *,
    signal_type: SignalType,
    content_hash: str,
    lastmod: str,
    excerpt: str,
) -> None:
    """Insert or overwrite the snapshot for (source_id, url) and commit.

    On a key conflict every non-key column is replaced with the new value.
    ``fetched_at`` is always stamped with the current UTC time.
    """
    statement = """
        INSERT INTO snapshots
            (source_id, url, signal_type, content_hash, lastmod, excerpt, fetched_at)
        VALUES (?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(source_id, url) DO UPDATE SET
            signal_type = excluded.signal_type,
            content_hash = excluded.content_hash,
            lastmod = excluded.lastmod,
            excerpt = excluded.excerpt,
            fetched_at = excluded.fetched_at
        """
    params = (source_id, url, signal_type, content_hash, lastmod, excerpt, _now_iso())
    self._conn.execute(statement, params)
    self._conn.commit()
|
|
225
|
+
|
|
226
|
+
# changes ------------------------------------------------------------
|
|
227
|
+
|
|
228
|
+
def record_change(self, change: Change) -> int:
    """Insert *change*, idempotent on (source_id, url, fetched_hash).

    When a row with that identity already exists nothing is inserted and its
    verdict is left untouched; otherwise a new row is written with
    ``verdict = NULL``. Either way the row id is stored on ``change.id`` and
    returned.

    ``ON CONFLICT DO NOTHING`` lets the unique index ``ux_changes_identity``
    enforce idempotency atomically, with no SELECT-then-INSERT race window.
    """
    identity = (change.source_id, change.url, change.fetched_hash)
    insert_sql = """
        INSERT INTO changes
            (source_id, detected_at, url, change_kind, title, raw_diff,
             description, verdict, fetched_hash)
        VALUES (?, ?, ?, ?, ?, ?, ?, NULL, ?)
        ON CONFLICT(source_id, url, fetched_hash) DO NOTHING
        """
    values = (
        change.source_id,
        _to_utc(change.detected_at).isoformat(),
        change.url,
        change.change_kind,
        change.title,
        change.raw_diff,
        change.description,
        change.fetched_hash,
    )
    self._conn.execute(insert_sql, values)
    self._conn.commit()
    # Look the id up by identity: it covers both the fresh insert and the
    # case where the insert was skipped because the row already existed.
    found = self._conn.execute(
        "SELECT id FROM changes WHERE source_id = ? AND url = ? AND fetched_hash = ?",
        identity,
    ).fetchone()
    change.id = int(found["id"])
    return change.id
|
|
265
|
+
|
|
266
|
+
def changes_for_digest(self, channels: set[str]) -> list[Change]:
    """Return substantive changes still missing delivery on some channel.

    Rules applied:
    - An empty *channels* set yields ``[]``.
    - At most one row per (source_id, url) is considered: the latest by
      ``detected_at`` (then ``id`` as a stable tiebreak) among substantive,
      non-superseded rows. Older rows for the same page are never emitted,
      so a page that changed twice before delivery produces one entry.
    - Rows flagged ``superseded`` are excluded entirely.
    - A candidate is returned when at least one requested channel has no
      delivery recorded for it, so an undelivered backlog is retried.

    Results are ordered newest first.
    """
    if not channels:
        return []
    candidates = self._conn.execute(
        """
        SELECT c.* FROM changes c
        WHERE c.verdict = 'substantive' AND c.superseded = 0
          AND c.id = (
              SELECT c2.id FROM changes c2
              WHERE c2.source_id = c.source_id AND c2.url = c.url
                AND c2.verdict = 'substantive' AND c2.superseded = 0
              ORDER BY c2.detected_at DESC, c2.id DESC
              LIMIT 1
          )
        ORDER BY c.detected_at DESC, c.id DESC
        """
    ).fetchall()
    pending: list[Change] = []
    for candidate in candidates:
        already_sent = self.delivered_channels(int(candidate["id"]))
        if channels <= already_sent:
            continue  # every requested channel has this change already
        pending.append(self._row_to_change(candidate))
    return pending
|
|
303
|
+
|
|
304
|
+
def supersede_older(self, source_id: str, url: str, keep_id: int) -> None:
    """Flag undelivered substantive rows for (source_id, url) as superseded.

    The row with id *keep_id* is spared, as is any row that already has at
    least one delivery recorded. Flagged rows neither deliver nor recount.
    """
    statement = """
        UPDATE changes SET superseded = 1
        WHERE source_id = ? AND url = ? AND id != ?
          AND verdict = 'substantive' AND superseded = 0
          AND id NOT IN (SELECT change_id FROM deliveries)
        """
    self._conn.execute(statement, (source_id, url, keep_id))
    self._conn.commit()
|
|
317
|
+
|
|
318
|
+
def set_verdict(
    self,
    change_id: int,
    verdict: Verdict,
    description: str | None,
    group_key: str | None = None,
    group_summary: str | None = None,
    group_title: str | None = None,
) -> None:
    """Write the verdict, description and group fields for one change.

    WRITE-ONCE: the update only matches a row whose verdict IS NULL, so a
    change that already has a verdict is left exactly as it is.
    """
    statement = (
        "UPDATE changes SET verdict = ?, description = ?, group_key = ?, group_summary = ?, "
        "group_title = ? WHERE id = ? AND verdict IS NULL"
    )
    params = (verdict, description, group_key, group_summary, group_title, change_id)
    self._conn.execute(statement, params)
    self._conn.commit()
|
|
334
|
+
|
|
335
|
+
@staticmethod
def _row_to_change(row: sqlite3.Row) -> Change:
    """Build a ``Change`` from one row of the ``changes`` table.

    Every column is passed through unchanged except ``detected_at``, which is
    parsed from its stored ISO-8601 text into a UTC-aware datetime.
    """
    passthrough = (
        "source_id",
        "url",
        "change_kind",
        "title",
        "raw_diff",
        "fetched_hash",
        "id",
        "verdict",
        "description",
        "group_key",
        "group_summary",
        "group_title",
    )
    fields = {column: row[column] for column in passthrough}
    return Change(detected_at=_parse_iso(row["detected_at"]), **fields)
|
|
352
|
+
|
|
353
|
+
# per-channel delivery -----------------------------------------------
|
|
354
|
+
|
|
355
|
+
def delivered_channels(self, change_id: int) -> set[str]:
    """Return the channels that have a delivery recorded for *change_id*."""
    cursor = self._conn.execute(
        "SELECT channel FROM deliveries WHERE change_id = ?", (change_id,)
    )
    channels: set[str] = set()
    for record in cursor.fetchall():
        channels.add(record["channel"])
    return channels
|
|
360
|
+
|
|
361
|
+
def record_delivery(self, change_id: int, channel: str) -> None:
    """Record that *change_id* was sent on *channel*, stamped with now (UTC).

    A repeat call for the same (change_id, channel) pair is a no-op, so the
    original ``sent_at`` is kept.
    """
    statement = """
        INSERT INTO deliveries (change_id, channel, sent_at)
        VALUES (?, ?, ?)
        ON CONFLICT(change_id, channel) DO NOTHING
        """
    self._conn.execute(statement, (change_id, channel, _now_iso()))
    self._conn.commit()
|
|
371
|
+
|
|
372
|
+
# in-flight digest ---------------------------------------------------
|
|
373
|
+
|
|
374
|
+
def open_digest(self) -> int:
    """Create a digest row with no ``committed_at`` and return its id."""
    inserted = self._conn.execute(
        "INSERT INTO digests (created_at) VALUES (?)", (_now_iso(),)
    )
    self._conn.commit()
    digest_id = int(inserted.lastrowid)
    return digest_id
|
|
378
|
+
|
|
379
|
+
def commit_digest(self, digest_id: int) -> None:
    """Mark digest *digest_id* as committed by stamping ``committed_at`` with now."""
    params = (_now_iso(), digest_id)
    self._conn.execute("UPDATE digests SET committed_at = ? WHERE id = ?", params)
    self._conn.commit()
|
|
385
|
+
|
|
386
|
+
def inflight_digest(self) -> int | None:
|
|
387
|
+
row = self._conn.execute(
|
|
388
|
+
"SELECT id FROM digests WHERE committed_at IS NULL ORDER BY id DESC LIMIT 1"
|
|
389
|
+
).fetchone()
|
|
390
|
+
return int(row["id"]) if row else None
|
|
391
|
+
|
|
392
|
+
# feed seen-set ------------------------------------------------------
|
|
393
|
+
|
|
394
|
+
def seen_feed_item(self, source_id: str, item_id: str) -> str | None:
|
|
395
|
+
row = self._conn.execute(
|
|
396
|
+
"SELECT content_hash FROM seen_feed_items WHERE source_id = ? AND item_id = ?",
|
|
397
|
+
(source_id, item_id),
|
|
398
|
+
).fetchone()
|
|
399
|
+
return row["content_hash"] if row else None
|
|
400
|
+
|
|
401
|
+
def upsert_seen_feed_item(self, source_id: str, item_id: str, content_hash: str) -> None:
    """Remember a feed item's content hash, replacing any earlier hash, and commit."""
    statement = """
        INSERT INTO seen_feed_items (source_id, item_id, content_hash)
        VALUES (?, ?, ?)
        ON CONFLICT(source_id, item_id) DO UPDATE SET
            content_hash = excluded.content_hash
        """
    self._conn.execute(statement, (source_id, item_id, content_hash))
    self._conn.commit()
|
|
412
|
+
|
|
413
|
+
# http conditional-GET cache -----------------------------------------
|
|
414
|
+
|
|
415
|
+
def http_cache_get(self, url: str) -> tuple[str, str]:
    """Return ``(etag, last_modified)`` cached for *url*, or ``("", "")`` if none."""
    record = self._conn.execute(
        "SELECT etag, last_modified FROM http_cache WHERE url = ?", (url,)
    ).fetchone()
    if record is not None:
        return (record["etag"], record["last_modified"])
    return ("", "")
|
|
422
|
+
|
|
423
|
+
def http_cache_put(self, url: str, etag: str, last_modified: str) -> None:
    """Store the validators for *url*, replacing any cached ones, and commit.

    ``fetched_at`` is stamped with the current UTC time on every call.
    """
    statement = """
        INSERT INTO http_cache (url, etag, last_modified, fetched_at)
        VALUES (?, ?, ?, ?)
        ON CONFLICT(url) DO UPDATE SET
            etag = excluded.etag,
            last_modified = excluded.last_modified,
            fetched_at = excluded.fetched_at
        """
    self._conn.execute(statement, (url, etag, last_modified, _now_iso()))
    self._conn.commit()
|
|
436
|
+
|
|
437
|
+
# run bookkeeping ----------------------------------------------------
|
|
438
|
+
|
|
439
|
+
def last_successful_run(self) -> datetime | None:
    """Return the stored ``last_successful_run`` time (UTC-aware), or ``None``."""
    record = self._conn.execute(
        "SELECT value FROM run_state WHERE key = 'last_successful_run'"
    ).fetchone()
    if not record:
        return None
    return _parse_iso(record["value"])
|
|
444
|
+
|
|
445
|
+
def mark_successful_run(self, when: datetime) -> None:
    """Store *when* (coerced to UTC) as ``last_successful_run`` and commit."""
    stamp = _to_utc(when).isoformat()
    statement = """
        INSERT INTO run_state (key, value) VALUES ('last_successful_run', ?)
        ON CONFLICT(key) DO UPDATE SET value = excluded.value
        """
    self._conn.execute(statement, (stamp,))
    self._conn.commit()
|
|
454
|
+
|
|
455
|
+
# seed import / export -----------------------------------------------
|
|
456
|
+
#
|
|
457
|
+
# A shipped seed is a pre-built baseline (snapshots + feed seen-set + HTTP
|
|
458
|
+
# validators) tagged with the date it was generated. Importing it on a fresh
|
|
459
|
+
# DB gives users a starting point so the first scheduled run diffs against it
|
|
460
|
+
# instead of crawling every page to establish a baseline.
|
|
461
|
+
|
|
462
|
+
# Tables whose rows are written into a seed dump, in export order. The
# run_state 'seed_date' marker is not listed here; the exporter appends it
# separately.
_SEED_TABLES = (
    "snapshots",
    "seen_feed_items",
    "http_cache",
)
|
|
464
|
+
|
|
465
|
+
def seed_date(self) -> str | None:
|
|
466
|
+
"""The date the imported baseline was generated, or None if unseeded."""
|
|
467
|
+
row = self._conn.execute("SELECT value FROM run_state WHERE key = 'seed_date'").fetchone()
|
|
468
|
+
return row["value"] if row else None
|
|
469
|
+
|
|
470
|
+
def export_seed_sql(self, seed_date: str) -> str:
    """Serialize the baseline tables to portable ``INSERT OR IGNORE`` SQL.

    No schema is emitted (the importing DB has already migrated), so the
    output layers onto an existing schema without clashing.

    Args:
        seed_date: Generation date. Written into the header comment and into
            the ``run_state`` row keyed ``seed_date``.

    Returns:
        A newline-terminated script: one header comment, one INSERT per row
        of each table in ``_SEED_TABLES``, then the ``seed_date`` marker.
    """
    # The header is a `--` comment, which ends at the first line break. Fold
    # the date onto one line so nothing after a stray newline can run as SQL
    # when the script is imported.
    header_date = " ".join(str(seed_date).split())
    lines = [f"-- android-watcher seed; generated {header_date}"]
    for table in self._SEED_TABLES:
        # Without ORDER BY the row order is unspecified; rowid order keeps
        # repeated exports of the same database identical.
        cursor = self._conn.execute(f"SELECT * FROM {table} ORDER BY rowid")
        # Column names are the same for every row, so build the prefix once.
        columns = ", ".join(entry[0] for entry in cursor.description)
        prefix = f"INSERT OR IGNORE INTO {table} ({columns}) VALUES ("
        for row in cursor.fetchall():
            values = ", ".join(_sql_str(cell) for cell in row)
            lines.append(f"{prefix}{values});")
    lines.append(
        "INSERT OR IGNORE INTO run_state (key, value) VALUES "
        f"('seed_date', {_sql_str(seed_date)});"
    )
    return "\n".join(lines) + "\n"
|
|
488
|
+
|
|
489
|
+
def import_seed_sql(self, sql: str) -> None:
    """Apply seed ``INSERT OR IGNORE`` statements; existing rows are preserved.

    The script runs inside a single transaction. executescript() adds no
    transaction control of its own, so without this every statement would
    commit separately and a failure midway would leave a half-imported
    baseline. On error the transaction is rolled back and the exception is
    re-raised.

    Args:
        sql: Seed script as produced by ``export_seed_sql``. It must not
            contain its own BEGIN/COMMIT statements.

    Raises:
        sqlite3.Error: If any statement in the script fails; nothing from
            the script is kept in that case.
    """
    connection = self._conn
    # The newline before COMMIT keeps it off any trailing `--` comment line.
    wrapped = "BEGIN;\n" + sql + "\nCOMMIT;"
    try:
        connection.executescript(wrapped)
    except sqlite3.Error:
        if connection.in_transaction:
            connection.rollback()
        raise
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from . import claude_cli, noop # noqa: F401
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""Triager protocol, TriageResult dataclass, and TRIAGERS registry."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Protocol, runtime_checkable
|
|
7
|
+
|
|
8
|
+
from android_watcher.config import AIConfig
|
|
9
|
+
from android_watcher.models import Change
|
|
10
|
+
from android_watcher.registry import Registry
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class TriageResult:
    """Value returned by a triager's ``triage`` call."""

    changes: list[Change]
    # NOTE(review): presumably a short overall summary of the batch; confirm
    # against the triager implementations.
    tldr: str | None = None
    # NOTE(review): looks like a reason triage could not run; confirm.
    unavailable: str | None = None
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@runtime_checkable
class Triager(Protocol):
    """Structural interface for triagers.

    Any object with a ``triage`` method satisfies it. Being runtime-checkable,
    ``isinstance`` only verifies that the method exists, not its signature.
    """

    def triage(self, changes: list[Change], config: AIConfig) -> TriageResult:
        """Triage *changes* under *config* and return a ``TriageResult``."""
        ...
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# Module-level registry holding Triager implementations, created under the
# name "triager".
TRIAGERS: Registry[Triager] = Registry("triager")
|