android-watcher 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. android_watcher/__init__.py +10 -0
  2. android_watcher/catalog/__init__.py +32 -0
  3. android_watcher/catalog/catalog.toml +531 -0
  4. android_watcher/cli.py +161 -0
  5. android_watcher/config.py +262 -0
  6. android_watcher/detect/__init__.py +1 -0
  7. android_watcher/detect/_normalize.py +192 -0
  8. android_watcher/detect/android_sitemap.py +540 -0
  9. android_watcher/detect/base.py +14 -0
  10. android_watcher/detect/content.py +99 -0
  11. android_watcher/detect/feed.py +135 -0
  12. android_watcher/detect/sitemap.py +203 -0
  13. android_watcher/doctor.py +125 -0
  14. android_watcher/fetch.py +162 -0
  15. android_watcher/group.py +79 -0
  16. android_watcher/lock.py +32 -0
  17. android_watcher/models.py +156 -0
  18. android_watcher/notify/__init__.py +1 -0
  19. android_watcher/notify/base.py +21 -0
  20. android_watcher/notify/email.py +52 -0
  21. android_watcher/notify/html.py +114 -0
  22. android_watcher/notify/render.py +239 -0
  23. android_watcher/notify/slack.py +124 -0
  24. android_watcher/notify/telegram.py +46 -0
  25. android_watcher/rank.py +84 -0
  26. android_watcher/registry.py +38 -0
  27. android_watcher/run.py +283 -0
  28. android_watcher/schedule.py +488 -0
  29. android_watcher/seed/__init__.py +45 -0
  30. android_watcher/seed/seed.sql.gz +0 -0
  31. android_watcher/store.py +492 -0
  32. android_watcher/triage/__init__.py +1 -0
  33. android_watcher/triage/base.py +25 -0
  34. android_watcher/triage/claude_cli.py +185 -0
  35. android_watcher/triage/noop.py +24 -0
  36. android_watcher/tui/__init__.py +1 -0
  37. android_watcher/tui/app.py +163 -0
  38. android_watcher/tui/configio.py +215 -0
  39. android_watcher/tui/screens.py +927 -0
  40. android_watcher-1.0.0.dist-info/METADATA +310 -0
  41. android_watcher-1.0.0.dist-info/RECORD +44 -0
  42. android_watcher-1.0.0.dist-info/WHEEL +4 -0
  43. android_watcher-1.0.0.dist-info/entry_points.txt +2 -0
  44. android_watcher-1.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,492 @@
1
+ """SQLite persistence layer.
2
+
3
+ Single synchronous connection per Store instance. All datetimes are stored as
4
+ ISO-8601 strings with UTC offset; the Store coerces naive datetimes to UTC at
5
+ the boundary so callers never need to worry about timezone hygiene.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import sqlite3
11
+ from dataclasses import dataclass
12
+ from datetime import UTC, datetime
13
+
14
+ from .models import Change, SignalType, Verdict
15
+
16
+
17
@dataclass
class Snapshot:
    """One row of the `snapshots` table, as returned by `Store.get_snapshot`."""

    source_id: str  # together with `url`, forms the table's primary key
    url: str
    signal_type: SignalType
    content_hash: str
    lastmod: str  # the column defaults to '' in the schema
    excerpt: str  # the column defaults to '' in the schema
    fetched_at: datetime  # parsed from the stored ISO string and coerced to UTC
26
+
27
+
28
+ def _now_iso() -> str:
29
+ return datetime.now(UTC).isoformat()
30
+
31
+
32
+ def _to_utc(value: datetime) -> datetime:
33
+ """Coerce any datetime to UTC-aware at the Store boundary.
34
+
35
+ A naive value is assumed to be UTC; an aware value is converted to UTC.
36
+ """
37
+ if value.tzinfo is None:
38
+ return value.replace(tzinfo=UTC)
39
+ return value.astimezone(UTC)
40
+
41
+
42
def _parse_iso(value: str) -> datetime:
    """Parse an ISO-8601 string and return it as an aware UTC datetime."""
    return _to_utc(datetime.fromisoformat(value))
45
+
46
+
47
# Translation table that deletes C0 control characters from seed literals while
# keeping tab, newline and carriage return. executescript() runs SQL as a C
# string, so an embedded NUL (or another control byte that slipped in from page
# text) would truncate or reject the whole script.
_CTRL_STRIP = dict.fromkeys(
    code for code in range(0x20) if code not in (0x09, 0x0A, 0x0D)
)


def _sql_str(value: object) -> str:
    """Render a seed column value as a single-quoted SQL string literal.

    Seed-table columns are all declared TEXT NOT NULL, so the value is
    stringified unconditionally. Control characters (notably NUL) are removed
    so executescript() can run the dump, and single quotes are doubled per SQL
    escaping rules.
    """
    cleaned = str(value).translate(_CTRL_STRIP)
    escaped = cleaned.replace("'", "''")
    return f"'{escaped}'"
61
+
62
+
63
+ class Store:
64
+ """Synchronous SQLite wrapper. Datetimes stored ISO-8601 UTC."""
65
+
66
+ def __init__(self, path: str) -> None:
67
+ self.path = path
68
+ self._conn = sqlite3.connect(path)
69
+ self._conn.row_factory = sqlite3.Row
70
+ self._conn.execute("PRAGMA foreign_keys = ON")
71
+
72
+ def close(self) -> None:
73
+ """Close the underlying connection. Idempotent."""
74
+ self._conn.close()
75
+
76
+ def __enter__(self) -> Store:
77
+ return self
78
+
79
+ def __exit__(self, *exc: object) -> None:
80
+ self.close()
81
+
82
+ def migrate(self) -> None:
83
+ self._conn.executescript(
84
+ """
85
+ -- NOTE: spec section 6 keys snapshots by source_id only; we add a
86
+ -- `url` column (PK (source_id, url)) because get_snapshot needs
87
+ -- per-URL keying. Intentional deviation, documented in Interfaces.
88
+ CREATE TABLE IF NOT EXISTS snapshots (
89
+ source_id TEXT NOT NULL,
90
+ url TEXT NOT NULL,
91
+ signal_type TEXT NOT NULL,
92
+ content_hash TEXT NOT NULL,
93
+ lastmod TEXT NOT NULL DEFAULT '',
94
+ excerpt TEXT NOT NULL DEFAULT '',
95
+ fetched_at TEXT NOT NULL,
96
+ PRIMARY KEY (source_id, url)
97
+ );
98
+
99
+ CREATE TABLE IF NOT EXISTS changes (
100
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
101
+ source_id TEXT NOT NULL,
102
+ detected_at TEXT NOT NULL,
103
+ url TEXT NOT NULL,
104
+ change_kind TEXT NOT NULL,
105
+ title TEXT NOT NULL DEFAULT '',
106
+ raw_diff TEXT NOT NULL DEFAULT '',
107
+ description TEXT,
108
+ verdict TEXT,
109
+ fetched_hash TEXT NOT NULL DEFAULT '',
110
+ -- supersede_older sets this to 1 so older undelivered rows for a
111
+ -- (source_id, url) neither deliver nor recount in the digest.
112
+ superseded INTEGER NOT NULL DEFAULT 0,
113
+ group_key TEXT,
114
+ group_summary TEXT,
115
+ group_title TEXT
116
+ );
117
+
118
+ -- Idempotency key for record_change: re-detecting the same content
119
+ -- hash for a url must not duplicate rows or reset its verdict.
120
+ CREATE UNIQUE INDEX IF NOT EXISTS ux_changes_identity
121
+ ON changes (source_id, url, fetched_hash);
122
+
123
+ CREATE TABLE IF NOT EXISTS deliveries (
124
+ change_id INTEGER NOT NULL,
125
+ channel TEXT NOT NULL,
126
+ sent_at TEXT NOT NULL,
127
+ PRIMARY KEY (change_id, channel)
128
+ );
129
+
130
+ CREATE TABLE IF NOT EXISTS digests (
131
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
132
+ created_at TEXT NOT NULL,
133
+ committed_at TEXT
134
+ );
135
+
136
+ CREATE TABLE IF NOT EXISTS seen_feed_items (
137
+ source_id TEXT NOT NULL,
138
+ item_id TEXT NOT NULL,
139
+ content_hash TEXT NOT NULL,
140
+ PRIMARY KEY (source_id, item_id)
141
+ );
142
+
143
+ CREATE TABLE IF NOT EXISTS http_cache (
144
+ url TEXT PRIMARY KEY,
145
+ etag TEXT NOT NULL DEFAULT '',
146
+ last_modified TEXT NOT NULL DEFAULT '',
147
+ fetched_at TEXT NOT NULL
148
+ );
149
+
150
+ CREATE TABLE IF NOT EXISTS run_state (
151
+ key TEXT PRIMARY KEY,
152
+ value TEXT NOT NULL
153
+ );
154
+ """
155
+ )
156
+ self._conn.commit()
157
+ self._add_column_if_missing("changes", "group_key", "TEXT")
158
+ self._add_column_if_missing("changes", "group_summary", "TEXT")
159
+ self._add_column_if_missing("changes", "group_title", "TEXT")
160
+
161
+ def _add_column_if_missing(self, table: str, column: str, decl: str) -> None:
162
+ cols = {r["name"] for r in self._conn.execute(f"PRAGMA table_info({table})").fetchall()}
163
+ if column not in cols:
164
+ self._conn.execute(f"ALTER TABLE {table} ADD COLUMN {column} {decl}")
165
+ self._conn.commit()
166
+
167
+ # snapshots ----------------------------------------------------------
168
+
169
+ def snapshot_count(self) -> int:
170
+ """How many baseline snapshots exist (0 => a fresh, unseeded DB)."""
171
+ row = self._conn.execute("SELECT COUNT(*) AS n FROM snapshots").fetchone()
172
+ return int(row["n"])
173
+
174
+ def source_has_snapshots(self, source_id: str) -> bool:
175
+ """Whether this source already has a baseline. A never-seen URL counts as
176
+ genuinely 'new' only once the source has been baselined (so the first run /
177
+ seed import does not flood every URL as new)."""
178
+ row = self._conn.execute(
179
+ "SELECT 1 FROM snapshots WHERE source_id = ? LIMIT 1", (source_id,)
180
+ ).fetchone()
181
+ return row is not None
182
+
183
+ def get_snapshot(self, source_id: str, url: str) -> Snapshot | None:
184
+ row = self._conn.execute(
185
+ "SELECT * FROM snapshots WHERE source_id = ? AND url = ?",
186
+ (source_id, url),
187
+ ).fetchone()
188
+ if row is None:
189
+ return None
190
+ return Snapshot(
191
+ source_id=row["source_id"],
192
+ url=row["url"],
193
+ signal_type=row["signal_type"],
194
+ content_hash=row["content_hash"],
195
+ lastmod=row["lastmod"],
196
+ excerpt=row["excerpt"],
197
+ fetched_at=_parse_iso(row["fetched_at"]),
198
+ )
199
+
200
+ def upsert_snapshot(
201
+ self,
202
+ source_id: str,
203
+ url: str,
204
+ *,
205
+ signal_type: SignalType,
206
+ content_hash: str,
207
+ lastmod: str,
208
+ excerpt: str,
209
+ ) -> None:
210
+ self._conn.execute(
211
+ """
212
+ INSERT INTO snapshots
213
+ (source_id, url, signal_type, content_hash, lastmod, excerpt, fetched_at)
214
+ VALUES (?, ?, ?, ?, ?, ?, ?)
215
+ ON CONFLICT(source_id, url) DO UPDATE SET
216
+ signal_type = excluded.signal_type,
217
+ content_hash = excluded.content_hash,
218
+ lastmod = excluded.lastmod,
219
+ excerpt = excluded.excerpt,
220
+ fetched_at = excluded.fetched_at
221
+ """,
222
+ (source_id, url, signal_type, content_hash, lastmod, excerpt, _now_iso()),
223
+ )
224
+ self._conn.commit()
225
+
226
+ # changes ------------------------------------------------------------
227
+
228
+ def record_change(self, change: Change) -> int:
229
+ """Insert a change, IDEMPOTENT on (source_id, url, fetched_hash).
230
+
231
+ If a row with that identity already exists, return its id without
232
+ inserting or touching its verdict. Otherwise insert a fresh row with
233
+ verdict = NULL.
234
+
235
+ The insert uses ON CONFLICT DO NOTHING so the unique index
236
+ ux_changes_identity enforces idempotency atomically, eliminating the
237
+ SELECT-then-INSERT race window.
238
+ """
239
+ self._conn.execute(
240
+ """
241
+ INSERT INTO changes
242
+ (source_id, detected_at, url, change_kind, title, raw_diff,
243
+ description, verdict, fetched_hash)
244
+ VALUES (?, ?, ?, ?, ?, ?, ?, NULL, ?)
245
+ ON CONFLICT(source_id, url, fetched_hash) DO NOTHING
246
+ """,
247
+ (
248
+ change.source_id,
249
+ _to_utc(change.detected_at).isoformat(),
250
+ change.url,
251
+ change.change_kind,
252
+ change.title,
253
+ change.raw_diff,
254
+ change.description,
255
+ change.fetched_hash,
256
+ ),
257
+ )
258
+ self._conn.commit()
259
+ row = self._conn.execute(
260
+ "SELECT id FROM changes WHERE source_id = ? AND url = ? AND fetched_hash = ?",
261
+ (change.source_id, change.url, change.fetched_hash),
262
+ ).fetchone()
263
+ change.id = int(row["id"])
264
+ return change.id
265
+
266
+ def changes_for_digest(self, channels: set[str]) -> list[Change]:
267
+ """Substantive changes not yet delivered to EVERY channel in `channels`.
268
+
269
+ CONTRACTS edge rules:
270
+ - If `channels` is empty, return [] (run_once also short-circuits
271
+ before opening a digest when no channel is enabled).
272
+ - At most ONE row per (source_id, url): the latest by detected_at
273
+ (then id, as a stable tiebreak). Older undelivered substantive rows
274
+ for the same (source_id, url) are not emitted, so a page that
275
+ changes twice before delivery yields one digest line about current
276
+ content, not one current + one stale.
277
+ - `superseded` rows are excluded entirely.
278
+ A change is returned when it still misses at least one requested
279
+ channel, so a prior run's undelivered backlog is retried.
280
+ """
281
+ if not channels:
282
+ return []
283
+ rows = self._conn.execute(
284
+ """
285
+ SELECT c.* FROM changes c
286
+ WHERE c.verdict = 'substantive' AND c.superseded = 0
287
+ AND c.id = (
288
+ SELECT c2.id FROM changes c2
289
+ WHERE c2.source_id = c.source_id AND c2.url = c.url
290
+ AND c2.verdict = 'substantive' AND c2.superseded = 0
291
+ ORDER BY c2.detected_at DESC, c2.id DESC
292
+ LIMIT 1
293
+ )
294
+ ORDER BY c.detected_at DESC, c.id DESC
295
+ """
296
+ ).fetchall()
297
+ result: list[Change] = []
298
+ for r in rows:
299
+ delivered = self.delivered_channels(int(r["id"]))
300
+ if not channels <= delivered:
301
+ result.append(self._row_to_change(r))
302
+ return result
303
+
304
+ def supersede_older(self, source_id: str, url: str, keep_id: int) -> None:
305
+ """Mark undelivered substantive rows for (source_id, url) other than
306
+ keep_id as superseded, so they neither deliver nor recount."""
307
+ self._conn.execute(
308
+ """
309
+ UPDATE changes SET superseded = 1
310
+ WHERE source_id = ? AND url = ? AND id != ?
311
+ AND verdict = 'substantive' AND superseded = 0
312
+ AND id NOT IN (SELECT change_id FROM deliveries)
313
+ """,
314
+ (source_id, url, keep_id),
315
+ )
316
+ self._conn.commit()
317
+
318
+ def set_verdict(
319
+ self,
320
+ change_id: int,
321
+ verdict: Verdict,
322
+ description: str | None,
323
+ group_key: str | None = None,
324
+ group_summary: str | None = None,
325
+ group_title: str | None = None,
326
+ ) -> None:
327
+ """WRITE-ONCE: only sets verdict/group fields on a row whose verdict IS NULL."""
328
+ self._conn.execute(
329
+ "UPDATE changes SET verdict = ?, description = ?, group_key = ?, group_summary = ?, "
330
+ "group_title = ? WHERE id = ? AND verdict IS NULL",
331
+ (verdict, description, group_key, group_summary, group_title, change_id),
332
+ )
333
+ self._conn.commit()
334
+
335
+ @staticmethod
336
+ def _row_to_change(row: sqlite3.Row) -> Change:
337
+ return Change(
338
+ source_id=row["source_id"],
339
+ url=row["url"],
340
+ change_kind=row["change_kind"],
341
+ title=row["title"],
342
+ raw_diff=row["raw_diff"],
343
+ fetched_hash=row["fetched_hash"],
344
+ detected_at=_parse_iso(row["detected_at"]),
345
+ id=row["id"],
346
+ verdict=row["verdict"],
347
+ description=row["description"],
348
+ group_key=row["group_key"],
349
+ group_summary=row["group_summary"],
350
+ group_title=row["group_title"],
351
+ )
352
+
353
+ # per-channel delivery -----------------------------------------------
354
+
355
+ def delivered_channels(self, change_id: int) -> set[str]:
356
+ rows = self._conn.execute(
357
+ "SELECT channel FROM deliveries WHERE change_id = ?", (change_id,)
358
+ ).fetchall()
359
+ return {r["channel"] for r in rows}
360
+
361
+ def record_delivery(self, change_id: int, channel: str) -> None:
362
+ self._conn.execute(
363
+ """
364
+ INSERT INTO deliveries (change_id, channel, sent_at)
365
+ VALUES (?, ?, ?)
366
+ ON CONFLICT(change_id, channel) DO NOTHING
367
+ """,
368
+ (change_id, channel, _now_iso()),
369
+ )
370
+ self._conn.commit()
371
+
372
+ # in-flight digest ---------------------------------------------------
373
+
374
+ def open_digest(self) -> int:
375
+ cur = self._conn.execute("INSERT INTO digests (created_at) VALUES (?)", (_now_iso(),))
376
+ self._conn.commit()
377
+ return int(cur.lastrowid)
378
+
379
+ def commit_digest(self, digest_id: int) -> None:
380
+ self._conn.execute(
381
+ "UPDATE digests SET committed_at = ? WHERE id = ?",
382
+ (_now_iso(), digest_id),
383
+ )
384
+ self._conn.commit()
385
+
386
+ def inflight_digest(self) -> int | None:
387
+ row = self._conn.execute(
388
+ "SELECT id FROM digests WHERE committed_at IS NULL ORDER BY id DESC LIMIT 1"
389
+ ).fetchone()
390
+ return int(row["id"]) if row else None
391
+
392
+ # feed seen-set ------------------------------------------------------
393
+
394
+ def seen_feed_item(self, source_id: str, item_id: str) -> str | None:
395
+ row = self._conn.execute(
396
+ "SELECT content_hash FROM seen_feed_items WHERE source_id = ? AND item_id = ?",
397
+ (source_id, item_id),
398
+ ).fetchone()
399
+ return row["content_hash"] if row else None
400
+
401
+ def upsert_seen_feed_item(self, source_id: str, item_id: str, content_hash: str) -> None:
402
+ self._conn.execute(
403
+ """
404
+ INSERT INTO seen_feed_items (source_id, item_id, content_hash)
405
+ VALUES (?, ?, ?)
406
+ ON CONFLICT(source_id, item_id) DO UPDATE SET
407
+ content_hash = excluded.content_hash
408
+ """,
409
+ (source_id, item_id, content_hash),
410
+ )
411
+ self._conn.commit()
412
+
413
+ # http conditional-GET cache -----------------------------------------
414
+
415
+ def http_cache_get(self, url: str) -> tuple[str, str]:
416
+ row = self._conn.execute(
417
+ "SELECT etag, last_modified FROM http_cache WHERE url = ?", (url,)
418
+ ).fetchone()
419
+ if row is None:
420
+ return ("", "")
421
+ return (row["etag"], row["last_modified"])
422
+
423
+ def http_cache_put(self, url: str, etag: str, last_modified: str) -> None:
424
+ self._conn.execute(
425
+ """
426
+ INSERT INTO http_cache (url, etag, last_modified, fetched_at)
427
+ VALUES (?, ?, ?, ?)
428
+ ON CONFLICT(url) DO UPDATE SET
429
+ etag = excluded.etag,
430
+ last_modified = excluded.last_modified,
431
+ fetched_at = excluded.fetched_at
432
+ """,
433
+ (url, etag, last_modified, _now_iso()),
434
+ )
435
+ self._conn.commit()
436
+
437
+ # run bookkeeping ----------------------------------------------------
438
+
439
+ def last_successful_run(self) -> datetime | None:
440
+ row = self._conn.execute(
441
+ "SELECT value FROM run_state WHERE key = 'last_successful_run'"
442
+ ).fetchone()
443
+ return _parse_iso(row["value"]) if row else None
444
+
445
+ def mark_successful_run(self, when: datetime) -> None:
446
+ self._conn.execute(
447
+ """
448
+ INSERT INTO run_state (key, value) VALUES ('last_successful_run', ?)
449
+ ON CONFLICT(key) DO UPDATE SET value = excluded.value
450
+ """,
451
+ (_to_utc(when).isoformat(),),
452
+ )
453
+ self._conn.commit()
454
+
455
+ # seed import / export -----------------------------------------------
456
+ #
457
+ # A shipped seed is a pre-built baseline (snapshots + feed seen-set + HTTP
458
+ # validators) tagged with the date it was generated. Importing it on a fresh
459
+ # DB gives users a starting point so the first scheduled run diffs against it
460
+ # instead of crawling every page to establish a baseline.
461
+
462
+ # Tables carried in a seed; run_state's seed_date marker is appended separately.
463
+ _SEED_TABLES = ("snapshots", "seen_feed_items", "http_cache")
464
+
465
+ def seed_date(self) -> str | None:
466
+ """The date the imported baseline was generated, or None if unseeded."""
467
+ row = self._conn.execute("SELECT value FROM run_state WHERE key = 'seed_date'").fetchone()
468
+ return row["value"] if row else None
469
+
470
+ def export_seed_sql(self, seed_date: str) -> str:
471
+ """Serialize the baseline tables to portable `INSERT OR IGNORE` SQL.
472
+
473
+ Emits no schema (the importing DB already migrated), so it layers onto an
474
+ existing schema without clashing. The maintainer seed-builder gzips this.
475
+ """
476
+ lines = [f"-- android-watcher seed; generated {seed_date}"]
477
+ for table in self._SEED_TABLES:
478
+ rows = self._conn.execute(f"SELECT * FROM {table}").fetchall()
479
+ for row in rows:
480
+ cols = row.keys()
481
+ vals = ", ".join(_sql_str(row[c]) for c in cols)
482
+ lines.append(f"INSERT OR IGNORE INTO {table} ({', '.join(cols)}) VALUES ({vals});")
483
+ lines.append(
484
+ "INSERT OR IGNORE INTO run_state (key, value) VALUES "
485
+ f"('seed_date', {_sql_str(seed_date)});"
486
+ )
487
+ return "\n".join(lines) + "\n"
488
+
489
+ def import_seed_sql(self, sql: str) -> None:
490
+ """Apply seed `INSERT OR IGNORE` statements; existing rows are preserved."""
491
+ self._conn.executescript(sql)
492
+ self._conn.commit()
@@ -0,0 +1 @@
1
+ from . import claude_cli, noop # noqa: F401
@@ -0,0 +1,25 @@
1
+ """Triager protocol, TriageResult dataclass, and TRIAGERS registry."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Protocol, runtime_checkable
7
+
8
+ from android_watcher.config import AIConfig
9
+ from android_watcher.models import Change
10
+ from android_watcher.registry import Registry
11
+
12
+
13
@dataclass
class TriageResult:
    """Value returned by `Triager.triage`.

    NOTE(review): this module does not show how the fields are populated; the
    per-field notes below are inferred from names and must be confirmed
    against the triager implementations.
    """

    changes: list[Change]  # the Change objects coming out of the triage pass
    tldr: str | None = None  # presumably an optional overall summary — TODO confirm
    unavailable: str | None = None  # presumably why triage could not run — TODO confirm
18
+
19
+
20
@runtime_checkable
class Triager(Protocol):
    """Structural interface for triage backends.

    Any object exposing a `triage` method satisfies the protocol. Because it is
    `@runtime_checkable`, `isinstance(obj, Triager)` works, but it checks only
    that the method exists, not its signature.
    """

    # Takes the changes to triage plus the AI configuration and returns a
    # TriageResult.
    def triage(self, changes: list[Change], config: AIConfig) -> TriageResult: ...
23
+
24
+
25
# Module-level registry of Triager implementations, constructed with the label "triager".
# NOTE(review): presumably backends register themselves here when imported — confirm in registry.py.
TRIAGERS: Registry[Triager] = Registry("triager")