delimit-cli 4.5.2 → 4.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,509 @@
1
+ """Vendor-news sensor (LED-1250).
2
+
3
+ Polls watchlisted vendor accounts on X for fresh, high-engagement posts
4
+ that Delimit can riff on. Reuses the existing twttr241 fetch path from
5
+ ``ai.social_target`` so the LRU + SQLite cache + budget gate are honored
6
+ without duplication.
7
+
8
+ Output is a structured dict consumed by ``ai.vendor_news.drafter``:
9
+
10
+ {
11
+ "triggered": [tweet_obj, ...],
12
+ "skipped": [{tweet_id, reason}, ...],
13
+ "errors": [{handle, error}, ...],
14
+ "stats": {accounts_polled, posts_seen, ...},
15
+ }
16
+
17
+ Each ``tweet_obj`` carries the metadata the drafter needs to compose a
18
+ riff (vendor name, product list, source URL, metrics, age) without
19
+ re-walking the twttr241 response shape.
20
+
21
+ Budget control: ``DELIMIT_VENDOR_NEWS_BUDGET`` env var (default 50)
22
+ caps the number of *live* twttr241 calls per run. Cache hits are free
23
+ and do not count against the budget.
24
+
25
+ Logs every run as a JSONL line at ``~/.delimit/vendor_news_sensor.jsonl``.
26
+ """
27
+
28
+ from __future__ import annotations
29
+
30
+ import json
31
+ import logging
32
+ import os
33
+ import time
34
+ import urllib.parse
35
+ from datetime import datetime, timedelta, timezone
36
+ from pathlib import Path
37
+ from typing import Any, Dict, List, Optional, Tuple
38
+
39
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# ── paths / config ────────────────────────────────────────────────────

# Directory that contains this module; the bundled watchlist sits beside it.
_PKG_DIR = Path(__file__).resolve().parent
WATCHLIST_PATH = _PKG_DIR / "watchlist.yaml"

# JSONL run log, one line per sensor run, under the user's home directory.
SENSOR_LOG_PATH = Path.home() / ".delimit" / "vendor_news_sensor.jsonl"

# Default per-run budget (live twttr241 calls). Overridable via env
# (``DELIMIT_VENDOR_NEWS_BUDGET``).
DEFAULT_RUN_BUDGET = 50

# Per-account fetch size. We only need recent tweets; small page = small
# response = lower processing cost.
DEFAULT_PER_ACCOUNT_LIMIT = 10
55
+
56
+
57
+ # ── watchlist loader ──────────────────────────────────────────────────
58
+
59
+
60
def load_watchlist(path: Optional[Path] = None) -> Dict[str, Any]:
    """Read the watchlist YAML and return it as a dict.

    A small built-in configuration (no accounts, stock trigger thresholds)
    is returned instead when the file does not exist, when the document's
    top level is not a mapping, or when importing/parsing YAML fails for
    any reason, so a broken install never crashes the sensor.

    Args:
        path: Optional override for the watchlist location; falls back to
            ``WATCHLIST_PATH`` when omitted or falsy.

    Returns:
        The parsed watchlist with any missing top-level keys and any
        missing threshold keys filled in from the built-in configuration.
        Treat the result as read-only.
    """
    source = Path(path) if path else WATCHLIST_PATH
    fallback: Dict[str, Any] = {
        "version": 1,
        "accounts": [],
        "trigger_thresholds": {
            "min_likes": 200,
            "min_retweets": 30,
            "min_quotes": 15,
            "velocity_likes_per_hour": 100,
            "max_age_hours": 4,
        },
        "no_at_mention": True,
    }

    if not source.exists():
        logger.warning("vendor_news watchlist missing: %s (using empty default)", source)
        return fallback

    try:
        import yaml  # optional dependency, imported lazily

        parsed = yaml.safe_load(source.read_text(encoding="utf-8"))
        if not parsed:
            parsed = {}
        if not isinstance(parsed, dict):
            return fallback

        # Fill in whichever top-level keys the file left out.
        for key in fallback:
            if key not in parsed:
                parsed[key] = fallback[key]

        # Layer the file's thresholds over a copy of the built-in ones so
        # every expected threshold key is always present.
        merged_thresholds = dict(fallback["trigger_thresholds"])
        file_thresholds = parsed.get("trigger_thresholds")
        merged_thresholds.update(file_thresholds if file_thresholds else {})
        parsed["trigger_thresholds"] = merged_thresholds
        return parsed
    except Exception as exc:  # pragma: no cover — yaml/parse failures
        logger.warning("vendor_news watchlist parse failed (%s); using default", exc)
        return fallback
99
+
100
+
101
+ # ── tweet-time parsing ────────────────────────────────────────────────
102
+
103
+
104
_TWITTER_TS_FORMATS = (
    # "Wed Apr 24 18:30:01 +0000 2026" — twttr241 legacy.created_at
    "%a %b %d %H:%M:%S %z %Y",
)


def _parse_created_at(raw: str) -> Optional[datetime]:
    """Parse a tweet timestamp into a ``datetime``.

    Two shapes are accepted: ISO-8601 (with a trailing ``Z`` treated as
    ``+00:00``) and the formats listed in ``_TWITTER_TS_FORMATS``.
    Surrounding whitespace is ignored for both shapes.

    Args:
        raw: Timestamp text. Empty, ``None`` and non-string values are
            tolerated.

    Returns:
        The parsed ``datetime`` (naive if the ISO text carried no offset),
        or ``None`` when the value is missing or matches no known format.
    """
    if not raw or not isinstance(raw, str):
        return None
    text = raw.strip()
    if not text:
        return None

    # ISO-8601 (cache write paths sometimes normalize). Older
    # ``fromisoformat`` implementations reject a literal "Z" suffix, so
    # rewrite it as an explicit UTC offset first.
    iso_text = text[:-1] + "+00:00" if text.endswith("Z") else text
    try:
        return datetime.fromisoformat(iso_text)
    except ValueError:
        pass

    # Use the stripped text here as well: ``strptime`` rejects stray
    # leading/trailing whitespace that the format does not account for.
    for fmt in _TWITTER_TS_FORMATS:
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            continue
    return None
127
+
128
+
129
def _age_hours(created: Optional[datetime], now: Optional[datetime] = None) -> Optional[float]:
    """Return how many hours lie between ``created`` and ``now``.

    Naive datetimes on either side are interpreted as UTC. A ``created``
    value that lies after ``now`` yields ``0.0`` rather than a negative age.

    Args:
        created: Post creation time, or ``None`` when it is unknown.
        now: Reference time; the current UTC time is used when omitted.

    Returns:
        The non-negative age in hours, or ``None`` if ``created`` is ``None``.
    """
    if created is None:
        return None

    def _utc_if_naive(moment: datetime) -> datetime:
        # Attach UTC to naive values so aware/naive subtraction cannot fail.
        return moment if moment.tzinfo is not None else moment.replace(tzinfo=timezone.utc)

    reference = _utc_if_naive(now if now else datetime.now(timezone.utc))
    elapsed_hours = (reference - _utc_if_naive(created)).total_seconds() / 3600.0
    return elapsed_hours if elapsed_hours > 0.0 else 0.0
139
+
140
+
141
+ # ── twttr241 fetch (per-account search-v2 with from:HANDLE) ───────────
142
+
143
+
144
def _build_from_query(handle: str) -> str:
    """Build the Twitter search query for "tweets authored by @handle".

    ``from:`` is a standard Twitter search operator. Surrounding
    whitespace and any leading ``@`` are removed from the handle, in that
    order, so ``" @OpenAI "`` and ``"OpenAI"`` produce the same query.

    Args:
        handle: Account handle, with or without a leading ``@``.

    Returns:
        ``"from:<handle>"``, or an empty string when no usable handle
        remains after cleaning.
    """
    text = str(handle) if handle else ""
    # Trim whitespace BEFORE dropping "@": otherwise a padded value such
    # as " @name" keeps its "@" and the query becomes "from:@name".
    cleaned = text.strip().lstrip("@").strip()
    return f"from:{cleaned}" if cleaned else ""
154
+
155
+
156
def _fetch_account_recent(
    handle: str,
    limit: int = DEFAULT_PER_ACCOUNT_LIMIT,
    venture_tag: str = "delimit",
) -> Dict[str, Any]:
    """Fetch recent tweets authored by ``handle``.

    The request goes through ``ai.social_target._twttr_fetch`` using a
    ``from:<handle>`` search with ``type=Latest``.

    Args:
        handle: Account handle to query (a leading ``@`` is accepted).
        limit: Number of results to request; must be convertible to int.
        venture_tag: Tag forwarded unchanged to ``_twttr_fetch``.

    Returns:
        A dict:
            {
                "tweets": [normalized_tweet, ...],
                "from_cache": bool,
                "budget_exceeded": bool,
                "mode": str | None,
                "error": str | None,
            }

    Never raises: an invalid limit, an exception from the fetch call, a
    malformed fetch result, or a failure while normalizing the response
    is reported through ``error``.
    """
    out: Dict[str, Any] = {
        "tweets": [],
        "from_cache": False,
        "budget_exceeded": False,
        "mode": None,
        "error": None,
    }

    # Lazy import to keep the module import cheap (and to allow tests
    # to monkeypatch the underlying functions).
    try:
        from ai.social_target import (
            _twttr_fetch,
            _get_rapidapi_key,
            _extract_tweets_for_corpus,
        )
    except Exception as exc:  # pragma: no cover — import failure is fatal upstream
        out["error"] = f"social_target import failed: {exc}"
        return out

    try:
        api_key = _get_rapidapi_key()
    except Exception as exc:  # pragma: no cover — defensive
        out["error"] = f"rapidapi key lookup failed: {exc}"
        return out
    if not api_key:
        out["error"] = "rapidapi key not configured"
        return out

    query = _build_from_query(handle)
    if not query:
        out["error"] = f"invalid handle: {handle!r}"
        return out

    try:
        page_size = int(limit)
    except (TypeError, ValueError):
        out["error"] = f"invalid limit: {limit!r}"
        return out

    encoded = urllib.parse.quote(query)
    url = (
        f"https://twitter241.p.rapidapi.com/search-v2"
        f"?query={encoded}&type=Latest&count={page_size}"
    )
    endpoint = "search/latest"
    params = {"query": query, "type": "Latest", "count": page_size}

    # Guard the call itself so the "never raises" contract holds even if
    # the shared fetch helper fails unexpectedly.
    try:
        fetched = _twttr_fetch(
            endpoint=endpoint,
            params=params,
            url=url,
            api_key=api_key,
            handle=handle,
            venture_tag=venture_tag,
            extract_tweets=_extract_tweets_for_corpus,
        )
    except Exception as exc:
        out["error"] = f"fetch failed: {exc}"
        return out

    if not isinstance(fetched, dict):
        out["error"] = "no response"
        return out

    out["from_cache"] = bool(fetched.get("from_cache"))
    out["budget_exceeded"] = bool(fetched.get("budget_exceeded"))
    out["mode"] = fetched.get("mode")

    response = fetched.get("response")
    if response is None:
        out["error"] = fetched.get("error") or out["mode"] or "no response"
        return out

    try:
        out["tweets"] = _normalize_search_response(response, expected_handle=handle)
    except Exception as exc:
        # A malformed payload is reported, not propagated.
        out["error"] = f"response parse failed: {exc}"
    return out
232
+
233
+
234
def _dict_or_empty(value: Any) -> Dict[str, Any]:
    """Return ``value`` when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _metric_int(value: Any) -> int:
    """Convert a metric to int; missing or unparseable values count as 0."""
    try:
        return int(value or 0)
    except (TypeError, ValueError):
        return 0


def _normalize_search_response(response: Dict[str, Any], expected_handle: str) -> List[Dict[str, Any]]:
    """Walk a twttr241 search-v2 response and return per-tweet dicts.

    The walk follows ``result.timeline.instructions[*].entries[*]`` down
    to ``content.itemContent.tweet_results.result``. Any level that is
    missing, ``null`` or of an unexpected type is treated as empty, so a
    single malformed entry is skipped instead of aborting the whole walk.

    Entries are dropped when they have no ``legacy.id_str``, when the
    author's screen name differs (case-insensitively) from
    ``expected_handle``, or when the tweet is a reply or a retweet.

    Args:
        response: Parsed search-v2 response (may be ``None`` or malformed).
        expected_handle: Handle the tweets must be authored by; an empty
            value disables the author filter.

    Returns:
        A list of dicts with keys ``id``, ``text``, ``author``,
        ``created_at``, ``metrics`` and ``url``.
    """
    tweets: List[Dict[str, Any]] = []
    expected = str(expected_handle or "").strip().lstrip("@").strip().lower()

    timeline = _dict_or_empty(_dict_or_empty(_dict_or_empty(response).get("result")).get("timeline"))
    instructions = timeline.get("instructions")
    if not isinstance(instructions, (list, tuple)):
        return tweets

    for instruction in instructions:
        entries = _dict_or_empty(instruction).get("entries")
        if not isinstance(entries, (list, tuple)):
            continue
        for entry in entries:
            content = _dict_or_empty(_dict_or_empty(entry).get("content"))
            item_content = _dict_or_empty(content.get("itemContent"))
            tweet_result = _dict_or_empty(
                _dict_or_empty(item_content.get("tweet_results")).get("result")
            )
            if not tweet_result:
                continue

            legacy = _dict_or_empty(tweet_result.get("legacy"))
            user_result = _dict_or_empty(
                _dict_or_empty(_dict_or_empty(tweet_result.get("core")).get("user_results")).get("result")
            )
            user_legacy = _dict_or_empty(user_result.get("legacy"))
            user_core = _dict_or_empty(user_result.get("core"))

            tid = legacy.get("id_str") or ""
            if not tid:
                continue

            screen_name = str(
                user_core.get("screen_name")
                or user_legacy.get("screen_name")
                or ""
            )
            if expected and screen_name.lower() != expected:
                # Search-v2 with from:HANDLE should always match, but
                # defend against the API echoing quoted/retweeted authors.
                continue

            full_text = str(legacy.get("full_text") or legacy.get("text") or "")
            in_reply_to = legacy.get("in_reply_to_status_id_str") or ""
            is_retweet = bool(legacy.get("retweeted_status_result")) or full_text.lstrip().startswith("RT @")
            if in_reply_to or is_retweet:
                # Only ride original posts — replies and RTs aren't news.
                continue

            metrics = {
                "favorite_count": _metric_int(legacy.get("favorite_count")),
                "retweet_count": _metric_int(legacy.get("retweet_count")),
                "reply_count": _metric_int(legacy.get("reply_count")),
                "quote_count": _metric_int(legacy.get("quote_count")),
                "view_count": _metric_int(_dict_or_empty(tweet_result.get("views")).get("count")),
            }

            tweets.append(
                {
                    "id": str(tid),
                    "text": full_text,
                    "author": screen_name,
                    "created_at": legacy.get("created_at") or "",
                    "metrics": metrics,
                    "url": (
                        f"https://x.com/{screen_name}/status/{tid}"
                        if screen_name
                        else f"https://x.com/i/status/{tid}"
                    ),
                }
            )
    return tweets
317
+
318
+
319
+ # ── trigger logic ─────────────────────────────────────────────────────
320
+
321
+
322
def _meets_trigger(
    metrics: Dict[str, Any],
    age_hours: Optional[float],
    thresholds: Dict[str, Any],
) -> Tuple[bool, str]:
    """Evaluate a tweet's engagement and age against the trigger thresholds.

    A known age above ``max_age_hours`` rejects the tweet outright.
    Otherwise the absolute floors are tried in the order likes, retweets,
    quotes, and finally (only when the age is known) the likes-per-hour
    velocity. The first check that passes wins.

    Args:
        metrics: Counters keyed ``favorite_count`` / ``retweet_count`` /
            ``quote_count``; missing or falsy values count as 0.
        age_hours: Age of the tweet in hours, or ``None`` if unknown.
        thresholds: Threshold overrides; absent keys use 200 likes,
            30 retweets, 15 quotes, 100 likes/hour and a 4 hour age cap.

    Returns:
        ``(passed, reason)`` where ``reason`` is a short tag for logging:
        ``min_likes``, ``min_retweets``, ``min_quotes``, ``velocity``,
        ``below_thresholds`` or ``too_old:<age>h``.
    """
    like_count = int(metrics.get("favorite_count") or 0)
    retweet_count = int(metrics.get("retweet_count") or 0)
    quote_count = int(metrics.get("quote_count") or 0)

    age_known = age_hours is not None
    age_cap = float(thresholds.get("max_age_hours", 4))
    if age_known and age_hours > age_cap:
        return (False, f"too_old:{age_hours:.1f}h")

    # (log tag, observed count, required floor), in evaluation order.
    absolute_checks = (
        ("min_likes", like_count, int(thresholds.get("min_likes", 200))),
        ("min_retweets", retweet_count, int(thresholds.get("min_retweets", 30))),
        ("min_quotes", quote_count, int(thresholds.get("min_quotes", 15))),
    )
    likes_per_hour_floor = float(thresholds.get("velocity_likes_per_hour", 100))

    for tag, observed, floor in absolute_checks:
        if observed >= floor:
            return (True, tag)

    # Velocity needs a known age. The divisor is floored at 0.25h (15min)
    # so brand-new posts cannot divide by zero or by a near-zero age.
    if age_known and (like_count / max(0.25, age_hours)) >= likes_per_hour_floor:
        return (True, "velocity")
    return (False, "below_thresholds")
360
+
361
+
362
+ # ── log helpers ───────────────────────────────────────────────────────
363
+
364
+
365
def _append_sensor_log(payload: Dict[str, Any], path: Optional[Path] = None) -> None:
    """Append ``payload`` as one JSON line to the sensor log (best effort).

    The parent directory is created if needed. Logging must never abort a
    run, so serialization and filesystem failures are downgraded to a
    warning on the module logger.

    Args:
        payload: Data to record. Values that are not JSON-native (for
            example ``datetime`` or ``Path``) are written via ``str()``.
        path: Optional override for the log file; falls back to
            ``SENSOR_LOG_PATH`` when omitted or falsy.
    """
    target = Path(path) if path else SENSOR_LOG_PATH

    # Serialize before touching the file so a bad payload can neither
    # raise out of this best-effort helper nor leave a partial line.
    # ``default=str`` is deliberate: a lossy log entry beats a lost run.
    try:
        line = json.dumps(payload, ensure_ascii=False, default=str)
    except (TypeError, ValueError) as exc:
        logger.warning("vendor_news sensor log serialize failed: %s", exc)
        return

    try:
        target.parent.mkdir(parents=True, exist_ok=True)
        with open(target, "a", encoding="utf-8") as f:
            f.write(line + "\n")
    except OSError as exc:  # pragma: no cover — best-effort logging
        logger.warning("vendor_news sensor log write failed: %s", exc)
373
+
374
+
375
+ # ── main entry ────────────────────────────────────────────────────────
376
+
377
+
378
def scan_vendor_news(
    dry_run: bool = False,
    watchlist_path: Optional[Path] = None,
    log_path: Optional[Path] = None,
    now: Optional[datetime] = None,
) -> Dict[str, Any]:
    """Scan watchlisted accounts for fresh high-engagement posts.

    Args:
        dry_run: When True, returns the same shape but suppresses the
            JSONL log write. Useful for tests + ad-hoc inspection.
        watchlist_path: Override the watchlist YAML location. Defaults
            to ``WATCHLIST_PATH``.
        log_path: Override the sensor JSONL log path.
        now: Optional override for "current time" — exposed so tests
            can pin the clock and exercise the age window deterministically.

    Returns:
        Dict with ``triggered``, ``skipped``, ``errors``, ``stats``.
        ``triggered`` holds one entry per tweet that passed a trigger;
        ``skipped`` holds tweets that did not, plus one
        ``budget_exhausted`` entry per account left unpolled.

    The per-run budget is read from ``DELIMIT_VENDOR_NEWS_BUDGET``; an
    unparseable value falls back to ``DEFAULT_RUN_BUDGET`` with a warning.
    """
    cfg = load_watchlist(watchlist_path)
    accounts = cfg.get("accounts") or []
    thresholds = cfg.get("trigger_thresholds") or {}

    # A typo in the env var must not crash the whole run.
    raw_budget = os.environ.get("DELIMIT_VENDOR_NEWS_BUDGET", DEFAULT_RUN_BUDGET)
    try:
        budget = int(raw_budget)
    except (TypeError, ValueError):
        logger.warning(
            "invalid DELIMIT_VENDOR_NEWS_BUDGET=%r; using default %d",
            raw_budget,
            DEFAULT_RUN_BUDGET,
        )
        budget = DEFAULT_RUN_BUDGET
    live_calls = 0

    triggered: List[Dict[str, Any]] = []
    skipped: List[Dict[str, Any]] = []
    errors: List[Dict[str, Any]] = []
    posts_seen = 0

    cur_now = now or datetime.now(timezone.utc)

    for account in accounts:
        if not account:
            continue
        if not isinstance(account, dict):
            # e.g. a bare string in the YAML list; report it, keep going.
            errors.append({"handle": str(account), "error": "invalid_account_entry"})
            continue

        handle = account.get("handle", "")
        if not handle:
            continue
        vendor = account.get("vendor", "")
        raw_products = account.get("products") or []
        # A scalar ``products: Foo`` would otherwise be split into characters.
        products = [raw_products] if isinstance(raw_products, str) else list(raw_products)

        if live_calls >= budget:
            skipped.append({"handle": handle, "reason": "budget_exhausted"})
            continue

        try:
            fetched = _fetch_account_recent(handle=handle)
        except Exception as exc:  # pragma: no cover — defensive
            errors.append({"handle": handle, "error": f"fetch_exc: {exc}"})
            continue

        # Only count live HTTP calls against the budget. Cache hits are free.
        # NOTE(review): results that failed before any request was sent
        # (missing key, upstream budget gate) also report from_cache=False
        # and are counted here — confirm whether that is intended.
        if not fetched.get("from_cache"):
            live_calls += 1

        if fetched.get("error"):
            errors.append({"handle": handle, "error": fetched["error"]})
            continue

        if fetched.get("budget_exceeded"):
            errors.append({"handle": handle, "error": f"twttr241_budget:{fetched.get('mode')}"})
            continue

        for tw in fetched.get("tweets") or []:
            posts_seen += 1
            metrics = tw.get("metrics") or {}
            age = _age_hours(_parse_created_at(tw.get("created_at", "")), now=cur_now)
            passed, reason = _meets_trigger(metrics, age, thresholds)

            if passed:
                triggered.append(
                    {
                        "id": tw.get("id"),
                        "text": tw.get("text", ""),
                        "author": tw.get("author", ""),
                        "url": tw.get("url", ""),
                        "created_at": tw.get("created_at", ""),
                        "age_hours": age,
                        "metrics": metrics,
                        "vendor": vendor,
                        "products": products,
                        "trigger_reason": reason,
                    }
                )
            else:
                skipped.append({
                    "id": tw.get("id"),
                    "handle": handle,
                    "reason": reason,
                    "metrics": metrics,
                    "age_hours": age,
                })

    # NOTE(review): ``accounts_polled`` is the watchlist size (it includes
    # accounts skipped for budget), and ``posts_skipped`` also counts the
    # account-level ``budget_exhausted`` entries. Kept as-is for log
    # compatibility.
    stats = {
        "ts": cur_now.isoformat(),
        "accounts_polled": len(accounts),
        "posts_seen": posts_seen,
        "posts_triggered": len(triggered),
        "posts_skipped": len(skipped),
        "errors": len(errors),
        "live_calls": live_calls,
        "budget": budget,
        "dry_run": dry_run,
    }

    result = {
        "triggered": triggered,
        "skipped": skipped,
        "errors": errors,
        "stats": stats,
    }

    if not dry_run:
        # Persist per-run summary (NOT the full triggered list — that
        # can balloon the log file). Sample a few ids so we can grep
        # for a specific post if a riff later misbehaves.
        log_payload = dict(stats)
        log_payload["triggered_ids"] = [t["id"] for t in triggered[:25]]
        log_payload["error_handles"] = [e["handle"] for e in errors[:25]]
        _append_sensor_log(log_payload, path=log_path)

    return result
501
+
502
+
503
# Names exported by ``from <module> import *``; underscore-prefixed helpers
# in this module are intentionally left out.
__all__ = [
    "DEFAULT_RUN_BUDGET",
    "SENSOR_LOG_PATH",
    "WATCHLIST_PATH",
    "load_watchlist",
    "scan_vendor_news",
]
@@ -0,0 +1,71 @@
1
+ # LED-1250 vendor-news riff watchlist.
2
+ #
3
+ # X accounts to monitor for high-engagement vendor announcements that
4
+ # Delimit can riff on for algorithm boost. Edits should be reviewed
5
+ # every 30 days (see review_cadence_days below).
6
+ #
7
+ # Operating contract:
8
+ # * `no_at_mention: true` — the riff drafter MUST NOT @-tag any vendor
9
+ # account. Bare-name only ("Anthropic", not "@AnthropicAI") per
10
+ # founder convention. The drafter enforces this, but call it out
11
+ # here so future editors don't add accounts assuming we'll @-tag.
12
+ # * Trigger thresholds are OR-of-conditions inside the first
13
+ # `max_age_hours` window. A post crosses the bar if ANY threshold
14
+ # is met. Velocity is likes / max(age_hours, 0.25) so very fresh
15
+ # fast-rising posts can clear the floor before the absolute count
16
+ # catches up.
17
+
18
+ version: 1
19
+ last_reviewed: 2026-05-07
20
+ review_cadence_days: 30
21
+
22
+ accounts:
23
+ - handle: AnthropicAI
24
+ vendor: Anthropic
25
+ products: [Claude, Claude Code, Sonnet, Opus, Haiku]
26
+ - handle: OpenAI
27
+ vendor: OpenAI
28
+ products: [Codex, GPT, ChatGPT]
29
+ - handle: OpenAIDevs
30
+ vendor: OpenAI
31
+ products: [Codex, GPT-5]
32
+ - handle: cursor_ai
33
+ vendor: Cursor
34
+ products: [Cursor]
35
+ - handle: cursor
36
+ vendor: Cursor
37
+ products: [Cursor]
38
+ - handle: GeminiApp
39
+ vendor: Google
40
+ products: [Gemini, Gemini CLI]
41
+ - handle: GoogleAI
42
+ vendor: Google
43
+ products: [Gemini]
44
+ - handle: GoogleDeepMind
45
+ vendor: Google
46
+ products: [Gemini]
47
+ - handle: xai
48
+ vendor: xAI
49
+ products: [Grok]
50
+ - handle: grok
51
+ vendor: xAI
52
+ products: [Grok]
53
+ - handle: vercel
54
+ vendor: Vercel
55
+ products: [Next.js, Vercel]
56
+ - handle: replicate
57
+ vendor: Replicate
58
+ products: [models]
59
+
60
+ # Trigger thresholds — a post from a watched account counts as "newsworthy"
61
+ # when it crosses any threshold within its first 4 hours.
62
+ trigger_thresholds:
63
+ min_likes: 200
64
+ min_retweets: 30
65
+ min_quotes: 15
66
+ velocity_likes_per_hour: 100 # if rising fast, lower bar
67
+ max_age_hours: 4 # only fresh news rides the cycle
68
+
69
+ # Anti-tag rule: NEVER @-mention these accounts in the riff.
70
+ # Bare-name only ("Anthropic", not "@AnthropicAI") per founder convention.
71
+ no_at_mention: true