delimit-cli 4.1.53 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +26 -0
- package/bin/delimit-cli.js +1 -2
- package/bin/delimit-setup.js +22 -7
- package/gateway/ai/agent_dispatch.py +79 -0
- package/gateway/ai/daily_digest.py +386 -0
- package/gateway/ai/ledger_manager.py +32 -0
- package/gateway/ai/license_core.py +2 -0
- package/gateway/ai/notify.py +17 -11
- package/gateway/ai/reddit_proxy.py +28 -9
- package/gateway/ai/sensing/__init__.py +35 -0
- package/gateway/ai/sensing/schema.py +107 -0
- package/gateway/ai/sensing/signal_store.py +348 -0
- package/gateway/ai/server.py +419 -6
- package/gateway/ai/supabase_sync.py +308 -0
- package/gateway/ai/work_order.py +216 -0
- package/gateway/ai/workers/__init__.py +32 -0
- package/gateway/ai/workers/base.py +154 -0
- package/gateway/ai/workers/executor.py +861 -0
- package/gateway/ai/workers/outreach_drafter.py +161 -0
- package/gateway/ai/workers/pr_drafter.py +148 -0
- package/package.json +14 -1
- package/gateway/ai/continuity.py +0 -462
- package/gateway/ai/inbox_daemon_runner.py +0 -217
- package/gateway/ai/loop_engine.py +0 -1303
- package/gateway/ai/social_cache.py +0 -341
- package/gateway/ai/social_daemon.py +0 -483
- package/gateway/ai/tweet_corpus_schema.sql +0 -76
- package/scripts/crosspost_devto.py +0 -304
- package/scripts/demo-v420-clean.sh +0 -267
- package/scripts/demo-v420-deliberation.sh +0 -217
- package/scripts/demo-v420.sh +0 -55
- package/scripts/sync-gateway.sh +0 -112
|
@@ -9,24 +9,35 @@ from typing import Any, Dict, List, Optional
|
|
|
9
9
|
logger = logging.getLogger("delimit.ai.reddit_proxy")
|
|
10
10
|
|
|
11
11
|
def _get_proxy_config() -> Dict[str, str]:
|
|
12
|
-
"""Load proxy config from private secrets or environment.
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
12
|
+
"""Load proxy config from private secrets or environment.
|
|
13
|
+
|
|
14
|
+
Returns {proxy_url, token}. The server-side proxy requires a bearer
|
|
15
|
+
token (LED-988 follow-up) — clients without a token still populate
|
|
16
|
+
proxy_url but will fail auth at the server unless the server is run
|
|
17
|
+
without a token (not recommended).
|
|
18
|
+
"""
|
|
19
|
+
config = {"proxy_url": "", "token": ""}
|
|
20
|
+
|
|
21
|
+
# 1. Environment variables
|
|
16
22
|
env_url = os.environ.get("DELIMIT_REDDIT_PROXY")
|
|
23
|
+
env_token = os.environ.get("DELIMIT_REDDIT_PROXY_TOKEN")
|
|
17
24
|
if env_url:
|
|
18
25
|
config["proxy_url"] = env_url
|
|
26
|
+
if env_token:
|
|
27
|
+
config["token"] = env_token
|
|
28
|
+
if config["proxy_url"]:
|
|
19
29
|
return config
|
|
20
30
|
|
|
21
|
-
# 2.
|
|
31
|
+
# 2. Secrets file
|
|
22
32
|
secrets_path = Path.home() / ".delimit" / "secrets" / "reddit-proxy.json"
|
|
23
33
|
if secrets_path.exists():
|
|
24
34
|
try:
|
|
25
35
|
secrets = json.loads(secrets_path.read_text())
|
|
26
|
-
config["proxy_url"] = secrets.get("proxy_url", "")
|
|
36
|
+
config["proxy_url"] = secrets.get("proxy_url", "") or config["proxy_url"]
|
|
37
|
+
config["token"] = secrets.get("token", "") or config["token"]
|
|
27
38
|
except Exception as e:
|
|
28
39
|
logger.debug(f"Failed to load reddit-proxy secrets: {e}")
|
|
29
|
-
|
|
40
|
+
|
|
30
41
|
return config
|
|
31
42
|
|
|
32
43
|
def fetch_subreddit(subreddit: str, sort: str = "new", limit: int = 10) -> List[Dict[str, Any]]:
|
|
@@ -42,7 +53,11 @@ def fetch_subreddit(subreddit: str, sort: str = "new", limit: int = 10) -> List[
|
|
|
42
53
|
if proxy_url:
|
|
43
54
|
try:
|
|
44
55
|
fetch_url = f"{proxy_url}?url={urllib.parse.quote(reddit_url, safe='')}"
|
|
45
|
-
|
|
56
|
+
headers = {"User-Agent": "Delimit/1.0"}
|
|
57
|
+
token = proxy_cfg.get("token", "")
|
|
58
|
+
if token:
|
|
59
|
+
headers["Authorization"] = f"Bearer {token}"
|
|
60
|
+
req = urllib.request.Request(fetch_url, headers=headers)
|
|
46
61
|
with urllib.request.urlopen(req, timeout=10) as resp:
|
|
47
62
|
body = json.loads(resp.read().decode())
|
|
48
63
|
children = body.get("data", {}).get("children", [])
|
|
@@ -84,7 +99,11 @@ def fetch_thread(thread_id: str) -> Optional[Dict[str, Any]]:
|
|
|
84
99
|
if proxy_url:
|
|
85
100
|
try:
|
|
86
101
|
fetch_url = f"{proxy_url}?url={urllib.parse.quote(reddit_url, safe='')}"
|
|
87
|
-
|
|
102
|
+
headers = {"User-Agent": "Delimit/1.0"}
|
|
103
|
+
token = proxy_cfg.get("token", "")
|
|
104
|
+
if token:
|
|
105
|
+
headers["Authorization"] = f"Bearer {token}"
|
|
106
|
+
req = urllib.request.Request(fetch_url, headers=headers)
|
|
88
107
|
with urllib.request.urlopen(req, timeout=10) as resp:
|
|
89
108
|
data = json.loads(resp.read().decode())
|
|
90
109
|
if isinstance(data, list) and len(data) > 0:
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""Signal sensing layer (LED-877).
|
|
2
|
+
|
|
3
|
+
Physically separates observational signals from the ledger. Signals are a
|
|
4
|
+
deliberation corpus, not a task queue — they must never be pulled by
|
|
5
|
+
build_loop as work. Import from ai.sensing.signal_store for ingest/query.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from ai.sensing.schema import Signal, ValidationError, normalize_url, fingerprint_of
|
|
9
|
+
from ai.sensing.signal_store import (
|
|
10
|
+
ingest,
|
|
11
|
+
query,
|
|
12
|
+
dedup_check,
|
|
13
|
+
age_out_to_warm,
|
|
14
|
+
freeze_cold,
|
|
15
|
+
promote_to_ledger,
|
|
16
|
+
SIGNALS_DIR,
|
|
17
|
+
HOT_WINDOW_DAYS,
|
|
18
|
+
WARM_WINDOW_DAYS,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
"Signal",
|
|
23
|
+
"ValidationError",
|
|
24
|
+
"normalize_url",
|
|
25
|
+
"fingerprint_of",
|
|
26
|
+
"ingest",
|
|
27
|
+
"query",
|
|
28
|
+
"dedup_check",
|
|
29
|
+
"age_out_to_warm",
|
|
30
|
+
"freeze_cold",
|
|
31
|
+
"promote_to_ledger",
|
|
32
|
+
"SIGNALS_DIR",
|
|
33
|
+
"HOT_WINDOW_DAYS",
|
|
34
|
+
"WARM_WINDOW_DAYS",
|
|
35
|
+
]
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"""Signal schema + validation (LED-877).
|
|
2
|
+
|
|
3
|
+
A signal is an observation, not a commitment. Schema enforces enough metadata
|
|
4
|
+
for deliberation to work with, rejects empty-identity rows at ingest (killing
|
|
5
|
+
the LED-876 ghost-engage-task class of bug at its source).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import hashlib
|
|
11
|
+
import re
|
|
12
|
+
from dataclasses import dataclass, field, asdict
|
|
13
|
+
from typing import Any, Dict, List, Optional
|
|
14
|
+
from urllib.parse import urlparse, urlunparse, parse_qsl, urlencode
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ValidationError(ValueError):
    """Signal rejected at ingest because it does not satisfy the schema."""
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
_UTM_RE = re.compile(r"^utm_")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def normalize_url(url: str) -> str:
    """Return a canonical form of *url* for comparison and fingerprinting.

    For URLs that carry a scheme: scheme and host are lower-cased, query
    parameters whose name starts with ``utm_`` are dropped, the fragment is
    removed and trailing slashes are trimmed from the path (an empty path
    becomes ``/``). Empty input yields ``""``; input without a scheme, or
    input that cannot be parsed, is returned whitespace-stripped only.
    """
    if not url:
        return ""

    trimmed = url.strip()
    try:
        parts = urlparse(trimmed)
    except Exception:
        return trimmed
    if not parts.scheme:
        return trimmed

    kept_params = [
        (key, value)
        for key, value in parse_qsl(parts.query)
        if not key.startswith("utm_")
    ]

    canonical_path = parts.path.rstrip("/")
    if not canonical_path:
        canonical_path = "/"

    # Slots 4 and 6 (params, fragment) are deliberately blanked.
    return urlunparse(
        (
            parts.scheme.lower(),
            parts.netloc.lower(),
            canonical_path,
            "",
            urlencode(kept_params),
            "",
        )
    )
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def fingerprint_of(platform: str, canonical_url: str, author: str) -> str:
    """Derive the dedup key for a signal.

    The key is the first 16 hex characters of the SHA-256 of
    ``platform|url|author``, where platform and author are lower-cased
    (``None`` is treated as empty) and the URL goes through normalize_url.
    """
    platform_key = (platform or "").lower()
    url_key = normalize_url(canonical_url)
    author_key = (author or "").lower()
    material = "|".join((platform_key, url_key, author_key))
    digest_hex = hashlib.sha256(material.encode("utf-8")).hexdigest()
    return digest_hex[:16]
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass
class Signal:
    """A sensed observation from an external platform.

    Mandatory: canonical_url AND (author OR content_snippet).
    Anything weaker than that is rejected at ingest because deliberation
    cannot draw useful conclusions from a row with no identity.

    The dataclass itself performs no validation; validate_and_normalize is
    the function that enforces the rule above before constructing one.
    """

    # Dedup key; validate_and_normalize fills it from fingerprint_of().
    fingerprint: str
    # Source platform name (required, non-empty after validation).
    platform: str
    # URL after normalize_url().
    canonical_url: str
    author: str = ""
    author_handle: str = ""
    # validate_and_normalize truncates this to 500 characters.
    content_snippet: str = ""
    posted_at: str = ""
    # Left empty by validate_and_normalize; signal_store.ingest sets it.
    ingested_at: str = ""
    classification: str = "signal"
    relevance_score: float = 0.0
    themes: List[str] = field(default_factory=list)
    # validate_and_normalize falls back to canonical_url when no ref is given.
    raw_ref: str = ""
    # Assigned by signal_store.ingest ("SIG-" + 10 hex chars).
    id: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Return the signal as a plain dict (via dataclasses.asdict)."""
        return asdict(self)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def validate_and_normalize(raw: Dict[str, Any]) -> Signal:
    """Convert a raw target dict from social_target.py into a validated Signal.

    Field fallbacks: ``canonical_url`` falls back to ``url``,
    ``content_snippet`` to ``title`` (truncated to 500 characters), and
    ``raw_ref`` to ``source_url`` and then to the canonical URL.

    Raises:
        ValidationError: if canonical_url is missing, if both author and
            content_snippet are empty, if platform is missing, or if
            relevance_score cannot be converted to a float. Failing loudly
            at ingest keeps empty-identity rows out of the corpus (the
            LED-876 failure mode).
    """
    platform = (raw.get("platform") or "").strip()
    canonical_url = normalize_url(raw.get("canonical_url") or raw.get("url") or "")
    author = (raw.get("author") or "").strip()
    content_snippet = (raw.get("content_snippet") or raw.get("title") or "").strip()[:500]

    if not canonical_url:
        raise ValidationError("canonical_url is required")
    if not author and not content_snippet:
        raise ValidationError("at least one of author or content_snippet is required")
    if not platform:
        raise ValidationError("platform is required")

    # A malformed score is a schema violation like any other; surface it as
    # ValidationError so callers that log-and-skip on that type also catch it.
    raw_score = raw.get("relevance_score")
    try:
        relevance_score = float(raw_score or 0.0)
    except (TypeError, ValueError) as exc:
        raise ValidationError(
            f"relevance_score must be numeric, got {raw_score!r}"
        ) from exc

    # list("ai") would yield ["a", "i"]; treat a bare string as one theme.
    raw_themes = raw.get("themes") or []
    themes = [raw_themes] if isinstance(raw_themes, str) else list(raw_themes)

    return Signal(
        fingerprint=fingerprint_of(platform, canonical_url, author),
        platform=platform,
        canonical_url=canonical_url,
        author=author,
        author_handle=(raw.get("author_handle") or "").strip(),
        content_snippet=content_snippet,
        posted_at=(raw.get("posted_at") or "").strip(),
        ingested_at="",  # filled by signal_store.ingest
        classification=(raw.get("classification") or "signal").strip(),
        relevance_score=relevance_score,
        themes=themes,
        raw_ref=(raw.get("raw_ref") or raw.get("source_url") or canonical_url).strip(),
    )
|
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
"""Signal store (LED-877).
|
|
2
|
+
|
|
3
|
+
Physically separated from the ledger. Daily shards, append-only. Reuses
|
|
4
|
+
~/.delimit/intel/ as the parent directory so intel_* tooling can already
|
|
5
|
+
query it via intel_dataset_list.
|
|
6
|
+
|
|
7
|
+
Consumers: delimit sense CLI, delimit_signals_query MCP tool (future).
|
|
8
|
+
NOT a consumer: build_loop, agent_dispatch, ledger_manager.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
import os
|
|
15
|
+
import time
|
|
16
|
+
import uuid
|
|
17
|
+
from collections import Counter, defaultdict
|
|
18
|
+
from datetime import datetime, timedelta, timezone
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
from typing import Any, Dict, Iterable, List, Optional
|
|
21
|
+
|
|
22
|
+
from ai.sensing.schema import (
|
|
23
|
+
Signal,
|
|
24
|
+
ValidationError,
|
|
25
|
+
fingerprint_of,
|
|
26
|
+
normalize_url,
|
|
27
|
+
validate_and_normalize,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class StorageError(RuntimeError):
    """Writing a signal to its on-disk shard failed."""
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# Parent directory reused from the existing intel tooling (see module docstring).
INTEL_DIR = Path.home() / ".delimit" / "intel"
# Daily shards live here as <YYYY-MM-DD>.jsonl.
SIGNALS_DIR = INTEL_DIR / "signals"
# Monthly archives written by freeze_cold().
ARCHIVE_DIR = SIGNALS_DIR / "archive"
# fingerprint -> {id, ingested_at, hit_count, shard, ...}; maintained by ingest().
DEDUP_INDEX_PATH = SIGNALS_DIR / "_dedup_index.json"

# Default look-back for _iter_shards(), age_out_to_warm() and digest().
HOT_WINDOW_DAYS = 7
# Default dedup window and the look-back used by _find_signal().
WARM_WINDOW_DAYS = 30
# ingest() returns "rate_limited" once an author has this many rows in today's shard.
MAX_SIGNALS_PER_AUTHOR_PER_DAY = 3
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _now() -> datetime:
|
|
46
|
+
return datetime.now(timezone.utc)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _today_shard_path(when: Optional[datetime] = None) -> Path:
    """Return the shard file for the calendar date of *when* (default: now, UTC).

    Shards are named ``<YYYY-MM-DD>.jsonl`` inside SIGNALS_DIR.
    """
    moment = when if when else _now()
    shard_name = moment.date().isoformat() + ".jsonl"
    return SIGNALS_DIR / shard_name
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _ensure_dirs() -> None:
    """Create the signals directory and its archive subdirectory if absent."""
    for directory in (SIGNALS_DIR, ARCHIVE_DIR):
        directory.mkdir(parents=True, exist_ok=True)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _load_dedup_index() -> Dict[str, Dict[str, Any]]:
    """Load the fingerprint index from DEDUP_INDEX_PATH.

    Returns an empty dict when the file is missing, unreadable, not valid
    JSON, or valid JSON that is not an object. Callers immediately call
    ``.get`` / item-assign on the result, so anything but a dict would crash
    them.
    """
    try:
        loaded = json.loads(DEDUP_INDEX_PATH.read_text())
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file (no separate exists()
        # check, which would race with writers). ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError from a corrupt file.
        return {}
    return loaded if isinstance(loaded, dict) else {}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _save_dedup_index(index: Dict[str, Dict[str, Any]]) -> None:
    """Persist *index* to DEDUP_INDEX_PATH.

    The JSON is written to a sibling ``.tmp`` file first and then moved over
    the real path with ``Path.replace``, so readers do not observe a
    half-written index.
    """
    _ensure_dirs()
    payload = json.dumps(index)
    scratch = DEDUP_INDEX_PATH.with_suffix(".tmp")
    scratch.write_text(payload)
    scratch.replace(DEDUP_INDEX_PATH)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def dedup_check(fingerprint: str, window_days: int = WARM_WINDOW_DAYS) -> bool:
    """Return True if a signal with this fingerprint was ingested within window_days.

    Returns False for an empty fingerprint, an unknown fingerprint, or an
    index entry whose ``ingested_at`` is missing or unparseable.
    """
    if not fingerprint:
        return False

    entry = _load_dedup_index().get(fingerprint)
    if not isinstance(entry, dict):
        return False

    try:
        ingested = datetime.fromisoformat(entry.get("ingested_at") or "")
    except (TypeError, ValueError):
        # TypeError: value is not a string; ValueError: not ISO-8601.
        return False

    # ingest() writes aware UTC timestamps; a naive value can only come from
    # a hand-edited index. Comparing naive with aware raises TypeError, so
    # interpret naive values as UTC instead of crashing.
    if ingested.tzinfo is None:
        ingested = ingested.replace(tzinfo=timezone.utc)

    cutoff = _now() - timedelta(days=window_days)
    return ingested >= cutoff
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _author_count_today(author: str) -> int:
    """Count rows in today's shard whose author equals *author* (case-insensitive).

    Returns 0 for an empty author, when today's shard does not exist, or
    when the shard cannot be read. Blank lines and lines that are not valid
    JSON are ignored.
    """
    if not author:
        return 0

    shard = _today_shard_path()
    if not shard.exists():
        return 0

    wanted = author.lower()
    try:
        lines = shard.read_text().splitlines()
    except OSError:
        return 0

    matches = 0
    for raw_line in lines:
        if not raw_line.strip():
            continue
        try:
            record = json.loads(raw_line)
        except json.JSONDecodeError:
            continue
        if (record.get("author") or "").lower() == wanted:
            matches += 1
    return matches
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def ingest(raw: Dict[str, Any]) -> Dict[str, Any]:
    """Validate *raw* and append it to today's shard.

    Returns one of:
      * ``{"status": "ingested", "signal": ..., "shard": ...}`` — stored, with
        ``id`` and ``ingested_at`` assigned;
      * ``{"status": "deduped", "fingerprint": ..., "hit_count": ...}`` — the
        fingerprint was already seen inside the dedup window; only the index
        counters are updated;
      * ``{"status": "rate_limited", "author": ..., "limit": ...}`` — the
        author already reached the per-day cap.

    Raises:
        ValidationError: schema violation (the caller decides whether to
            log-and-skip or propagate).
        StorageError: the shard file could not be appended to.
    """
    signal = validate_and_normalize(raw)
    fp = signal.fingerprint

    # Repeat sighting: bump the counters, append nothing.
    if dedup_check(fp):
        dedup_index = _load_dedup_index()
        seen = dedup_index.get(fp, {})
        seen["hit_count"] = int(seen.get("hit_count", 1)) + 1
        seen["last_seen_at"] = _now().isoformat()
        dedup_index[fp] = seen
        _save_dedup_index(dedup_index)
        return {
            "status": "deduped",
            "fingerprint": fp,
            "hit_count": seen["hit_count"],
        }

    cap = MAX_SIGNALS_PER_AUTHOR_PER_DAY
    if _author_count_today(signal.author) >= cap:
        return {"status": "rate_limited", "author": signal.author, "limit": cap}

    _ensure_dirs()
    stamp = _now()
    signal.ingested_at = stamp.isoformat()
    signal.id = "SIG-" + uuid.uuid4().hex[:10].upper()

    # Same timestamp picks the shard, so a row never lands in the wrong day.
    shard = _today_shard_path(stamp)
    try:
        with shard.open("a") as handle:
            handle.write(json.dumps(signal.to_dict()) + "\n")
    except OSError as exc:
        raise StorageError(f"failed to write signal shard {shard}: {exc}") from exc

    dedup_index = _load_dedup_index()
    dedup_index[fp] = dict(
        id=signal.id,
        ingested_at=signal.ingested_at,
        hit_count=1,
        shard=shard.name,
    )
    _save_dedup_index(dedup_index)

    return {"status": "ingested", "signal": signal.to_dict(), "shard": shard.name}
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _iter_shards(since_days: int = HOT_WINDOW_DAYS) -> Iterable[Path]:
    """Return shard paths dated within the last *since_days* days, newest first.

    Files whose name starts with ``_`` and files whose stem is not an ISO
    date are skipped. Returns an empty list when SIGNALS_DIR does not exist.
    """
    if not SIGNALS_DIR.exists():
        return []

    oldest_allowed = (_now() - timedelta(days=since_days)).date()

    def _in_window(candidate: Path) -> bool:
        if candidate.name.startswith("_"):
            return False
        try:
            return datetime.fromisoformat(candidate.stem).date() >= oldest_allowed
        except ValueError:
            return False

    # ISO-dated names sort chronologically, so reverse order is newest first.
    return sorted(filter(_in_window, SIGNALS_DIR.glob("*.jsonl")), reverse=True)
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def query(
    since_days: int = 1,
    platform: str = "",
    limit: int = 50,
) -> List[Dict[str, Any]]:
    """Return signals from recent shards, newest first.

    Args:
        since_days: how many days of shards to scan. Selection is by shard
            date, so ``since_days=1`` (the default `delimit sense` view)
            covers today's and yesterday's shards.
        platform: restrict to one source, compared case-insensitively;
            empty = all.
        limit: maximum number of rows; ``limit <= 0`` yields an empty list.

    Unreadable shards, blank lines, invalid JSON and JSON values that are
    not objects are skipped.
    """
    rows: List[Dict[str, Any]] = []
    if limit <= 0:
        return rows

    want_platform = (platform or "").strip().lower()
    for shard in _iter_shards(since_days):
        try:
            lines = shard.read_text().splitlines()
        except OSError:
            continue
        # Shards are append-only, so the newest rows are at the end. Walk
        # backwards so "newest first" also holds inside a shard and `limit`
        # keeps the most recent rows rather than the oldest ones.
        for line in reversed(lines):
            if not line.strip():
                continue
            try:
                row = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(row, dict):
                continue
            if want_platform and (row.get("platform") or "").lower() != want_platform:
                continue
            rows.append(row)
            if len(rows) >= limit:
                return rows
    return rows
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def age_out_to_warm(days: int = HOT_WINDOW_DAYS) -> int:
    """Report how many shards are older than *days*; nothing is moved.

    Hot/warm separation is a query boundary, not a relocation: every shard
    stays in SIGNALS_DIR and query()'s since_days filter enforces the hot
    window. Files starting with ``_`` or without an ISO-date stem are not
    counted. Returns 0 when SIGNALS_DIR does not exist.
    """
    if not SIGNALS_DIR.exists():
        return 0

    threshold = (_now() - timedelta(days=days)).date()

    def _shard_dates():
        for shard in SIGNALS_DIR.glob("*.jsonl"):
            if shard.name.startswith("_"):
                continue
            try:
                yield datetime.fromisoformat(shard.stem).date()
            except ValueError:
                continue

    return sum(1 for stamped in _shard_dates() if stamped < threshold)
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def freeze_cold(month: str) -> str:
    """Move all shards whose date starts with `month` (YYYY-MM) into ARCHIVE_DIR/{month}.jsonl.

    Returns the archive path as a string. Reruns append to the existing
    archive. A shard that could be copied but not deleted stays in place
    and would be appended again by a later run.

    Raises:
        ValidationError: if *month* is not exactly four ASCII digits, a
            dash, and a two-digit month between 01 and 12.
    """
    # *month* is interpolated into both a glob pattern and a file name, so
    # it must be strictly numeric: "****-**" would otherwise match every
    # shard, and "../a-bc" would write outside ARCHIVE_DIR.
    well_formed = False
    if isinstance(month, str) and len(month) == 7 and month[4] == "-":
        digits = month[:4] + month[5:]
        well_formed = (
            digits.isascii() and digits.isdigit() and 1 <= int(month[5:]) <= 12
        )
    if not well_formed:
        raise ValidationError(f"month must be YYYY-MM, got {month!r}")

    _ensure_dirs()
    archive_path = ARCHIVE_DIR / f"{month}.jsonl"
    with archive_path.open("a") as out:
        for path in sorted(SIGNALS_DIR.glob(f"{month}-*.jsonl")):
            try:
                content = path.read_text()
                # Keep one JSON document per line even if a shard lost its
                # trailing newline.
                if content and not content.endswith("\n"):
                    content += "\n"
                out.write(content)
                # Hand the data to the OS before deleting the source, so a
                # failed write never costs us the original shard.
                out.flush()
            except OSError:
                continue
            try:
                path.unlink()
            except OSError:
                # Best effort: the shard stays behind and is retried later.
                pass
    return str(archive_path)
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def promote_to_ledger(
    signal_id: str,
    ledger: str = "ops",
    priority: str = "P2",
    extra_tags: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """Explicit founder-initiated promotion of a signal to a ledger item.

    This is the ONLY path from intel → ledger. The signal is looked up via
    _find_signal and written through ``ai.ledger_manager.add_item`` with
    ``source="promoted_signal:<id>"``.

    Args:
        signal_id: id of a stored signal (``SIG-...``).
        ledger: target ledger name passed to add_item.
        priority: priority label passed to add_item.
        extra_tags: additional tags appended after the default ones.

    Returns:
        Whatever add_item returns.

    Raises:
        ValidationError: if no signal with that id is found.
    """
    signal = _find_signal(signal_id)
    if not signal:
        raise ValidationError(f"signal {signal_id} not found in hot shards")

    # Imported lazily, as in the original; presumably to avoid an import
    # cycle with the ledger module — TODO confirm.
    from ai.ledger_manager import add_item

    title = f"[{(signal.get('platform') or 'signal').upper()}] Promoted: {signal.get('author') or signal.get('canonical_url')}"
    description = (
        f"Promoted from intel store (signal {signal_id}).\n"
        f"URL: {signal.get('canonical_url', '')}\n"
        f"Author: {signal.get('author', '')}\n"
        f"Snippet: {(signal.get('content_snippet') or '')[:400]}\n"
        f"Posted: {signal.get('posted_at', '')}\n"
        f"Fingerprint: {signal.get('fingerprint', '')}"
    )

    tags = ["promoted-signal"]
    platform_tag = signal.get("platform")
    if platform_tag:
        # Skip the platform tag rather than emit an empty/None tag.
        tags.append(platform_tag)
    if extra_tags:
        tags.extend(extra_tags)

    # Guard checks source=='promoted_signal' + promoted_by set, so bypass the
    # social_scan rejection.
    env_key = "_DELIMIT_SIGNAL_PROMOTED_BY"
    caller_set_it = env_key in os.environ
    os.environ.setdefault(env_key, "founder")
    try:
        result = add_item(
            title=title,
            ledger=ledger,
            type="task",
            priority=priority,
            description=description,
            source=f"promoted_signal:{signal_id}",
            tags=tags,
            context=f"Promoted from signal {signal_id} for strategic action.",
        )
    finally:
        # Only remove the marker if this call created it; a value the
        # caller had already exported must survive the call.
        if not caller_set_it:
            os.environ.pop(env_key, None)
    return result
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def _find_signal(signal_id: str) -> Optional[Dict[str, Any]]:
    """Return the stored row whose ``id`` equals *signal_id*, or None.

    Scans shards from the last WARM_WINDOW_DAYS days, newest shard first.
    Unreadable shards, blank lines and invalid JSON lines are skipped.
    """
    if not signal_id:
        return None

    for shard in _iter_shards(since_days=WARM_WINDOW_DAYS):
        try:
            lines = shard.read_text().splitlines()
        except OSError:
            continue
        for raw_line in lines:
            if not raw_line.strip():
                continue
            try:
                record = json.loads(raw_line)
            except json.JSONDecodeError:
                continue
            if record.get("id") == signal_id:
                return record
    return None
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def digest(since_days: int = HOT_WINDOW_DAYS, top_n: int = 20) -> Dict[str, Any]:
    """Summarise recent signals by platform, author and theme.

    Reads up to 1000 rows via query() and returns a dict with
    ``window_days``, ``total_signals``, ``top_platforms`` (10 most common),
    ``top_authors`` and ``top_themes`` (``top_n`` most common each, as
    ``(value, count)`` pairs) and ``sample`` (the first five rows).
    Rows with a missing or empty platform/author are counted under ``"?"``.
    """
    rows = query(since_days=since_days, limit=1000)
    by_platform: Counter[str] = Counter()
    by_author: Counter[str] = Counter()
    by_theme: Counter[str] = Counter()
    for row in rows:
        # Stored rows always carry the key (author defaults to ""), so a
        # .get() default would never fire; fall back on falsy values.
        by_platform[row.get("platform") or "?"] += 1
        by_author[row.get("author") or "?"] += 1
        for theme in row.get("themes") or []:
            by_theme[theme] += 1
    return {
        "window_days": since_days,
        "total_signals": len(rows),
        "top_platforms": by_platform.most_common(10),
        "top_authors": by_author.most_common(top_n),
        "top_themes": by_theme.most_common(top_n),
        "sample": rows[:5],
    }
|