pmkit 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pmkit/__init__.py +8 -0
- pmkit/backlog.py +409 -0
- pmkit/cli.py +723 -0
- pmkit/connectors/__init__.py +35 -0
- pmkit/connectors/base.py +67 -0
- pmkit/connectors/changelog.py +37 -0
- pmkit/connectors/github.py +49 -0
- pmkit/connectors/hn.py +42 -0
- pmkit/connectors/reddit.py +42 -0
- pmkit/connectors/web.py +44 -0
- pmkit/connectors/x.py +50 -0
- pmkit/dedup.py +64 -0
- pmkit/discover.py +83 -0
- pmkit/dogfood/__init__.py +7 -0
- pmkit/dogfood/file_gaps.py +52 -0
- pmkit/dogfood/install.py +111 -0
- pmkit/dogfood/mcp.py +73 -0
- pmkit/dogfood/report.py +157 -0
- pmkit/dogfood/sample.py +32 -0
- pmkit/dogfood/ui.py +106 -0
- pmkit/killtest.py +31 -0
- pmkit/launch/__init__.py +15 -0
- pmkit/launch/collateral.py +159 -0
- pmkit/launch/drafts.py +53 -0
- pmkit/launch/listen.py +88 -0
- pmkit/launch/plan.py +82 -0
- pmkit/launch/policy.py +153 -0
- pmkit/launch/store.py +260 -0
- pmkit/rice.py +54 -0
- pmkit-0.1.1.dist-info/METADATA +29 -0
- pmkit-0.1.1.dist-info/RECORD +33 -0
- pmkit-0.1.1.dist-info/WHEEL +4 -0
- pmkit-0.1.1.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
"""Tier-A collateral capture — record the real product working.
|
|
2
|
+
|
|
3
|
+
The most credible, least-slop dev collateral is the tool itself in action. This module
|
|
4
|
+
captures that authentically by reusing the pm-dogfood drivers: Playwright screenshots/video
|
|
5
|
+
of a running app, asciinema CLI casts, and SVG/HTML rendered to PNG via the same browser. No
|
|
6
|
+
AI-generated media (that's deferred v2).
|
|
7
|
+
|
|
8
|
+
``plan_capture`` (pure) validates a capture spec; ``run_capture`` (live, gated) performs it.
|
|
9
|
+
Each capture kind is gated on its tool's availability — a missing tool degrades that step to a
|
|
10
|
+
clean skip, never a crash. Pure validation is unit-tested; the live pass is exercised by the
|
|
11
|
+
scenario/integration run and on a real machine.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import os
|
|
17
|
+
import shutil
|
|
18
|
+
import tempfile
|
|
19
|
+
from typing import Optional
|
|
20
|
+
|
|
21
|
+
CAPTURE_KINDS = ("screenshot", "video", "cli_cast", "diagram")


def plan_capture(requests: list[dict]) -> list[dict]:
    """Check capture requests and return them as a normalized plan.

    Pure: nothing is launched or executed here. Every request needs a ``kind``
    from ``CAPTURE_KINDS``; ``name`` falls back to ``"<kind>-<index>"``.

    Per-kind requirements:
      * ``screenshot`` / ``video`` -- a non-empty ``url``; ``steps`` defaults to ``[]``.
      * ``cli_cast`` -- a non-empty ``command``.
      * ``diagram`` -- at least one of ``html`` / ``html_path``.

    Raises:
        ValueError: on an unknown kind or a missing required field; the message
            carries the index of the offending request.
    """
    normalized: list[dict] = []
    for index, request in enumerate(requests):
        kind = request.get("kind")
        if kind not in CAPTURE_KINDS:
            raise ValueError(
                f"request {index}: unknown kind {kind!r} (expected {CAPTURE_KINDS})"
            )

        entry = {"kind": kind, "name": request.get("name") or f"{kind}-{index}"}

        if kind == "cli_cast":
            if not request.get("command"):
                raise ValueError(f"request {index} (cli_cast): missing 'command'")
            entry["command"] = request["command"]
        elif kind == "diagram":
            html = request.get("html")
            html_path = request.get("html_path")
            if not (html or html_path):
                raise ValueError(
                    f"request {index} (diagram): needs 'html' or 'html_path'"
                )
            entry["html"] = html
            entry["html_path"] = html_path
        else:  # screenshot / video: both are driven from a URL
            if not request.get("url"):
                raise ValueError(f"request {index} ({kind}): missing 'url'")
            entry["url"] = request["url"]
            entry["steps"] = request.get("steps") or []

        normalized.append(entry)
    return normalized
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _tool_for(kind: str) -> str:
    """Return the display name of the external tool a capture kind relies on."""
    if kind == "cli_cast":
        return "asciinema"
    # Every other kind is reported as needing the browser toolchain.
    return "playwright/chromium"
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def capture_available(kind: str) -> bool:
    """Report whether the tool behind ``kind`` can be used on this machine.

    ``cli_cast`` only needs an ``asciinema`` executable on ``PATH``. Every other
    kind defers to the dogfood package's ``playwright_available`` gate.
    """
    if kind != "cli_cast":
        # Imported lazily so the asciinema-only path does not load the dogfood UI module.
        from ..dogfood.ui import playwright_available

        return playwright_available()
    return shutil.which("asciinema") is not None
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def run_capture(plan: list[dict], outdir: str, *, url_timeout_ms: int = 15000) -> list[dict]:
    """Perform the capture plan, skipping steps whose tool is unavailable.

    Args:
        plan: normalized steps; each must carry ``kind`` and ``name``.
        outdir: directory for the artifacts; created if it does not exist.
        url_timeout_ms: navigation timeout forwarded to browser-based captures.

    Returns:
        One result per step, in plan order:
        ``{kind, name, ok, [path], [skipped], [reason]}``. An unavailable tool
        yields ``ok=False, skipped=True``; an exception raised while capturing
        yields ``ok=False, skipped=False`` with the exception text as ``reason``.
        A failing step never aborts the run, so independent captures still land.
    """
    os.makedirs(outdir, exist_ok=True)

    # Probe each tool once per run instead of once per step: several kinds share
    # one tool, and the browser probe is presumably not free (it checks that the
    # browser is launchable).
    tool_ready: dict[str, bool] = {}
    results: list[dict] = []
    for step in plan:
        kind = step["kind"]
        name = step["name"]
        tool = _tool_for(kind)
        if tool not in tool_ready:
            tool_ready[tool] = capture_available(kind)

        if not tool_ready[tool]:
            results.append({"kind": kind, "name": name, "ok": False,
                            "skipped": True, "reason": f"{tool} unavailable"})
            continue

        try:
            path = _capture_one(step, outdir, url_timeout_ms)
        except Exception as e:  # deliberate boundary: one bad step must not sink the rest
            results.append({"kind": kind, "name": name, "ok": False,
                            "skipped": False, "reason": f"{type(e).__name__}: {e}"})
        else:
            results.append({"kind": kind, "name": name, "ok": True, "path": path})
    return results
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _capture_one(step: dict, outdir: str, url_timeout_ms: int) -> Optional[str]:
    """Route one plan step to the capture routine for its kind.

    ``cli_cast`` and ``diagram`` have dedicated routines; every other kind goes
    to the browser capture, which is the only one that takes the URL timeout.
    Returns whatever the chosen routine returns.
    """
    kind = step["kind"]
    if kind not in ("cli_cast", "diagram"):
        return _capture_browser(step, outdir, url_timeout_ms)
    handler = _capture_cli_cast if kind == "cli_cast" else _capture_diagram
    return handler(step, outdir)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _apply_steps(page, steps: list[dict]) -> None:
    """Run the given UI steps against ``page``, in order.

    The steps are first passed through the dogfood package's ``translate_steps``
    and then executed one by one with its ``_run_step``.
    """
    # Reuse the pm-dogfood Streamlit-aware UI stepping (fill+Enter+settle).
    from ..dogfood.ui import _run_step, translate_steps

    translated = translate_steps(steps)
    for action in translated:
        _run_step(page, action)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _capture_browser(step: dict, outdir: str, url_timeout_ms: int) -> Optional[str]:
    """Capture a ``screenshot`` or ``video`` step with Playwright's Chromium.

    The page is opened at ``step['url']`` and ``step['steps']`` are replayed.
    For a video step the page lives in a context recording into ``outdir``, and
    the recording's path is returned (``None`` if the page has no video).
    Otherwise a full-page PNG is written to ``<outdir>/<name>.png`` and that
    path is returned. The browser is closed on every exit path.
    """
    from playwright.sync_api import sync_playwright

    with sync_playwright() as pw:
        browser = pw.chromium.launch()
        try:
            recording = step["kind"] == "video"
            if recording:
                context = browser.new_context(record_video_dir=outdir)
                page = context.new_page()
            else:
                page = browser.new_page()

            page.goto(step["url"], timeout=url_timeout_ms)
            _apply_steps(page, step["steps"])

            if not recording:
                target = os.path.join(outdir, f"{step['name']}.png")
                page.screenshot(path=target, full_page=True)
                return target

            # Read the path before closing the context, as the original flow does.
            video_path = page.video.path() if page.video else None
            context.close()
            return video_path
        finally:
            browser.close()
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _capture_diagram(step: dict, outdir: str) -> Optional[str]:
    """Render a diagram step to ``<outdir>/<name>.png`` via Playwright's Chromium.

    The HTML comes from ``step['html_path']`` when set; otherwise ``step['html']``
    is written to a temporary ``.html`` file that is removed again before
    returning, including when writing or rendering fails.

    Returns:
        Path of the full-page PNG screenshot.
    """
    from pathlib import Path

    from playwright.sync_api import sync_playwright

    html_path = step.get("html_path")
    tmp = None
    try:
        if not html_path:
            fd, tmp = tempfile.mkstemp(suffix=".html", prefix="pmkit-diagram-")
            # Written inside the try so a failed write still unlinks the temp file.
            with os.fdopen(fd, "w", encoding="utf-8") as fh:
                fh.write(step["html"])
            html_path = tmp

        # Build the URL with as_uri() rather than "file://" + path: it percent-encodes
        # characters such as '#', '?', '%' and spaces, and produces a valid
        # file:///C:/... form for Windows drive paths.
        page_url = Path(os.path.abspath(html_path)).as_uri()

        with sync_playwright() as p:
            browser = p.chromium.launch()
            try:
                page = browser.new_page()
                page.goto(page_url)
                out = os.path.join(outdir, f"{step['name']}.png")
                page.screenshot(path=out, full_page=True)
                return out
            finally:
                browser.close()
    finally:
        if tmp:
            os.unlink(tmp)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _capture_cli_cast(step: dict, outdir: str) -> Optional[str]:
    """Record ``step['command']`` with ``asciinema rec`` into ``<outdir>/<name>.cast``.

    An existing cast at that path is overwritten. Returns the cast path.

    Raises:
        subprocess.CalledProcessError: asciinema exited non-zero (``check=True``).
        subprocess.TimeoutExpired: the recording ran longer than 120 seconds.
    """
    import subprocess

    cast_path = os.path.join(outdir, f"{step['name']}.cast")
    argv = ["asciinema", "rec", "--overwrite", "--command", step["command"], cast_path]
    subprocess.run(argv, check=True, capture_output=True, text=True, timeout=120)
    return cast_path
|
pmkit/launch/drafts.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""Draft starting-points + slop-critic verdicts — with a structural never-final guardrail.
|
|
2
|
+
|
|
3
|
+
The drafting agent produces *starting-points* and the independent slop-critic judges them.
|
|
4
|
+
This module stores and emits them. The guardrail is structural, not cosmetic: there is no
|
|
5
|
+
"final"/"postable" state anywhere in the data model or this API, so nothing here can be
|
|
6
|
+
mistaken for a ready post. Every emitted draft is labeled a starting-point the operator must
|
|
7
|
+
rewrite in their own voice. The operator writes the final post and posts it.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
DRAFT_KIND = "starting_point"  # the only kind — there is deliberately no 'final'/'postable'

_PREAMBLE = (
    "STARTING-POINTS — raw material, NOT posts. Rewrite each in your own voice; "
    "never paste as-is. You write the final post and you post it."
)


def record_draft(
    store,
    product: str,
    platform: str,
    text: str,
    *,
    community: str | None = None,
    critic: dict | None = None,
) -> int:
    """Store one draft starting-point (optionally with its slop-critic verdict).

    Thin pass-through to ``store.add_draft``; returns whatever that call returns.
    """
    return store.add_draft(product, platform, text, community=community, critic=critic)


def emit(drafts: list[dict]) -> str:
    """Render stored drafts as markdown, always labeled as starting-points.

    Each draft needs ``id``, ``platform`` and ``text``; ``community``,
    ``critic_flagged`` and ``critic`` are optional. A truthy ``critic_flagged``
    adds a warning line (score, tells, optional suggestion); an explicit
    ``False`` adds a "clear" line; anything else adds no critic line.

    Returns:
        The preamble followed by one section per draft, ending in exactly one
        newline. With no drafts, the preamble plus a ``_(no drafts)_`` marker.
    """
    lines = [_PREAMBLE, ""]
    if not drafts:
        lines.append("_(no drafts)_")
        return "\n".join(lines) + "\n"

    for d in drafts:
        loc = f"{d['platform']}" + (f" · {d['community']}" if d.get("community") else "")
        lines.append(f"## [{d['id']}] {loc} — STARTING-POINT (rewrite before posting)")
        if d.get("critic_flagged"):
            critic = d.get("critic") or {}
            # ``tells`` may be present but None, or a single bare string; neither
            # should crash the render or be joined character by character.
            raw_tells = critic.get("tells") or []
            if isinstance(raw_tells, str):
                raw_tells = [raw_tells]
            tells = ", ".join(str(t) for t in raw_tells) or "reads as AI slop"
            lines.append(f"> ⚠ slop-critic FLAGGED (score {critic.get('score', '?')}): {tells}")
            if critic.get("suggestion"):
                lines.append(f"> fix: {critic['suggestion']}")
        elif d.get("critic_flagged") is False:
            lines.append("> slop-critic: clear")
        lines.append("")
        lines.append(d["text"])
        lines.append("")
    return "\n".join(lines).rstrip() + "\n"
|
pmkit/launch/listen.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""The listen loop — fold post-launch reactions back into the discovery backlog.
|
|
2
|
+
|
|
3
|
+
This is what closes the funnel into a loop: after a launch, the reactions (comments,
|
|
4
|
+
mentions, threads) are ingested as ``launch-feedback`` candidates through the *same*
|
|
5
|
+
dedup/attach path discovery already uses (``backlog.find_existing`` → ``attach_evidence``
|
|
6
|
+
or ``add_candidate``), reusing ``Config.min_engagement`` and ``dedup.find_near_duplicate``.
|
|
7
|
+
Feedback that echoes a known opportunity accrues as evidence; a genuinely new pain becomes a
|
|
8
|
+
new ``new`` candidate the funnel can pick up. Read-only — listening never engages or posts.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from typing import Optional
|
|
14
|
+
|
|
15
|
+
from ..backlog import Backlog, make_dedup_key
|
|
16
|
+
from ..connectors import get_connectors
|
|
17
|
+
from ..connectors.base import Config, ConnectorError
|
|
18
|
+
from ..dedup import DEFAULT_THRESHOLD, find_near_duplicate
|
|
19
|
+
|
|
20
|
+
SOURCE_TAG = "launch-feedback"


def run_listen(
    backlog: Backlog,
    target: str,
    connectors: Optional[list] = None,
    cfg: Optional[Config] = None,
    limit: int = 25,
    near_threshold: float = DEFAULT_THRESHOLD,
) -> dict:
    """Pull reactions for ``target`` from each connector and fold them into the backlog.

    Each fetched candidate has its source retagged as ``launch-feedback`` (the
    connector's own type is kept under ``origin``). A candidate that matches an
    existing item, by dedup key or near-duplicate check, is attached to it as
    evidence; otherwise it is added as a new candidate. A connector that is
    unavailable or whose fetch raises is recorded under ``skipped`` and the
    pass moves on to the next one.

    Returns:
        Summary dict with ``target``, ``fetched``, ``new``, ``merged``,
        ``low_confidence``, ``by_source`` (connector name -> fetched count) and
        ``skipped`` (list of ``{source, reason}``).
    """
    cfg = cfg or Config.from_env()
    if connectors is None:
        connectors = get_connectors()

    skipped: list[dict] = []
    per_source: dict = {}
    summary = {
        "target": target,
        "fetched": 0,
        "new": 0,
        "merged": 0,
        "low_confidence": 0,
        "by_source": per_source,
        "skipped": skipped,
    }

    # Working set for near-duplicate checks; grows as new candidates are added below.
    known = [row for row in backlog.list() if row["target"] == target]

    for connector in connectors:
        usable, why_not = connector.available(cfg)
        if not usable:
            skipped.append({"source": connector.name, "reason": why_not})
            continue
        try:
            candidates = connector.fetch(target, cfg, limit)
        except ConnectorError as err:
            skipped.append({"source": connector.name, "reason": str(err)})
            continue
        except Exception as err:  # a connector bug must not abort the listen pass
            skipped.append({"source": connector.name, "reason": f"unexpected: {err}"})
            continue

        per_source[connector.name] = len(candidates)
        for candidate in candidates:
            summary["fetched"] += 1

            below_floor = candidate["engagement"] < cfg.min_engagement
            origin = candidate["source"]
            low_conf = below_floor or not origin.get("url")

            # Retag provenance as launch-feedback, preserving the origin connector.
            source = {**origin, "type": SOURCE_TAG, "origin": origin.get("type")}

            dedup_key = make_dedup_key(target, candidate["problem"])
            match = backlog.find_existing(target, dedup_key)
            if not match:
                match = find_near_duplicate(candidate, known, near_threshold)

            if match is not None:
                backlog.attach_evidence(match["id"], [source])
                summary["merged"] += 1
                continue

            new_id = backlog.add_candidate(
                target=target,
                title=f"[launch-feedback] {candidate['title']}",
                problem=candidate["problem"],
                sources=[source],
                low_confidence=low_conf,
            )
            summary["new"] += 1
            if low_conf:
                summary["low_confidence"] += 1
            known.append(
                {"id": new_id, "title": candidate["title"], "problem": candidate["problem"]}
            )

    return summary
|
pmkit/launch/plan.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""The emit-only launch plan renderer.
|
|
2
|
+
|
|
3
|
+
Turns structured targets (produced by the ``pm-launch-targeter`` agent, each carrying its
|
|
4
|
+
mod-policy verdict from ``policy.py``) into a dated checklist the operator follows. Pure and
|
|
5
|
+
deterministic. **Emit-only**: this renders an artifact and creates no cron entries and posts
|
|
6
|
+
nothing — the human owns timing and the act of posting.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
# Human-readable verdict markers. Plain text, but the separator is an em dash
# (U+2014), so these values are not pure ASCII.
_MARK = {
    "block": "BLOCK — DO NOT POST HERE",
    "warn": "WARN — check the cited rule",
    "ok": "OK",
    "unavailable": "RULES UNAVAILABLE — check manually",
    "unknown": "UNKNOWN — research policy first",
}


def build_plan(product: str, targets: list[dict]) -> dict:
    """Validate + normalize targets into an ordered plan. Pure.

    Each target: ``{platform, community|channel, [thread], [angle], [day], [policy]}``
    where ``policy`` is a dict carrying ``verdict``, ``cited_rules`` and
    optionally ``note``. A missing or ``None`` ``day`` means day 0; a missing
    verdict becomes ``"unknown"``.

    Returns:
        ``{"product": product, "targets": [...]}`` with targets sorted by
        ``(day, platform, community)``.

    Raises:
        ValueError: a target lacks ``platform`` or ``community``/``channel``, or
            its ``day`` cannot be converted to an integer. The message names the
            target's index.
    """
    norm: list[dict] = []
    for i, t in enumerate(targets):
        platform = t.get("platform")
        community = t.get("community") or t.get("channel")
        if not platform:
            raise ValueError(f"target {i}: missing 'platform'")
        if not community:
            raise ValueError(f"target {i}: missing 'community'/'channel'")

        # An explicit ``"day": None`` is treated like an absent key instead of
        # crashing in int(); anything else non-numeric is reported with its index.
        raw_day = t.get("day")
        try:
            day = 0 if raw_day is None else int(raw_day)
        except (TypeError, ValueError) as exc:
            raise ValueError(
                f"target {i}: 'day' must be an integer, got {raw_day!r}"
            ) from exc

        policy = t.get("policy") or {}
        norm.append({
            "platform": platform,
            "community": community,
            "thread": t.get("thread"),
            "angle": t.get("angle"),
            "day": day,
            "verdict": policy.get("verdict") or "unknown",
            "cited_rules": policy.get("cited_rules") or [],
            "note": policy.get("note"),
        })
    norm.sort(key=lambda x: (x["day"], x["platform"], x["community"]))
    return {"product": product, "targets": norm}


def render_markdown(plan: dict) -> str:
    """Render the plan as a dated, emit-only markdown checklist.

    Targets are grouped under ``## Day N`` headings in ascending day order and
    keep their incoming order within a day. Each target is one checkbox line
    with its verdict marker (unrecognized verdicts are shown verbatim), followed
    by its cited rules and any note, thread or angle.

    Returns:
        Markdown ending in exactly one newline; with no targets, the header
        plus a ``_(no targets)_`` marker.
    """
    lines = [
        f"# Launch plan: {plan['product']}",
        "",
        "> Emit-only. pm-launch prepared this; **you** write the final post in your voice "
        "and post it — the system never posts.",
        "",
    ]
    targets = plan.get("targets", [])
    if not targets:
        lines.append("_(no targets)_")
        return "\n".join(lines) + "\n"

    by_day: dict[int, list[dict]] = {}
    for t in targets:
        by_day.setdefault(t["day"], []).append(t)

    for day in sorted(by_day):
        lines.append(f"## Day {day}")
        for t in by_day[day]:
            mark = _MARK.get(t["verdict"], t["verdict"])
            lines.append(f"- [ ] **{t['platform']} · {t['community']}** — {mark}")
            for r in t["cited_rules"]:
                lines.append(f" - rule: {r.get('text', '')}")
            if t.get("note"):
                lines.append(f" - note: {t['note']}")
            if t.get("thread"):
                lines.append(f" - thread: {t['thread']}")
            if t.get("angle"):
                lines.append(f" - angle: {t['angle']}")
        lines.append("")
    return "\n".join(lines).rstrip() + "\n"
|
pmkit/launch/policy.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
"""Moderator-policy research — the killer feature of the launch stage.
|
|
2
|
+
|
|
3
|
+
For a community, produce a ``block`` / ``warn`` / ``ok`` verdict with the *cited rule(s)* so
|
|
4
|
+
the operator never gets a post pulled by a moderator again. The verdict logic is a **pure
|
|
5
|
+
function** (``decide_policy``) over structured rules, so it is reproducible and unit-testable.
|
|
6
|
+
Reading a community's prose rules into that structure is judgment that belongs to the
|
|
7
|
+
``pm-launch-policy`` agent; the live Reddit fetch here is a convenience for the deterministic
|
|
8
|
+
path and is gated — a fetch failure degrades to a clean ``unavailable`` verdict, never a crash.
|
|
9
|
+
|
|
10
|
+
Caching (``resolve_policy``) reuses ``LaunchStore``'s ``mod_policy_cache`` with a 30-day TTL.
|
|
11
|
+
Non-Reddit platforms have no machine-readable rules; they return ``ok`` plus a norm note.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from datetime import datetime, timedelta, timezone
|
|
17
|
+
from typing import Callable, Optional
|
|
18
|
+
|
|
19
|
+
from ..connectors.base import http_get_json
|
|
20
|
+
|
|
21
|
+
# Substring heuristics matched against lower-cased rule text. A BLOCK hit wins
# over a WARN hit. Deliberately conservative: the verdict is advisory and cites
# the matching rule so the human makes the final call.
_BLOCK_SIGNALS = (
    "no self-promotion", "no self promotion", "no selfpromo", "no advertising",
    "no promotion", "no marketing", "no blogspam", "no blog spam", "not allowed",
    "prohibited", "banned", "zero tolerance", "no spam", "no soliciting",
    "do not post your own", "no links to your own",
)
_WARN_SIGNALS = (
    "ratio", "1:10", "9:1", "10%", "10 percent", "once per", "once a week",
    "limit", "flair", "approval required", "must be approved", "account age",
    "karma", "weekly thread", "megathread", "self-promotion saturday", "only on",
    "must include", "no more than",
)

# Best-effort norm notes for platforms without machine-readable rules (R1 / Scope: others).
NORM_NOTES = {
    "hackernews": "No machine-readable rules. Use 'Show HN:' for your own work; one post; "
                  "no reposting; engage genuinely in comments.",
    "x": "No hard self-promo gate; norms favor a narrative thread over a bare link, and "
         "replying in relevant conversations over broadcasting.",
    "linkedin": "No hard self-promo gate; norms favor a story/lesson framing over a raw "
                "announcement; external links can suppress reach.",
}

DEFAULT_TTL_DAYS = 30


def decide_policy(rules: list[dict]) -> tuple[str, list[dict]]:
    """Derive a verdict from structured rules (``{"text": str, "url": str}``). Pure.

    Returns ``(verdict, cited_rules)``: ``"block"`` with every rule that matches
    a block signal, else ``"warn"`` with every rule that matches a warn signal,
    else ``("ok", [])``.
    """
    tiers = (("block", _BLOCK_SIGNALS), ("warn", _WARN_SIGNALS))
    for verdict, signals in tiers:
        cited = [rule for rule in rules if _matches(rule, signals)]
        if cited:
            return verdict, cited
    return "ok", []


def _matches(rule: dict, signals: tuple[str, ...]) -> bool:
    """True when the rule's text contains any signal, ignoring the text's case."""
    haystack = (rule.get("text") or "").lower()
    for signal in signals:
        if signal in haystack:
            return True
    return False
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def is_stale(fetched_at: str, ttl_days: int, now: Optional[datetime] = None) -> bool:
    """Pure: is a cache row older than its TTL?

    Args:
        fetched_at: ISO-8601 timestamp of the row; a trailing ``Z`` is accepted
            as UTC. A timestamp without an offset is taken to be UTC.
        ttl_days: maximum age in days; a row exactly this old is still fresh.
        now: reference time, defaulting to the current UTC time. A naive value
            is taken to be UTC, matching how ``fetched_at`` is treated.

    Returns:
        ``True`` when the row is older than ``ttl_days`` or when ``fetched_at``
        cannot be parsed (missing or unparseable timestamps count as stale).
    """
    now = now or datetime.now(timezone.utc)
    if now.tzinfo is None:
        # Avoid a naive-vs-aware TypeError in the subtraction below.
        now = now.replace(tzinfo=timezone.utc)

    # datetime.fromisoformat() only understands the "Z" suffix from Python 3.11 on;
    # rewrite it so valid UTC timestamps are not misread as stale on older versions.
    if isinstance(fetched_at, str) and fetched_at[-1:] in ("Z", "z"):
        fetched_at = fetched_at[:-1] + "+00:00"

    try:
        ts = datetime.fromisoformat(fetched_at)
    except (ValueError, TypeError):
        return True
    if ts.tzinfo is None:
        ts = ts.replace(tzinfo=timezone.utc)
    return now - ts > timedelta(days=int(ttl_days))
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def parse_subreddit_rules(data: dict) -> list[dict]:
    """Pure: flatten a Reddit ``about/rules.json`` payload into structured rules.

    Each entry under ``rules`` becomes ``{"text": "<short_name>: <description>",
    "url": ""}``. When only one of the two parts is present, the text is just
    that part. Entries with neither are dropped. A falsy payload yields ``[]``.
    """
    parsed: list[dict] = []
    for entry in (data or {}).get("rules", []):
        title = (entry.get("short_name") or "").strip()
        body = (entry.get("description") or "").strip()
        # Stripping ':' and ' ' removes the dangling separator when a part is empty.
        combined = f"{title}: {body}".strip(": ").strip()
        if not combined:
            continue
        parsed.append({"text": combined, "url": ""})
    return parsed
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def fetch_subreddit_rules(community: str, timeout: float = 15.0) -> list[dict]:
    """Fetch a subreddit's rules from Reddit's public ``about/rules.json`` endpoint.

    ``community`` may be given as ``name``, ``r/name`` or ``/r/name`` (the prefix
    is matched case-insensitively). The response is passed through
    ``parse_subreddit_rules``. Errors raised by the HTTP helper are not caught here.
    """
    name = community.strip().lstrip("/")
    if name[:2].lower() == "r/":
        name = name[2:]
    endpoint = f"https://www.reddit.com/r/{name}/about/rules.json"
    payload = http_get_json(endpoint, {}, timeout)
    return parse_subreddit_rules(payload)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _default_fetcher(platform: str) -> Optional[Callable[[str], list[dict]]]:
    """Return the built-in rules fetcher for ``platform``, or ``None`` if there is none.

    Only Reddit has a fetcher (``fetch_subreddit_rules``). The platform name is
    compared ignoring case and surrounding whitespace.
    """
    # A case-sensitive match would send "Reddit" down the no-fetcher path, where
    # resolve_policy answers "ok" without looking at any rules.
    if isinstance(platform, str) and platform.strip().lower() == "reddit":
        return fetch_subreddit_rules
    return None  # non-reddit: no machine-readable rules
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def resolve_policy(
    store,
    community: str,
    *,
    platform: str = "reddit",
    fetcher: Optional[Callable[[str], list[dict]]] = None,
    ttl_days: int = DEFAULT_TTL_DAYS,
    now: Optional[datetime] = None,
    use_cache: bool = True,
) -> dict:
    """Resolve the policy verdict for a community, consulting and refreshing the cache.

    Order of resolution:
      1. With ``use_cache``, a cached row that is not stale is returned as-is
         (``cached=True``).
      2. If no ``fetcher`` is given and the platform has no default one, the
         verdict is ``ok`` with a norm note; nothing is cached.
      3. Otherwise the fetcher is called. If it raises, the verdict is
         ``unavailable`` with the error text; nothing is cached.
      4. The fetched rules are judged by ``decide_policy``, stored with
         ``ttl_days``, and returned (``cached=False``).

    Returns:
        ``{platform, community, verdict, cited_rules, cached, [note], [error]}``.
    """
    ident = {"platform": platform, "community": community}

    if use_cache:
        row = store.get_policy(platform, community)
        if row and not is_stale(row["fetched_at"], row["ttl_days"], now):
            return {
                **ident,
                "verdict": row["verdict"],
                "cited_rules": row["cited_rules"],
                "cached": True,
            }

    if fetcher is None:
        fetcher = _default_fetcher(platform)
    if fetcher is None:
        # Non-reddit platform with no machine-readable rules: ok + a norm note.
        note = NORM_NOTES.get(platform, "No machine-readable rules; follow community norms.")
        return {**ident, "verdict": "ok", "cited_rules": [], "cached": False, "note": note}

    try:
        rules = fetcher(community)
    except Exception as exc:  # network / parse failure -> clean unavailable, not a crash
        return {
            **ident,
            "verdict": "unavailable",
            "cited_rules": [],
            "cached": False,
            "error": f"{type(exc).__name__}: {exc}",
        }

    verdict, cited = decide_policy(rules)
    store.put_policy(platform, community, verdict, cited, ttl_days=ttl_days)
    return {**ident, "verdict": verdict, "cited_rules": cited, "cached": False}
|