cc-pushback 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cc_pushback/report.py ADDED
@@ -0,0 +1,484 @@
1
+ """Render the collected feedback corpus into a single self-contained HTML page.
2
+
3
+ The page leads with a corpus summary and a handful of highlights, then lists every
4
+ sample with a kind filter, a free-text search, and an expandable context window. The
5
+ summary and highlights are written by the ``claude`` CLI when it is available and
6
+ fall back to deterministic heuristics otherwise.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import re
13
+ import subprocess
14
+ from collections import Counter, defaultdict
15
+ from dataclasses import dataclass
16
+ from datetime import UTC, datetime
17
+ from html import escape
18
+ from itertools import zip_longest
19
+ from pathlib import Path
20
+ from typing import TYPE_CHECKING
21
+
22
+ from cc_transcript.domains.mining import NOISE_FLOOR, effective_confidence
23
+ from cc_transcript.domains.mining.confidence import from_payload
24
+
25
+ from cc_pushback.claude import claude_available, run_claude
26
+ from cc_pushback.context import ContextSnapshot
27
+
28
+ if TYPE_CHECKING:
29
+ from collections.abc import Mapping, Sequence
30
+ from typing import Any
31
+
32
+ from cc_transcript.domains.mining import CandidateSignal
33
+
34
+ from cc_pushback.context import ContextTurn
35
+
36
# Default character cap ``truncate`` applies to a context turn's text.
CONTEXT_TURN_LIMIT = 700
# Character cap on each sample's whitespace-collapsed text in the summary prompt.
SAMPLE_TEXT_LIMIT = 400
# How many of the longest non-noise samples per source kind enter the highlight pool.
HIGHLIGHT_POOL_PER_KIND = 8
# Upper bound on the number of highlights the heuristic fallback selects.
HEURISTIC_HIGHLIGHTS = 12
40
+
41
+ SUMMARY_SYSTEM = """\
42
+ You analyze a developer's "pushback" — the corrective feedback they give an AI coding assistant.
43
+ You receive corpus statistics and a numbered pool of real feedback samples.
44
+ Return ONLY a JSON object, with no prose around it, of exactly this shape:
45
+ {"narrative": "<2-4 sentences on the developer's pushback style and recurring themes>",
46
+ "highlights": [{"id": <sample id>, "why": "<one short clause on why it is representative>"}]}
47
+ Pick 8-12 highlights, only from the provided sample ids, favoring variety across feedback kinds.
48
+ """
49
+
50
+ CSS = """
51
+ :root{--bg:#0d1117;--panel:#161b22;--border:#30363d;--fg:#e6edf3;--muted:#8b949e;--accent:#58a6ff}
52
+ *{box-sizing:border-box}
53
+ body{margin:0;background:var(--bg);color:var(--fg);font:14px/1.5 ui-monospace,SFMono-Regular,Menlo,monospace}
54
+ h1,h2{font-weight:600}
55
+ header.top{padding:24px;border-bottom:1px solid var(--border)}
56
+ header.top .sub{color:var(--muted)}
57
+ section{padding:16px 24px}
58
+ .stat-cards{display:flex;gap:12px;flex-wrap:wrap}
59
+ .stat{background:var(--panel);border:1px solid var(--border);border-radius:8px;padding:12px 16px}
60
+ .stat .n{font-size:20px;font-weight:600}
61
+ .stat .l{color:var(--muted);font-size:12px}
62
+ table.dist{border-collapse:collapse;margin-top:14px}
63
+ table.dist td{padding:2px 10px 2px 0;white-space:nowrap}
64
+ .bar{display:inline-block;height:10px;background:var(--accent);border-radius:3px;vertical-align:middle}
65
+ .months{display:flex;gap:3px;align-items:flex-end;margin-top:14px}
66
+ .mcol{display:flex;flex-direction:column;align-items:center;justify-content:flex-end}
67
+ .mcol .m{width:22px;background:var(--accent);border-radius:3px 3px 0 0}
68
+ .mcol span{font-size:9px;color:var(--muted);margin-top:3px}
69
+ .narrative{background:var(--panel);border:1px solid var(--border);border-left:3px solid var(--accent);
70
+ border-radius:8px;padding:14px 18px;max-width:80ch;margin-top:14px}
71
+ #controls{position:sticky;top:0;background:var(--bg);display:flex;gap:8px;align-items:center;
72
+ flex-wrap:wrap;border-bottom:1px solid var(--border);z-index:2}
73
+ .kind-btn{background:var(--panel);color:var(--fg);border:1px solid var(--border);border-radius:14px;
74
+ padding:4px 12px;cursor:pointer;font:inherit}
75
+ .kind-btn.active{background:var(--accent);color:#0d1117;border-color:var(--accent)}
76
+ #search{flex:1;min-width:200px;background:var(--panel);color:var(--fg);border:1px solid var(--border);
77
+ border-radius:6px;padding:6px 10px;font:inherit}
78
+ #count{color:var(--muted)}
79
+ label.noise{color:var(--muted);display:flex;gap:4px;align-items:center;cursor:pointer}
80
+ .card{background:var(--panel);border:1px solid var(--border);border-radius:8px;padding:12px 16px;margin:12px 0}
81
+ .card header{display:flex;gap:8px;align-items:center;flex-wrap:wrap;margin-bottom:8px}
82
+ .badge{font-size:11px;padding:2px 8px;border-radius:10px;background:#21262d;border:1px solid var(--border)}
83
+ .badge-transcript_message{color:#8b949e}.badge-review_comment{color:#7ee787}.badge-plan_review{color:#d2a8ff}
84
+ .badge-interrupt_rejection{color:#ff7b72}.badge-superset_issue{color:#ffa657}
85
+ time{color:var(--muted);font-size:12px}
86
+ .chip{font-size:11px;color:var(--muted);background:#21262d;border-radius:6px;padding:1px 6px}
87
+ .text pre{white-space:pre-wrap;word-break:break-word;margin:0;font:inherit}
88
+ details.ctx{margin-top:10px}
89
+ details.ctx summary{color:var(--accent);cursor:pointer}
90
+ .turn{border-left:2px solid var(--border);padding:4px 0 4px 10px;margin:6px 0}
91
+ .turn .role{font-size:10px;text-transform:uppercase;color:var(--muted)}
92
+ .turn .tools{font-size:10px;color:var(--accent);margin-left:6px}
93
+ .turn pre{white-space:pre-wrap;word-break:break-word;margin:2px 0 0;font:inherit;color:var(--muted)}
94
+ .turn-user pre{color:var(--fg)}
95
+ .turn-trigger{border-left-color:var(--accent)}
96
+ .turn-trigger .role::after{content:" \\2190 pushed back on";color:var(--accent)}
97
+ .why{color:var(--accent);font-style:italic;margin:0 0 6px}
98
+ .highlight{margin:12px 0}
99
+ """
100
+
101
+ JS = """
102
+ const cards=[...document.querySelectorAll('#samples .card')];
103
+ const search=document.getElementById('search');
104
+ const count=document.getElementById('count');
105
+ const hideNoise=document.getElementById('hide-noise');
106
+ let kind='all';
107
+ function apply(){
108
+ const q=search.value.trim().toLowerCase();
109
+ let shown=0;
110
+ for(const c of cards){
111
+ const okKind=kind==='all'||c.dataset.kind===kind;
112
+ const okNoise=!hideNoise.checked||c.dataset.noise!=='1';
113
+ const okText=!q||c.textContent.toLowerCase().includes(q);
114
+ const vis=okKind&&okNoise&&okText;
115
+ c.style.display=vis?'':'none';
116
+ if(vis)shown++;
117
+ }
118
+ count.textContent=shown+' / '+cards.length;
119
+ }
120
+ document.querySelectorAll('.kind-btn').forEach(b=>b.addEventListener('click',()=>{
121
+ kind=b.dataset.kind;
122
+ document.querySelectorAll('.kind-btn').forEach(x=>x.classList.toggle('active',x===b));
123
+ apply();
124
+ }));
125
+ search.addEventListener('input',apply);
126
+ hideNoise.addEventListener('change',apply);
127
+ apply();
128
+ """
129
+
130
+
131
+ @dataclass(frozen=True, slots=True)
132
+ class Sample:
133
+ """One stored feedback event, decoded from a :meth:`FeedbackStore.events` row.
134
+
135
+ Attributes:
136
+ id: The event's database id.
137
+ source_kind: Which detector produced it.
138
+ occurred_at: The ISO timestamp of the feedback.
139
+ text: The verbatim pushback text.
140
+ payload: The detector-specific metadata, decoded from ``payload_json``.
141
+ context: The conversational window around the feedback.
142
+ origin_path: The transcript file the event came from.
143
+ session_id: The session the event came from.
144
+ signal: The de-noising confidence signal, decoded from the payload.
145
+ """
146
+
147
+ id: int
148
+ source_kind: str
149
+ occurred_at: str
150
+ text: str
151
+ payload: Mapping[str, Any]
152
+ context: ContextSnapshot
153
+ origin_path: str | None
154
+ session_id: str | None
155
+ signal: CandidateSignal | None = None
156
+
157
+ @classmethod
158
+ def from_row(cls, row: Mapping[str, object]) -> Sample:
159
+ """Decodes a :meth:`FeedbackStore.events` row into a :class:`Sample`."""
160
+ payload = json.loads(str(row["payload_json"])) if row["payload_json"] else {}
161
+ return cls(
162
+ id=int(str(row["id"])),
163
+ source_kind=str(row["source_kind"]),
164
+ occurred_at=str(row["occurred_at"]),
165
+ text=str(row["text"]),
166
+ payload=payload,
167
+ context=ContextSnapshot.from_json(str(row["context_json"])),
168
+ origin_path=str(row["origin_path"]) if row["origin_path"] else None,
169
+ session_id=str(row["session_id"]) if row["session_id"] else None,
170
+ signal=from_payload(payload.get("signal")),
171
+ )
172
+
173
+
174
+ @dataclass(frozen=True, slots=True)
175
+ class CorpusStats:
176
+ """Aggregate counts describing the whole corpus.
177
+
178
+ Attributes:
179
+ total: The total number of samples.
180
+ by_kind: Sample counts keyed by source kind, most common first.
181
+ noise: The number of low-signal samples (bare interrupt markers, hook
182
+ errors, and near-empty messages).
183
+ sessions: The number of distinct sessions.
184
+ projects: The number of distinct originating projects.
185
+ first: The earliest sample date (``YYYY-MM-DD``).
186
+ last: The latest sample date (``YYYY-MM-DD``).
187
+ by_month: Sample counts keyed by ``YYYY-MM``, in chronological order.
188
+ """
189
+
190
+ total: int
191
+ by_kind: Mapping[str, int]
192
+ noise: int
193
+ sessions: int
194
+ projects: int
195
+ first: str
196
+ last: str
197
+ by_month: Mapping[str, int]
198
+
199
+
200
+ @dataclass(frozen=True, slots=True)
201
+ class Highlight:
202
+ """A standout sample chosen for the summary, with an optional rationale.
203
+
204
+ Attributes:
205
+ event_id: The id of the highlighted sample.
206
+ why: A short clause on why it is representative, when one was written.
207
+ """
208
+
209
+ event_id: int
210
+ why: str | None = None
211
+
212
+
213
+ @dataclass(frozen=True, slots=True)
214
+ class Summary:
215
+ """The corpus overview rendered above the sample list.
216
+
217
+ Attributes:
218
+ stats: The aggregate corpus counts.
219
+ highlights: The standout samples chosen for the summary.
220
+ narrative: A prose description of the developer's pushback style, when the
221
+ ``claude`` CLI produced one.
222
+ """
223
+
224
+ stats: CorpusStats
225
+ highlights: tuple[Highlight, ...]
226
+ narrative: str | None
227
+
228
+
229
def is_noise(sample: Sample) -> bool:
    """Reports whether ``sample``'s effective confidence falls below ``NOISE_FLOOR``."""
    confidence = effective_confidence(sample.signal)
    return confidence < NOISE_FLOOR
231
+
232
+
233
def project_label(origin_path: str) -> str:
    """Derives a short project label from the directory holding ``origin_path``.

    The name of the file's parent directory is searched for the markers
    ``-Code-``, ``-projects-`` and ``-worktrees-``, in that order. For the first
    marker present, the text after its last occurrence is the label. When no
    marker is present, the directory name with leading dashes removed is used.

    Args:
        origin_path: Path of the transcript file.

    Returns:
        The derived label.
    """
    folder = Path(origin_path).parent.name
    for marker in ("-Code-", "-projects-", "-worktrees-"):
        if marker in folder:
            return folder.rsplit(marker, 1)[-1]
    return folder.lstrip("-")
239
+
240
+
241
def corpus_stats(samples: Sequence[Sample]) -> CorpusStats:
    """Computes the aggregate :class:`CorpusStats` for ``samples``.

    Args:
        samples: The corpus to count.

    Returns:
        Counts by kind (most common first) and by ``YYYY-MM`` prefix of
        ``occurred_at`` (sorted by key), the number of noise samples, the number
        of distinct non-empty session ids and origin directories, and the first
        ten characters of the smallest and largest ``occurred_at`` strings
        (empty strings for an empty corpus).
    """
    timestamps = sorted(sample.occurred_at for sample in samples)
    if timestamps:
        first, last = timestamps[0][:10], timestamps[-1][:10]
    else:
        first = last = ""

    kind_counts = Counter(sample.source_kind for sample in samples)
    month_counts = Counter(sample.occurred_at[:7] for sample in samples)
    session_ids = {sample.session_id for sample in samples if sample.session_id}
    project_dirs = {Path(sample.origin_path).parent.name for sample in samples if sample.origin_path}

    return CorpusStats(
        total=len(samples),
        by_kind=dict(kind_counts.most_common()),
        noise=sum(1 for sample in samples if is_noise(sample)),
        sessions=len(session_ids),
        projects=len(project_dirs),
        first=first,
        last=last,
        by_month={month: month_counts[month] for month in sorted(month_counts)},
    )
253
+
254
+
255
def candidate_pool(samples: Sequence[Sample]) -> dict[str, list[Sample]]:
    """Groups the non-noise samples by kind and keeps the longest few of each.

    Args:
        samples: The corpus to draw candidates from.

    Returns:
        A dict from source kind to at most ``HIGHLIGHT_POOL_PER_KIND`` samples,
        ordered by descending text length. Kinds appear in first-seen order.
    """
    grouped: dict[str, list[Sample]] = {}
    for sample in samples:
        if is_noise(sample):
            continue
        grouped.setdefault(sample.source_kind, []).append(sample)

    trimmed: dict[str, list[Sample]] = {}
    for kind, members in grouped.items():
        # A stable sort keeps equally long samples in their original order.
        longest_first = sorted(members, key=lambda member: len(member.text), reverse=True)
        trimmed[kind] = longest_first[:HIGHLIGHT_POOL_PER_KIND]
    return trimmed
264
+
265
+
266
def heuristic_highlight_ids(pool: Mapping[str, Sequence[Sample]]) -> list[int]:
    """Picks highlight ids by taking samples round-robin across the pool's kinds.

    Args:
        pool: Candidate samples keyed by source kind.

    Returns:
        The ids of the first ``HEURISTIC_HIGHLIGHTS`` samples when the kinds'
        lists are interleaved position by position.
    """
    interleaved: list[int] = []
    for column in zip_longest(*pool.values()):
        # zip_longest pads exhausted kinds with None; skip the padding.
        interleaved.extend(sample.id for sample in column if sample is not None)
    return interleaved[:HEURISTIC_HIGHLIGHTS]
269
+
270
+
271
def summary_prompt(pool: Mapping[str, Sequence[Sample]], stats: CorpusStats) -> str:
    """Builds the user prompt sent to the model for the corpus summary.

    Args:
        pool: Candidate samples keyed by source kind.
        stats: The aggregate corpus counts quoted in the prompt header.

    Returns:
        A newline-joined prompt: two statistics lines, a blank line, a heading,
        then one ``[id] (kind) text`` line per pooled sample with whitespace
        collapsed and the text cut to ``SAMPLE_TEXT_LIMIT`` characters.
    """
    kind_summary = ", ".join(f"{kind}={n}" for kind, n in stats.by_kind.items())
    lines = [
        f"Corpus: {stats.total} samples across {stats.sessions} sessions, {stats.first} to {stats.last}.",
        "By kind: " + kind_summary,
        "",
        "Feedback samples (id, kind, text):",
    ]
    for kind, group in pool.items():
        for sample in group:
            collapsed = " ".join(sample.text.split())
            lines.append(f"[{sample.id}] ({kind}) {collapsed[:SAMPLE_TEXT_LIMIT]}")
    return "\n".join(lines)
285
+
286
+
287
def parse_summary_json(raw: str) -> tuple[str, list[dict[str, Any]]] | None:
    """Extracts the ``narrative`` and ``highlights`` from a model reply.

    The reply is scanned for the first JSON object that has a string
    ``narrative`` and a list ``highlights``. Scanning object by object (rather
    than matching from the first ``{`` to the last ``}``) means stray braces in
    prose around the JSON do not make an otherwise valid reply unparseable.

    Args:
        raw: The raw text returned by the model.

    Returns:
        ``(narrative, picks)``, where ``picks`` holds only the highlight entries
        that are dicts with an integer (non-boolean) ``id``; or ``None`` when no
        object of the expected shape is found.
    """
    decoder = json.JSONDecoder()
    start = raw.find("{")
    while start != -1:
        try:
            data, end = decoder.raw_decode(raw, start)
        except json.JSONDecodeError:
            # Not a JSON object at this brace (e.g. prose); try the next one.
            start = raw.find("{", start + 1)
            continue
        narrative, picks = data.get("narrative"), data.get("highlights")
        if isinstance(narrative, str) and isinstance(picks, list):
            return narrative, [
                pick
                for pick in picks
                if isinstance(pick, dict)
                and isinstance(pick.get("id"), int)
                # bool is a subclass of int; ``true`` is not a sample id.
                and not isinstance(pick.get("id"), bool)
            ]
        # Wrong shape: resume after this object so its nested objects are skipped.
        start = raw.find("{", end)
    return None
298
+
299
+
300
async def llm_summary(
    pool: Mapping[str, Sequence[Sample]], stats: CorpusStats, model: str
) -> tuple[str, tuple[Highlight, ...]] | None:
    """Asks the ``claude`` CLI for a narrative and highlight picks.

    Args:
        pool: Candidate samples keyed by source kind; only their ids are
            accepted as highlights.
        stats: The aggregate corpus counts quoted in the prompt.
        model: The model to run.

    Returns:
        ``(narrative, highlights)`` when the CLI ran, its reply parsed, and at
        least one pick referenced a pooled sample; otherwise ``None`` so the
        caller can fall back to heuristics.
    """
    try:
        raw = await run_claude(summary_prompt(pool, stats), system=SUMMARY_SYSTEM, model=model)
    except (subprocess.SubprocessError, OSError):
        # Launching a subprocess can also fail with OSError (missing or
        # non-executable binary); treat that like any other CLI failure.
        return None
    if (parsed := parse_summary_json(raw)) is None:
        return None
    narrative, picks = parsed
    valid = {s.id for group in pool.values() for s in group}
    seen: set[int] = set()
    highlights: list[Highlight] = []
    for pick in picks:
        event_id = pick["id"]
        # Skip ids outside the pool and repeats, which would render duplicate cards.
        if event_id not in valid or event_id in seen:
            continue
        seen.add(event_id)
        why = pick.get("why")
        # The rationale is model output: anything but a non-blank string is
        # dropped, since the renderer HTML-escapes it as text.
        highlights.append(Highlight(event_id, why if isinstance(why, str) and why.strip() else None))
    return (narrative, tuple(highlights)) if highlights else None
313
+
314
+
315
async def build_summary(samples: Sequence[Sample], *, use_llm: bool, model: str) -> Summary:
    """Builds the corpus :class:`Summary`, consulting the ``claude`` CLI when allowed.

    The model supplies the narrative and highlights only when ``use_llm`` is
    set, ``claude`` is available, and :func:`llm_summary` returns a result. In
    every other case the highlights come from the deterministic heuristic and
    there is no narrative, so the export does not depend on the model.

    Args:
        samples: The full corpus to summarize.
        use_llm: Whether to consult the ``claude`` CLI for the narrative.
        model: The model to run when consulting ``claude``.

    Returns:
        The assembled :class:`Summary`.
    """
    stats = corpus_stats(samples)
    pool = candidate_pool(samples)

    if use_llm and claude_available():
        result = await llm_summary(pool, stats, model)
        if result is not None:
            narrative, highlights = result
            return Summary(stats=stats, highlights=highlights, narrative=narrative)

    fallback = tuple(Highlight(event_id) for event_id in heuristic_highlight_ids(pool))
    return Summary(stats=stats, highlights=fallback, narrative=None)
335
+
336
+
337
def truncate(text: str, limit: int = CONTEXT_TURN_LIMIT) -> str:
    """Cuts ``text`` to ``limit`` characters, marking the cut with an ellipsis.

    Args:
        text: The text to shorten.
        limit: The maximum number of characters kept from ``text``.

    Returns:
        ``text`` unchanged when it fits; otherwise its first ``limit``
        characters with trailing whitespace stripped and ``…`` appended.
    """
    if len(text) <= limit:
        return text
    head = text[:limit]
    return head.rstrip() + "…"
339
+
340
+
341
def render_turn(turn: ContextTurn, *, is_trigger: bool = False) -> str:
    """Renders one context turn as an HTML ``<div>``.

    Args:
        turn: The turn to render; its ``role``, ``tool_calls`` and ``text`` are used.
        is_trigger: Whether to add the ``turn-trigger`` class marking the turn
            that was pushed back on.

    Returns:
        The turn's HTML, with the role, tool names and truncated text escaped.
    """
    # The role comes from transcript data, so it is escaped for the class
    # attribute as well as for the visible label.
    role = escape(turn.role)
    cls = f"turn turn-{role}" + (" turn-trigger" if is_trigger else "")
    tools = f'<span class="tools">{escape(" ".join(turn.tool_calls))}</span>' if turn.tool_calls else ""
    return (
        f'<div class="{cls}"><span class="role">{role}</span>{tools}'
        f"<pre>{escape(truncate(turn.text))}</pre></div>"
    )
348
+
349
+
350
def render_context(ctx: ContextSnapshot) -> str:
    """Renders a context window as a collapsible ``<details>`` element.

    Turns in ``ctx.before`` come first, with any turn equal to ``ctx.trigger``
    marked as the trigger. If the trigger is set but not among them, it is
    added after them. The ``ctx.after`` turns follow.

    Args:
        ctx: The context snapshot to render.

    Returns:
        The ``<details>`` HTML, or an empty string when there are no turns.
    """
    trigger = ctx.trigger
    rendered = [render_turn(turn, is_trigger=(turn == trigger)) for turn in ctx.before]
    trigger_missing = trigger is not None and trigger not in ctx.before
    if trigger_missing:
        rendered.append(render_turn(trigger, is_trigger=True))
    for turn in ctx.after:
        rendered.append(render_turn(turn))
    if not rendered:
        return ""
    body = "".join(rendered)
    return f'<details class="ctx"><summary>context ({len(rendered)} turns)</summary>{body}</details>'
358
+
359
+
360
def meta_chips(sample: Sample) -> str:
    """Renders a sample's metadata as a run of ``chip`` spans.

    Chips are emitted for the truthy payload values under ``detector``,
    ``format``, ``tool``, ``severity`` and ``track``. A ``file`` chip follows,
    suffixed with ``:line`` when ``line_start`` or ``line`` is truthy. The
    project label comes last when the sample has an origin path.

    Args:
        sample: The sample whose payload and origin path are read.

    Returns:
        The concatenated, HTML-escaped chip spans (possibly empty).
    """
    payload = sample.payload
    labels: list[str] = []
    for key in ("detector", "format", "tool", "severity", "track"):
        if payload.get(key):
            labels.append(str(payload[key]))

    file = payload.get("file")
    if file:
        line = payload.get("line_start") or payload.get("line")
        labels.append(f"{file}:{line}" if line else str(file))

    if sample.origin_path:
        labels.append(project_label(sample.origin_path))

    return "".join(f'<span class="chip">{escape(label)}</span>' for label in labels)
369
+
370
+
371
def render_card(sample: Sample) -> str:
    """Renders one sample as an ``<article class="card">`` element.

    The card carries ``data-kind`` and ``data-noise`` attributes, which the
    page's script reads when filtering. It contains a kind badge, the timestamp
    cut to 19 characters, the metadata chips, the escaped text, and the
    collapsible context.

    Args:
        sample: The sample to render.

    Returns:
        The card's HTML.
    """
    kind = escape(sample.source_kind)
    noise_flag = "1" if is_noise(sample) else "0"
    stamp = escape(sample.occurred_at[:19])
    pieces = (
        f'<article class="card" data-kind="{kind}" data-noise="{noise_flag}">',
        f'<header><span class="badge badge-{kind}">{kind}</span>',
        f"<time>{stamp}</time>{meta_chips(sample)}</header>",
        f'<div class="text"><pre>{escape(sample.text)}</pre></div>',
        render_context(sample.context),
        "</article>",
    )
    return "".join(pieces)
383
+
384
+
385
def render_highlight(sample: Sample, why: str | None) -> str:
    """Wraps a sample's card in a ``highlight`` div, preceded by its rationale.

    Args:
        sample: The highlighted sample.
        why: The rationale shown above the card; omitted when falsy.

    Returns:
        The highlight's HTML.
    """
    if why:
        blurb = f'<p class="why">{escape(why)}</p>'
    else:
        blurb = ""
    card = render_card(sample)
    return f'<div class="highlight">{blurb}{card}</div>'
388
+
389
+
390
def render_stat_cards(stats: CorpusStats) -> str:
    """Renders the headline counts as a row of ``stat`` cards.

    Args:
        stats: The corpus counts; ``total``, ``sessions``, ``projects``,
            ``noise``, ``first`` and ``last`` are shown.

    Returns:
        A ``stat-cards`` div with one card per figure, values and labels escaped.
    """
    date_span = f"{stats.first} – {stats.last}"
    figures = [
        (stats.total, "samples"),
        (stats.sessions, "sessions"),
        (stats.projects, "projects"),
        (stats.noise, "low-signal"),
        (date_span, "span"),
    ]
    parts = ['<div class="stat-cards">']
    for value, label in figures:
        parts.append(
            f'<div class="stat"><div class="n">{escape(str(value))}</div><div class="l">{escape(label)}</div></div>'
        )
    parts.append("</div>")
    return "".join(parts)
402
+
403
+
404
def render_dist(stats: CorpusStats) -> str:
    """Renders the per-kind counts as a table with proportional bars.

    Args:
        stats: The corpus counts; only ``by_kind`` is read.

    Returns:
        A ``dist`` table with one row per kind. Each bar's width is the kind's
        share of the largest count, scaled to 200px.
    """
    counts = stats.by_kind
    largest = max(counts.values(), default=1)
    rows = []
    for kind, n in counts.items():
        width = round(n / largest * 200)
        rows.append(
            f"<tr><td>{escape(kind)}</td><td>{n}</td>"
            f'<td><span class="bar" style="width:{width}px"></span></td></tr>'
        )
    return '<table class="dist">' + "".join(rows) + "</table>"
412
+
413
+
414
def render_months(by_month: Mapping[str, int]) -> str:
    """Renders the per-month counts as a small column chart.

    Args:
        by_month: Sample counts keyed by ``YYYY-MM``.

    Returns:
        A ``months`` div with one column per month, or an empty string when
        ``by_month`` is empty. Column height is the month's share of the largest
        count scaled to 72px, plus a 4px base. The label under each column is
        the key from its sixth character on.
    """
    if not by_month:
        return ""
    peak = max(by_month.values())
    columns = []
    for month, n in by_month.items():
        height = round(n / peak * 72) + 4
        columns.append(
            f'<div class="mcol"><div class="m" style="height:{height}px" '
            f'title="{escape(month)}: {n}"></div><span>{escape(month[5:])}</span></div>'
        )
    return '<div class="months">' + "".join(columns) + "</div>"
424
+
425
+
426
def render_controls(stats: CorpusStats) -> str:
    """Renders the filter bar: kind buttons, search box, noise toggle and counter.

    Args:
        stats: The corpus counts; ``total`` and ``by_kind`` are read.

    Returns:
        The ``controls`` section. It starts with an active ``all`` button
        followed by one button per kind labelled with its count, and the counter
        starts at ``total / total``.
    """
    buttons = []
    for kind, n in [("all", stats.total), *stats.by_kind.items()]:
        is_all = kind == "all"
        active = " active" if is_all else ""
        suffix = "" if is_all else f" {n}"
        buttons.append(
            f'<button class="kind-btn{active}" data-kind="{escape(kind)}">{escape(kind)}{suffix}</button>'
        )
    total = stats.total
    return "".join(
        (
            f'<section id="controls"><div class="kinds">{"".join(buttons)}</div>',
            '<input id="search" type="search" placeholder="search text…">',
            '<label class="noise"><input type="checkbox" id="hide-noise"> hide low-signal</label>',
            f'<span id="count">{total} / {total}</span></section>',
        )
    )
438
+
439
+
440
def render_html(samples: Sequence[Sample], summary: Summary) -> str:
    """Renders the corpus and its summary as one self-contained HTML page.

    The page inlines the module's ``CSS`` and ``JS`` strings and links to no
    external resources, so the result can be saved to a file and opened
    directly in a browser.

    Args:
        samples: Every sample to list, in display order.
        summary: The overview to render above the list.

    Returns:
        The complete HTML document.
    """
    stats = summary.stats

    # Highlights whose id is not among ``samples`` are silently left out.
    samples_by_id = {sample.id: sample for sample in samples}
    highlight_blocks = [
        render_highlight(samples_by_id[pick.event_id], pick.why)
        for pick in summary.highlights
        if pick.event_id in samples_by_id
    ]
    highlights = "\n".join(highlight_blocks)

    narrative = ""
    if summary.narrative:
        narrative = f'<div class="narrative">{escape(summary.narrative)}</div>'

    generated = datetime.now(UTC).strftime("%Y-%m-%d %H:%M UTC")

    head = (
        "<!doctype html><html lang='en'><head><meta charset='utf-8'>"
        "<meta name='viewport' content='width=device-width,initial-scale=1'>"
        "<title>cc-pushback samples</title><style>" + CSS + "</style></head><body>"
    )
    banner = (
        f'<header class="top"><h1>cc-pushback — feedback samples</h1>'
        f'<div class="sub">{stats.total} samples · generated {escape(generated)}</div></header>'
    )
    summary_section = (
        "<section><h2>Summary</h2>"
        + render_stat_cards(stats)
        + render_dist(stats)
        + render_months(stats.by_month)
        + narrative
        + "</section>"
    )
    highlight_section = (
        '<section id="highlights"><h2>Highlights</h2>' + (highlights or "<p>none</p>") + "</section>"
    )
    sample_section = (
        '<section id="samples">' + "\n".join(render_card(sample) for sample in samples) + "</section>"
    )
    tail = "<script>" + JS + "</script></body></html>"

    return head + banner + summary_section + highlight_section + render_controls(stats) + sample_section + tail
cc_pushback/scan.py ADDED
@@ -0,0 +1,58 @@
1
+ """The scan orchestrator: discover, parse, detect, and persist, incrementally."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import TYPE_CHECKING
7
+
8
+ from cc_transcript import TranscriptDiscovery, TranscriptParser
9
+
10
+ from cc_pushback.detectors import detect
11
+
12
+ if TYPE_CHECKING:
13
+ from collections.abc import Sequence
14
+ from pathlib import Path
15
+
16
+ from cc_pushback.store import FeedbackStore
17
+
18
+
19
+ @dataclass(frozen=True, slots=True)
20
+ class ScanReport:
21
+ """The outcome of one scan pass.
22
+
23
+ Attributes:
24
+ scanned: The number of transcripts parsed and recorded.
25
+ inserted: The number of newly inserted feedback events.
26
+ """
27
+
28
+ scanned: int
29
+ inserted: int
30
+
31
+
32
async def scan(store: FeedbackStore, roots: Sequence[Path], *, full: bool = False) -> ScanReport:
    """Scans the transcripts under ``roots`` for feedback, incrementally.

    Unless ``full`` is set, the store's recorded mtimes are passed to discovery
    so that only new or modified transcripts are parsed. Each parsed transcript
    is run through the detectors and recorded in the store together with its
    candidates. A transcript the parser does not yield (for example one Claude
    Code is still appending to) is never recorded, so the next scan retries it.

    Args:
        store: The store to read mtimes from and write candidates to.
        roots: The directories to search recursively for transcripts.
        full: When set, re-scan every transcript, ignoring recorded mtimes.

    Returns:
        The :class:`ScanReport` for this pass.
    """
    known_mtimes = await store.file_mtimes() if not full else None

    pending: list[tuple[Path, float]] = []
    for root in roots:
        discovered = await TranscriptDiscovery.find_in(root, known_mtimes=known_mtimes)
        pending.extend(discovered)

    scanned = inserted = 0
    async for parsed in TranscriptParser.stream_transcripts(pending):
        candidates = detect(parsed.path, parsed.events)
        inserted += await store.record_file_scan(str(parsed.path), parsed.mtime, candidates)
        scanned += 1
    return ScanReport(scanned=scanned, inserted=inserted)
cc_pushback/serve.py ADDED
@@ -0,0 +1,60 @@
1
+ """Serve a rendered page from memory over a transient async HTTP server."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import socket
6
+ import webbrowser
7
+
8
+ import anyio
9
+ import click
10
+ from aiohttp import web
11
+
12
+ BIND_HOST = "0.0.0.0"
13
+
14
+
15
def build_app(page: bytes) -> web.Application:
    """Builds an application that answers every GET path with ``page``.

    Args:
        page: The already-encoded HTML document to serve.

    Returns:
        An :class:`aiohttp.web.Application` with a single catch-all GET route.
    """

    async def handler(request: web.Request) -> web.Response:
        # The request is ignored: every path returns the same in-memory page.
        return web.Response(body=page, content_type="text/html", charset="utf-8")

    application = web.Application()
    application.router.add_get("/{tail:.*}", handler)
    return application
22
+
23
+
24
def lan_ip() -> str:
    """Returns the local address the OS would use for outbound traffic.

    A UDP socket is connected to ``10.255.255.255`` and its local address is
    read back.

    Returns:
        The socket's local IPv4 address, or ``127.0.0.1`` when connecting
        raises :class:`OSError`.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        probe.connect(("10.255.255.255", 1))
        address = probe.getsockname()[0]
    except OSError:
        address = "127.0.0.1"
    finally:
        probe.close()
    return address
31
+
32
+
33
async def serve(page: bytes, *, port: int, open_browser: bool) -> None:
    """Serves ``page`` on all interfaces until interrupted, printing its URLs.

    Binds ``0.0.0.0`` so the page is reachable from other hosts (for example over
    Tailscale), and prints both the loopback and LAN/Tailscale-facing URLs.

    The listening socket is bound before the runner is set up, and is closed on
    every failure path, so a port that is already in use neither leaks the
    socket nor leaves a set-up runner behind.

    Args:
        page: The HTML document to serve on every request.
        port: The port to bind; ``0`` lets the OS pick a free one.
        open_browser: Whether to open the loopback URL in a browser once serving.

    Raises:
        OSError: If the socket cannot be bound to ``port``.
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        sock.bind((BIND_HOST, port))
        bound = sock.getsockname()[1]
        runner = web.AppRunner(build_app(page))
        await runner.setup()
    except BaseException:
        # Nothing owns the socket yet, so release it here before propagating.
        sock.close()
        raise

    serving = False
    try:
        await web.SockSite(runner, sock).start()
        local = f"http://127.0.0.1:{bound}/"
        click.echo(f"serving on {local} · http://{lan_ip()}:{bound}/ (Ctrl-C to stop)")
        serving = True
        if open_browser:
            webbrowser.open(local)
        await anyio.sleep_forever()
    finally:
        # Shield the cleanup so cancellation (Ctrl-C) cannot interrupt it.
        with anyio.CancelScope(shield=True):
            await runner.cleanup()
        # Harmless if the site already closed it; needed if the site never started.
        sock.close()
        if serving:
            click.echo("\nstopped")
cc_pushback/spec.py ADDED
@@ -0,0 +1,37 @@
1
+ """cc-pushback's event-filter policy, composed from cc-transcript primitives.
2
+
3
+ Keeps user turns that carry pushback worth learning from: drops structural noise,
4
+ agent-injected banners, approve-and-advance directives, automated stop-hook output,
5
+ trivial acknowledgements, very short control messages, and sidechain/meta/compacted/
6
+ empty turns. Interrupt markers are deliberately kept, so a turn that pairs a marker
7
+ with a real correction survives; a bare marker is dropped by the detectors.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from cc_transcript import (
13
+ RESUME_PHRASE_SET,
14
+ TRIVIAL_ACK_SET,
15
+ USERS,
16
+ FilterSpec,
17
+ build_spec,
18
+ drop_compacted,
19
+ drop_empty,
20
+ drop_junk,
21
+ drop_meta_flag,
22
+ drop_phrases,
23
+ drop_short,
24
+ drop_sidechain,
25
+ keep_only,
26
+ )
27
+
28
# The event filter for pushback mining, composed from cc-transcript primitives.
# Per this module's docstring, the intent is to keep user turns that carry
# pushback and to drop noise. The semantics of each primitive are defined in
# cc_transcript and not shown here — NOTE(review): confirm there before
# changing the set or order of rules.
PUSHBACK_SPEC: FilterSpec = build_spec(
    keep_only("user"),
    drop_sidechain(),
    drop_meta_flag("is_meta"),
    drop_compacted(),
    drop_empty(only_from=USERS),
    # No "interrupt" category is listed here; the module docstring says
    # interrupt markers are deliberately kept.
    drop_junk("structural", "agent_injection", "stop_hook", "continuation", "command_echo"),
    drop_phrases(TRIVIAL_ACK_SET | RESUME_PHRASE_SET),
    # presumably a minimum-length threshold for "very short control messages";
    # the unit (characters vs. words) is not visible here — TODO confirm
    drop_short(2),
)
cc_pushback/store.py ADDED
@@ -0,0 +1,34 @@
1
+ """The SQLite feedback store: the mining-domain store with cc-pushback's default path.
2
+
3
+ The store mechanism lives in :mod:`cc_transcript.domains.mining`; this module adds
4
+ cc-pushback's default database location and re-exports the store building blocks for
5
+ back-compat.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from pathlib import Path
11
+
12
+ from cc_transcript.domains.mining import FEEDBACK_DDL, Stats, event_row
13
+ from cc_transcript.domains.mining import FeedbackStore as BaseFeedbackStore
14
+
15
+ __all__ = ["FEEDBACK_DDL", "FeedbackStore", "Stats", "event_row"]
16
+
17
+
18
class FeedbackStore(BaseFeedbackStore):
    """The mining-domain feedback store with cc-pushback's default location.

    The storage behaviour is inherited from
    :class:`cc_transcript.domains.mining.FeedbackStore`, which layers the
    ``feedback_events`` table onto cc-transcript's file-mtime ledger. Recording
    a scanned file and inserting its candidates commit in a single transaction,
    so a scan either records the file with all its candidates or records
    nothing.

    Example:
        >>> async with await FeedbackStore.open(FeedbackStore.default_path()) as store:
        ...     await store.record_file_scan(str(path), mtime, candidates)
    """

    @staticmethod
    def default_path() -> Path:
        """Returns the default database path, ``~/.cc-pushback/feedback.db``."""
        return Path.home().joinpath(".cc-pushback", "feedback.db")