cognit 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognit/__init__.py +1 -0
- cognit/__main__.py +2 -0
- cognit/cli/__init__.py +36 -0
- cognit/cli/take.py +246 -0
- cognit/cli/version.py +1 -0
- cognit/comment/__init__.py +0 -0
- cognit/comment/parse.py +51 -0
- cognit/comment/render.py +123 -0
- cognit/engine/__init__.py +0 -0
- cognit/engine/_mermaid_docker/Dockerfile +20 -0
- cognit/engine/_mermaid_docker/README.md +28 -0
- cognit/engine/_mermaid_docker/__init__.py +0 -0
- cognit/engine/_mermaid_docker/validate.mjs +32 -0
- cognit/engine/generate.py +147 -0
- cognit/engine/grade.py +53 -0
- cognit/engine/llm.py +26 -0
- cognit/engine/llm_anthropic.py +268 -0
- cognit/engine/llm_claude_agent.py +216 -0
- cognit/engine/llm_fake.py +52 -0
- cognit/engine/mermaid.py +243 -0
- cognit/engine/models.py +138 -0
- cognit/engine/prompts/__init__.py +0 -0
- cognit/engine/prompts/generate.txt +15 -0
- cognit/engine/prompts/grade_open.txt +13 -0
- cognit/engine/prompts/mermaid.txt +15 -0
- cognit/engine/prompts/system_generate.txt +39 -0
- cognit/engine/prompts/system_grade.txt +29 -0
- cognit/engine/prompts/system_mermaid.txt +32 -0
- cognit/ghio/__init__.py +0 -0
- cognit/ghio/diff.py +78 -0
- cognit/ghio/pr.py +86 -0
- cognit/py.typed +0 -0
- cognit/server/__init__.py +0 -0
- cognit/server/app.py +106 -0
- cognit/server/assets/index.html +50 -0
- cognit/server/assets/mermaid.min.js +2024 -0
- cognit/server/assets/quiz.js +593 -0
- cognit/server/assets/styles.css +739 -0
- cognit-0.1.0.dist-info/METADATA +13 -0
- cognit-0.1.0.dist-info/RECORD +43 -0
- cognit-0.1.0.dist-info/WHEEL +4 -0
- cognit-0.1.0.dist-info/entry_points.txt +2 -0
- cognit-0.1.0.dist-info/licenses/LICENSE +21 -0
cognit/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Voluntary PR-author comprehension quiz tool."""
|
cognit/__main__.py
ADDED
cognit/cli/__init__.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import typer
|
|
2
|
+
from cognit.cli.version import __version__
|
|
3
|
+
from cognit.cli import take as _take
|
|
4
|
+
|
|
5
|
+
app = typer.Typer(no_args_is_help=True, help="PR-author comprehension quiz tool")
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _version_callback(value: bool) -> None:
|
|
9
|
+
if value:
|
|
10
|
+
typer.echo(f"cognit {__version__}")
|
|
11
|
+
raise typer.Exit()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@app.callback()
def root(
    version: bool = typer.Option(
        False,
        "--version",
        callback=_version_callback,
        is_eager=True,
    ),
) -> None:
    # Nothing to do here: the eager `--version` option is handled entirely by
    # its callback; this function only exists to declare that option.
    return None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@app.command("take")
def take_cmd(
    pr: str | None = typer.Option(None, "--pr", help="PR URL (default: auto-detect)"),
    show_results: bool = typer.Option(False, "--show-results"),
    model: str = typer.Option("claude-sonnet-4-6", "--model"),
    min_diff_lines: int = typer.Option(50, "--min-diff-lines"),
    max_diff_lines: int = typer.Option(2000, "--max-diff-lines"),
) -> None:
    """Take a quiz on a PR. Generates one if none exists, opens browser, grades in-session, optional publish."""
    # Thin CLI shim: collect the parsed options and hand them to the real
    # implementation in `cognit.cli.take`.
    forwarded = {
        "show_results": show_results,
        "model": model,
        "min_diff_lines": min_diff_lines,
        "max_diff_lines": max_diff_lines,
    }
    _take.run(pr, **forwarded)
|
cognit/cli/take.py
ADDED
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
"""`cognit take` — the only command.
|
|
2
|
+
|
|
3
|
+
Generates the quiz in memory (cached locally for resume), opens the browser quiz,
|
|
4
|
+
grades in-session, opt-in publishes the results comment to the PR. The quiz itself
|
|
5
|
+
is **never posted to the PR** — only the results comment, and only when the user
|
|
6
|
+
clicks Publish.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import hashlib
|
|
10
|
+
import json
|
|
11
|
+
import logging
|
|
12
|
+
import os
|
|
13
|
+
import socket
|
|
14
|
+
import subprocess
|
|
15
|
+
import tempfile
|
|
16
|
+
import threading
|
|
17
|
+
import webbrowser
|
|
18
|
+
from collections.abc import Callable
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
import typer
|
|
22
|
+
import uvicorn
|
|
23
|
+
from anthropic import APIError as AnthropicAPIError
|
|
24
|
+
from pydantic import ValidationError
|
|
25
|
+
|
|
26
|
+
from cognit.comment.parse import parse_results
|
|
27
|
+
from cognit.engine.generate import generate_quiz
|
|
28
|
+
from cognit.engine.llm import LLMClient
|
|
29
|
+
from cognit.engine.llm_anthropic import AnthropicLLM
|
|
30
|
+
from cognit.engine.llm_claude_agent import ClaudeAgentLLM
|
|
31
|
+
from cognit.engine.models import Quiz
|
|
32
|
+
from cognit.ghio.diff import fetch_diff_and_files, read_file_at_head
|
|
33
|
+
from cognit.ghio.pr import fetch_pr_info, find_latest_marker_comment, post_comment
|
|
34
|
+
from cognit.server.app import build_app
|
|
35
|
+
|
|
36
|
+
logger = logging.getLogger("cognit.cli.take")
|
|
37
|
+
|
|
38
|
+
_MARKER_RESULTS = "<!-- cognit:results v1 -->"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _make_llm(model: str) -> LLMClient:
    """Choose the LLM adapter from the environment.

    A non-empty `ANTHROPIC_API_KEY` selects `AnthropicLLM`; otherwise
    `ClaudeAgentLLM` is used.

    Args:
        model: model identifier forwarded unchanged to the chosen adapter.

    Returns:
        The constructed adapter.
    """
    has_api_key = bool(os.environ.get("ANTHROPIC_API_KEY"))
    adapter_cls = AnthropicLLM if has_api_key else ClaudeAgentLLM
    return adapter_cls(model=model)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _detect_pr_from_branch() -> str | None:
|
|
55
|
+
"""Use `gh pr view` to find the PR for the current branch."""
|
|
56
|
+
try:
|
|
57
|
+
result = subprocess.run(
|
|
58
|
+
["gh", "pr", "view", "--json", "url"],
|
|
59
|
+
capture_output=True,
|
|
60
|
+
text=True,
|
|
61
|
+
check=True,
|
|
62
|
+
)
|
|
63
|
+
return str(json.loads(result.stdout)["url"])
|
|
64
|
+
except subprocess.CalledProcessError:
|
|
65
|
+
return None
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _free_port() -> int:
|
|
69
|
+
"""Find an unused localhost TCP port."""
|
|
70
|
+
with socket.socket() as s:
|
|
71
|
+
s.bind(("127.0.0.1", 0))
|
|
72
|
+
port: int = s.getsockname()[1]
|
|
73
|
+
return port
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _cache_path_for(pr_url: str) -> Path:
|
|
77
|
+
"""Local cache path for a generated quiz, keyed by PR URL digest.
|
|
78
|
+
|
|
79
|
+
Lives under `$TMPDIR/cognit/`. OS reboot clears it. No explicit lifecycle.
|
|
80
|
+
"""
|
|
81
|
+
digest = hashlib.sha1(pr_url.encode("utf-8")).hexdigest()[:16]
|
|
82
|
+
cache_dir = Path(tempfile.gettempdir()) / "cognit"
|
|
83
|
+
cache_dir.mkdir(parents=True, exist_ok=True)
|
|
84
|
+
return cache_dir / f"{digest}.json"
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _serve_blocking(
    quiz: Quiz,
    pr_url: str,
    llm: LLMClient,
    post_comment_fn: Callable[[str], str],
) -> None:
    """Serve the quiz on a local port, open it in the browser, block in uvicorn.

    Args:
        quiz: quiz handed to `build_app`.
        pr_url: PR URL handed to `build_app`.
        llm: LLM client handed to `build_app`.
        post_comment_fn: callable handed to `build_app` as `post_comment`.
    """
    host = "127.0.0.1"
    web_app = build_app(quiz=quiz, pr_url=pr_url, llm=llm, post_comment=post_comment_fn)
    port = _free_port()
    url = f"http://127.0.0.1:{port}"
    typer.echo(f"opening {url} in your browser... (Ctrl-C to quit)")
    # Open the browser from a daemon thread so the call below can start
    # serving immediately.
    opener = threading.Thread(target=webbrowser.open, args=(url,), daemon=True)
    opener.start()
    uvicorn.run(web_app, host=host, port=port, log_level="warning")
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _generate_in_memory(
    pr_url: str,
    llm: LLMClient,
    model: str,
    min_diff_lines: int,
    max_diff_lines: int,
) -> Quiz | None:
    """Generate a quiz from the PR's diff without posting anything to the PR.

    Args:
        pr_url: URL of the pull request.
        llm: client passed through to `generate_quiz`.
        model: model identifier passed through to `generate_quiz`.
        min_diff_lines: diffs with fewer newline characters are skipped.
        max_diff_lines: diffs with more newline characters are skipped.

    Returns:
        The generated quiz, or None when generation is skipped (the PR body
        contains "quiz: skip", case-insensitively, or the diff size is out of
        bounds).

    Raises:
        typer.Exit: with code 1 when the LLM call fails or the quiz does not
            validate; the reason is echoed to stderr first.
    """

    def _abort(message: str) -> typer.Exit:
        # Report on stderr and build the exit exception for the caller to raise.
        typer.echo(message, err=True)
        return typer.Exit(code=1)

    pr_info = fetch_pr_info(pr_url)
    opted_out = "quiz: skip" in pr_info.body.lower()
    if opted_out:
        typer.echo("quiz: skip in PR body — skipping.")
        return None

    diff, files = fetch_diff_and_files(pr_url, fetch_file_contents=read_file_at_head)
    diff_lines = diff.count("\n")
    if diff_lines < min_diff_lines:
        typer.echo(f"diff is {diff_lines} lines (< {min_diff_lines}) — skipping.")
        return None
    if diff_lines > max_diff_lines:
        typer.echo(f"diff is {diff_lines} lines (> {max_diff_lines}) — skipping.")
        return None

    request = {
        "diff": diff,
        "pr_title": pr_info.title,
        "pr_body": pr_info.body,
        "files": files,
        "pr_number": pr_info.number,
        "llm": llm,
        "model": model,
    }
    try:
        return generate_quiz(**request)
    except AnthropicAPIError as exc:
        raise _abort(f"LLM call failed: {type(exc).__name__}: {exc}") from None
    except ValidationError as exc:
        raise _abort(f"LLM returned malformed quiz: {exc}") from None
    except RuntimeError as exc:
        raise _abort(f"LLM call failed: {exc}") from None
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _load_or_generate(
    pr_url: str,
    llm: LLMClient,
    model: str,
    min_diff_lines: int,
    max_diff_lines: int,
) -> Quiz | None:
    """Return a Quiz: from local cache if present, else generate fresh and cache it.

    The cache file is always read and written as UTF-8, so quizzes containing
    non-ASCII text round-trip regardless of the platform's locale encoding.
    A cache file that cannot be decoded or validated is deleted and the quiz
    is regenerated.

    Args:
        pr_url: URL of the pull request; also the cache key.
        llm: client used when a fresh quiz has to be generated.
        model: model identifier used for generation.
        min_diff_lines: lower diff-size bound forwarded to generation.
        max_diff_lines: upper diff-size bound forwarded to generation.

    Returns:
        The cached or freshly generated quiz, or None when generation was
        skipped.
    """
    cache_path = _cache_path_for(pr_url)
    if cache_path.exists():
        logger.debug("cache hit: loading quiz from %s", cache_path)
        try:
            return Quiz.model_validate_json(cache_path.read_text(encoding="utf-8"))
        except (ValidationError, UnicodeDecodeError):
            # UnicodeDecodeError: e.g. a cache file written earlier in a
            # non-UTF-8 locale encoding. Treat it like any other invalid cache.
            logger.debug("cache at %s is invalid — regenerating", cache_path)
            cache_path.unlink(missing_ok=True)
    logger.debug("cache miss: will generate fresh quiz (will write to %s)", cache_path)
    typer.echo("generating quiz from diff...")
    quiz = _generate_in_memory(pr_url, llm, model, min_diff_lines, max_diff_lines)
    if quiz is None:
        return None
    cache_path.write_text(quiz.model_dump_json(), encoding="utf-8")
    logger.debug("quiz cached at %s (%d bytes)", cache_path, cache_path.stat().st_size)
    return quiz
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def _run_take_flow(
    pr_url: str,
    show_results_only: bool,
    llm: LLMClient,
    model: str = "claude-sonnet-4-6",
    min_diff_lines: int = 50,
    max_diff_lines: int = 2000,
) -> None:
    """Run one of the two `take` modes for a PR.

    With `show_results_only`, the latest results comment on the PR is looked
    up, parsed, and printed as indented JSON. Otherwise a quiz is loaded or
    generated and served in the browser until the server stops.

    Raises:
        typer.Exit: with code 1 when `show_results_only` is set and the PR has
            no results comment.
    """
    if not show_results_only:
        quiz = _load_or_generate(
            pr_url,
            llm=llm,
            model=model,
            min_diff_lines=min_diff_lines,
            max_diff_lines=max_diff_lines,
        )
        if quiz is None:
            # Generation was skipped; nothing to serve.
            return

        def _publish(md: str) -> str:
            return post_comment(pr_url, md)

        _serve_blocking(quiz, pr_url, llm=llm, post_comment_fn=_publish)
        return

    results_md = find_latest_marker_comment(pr_url, _MARKER_RESULTS)
    if results_md is None:
        typer.echo("no results comment found on this PR.")
        raise typer.Exit(code=1)
    parsed = parse_results(results_md)
    typer.echo(parsed.model_dump_json(indent=2))
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def _configure_logging() -> None:
|
|
207
|
+
"""Wire up `COGNIT_LOG_LEVEL` so debug traces from the engine layers are visible.
|
|
208
|
+
|
|
209
|
+
Default is WARNING (quiet). Set `COGNIT_LOG_LEVEL=DEBUG` to see which mermaid
|
|
210
|
+
validator is being used, cache hits, and other internal decisions:
|
|
211
|
+
|
|
212
|
+
COGNIT_LOG_LEVEL=DEBUG cognit take
|
|
213
|
+
|
|
214
|
+
`force=True` so a parent harness (uvicorn, pytest) that already configured
|
|
215
|
+
root logging doesn't make our env var silently a no-op.
|
|
216
|
+
"""
|
|
217
|
+
level_name = os.environ.get("COGNIT_LOG_LEVEL", "WARNING").upper()
|
|
218
|
+
level = getattr(logging, level_name, logging.WARNING)
|
|
219
|
+
logging.basicConfig(
|
|
220
|
+
level=level,
|
|
221
|
+
format="%(name)s %(levelname)s: %(message)s",
|
|
222
|
+
force=True,
|
|
223
|
+
)
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def run(
    pr: str | None,
    show_results: bool,
    model: str = "claude-sonnet-4-6",
    min_diff_lines: int = 50,
    max_diff_lines: int = 2000,
) -> None:
    """Entry point behind `cognit take`.

    Configures logging, resolves the PR URL (the explicit `pr` if given,
    otherwise auto-detected from the current branch), builds the LLM client
    and runs the take flow.

    Raises:
        typer.Exit: with code 1 when no PR URL was given and none could be
            detected.
    """
    _configure_logging()
    pr_url = pr if pr else _detect_pr_from_branch()
    if pr_url is None:
        typer.echo("error: no PR detected from current branch; pass --pr <url>")
        raise typer.Exit(code=1)
    client = _make_llm(model)
    flow_options = {
        "show_results_only": show_results,
        "llm": client,
        "model": model,
        "min_diff_lines": min_diff_lines,
        "max_diff_lines": max_diff_lines,
    }
    _run_take_flow(pr_url, **flow_options)
|
cognit/cli/version.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.0"
|
|
File without changes
|
cognit/comment/parse.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from cognit.engine.models import Quiz, Answers, Results, QuestionResult
|
|
3
|
+
|
|
4
|
+
_JSON_BLOCK = re.compile(r"```json\s*\n(.*?)\n```", re.DOTALL)
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _extract_json(md: str, marker: str) -> str:
|
|
8
|
+
if marker not in md:
|
|
9
|
+
raise ValueError(f"marker {marker!r} not found")
|
|
10
|
+
after = md.split(marker, 1)[1]
|
|
11
|
+
m = _JSON_BLOCK.search(after)
|
|
12
|
+
if not m:
|
|
13
|
+
raise ValueError(f"no json block after {marker!r}")
|
|
14
|
+
return m.group(1)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def parse_quiz(md: str) -> Quiz:
    """Build a `Quiz` from the JSON block following the quiz marker in `md`."""
    payload = _extract_json(md, "<!-- cognit:quiz v1 -->")
    return Quiz.model_validate_json(payload)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def parse_answers(md: str) -> Answers:
    """Build an `Answers` from the JSON block following the answers marker in `md`."""
    payload = _extract_json(md, "<!-- cognit:answers v1 -->")
    return Answers.model_validate_json(payload)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def parse_results(md: str) -> Results:
    """Parse a results comment. Prefers the embedded JSON state; falls back to scraping the human text.

    Args:
        md: markdown text of the comment.

    Returns:
        The `Results` from the embedded JSON block when that succeeds.
        Otherwise a `Results` rebuilt from the visible text, with
        `pr_number=0`, the `**Total: N%**` figure (0 if absent) and one
        entry per `- ✅/❌ `id` — N%` line, each with empty feedback.

    Raises:
        ValueError: if `md` does not contain the results marker.
    """
    marker = "<!-- cognit:results v1 -->"
    if marker not in md:
        raise ValueError("not a results comment")

    # Preferred path: the JSON state block.
    # NOTE(review): any ValueError is swallowed here. If pydantic's
    # ValidationError derives from ValueError, a malformed state block also
    # silently falls through to scraping — confirm that is intended.
    try:
        state_json = _extract_json(md, marker)
        return Results.model_validate_json(state_json)
    except ValueError:
        pass

    # Fallback path: scrape the human-readable summary.
    total_match = re.search(r"\*\*Total:\s*(\d+)%\*\*", md)
    total = int(total_match.group(1)) if total_match else 0

    row_pattern = r"- (✅|❌) `([^`]+)` — (\d+)%"
    row_hits = (re.match(row_pattern, line) for line in md.splitlines())
    per: list[QuestionResult] = [
        QuestionResult(
            question_id=hit.group(2),
            correct=hit.group(1) == "✅",
            score=int(hit.group(3)),
            feedback="",
        )
        for hit in row_hits
        if hit
    ]
    return Results(pr_number=0, total_score=total, per_question=per)
|
cognit/comment/render.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
from cognit.engine.models import (
|
|
2
|
+
Quiz,
|
|
3
|
+
Answers,
|
|
4
|
+
Results,
|
|
5
|
+
MCQQuestion,
|
|
6
|
+
MermaidQuestion,
|
|
7
|
+
TrueFalseQuestion,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
_MARKER_QUIZ = "<!-- cognit:quiz v1 -->"
|
|
11
|
+
_MARKER_ANSWERS = "<!-- cognit:answers v1 -->"
|
|
12
|
+
_MARKER_RESULTS = "<!-- cognit:results v1 -->"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def render_quiz(quiz: Quiz) -> str:
    """Render a quiz as a markdown comment.

    Layout: the quiz marker and a heading, one section per question (with
    type-specific option rendering), then a collapsible block holding the
    quiz serialized as indented JSON.
    """
    out: list[str] = [
        _MARKER_QUIZ,
        "## Quiz on your PR",
        "",
        "Take it: `cognit take` or scroll down.",
        "",
    ]
    for number, question in enumerate(quiz.questions, start=1):
        out += [f"### Question {number} — {question.type}", question.prompt, ""]
        if isinstance(question, MCQQuestion):
            # Iterates `options` directly; for a mapping that yields keys only.
            out.extend(f"- {label}" for label in question.options)
        elif isinstance(question, MermaidQuestion):
            for label, diagram_src in question.options.items():
                out += [f"#### Option {label}", "```mermaid", diagram_src, "```"]
        elif isinstance(question, TrueFalseQuestion):
            out.append("- true / false")
        # Any other question type: the prompt alone is enough.
        out.append("")
    out += [
        "---",
        "<details><summary>Quiz state (used by the CLI)</summary>",
        "",
        "```json",
        quiz.model_dump_json(indent=2),
        "```",
        "</details>",
    ]
    return "\n".join(out)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def render_answers(ans: Answers, deterministic_score: int) -> str:
    """Render an answers comment.

    Layout: the answers marker, a heading, the deterministic score line, and
    the answers serialized as indented JSON in a fenced block. The result
    ends with a newline.
    """
    pieces = [
        _MARKER_ANSWERS,
        "## My answers",
        "",
        f"Deterministic-grade score (MCQ + mermaid + T/F): **{deterministic_score}%**",
        "",
        "```json",
        ans.model_dump_json(indent=2),
        "```",
        "",  # yields the trailing newline
    ]
    return "\n".join(pieces)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def render_results(res: Results) -> str:
    """Render a compact results comment.

    Layout: the results marker, a heading and total, one bullet per question
    (with an indented quote line when feedback is present), then a
    collapsible block holding the results serialized as indented JSON.
    """
    header = [
        _MARKER_RESULTS,
        "## Quiz results",
        "",
        f"**Total: {res.total_score}%**",
        "",
    ]
    bullets: list[str] = []
    for item in res.per_question:
        mark = "✅" if item.correct else "❌"
        bullets.append(f"- {mark} `{item.question_id}` — {item.score}%")
        if item.feedback:
            bullets.append(f"  > {item.feedback}")
    footer = [
        "",
        "---",
        "<details><summary>Results state (used by the CLI)</summary>",
        "",
        "```json",
        res.model_dump_json(indent=2),
        "```",
        "</details>",
    ]
    return "\n".join([*header, *bullets, *footer])
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def render_results_inlined(quiz: Quiz, answers: Answers, results: Results) -> str:
    """Render a self-contained results comment with prompts and answers inlined.

    Each quiz question that has a result gets its own section showing the
    score, the prompt, the submitted answer (empty when none was recorded)
    and any feedback. Questions without a result are omitted but still count
    toward the numbering. The results are appended as indented JSON inside a
    collapsible block.
    """
    submitted = dict((entry.question_id, entry.value) for entry in answers.entries)
    graded = dict((item.question_id, item) for item in results.per_question)

    out: list[str] = [
        _MARKER_RESULTS,
        "## Quiz results",
        "",
        f"**Total: {results.total_score}%**",
        "",
    ]
    for number, question in enumerate(quiz.questions, start=1):
        outcome = graded.get(question.id)
        if outcome is None:
            continue
        mark = "✅" if outcome.correct else "❌"
        given = submitted.get(question.id, "")
        out += [
            f"### Question {number} — {mark} {outcome.score}%",
            "",
            f"**Prompt:** {question.prompt}",
            "",
            f"**Your answer:** `{given}`",
        ]
        if outcome.feedback:
            out += ["", f"> {outcome.feedback}"]
        out.append("")
    out += [
        "---",
        "<details><summary>Results state (used by the CLI)</summary>",
        "",
        "```json",
        results.model_dump_json(indent=2),
        "```",
        "</details>",
    ]
    return "\n".join(out)
|
|
File without changes
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Tiny parse-only mermaid validator. No Chromium, no rendering — just mermaid.parse().
|
|
2
|
+
#
|
|
3
|
+
# The official `@mermaid-js/mermaid-cli` image (~500MB) bundles Puppeteer +
|
|
4
|
+
# Chromium for image rendering. We only need parsing, so we install just the
|
|
5
|
+
# `mermaid` library + a Node-side DOM shim (jsdom). Image size: ~200MB.
|
|
6
|
+
FROM node:20-alpine
|
|
7
|
+
|
|
8
|
+
WORKDIR /app
|
|
9
|
+
|
|
10
|
+
# Manual package.json so npm doesn't complain. ESM type so mermaid v11 works.
|
|
11
|
+
# Pinned to exact versions for reproducible builds — bump deliberately
|
|
12
|
+
# when picking up a new mermaid release rather than letting a silent upstream
|
|
13
|
+
# change drift the validator.
|
|
14
|
+
RUN echo '{"name":"cognit-mermaid-validator","version":"1.0.0","type":"module","private":true}' > package.json \
|
|
15
|
+
&& npm install --no-audit --no-fund --no-progress mermaid@11.15.0 jsdom@24.0.0 \
|
|
16
|
+
&& npm cache clean --force
|
|
17
|
+
|
|
18
|
+
COPY validate.mjs /app/validate.mjs
|
|
19
|
+
|
|
20
|
+
ENTRYPOINT ["node", "/app/validate.mjs"]
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# Dockerised mermaid validator
|
|
2
|
+
|
|
3
|
+
Tiny parse-only mermaid validator. Used by `engine/mermaid.py` when `mmdc` is not
|
|
4
|
+
installed locally but `docker` is.
|
|
5
|
+
|
|
6
|
+
**Why this exists.** The official `@mermaid-js/mermaid-cli` Docker image bundles
|
|
7
|
+
Puppeteer + Chromium (~500MB) because `mmdc` does image rendering. We only need
|
|
8
|
+
*parsing* to validate LLM-generated diagrams, so this image installs just the
|
|
9
|
+
`mermaid` JS library + `jsdom` (~200MB total).
|
|
10
|
+
|
|
11
|
+
**Built lazily.** `cognit` runs `docker build` the first time it needs the
|
|
12
|
+
validator and an image isn't already present. After that, validation runs in
|
|
13
|
+
milliseconds per diagram via `docker run`.
|
|
14
|
+
|
|
15
|
+
**Build manually:**
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
docker build -t cognit-mermaid-validator:local src/cognit/engine/_mermaid_docker/
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
**Use manually:**
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
echo 'flowchart LR
|
|
25
|
+
A --> B' | docker run --rm -i cognit-mermaid-validator:local
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Exit code 0 = valid, 1 = parse error (message on stderr).
|
|
File without changes
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
// Parse-only mermaid validator. Reads a mermaid source from stdin, calls
// mermaid.parse(), exits 0 on success and 1 on parse error. The error message
// (if any) goes to stderr so the caller can surface it.
//
// We set up jsdom before importing mermaid because mermaid touches `document`
// at module-load time even in pure-parse mode. The HTML doc is empty — we
// never render, just parse.

import { JSDOM } from "jsdom";

const dom = new JSDOM("<!DOCTYPE html><html><body></body></html>", {
  pretendToBeVisual: true,
});

// Install the DOM shims with defineProperty rather than plain assignment.
// Newer Node releases ship a built-in `navigator` global that can be
// getter-only; assigning to it in an ES module (always strict mode) throws a
// TypeError. Redefining the property works whether or not the global exists.
const shims = {
  window: dom.window,
  document: dom.window.document,
  navigator: dom.window.navigator,
  HTMLElement: dom.window.HTMLElement,
  Node: dom.window.Node,
};
for (const [name, value] of Object.entries(shims)) {
  Object.defineProperty(globalThis, name, {
    value,
    writable: true,
    enumerable: true,
    configurable: true,
  });
}

// Dynamic import so the shims above are in place before mermaid loads.
const mermaid = (await import("mermaid")).default;

// Read all of stdin as UTF-8 text.
const chunks = [];
for await (const chunk of process.stdin) chunks.push(chunk);
const src = Buffer.concat(chunks).toString("utf-8");

try {
  await mermaid.parse(src);
  process.exit(0);
} catch (e) {
  process.stderr.write(String(e && e.message ? e.message : e) + "\n");
  process.exit(1);
}
|