handzon-core 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/collections.ts +97 -3
- package/src/components/ai/ChatButton.tsx +51 -3
- package/src/components/ai/ChatPanel.tsx +86 -23
- package/src/components/ai/CopyStep.tsx +44 -0
- package/src/components/ai/OpenInAgent.tsx +55 -0
- package/src/components/ai/SelectionAsk.tsx +98 -0
- package/src/components/ai/StepHelp.tsx +31 -0
- package/src/components/mdx/Checkpoint.tsx +66 -2
- package/src/components/mdx/CopyPrompt.astro +10 -0
- package/src/components/mdx/CopyPrompt.tsx +56 -0
- package/src/components/mdx/HelpMe.astro +10 -0
- package/src/components/mdx/HelpMe.tsx +29 -0
- package/src/components/mdx/Playground.tsx +61 -9
- package/src/components/mdx/Quiz.tsx +18 -0
- package/src/index.ts +5 -0
- package/src/layouts/TutorialLayout.astro +19 -0
- package/src/lib/ai/assist.ts +81 -0
- package/src/lib/ai/prompts.ts +126 -0
- package/src/lib/ai/stepData.ts +74 -0
- package/src/lib/mdx-components.ts +4 -0
- package/src/lib/progress/remote.ts +86 -25
- package/src/lib/progress/types.ts +23 -0
- package/src/lib/progress/useProgress.ts +8 -4
- package/src/pages/TutorialStep.astro +12 -1
- package/src/server/auth.ts +84 -1
- package/src/server/db/schema.ts +53 -0
- package/src/server/handlers/helpInbox.ts +45 -0
- package/src/server/handlers/mcp.ts +72 -0
- package/src/server/handlers/progress.ts +7 -51
- package/src/server/handlers/progressEvents.ts +68 -0
- package/src/server/mcp/protocol.ts +99 -0
- package/src/server/mcp/server.ts +94 -0
- package/src/server/mcp/tools.ts +175 -0
- package/src/server/mcp/writeTools.ts +407 -0
- package/src/server/progress.ts +86 -0
- package/src/server/progressBus.ts +51 -0
- package/src/server/tokens.ts +80 -0
- package/src/server/verify/evaluator.ts +134 -0
- package/src/types/ai.ts +6 -0
- package/styles/components/assist.css +101 -0
- package/styles/components/checkpoint.css +29 -0
- package/styles/components.css +1 -0
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
import type { VerifyCheck, VerifySpec } from "../../collections.ts";
|
|
2
|
+
|
|
3
|
+
/**
 * Raw measurements an agent reports back for a single check.
 *
 * Which fields are meaningful depends on the check's `kind`; fields that
 * do not apply to that kind are ignored. There is deliberately no
 * pass/fail flag here: the server re-scores these values against the
 * spec and never takes the agent's word for the outcome.
 */
export interface CheckObservation {
  /** Whether the file was found (`file_exists` and `file_contains` checks). */
  exists?: boolean;
  /** File contents to match against (`file_contains` checks). */
  body?: string;
  /** Exit code of the command (`shell` checks). */
  exitCode?: number;
  /** Standard output of the command (`shell` checks). */
  stdout?: string;
  /** Response status code (`http` checks). */
  status?: number;
  /** Response body text (`http` checks). */
  responseBody?: string;
}
|
|
21
|
+
|
|
22
|
+
/** Result of scoring agent observations against a verification spec. */
export interface VerifyVerdict {
  /** `true` when nothing failed; `false` otherwise. */
  passed: boolean;
  /** Position in `spec.checks` of the first check that failed (only when `passed === false`). */
  failingCheckIndex?: number;
  /** Hint the author attached to the failing check, when there is one. */
  hint?: string;
  /** Brief server-generated explanation of *why* the check failed. */
  reason?: string;
}
|
|
31
|
+
|
|
32
|
+
/** Build the structured failure verdict for the check at position `idx`. */
function fail(idx: number, check: VerifyCheck, reason: string): VerifyVerdict {
  return { passed: false, failingCheckIndex: idx, hint: check.hint, reason };
}

/**
 * Compile a spec-supplied pattern without letting a malformed one throw.
 *
 * @returns the compiled expression, or `undefined` when `pattern` is not
 *   a valid regular expression.
 */
function compilePattern(pattern: string): RegExp | undefined {
  try {
    return new RegExp(pattern);
  } catch {
    return undefined;
  }
}

/** Failure verdict for a check whose declared pattern cannot be compiled. */
function invalidPattern(idx: number, check: VerifyCheck, pattern: string): VerifyVerdict {
  return fail(idx, check, `Check pattern /${pattern}/ is not a valid regular expression.`);
}

/**
 * Score one check's observation against its declared spec.
 *
 * Always returns a verdict: either `{ passed: true }` or a structured
 * failure carrying a reason and the author's hint, so the calling MCP
 * tool can echo something useful to the agent. A malformed pattern in
 * the spec or an unrecognised check kind is reported as a failure
 * rather than thrown or silently returned as `undefined`.
 *
 * Performs no I/O. The agent does the side effects; the server scores.
 *
 * @param check the check declaration taken from the spec
 * @param idx   position of `check` within `spec.checks`, echoed in failures
 * @param obs   values the agent observed for this check
 */
function evaluateOne(check: VerifyCheck, idx: number, obs: CheckObservation): VerifyVerdict {
  switch (check.kind) {
    case "file_exists": {
      // Only an explicit `true` passes; a missing flag counts as "not there".
      if (obs.exists === true) return { passed: true };
      return fail(
        idx,
        check,
        `Expected ${check.path} to exist but the agent reported it does not.`,
      );
    }
    case "file_contains": {
      if (obs.exists === false) {
        return fail(idx, check, `File ${check.path} does not exist.`);
      }
      const pattern = compilePattern(check.pattern);
      if (!pattern) return invalidPattern(idx, check, check.pattern);
      // A missing body is scored as an empty file.
      if (pattern.test(obs.body ?? "")) return { passed: true };
      return fail(idx, check, `Expected ${check.path} to match /${check.pattern}/.`);
    }
    case "shell": {
      const { exitCode, stdoutMatches } = check.expect;
      // Each expectation is optional; only the declared ones are enforced.
      if (typeof exitCode === "number" && obs.exitCode !== exitCode) {
        return fail(
          idx,
          check,
          `Expected exit code ${exitCode} from \`${check.run}\`, got ${obs.exitCode}.`,
        );
      }
      if (stdoutMatches) {
        const pattern = compilePattern(stdoutMatches);
        if (!pattern) return invalidPattern(idx, check, stdoutMatches);
        if (!pattern.test(obs.stdout ?? "")) {
          return fail(
            idx,
            check,
            `Expected stdout of \`${check.run}\` to match /${stdoutMatches}/.`,
          );
        }
      }
      return { passed: true };
    }
    case "http": {
      const { status, bodyIncludes, bodyMatches } = check.expect;
      if (typeof status === "number" && obs.status !== status) {
        return fail(
          idx,
          check,
          `Expected status ${status} from ${check.url}, got ${obs.status}.`,
        );
      }
      const body = obs.responseBody ?? "";
      if (bodyIncludes && !body.includes(bodyIncludes)) {
        return fail(
          idx,
          check,
          `Expected response body of ${check.url} to include "${bodyIncludes}".`,
        );
      }
      if (bodyMatches) {
        const pattern = compilePattern(bodyMatches);
        if (!pattern) return invalidPattern(idx, check, bodyMatches);
        if (!pattern.test(body)) {
          return fail(
            idx,
            check,
            `Expected response body of ${check.url} to match /${bodyMatches}/.`,
          );
        }
      }
      return { passed: true };
    }
    default: {
      // Reached only when runtime data carries a kind the cases above do
      // not handle; fail closed instead of falling off the end.
      const kind = (check as { kind?: unknown }).kind;
      return fail(idx, check, `Unsupported check kind "${String(kind)}".`);
    }
  }
}
|
|
112
|
+
|
|
113
|
+
/**
 * Turn a spec plus the agent's reported observations into a single
 * pass/fail verdict, without performing any I/O.
 *
 * Checks are scored in declaration order and scoring stops at the first
 * failure, so `failingCheckIndex` always names the earliest problem.
 *
 * `observations` has to line up with `spec.checks` one-to-one and in the
 * same order; a length mismatch is reported as a failure before any
 * check is scored. Supplying the right fields for each check kind is the
 * agent's responsibility.
 */
export function evaluate(spec: VerifySpec, observations: CheckObservation[]): VerifyVerdict {
  const received = observations.length;
  const expected = spec.checks.length;
  if (received !== expected) {
    return {
      passed: false,
      reason: `Expected ${expected} observations, got ${received}.`,
    };
  }

  for (const [position, check] of spec.checks.entries()) {
    // A missing entry is scored as an empty observation.
    const outcome = evaluateOne(check, position, observations[position] ?? {});
    if (!outcome.passed) {
      return outcome;
    }
  }

  return { passed: true };
}
|
package/src/types/ai.ts
CHANGED
|
@@ -12,6 +12,12 @@ export interface AiConfig {
|
|
|
12
12
|
contextBudgetTokens: number;
|
|
13
13
|
includeFutureSteps: boolean;
|
|
14
14
|
tools: { suggestPlaygroundEdit: boolean };
|
|
15
|
+
/**
|
|
16
|
+
* When true, the tutorial layout renders a "Stuck on this step?"
|
|
17
|
+
* footer under every step body so authors don't have to drop a
|
|
18
|
+
* <HelpMe /> by hand. Off by default.
|
|
19
|
+
*/
|
|
20
|
+
autoStepHelp?: boolean;
|
|
15
21
|
disabledSkills?: string[];
|
|
16
22
|
allowedDomains?: string[];
|
|
17
23
|
}
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/* Family A: in-app tutor touchpoints (HelpMe, quiz wrong-answer hint,
 * checkpoint stuck nudge, selection ask, playground fix, step footer)
 * and Family B: bring-your-own-agent surfaces (CopyPrompt, deep-link
 * row, copy-step button). Shared visual language so author-placed
 * affordances feel like one family.
 */

/* Base look shared by every inline assist control. */
.hz-helpme,
.hz-assist-link,
.hz-copy-prompt {
  display: inline-flex;
  align-items: center;
  gap: 0.4rem;
  background: none;
  border: var(--border-default) solid var(--color-border);
  padding: 0.35rem 0.65rem;
  /* `font` must stay ahead of `font-size`: the shorthand resets the size. */
  font: inherit;
  font-size: 0.85em;
  color: var(--color-accent);
  cursor: pointer;
  text-decoration: none;
  border-radius: 0;
}

.hz-helpme:hover,
.hz-assist-link:hover,
.hz-copy-prompt:hover {
  background: var(--color-surface);
}

.hz-helpme[disabled],
.hz-copy-prompt[disabled] {
  opacity: 0.5;
  cursor: not-allowed;
}

/* A disabled control still matches :hover, so cancel the hover fill;
 * otherwise it lights up as if it were clickable. */
.hz-helpme[disabled]:hover,
.hz-copy-prompt[disabled]:hover {
  background: none;
}

/* Block-level "Stuck on this step?" step footer */
.hz-step-help {
  margin-top: 2rem;
  padding: 0.9rem 1.1rem;
  background: var(--color-surface);
  border: var(--border-default) solid var(--color-border);
  display: flex;
  align-items: center;
  gap: 0.75rem;
  font-size: 0.9em;
}

/* The text takes the free space so whatever follows sits at the far edge. */
.hz-step-help-text {
  color: var(--color-muted);
  flex: 1;
}

/* Checkpoint "Stuck?" nudge: rendered inside .checkpoint when the
 * step has been on-screen + unchecked for a while.
 */
.checkpoint-nudge {
  margin-left: auto;
  font-size: 0.8em;
  color: var(--color-accent);
}

/* Selection-anchored floating "Ask about this" */
.hz-selection-ask {
  position: absolute;
  z-index: 60;
  background: var(--color-bg);
  border: var(--border-default) solid var(--color-accent);
  padding: 0.3rem 0.6rem;
  font-size: 0.8em;
  color: var(--color-accent);
  cursor: pointer;
  box-shadow: 0 2px 8px rgba(0, 0, 0, 0.15);
}

/* Deep-link / open-in row */
.hz-openin {
  margin-top: 1.5rem;
  display: flex;
  flex-wrap: wrap;
  gap: 0.5rem;
  align-items: center;
}

.hz-openin-label {
  font-size: 0.8em;
  color: var(--color-muted);
  text-transform: uppercase;
  letter-spacing: 0.06em;
  margin-right: 0.25rem;
}

/* Quiz wrong-answer help inline link */
.quiz .hz-helpme {
  margin-top: 0.4rem;
}

/* Playground toolbar: row sitting just under the Sandpack iframe */
.playground-toolbar {
  display: flex;
  justify-content: flex-end;
  margin-top: 0.4rem;
}
|
|
@@ -47,3 +47,32 @@
|
|
|
47
47
|
letter-spacing: 0.06em;
|
|
48
48
|
margin-left: auto;
|
|
49
49
|
}
|
|
50
|
+
|
|
51
|
+
/* Family D: failure feedback shown inline beneath a checkpoint after the
 * agent's submit_verification call reported a failed check. It is a
 * quiet warning band rather than a toast: the agent owns the failure UX
 * in its own surface, and this is only the backstop. */
.checkpoint-feedback {
  /* Claim a full row of its own. */
  flex-basis: 100%;
  margin-top: 0.5rem;
  padding: 0.6rem 0.85rem;
  border-left: var(--border-thick) solid var(--color-warning, #d9a200);
  background: var(--color-bg);
  color: var(--color-fg);
  font-size: 0.9em;
}

/* Heading line of the band. */
.checkpoint-feedback strong {
  display: block;
  color: var(--color-warning, #d9a200);
  font-family: var(--font-mono);
  font-size: 0.8em;
  letter-spacing: 0.06em;
  text-transform: uppercase;
}

.checkpoint-feedback p {
  margin: 0.25rem 0 0 0;
}

/* Hint text, toned down. */
.checkpoint-feedback-hint {
  font-size: 0.95em;
  color: var(--color-muted);
}
|