@vibe-hero/server 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +190 -0
- package/README.md +151 -0
- package/dist/catalog/bundled/claude-code/.gitkeep +0 -0
- package/dist/catalog/bundled/claude-code/context-management.yaml +302 -0
- package/dist/catalog/bundled/claude-code/planning.yaml +313 -0
- package/dist/catalog/bundled/claude-code/subagents.yaml +357 -0
- package/dist/catalog/bundled/general/.gitkeep +0 -0
- package/dist/catalog/bundled/general/_placeholder.yaml +39 -0
- package/dist/catalog/bundled/general/task-decomposition.yaml +390 -0
- package/dist/catalog/bundled/index.d.ts +39 -0
- package/dist/catalog/bundled/index.d.ts.map +1 -0
- package/dist/catalog/bundled/index.js +41 -0
- package/dist/catalog/bundled/index.js.map +1 -0
- package/dist/catalog/fetcher.d.ts +201 -0
- package/dist/catalog/fetcher.d.ts.map +1 -0
- package/dist/catalog/fetcher.js +452 -0
- package/dist/catalog/fetcher.js.map +1 -0
- package/dist/catalog/loader.d.ts +165 -0
- package/dist/catalog/loader.d.ts.map +1 -0
- package/dist/catalog/loader.js +241 -0
- package/dist/catalog/loader.js.map +1 -0
- package/dist/catalog/resolve.d.ts +85 -0
- package/dist/catalog/resolve.d.ts.map +1 -0
- package/dist/catalog/resolve.js +103 -0
- package/dist/catalog/resolve.js.map +1 -0
- package/dist/cli/getOffer.d.ts +38 -0
- package/dist/cli/getOffer.d.ts.map +1 -0
- package/dist/cli/getOffer.js +150 -0
- package/dist/cli/getOffer.js.map +1 -0
- package/dist/cli/index.d.ts +46 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +88 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/config.d.ts +34 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +63 -0
- package/dist/config.js.map +1 -0
- package/dist/engine/elo.d.ts +76 -0
- package/dist/engine/elo.d.ts.map +1 -0
- package/dist/engine/elo.js +79 -0
- package/dist/engine/elo.js.map +1 -0
- package/dist/engine/graduation.d.ts +108 -0
- package/dist/engine/graduation.d.ts.map +1 -0
- package/dist/engine/graduation.js +161 -0
- package/dist/engine/graduation.js.map +1 -0
- package/dist/engine/lapse.d.ts +80 -0
- package/dist/engine/lapse.d.ts.map +1 -0
- package/dist/engine/lapse.js +125 -0
- package/dist/engine/lapse.js.map +1 -0
- package/dist/engine/selection.d.ts +84 -0
- package/dist/engine/selection.d.ts.map +1 -0
- package/dist/engine/selection.js +119 -0
- package/dist/engine/selection.js.map +1 -0
- package/dist/grading/deterministic.d.ts +102 -0
- package/dist/grading/deterministic.d.ts.map +1 -0
- package/dist/grading/deterministic.js +118 -0
- package/dist/grading/deterministic.js.map +1 -0
- package/dist/grading/freeform.d.ts +64 -0
- package/dist/grading/freeform.d.ts.map +1 -0
- package/dist/grading/freeform.js +85 -0
- package/dist/grading/freeform.js.map +1 -0
- package/dist/index.d.ts +52 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +91 -0
- package/dist/index.js.map +1 -0
- package/dist/observation/hookEvents.d.ts +113 -0
- package/dist/observation/hookEvents.d.ts.map +1 -0
- package/dist/observation/hookEvents.js +170 -0
- package/dist/observation/hookEvents.js.map +1 -0
- package/dist/observation/offers.d.ts +215 -0
- package/dist/observation/offers.d.ts.map +1 -0
- package/dist/observation/offers.js +327 -0
- package/dist/observation/offers.js.map +1 -0
- package/dist/observation/source.d.ts +133 -0
- package/dist/observation/source.d.ts.map +1 -0
- package/dist/observation/source.js +105 -0
- package/dist/observation/source.js.map +1 -0
- package/dist/profile/migrate.d.ts +122 -0
- package/dist/profile/migrate.d.ts.map +1 -0
- package/dist/profile/migrate.js +147 -0
- package/dist/profile/migrate.js.map +1 -0
- package/dist/profile/store.d.ts +84 -0
- package/dist/profile/store.d.ts.map +1 -0
- package/dist/profile/store.js +267 -0
- package/dist/profile/store.js.map +1 -0
- package/dist/schemas/common.d.ts +95 -0
- package/dist/schemas/common.d.ts.map +1 -0
- package/dist/schemas/common.js +106 -0
- package/dist/schemas/common.js.map +1 -0
- package/dist/schemas/content.d.ts +828 -0
- package/dist/schemas/content.d.ts.map +1 -0
- package/dist/schemas/content.js +219 -0
- package/dist/schemas/content.js.map +1 -0
- package/dist/schemas/profile.d.ts +599 -0
- package/dist/schemas/profile.d.ts.map +1 -0
- package/dist/schemas/profile.js +177 -0
- package/dist/schemas/profile.js.map +1 -0
- package/dist/schemas/tools.d.ts +1581 -0
- package/dist/schemas/tools.d.ts.map +1 -0
- package/dist/schemas/tools.js +286 -0
- package/dist/schemas/tools.js.map +1 -0
- package/dist/tools/config.d.ts +51 -0
- package/dist/tools/config.d.ts.map +1 -0
- package/dist/tools/config.js +104 -0
- package/dist/tools/config.js.map +1 -0
- package/dist/tools/gate.d.ts +50 -0
- package/dist/tools/gate.d.ts.map +1 -0
- package/dist/tools/gate.js +67 -0
- package/dist/tools/gate.js.map +1 -0
- package/dist/tools/guidance.d.ts +36 -0
- package/dist/tools/guidance.d.ts.map +1 -0
- package/dist/tools/guidance.js +117 -0
- package/dist/tools/guidance.js.map +1 -0
- package/dist/tools/listTopics.d.ts +55 -0
- package/dist/tools/listTopics.d.ts.map +1 -0
- package/dist/tools/listTopics.js +78 -0
- package/dist/tools/listTopics.js.map +1 -0
- package/dist/tools/offers.d.ts +60 -0
- package/dist/tools/offers.d.ts.map +1 -0
- package/dist/tools/offers.js +152 -0
- package/dist/tools/offers.js.map +1 -0
- package/dist/tools/placeholders.d.ts +27 -0
- package/dist/tools/placeholders.d.ts.map +1 -0
- package/dist/tools/placeholders.js +49 -0
- package/dist/tools/placeholders.js.map +1 -0
- package/dist/tools/recordObservation.d.ts +52 -0
- package/dist/tools/recordObservation.d.ts.map +1 -0
- package/dist/tools/recordObservation.js +87 -0
- package/dist/tools/recordObservation.js.map +1 -0
- package/dist/tools/startQuiz.d.ts +82 -0
- package/dist/tools/startQuiz.d.ts.map +1 -0
- package/dist/tools/startQuiz.js +180 -0
- package/dist/tools/startQuiz.js.map +1 -0
- package/dist/tools/status.d.ts +59 -0
- package/dist/tools/status.d.ts.map +1 -0
- package/dist/tools/status.js +133 -0
- package/dist/tools/status.js.map +1 -0
- package/dist/tools/submitAnswer.d.ts +156 -0
- package/dist/tools/submitAnswer.d.ts.map +1 -0
- package/dist/tools/submitAnswer.js +402 -0
- package/dist/tools/submitAnswer.js.map +1 -0
- package/dist/tools/types.d.ts +82 -0
- package/dist/tools/types.d.ts.map +1 -0
- package/dist/tools/types.js +48 -0
- package/dist/tools/types.js.map +1 -0
- package/dist/tools/us2/standing.d.ts +111 -0
- package/dist/tools/us2/standing.d.ts.map +1 -0
- package/dist/tools/us2/standing.js +143 -0
- package/dist/tools/us2/standing.js.map +1 -0
- package/package.json +62 -0
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Bundled placeholder topic (T016).
|
|
2
|
+
#
|
|
3
|
+
# This is a minimal, VALID topic so the bundled-catalog loader has something to
|
|
4
|
+
# load on first run / offline (FR-025) before the real curriculum lands in later
|
|
5
|
+
# tasks (T040–T042, T058). It is deliberately tiny and tool-agnostic.
|
|
6
|
+
#
|
|
7
|
+
# Replace / remove once real bundled content is authored. Authoring format is
|
|
8
|
+
# YAML, validated by Zod on load (research OD-004); one file per (topic × class)
|
|
9
|
+
# carrying all tiers and trigger signals (FR-004a).
|
|
10
|
+
|
|
11
|
+
id: _placeholder
|
|
12
|
+
class:
|
|
13
|
+
kind: general
|
|
14
|
+
title: Placeholder Topic
|
|
15
|
+
summary: A trivial bundled topic so the server works offline on first run.
|
|
16
|
+
# Trigger signals may be empty — this topic is reachable via the pull-based path
|
|
17
|
+
# (status / on-demand quiz / guidance) without any observed-activity attribution.
|
|
18
|
+
triggerSignals: []
|
|
19
|
+
items:
|
|
20
|
+
- id: placeholder-100-mc
|
|
21
|
+
tier: 100
|
|
22
|
+
bloom: remember
|
|
23
|
+
# Fixed authored Elo item-rating (never self-updates); 200 = "easy" seed.
|
|
24
|
+
difficulty: 200
|
|
25
|
+
type: multiple_choice
|
|
26
|
+
prompt: >-
|
|
27
|
+
This is a placeholder question shipped with the bundled catalog. Which
|
|
28
|
+
answer is correct?
|
|
29
|
+
choices:
|
|
30
|
+
- id: a
|
|
31
|
+
text: This one.
|
|
32
|
+
- id: b
|
|
33
|
+
text: Not this one.
|
|
34
|
+
answerKey:
|
|
35
|
+
kind: choice
|
|
36
|
+
correctChoiceId: a
|
|
37
|
+
guidance: >-
|
|
38
|
+
Placeholder guidance. Real teaching content arrives with the authored
|
|
39
|
+
curriculum in later tasks (T040–T042).
|
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
# Topic: task-decomposition (general)
|
|
2
|
+
#
|
|
3
|
+
# Tool-agnostic agentic-coding concept: how to break complex work into bounded,
|
|
4
|
+
# verifiable units before delegating or implementing. Tiers 100–500.
|
|
5
|
+
#
|
|
6
|
+
# triggerSignals is empty: this concept is not tied to any one tool's tool-calls.
|
|
7
|
+
# It surfaces via the pull-based path (status / on-demand quiz / guidance).
|
|
8
|
+
|
|
9
|
+
id: task-decomposition
|
|
10
|
+
class:
|
|
11
|
+
kind: general
|
|
12
|
+
title: Task Decomposition & Planning
|
|
13
|
+
summary: >-
|
|
14
|
+
How to break complex agentic work into bounded, sequenced, verifiable units
|
|
15
|
+
before delegating or implementing — and how to recognise when decomposition
|
|
16
|
+
has gone wrong.
|
|
17
|
+
|
|
18
|
+
triggerSignals: []
|
|
19
|
+
|
|
20
|
+
items:
|
|
21
|
+
# ── Tier 100 — Remember ──────────────────────────────────────────────────
|
|
22
|
+
- id: task-decomposition-100-mc-a
|
|
23
|
+
tier: 100
|
|
24
|
+
bloom: remember
|
|
25
|
+
difficulty: 150
|
|
26
|
+
type: multiple_choice
|
|
27
|
+
prompt: >-
|
|
28
|
+
What is the primary goal of task decomposition in agentic coding?
|
|
29
|
+
choices:
|
|
30
|
+
- id: a
|
|
31
|
+
text: To reduce the number of files changed in a single commit
|
|
32
|
+
- id: b
|
|
33
|
+
text: >-
|
|
34
|
+
To break work into units small enough that each can be independently
|
|
35
|
+
implemented, verified, and rolled back
|
|
36
|
+
- id: c
|
|
37
|
+
text: To assign each task to a different AI model
|
|
38
|
+
- id: d
|
|
39
|
+
text: To produce a Gantt chart for project management
|
|
40
|
+
answerKey:
|
|
41
|
+
kind: choice
|
|
42
|
+
correctChoiceId: b
|
|
43
|
+
guidance: >-
|
|
44
|
+
Decomposition serves verifiability and recoverability. When each unit of
|
|
45
|
+
work is bounded, you can confirm it is done (or not done) without reading
|
|
46
|
+
all surrounding code, and you can undo it without unravelling unrelated
|
|
47
|
+
changes. Reducing file count or cross-model assignment are incidental; the
|
|
48
|
+
core purpose is making each piece independently checkable.
|
|
49
|
+
|
|
50
|
+
- id: task-decomposition-100-mc-b
|
|
51
|
+
tier: 100
|
|
52
|
+
bloom: remember
|
|
53
|
+
difficulty: 160
|
|
54
|
+
type: multiple_choice
|
|
55
|
+
prompt: >-
|
|
56
|
+
Which of the following is a key property of a well-decomposed task?
|
|
57
|
+
choices:
|
|
58
|
+
- id: a
|
|
59
|
+
text: It is as large as possible to minimise the number of handoffs
|
|
60
|
+
- id: b
|
|
61
|
+
text: It has a clear, observable completion criterion
|
|
62
|
+
- id: c
|
|
63
|
+
text: It must always be implemented by a subagent, never inline
|
|
64
|
+
- id: d
|
|
65
|
+
text: It never touches existing code — only adds new files
|
|
66
|
+
answerKey:
|
|
67
|
+
kind: choice
|
|
68
|
+
correctChoiceId: b
|
|
69
|
+
guidance: >-
|
|
70
|
+
A well-decomposed task has a completion criterion you can actually observe:
|
|
71
|
+
a test passes, a file is present, a command returns a specific output. Without
|
|
72
|
+
that criterion you cannot tell whether the task is done, making it impossible
|
|
73
|
+
to verify, sequence, or hand off reliably. Size, delegation strategy, and
|
|
74
|
+
file-change scope are secondary concerns.
|
|
75
|
+
|
|
76
|
+
# ── Tier 200 — Understand ───────────────────────────────────────────────
|
|
77
|
+
- id: task-decomposition-200-mc-a
|
|
78
|
+
tier: 200
|
|
79
|
+
bloom: understand
|
|
80
|
+
difficulty: 250
|
|
81
|
+
type: multiple_choice
|
|
82
|
+
prompt: >-
|
|
83
|
+
You have a feature request: "Add user authentication with OAuth, a profile
|
|
84
|
+
page, and email notifications." Why is this a poor unit of work for a
|
|
85
|
+
single agentic task?
|
|
86
|
+
choices:
|
|
87
|
+
- id: a
|
|
88
|
+
text: >-
|
|
89
|
+
It spans multiple concerns, making it impossible to verify any one
|
|
90
|
+
piece without finishing all of them
|
|
91
|
+
- id: b
|
|
92
|
+
text: It involves OAuth, which agents cannot implement
|
|
93
|
+
- id: c
|
|
94
|
+
text: Profile pages require UI work that agents should never do
|
|
95
|
+
- id: d
|
|
96
|
+
text: Email notifications always introduce external API complexity
|
|
97
|
+
answerKey:
|
|
98
|
+
kind: choice
|
|
99
|
+
correctChoiceId: a
|
|
100
|
+
guidance: >-
|
|
101
|
+
The problem is scope and verifiability, not the technologies involved.
|
|
102
|
+
When a task bundles three distinct concerns — authentication, profile
|
|
103
|
+
rendering, and notification delivery — there is no single observable signal
|
|
104
|
+
that marks any one concern as done. If authentication fails mid-way, the
|
|
105
|
+
entire task is unverifiable. Decomposing into three sequential tasks gives
|
|
106
|
+
you checkpoints: OAuth done (tests pass), profile done (page renders), and
|
|
107
|
+
notifications done (email sent in test).
|
|
108
|
+
|
|
109
|
+
- id: task-decomposition-200-sa-a
|
|
110
|
+
tier: 200
|
|
111
|
+
bloom: understand
|
|
112
|
+
difficulty: 260
|
|
113
|
+
type: short_answer
|
|
114
|
+
prompt: >-
|
|
115
|
+
What term describes the type of dependency between two tasks when Task B
|
|
116
|
+
requires Task A's output before it can be meaningfully specified or
|
|
117
|
+
started?
|
|
118
|
+
answerKey:
|
|
119
|
+
kind: keyword
|
|
120
|
+
anyOf:
|
|
121
|
+
- sequential dependency
|
|
122
|
+
- sequential
|
|
123
|
+
- data dependency
|
|
124
|
+
- ordering dependency
|
|
125
|
+
- dependency ordering
|
|
126
|
+
normalize: lower
|
|
127
|
+
guidance: >-
|
|
128
|
+
When Task B's inputs come from Task A's outputs the two tasks have a
|
|
129
|
+
sequential dependency (also called a data dependency or ordering
|
|
130
|
+
dependency). This means B cannot run in parallel with A and its prompt
|
|
131
|
+
cannot be fully written until A finishes. Recognising this dependency
|
|
132
|
+
pattern is what separates tasks that must be sequenced from tasks that
|
|
133
|
+
can be parallelised safely.
|
|
134
|
+
|
|
135
|
+
# ── Tier 300 — Apply ────────────────────────────────────────────────────
|
|
136
|
+
- id: task-decomposition-300-mc-a
|
|
137
|
+
tier: 300
|
|
138
|
+
bloom: apply
|
|
139
|
+
difficulty: 350
|
|
140
|
+
type: multiple_choice
|
|
141
|
+
prompt: >-
|
|
142
|
+
You need to: (1) research the existing API contract, (2) write a migration
|
|
143
|
+
script, and (3) write tests for the migration. Which sequencing is correct
|
|
144
|
+
and why?
|
|
145
|
+
choices:
|
|
146
|
+
- id: a
|
|
147
|
+
text: >-
|
|
148
|
+
All three in parallel — they are independent enough to run concurrently
|
|
149
|
+
- id: b
|
|
150
|
+
text: >-
|
|
151
|
+
Research first (must finish before writing), then migration and tests
|
|
152
|
+
in parallel — both use the research findings but do not depend on each
|
|
153
|
+
other
|
|
154
|
+
- id: c
|
|
155
|
+
text: >-
|
|
156
|
+
Migration first, then research, then tests — implementation defines
|
|
157
|
+
what needs researching
|
|
158
|
+
- id: d
|
|
159
|
+
text: >-
|
|
160
|
+
Tests first (TDD), then research, then migration
|
|
161
|
+
answerKey:
|
|
162
|
+
kind: choice
|
|
163
|
+
correctChoiceId: b
|
|
164
|
+
guidance: >-
|
|
165
|
+
Research is a prerequisite for both the migration script and the tests:
|
|
166
|
+
you cannot write either accurately without knowing the API contract. Once
|
|
167
|
+
research is complete, the migration and tests have independent inputs and
|
|
168
|
+
no shared mutable state, so they can run in parallel. Running research in
|
|
169
|
+
parallel with the others would produce an under-specified migration
|
|
170
|
+
(missing contract knowledge) and useless tests (validating against assumed
|
|
171
|
+
rather than real behaviour).
|
|
172
|
+
|
|
173
|
+
- id: task-decomposition-300-mc-b
|
|
174
|
+
tier: 300
|
|
175
|
+
bloom: apply
|
|
176
|
+
difficulty: 360
|
|
177
|
+
type: multiple_choice
|
|
178
|
+
prompt: >-
|
|
179
|
+
A task is described as "refactor the data layer and fix the three open
|
|
180
|
+
bugs while you're in there." What is the main decomposition problem with
|
|
181
|
+
this description?
|
|
182
|
+
choices:
|
|
183
|
+
- id: a
|
|
184
|
+
text: Refactoring and bug-fixing are the same activity; no decomposition is needed
|
|
185
|
+
- id: b
|
|
186
|
+
text: >-
|
|
187
|
+
Combining refactoring (behaviour-preserving) with bug fixes
|
|
188
|
+
(behaviour-changing) in one task makes it impossible to isolate which
|
|
189
|
+
change caused any regression
|
|
190
|
+
- id: c
|
|
191
|
+
text: Bugs should always be fixed before any refactoring occurs
|
|
192
|
+
- id: d
|
|
193
|
+
text: The data layer is too large a scope for any single task
|
|
194
|
+
answerKey:
|
|
195
|
+
kind: choice
|
|
196
|
+
correctChoiceId: b
|
|
197
|
+
guidance: >-
|
|
198
|
+
Refactoring means changing structure without changing observable behaviour;
|
|
199
|
+
bug-fixing deliberately changes observable behaviour. Mixing them in a
|
|
200
|
+
single task destroys the ability to diff-bisect a regression: if something
|
|
201
|
+
breaks, you cannot tell whether the refactor or the bug fix caused it.
|
|
202
|
+
Correct decomposition: refactor first (verify all tests still pass), then
|
|
203
|
+
apply each bug fix as a separate task. This gives you a clean causal chain.
|
|
204
|
+
|
|
205
|
+
# ── Tier 400 — Analyze ──────────────────────────────────────────────────
|
|
206
|
+
- id: task-decomposition-400-mc-a
|
|
207
|
+
tier: 400
|
|
208
|
+
bloom: analyze
|
|
209
|
+
difficulty: 430
|
|
210
|
+
type: multiple_choice
|
|
211
|
+
prompt: >-
|
|
212
|
+
A tasks.md file has 20 items all marked [X] (complete). Before reporting
|
|
213
|
+
the feature as done, what is the most important verification step and why?
|
|
214
|
+
choices:
|
|
215
|
+
- id: a
|
|
216
|
+
text: >-
|
|
217
|
+
Count that all 20 items exist — if the list is intact, the work is done
|
|
218
|
+
- id: b
|
|
219
|
+
text: >-
|
|
220
|
+
Re-run the full test suite; if it passes, the checkboxes can be trusted
|
|
221
|
+
- id: c
|
|
222
|
+
text: >-
|
|
223
|
+
Spot-check 3–4 completed tasks against actual code evidence, because
|
|
224
|
+
checkboxes reflect intent, not implementation
|
|
225
|
+
- id: d
|
|
226
|
+
text: >-
|
|
227
|
+
Ask the implementing agent to confirm — it has the best context about
|
|
228
|
+
what it did
|
|
229
|
+
answerKey:
|
|
230
|
+
kind: choice
|
|
231
|
+
correctChoiceId: c
|
|
232
|
+
guidance: >-
|
|
233
|
+
Checkboxes in a task list are claimed completions, not verified ones.
|
|
234
|
+
An agent may mark a task done optimistically, or it may have completed a
|
|
235
|
+
subtly different scope than intended. The ground truth is the code.
|
|
236
|
+
Spot-checking means finding the function, file, or test that the task
|
|
237
|
+
said would exist and confirming it is actually there and does what the
|
|
238
|
+
task specified. Asking the implementing agent reintroduces confirmation
|
|
239
|
+
bias; it will tend to endorse its own claimed work. Tests help but may not
|
|
240
|
+
cover every task's success criterion.
|
|
241
|
+
|
|
242
|
+
- id: task-decomposition-400-sa-a
|
|
243
|
+
tier: 400
|
|
244
|
+
bloom: analyze
|
|
245
|
+
difficulty: 440
|
|
246
|
+
type: short_answer
|
|
247
|
+
prompt: >-
|
|
248
|
+
Name the failure mode where an agent marks tasks complete based on its
|
|
249
|
+
intent or partial output rather than verifying the implementation actually
|
|
250
|
+
exists and functions correctly.
|
|
251
|
+
answerKey:
|
|
252
|
+
kind: keyword
|
|
253
|
+
anyOf:
|
|
254
|
+
- phantom completion
|
|
255
|
+
- phantom completions
|
|
256
|
+
- phantom
|
|
257
|
+
- false completion
|
|
258
|
+
- false positive completion
|
|
259
|
+
normalize: lower
|
|
260
|
+
guidance: >-
|
|
261
|
+
"Phantom completion" (also called a false completion) is when a task is
|
|
262
|
+
marked done but the backing code is missing, dead, or does not satisfy the
|
|
263
|
+
task's acceptance criterion. It occurs because the agent that completed the
|
|
264
|
+
task is subject to confirmation bias — it is more likely to conclude its
|
|
265
|
+
own work is correct than an independent reviewer would be. The
|
|
266
|
+
countermeasure is external verification: checking the actual code against the
|
|
267
|
+
task's stated criterion rather than re-reading the task list.
|
|
268
|
+
|
|
269
|
+
# ── Tier 500 — Evaluate ─────────────────────────────────────────────────
|
|
270
|
+
- id: task-decomposition-500-mc-a
|
|
271
|
+
tier: 500
|
|
272
|
+
bloom: evaluate
|
|
273
|
+
difficulty: 480
|
|
274
|
+
type: multiple_choice
|
|
275
|
+
prompt: >-
|
|
276
|
+
A plan has 40 tasks, each scoped to a single function change, with
|
|
277
|
+
explicit sequential dependencies between every pair of consecutive tasks.
|
|
278
|
+
An experienced engineer flags this plan as over-decomposed. What is the
|
|
279
|
+
most likely cost of this granularity?
|
|
280
|
+
choices:
|
|
281
|
+
- id: a
|
|
282
|
+
text: >-
|
|
283
|
+
It is impossible to over-decompose; finer granularity always improves
|
|
284
|
+
verifiability
|
|
285
|
+
- id: b
|
|
286
|
+
text: >-
|
|
287
|
+
At this granularity, the overhead of task handoffs, context
|
|
288
|
+
re-establishment, and sequential blocking exceeds the value of
|
|
289
|
+
independent verifiability for each unit
|
|
290
|
+
- id: c
|
|
291
|
+
text: >-
|
|
292
|
+
40 tasks is too many for any AI to reason about, so it will refuse to
|
|
293
|
+
execute the plan
|
|
294
|
+
- id: d
|
|
295
|
+
text: >-
|
|
296
|
+
Single-function tasks are never the right decomposition because
|
|
297
|
+
functions are too small to test in isolation
|
|
298
|
+
answerKey:
|
|
299
|
+
kind: choice
|
|
300
|
+
correctChoiceId: b
|
|
301
|
+
guidance: >-
|
|
302
|
+
Decomposition has diminishing returns. Each task boundary introduces
|
|
303
|
+
context-switching cost: the implementing agent (or human) must reload
|
|
304
|
+
intent, re-read the relevant code, and establish local context before
|
|
305
|
+
making a one-line change. When every pair is sequentially dependent there
|
|
306
|
+
is no parallelism benefit either. The right granularity is the smallest
|
|
307
|
+
unit that has a meaningful, independently observable outcome — typically a
|
|
308
|
+
coherent behaviour change, not a single function edit. The goal is
|
|
309
|
+
verifiability per unit of work, not maximising unit count.
|
|
310
|
+
|
|
311
|
+
- id: task-decomposition-500-ff-a
|
|
312
|
+
tier: 500
|
|
313
|
+
bloom: evaluate
|
|
314
|
+
difficulty: 485
|
|
315
|
+
type: free_form
|
|
316
|
+
prompt: >-
|
|
317
|
+
You are handed a tasks.md with 15 tasks for a new feature. Describe the
|
|
318
|
+
process you would use to evaluate whether the decomposition is correct
|
|
319
|
+
before any implementation begins. What signals would tell you the plan
|
|
320
|
+
needs to be revised, and what specific revisions would you make?
|
|
321
|
+
rubric:
|
|
322
|
+
criteria:
|
|
323
|
+
- id: checks-completion-criteria
|
|
324
|
+
text: >-
|
|
325
|
+
Checks that each task has an observable completion criterion (a test,
|
|
326
|
+
a command output, a file's existence) rather than describing only
|
|
327
|
+
activity ("write the function").
|
|
328
|
+
- id: identifies-dependency-ordering
|
|
329
|
+
text: >-
|
|
330
|
+
Verifies that sequentially dependent tasks are ordered correctly and
|
|
331
|
+
that tasks claimed to be parallelisable do not share mutable state
|
|
332
|
+
(files, schema, in-memory objects).
|
|
333
|
+
- id: flags-mixed-concerns
|
|
334
|
+
text: >-
|
|
335
|
+
Identifies tasks that bundle behaviour-preserving changes with
|
|
336
|
+
behaviour-changing changes (e.g. refactor + bug fix) and recommends
|
|
337
|
+
splitting them.
|
|
338
|
+
- id: flags-granularity-extremes
|
|
339
|
+
text: >-
|
|
340
|
+
Flags both under-decomposition (tasks too large to verify
|
|
341
|
+
independently) and over-decomposition (tasks so small that handoff
|
|
342
|
+
overhead exceeds verifiability benefit), and explains the appropriate
|
|
343
|
+
scope.
|
|
344
|
+
- id: revision-specificity
|
|
345
|
+
text: >-
|
|
346
|
+
Proposes concrete revisions — not just "fix the dependencies" but
|
|
347
|
+
specific re-orderings, splits, or merges with reasoning tied to the
|
|
348
|
+
signals found.
|
|
349
|
+
referenceAnswer: >-
|
|
350
|
+
Before implementation begins, review each of the 15 tasks against five
|
|
351
|
+
criteria:
|
|
352
|
+
|
|
353
|
+
1. Observable completion criterion. Each task should state what you can
|
|
354
|
+
check when it is done: a test name that passes, a route that returns a
|
|
355
|
+
specific status code, a file at a given path. A task that says "implement
|
|
356
|
+
the service layer" with no checkable outcome is under-specified. Revision:
|
|
357
|
+
rewrite the task with an explicit acceptance criterion.
|
|
358
|
+
|
|
359
|
+
2. Correct dependency ordering. For each pair of tasks, ask: "Could Task B
|
|
360
|
+
be fully specified today, without Task A's output?" If no, they are
|
|
361
|
+
sequentially dependent and must be ordered accordingly. If yes, check
|
|
362
|
+
whether they write to any shared resource (the same file, the same
|
|
363
|
+
database table, the same in-memory structure). Shared mutable state makes
|
|
364
|
+
parallel execution unsafe. Revision: re-order sequential dependencies;
|
|
365
|
+
split tasks that share state into a shared-state task followed by
|
|
366
|
+
independent tasks.
|
|
367
|
+
|
|
368
|
+
3. No mixed concerns. Flag any task that combines a behaviour-preserving
|
|
369
|
+
change (refactor, rename, move) with a behaviour-changing change (new
|
|
370
|
+
feature, bug fix). These cannot be safely merged: a regression cannot be
|
|
371
|
+
attributed to either change in isolation. Revision: split into a refactoring task
|
|
372
|
+
(verified by no test failures) followed by a feature/fix task.
|
|
373
|
+
|
|
374
|
+
4. Granularity check. Flag tasks covering multiple subsystems or requiring
|
|
375
|
+
changes to more than 3–4 files as candidates for splitting. Equally, flag
|
|
376
|
+
runs of tasks that each change a single line in a single file with no
|
|
377
|
+
observable intermediate outcome — these are candidates for merging. The
|
|
378
|
+
right granularity is one coherent, testable behaviour change per task.
|
|
379
|
+
|
|
380
|
+
5. Completeness of coverage. Read the feature's acceptance criteria and
|
|
381
|
+
confirm every criterion maps to at least one task. Gaps mean the plan will
|
|
382
|
+
produce code that passes its tasks but misses user-visible requirements.
|
|
383
|
+
Revision: add tasks for uncovered criteria.
|
|
384
|
+
passThreshold: 0.6
|
|
385
|
+
guidance: >-
|
|
386
|
+
Strong answers engage with all five signals: missing completion criteria,
|
|
387
|
+
dependency misordering, mixed concerns, granularity extremes, and coverage
|
|
388
|
+
gaps. Weak answers describe only one or two of these and offer vague
|
|
389
|
+
revisions ("make tasks smaller"). The key differentiator is specificity:
|
|
390
|
+
the candidate should name what to look for AND what to do when they find it.
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Bundled catalog snapshot loader (T016).
|
|
3
|
+
*
|
|
4
|
+
* Loads the YAML topic files shipped inside this directory so the server has a
|
|
5
|
+
* baseline catalog with no network and on first run (FR-025). The bundle now
|
|
6
|
+
* ships the authored curriculum topics (T040–T042, T058) under `claude-code/`
|
|
7
|
+
* and `general/`, alongside the placeholder topic (`general/_placeholder.yaml`).
|
|
8
|
+
*
|
|
9
|
+
* Path resolution: the bundled directory is located relative to THIS module via
|
|
10
|
+
* `import.meta.url`, so it works both when running from source (dev / vitest,
|
|
11
|
+
* where this file is `src/catalog/bundled/index.ts` next to the YAML) and from
|
|
12
|
+
* the built output (`dist/catalog/bundled/index.js`).
|
|
13
|
+
*
|
|
14
|
+
* BUILD NOTE / TODO (T056 packaging): `tsc` compiles `.ts` only and does NOT
|
|
15
|
+
* copy `.yaml` assets into `dist/`. The build pipeline MUST copy
|
|
16
|
+
* `src/catalog/bundled/**\/*.yaml` → `dist/catalog/bundled/**` (e.g. a postbuild
|
|
17
|
+
* `cp -R` / copyfiles / a Vite static-assets step) or the built server will find
|
|
18
|
+
* an empty bundled directory at runtime. Until that copy step exists, the built
|
|
19
|
+
* artifact resolves the same relative path under `dist` and would report zero
|
|
20
|
+
* topics — tests run from `src` and are unaffected.
|
|
21
|
+
*
|
|
22
|
+
* Source of truth: spec FR-025, research.md OD-004.
|
|
23
|
+
*/
|
|
24
|
+
import { type CatalogLoadResult } from "../loader.js";
|
|
25
|
+
/** Absolute path to the directory holding the bundled YAML topic files. */
|
|
26
|
+
// Type declaration only; the value is computed in index.js from this module's own file URL.
export declare const BUNDLED_CATALOG_DIR: string;
|
|
27
|
+
/**
|
|
28
|
+
* Load the bundled catalog snapshot shipped with the package.
|
|
29
|
+
*
|
|
30
|
+
* Resolves the bundled directory relative to this module (works from both `src`
|
|
31
|
+
* and `dist`) and delegates to {@link loadCatalogFromDir}, so the same
|
|
32
|
+
* per-file validation and error-collection semantics apply: valid topics are
|
|
33
|
+
* returned and any malformed bundled file is reported rather than aborting the
|
|
34
|
+
* load (FR-004). A healthy bundle returns `errors: []`.
|
|
35
|
+
*
|
|
36
|
+
* @returns The bundled topics plus any per-file load errors.
|
|
37
|
+
*/
|
|
38
|
+
// Takes no arguments; declared as returning CatalogLoadResult directly (no Promise wrapper in this signature).
export declare const loadBundledCatalog: () => CatalogLoadResult;
|
|
39
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/catalog/bundled/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAIH,OAAO,EAEL,KAAK,iBAAiB,EACvB,MAAM,cAAc,CAAC;AAEtB,2EAA2E;AAC3E,eAAO,MAAM,mBAAmB,QAA0C,CAAC;AAE3E;;;;;;;;;;GAUG;AACH,eAAO,MAAM,kBAAkB,QAAO,iBACS,CAAC"}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Bundled catalog snapshot loader (T016).
|
|
3
|
+
*
|
|
4
|
+
* Loads the YAML topic files shipped inside this directory so the server has a
|
|
5
|
+
* baseline catalog with no network and on first run (FR-025). The bundle now
|
|
6
|
+
* ships the authored curriculum topics (T040–T042, T058) under `claude-code/`
|
|
7
|
+
* and `general/`, alongside the placeholder topic (`general/_placeholder.yaml`).
|
|
8
|
+
*
|
|
9
|
+
* Path resolution: the bundled directory is located relative to THIS module via
|
|
10
|
+
* `import.meta.url`, so it works both when running from source (dev / vitest,
|
|
11
|
+
* where this file is `src/catalog/bundled/index.ts` next to the YAML) and from
|
|
12
|
+
* the built output (`dist/catalog/bundled/index.js`).
|
|
13
|
+
*
|
|
14
|
+
* BUILD NOTE / TODO (T056 packaging): `tsc` compiles `.ts` only and does NOT
|
|
15
|
+
* copy `.yaml` assets into `dist/`. The build pipeline MUST copy
|
|
16
|
+
* `src/catalog/bundled/**\/*.yaml` → `dist/catalog/bundled/**` (e.g. a postbuild
|
|
17
|
+
* `cp -R` / copyfiles / a Vite static-assets step) or the built server will find
|
|
18
|
+
* an empty bundled directory at runtime. Until that copy step exists, the built
|
|
19
|
+
* artifact resolves the same relative path under `dist` and would report zero
|
|
20
|
+
* topics — tests run from `src` and are unaffected.
|
|
21
|
+
*
|
|
22
|
+
* Source of truth: spec FR-025, research.md OD-004.
|
|
23
|
+
*/
|
|
24
|
+
import { fileURLToPath } from "node:url";
|
|
25
|
+
import { dirname, join } from "node:path";
|
|
26
|
+
import { loadCatalogFromDir, } from "../loader.js";
|
|
27
|
+
/** Absolute path to the directory holding the bundled YAML topic files. */
|
|
28
|
+
// Locate this module on disk, then keep only its containing directory.
const bundledIndexModulePath = fileURLToPath(import.meta.url);
export const BUNDLED_CATALOG_DIR = dirname(bundledIndexModulePath);
|
|
29
|
+
/**
|
|
30
|
+
* Load the bundled catalog snapshot shipped with the package.
|
|
31
|
+
*
|
|
32
|
+
* Resolves the bundled directory relative to this module (works from both `src`
|
|
33
|
+
* and `dist`) and delegates to {@link loadCatalogFromDir}, so the same
|
|
34
|
+
* per-file validation and error-collection semantics apply: valid topics are
|
|
35
|
+
* returned and any malformed bundled file is reported rather than aborting the
|
|
36
|
+
* load (FR-004). A healthy bundle returns `errors: []`.
|
|
37
|
+
*
|
|
38
|
+
* @returns The bundled topics plus any per-file load errors.
|
|
39
|
+
*/
|
|
40
|
+
export const loadBundledCatalog = () => {
    // Single-argument join only normalises the path; the shared directory loader does the rest.
    const bundledDir = join(BUNDLED_CATALOG_DIR);
    return loadCatalogFromDir(bundledDir);
};
|
|
41
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/catalog/bundled/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAEH,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACzC,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EACL,kBAAkB,GAEnB,MAAM,cAAc,CAAC;AAEtB,2EAA2E;AAC3E,MAAM,CAAC,MAAM,mBAAmB,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAE3E;;;;;;;;;;GAUG;AACH,MAAM,CAAC,MAAM,kBAAkB,GAAG,GAAsB,EAAE,CACxD,kBAAkB,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC,CAAC"}
|