thoth-agents 0.1.4 → 0.1.6

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,4 @@
1
+ import type { AgentRoleName } from './agent-pack';
1
2
  export type SddPipelineType = 'direct' | 'accelerated' | 'full';
2
3
  export type SddPhaseId = 'requirements-interview' | 'proposal' | 'spec' | 'design' | 'tasks' | 'plan-review' | 'implementation-confirmation' | 'apply' | 'verify' | 'archive';
3
4
  export interface SddPhaseContract {
@@ -8,6 +9,10 @@ export interface SddPhaseContract {
8
9
  producesArtifact: boolean;
9
10
  gate?: 'oracle-review' | 'user-confirmation';
10
11
  owner: 'orchestrator' | 'write-capable-agent' | 'oracle' | 'user';
12
+ artifactSkill?: string;
13
+ artifactMeaning?: string;
14
+ defaultAgentRole?: AgentRoleName;
15
+ alternateAgentRoles?: AgentRoleName[];
11
16
  }
12
17
  export interface SddWorkflowContract {
13
18
  phases: SddPhaseContract[];
@@ -30,6 +35,8 @@ export declare const SDD_PHASES: readonly [{
30
35
  readonly prerequisites: ["requirements-interview"];
31
36
  readonly producesArtifact: true;
32
37
  readonly owner: "write-capable-agent";
38
+ readonly artifactSkill: "sdd-propose";
39
+ readonly defaultAgentRole: "deep";
33
40
  }, {
34
41
  readonly id: "spec";
35
42
  readonly order: 2;
@@ -37,6 +44,8 @@ export declare const SDD_PHASES: readonly [{
37
44
  readonly prerequisites: ["proposal"];
38
45
  readonly producesArtifact: true;
39
46
  readonly owner: "write-capable-agent";
47
+ readonly artifactSkill: "sdd-spec";
48
+ readonly defaultAgentRole: "deep";
40
49
  }, {
41
50
  readonly id: "design";
42
51
  readonly order: 3;
@@ -44,6 +53,9 @@ export declare const SDD_PHASES: readonly [{
44
53
  readonly prerequisites: ["proposal", "spec"];
45
54
  readonly producesArtifact: true;
46
55
  readonly owner: "write-capable-agent";
56
+ readonly artifactSkill: "sdd-design";
57
+ readonly artifactMeaning: "technical-solution-design";
58
+ readonly defaultAgentRole: "deep";
47
59
  }, {
48
60
  readonly id: "tasks";
49
61
  readonly order: 4;
@@ -51,6 +63,8 @@ export declare const SDD_PHASES: readonly [{
51
63
  readonly prerequisites: ["proposal", "spec", "design"];
52
64
  readonly producesArtifact: true;
53
65
  readonly owner: "write-capable-agent";
66
+ readonly artifactSkill: "sdd-tasks";
67
+ readonly defaultAgentRole: "deep";
54
68
  }, {
55
69
  readonly id: "plan-review";
56
70
  readonly order: 5;
@@ -74,6 +88,8 @@ export declare const SDD_PHASES: readonly [{
74
88
  readonly prerequisites: ["implementation-confirmation"];
75
89
  readonly producesArtifact: false;
76
90
  readonly owner: "write-capable-agent";
91
+ readonly defaultAgentRole: "deep";
92
+ readonly alternateAgentRoles: ["quick", "designer"];
77
93
  }, {
78
94
  readonly id: "verify";
79
95
  readonly order: 8;
@@ -81,6 +97,8 @@ export declare const SDD_PHASES: readonly [{
81
97
  readonly prerequisites: ["apply"];
82
98
  readonly producesArtifact: true;
83
99
  readonly owner: "write-capable-agent";
100
+ readonly artifactSkill: "sdd-verify";
101
+ readonly defaultAgentRole: "deep";
84
102
  }, {
85
103
  readonly id: "archive";
86
104
  readonly order: 9;
@@ -88,6 +106,8 @@ export declare const SDD_PHASES: readonly [{
88
106
  readonly prerequisites: ["verify"];
89
107
  readonly producesArtifact: true;
90
108
  readonly owner: "write-capable-agent";
109
+ readonly artifactSkill: "sdd-archive";
110
+ readonly defaultAgentRole: "deep";
91
111
  }];
92
112
  export declare const SDD_WORKFLOW_CONTRACT: SddWorkflowContract;
93
113
  export declare function getSddWorkflowContract(): SddWorkflowContract;
@@ -53,7 +53,7 @@ export declare const BUNDLED_SKILL_REGISTRY: readonly [{
53
53
  readonly purpose: "sdd";
54
54
  }, {
55
55
  readonly name: "sdd-design";
56
- readonly description: "Create technical design artifacts for changes";
56
+ readonly description: "Create technical solution design artifacts for changes";
57
57
  readonly allowedRoles: AgentRoleName[];
58
58
  readonly sourcePath: "src/skills/sdd-design";
59
59
  readonly kind: "skill";
@@ -138,7 +138,7 @@ export declare const SKILL_REGISTRY: readonly [{
138
138
  readonly purpose: "sdd";
139
139
  }, {
140
140
  readonly name: "sdd-design";
141
- readonly description: "Create technical design artifacts for changes";
141
+ readonly description: "Create technical solution design artifacts for changes";
142
142
  readonly allowedRoles: AgentRoleName[];
143
143
  readonly sourcePath: "src/skills/sdd-design";
144
144
  readonly kind: "skill";
@@ -2,6 +2,7 @@ import type { SkillRegistryEntry } from '../core/skills';
2
2
  import type { HarnessArtifact, HarnessDiagnostic } from '../types';
3
3
  export interface CodexSkillLayoutInput {
4
4
  projectRoot: string;
5
+ packageRoot?: string;
5
6
  skills: SkillRegistryEntry[];
6
7
  surfaceId: string;
7
8
  outputMode?: CodexSkillOutputMode;
package/dist/index.js CHANGED
@@ -511,7 +511,7 @@ Push back when context, risk, or assumptions are weak. Avoid verbosity.
511
511
  - Load \`thoth-mem-agents\` and \`requirements-interview\`.
512
512
  - You MUST NOT read or write any file in the workspace except \`openspec/\` coordination artifacts for the SDD pipeline.
513
513
  - Delegate all inspection, writing, searching, debugging, and verification.
514
- - Own the thinking: analyze the request, choose the approach, synthesize facts, make decisions, ask \`{{userQuestionTool}}\`, manage progress, and own root-session memory.
514
+ - Own the thinking: analyze, choose approach, handle task sequencing, synthesize facts, decide, ask \`{{userQuestionTool}}\` for blocking user input, manage progress, own root-session memory, and write the final report.
515
515
  - Use sub-agents for evidence and action, not to outsource architecture or planning.
516
516
  - Never request raw file dumps from sub-agents; ask for findings, paths, line anchors, diffs, verification, and blockers.
517
517
  - Use openspec/ for coordination artifacts, especially
@@ -519,6 +519,7 @@ Push back when context, risk, or assumptions are weak. Avoid verbosity.
519
519
  - Visual or UX work and screenshots always go to {{role.designer}}.
520
520
  - Verify through delegation, not inline.
521
521
  - Verification should follow the user's project instructions and use the smallest sufficient delegated checks: typecheck, lint, focused tests, or build when appropriate.
522
+ - When a harness cannot enforce a rule directly, preserve the rule as instruction-only guidance and disclose the enforcement gap instead of weakening the contract.
522
523
  </core-rules>
523
524
 
524
525
  <session-bootstrap>
@@ -551,18 +552,11 @@ Tiebreakers:
551
552
  <internal-handoff>
552
553
  Before dispatching {{role.designer}}, {{role.quick}}, or {{role.deep}} after discovery, synthesize a compact internal handoff. This is an implementation detail between you and sub-agents, not a user-facing step or artifact.
553
554
 
554
- Internal handoff fields:
555
- - Goal: the specific outcome for this task.
556
- - Decision: the chosen approach and why it is the right next move.
557
- - Evidence: relevant files, symbols, line anchors, docs, constraints, and known invariants from {{role.explorer}}/{{role.librarian}}.
558
- - Scope: exact files/areas to change and non-goals to avoid.
559
- - Steps: ordered implementation instructions, including what to preserve.
560
- - Verification: smallest sufficient checks or visual QA required.
561
- - Uncertainty: remaining unknowns the implementer may resolve locally, plus what should be escalated instead of guessed.
555
+ Internal handoff fields: Goal, Decision, Evidence, Scope, Steps, Verification, and Uncertainty. Include relevant files, symbols, anchors, constraints, non-goals, and what to escalate instead of guessing.
562
556
 
563
557
  Never mention the internal handoff to the user, ask the user to prepare it, or present handoff preparation as the recommended next step. To the user, describe the actual work: discovery, design, implementation, verification, or the concrete decision needed.
564
558
 
565
- For {{role.explorer}}/{{role.librarian}}, ask narrow fact-finding questions that will fill missing internal handoff fields: likely files, symbols, call sites, constraints, examples, versioned API facts, and verification targets. Require decision-ready findings, not raw context.
559
+ For {{role.explorer}}/{{role.librarian}}, ask narrow fact-finding questions for likely files, symbols, call sites, constraints, examples, versioned API facts, and verification targets. Require decision-ready findings, not raw context.
566
560
  </internal-handoff>
567
561
 
568
562
  <dispatch>
@@ -638,10 +632,11 @@ function createReadOnlySpecialistPromptSections(role) {
638
632
  mode: "read-only",
639
633
  dispatch: "task",
640
634
  scope: "local repository discovery",
641
- responsibility: "Find workspace facts fast. Return decision-ready evidence for internal handoffs: paths, lines, symbols, constraints, edit targets, and conclusions.",
635
+ responsibility: "Find workspace facts fast. Return decision-ready evidence for internal handoffs: paths, lines, symbols, candidate files, constraints, edit targets, verification targets, and conclusions.",
642
636
  rules: [
643
637
  "- Questions should be rare; exhaust local evidence first.",
644
638
  "- Prefer paths, lines, symbols, and concise summaries over dumps.",
639
+ "- Do not implement, edit files, mutate the repository, or own durable session memory.",
645
640
  "- When full content is explicitly requested, reproduce it faithfully."
646
641
  ],
647
642
  memoryAccess: "readonly",
@@ -657,11 +652,11 @@ FINDINGS: bullets with claim, evidence type [direct|inferred|assumed], confidenc
657
652
 
658
653
  ALTERNATIVES CONSIDERED: ranked candidates when more than one plausible match exists. Omit if only one candidate.
659
654
 
660
- UNRESOLVED QUESTIONS: what remains ambiguous. State what additional context would unblock the search.
655
+ UNRESOLVED QUESTIONS: ambiguity and what context would unblock it.
661
656
 
662
657
  UNCHECKED AREAS: what you did not inspect that could change the answer. Omit if nothing notable.
663
658
 
664
- SHORT EVIDENCE: at most one short excerpt per key finding, max 2 lines each. Skip if citations are self-explanatory.
659
+ SHORT EVIDENCE: at most one 2-line excerpt per key finding.
665
660
 
666
661
  Lead with STATUS. Stay under 40 lines total when possible. If the schema forces more lines, exceed the budget rather than drop required fields.`
667
662
  });
@@ -671,12 +666,13 @@ Lead with STATUS. Stay under 40 lines total when possible. If the schema forces
671
666
  role,
672
667
  mode: "read-only",
673
668
  dispatch: "task",
674
- scope: "external research plus local confirmation when needed",
675
- responsibility: "Gather authoritative external evidence that helps the orchestrator make implementation decisions. Prefer official docs first, then high-signal public examples. Every substantive claim must carry a source URL.",
669
+ scope: "external docs and research plus local confirmation when needed",
670
+ responsibility: "Gather authoritative external evidence that helps the orchestrator make implementation decisions. Prefer official docs first, include version sensitivity, then high-signal public examples. Every substantive claim must carry a source URL.",
676
671
  rules: [
677
672
  "- Questions should be rare; exhaust available sources first.",
678
673
  "- Prefer official documentation over commentary when both answer the same point.",
679
- "- Distinguish clearly between official guidance and community examples."
674
+ "- Distinguish clearly between official guidance and community examples.",
675
+ "- Do not mutate the repository, invent undocumented APIs, or perform broad implementation work."
680
676
  ],
681
677
  memoryAccess: "readonly",
682
678
  output: `- Organize by finding. Include a source URL for every claim.
@@ -690,11 +686,12 @@ Lead with STATUS. Stay under 40 lines total when possible. If the schema forces
690
686
  mode: "read-only",
691
687
  dispatch: "synchronous task only",
692
688
  scope: "advice, diagnosis, architecture, code review, and plan review",
693
- responsibility: "Provide strategic technical guidance anchored to evidence. Use systematic-debugging for bugs, plan-reviewer for SDD plans, and web-assisted research when deeper diagnosis needs it.",
689
+ responsibility: "Provide read-only review and strategic technical guidance anchored to evidence, including findings, risks, assumptions, and decision-ready conclusions. Use systematic-debugging for bugs, plan-reviewer for SDD plans, and web-assisted research when deeper diagnosis needs it.",
694
690
  rules: [
695
691
  "- Cite exact files and lines for local claims.",
696
692
  "- Separate observations, risks, and recommendations.",
697
- "- Ask only when tradeoffs, risk tolerance, or approval materially change the recommendation."
693
+ "- Ask only when tradeoffs, risk tolerance, or approval materially change the recommendation.",
694
+ "- Do not produce SDD artifacts, implement edits, or mutate the workspace."
698
695
  ],
699
696
  memoryAccess: "readonly",
700
697
  output: `- Cite exact files and lines \u2014 do not quote large code blocks.
@@ -710,13 +707,14 @@ function createWriteCapableSpecialistPromptSections(role) {
710
707
  mode: "write-capable",
711
708
  dispatch: "synchronous task only",
712
709
  scope: "UI/UX decisions, implementation, and visual verification",
713
- responsibility: "Own the user-facing solution end to end: choose the UX approach, implement it, and verify it visually. Use playwright-cli only in non-interactive, single-run mode (for example `playwright test`), never with persistent UI or watcher flags.\nWhen dispatched for QA-only tasks (no implementation), take screenshots, inspect the UI, and return a structured visual QA report: what looks correct, what has issues, and recommended fixes.",
710
+ responsibility: "Own the user-facing solution: choose the UX approach, implement it, and verify it visually across responsive states when screens change. Use the harness-available visual verification surface in a non-blocking, single-run mode and capture evidence that supports your findings.\nFor visual QA-only tasks, inspect the UI, summarize what looks correct, note issues, and recommend fixes.",
714
711
  rules: [
715
712
  "- Treat the orchestrator's internal handoff as the handoff; do not rediscover settled scope or constraints.",
716
713
  "- Own UX decisions instead of bouncing them back unless a real user preference is required.",
717
- "- Verify visually when feasible; do not stop at code that merely compiles.",
714
+ "- Verify visually and check responsive behavior when feasible; do not stop at code that merely compiles.",
718
715
  "- Keep changes focused on the user-facing outcome.",
719
- "- NEVER run blocking or long-running commands: no `playwright test --ui`, `playwright show-report`, `--headed --debug`, dev servers, or watchers. Use single-run variants and capture screenshots/traces as artifacts."
716
+ "- Preserve unrelated working-tree changes.",
717
+ "- Avoid interactive, blocking, or persistent visual verification modes unless explicitly requested; keep verification single-run and evidence-driven."
720
718
  ],
721
719
  memoryAccess: "writable",
722
720
  output: `For SDD tasks: use the Task Result envelope (Status, Task, What was done, Files changed, Verification, Issues).
@@ -731,10 +729,11 @@ For non-SDD work: state what was implemented, verification status, and remaining
731
729
  mode: "write-capable",
732
730
  dispatch: "synchronous task only",
733
731
  scope: "fast bounded implementation",
734
- responsibility: "Implement well-defined changes quickly. Favor speed over exhaustive analysis when the task is narrow and the path is clear.",
732
+ responsibility: "Implement well-defined changes quickly. Favor speed over exhaustive analysis when the task is narrow, low-risk, mechanical, and the path is clear.",
735
733
  rules: [
736
734
  "- Optimize for fast execution on narrow, clear tasks.",
737
735
  "- Treat the orchestrator's internal handoff as the starting point; follow its file anchors, scope, non-goals, and verification target.",
736
+ "- Preserve unrelated working-tree changes.",
738
737
  "- Read only the context you need.",
739
738
  "- Do not redo broad discovery. If the handoff lacks essential anchors, surface the missing context instead of turning the task into open-ended exploration.",
740
739
  "- Avoid multi-step planning; if the task stops being bounded, surface it.",
@@ -757,6 +756,7 @@ For non-SDD work: status + summary + files changed + issues. Nothing more.
757
756
  "- Treat the orchestrator's internal handoff as the architecture handoff; validate it against nearby code, but do not restart upstream discovery unless evidence contradicts it.",
758
757
  "- Do not skip verification \u2014 thoroughness is your value proposition.",
759
758
  "- Investigate related files, types, and call sites before changing shared behavior, prioritizing the anchors and constraints in the handoff.",
759
+ "- Preserve unrelated working-tree changes.",
760
760
  "- Ask when a real architecture or implementation tradeoff blocks correct execution."
761
761
  ],
762
762
  memoryAccess: "writable",
@@ -2896,7 +2896,7 @@ var BUNDLED_SKILL_REGISTRY = [
2896
2896
  },
2897
2897
  {
2898
2898
  name: "sdd-design",
2899
- description: "Create technical design artifacts for changes",
2899
+ description: "Create technical solution design artifacts for changes",
2900
2900
  allowedRoles: ORCHESTRATOR_ONLY,
2901
2901
  sourcePath: "src/skills/sdd-design",
2902
2902
  kind: "skill",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "thoth-agents",
3
- "version": "0.1.4",
3
+ "version": "0.1.6",
4
4
  "description": "Delegate-first OpenCode plugin with seven agents, thoth-mem persistence, and bundled SDD skills.",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -43,6 +43,7 @@
43
43
  "scripts": {
44
44
  "build": "tsup && tsc --emitDeclarationOnly && pnpm run generate-schema",
45
45
  "generate-schema": "tsx scripts/generate-schema.ts",
46
+ "release:notes": "tsx scripts/generate-release-notes.ts",
46
47
  "typecheck": "tsc --noEmit",
47
48
  "test": "vitest run",
48
49
  "lint": "biome lint .",
@@ -1,11 +1,13 @@
1
1
  ---
2
2
  name: sdd-design
3
- description: Create `design.md` with architecture decisions and file changes.
3
+ description: Create `design.md` as a technical solution design with architecture decisions and file changes.
4
4
  ---
5
5
 
6
6
  # SDD Design Skill
7
7
 
8
- Create the technical design that explains how the approved spec will be built.
8
+ Create the technical solution design that explains how the approved spec will
9
+ be built. OpenSpec `design.md` is a technical approach artifact covering
10
+ implementation architecture, tradeoffs, and repository patterns.
9
11
 
10
12
  ## Shared Conventions
11
13
 
@@ -28,6 +30,10 @@ The orchestrator passes the artifact store mode (`thoth-mem`, `openspec`, or
28
30
  - Proposal and specs exist and implementation planning needs technical depth
29
31
  - A prior design needs to be revised after spec changes
30
32
 
33
+ This phase is not a UI/UX design task. Do not route this phase to the designer
34
+ agent merely because it is named `design`; the default implementation owner is a
35
+ technical write-capable role such as `deep`.
36
+
31
37
  ## Prerequisites
32
38
 
33
39
  - `change-name`
@@ -86,6 +92,10 @@ Return:
86
92
  ## Rules
87
93
 
88
94
  - Base the design on the actual codebase, not generic assumptions.
95
+ - Do not route this phase to the designer agent. `sdd-design` itself always
96
+ stays with the technical write-capable agent.
97
+ - Later `sdd-apply` tasks may route to the designer agent when the work is
98
+ specifically user-facing UI, visual work, screenshots, or visual QA.
89
99
  - Every architecture decision must include rationale.
90
100
  - Use concrete file paths and interfaces.
91
101
  - Keep implementation details aligned with the spec and repository patterns.