academic-army 0.3.3 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
  import { runPipelinesCli } from "coding-agent-forge";
3
- import { developingPipeline, developingSkillPipeline } from "developing-agent-forge";
3
+ import { developingPipeline } from "developing-agent-forge";
4
+ import { developingSkillPipeline } from "./developing-skill/index.js";
4
5
  import { evolveSkillPipeline } from "./evolve-skill/index.js";
5
6
  await runPipelinesCli([developingPipeline, developingSkillPipeline, evolveSkillPipeline], process.argv.slice(2));
6
7
  //# sourceMappingURL=cli.js.map
package/dist/cli.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACrD,OAAO,EAAE,kBAAkB,EAAE,uBAAuB,EAAE,MAAM,wBAAwB,CAAC;AACrF,OAAO,EAAE,mBAAmB,EAAE,MAAM,yBAAyB,CAAC;AAE9D,MAAM,eAAe,CACnB,CAAC,kBAAkB,EAAE,uBAAuB,EAAE,mBAAmB,CAAC,EAClE,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CACtB,CAAC"}
1
+ {"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACrD,OAAO,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAC;AAC5D,OAAO,EAAE,uBAAuB,EAAE,MAAM,6BAA6B,CAAC;AACtE,OAAO,EAAE,mBAAmB,EAAE,MAAM,yBAAyB,CAAC;AAE9D,MAAM,eAAe,CACnB,CAAC,kBAAkB,EAAE,uBAAuB,EAAE,mBAAmB,CAAC,EAClE,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CACtB,CAAC"}
@@ -0,0 +1,3 @@
1
+ import type { AgentFactoryMap } from "coding-agent-forge";
2
+ export declare const agentFactories: AgentFactoryMap;
3
+ //# sourceMappingURL=factory.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"factory.d.ts","sourceRoot":"","sources":["../../../src/developing-skill/agents/factory.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAK1D,eAAO,MAAM,cAAc,EAAE,eAG5B,CAAC"}
@@ -0,0 +1,7 @@
1
+ import { developingPipeline } from "developing-agent-forge";
2
+ import { TrajectoryOptimizerAgent } from "./trajectory-optimizer.js";
3
+ export const agentFactories = {
4
+ ...developingPipeline.agentFactories,
5
+ "trajectory-optimizer": (thread, constants) => new TrajectoryOptimizerAgent(thread, constants),
6
+ };
7
+ //# sourceMappingURL=factory.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"factory.js","sourceRoot":"","sources":["../../../src/developing-skill/agents/factory.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAC;AAE5D,OAAO,EAAE,wBAAwB,EAAE,MAAM,2BAA2B,CAAC;AAErE,MAAM,CAAC,MAAM,cAAc,GAAoB;IAC7C,GAAG,kBAAkB,CAAC,cAAc;IACpC,sBAAsB,EAAE,CAAC,MAAM,EAAE,SAAS,EAAE,EAAE,CAAC,IAAI,wBAAwB,CAAC,MAAM,EAAE,SAAS,CAAC;CAC/F,CAAC"}
@@ -0,0 +1,3 @@
1
+ export { agentFactories } from "./factory.js";
2
+ export { TrajectoryOptimizerAgent, type TrajectoryOptimizerVariables, } from "./trajectory-optimizer.js";
3
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/developing-skill/agents/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,EACL,wBAAwB,EACxB,KAAK,4BAA4B,GAClC,MAAM,2BAA2B,CAAC"}
@@ -0,0 +1,3 @@
1
+ export { agentFactories } from "./factory.js";
2
+ export { TrajectoryOptimizerAgent, } from "./trajectory-optimizer.js";
3
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/developing-skill/agents/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,EACL,wBAAwB,GAEzB,MAAM,2BAA2B,CAAC"}
@@ -0,0 +1,12 @@
1
+ import { Agent } from "coding-agent-forge/agent";
2
+ import type { DevelopingAgentVariables } from "developing-agent-forge/agents";
3
+ export type TrajectoryOptimizerVariables = DevelopingAgentVariables & {
4
+ codingStyleSkillPath: string;
5
+ taskBrief: string;
6
+ taskRoundSummary: string;
7
+ metaskillPath: string;
8
+ };
9
+ export declare class TrajectoryOptimizerAgent extends Agent<TrajectoryOptimizerVariables> {
10
+ protected buildPrompt(variables: Readonly<TrajectoryOptimizerVariables>): string;
11
+ }
12
+ //# sourceMappingURL=trajectory-optimizer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"trajectory-optimizer.d.ts","sourceRoot":"","sources":["../../../src/developing-skill/agents/trajectory-optimizer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,0BAA0B,CAAC;AACjD,OAAO,KAAK,EAAE,wBAAwB,EAAE,MAAM,+BAA+B,CAAC;AAG9E,MAAM,MAAM,4BAA4B,GAAG,wBAAwB,GAAG;IACpE,oBAAoB,EAAE,MAAM,CAAC;IAC7B,SAAS,EAAE,MAAM,CAAC;IAClB,gBAAgB,EAAE,MAAM,CAAC;IACzB,aAAa,EAAE,MAAM,CAAC;CACvB,CAAC;AAEF,qBAAa,wBAAyB,SAAQ,KAAK,CAAC,4BAA4B,CAAC;IAC/E,SAAS,CAAC,WAAW,CAAC,SAAS,EAAE,QAAQ,CAAC,4BAA4B,CAAC,GAAG,MAAM;CA2BjF"}
@@ -0,0 +1,32 @@
1
+ import { Agent } from "coding-agent-forge/agent";
2
+ import { readFileSync } from "node:fs";
3
+ export class TrajectoryOptimizerAgent extends Agent {
4
+ buildPrompt(variables) {
5
+ const metaskill = readFileSync(variables.metaskillPath, "utf8");
6
+ return `
7
+ Revise the skill at ${variables.codingStyleSkillPath} so it produces better development trajectories.
8
+
9
+ The metaskill below contains the design goals and tips of this skill:
10
+
11
+ ${metaskill}
12
+
13
+ The sections below describe the task this skill just executed and what happened during that round.
14
+
15
+ Target repository at ${variables.targetPath}/.
16
+
17
+ Goal:
18
+ ${variables.goal}
19
+
20
+ Task Brief:
21
+ ${variables.taskBrief}
22
+
23
+ Reality-aware task round summary:
24
+ ${variables.taskRoundSummary}
25
+
26
+ Evaluate whether the skill produced a good modification trajectory, then edit the skill directly. Focus on missing, misleading, or redundant guidance that affected task selection, coding, or review.
27
+
28
+ Output a concise optimizer report with the main skill changes.
29
+ `;
30
+ }
31
+ }
32
+ //# sourceMappingURL=trajectory-optimizer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"trajectory-optimizer.js","sourceRoot":"","sources":["../../../src/developing-skill/agents/trajectory-optimizer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,0BAA0B,CAAC;AAEjD,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AASvC,MAAM,OAAO,wBAAyB,SAAQ,KAAmC;IACrE,WAAW,CAAC,SAAiD;QACrE,MAAM,SAAS,GAAG,YAAY,CAAC,SAAS,CAAC,aAAa,EAAE,MAAM,CAAC,CAAC;QAChE,OAAO;sBACW,SAAS,CAAC,oBAAoB;;;;EAIlD,SAAS;;;;uBAIY,SAAS,CAAC,UAAU;;;EAGzC,SAAS,CAAC,IAAI;;;EAGd,SAAS,CAAC,SAAS;;;EAGnB,SAAS,CAAC,gBAAgB;;;;;CAK3B,CAAC;IACA,CAAC;CACF"}
@@ -0,0 +1,3 @@
1
+ export { developingSkill, developingSkillArgsOptions, developingSkillPipeline, type DevelopingSkillAgentVariables, type DevelopingSkillOptions, } from "./pipeline.js";
2
+ export { TrajectoryOptimizerAgent, type TrajectoryOptimizerVariables } from "./agents/index.js";
3
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/developing-skill/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EACf,0BAA0B,EAC1B,uBAAuB,EACvB,KAAK,6BAA6B,EAClC,KAAK,sBAAsB,GAC5B,MAAM,eAAe,CAAC;AAEvB,OAAO,EAAE,wBAAwB,EAAE,KAAK,4BAA4B,EAAE,MAAM,mBAAmB,CAAC"}
@@ -0,0 +1,3 @@
1
+ export { developingSkill, developingSkillArgsOptions, developingSkillPipeline, } from "./pipeline.js";
2
+ export { TrajectoryOptimizerAgent } from "./agents/index.js";
3
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/developing-skill/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EACf,0BAA0B,EAC1B,uBAAuB,GAGxB,MAAM,eAAe,CAAC;AAEvB,OAAO,EAAE,wBAAwB,EAAqC,MAAM,mBAAmB,CAAC"}
@@ -0,0 +1,109 @@
1
+ import { AgentTeam, type PipelineOptions } from "coding-agent-forge";
2
+ import { type ProjectDevLoopAgentVariablesByName } from "developing-agent-forge";
3
+ import type { TrajectoryOptimizerVariables } from "./agents/index.js";
4
+ export type DevelopingSkillAgentVariables = ProjectDevLoopAgentVariablesByName & {
5
+ "trajectory-optimizer": TrajectoryOptimizerVariables;
6
+ };
7
+ export declare const developingSkillArgsOptions: {
8
+ readonly "target-path": {
9
+ readonly type: "string";
10
+ readonly description: "Target repository folder to create or modify";
11
+ };
12
+ readonly "goal-path": {
13
+ readonly type: "string";
14
+ readonly description: "Goal document path";
15
+ };
16
+ readonly "achive-dir": {
17
+ readonly type: "string";
18
+ readonly description: "Archive folder for per-iteration reports";
19
+ };
20
+ readonly "max-iterations": {
21
+ readonly type: "string";
22
+ readonly default: "10";
23
+ readonly description: "Maximum number of project iterations";
24
+ };
25
+ readonly "max-task-devloop-iterations": {
26
+ readonly type: "string";
27
+ readonly default: "3";
28
+ readonly description: "Maximum developer/reviewer attempts per selected task";
29
+ };
30
+ readonly "project-progress-memory-path": {
31
+ readonly type: "string";
32
+ readonly description: "Memory directory for project progress continuity";
33
+ };
34
+ readonly "code-design-memory-path": {
35
+ readonly type: "string";
36
+ readonly description: "Memory directory for code design continuity";
37
+ };
38
+ readonly "max-memory-rounds": {
39
+ readonly type: "string";
40
+ readonly default: "3";
41
+ readonly description: "Maximum recall and remember refinement rounds";
42
+ };
43
+ readonly "memory-clean-interval": {
44
+ readonly type: "string";
45
+ readonly default: "0";
46
+ readonly description: "Project iterations between memory clean runs; 0 disables automatic clean";
47
+ };
48
+ readonly "coding-style-skill-path": {
49
+ readonly type: "string";
50
+ readonly description: "Coding-style skill directory or file revised by the optimizer";
51
+ };
52
+ readonly "metaskill-path": {
53
+ readonly type: "string";
54
+ readonly description: "Metaskill design document used by the trajectory optimizer";
55
+ };
56
+ };
57
+ export type DevelopingSkillOptions = PipelineOptions<typeof developingSkillArgsOptions>;
58
+ export declare function developingSkill(team: AgentTeam<DevelopingSkillAgentVariables>, options: DevelopingSkillOptions): Promise<void>;
59
+ export declare const developingSkillPipeline: import("coding-agent-forge").Pipeline<{
60
+ readonly "target-path": {
61
+ readonly type: "string";
62
+ readonly description: "Target repository folder to create or modify";
63
+ };
64
+ readonly "goal-path": {
65
+ readonly type: "string";
66
+ readonly description: "Goal document path";
67
+ };
68
+ readonly "achive-dir": {
69
+ readonly type: "string";
70
+ readonly description: "Archive folder for per-iteration reports";
71
+ };
72
+ readonly "max-iterations": {
73
+ readonly type: "string";
74
+ readonly default: "10";
75
+ readonly description: "Maximum number of project iterations";
76
+ };
77
+ readonly "max-task-devloop-iterations": {
78
+ readonly type: "string";
79
+ readonly default: "3";
80
+ readonly description: "Maximum developer/reviewer attempts per selected task";
81
+ };
82
+ readonly "project-progress-memory-path": {
83
+ readonly type: "string";
84
+ readonly description: "Memory directory for project progress continuity";
85
+ };
86
+ readonly "code-design-memory-path": {
87
+ readonly type: "string";
88
+ readonly description: "Memory directory for code design continuity";
89
+ };
90
+ readonly "max-memory-rounds": {
91
+ readonly type: "string";
92
+ readonly default: "3";
93
+ readonly description: "Maximum recall and remember refinement rounds";
94
+ };
95
+ readonly "memory-clean-interval": {
96
+ readonly type: "string";
97
+ readonly default: "0";
98
+ readonly description: "Project iterations between memory clean runs; 0 disables automatic clean";
99
+ };
100
+ readonly "coding-style-skill-path": {
101
+ readonly type: "string";
102
+ readonly description: "Coding-style skill directory or file revised by the optimizer";
103
+ };
104
+ readonly "metaskill-path": {
105
+ readonly type: "string";
106
+ readonly description: "Metaskill design document used by the trajectory optimizer";
107
+ };
108
+ }, DevelopingSkillAgentVariables>;
109
+ //# sourceMappingURL=pipeline.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../src/developing-skill/pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,SAAS,EAGT,KAAK,eAAe,EAErB,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EAGL,KAAK,kCAAkC,EAExC,MAAM,wBAAwB,CAAC;AAEhC,OAAO,KAAK,EAAE,4BAA4B,EAAE,MAAM,mBAAmB,CAAC;AAEtE,MAAM,MAAM,6BAA6B,GAAG,kCAAkC,GAAG;IAC/E,sBAAsB,EAAE,4BAA4B,CAAC;CACtD,CAAC;AAEF,eAAO,MAAM,0BAA0B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAUC,CAAC;AAEzC,MAAM,MAAM,sBAAsB,GAAG,eAAe,CAAC,OAAO,0BAA0B,CAAC,CAAC;AAExF,wBAAsB,eAAe,CACnC,IAAI,EAAE,SAAS,CAAC,6BAA6B,CAAC,EAC9C,OAAO,EAAE,sBAAsB,GAC9B,OAAO,CAAC,IAAI,CAAC,CAoCf;AAED,eAAO,MAAM,uBAAuB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;iCAQlC,CAAC"}
@@ -0,0 +1,52 @@
1
+ import { definePipeline, } from "coding-agent-forge";
2
+ import { developingArgsOptions, developingPipeline, } from "developing-agent-forge";
3
+ import { agentFactories } from "./agents/index.js";
4
+ export const developingSkillArgsOptions = {
5
+ "coding-style-skill-path": {
6
+ type: "string",
7
+ description: "Coding-style skill directory or file revised by the optimizer",
8
+ },
9
+ "metaskill-path": {
10
+ type: "string",
11
+ description: "Metaskill design document used by the trajectory optimizer",
12
+ },
13
+ ...developingArgsOptions,
14
+ };
15
+ export async function developingSkill(team, options) {
16
+ const codingStyleSkillPath = options["coding-style-skill-path"];
17
+ const metaskillPath = options["metaskill-path"];
18
+ if (codingStyleSkillPath === undefined || metaskillPath === undefined) {
19
+ throw new Error("--coding-style-skill-path and --metaskill-path are required");
20
+ }
21
+ const logRecord = (thread, record) => {
22
+ console.log(thread.recordToPrettyString(record));
23
+ };
24
+ const callbacks = {
25
+ onTaskFinish: async (agentVariables, taskBrief, taskResult) => {
26
+ const trajectoryOptimizer = await team.createAgent("trajectory-optimizer");
27
+ const optimizerReport = (await trajectoryOptimizer.runStreamed({
28
+ ...agentVariables,
29
+ codingStyleSkillPath,
30
+ taskBrief,
31
+ taskRoundSummary: taskResult.taskRoundSummary,
32
+ metaskillPath,
33
+ }, logRecord)).trim();
34
+ console.log(`\n# Skill trajectory optimizer report\n${optimizerReport}\n`);
35
+ },
36
+ };
37
+ const developingOptions = {
38
+ ...options,
39
+ callbacks,
40
+ };
41
+ await developingPipeline.run(team, developingOptions);
42
+ }
43
+ export const developingSkillPipeline = definePipeline({
44
+ name: "developing-skill",
45
+ description: "Run the code development loop and evolve its skill.",
46
+ argsOptions: developingSkillArgsOptions,
47
+ agentFactories,
48
+ async run(team, options) {
49
+ await developingSkill(team, options);
50
+ },
51
+ });
52
+ //# sourceMappingURL=pipeline.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../src/developing-skill/pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,EAEL,cAAc,GAIf,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EACL,qBAAqB,EACrB,kBAAkB,GAGnB,MAAM,wBAAwB,CAAC;AAChC,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAOnD,MAAM,CAAC,MAAM,0BAA0B,GAAG;IACxC,yBAAyB,EAAE;QACzB,IAAI,EAAE,QAAQ;QACd,WAAW,EAAE,+DAA+D;KAC7E;IACD,gBAAgB,EAAE;QAChB,IAAI,EAAE,QAAQ;QACd,WAAW,EAAE,4DAA4D;KAC1E;IACD,GAAG,qBAAqB;CACc,CAAC;AAIzC,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,IAA8C,EAC9C,OAA+B;IAE/B,MAAM,oBAAoB,GAAG,OAAO,CAAC,yBAAyB,CAAC,CAAC;IAChE,MAAM,aAAa,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAAC;IAChD,IAAI,oBAAoB,KAAK,SAAS,IAAI,aAAa,KAAK,SAAS,EAAE,CAAC;QACtE,MAAM,IAAI,KAAK,CAAC,6DAA6D,CAAC,CAAC;IACjF,CAAC;IAED,MAAM,SAAS,GAAmB,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE;QACnD,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,oBAAoB,CAAC,MAAM,CAAC,CAAC,CAAC;IACnD,CAAC,CAAC;IAEF,MAAM,SAAS,GAAG;QAChB,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,SAAS,EAAE,UAAU,EAAE,EAAE;YAC5D,MAAM,mBAAmB,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,sBAAsB,CAAC,CAAC;YAC3E,MAAM,eAAe,GAAG,CACtB,MAAM,mBAAmB,CAAC,WAAW,CACnC;gBACE,GAAG,cAAc;gBACjB,oBAAoB;gBACpB,SAAS;gBACT,gBAAgB,EAAE,UAAU,CAAC,gBAAgB;gBAC7C,aAAa;aACd,EACD,SAAS,CACV,CACF,CAAC,IAAI,EAAE,CAAC;YAET,OAAO,CAAC,GAAG,CAAC,0CAA0C,eAAe,IAAI,CAAC,CAAC;QAC7E,CAAC;KACyC,CAAC;IAE7C,MAAM,iBAAiB,GAAG;QACxB,GAAG,OAAO;QACV,SAAS;KACV,CAAC;IACF,MAAM,kBAAkB,CAAC,GAAG,CAAC,IAAI,EAAE,iBAAiB,CAAC,CAAC;AACxD,CAAC;AAED,MAAM,CAAC,MAAM,uBAAuB,GAAG,cAAc,CAAC;IACpD,IAAI,EAAE,kBAAkB;IACxB,WAAW,EAAE,qDAAqD;IAClE,WAAW,EAAE,0BAA0B;IACvC,cAAc;IACd,KAAK,CAAC,GAAG,CAAC,IAA8C,EAAE,OAA+B;QACvF,MAAM,eAAe,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IACvC,CAAC;CACF,CAAC,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "academic-army",
3
- "version": "0.3.3",
3
+ "version": "0.3.6",
4
4
  "description": "Agent workflows and skills for AcademicArmy.",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -47,9 +47,8 @@
47
47
  "evolve-skill": "tsx src/cli.ts evolve-skill"
48
48
  },
49
49
  "dependencies": {
50
- "@openai/codex-sdk": "^0.134.0",
51
- "coding-agent-forge": "^1.3.2",
52
- "developing-agent-forge": "^2.4.0"
50
+ "coding-agent-forge": ">=1.3.5",
51
+ "developing-agent-forge": ">=2.5.3"
53
52
  },
54
53
  "devDependencies": {
55
54
  "@eslint/js": "^10.0.1",
package/runs/develop.sh CHANGED
@@ -8,7 +8,6 @@ npm run developing -- \
8
8
  --achive-dir "output/developing-archives" \
9
9
  --project-progress-memory-path "output/developing-memory/project-progress-memory" \
10
10
  --code-design-memory-path "output/developing-memory/code-design-memory" \
11
- --coding-style-skill-path "skills/academic-army-coding-style" \
12
11
  --goal-path "output/goal.md" \
13
12
  --max-iterations "100" \
14
13
  --max-task-devloop-iterations "10" \
@@ -46,6 +46,79 @@ Keep these experiment directories when they already exist:
46
46
  Do not force a fixed test directory. Tests follow the repository's existing
47
47
  layout, project configuration, initialization docs, or adjacent test style.
48
48
 
49
+ ### Task Claims vs. Worktree Reality
50
+
51
+ **Treat the task's stated current-state as a claim to verify, not as ground truth.**
52
+ Before acting, check each factual premise against the worktree: the paths,
53
+ importability, or artifacts the task asserts as present or absent. When the
54
+ worktree contradicts the task, surface the contradiction first and re-scope the
55
+ remaining work to the real gap. Do not proceed on a stale premise to manufacture
56
+ work the worktree already satisfies.
57
+
58
+ This is the most important guardrail in the skill. A task brief may be written
59
+ against stale memory, an earlier snapshot, or a plan that was since executed.
60
+ Trust the worktree; memory files and task narratives are secondary.
61
+
62
+ #### Already-Complete-on-Disk Sequence
63
+
64
+ When the worktree already satisfies the task's stated objective, the task is an
65
+ **already-complete-on-disk** case — it was executed in a prior session but the
66
+ memory/trajectory layer was never advanced. The correct sequence is
67
+ verification, not re-implementation:
68
+
69
+ 1. **Inspect** target files, directories, and shims to confirm on-disk state
70
+ matches the task objective.
71
+ 2. **Run scoped verification** the task requests (test suite, import smoke,
72
+ whitespace check). Record exact pass/fail counts that are **from this
73
+ session's rerun**, not copied from a prior slice entry or task narrative.
74
+ Each slice entry in memory must carry re-verified counts.
75
+ 3. **Update all lagging memory/trajectory files** to record the verified
76
+ state — flip phase status, record exact test counts, shim identities,
77
+ import findings, and advance the stale selection pointer. This is the
78
+ primary deliverable in already-complete-on-disk cases. When memory-file
79
+ paths referenced by the task do not exist on disk, create the directory
80
+ chain and the files — an absent directory is a missing scaffold, not a
81
+ blocker.
82
+ 4. Do not re-execute structural work just because memory says it was never
83
+ done. Memory is the stale surface; the worktree is the source of truth.
84
+
85
+ #### Multi-File Memory Quorum
86
+
87
+ When a project tracks the same counter or phase in multiple independent memory
88
+ files (e.g. a design-memory file, a status file, and a validation file), and
89
+ some are already correct while others lag, the **correct file is the quorum
90
+ anchor**, not the task brief alone. The pattern:
91
+
92
+ - **Identify the split**: one file may already show "N completed, M remaining"
93
+ while others still read "N-1 completed, M+1 remaining" and lack the latest
94
+ slice entry. The task brief may even document this drift explicitly.
95
+ - **Verify against the worktree**: the on-disk state (clean `git status`,
96
+ absent shim, passing suite) confirms which files are correct. The worktree
97
+ is always the final arbiter.
98
+ - **Update the lagging files**: bring them to the same counter, add the
99
+ missing slice entry with this-session rerun counts, and remove the completed
100
+ subject from every "remaining"/"next"/tier list they contain. Do not treat
101
+ the lagging files as evidence that work is unfinished — they are the stale
102
+ surface; the worktree plus the correct memory file form the quorum.
103
+ - **After updating, all memory files must agree** on the counter, the
104
+ completed-slice list, and what the next real gap is. Any file that still
105
+ lists a completed subject as "remaining" will cause the next agent to
106
+ re-execute already-done work.
107
+
108
+ #### Bytecode and Order-Dependent Checks
109
+
110
+ Before running the import smoke in an already-complete-on-disk case, **clear
111
+ `__pycache__`** directories that may hold bytecode from a prior session.
112
+ Stale `.pyc` files can make an import smoke silently resolve against old paths,
113
+ masking a real resolution failure. Use the language's normal bytecode-cache
114
+ clearing mechanism (e.g. `find . -type d -name __pycache__ -exec rm -rf {} +`).
115
+
116
+ **Order-dependent test results**: a test suite that passes when modules load in
117
+ one order (e.g. alphabetical) may fail in another (e.g. entrypoint-first).
118
+ After a move that creates or repoints shims, always probe the entrypoint
119
+ import order in addition to the test suite. Isolated import probes catch
120
+ order-dependent partial-init failures that the test runner may dodge.
121
+
49
122
  ## Runtime Binding
50
123
 
51
124
  Keep the skill project-agnostic. Bind names, paths, classes, functions,
@@ -78,6 +151,22 @@ Before editing, establish a small task-relevant inventory:
78
151
  `from <package>.<module> import`, `from <package> import <module>`, and any
79
152
  indirect imports through package `__init__.py` — so the full consumer set is
80
153
  known before the first edit;
154
+ - **monkeypatch surface**: when a module will be moved and replaced by a shim,
155
+ search all test files for `monkeypatch.setattr(<module>, "name", ...)` and
156
+ `from <module> import _<name>` — any underscore-prefixed name that a test
157
+ imports or monkeypatches must appear in the shim's re-export list or the
158
+ test will fail with `ImportError` or `AttributeError`. Also audit **module-object attribute
159
+ access**: when a test does `from <pkg> import <module>` and then accesses
160
+ `<module>.<attr>`, the shim must expose `<attr>` (stdlib singletons like
161
+ `subprocess`, `os` are common targets — re-export them from the canonical
162
+ module even if no consumer does `from <module> import <attr>`);
163
+ - **relative-import depth**: when a file moves from a flat package into a
164
+ subpackage, count the new depth. From a module inside `<pkg>.sub1.sub2`, one dot is
165
+ `<pkg>.sub1.sub2`, two dots is `<pkg>.sub1`, three dots is `<pkg>`. Read
166
+ every `from .X import` / `from ..X import` line in the file being moved and
167
+ adjust each dot-count to reach the same target from the new location. A file
168
+ with only stdlib imports needs zero changes. A file with sibling imports to
169
+ other root-level modules needs one extra dot per subpackage level;
81
170
  - accepted constructor fields, identity fields, validation owner, provenance
82
171
  fields, and export surfaces for record-backed helpers;
83
172
  - accepted callable signatures, default values, aggregation or identity keys,
@@ -86,47 +175,16 @@ Before editing, establish a small task-relevant inventory:
86
175
  - task-stated current-state claims — what the task says already exists, is
87
176
  missing, is on or off a path, or has or has not run — reconciled against the
88
177
  actual worktree before acting, with contradictions surfaced as the first
89
- finding.
178
+ finding. **Verify every claimed gap, not just the headline objective** — a
179
+ task brief may assert N things are missing but the worktree may show M of
180
+ them already done. Surface the exact subset that is complete vs. the subset
181
+ that is genuinely missing rather than proceeding on all N as if they were
182
+ equally stale.
90
183
 
91
184
  Treat a suddenly empty or partially missing tree as an integrity blocker. Do not
92
185
  reconstruct missing code from memory, plans, reports, or old outputs unless the
93
186
  user asks for restoration from a trusted source.
94
187
 
95
- Treat the task's stated current-state as a claim to verify, not as ground truth.
96
- Before acting, check each factual premise against the worktree: the paths,
97
- importability, or artifacts the task asserts as present or absent. When the
98
- worktree contradicts the task — artifacts the task calls absent already exist, a
99
- module the task calls off-path is importable, a run the task says never happened
100
- already produced outputs — surface the contradiction first and re-scope the
101
- remaining work to the real gap. Do not proceed on a stale premise to manufacture
102
- work the worktree already satisfies, and do not overwrite or ignore existing
103
- artifacts to match the task's description. Accepted work is what the real gap
104
- requires, not what the task's narrative implies.
105
-
106
- When the worktree already satisfies the task's stated objective (the target
107
- package exists, the files are in place, the shims are written, the structure
108
- matches the refactor plan), the task is an **already-complete-on-disk** case.
109
- The correct operational sequence is verification, not re-implementation:
110
-
111
- 1. Run the scoped verification the task requests (test suite, import check,
112
- whitespace check).
113
- 2. When verification passes, the primary deliverable is updating memory and
114
- trajectory files to record the verified state — flipping the phase status,
115
- recording exact test counts, shim identities, and import findings, and
116
- advancing the stale selection pointer.
117
- 3. Do not re-execute the structural work (re-create files, re-split modules,
118
- re-write shims) just because memory says it was never done. Memory is the
119
- stale surface; the worktree is the source of truth.
120
- 4. After updating memory, the next-slice pointer should move past this phase
121
- to the next real gap. List candidate next phases without selecting one
122
- unless the task or active workflow explicitly selects it.
123
-
124
- This pattern is common in phased refactors where work was done in a prior
125
- session but memory was never updated. The no-op guard in memory files (e.g.
126
- "N consecutive no-op rounds") exists precisely to detect and break this cycle:
127
- when memory says "not started" but the worktree says "done," verify and record,
128
- do not re-execute.
129
-
130
188
  If the request names an exact allowed-file set, edit only those files. Do not
131
189
  touch package entrypoints, export tests, docs, registries, harnesses, artifact
132
190
  writers, TODO or memory files, generated outputs, or adjacent modules unless
@@ -198,6 +256,15 @@ Classify the task before editing:
198
256
  those behaviors.
199
257
  - **Refactor or cleanup**: move, split, merge, rename, or delete code only to
200
258
  improve locality, readability, or testability for the current change.
259
+ - **Shim deletion (cleanup slice)**: migrate every consumer of a re-export shim
260
+ to the canonical path, then delete the shim file from disk and git.
261
+ This is the closing phase of staged package migration — the shim was a
262
+ temporary bridge; now it is removed. Assess complexity before starting: count
263
+ re-exported names, count consumers, check for monkeypatch bindings to the
264
+ shim's module object, and check whether any root-level shim transitively
265
+ imports through the target shim. A monkeypatch-free shim with few names and
266
+ few consumers is the simplest case; monkeypatch bindings or transitive root-shim
267
+ imports require additional care but do not block deletion.
201
268
  - **Harness work**: keep harness code under the relevant `harness/` area; make
202
269
  objective, inputs, metrics, raw artifacts, and run loop explicit.
203
270
  - **Test work**: place tests in the existing test system's natural location and
@@ -401,7 +468,9 @@ A proper shim:
401
468
  them — but when in doubt, re-export the full surface to be safe;
402
469
  - has no logic, no side effects, and no new imports beyond the re-export line;
403
470
  - lives as a temporary bridge; the shim is deleted once all consumers have
404
- migrated to the canonical path.
471
+ migrated to the canonical path. See the **Shim Deletion** section below for
472
+ the full deletion protocol: consumer inventory, migration ordering,
473
+ verification checklist, and root-shim transitive handling.
405
474
 
406
475
  Before writing a shim, inventory the full import surface of the module being
407
476
  moved: search for `from <package>.<module> import`, `from .<module> import`,
@@ -433,7 +502,7 @@ module look up the function through the shim at call time (e.g. `import
433
502
  bare-name import. This keeps the monkeypatch-able namespace as the single
434
503
  point of indirection without changing any logic.
435
504
 
436
- This pattern applies when the monkeypatch target is a **refabr-internal
505
+ This pattern applies when the monkeypatch target is a **repo-internal
437
506
  function binding** — a function defined inside the repository that the
438
507
  canonical module imports via `from <module> import <func>`. A different
439
508
  mechanism applies when the monkeypatch target is a **shared stdlib singleton**
@@ -448,7 +517,7 @@ the stdlib module binding (e.g. `from <canonical> import importlib`) so that
448
517
  `<shim>.<stdlib_module>` resolves. Before applying either fix, determine
449
518
  which mechanism is in play: test `import sys; <canonical>.<module> is
450
519
  sys.modules['<module>']`. If true, it's shared-singleton propagation and needs
451
- only shim re-export. If false, it's a refabr function binding and needs the
520
+ only shim re-export. If false, it's a repo-internal function binding and needs the
452
521
  module-level-alias fix.
453
522
 
454
523
  Also verify **transitive import direction** — not just what the new module
@@ -498,6 +567,287 @@ dedicated decoupling pass. Do not reorder imports, add lazy imports, or
498
567
  restructure sibling dependencies to force the `__init__.py` re-export — those
499
568
  are behavior-sensitive changes that belong to a later phase.
500
569
 
570
+ **Dependency-group ordering across sub-phases**: when moving a set of related
571
+ modules into a shared subpackage, move the modules with no intra-group
572
+ dependencies first (the ones that import only domain, ports, stdlib, or
573
+ third-party code). Move modules that import other members of the same group
574
+ only after their dependencies are co-located in the subpackage. Example: if
575
+ `<b>.py` does `from .<a> import <name>` and both are intended for
576
+ `adapters/<group>/`, move `<a>.py` first. In the next sub-phase, `<b>.py` can
577
+ follow with a clean sibling `.` import. Moving `<b>` before `<a>` creates a
578
+ mixed-state `..<a>` reference to the still-flat root file, which resolves
579
+ differently under test collection and violates the co-location contract. Audit
580
+ intra-group imports before selecting the first module in any new subpackage,
581
+ and name the dependency chain in the task brief so later sub-phases have clear
582
+ ordering.
583
+
584
+ ### Shim Deletion
585
+
586
+ When all consumers of a re-export shim have been migrated (or the task is to
587
+ migrate them as part of the deletion), the shim can be removed. This closes the
588
+ staged-package-migration lifecycle.
589
+
590
+ **Complexity assessment.** Before editing, inventory:
591
+
592
+ - **Re-exported names**: what the shim exports. Note the distinction between
593
+ names the shim re-exports (often the full canonical surface — dozens of names)
594
+ and names consumers actually import (typically a small subset). Only the
595
+ consumer-imported names matter for migration; the canonical location already
596
+ owns the full surface and remains available at the canonical path.
597
+ - **Consumer set**: every import site that references the shim's module path, in
598
+ both source and test trees. Search all import forms: `from <shim_path> import`
599
+ (absolute), `from .<shim> import` (1 dot), `from ..<shim> import` (2 dots),
600
+ `from ...<shim> import` (3 dots), and beyond for deeper nesting. Also check
601
+ indirect imports through package `__init__.py`. Count **sites, not files** —
602
+ a single consumer file may have multiple import sites (e.g. a top-level import
603
+ plus a deferred import inside a function body), and every site must be
604
+ migrated. A consumer nested 3+ levels deep needs the corresponding dot count —
605
+ stopping at 2 dots misses it. Note which specific names each consumer imports —
606
+ this is the verification target, not the full shim re-export list.
607
+ - **Monkeypatch bindings**: search tests for `monkeypatch.setattr(<shim_module>, ...)`
608
+ (including the string-target form `monkeypatch.setattr("<shim_path>.<name>", ...)`) and `setattr(<shim_module>, ...)`. Zero hits = simplest case; no shim-module
609
+ binding needed after deletion. Positive hits = complex case; the monkeypatch
610
+ surface requires a shim-module binding to survive. **Memory tier labels are
611
+ claims, not facts** — re-verify monkeypatch counts against the worktree every
612
+ slice, even when memory files classify a shim as "easy-tier no monkeypatch."
613
+ A memory file written in a prior session may predate the discovery of
614
+ monkeypatch bindings; the worktree grep is the truth.
615
+ - **Deferred-shim consumers**: when a consumer of the current shim is itself a
616
+ deferred root shim (its own deletion belongs to a future slice), its import
617
+ line must still be redirected to prevent `ImportError` when the current shim
618
+ is deleted. Redirect only the import line — do not delete or rewrite the
619
+ deferred shim. Its own deletion is a separate future slice.
620
+ - **Root-shim transitives**: check whether any root-level re-export shim imports
621
+ names from the target shim's module path. If a root shim does `from .<target_shim>
622
+ import <name>`, trace whether it already gets those names through a canonical
623
+ path transitively — the root shim may import from `<intermediate_module>` which
624
+ itself imports from the target shim. If the root shim's import chain survives
625
+ after migration (root → intermediate → canonical), the root shim needs no edit.
626
+ - **`__init__.py` gate**: check whether any package `__init__.py` imports from
627
+ the shim. If `<pkg>/__init__.py` does `from .<shim> import (...)`, the redirect
628
+ is a **blocking gate** — the shim cannot be deleted until `__init__.py` is
629
+ retargeted to the canonical path. This redirect is order-sensitive:
630
+ `__init__.py` runs first in any `import <pkg>`, so it must resolve first.
631
+ Verify `import <pkg>` succeeds before deleting the shim. After the redirect,
632
+ `__init__.py` must re-export the same names verbatim from the canonical path.
633
+ - **`_`-prefixed private names**: the shim may re-export underscore-prefixed
634
+ private names that the canonical module's own public consumers don't use.
635
+ Before deletion, grep test files for imports of any `_`-prefixed name from
636
+ the shim path. If found, those names are part of the test contract and must
637
+ be included in the canonical import verification. If none found (typical),
638
+ note the result and proceed.
639
+
640
+ **Migration order.** Migrate every consumer before deleting the shim:
641
+
642
+ 0. **`__init__.py` gate first** (if applicable): when any package `__init__.py`
643
+ imports from the shim, redirect it to the canonical path before any consumer
644
+ migration. This is order-sensitive — `__init__.py` runs first in `import <pkg>`.
645
+ Preserve all imported names verbatim. Verify with `import <pkg>` before
646
+ proceeding. The shim stays on disk until all consumers (including `__init__.py`)
647
+ resolve to canonical.
648
+
649
+ 1. Calculate the correct import depth for each consumer:
650
+ - Files **inside the same subpackage** as the canonical module use a sibling
651
+ import: `from .<canonical> import <name>` (one dot).
652
+ - Files **outside that subpackage** import from the parent level:
653
+ `from ..<canonical_pkg>.<canonical> import <name>` (count dots to reach the
654
+ common ancestor, then down to the canonical module).
655
+ - **Root-module consumers**: a root-level module in the same package as the
656
+ shim that imports the shim by one dot (`from .<shim> import`) is still a
657
+ consumer. After migration: `from .<subpackage>.<canonical> import` — the dot
658
+ count stays 1, reaching into the canonical's subpackage. Root-module consumers
659
+ are easy to miss because they aren't nested in a subdirectory; inventory them
660
+ explicitly when the shim lives at the package root.
661
+ - **Multi-file canonical targets**: when the shim re-exports names from
662
+ multiple separate canonical files (e.g. one class per file under a shared
663
+ subpackage), apply the depth rule to each file individually. The shared
664
+ subpackage prefix sets the depth; only the filename differs per import.
665
+ - **`as` aliases**: when a consumer imports a name with an `as` alias
666
+ (`from <shim> import <Name> as <Alias>`), migrate the alias verbatim:
667
+ `from <canonical> import <Name> as <Alias>`. Dropping or altering an alias
668
+ creates a silent name mismatch — the import succeeds but downstream code
669
+ that uses the alias name fails with `NameError`. Grep consumers for
670
+ `as` clauses before editing.
671
+ - **`import <module> as <alias>`**: when a consumer imports the entire shim
672
+ module as an object (`import <shim> as <alias>`), the redirect is
673
+ `from <canonical_pkg> import <canonical> as <alias>`. Preserve the alias
674
+ verbatim — downstream code that does `<alias>.<func>(...)` must resolve to
675
+ the same module object after migration. This pattern often co-occurs with
676
+ `monkeypatch.setattr(<alias>, ...)` — the alias name is the continuity
677
+ binding.
678
+ 2. Edit every consumer's import line to resolve to the canonical path. Migrate
679
+ source consumers first (if any), then test consumers. A shim with zero source
680
+ consumers and only test consumers is valid — proceed directly to the test
681
+ imports. Order within each group does not matter when the shim is still on
682
+ disk during migration. For multi-file targets, edit
683
+ only the names each consumer actually imports — do not expand a partial
684
+ consumer to import all names just because the shim re-exports all of them.
685
+
686
+ **Surgical migration**: when a consumer file has adjacent import blocks that
687
+ reference different root shims (e.g. `from ..<current_shim> import (...)`
688
+ on one line and `from ..<deferred_shim> import (...)` on the next), migrate
689
+ only the block that references the current shim. Leave the adjacent
690
+ deferred-shim block untouched — its migration belongs to its own future slice.
691
+
692
+ **Bulk path-only migration**: when the slice is path-only (no aliases to
693
+ preserve, no monkeypatch bindings, no continuity redirects — only the
694
+ module path changes), a depth-ordered regex sweep replaces every consumer
695
+ in a single pass. Order the sweeps by depth: 3-dot consumers first (the
696
+ deepest), then 2-dot, then 1-dot root modules, then absolute imports.
697
+ This prevents an earlier sweep from rewriting a deeper import into an
698
+ incorrect intermediate form. The only exceptions are `__init__.py`
699
+ (handled first, step 0) and `import … as <alias>` lines (continuity
700
+ bindings requiring hand-edit). After the sweep, verify every rewritten
701
+ import line resolves to the correct depth and preserves the same names.
702
+ 3. After all consumers are migrated, delete the shim: `git rm <path_to_shim>`.
703
+ Leave no comment-only stub, empty placeholder, or `.bak` rename.
704
+
705
+ **Verification checklist.** After deletion, run in order:
706
+
707
+ 1. **Shim-gone check**: `python -c "import <shim_path>"` must fail with
708
+ `ModuleNotFoundError`. Confirm the file does not exist on disk.
709
+ 2. **Canonical-import check**: `python -c "from <canonical_path> import <exported_names>"` must succeed for every name that any consumer imports. Verify only the consumer-imported subset — the canonical location owns the full surface; checking every name the shim re-exports is unnecessary when the canonical location was not modified. **Byte-identity**: confirm the canonical file was not modified by the migration — `git diff -- <canonical_subpackage>/` must be empty. (For a single-file canonical target, `git diff -- <path_to_canonical_file>` is sufficient.) The slice only rewrites consumer import lines; the canonical source is the immutable source of truth.
710
+ 3. **Scoped test suite**: run the test files that were edited plus any test files
711
+ that exercise the deleted shim's exports. Expected count should match
712
+ pre-migration baseline — no new failures. **When monkeypatch bindings exist**,
713
+ this step is the continuity smoke: the scoped run must include the test file
714
+ with the `monkeypatch.setattr(<alias>, ...)` calls, and all must pass.
715
+ A pass here (not just import resolution) proves that the test patches the
716
+ same module object that the canonical code calls.
717
+ 4. **Full test suite**: run the repository's full test command. The pass/fail count
718
+ must match the pre-migration baseline exactly. Any new failure is a migration
719
+ defect.
720
+ 5. **Entrypoint-order import smoke**: import modules in the order that exercises
721
+ the package's initialization dependencies. This catches order-dependent
722
+ partial-init failures that alphabetical test collection can mask. When the
723
+ deleted shim was imported through an eager package `__init__.py` chain
724
+ (e.g. `<pkg>.__init__` → `<subpkg>.__init__` → `<consumer>` → shim), the
725
+ smoke must exercise that exact chain with the shim deleted. **Construction
726
+ formula**: build a single import command that imports the nearest enclosing
727
+ package entrypoint of the migrated consumers (the closest `__init__.py`
728
+ ancestor for each consumer subpackage) plus the canonical module itself,
729
+ ordered from the lowest-layer package upward. For example, if consumers live
730
+ in `<pkg>.experiments`, `<pkg>.evidence.summarizers`, and the root
731
+ `<pkg>`, the smoke imports `<pkg>.experiments`, `<pkg>.evidence.summarizers`,
732
+ and `<pkg>.<canonical_subpackage>.<canonical>`. A pass means every module
733
+ resolves without `ImportError`.
734
+ 6. **Absence grep**: search the source and test trees for any remaining import
735
+ of the deleted shim's module path. Use a catch-all regex that matches all
736
+ depth forms and absolute forms at once: `from.*<shim_name> import`, plus `import.*<shim_name>` for whole-module imports, across
737
+ `src/` and `tests/`. Then filter out the canonical module's own sibling
738
+ self-import (e.g. `from .<canonical> import` inside the canonical file
739
+ itself). Every remaining hit must be a canonical-path import — any hit that
740
+ resolves to the old shim path is a blocking leftover. Do not rely on
741
+ listing specific depth forms (1-dot, 2-dot, 3-dot) individually; a
742
+ catch-all regex catches every depth at once. (Documentation references to
743
+ the old path are not code defects — they are documentation staleness,
744
+ tracked separately.)
745
+ 7. **Root-shim transitives**: verify any root shim that previously resolved names
746
+ through the deleted shim still imports them correctly through the new canonical
747
+ chain. An `import <root_shim>` smoke test is sufficient when the root shim's
748
+ own import chain was not edited.
749
+ 8. **Whitespace check**: `git diff --check` from the repository root. Report
750
+ findings in files the task did not touch as pre-existing; note them but do not
751
+ fix them.
752
+
753
+ Treat a new test failure, a broken import smoke, or a leftover import of the
754
+ deleted shim path as a blocking defect. A pre-existing test failure that is
755
+ unchanged from baseline is not a defect.
756
+
757
+ **Stale bytecode.** After deleting the shim, clear the language's bytecode
758
+ cache under the source tree (e.g. `find <src_root> -type d -name __pycache__ -exec rm -rf {} +`).
759
+ A `.pyc` file from a prior session can make `import <shim_path>` silently
760
+ resolve against the deleted source, masking a real resolution failure. Run
761
+ the shim-gone check after clearing, not before.
762
+
763
+ **Multi-file canonical targets**: when the shim re-exports names from many
764
+ separate files (e.g. one class per file), the byte-identity check (step 2)
765
+ still applies per file. Verify each canonical file individually — the shared
766
+ subpackage prefix may mask a single-file modification when checking the whole
767
+ directory at once.
768
+
769
+ **Do not** migrate other shims in the same slice unless the task explicitly
770
+ scopes them. One shim per cleanup slice keeps review simple and defects local.
771
+ Root shims that transitively chain through the deleted shim survive without
772
+ edits when their import chain resolves through the canonical location —
773
+ verify this, do not assume it.
774
+
775
+ **Canonical importing through not-yet-deleted root shims.** In phased refactors,
776
+ a canonical module may itself import through root shims that are still on disk
777
+ (e.g. `from ..<upstream_shim> import (...)` inside `<canonical_subpackage>/<canonical>.py`).
778
+ These are the canonical module's own upstream dependencies — they were not
779
+ migrated when the canonical module was moved, and they belong to their own
780
+ future shim-deletion slices. During the current shim's deletion, the canonical
781
+ module's root-shim imports are a **frozen surface**: they must be left
782
+ untouched, and the byte-identity check (`git diff -- <canonical_subpackage>/`)
783
+ proves they were preserved. The only edits in the current slice are consumer
784
+ import lines; the canonical module's internal imports are out of scope.
785
+
786
+ Shims typically re-export every name the canonical location exposes — often
787
+ dozens of names — even when only a handful have active consumers. This is
788
+ expected: the shim was a wholesale bridge, not a curated export list. When
789
+ deleting, focus inventory and verification on the consumer-imported subset.
790
+ The full canonical surface remains available at the canonical path for any
791
+ future consumer that needs it.
792
+
793
+ ### Move-Only Refactor Verification Checklist
794
+
795
+ When a task is a pure move-only refactor (files relocated, shims left behind,
796
+ no logic changes), the verification surface is specific and mechanical. After
797
+ creating the canonical files and root shims, run this checklist in order:
798
+
799
+ 1. **Import smoke**: import every moved module through both the shim path and
800
+ the canonical path in a single smoke command. For Python, use
801
+ `python -B -c "import <shim_path>, <shim_path2>, <canonical_path>, <canonical_path2>"`.
802
+ An `ImportError` or `ModuleNotFoundError` at this stage catches depth errors
803
+ before the test suite runs.
804
+
805
+ 2. **Scoped test suite**: run the exact test files the task brief names. Record
806
+ the pass/fail count. The pre-existing failure count (if any) should match the
807
+ task brief's documented baseline — no new failures introduced.
808
+
809
+ 3. **Byte-identical verification**: diff each moved canonical file against the
810
+ git-HEAD original. The only permitted difference is the import-depth line(s)
811
+ that changed because the file moved one or more package levels deeper. No
812
+ logic, signature, docstring, or whitespace changes. If the file has no
813
+ relative repo-internal imports, it must be byte-identical.
814
+
815
+ 4. **Whitespace check**: run `git diff --check` from the repo root. Whitespace
816
+ findings in files the task did not touch are pre-existing and should be noted
817
+ as such, not fixed.
818
+
819
+ 5. **Import direction scan**: verify the new canonical module does not import
820
+ back through the root shim. Grep the canonical file for any import that
821
+ resolves to the old shim path. For Python, search for `from <shim_relative_path> import`
822
+ patterns that would create a circular chain.
823
+
824
+ 6. **Downstream `__init__.py` circular-import check**: when a flat module is
825
+ moved and its old location becomes a shim, any package `__init__.py` that
826
+ eagerly imports through the shim can create a circular chain that did not
827
+ exist when the old location was a flat file. For each package `__init__.py`
828
+ in the repository, trace whether an eager module-level import reaches the
829
+ shim and whether the shim's canonical target imports back into that package.
830
+ If found, classify as pre-existing (the `__init__.py` re-export and the
831
+ shim both predate this slice) or task-induced (the current move created the
832
+ chain). Task-induced circular imports are blocking defects. Pre-existing ones
833
+ are recording targets — but if they block test collection in the scoped
834
+ suite, narrow the test command to the collectable subset, record the excluded
835
+ file and the circular chain in the verification report, and file the
836
+ pre-existing violation as a deferred decoupling gap in memory. Do not
837
+ silently omit uncollectable tests from the command without noting them.
838
+
839
+ 7. **Root-init check**: confirm `__init__.py` of the package root has zero diff
840
+ (not widened). Only the new subpackage's own `__init__.py` is new.
841
+
842
+ 8. **Memory update**: after all checks pass, update the memory/trajectory files
843
+ the task brief names with the verified state, exact test counts, shim
844
+ identities, and the fact that the phase is now complete. Create the memory
845
+ file directory if it does not yet exist.
846
+
847
+ This checklist is the minimum acceptance gate for every move-only refactor
848
+ slice. Skipping any step risks silent import failures, circular dependencies,
849
+ or stale memory that causes future rounds to re-execute completed work.
850
+
501
851
  ### Domain-Local Extraction vs. Cross-Module Dedup
502
852
 
503
853
  When a task asks to extract shared helpers, distinguish two operations with
@@ -827,95 +1177,28 @@ caveat or candidate, not as a selected handoff.
827
1177
 
828
1178
  ## Trajectory And TODO Maintenance
829
1179
 
830
- Trajectory files should record accepted facts, exact validation commands and
831
- results, cache cleanup or no-cache findings, and explicit exclusions that
832
- preserve scope.
833
-
834
- For docs-only or TODO-only accepted work, record the readback and targeted
835
- search checks that replaced test execution, and state that tests were skipped
836
- because no executable code or test files changed.
837
- For a no-op docs sync, record the targeted searches and requested-surface
838
- readback that proved the docs were already current, plus a changed-file check
839
- showing no requested docs were modified.
840
-
841
- For scoped docs-only or TODO-only work, also record a changed-file check or
842
- equivalent scope check showing that edits stayed inside the allowed file set.
843
- If an executable, test, dependency, export, harness, generated artifact, or
844
- paper-result file changed accidentally, treat the run as no longer docs-only
845
- and validate or repair according to the user's scope.
846
-
847
- Do not use TODO or handoff files to invent the next source, harness, docs, or
848
- experiment task. Select a next task only when the user has explicitly selected
849
- it, the current workflow instruction names that handoff, or an existing active
850
- trajectory already contains that selected task. Otherwise leave a neutral
851
- waiting state such as "no next developer task is selected."
852
-
853
- Listing **candidate next phases** (e.g. "Next slice options: <phase_a> or
854
- <phase_b>") is not the same as selecting a next task. Candidates document
855
- the real gaps visible after the current phase completes — they inform a future
856
- selection decision but do not make one. A memory file that ends with a
857
- candidate list and no explicit selection is in a neutral waiting state, not a
858
- selected handoff. When updating memory after completing a phase, advance the
859
- stale pointer past the completed phase and list the remaining real gaps as
860
- candidates; do not promote any candidate to "selected next task" unless the
861
- user, workflow, or active trajectory explicitly selects it.
862
-
863
- When the accepted source/test task explicitly excluded docs, TODO, exports,
864
- harnesses, experiments, or generated outputs, preserve that exclusion in the
865
- trajectory. A later TODO-only pass may record accepted work and verified stale
866
- surfaces, but it must not turn excluded surfaces into selected follow-up work
867
- without explicit task selection.
868
- Verified stale docs, exports, harnesses, or artifacts are evidence for a future
869
- task-selection pass, not a selected next task by themselves. A repository habit,
870
- recent sequence, or reasonable maintenance preference is not explicit selection
871
- when the just-finished task excluded that surface. Require selection language
872
- from the user, a workflow instruction, or an already-active backlog item before
873
- writing "next developer task: sync docs" or any equivalent handoff after a
874
- source/test task that excluded docs.
875
- Explicit exclusions in the current task are not backlog seeds. If the user says
876
- not to add a capability, parser family, registry, export, adapter, harness,
877
- CLI, artifact, experiment, or paper output, a TODO-only pass may record that
878
- the exclusion was preserved, but must not select that excluded capability as
879
- the next task unless a later explicit task-selection input asks for it.
880
-
881
- After an accepted docs-only or TODO-only update, treat any next implementation
882
- task as a separate task-selection decision, not as a consequence of making docs
883
- current. If a next source/test task is recorded, tie it to an explicit upstream
884
- selector, accepted backlog item, or already-scanned stale implementation gap;
885
- otherwise leave the trajectory neutral.
886
- Do not use a docs-only sync that merely documented an accepted helper as the
887
- reason to select an adjacent implementation task. Newly visible omissions in
888
- the docs may be recorded as candidates for later selection, but the next
889
- developer task stays neutral unless the user or active workflow explicitly
890
- selects that implementation work.
891
-
892
- If a docs-only sync is explicitly selected, name the exact stale current
893
- surfaces found in a read-only scan and make clear that it is a separate future
894
- pass, not part of a source/test task that excluded docs. If no live stale
895
- surface was verified, do not create a generic documentation task.
896
-
897
- When a handoff selects a docs-only follow-up, include a short stale-surface map:
898
- the document files and surface types to update, such as helper/API lists,
899
- emitted names, package or module summaries, layout rows, test summaries, or
900
- absence clauses. A generic "sync docs for <accepted change>" task is not enough
901
- unless those concrete stale surfaces are also named. Keep the handoff small:
902
- name stable contracts and stale surface types, not every fixture ID,
903
- selected-object list, or assertion from the tests.
904
-
905
- If accepted review tightened a discriminator, rejection reason, metadata value,
906
- or mutation target that docs must preserve, carry that exact detail into the
907
- handoff scope. Do not copy unrelated fixture lists merely because they were
908
- accepted in tests.
909
-
910
- When review corrections changed wording, hierarchy, or absence scope, record
911
- the final accepted correction as the current contract. Do not preserve rejected
912
- draft wording as a new TODO item unless the reviewer or user explicitly asks
913
- for a follow-up.
914
-
915
- After validation-only work, record only the command, result, no-fix status, and
916
- cache cleanup/no-cache finding. A green validation run confirms current
917
- contracts; it does not create new feature, docs, export, harness, or experiment
918
- work.
1180
+ Trajectory files record accepted facts: exact validation commands and results,
1181
+ cache findings, and explicit exclusions that preserve scope.
1182
+
1183
+ **Do not invent the next task.** Select a next task only when the user,
1184
+ workflow, or active trajectory explicitly selects one. Otherwise leave a
1185
+ neutral waiting state.
1186
+
1187
+ Listing **candidate next phases** ("Next slice options: <phase_a> or <phase_b>")
1188
+ documents real gaps without selecting one. A memory file that ends with a
1189
+ candidate list and no explicit selection is in a neutral waiting state. When
1190
+ updating memory after completing a phase, advance the stale pointer past the
1191
+ completed phase and list remaining real gaps as candidates.
1192
+
1193
+ **Excluded surfaces stay excluded.** If the current task explicitly excluded
1194
+ docs, exports, harnesses, or a capability category, preserve that exclusion in
1195
+ trajectory files. A later TODO-only pass may record accepted work but must not
1196
+ select excluded surfaces as follow-up work without explicit task selection.
1197
+
1198
+ **Record exact validation results.** For each run, record the command, exact
1199
+ pass/fail counts, pre-existing failures documented separately from new
1200
+ failures, and cache cleanup findings. A green validation run confirms current
1201
+ contracts; it does not create new feature, docs, or experiment work.
919
1202
 
920
1203
  ## Naming, State, And References
921
1204
 
@@ -1444,33 +1727,17 @@ repositories.
1444
1727
 
1445
1728
  ## Final Response
1446
1729
 
1447
- Keep the final response concise:
1448
-
1449
- - changed paths;
1450
- - behavior or contract covered;
1451
- - validation performed, using readback/search checks for docs-only work;
1452
- - caveats that affect the user's next action.
1453
-
1454
- A round's real delta is not only source/test edits: a verification or smoke run
1455
- that wrote on-disk artifacts, a status or memory-record flip, and a docs/config
1456
- sync all count. Do not open a report with "no changes needed", "no work
1457
- required", or "already complete and verified on disk" when any such delta
1458
- exists; "no source/behavior changes" is the precise, sanctioned phrasing when
1459
- only non-source surfaces moved, and those edits must still be listed as this
1460
- round's work. Do not sustain a no-op framing by attributing this round's delta
1461
- to a prior round (for example labeling this round's status flip as "updated
1462
- previous turn"), and do not list state already on disk before this round as
1463
- this round's work — mis-attribution in either direction is the same honesty
1464
- defect. Report only the real delta, verified against the prior on-disk state,
1465
- not against memory or a task narrative; a verification run plus a status/memory
1466
- flip is a deliverable, not "no work".
1467
-
1468
- A pure verification pass — where the task asks to confirm that prior work is
1469
- intact and the developer inspects state, runs tests, and finds no new edits are
1470
- needed — is the exception. In that case the report should describe the checks
1471
- performed and their results, and may state "no new changes made" or "prior work
1472
- confirmed intact on disk." Do not use this exception to relabel an
1473
- implementation pass where work was silently skipped.
1730
+ Keep the final response concise: changed paths, behavior or contract covered,
1731
+ validation performed, and caveats that affect the user's next action.
1732
+
1733
+ A round's real delta includes verification runs, memory-record flips, and
1734
+ docs/config syncs, not only source/test edits. Report the real delta verified
1735
+ against prior on-disk state, not against memory or a task narrative. A
1736
+ verification run plus a status/memory flip IS a deliverable — not "no work."
1737
+
1738
+ For a pure verification pass where no new edits are needed, describe the checks
1739
+ performed and their results, and state "no new changes made" or "prior work
1740
+ confirmed intact on disk."
1474
1741
 
1475
1742
  Do not explain skill internals, tool mechanics, or style theory unless the user
1476
1743
  asked for a skill optimizer report.
@@ -50,6 +50,8 @@ Send `.lit.md` fragments, not generated `.tex`. Send a self-contained packet: us
50
50
 
51
51
  Ask for revised `.lit.md` blocks or concrete replacement suggestions.
52
52
 
53
+ When applying `writing_master` output locally, respect its wording. For manuscript body text inside `latex` blocks, use the `writing_master` sentences exactly as written, without changing even one character. For explanations outside `latex` blocks, adaptation is allowed, but do not drop any information from the `writing_master` explanation.
54
+
53
55
  Apply accepted feedback to `.lit.md`, update explanations, then run `python -m tanglemd2tex`.
54
56
 
55
57
  ## Report