cogames-agents 0.0.0.7__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. cogames_agents/__init__.py +0 -0
  2. cogames_agents/evals/__init__.py +5 -0
  3. cogames_agents/evals/planky_evals.py +415 -0
  4. cogames_agents/policy/__init__.py +0 -0
  5. cogames_agents/policy/evolution/__init__.py +0 -0
  6. cogames_agents/policy/evolution/cogsguard/__init__.py +0 -0
  7. cogames_agents/policy/evolution/cogsguard/evolution.py +695 -0
  8. cogames_agents/policy/evolution/cogsguard/evolutionary_coordinator.py +540 -0
  9. cogames_agents/policy/nim_agents/__init__.py +20 -0
  10. cogames_agents/policy/nim_agents/agents.py +98 -0
  11. cogames_agents/policy/nim_agents/bindings/generated/libnim_agents.dylib +0 -0
  12. cogames_agents/policy/nim_agents/bindings/generated/nim_agents.py +215 -0
  13. cogames_agents/policy/nim_agents/cogsguard_agents.nim +555 -0
  14. cogames_agents/policy/nim_agents/cogsguard_align_all_agents.nim +569 -0
  15. cogames_agents/policy/nim_agents/common.nim +1054 -0
  16. cogames_agents/policy/nim_agents/install.sh +1 -0
  17. cogames_agents/policy/nim_agents/ladybug_agent.nim +954 -0
  18. cogames_agents/policy/nim_agents/nim_agents.nim +68 -0
  19. cogames_agents/policy/nim_agents/nim_agents.nims +14 -0
  20. cogames_agents/policy/nim_agents/nimby.lock +3 -0
  21. cogames_agents/policy/nim_agents/racecar_agents.nim +844 -0
  22. cogames_agents/policy/nim_agents/random_agents.nim +68 -0
  23. cogames_agents/policy/nim_agents/test_agents.py +53 -0
  24. cogames_agents/policy/nim_agents/thinky_agents.nim +677 -0
  25. cogames_agents/policy/nim_agents/thinky_eval.py +230 -0
  26. cogames_agents/policy/scripted_agent/README.md +360 -0
  27. cogames_agents/policy/scripted_agent/__init__.py +0 -0
  28. cogames_agents/policy/scripted_agent/baseline_agent.py +1031 -0
  29. cogames_agents/policy/scripted_agent/cogas/__init__.py +5 -0
  30. cogames_agents/policy/scripted_agent/cogas/context.py +68 -0
  31. cogames_agents/policy/scripted_agent/cogas/entity_map.py +152 -0
  32. cogames_agents/policy/scripted_agent/cogas/goal.py +115 -0
  33. cogames_agents/policy/scripted_agent/cogas/goals/__init__.py +27 -0
  34. cogames_agents/policy/scripted_agent/cogas/goals/aligner.py +160 -0
  35. cogames_agents/policy/scripted_agent/cogas/goals/gear.py +197 -0
  36. cogames_agents/policy/scripted_agent/cogas/goals/miner.py +441 -0
  37. cogames_agents/policy/scripted_agent/cogas/goals/scout.py +40 -0
  38. cogames_agents/policy/scripted_agent/cogas/goals/scrambler.py +174 -0
  39. cogames_agents/policy/scripted_agent/cogas/goals/shared.py +160 -0
  40. cogames_agents/policy/scripted_agent/cogas/goals/stem.py +60 -0
  41. cogames_agents/policy/scripted_agent/cogas/goals/survive.py +100 -0
  42. cogames_agents/policy/scripted_agent/cogas/navigator.py +401 -0
  43. cogames_agents/policy/scripted_agent/cogas/obs_parser.py +238 -0
  44. cogames_agents/policy/scripted_agent/cogas/policy.py +525 -0
  45. cogames_agents/policy/scripted_agent/cogas/trace.py +69 -0
  46. cogames_agents/policy/scripted_agent/cogsguard/CLAUDE.md +517 -0
  47. cogames_agents/policy/scripted_agent/cogsguard/README.md +252 -0
  48. cogames_agents/policy/scripted_agent/cogsguard/__init__.py +74 -0
  49. cogames_agents/policy/scripted_agent/cogsguard/aligned_junction_held_investigation.md +152 -0
  50. cogames_agents/policy/scripted_agent/cogsguard/aligner.py +333 -0
  51. cogames_agents/policy/scripted_agent/cogsguard/behavior_hooks.py +44 -0
  52. cogames_agents/policy/scripted_agent/cogsguard/control_agent.py +323 -0
  53. cogames_agents/policy/scripted_agent/cogsguard/debug_agent.py +533 -0
  54. cogames_agents/policy/scripted_agent/cogsguard/miner.py +589 -0
  55. cogames_agents/policy/scripted_agent/cogsguard/options.py +67 -0
  56. cogames_agents/policy/scripted_agent/cogsguard/parity_metrics.py +36 -0
  57. cogames_agents/policy/scripted_agent/cogsguard/policy.py +1967 -0
  58. cogames_agents/policy/scripted_agent/cogsguard/prereq_trace.py +33 -0
  59. cogames_agents/policy/scripted_agent/cogsguard/role_trace.py +50 -0
  60. cogames_agents/policy/scripted_agent/cogsguard/roles.py +31 -0
  61. cogames_agents/policy/scripted_agent/cogsguard/rollout_trace.py +40 -0
  62. cogames_agents/policy/scripted_agent/cogsguard/scout.py +69 -0
  63. cogames_agents/policy/scripted_agent/cogsguard/scrambler.py +350 -0
  64. cogames_agents/policy/scripted_agent/cogsguard/targeted_agent.py +418 -0
  65. cogames_agents/policy/scripted_agent/cogsguard/teacher.py +224 -0
  66. cogames_agents/policy/scripted_agent/cogsguard/types.py +381 -0
  67. cogames_agents/policy/scripted_agent/cogsguard/v2_agent.py +49 -0
  68. cogames_agents/policy/scripted_agent/common/__init__.py +0 -0
  69. cogames_agents/policy/scripted_agent/common/geometry.py +24 -0
  70. cogames_agents/policy/scripted_agent/common/roles.py +34 -0
  71. cogames_agents/policy/scripted_agent/common/tag_utils.py +48 -0
  72. cogames_agents/policy/scripted_agent/demo_policy.py +242 -0
  73. cogames_agents/policy/scripted_agent/pathfinding.py +126 -0
  74. cogames_agents/policy/scripted_agent/pinky/DESIGN.md +317 -0
  75. cogames_agents/policy/scripted_agent/pinky/__init__.py +5 -0
  76. cogames_agents/policy/scripted_agent/pinky/behaviors/__init__.py +17 -0
  77. cogames_agents/policy/scripted_agent/pinky/behaviors/aligner.py +400 -0
  78. cogames_agents/policy/scripted_agent/pinky/behaviors/base.py +119 -0
  79. cogames_agents/policy/scripted_agent/pinky/behaviors/miner.py +632 -0
  80. cogames_agents/policy/scripted_agent/pinky/behaviors/scout.py +138 -0
  81. cogames_agents/policy/scripted_agent/pinky/behaviors/scrambler.py +433 -0
  82. cogames_agents/policy/scripted_agent/pinky/policy.py +570 -0
  83. cogames_agents/policy/scripted_agent/pinky/services/__init__.py +7 -0
  84. cogames_agents/policy/scripted_agent/pinky/services/map_tracker.py +808 -0
  85. cogames_agents/policy/scripted_agent/pinky/services/navigator.py +864 -0
  86. cogames_agents/policy/scripted_agent/pinky/services/safety.py +189 -0
  87. cogames_agents/policy/scripted_agent/pinky/state.py +299 -0
  88. cogames_agents/policy/scripted_agent/pinky/types.py +138 -0
  89. cogames_agents/policy/scripted_agent/planky/CLAUDE.md +124 -0
  90. cogames_agents/policy/scripted_agent/planky/IMPROVEMENTS.md +160 -0
  91. cogames_agents/policy/scripted_agent/planky/NOTES.md +153 -0
  92. cogames_agents/policy/scripted_agent/planky/PLAN.md +254 -0
  93. cogames_agents/policy/scripted_agent/planky/README.md +214 -0
  94. cogames_agents/policy/scripted_agent/planky/STRATEGY.md +100 -0
  95. cogames_agents/policy/scripted_agent/planky/__init__.py +5 -0
  96. cogames_agents/policy/scripted_agent/planky/context.py +68 -0
  97. cogames_agents/policy/scripted_agent/planky/entity_map.py +152 -0
  98. cogames_agents/policy/scripted_agent/planky/goal.py +107 -0
  99. cogames_agents/policy/scripted_agent/planky/goals/__init__.py +27 -0
  100. cogames_agents/policy/scripted_agent/planky/goals/aligner.py +168 -0
  101. cogames_agents/policy/scripted_agent/planky/goals/gear.py +179 -0
  102. cogames_agents/policy/scripted_agent/planky/goals/miner.py +416 -0
  103. cogames_agents/policy/scripted_agent/planky/goals/scout.py +40 -0
  104. cogames_agents/policy/scripted_agent/planky/goals/scrambler.py +174 -0
  105. cogames_agents/policy/scripted_agent/planky/goals/shared.py +160 -0
  106. cogames_agents/policy/scripted_agent/planky/goals/stem.py +49 -0
  107. cogames_agents/policy/scripted_agent/planky/goals/survive.py +96 -0
  108. cogames_agents/policy/scripted_agent/planky/navigator.py +388 -0
  109. cogames_agents/policy/scripted_agent/planky/obs_parser.py +238 -0
  110. cogames_agents/policy/scripted_agent/planky/policy.py +485 -0
  111. cogames_agents/policy/scripted_agent/planky/tests/__init__.py +0 -0
  112. cogames_agents/policy/scripted_agent/planky/tests/conftest.py +66 -0
  113. cogames_agents/policy/scripted_agent/planky/tests/helpers.py +152 -0
  114. cogames_agents/policy/scripted_agent/planky/tests/test_aligner.py +24 -0
  115. cogames_agents/policy/scripted_agent/planky/tests/test_miner.py +30 -0
  116. cogames_agents/policy/scripted_agent/planky/tests/test_scout.py +15 -0
  117. cogames_agents/policy/scripted_agent/planky/tests/test_scrambler.py +29 -0
  118. cogames_agents/policy/scripted_agent/planky/tests/test_stem.py +36 -0
  119. cogames_agents/policy/scripted_agent/planky/trace.py +69 -0
  120. cogames_agents/policy/scripted_agent/types.py +239 -0
  121. cogames_agents/policy/scripted_agent/unclipping_agent.py +461 -0
  122. cogames_agents/policy/scripted_agent/utils.py +381 -0
  123. cogames_agents/policy/scripted_registry.py +80 -0
  124. cogames_agents/py.typed +0 -0
  125. cogames_agents-0.0.0.7.dist-info/METADATA +98 -0
  126. cogames_agents-0.0.0.7.dist-info/RECORD +128 -0
  127. cogames_agents-0.0.0.7.dist-info/WHEEL +6 -0
  128. cogames_agents-0.0.0.7.dist-info/top_level.txt +1 -0
@@ -0,0 +1,124 @@
1
+ # Planky — LLM Development Guide
2
+
3
+ ## Objective
4
+
5
+ Maximize reward in CogsGuard by improving the Planky scripted agent. Reward = junction hold time. See `STRATEGY.md` for
6
+ game mechanics and role system.
7
+
8
+ ## File Layout
9
+
10
+ ```
11
+ planky/
12
+ policy.py # Multi-agent policy, role distribution defaults, per-tick brain loop
13
+ goal.py # Goal base class and evaluate_goals()
14
+ context.py # PlankyContext (state snapshot, blackboard, navigator, trace)
15
+ navigator.py # Pathfinding (A*, exploration, direction bias)
16
+ entity_map.py # Spatial memory of observed entities
17
+ obs_parser.py # Raw observation → StateSnapshot + visible entities
18
+ trace.py # Debug trace formatting
19
+ goals/
20
+ gear.py # GetGearGoal base — gear acquisition with reserve checks
21
+ miner.py # ExploreHub, GetMinerGear, PickResource, DepositCargo, MineResource
22
+ aligner.py # GetAlignerGear, AlignJunction
23
+ scrambler.py # GetScramblerGear, ScrambleJunction
24
+ scout.py # GetScoutGear, Explore
25
+ shared.py # GetHearts, FallbackMine (used by aligner/scrambler)
26
+ survive.py # SurviveGoal — retreat when HP low
27
+ stem.py # SelectRoleGoal — dynamic role selection
28
+ tests/
29
+ conftest.py # Fixtures: miner_episode, aligner_episode, etc.
30
+ helpers.py # run_planky_episode(), EpisodeResult
31
+ test_miner.py # Miner capability tests
32
+ test_aligner.py # Aligner capability tests
33
+ test_scrambler.py
34
+ test_scout.py
35
+ test_stem.py
36
+ STRATEGY.md # Game mechanics, role costs, strategic loop
37
+ ```
38
+
39
+ ## Core Debugging Loop
40
+
41
+ This is the iteration cycle for improving Planky:
42
+
43
+ ### 1. Run an episode and measure reward
44
+
45
+ ```bash
46
+ uv run cogames play --mission cogsguard_machina_1.basic \
47
+ -p planky --cogs 5 --steps 1000 --render none
48
+ ```
49
+
50
+ ### 2. Run with tracing to diagnose behavior
51
+
52
+ ```bash
53
+ # Trace a specific agent
54
+ uv run cogames play --mission cogsguard_machina_1.basic \
55
+ -p 'metta://policy/planky?miner=2&aligner=3&trace=1&trace_level=2&trace_agent=0' \
56
+ --cogs 5 --steps 1000 --render none
57
+
58
+ # Trace all agents
59
+ uv run cogames play --mission cogsguard_machina_1.basic \
60
+ -p 'metta://policy/planky?miner=2&aligner=3&trace=1&trace_level=2' \
61
+ --cogs 5 --steps 1000 --render none
62
+ ```
63
+
64
+ Trace output shows per-tick: goal chain, skipped goals (with reason), active goal, action, idle counter. Collective
65
+ resource logs print every 25 steps.
66
+
67
+ ### 3. Edit goals/policy code
68
+
69
+ Each role has a goal list in `policy.py:_make_goal_list()`. Goals are evaluated in priority order. A goal's
70
+ `is_satisfied()` returns True to skip it; `execute()` returns an Action.
71
+
72
+ ### 4. Validate with multi-seed sweep
73
+
74
+ ```bash
75
+ # 10-seed reward sweep (copy-paste this)
76
+ total=0; for i in $(seq 1 10); do \
77
+ r=$(uv run cogames play --mission cogsguard_machina_1.basic \
78
+ -p planky --cogs 5 --steps 1000 --render none --seed $i 2>&1 \
79
+ | grep "Reward" | grep -oE '[0-9]+\.[0-9]+'); \
80
+ echo "Seed $i: $r"; total=$(echo "$total + $r" | bc); \
81
+ done; echo "Average: $(echo "scale=2; $total / 10" | bc)"
82
+ ```
83
+
84
+ ### 5. Run unit tests
85
+
86
+ ```bash
87
+ metta pytest packages/cogames-agents/src/cogames_agents/policy/scripted_agent/planky/tests/ -v
88
+ ```
89
+
90
+ Always run tests after changes. The run must report all 15 tests passing, plus the 1 expected failure (xfail).
91
+
92
+ ## Current Configuration
93
+
94
+ - **5 agents**: 2 miners, 3 aligners (set in `policy.py` defaults)
95
+ - **Mining stop**: miners idle when collective has >100 of every resource (`miner.py:COLLECTIVE_SUFFICIENT_THRESHOLD`)
96
+ - **Deposit threshold**: 50% cargo capacity (`miner.py:DepositCargoGoal`)
97
+ - **Gear reserve**: collective must have cost + 3 of each resource before buying gear (`gear.py:RESOURCE_RESERVE`)
98
+ - **Heart reserve**: collective must have 1 + 3 of each resource before buying hearts
99
+ (`shared.py:GetHeartsGoal.RESOURCE_RESERVE`)
100
+ - **Miner gear**: no reserve requirement (miners are resource producers) but skipped when resources sufficient
101
+
102
+ ## Reward Baseline
103
+
104
+ 10-seed average at 1000 steps, --cogs=5: **~3.3 reward**
105
+
106
+ ## Key Reward Insights
107
+
108
+ - Reward is `(aligned.junction.held / num_junctions) * (100 / max_steps)` — purely junction hold time
109
+ - Clips claims ~11 junctions and doesn't lose them (no scrambler in default config)
110
+ - More aligners = more reward, but they need miners to fund gear + hearts
111
+ - Scramblers tested poorly (1.84 avg with 2m/2a/1s) — hearts are too expensive
112
+ - Seed variance is high; always evaluate across 10+ seeds
113
+
114
+ ## What to Improve
115
+
116
+ Read `STRATEGY.md` for full context. High-impact areas:
117
+
118
+ 1. **Aligner junction targeting** (`goals/aligner.py:AlignJunctionGoal`) — prioritize junctions that maximize hold time
119
+ (e.g., cluster nearby, avoid clips AOE)
120
+ 2. **Dynamic role switching** — miners could become aligners once resources are sufficient instead of idling
121
+ 3. **Early game economy** — first 50 steps are critical; miners need to deposit quickly so aligners get hearts
122
+ 4. **Heart acquisition timing** — aligners sometimes waste time walking to chests when collective can't afford hearts
123
+ 5. **Navigation efficiency** (`navigator.py`) — A\* pathfinding could be improved, agents sometimes get stuck
124
+ 6. **Coordination** — multiple aligners targeting the same junction wastes effort
@@ -0,0 +1,160 @@
1
+ # Planky Improvement Log
2
+
3
+ Track each improvement attempt with scrimmage scores to measure progress.
4
+
5
+ ## Benchmark Command
6
+
7
+ ```bash
8
+ # Standard benchmark (explicit roles, 5 episodes)
9
+ cogames scrimmage --mission cogsguard_machina_1.basic \
10
+ --policy "metta://policy/planky?miner=3&aligner=3&scrambler=4" \
11
+ --episodes 5 --seed 42
12
+
13
+ # IMPORTANT: When using stem=10, you MUST zero out explicit roles:
14
+ cogames scrimmage --mission cogsguard_machina_1.basic \
15
+ --policy "metta://policy/planky?miner=0&aligner=0&scrambler=0&stem=10" \
16
+ --episodes 5 --seed 42
17
+ ```
18
+
19
+ ---
20
+
21
+ ## Critical Blockers Identified (2025-01-29)
22
+
23
+ ### Blocker #1: Gear Station Interaction Fails
24
+
25
+ Agents can reach gear stations (dist=1) but bumping doesn't give them gear:
26
+
27
+ - Agent bumps station repeatedly (move_north/south/etc fails)
28
+ - Position doesn't change (correct - you bump to interact)
29
+ - But gear is never acquired
30
+ - Likely cause: collective resources depleted, or wrong station being detected
31
+
32
+ **Evidence**:
33
+
34
+ ```
35
+ [t=27] GetMinerGear dist=1 → move_north (fails)
36
+ [t=28] GetMinerGear dist=1 → move_north (fails)
37
+ [t=29] GetMinerGear dist=1 → move_north (fails)
38
+ [t=30] ForceExplore kicks in, agent wanders away
39
+ ```
40
+
41
+ **To Fix**: Debug the gear station interaction in the game layer, or add wealth=100 to give starting resources.
42
+
43
+ ### Blocker #2: stem=10 Doesn't Override Defaults
44
+
45
+ When using `?stem=10`, the default role counts (miner=4, aligner=2, etc.) still apply. You must explicitly set
46
+ `miner=0&aligner=0&scrambler=0&stem=10` for actual stem mode.
47
+
48
+ ---
49
+
50
+ ## Improvement History
51
+
52
+ ### Baseline
53
+
54
+ **Date**: 2025-01-29 **Config**: miner=4, aligner=2, scrambler=4 (explicit)
55
+
56
+ ```
57
+ Episodes: 3, Seed: 42
58
+ Mean Reward: 0.04
59
+ Junction Aligned: 0.7
60
+ Junction Scrambled: 0.1
61
+ Miners got gear: 0.2 (2 total)
62
+ ```
63
+
64
+ **Notes**: Very poor performance. Miners stuck at gear station.
65
+
66
+ ---
67
+
68
+ ### Attempt #1: Fix Stem Role Selection
69
+
70
+ **Date**: 2025-01-29 **Change**: Distribute roles by agent_id in early game instead of all becoming scouts
71
+
72
+ **Result**: NO CHANGE (still ~0.04 reward) **Notes**: Roles distributed correctly, but gear acquisition still broken.
73
+
74
+ ---
75
+
76
+ ### Attempt #2: Improve Gear Station Approach
77
+
78
+ **Date**: 2025-01-29 **Change**: Track bump attempts, try different approach sides, clear cache when stuck
79
+
80
+ **Result**: NO CHANGE **Notes**: Agent still can't get gear even when approaching from different directions.
81
+
82
+ ---
83
+
84
+ ### Attempt #3: Skip Miner Gear
85
+
86
+ **Date**: 2025-01-29 **Change**: Miners skip gear acquisition, mine directly (reduced cargo capacity)
87
+
88
+ ```
89
+ Episodes: 5, Seed: 42
90
+ Mean Reward: 0.04
91
+ Junction Aligned: 0.9
92
+ Hearts gained: 2.5
93
+ ```
94
+
95
+ **Result**: NO CHANGE **Notes**: Miners function but economy doesn't sustain combat roles. Aligners/scramblers still
96
+ need gear.
97
+
98
+ ---
99
+
100
+ ### Attempt #4: Resource-Aware Gear & Heart Goals
101
+
102
+ **Date**: 2025-01-28 **Change**: GetGearGoal and GetHeartsGoal now check collective resources before attempting. Agents
103
+ skip gear/heart acquisition when collective can't afford it, falling through to productive goals (mining, exploring)
104
+ instead of wasting time bumping empty stations.
105
+
106
+ Also added:
107
+
108
+ - AlignJunctionGoal/ScrambleJunctionGoal skip when agent lacks gear or heart (was bumping junctions uselessly)
109
+ - FallbackMineGoal at end of aligner/scrambler goal lists (mine when idle)
110
+ - Default role distribution changed to 6 miners / 2 aligners / 2 scramblers
111
+
112
+ ```
113
+ Episodes: 20, Seed: 42, Config: stem=10 (defaults to miner=6, aligner=2, scrambler=2)
114
+ Mean Reward: ~0.25 (range 0.00-0.92)
115
+ junction.aligned_by_agent: 19.80
116
+ junction.scrambled_by_agent: 0.90
117
+ heart.gained: 30.60
118
+ ```
119
+
120
+ **Result**: SIGNIFICANT IMPROVEMENT — from 0.04 baseline to ~0.25 mean reward. Junction alignments went from ~0 to 19.8
121
+ per episode average.
122
+
123
+ ---
124
+
125
+ ### Attempt #5: Deposit fix, nav timeout, role rebalance
126
+
127
+ **Date**: 2026-01-28
128
+
129
+ Changes:
130
+
131
+ - Fixed deposit threshold for ungeared miners (was 10, capacity is 4 — never deposited!)
132
+ - Added navigation timeout (40 steps) for aligner/scrambler junction goals
133
+ - Rebalanced default roles: 6 miners / 4 aligners / 0 scramblers
134
+ - Hub-targeted exploration for gear station discovery
135
+
136
+ ```
137
+ Episodes: 20, Seed: 42, Config: stem=10
138
+ Mean Reward: ~0.93 (range 0.00-2.46)
139
+ junction.aligned_by_agent: 47.70
140
+ heart.gained: 60.80
141
+ ```
142
+
143
+ **Result**: 23x improvement from baseline. Economy-first strategy works.
144
+
145
+ ---
146
+
147
+ ## Next Steps
148
+
149
+ 1. **Reduce 0.00 episodes** — 4/20 still score zero (unfavorable map layouts?)
150
+ 2. **Faster gear acquisition** — aligners wait ~80 steps for collective resources
151
+ 3. **Junction defense** — aligned junctions get scrambled back by clips
152
+
153
+ ## Current Best Config
154
+
155
+ ```bash
156
+ cogames scrimmage --mission cogsguard_machina_1.basic \
157
+ --policy "metta://policy/planky?stem=10" \
158
+ --episodes 20 --seed 42
159
+ # Mean reward: ~0.93
160
+ ```
@@ -0,0 +1,153 @@
1
+ # Planky Debugging Notes
2
+
3
+ ## Session: 2025-01-29
4
+
5
+ ### Goal
6
+
7
+ Improve Planky agent to achieve 100 reward in CogsGuard scrimmage.
8
+
9
+ ### Current Performance
10
+
11
+ - **Mean Reward:** ~0.04 (target: 100)
12
+ - **Best Single Episode:** 1.62
13
+
14
+ ---
15
+
16
+ ## Critical Findings
17
+
18
+ ### 1. stem=10 Policy URL Bug
19
+
20
+ When using `?stem=10`, the default explicit role counts still apply:
21
+
22
+ - Default: `miner=4, aligner=2, scrambler=4, stem=0`
23
+ - With `?stem=10`: `miner=4, aligner=2, scrambler=4, stem=10` (20 total slots!)
24
+
25
+ Since CogsGuard only has 10 agents, the first 10 role slots are used. So `stem=10` actually gives you 4 miners + 2
26
+ aligners + 4 scramblers, NOT 10 stem agents.
27
+
28
+ **Fix:** Must explicitly zero out other roles:
29
+
30
+ ```bash
31
+ --policy "metta://policy/planky?miner=0&aligner=0&scrambler=0&stem=10"
32
+ ```
33
+
34
+ ### 2. Gear Station Interaction — Collective Resources Required
35
+
36
+ Agents reach gear stations but bumping fails when collective resources are insufficient. Gear stations use
37
+ `actorCollectiveHas(cost)` filter — the bump silently fails if resources are missing.
38
+
39
+ **Root cause:** Collective resources deplete quickly when multiple agents gear up. Agents were wasting dozens of steps
40
+ bumping stations that couldn't dispense gear.
41
+
42
+ **Fix applied:** `GetGearGoal.is_satisfied()` now checks collective resources via `_collective_can_afford()` before
43
+ walking to the station. If the collective can't afford the gear, the goal is skipped and the agent falls through to its
44
+ next goal (e.g., mining). Same fix applied to `GetHeartsGoal` (heart costs 1 of each element).
45
+
46
+ **Gear costs (from collective):**
47
+
48
+ - Miner: C1 O1 G3 S1
49
+ - Aligner: C3 O1 G1 S1
50
+ - Scrambler: C1 O3 G1 S1
51
+ - Scout: C1 O1 G1 S3
52
+
53
+ ### 3. Multiple Station Positions Detected
54
+
55
+ Different agents find "miner_station" at different positions — this is normal, since there may be multiple gear stations in
56
+ the hub area.
57
+
58
+ ### 4. Miners Can Function Without Gear
59
+
60
+ Miners can mine without gear (just smaller cargo capacity: 4 vs 40). Now with resource-aware gear goals, miners will
61
+ attempt gear when affordable, and fall through to mining without gear when the collective can't afford it.
62
+
63
+ ---
64
+
65
+ ## Code Changes Made
66
+
67
+ ### goals/stem.py - Role Selection
68
+
69
+ Fixed early-game role distribution:
70
+
71
+ ```python
72
+ # Before: All agents became scouts when map knowledge low
73
+ # After: Distribute by agent_id
74
+ if explored_count < 50 and len(extractors) == 0:
75
+ if agent_id < 2:
76
+ return "miner" # Agents 0-1
77
+ elif agent_id < 5:
78
+ return "aligner" # Agents 2-4
79
+ elif agent_id < 9:
80
+ return "scrambler" # Agents 5-8
81
+ else:
82
+ return "scout" # Agent 9
83
+ ```
84
+
85
+ ### goals/gear.py - Stuck Detection
86
+
87
+ Added stuck detection and cache clearing:
88
+
89
+ - Track bump attempts at dist=1
90
+ - Clear navigator cache when stuck
91
+ - Explore randomly to find alternative path
92
+ - Reduced MAX_TOTAL_ATTEMPTS to 80, RETRY_INTERVAL to 150
93
+
94
+ ### policy.py - Skip Miner Gear
95
+
96
+ Removed gear requirement for miners (they can mine without it).
97
+
98
+ ---
99
+
100
+ ## Diagnostic Commands
101
+
102
+ ```bash
103
+ # Trace specific agent
104
+ cogames play --mission cogsguard_machina_1.basic \
105
+ --policy "metta://policy/planky?miner=3&aligner=3&scrambler=4&trace=1&trace_level=2&trace_agent=0" \
106
+ --steps 100 --render none
107
+
108
+ # Test with wealth (bypass resource constraints)
109
+ # Edit missions.py: add wealth=100 to CogsGuardMachina1Mission
110
+ cogames play --mission cogsguard_machina_1.basic \
111
+ --policy "metta://policy/planky?aligner=10" --steps 300
112
+
113
+ # Single episode with stats
114
+ cogames play --mission cogsguard_machina_1.basic \
115
+ --policy "metta://policy/planky?miner=3&aligner=3&scrambler=4" \
116
+ --steps 200 --render none
117
+ ```
118
+
119
+ ---
120
+
121
+ ## Next Steps to Investigate
122
+
123
+ 1. **Debug gear station interaction**
124
+ - Add logging to gear station bump handler in game layer
125
+ - Check if `type:miner_station` tag is correct
126
+ - Verify collective resource levels when bumping
127
+
128
+ 2. **Test with wealth=100**
129
+ - Temporarily set wealth in mission config
130
+ - Isolate whether issue is resources vs interaction
131
+
132
+ 3. **Check entity detection**
133
+ - Print what obs_parser detects as miner_station
134
+ - Verify only one miner_station exists in hub
135
+
136
+ 4. **Compare with Nim implementation**
137
+ - The Nim scripted agent works - what does it do differently?
138
+ - Check how Nim handles gear station interaction
139
+
140
+ ---
141
+
142
+ ## Stats Reference
143
+
144
+ Key metrics to watch in scrimmage output:
145
+
146
+ - `miner.gained` - How many miners got gear
147
+ - `aligner.gained` - How many aligners got gear
148
+ - `scrambler.gained` - How many scramblers got gear
149
+ - `junction.aligned_by_agent` - Junctions captured
150
+ - `junction.scrambled_by_agent` - Enemy junctions neutralized
151
+ - `heart.gained` - Hearts acquired for combat roles
152
+ - `action.move.failed` - High = agents stuck
153
+ - `status.max_steps_without_motion` - Stuck indicator
@@ -0,0 +1,254 @@
1
+ # Planky Improvement Plan
2
+
3
+ Iterative improvement loop for the Planky CogsGuard agent.
4
+
5
+ ## The Loop
6
+
7
+ ```
8
+ 1. BENCHMARK → Run scrimmage, collect baseline metrics
9
+ 2. IDENTIFY → Find the biggest weakness from metrics/observation
10
+ 3. IMPLEMENT → Make a targeted fix
11
+ 4. TEST → Run unit tests + scrimmage
12
+ 5. COMMIT → If improved, commit. If not, revert and try different approach
13
+ 6. REPEAT
14
+ ```
15
+
16
+ ## Benchmark Command
17
+
18
+ **Default**: Use `stem=10` to let agents dynamically choose roles. Only use explicit role counts (e.g.,
19
+ `miner=4&aligner=2&scrambler=4`) when testing a specific role behavior.
20
+
21
+ ```bash
22
+ # Quick debug (3 episodes, 500 steps max, ~30 sec)
23
+ cogames scrimmage --mission cogsguard_machina_1.basic \
24
+ --policy "metta://policy/planky?stem=10" \
25
+ --episodes 3 --steps 500 --seed 42
26
+
27
+ # Standard benchmark (5 episodes, ~2 min)
28
+ cogames scrimmage --mission cogsguard_machina_1.basic \
29
+ --policy "metta://policy/planky?stem=10" \
30
+ --episodes 5 --seed 42
31
+
32
+ # Full benchmark (20 episodes, ~5 min)
33
+ cogames scrimmage --mission cogsguard_machina_1.basic \
34
+ --policy "metta://policy/planky?stem=10" \
35
+ --episodes 20 --seed 42
36
+
37
+ # Testing a specific role (only when needed):
38
+ cogames scrimmage --mission cogsguard_machina_1.basic \
39
+ --policy "metta://policy/planky?miner=10" \
40
+ --episodes 3 --steps 500 --seed 42
41
+ ```
42
+
43
+ ### Key Metrics to Track
44
+
45
+ **Focus on Cog score** - ignore Clip performance, we only care about maximizing Cog outcomes.
46
+
47
+ | Metric | Target | Description |
48
+ | ------------------------ | ------ | -------------------------------- |
49
+ | `cogs.junctions` (final) | > 10 | Territory control at episode end |
50
+ | `cogs.junctions` (peak) | High | Best territory control achieved |
51
+ | `Reward` (mean) | > 15 | Average reward across episodes |
52
+ | Resources gathered | High | Total resources mined/deposited |
53
+ | Steps to first junction | < 200 | Early game expansion speed |
54
+
55
+ ## Test Commands
56
+
57
+ ```bash
58
+ # Run all planky behavior tests
59
+ metta pytest packages/cogames-agents/tests/test_planky_behaviors.py -v
60
+
61
+ # Run specific test category
62
+ metta pytest packages/cogames-agents/tests/test_planky_behaviors.py::TestPlankyMiner -v
63
+ metta pytest packages/cogames-agents/tests/test_planky_behaviors.py::TestPlankyAligner -v
64
+ metta pytest packages/cogames-agents/tests/test_planky_behaviors.py::TestPlankyScrambler -v
65
+
66
+ # Quick debug play (stem=10, limited steps)
67
+ cogames play --mission cogsguard_machina_1.basic \
68
+ --policy "metta://policy/planky?stem=10&trace=1&trace_level=2" \
69
+ --steps 300
70
+
71
+ # Debug a specific role (only when testing that role):
72
+ cogames play --mission cogsguard_machina_1.basic \
73
+ --policy "metta://policy/planky?miner=10&trace=1&trace_level=2" \
74
+ --steps 300
75
+ ```
76
+
77
+ ## Diagnostic Tips
78
+
79
+ ### Testing Combat Roles Without Economy
80
+
81
+ The `CogsGuardMission` has a `wealth` field that multiplies initial collective resources. To test aligners/scramblers
82
+ without resource constraints, temporarily edit `missions.py`:
83
+
84
+ ```python
85
+ # In packages/cogames/src/cogames/cogs_vs_clips/missions.py
86
+ # Set wealth=100 to get 1000 of each resource + 500 hearts
87
+ CogsGuardMachina1Mission = CogsGuardMission(
88
+ name="basic",
89
+ ...
90
+ wealth=100, # Add this line temporarily
91
+ )
92
+ ```
93
+
94
+ **IMPORTANT**: Revert this change before committing. Do NOT commit changes outside cogames-agents.
95
+
96
+ ### Alternative: Use policy-level resource injection
97
+
98
+ For unit tests, the diagnostic evals in `planky_evals.py` can set up custom initial conditions.
99
+
100
+ ## Score Tracking
101
+
102
+ Record improvements in `IMPROVEMENTS.md` - update it after each successful fix with scrimmage scores.
103
+
104
+ ## Improvement Backlog
105
+
106
+ ### Priority 1: Economy (Miners)
107
+
108
+ **Problem**: Combat roles (aligner/scrambler) can't act without hearts. Hearts require collective resources.
109
+
110
+ - [x] **1.1 Resource prioritization**: Mine the resource most needed for hearts (balanced) ✓
111
+ - [x] **1.2 Deposit efficiency**: Miners deposit when >= 50% full ✓
112
+ - [ ] **1.3 Base extractors first**: Each base corner has one extractor per resource type
113
+ - Miners should prioritize these nearby, safe extractors initially
114
+ - Only explore for distant extractors once base extractors are depleted
115
+ - Benefits: short travel time, safe from enemy AOE, quick early economy
116
+ - [ ] **1.4 Dynamic role balance**: Start with more miners, shift to combat as economy stabilizes
117
+ - [ ] **1.5 Co-mining**: Miners should stay near each other for synergy bonuses
118
+ - Extractors give bonus output when multiple agents mine together (see `synergy` field in stations.py)
119
+ - Germanium extractors have 50% synergy bonus per additional miner
120
+ - Miners should coordinate to arrive at extractors together
121
+ - Consider: leader/follower pattern, or shared target selection
122
+
123
+ ### Priority 2: Scrambler Targeting
124
+
125
+ **Problem**: Scramblers may chase distant junctions while closer threats expand.
126
+
127
+ - [ ] **2.1 Distance weighting**: Heavily weight distance in target selection (closer = better)
128
+ - [ ] **2.2 Threat assessment**: Prioritize junctions that are actively expanding clips territory
129
+ - [ ] **2.3 Coordination**: Multiple scramblers shouldn't target the same junction
130
+
131
+ ### Priority 3: Aligner Efficiency
132
+
133
+ **Problem**: Aligners avoid AOE but may ignore good opportunities.
134
+
135
+ - [ ] **3.1 Risk/reward scoring**: Accept some AOE risk for high-value junctions
136
+ - [ ] **3.2 Cluster targeting**: Prefer junctions that would create cogs clusters
137
+ - [ ] **3.3 Follow-up coordination**: Align junctions right after scramblers neutralize them
138
+
139
+ ### Priority 4: Survival & Recovery
140
+
141
+ **Problem**: Agents die in enemy AOE and lose gear/hearts.
142
+
143
+ - [ ] **4.1 Proactive retreat**: Retreat before HP hits threshold, not after
144
+ - [ ] **4.2 AOE awareness**: All roles should avoid enemy AOE, not just aligners
145
+ - [ ] **4.3 Recovery speed**: Faster gear/heart re-acquisition after death
146
+
147
+ ### Priority 5: Map Control Strategy
148
+
149
+ **Problem**: No global strategy for territory expansion.
150
+
151
+ - [ ] **5.1 Hub defense**: Keep at least one junction near the hub
152
+ - [ ] **5.2 Frontline awareness**: Push toward clips territory systematically
153
+ - [ ] **5.3 Pincer strategy**: Coordinate scramblers to attack clips from multiple angles
154
+
155
+ ## Implementation Guide
156
+
157
+ ### Adding a New Improvement
158
+
159
+ 1. **Create a test first** (if behavior-testable):
160
+
161
+ ```python
162
+ # In test_planky_behaviors.py
163
+ def test_new_behavior(self) -> None:
164
+     stats = run_planky_episode(NewBehaviorMission, ...)
165
+     assert stats["some_metric"] > threshold
166
+ ```
167
+
168
+ 2. **Create eval mission** (if needed):
169
+
170
+ ```python
171
+ # In planky_evals.py
172
+ class PlankyNewBehavior(_PlankyDiagnosticBase):
173
+     name: str = "planky_new_behavior"
174
+     map_name: str = "new_behavior.map"
175
+ ```
176
+
177
+ 3. **Implement in goal file**:
178
+ - `goals/miner.py` - resource gathering
179
+ - `goals/aligner.py` - junction alignment
180
+ - `goals/scrambler.py` - junction scrambling
181
+ - `goals/survive.py` - HP-based retreat
182
+ - `goals/shared.py` - cross-role behaviors (hearts)
183
+
184
+ 4. **Test locally**:
185
+
186
+ ```bash
187
+ metta pytest packages/cogames-agents/tests/test_planky_behaviors.py -v -k "new_behavior"
188
+ ```
189
+
190
+ 5. **Benchmark**:
191
+ ```bash
192
+ cogames scrimmage --mission cogsguard_machina_1.basic \
193
+ --policy "metta://policy/planky?stem=10" --episodes 5
194
+ ```
195
+
196
+ ### File Quick Reference
197
+
198
+ | File | Purpose |
199
+ | --------------- | -------------------------------------------------- |
200
+ | `policy.py` | Entry point, role distribution, goal list creation |
201
+ | `context.py` | PlankyContext, StateSnapshot |
202
+ | `entity_map.py` | Sparse map with find/query |
203
+ | `navigator.py` | A\* pathfinding, exploration |
204
+ | `goal.py` | Goal base class, evaluate_goals() |
205
+ | `goals/*.py` | Role-specific goals |
206
+
207
+ ## Current Baseline
208
+
209
+ Record baseline metrics here before each improvement session:
210
+
211
+ ```
212
+ Date: [DATE]
213
+ Config: stem=10
214
+ Episodes: 20
215
+ Seed: 42
216
+
217
+ Results:
218
+ - Mean reward: [X]
219
+ - Mean final cogs junctions: [X]
220
+ - Peak cogs junctions: [X]
221
+ - Total resources gathered: [X]
222
+ ```
223
+
224
+ ## Completed Improvements
225
+
226
+ Track completed work here:
227
+
228
+ - [x] Initial goal-tree implementation
229
+ - [x] Basic role goals (miner, aligner, scrambler, scout)
230
+ - [x] Navigation with A\* pathfinding
231
+ - [x] Attempt tracking to avoid stuck loops
232
+ - [x] Aligner AOE avoidance
233
+ - [x] **Resource balancing** - Miners now prioritize the resource the collective has least of, ensuring balanced
234
+ gathering for hearts
235
+ - [x] **Periodic re-evaluation** - Miners re-evaluate target resource every 50 steps to adapt to changing needs
236
+ - [x] **Useful action tracking** - Track steps since the last useful action (mine/deposit/align/scramble) and show an
237
+ `IDLE=N` indicator in the trace when idle for more than 20 steps
238
+ - [x] **Smarter deposit threshold** - Miners only deposit when cargo >= 50% full (or >= 10 resources)
239
+ - [x] **Faster extractor failure detection** - Reduced from 5 to 3 attempts, with a 500-step cooldown on failed extractors
240
+ - [x] **Idle reset mechanism** - Clear navigation cache and targets after 100+ idle steps to break stuck loops
241
+
242
+ ## Current Observations
243
+
244
+ After improvements, resources ARE being balanced (all 4 types mined), but:
245
+
246
+ - Junction control is poor (agents capture 2-3 junctions, then lose all of them)
247
+ - Cogs junctions peak early, then decline — need to sustain territory
248
+ - Agents keep losing gear (walking into enemy AOE)
249
+
250
+ - [x] **Resource-aware gear/heart goals** — `GetGearGoal` and `GetHeartsGoal` check collective resources before
251
+ attempting to acquire gear or hearts. Agents skip these goals when the collective can't afford them, falling through
252
+ to productive goals instead of wasting time bumping empty stations.
253
+
254
+ **Next Priority**: Economy bootstrapping — ensure miners get gear first so combat roles can follow