@jaguilar87/gaia 5.0.2 → 5.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/ARCHITECTURE.md +0 -1
  4. package/CHANGELOG.md +110 -0
  5. package/INSTALL.md +0 -2
  6. package/README.md +1 -6
  7. package/bin/README.md +0 -1
  8. package/bin/cli/_install_helpers.py +1 -1
  9. package/bin/cli/approvals.py +23 -21
  10. package/bin/cli/cleanup.py +0 -1
  11. package/bin/cli/doctor.py +1 -1
  12. package/bin/cli/memory.py +2 -0
  13. package/bin/cli/update.py +1 -1
  14. package/bin/pre-publish-validate.js +48 -5
  15. package/config/README.md +22 -44
  16. package/config/surface-routing.json +0 -2
  17. package/dist/gaia-ops/.claude-plugin/plugin.json +1 -1
  18. package/dist/gaia-ops/config/README.md +22 -44
  19. package/dist/gaia-ops/config/surface-routing.json +0 -2
  20. package/dist/gaia-ops/hooks/modules/agents/contract_validator.py +18 -0
  21. package/dist/gaia-ops/hooks/modules/agents/handoff_persister.py +214 -2
  22. package/dist/gaia-ops/hooks/modules/agents/response_contract.py +26 -0
  23. package/dist/gaia-ops/hooks/modules/agents/transcript_reader.py +15 -0
  24. package/dist/gaia-ops/hooks/modules/security/__init__.py +0 -5
  25. package/dist/gaia-ops/hooks/modules/security/approval_grants.py +124 -19
  26. package/dist/gaia-ops/hooks/modules/security/mutative_verbs.py +99 -7
  27. package/dist/gaia-ops/hooks/modules/tools/bash_validator.py +127 -24
  28. package/dist/gaia-ops/hooks/modules/validation/commit_validator.py +90 -55
  29. package/dist/gaia-ops/skills/README.md +1 -1
  30. package/dist/gaia-ops/skills/agent-contract-handoff/SKILL.md +3 -0
  31. package/dist/gaia-ops/skills/agent-response/SKILL.md +4 -2
  32. package/dist/gaia-ops/skills/gaia-patterns/SKILL.md +1 -1
  33. package/dist/gaia-ops/skills/gaia-patterns/reference.md +2 -3
  34. package/dist/gaia-ops/skills/gaia-release/SKILL.md +60 -24
  35. package/dist/gaia-ops/skills/gaia-release/reference.md +35 -11
  36. package/dist/gaia-ops/skills/git-conventions/SKILL.md +6 -2
  37. package/dist/gaia-ops/skills/orchestrator-present-approval/SKILL.md +30 -7
  38. package/dist/gaia-ops/skills/orchestrator-present-approval/reference.md +32 -15
  39. package/dist/gaia-ops/skills/readme-writing/SKILL.md +1 -1
  40. package/dist/gaia-ops/skills/readme-writing/reference.md +0 -1
  41. package/dist/gaia-ops/skills/security-tiers/SKILL.md +5 -1
  42. package/dist/gaia-ops/skills/security-tiers/reference.md +3 -1
  43. package/dist/gaia-ops/skills/subagent-request-approval/SKILL.md +43 -6
  44. package/dist/gaia-ops/skills/subagent-request-approval/reference.md +66 -16
  45. package/dist/gaia-ops/tools/context/README.md +1 -1
  46. package/dist/gaia-ops/tools/gaia_simulator/extractor.py +0 -1
  47. package/dist/gaia-ops/tools/scan/ui.py +20 -4
  48. package/dist/gaia-ops/tools/scan/verify.py +3 -3
  49. package/dist/gaia-ops/tools/validation/README.md +15 -24
  50. package/dist/gaia-security/.claude-plugin/plugin.json +1 -1
  51. package/dist/gaia-security/hooks/modules/agents/contract_validator.py +18 -0
  52. package/dist/gaia-security/hooks/modules/agents/handoff_persister.py +214 -2
  53. package/dist/gaia-security/hooks/modules/agents/response_contract.py +26 -0
  54. package/dist/gaia-security/hooks/modules/agents/transcript_reader.py +15 -0
  55. package/dist/gaia-security/hooks/modules/security/__init__.py +0 -5
  56. package/dist/gaia-security/hooks/modules/security/approval_grants.py +124 -19
  57. package/dist/gaia-security/hooks/modules/security/mutative_verbs.py +99 -7
  58. package/dist/gaia-security/hooks/modules/tools/bash_validator.py +127 -24
  59. package/dist/gaia-security/hooks/modules/validation/commit_validator.py +90 -55
  60. package/gaia/state/transitions.py +4 -4
  61. package/gaia/store/writer.py +56 -0
  62. package/hooks/modules/README.md +2 -4
  63. package/hooks/modules/agents/contract_validator.py +18 -0
  64. package/hooks/modules/agents/handoff_persister.py +214 -2
  65. package/hooks/modules/agents/response_contract.py +26 -0
  66. package/hooks/modules/agents/transcript_reader.py +15 -0
  67. package/hooks/modules/security/__init__.py +0 -5
  68. package/hooks/modules/security/approval_grants.py +124 -19
  69. package/hooks/modules/security/mutative_verbs.py +99 -7
  70. package/hooks/modules/tools/bash_validator.py +127 -24
  71. package/hooks/modules/validation/commit_validator.py +90 -55
  72. package/index.js +2 -12
  73. package/package.json +4 -6
  74. package/pyproject.toml +3 -3
  75. package/scripts/bootstrap_database.sh +88 -439
  76. package/scripts/check_schema_drift.py +208 -0
  77. package/scripts/migrations/README.md +78 -28
  78. package/scripts/migrations/schema.checksum +8 -0
  79. package/scripts/release-prepare.mjs +199 -0
  80. package/skills/README.md +1 -1
  81. package/skills/agent-contract-handoff/SKILL.md +3 -0
  82. package/skills/agent-response/SKILL.md +4 -2
  83. package/skills/gaia-patterns/SKILL.md +1 -1
  84. package/skills/gaia-patterns/reference.md +2 -3
  85. package/skills/gaia-release/SKILL.md +60 -24
  86. package/skills/gaia-release/reference.md +35 -11
  87. package/skills/git-conventions/SKILL.md +6 -2
  88. package/skills/orchestrator-present-approval/SKILL.md +30 -7
  89. package/skills/orchestrator-present-approval/reference.md +32 -15
  90. package/skills/readme-writing/SKILL.md +1 -1
  91. package/skills/readme-writing/reference.md +0 -1
  92. package/skills/security-tiers/SKILL.md +5 -1
  93. package/skills/security-tiers/reference.md +3 -1
  94. package/skills/subagent-request-approval/SKILL.md +43 -6
  95. package/skills/subagent-request-approval/reference.md +66 -16
  96. package/tools/context/README.md +1 -1
  97. package/tools/gaia_simulator/extractor.py +0 -1
  98. package/tools/scan/ui.py +20 -4
  99. package/tools/scan/verify.py +3 -3
  100. package/tools/validation/README.md +15 -24
  101. package/commands/README.md +0 -64
  102. package/commands/gaia.md +0 -37
  103. package/commands/scan-project.md +0 -74
  104. package/config/crons-schema.md +0 -81
  105. package/config/git_standards.json +0 -72
  106. package/dist/gaia-ops/commands/gaia.md +0 -37
  107. package/dist/gaia-ops/config/crons-schema.md +0 -81
  108. package/dist/gaia-ops/config/git_standards.json +0 -72
  109. package/dist/gaia-ops/hooks/modules/security/gitops_validator.py +0 -179
  110. package/dist/gaia-ops/tools/agentic-loop/decide-status.py +0 -210
  111. package/dist/gaia-ops/tools/agentic-loop/parse-metric.py +0 -106
  112. package/dist/gaia-ops/tools/agentic-loop/record-iteration.py +0 -223
  113. package/dist/gaia-security/hooks/modules/security/gitops_validator.py +0 -179
  114. package/git-hooks/commit-msg +0 -41
  115. package/hooks/modules/security/gitops_validator.py +0 -179
  116. package/scripts/migrations/v10_to_v11.sql +0 -170
  117. package/scripts/migrations/v10_to_v11_fresh.sql +0 -18
  118. package/scripts/migrations/v11_to_v12.sql +0 -195
  119. package/scripts/migrations/v11_to_v12_fresh.sql +0 -19
  120. package/scripts/migrations/v12_to_v13.sql +0 -48
  121. package/scripts/migrations/v12_to_v13_fresh.sql +0 -17
  122. package/scripts/migrations/v13_to_v14.sql +0 -44
  123. package/scripts/migrations/v13_to_v14_fresh.sql +0 -17
  124. package/scripts/migrations/v14_to_v15.sql +0 -71
  125. package/scripts/migrations/v14_to_v15_fresh.sql +0 -19
  126. package/scripts/migrations/v15_to_v16.sql +0 -57
  127. package/scripts/migrations/v15_to_v16_fresh.sql +0 -18
  128. package/scripts/migrations/v16_to_v17.sql +0 -51
  129. package/scripts/migrations/v16_to_v17_fresh.sql +0 -18
  130. package/scripts/migrations/v17_to_v18.sql +0 -66
  131. package/scripts/migrations/v17_to_v18_fresh.sql +0 -24
  132. package/scripts/migrations/v1_to_v2.sql +0 -97
  133. package/scripts/migrations/v2_to_v3.sql +0 -68
  134. package/scripts/migrations/v2_to_v3_merge.sql +0 -69
  135. package/scripts/migrations/v3_to_v4.sql +0 -67
  136. package/scripts/migrations/v3_to_v4_fresh.sql +0 -20
  137. package/scripts/migrations/v4_to_v5.sql +0 -55
  138. package/scripts/migrations/v4_to_v5_fresh.sql +0 -20
  139. package/scripts/migrations/v5_to_v6.sql +0 -48
  140. package/scripts/migrations/v5_to_v6_fresh.sql +0 -17
  141. package/scripts/migrations/v6_to_v7.sql +0 -26
  142. package/scripts/migrations/v6_to_v7_fresh.sql +0 -13
  143. package/scripts/migrations/v7_to_v8.sql +0 -44
  144. package/scripts/migrations/v7_to_v8_fresh.sql +0 -14
  145. package/scripts/migrations/v8_to_v9.sql +0 -87
  146. package/scripts/migrations/v8_to_v9_fresh.sql +0 -15
  147. package/scripts/migrations/v9_to_v10.sql +0 -109
  148. package/scripts/migrations/v9_to_v10_episodes_workspace.sql +0 -109
  149. package/scripts/migrations/v9_to_v10_fresh.sql +0 -18
  150. package/templates/README.md +0 -70
  151. package/templates/managed-settings.template.json +0 -43
  152. package/tools/agentic-loop/decide-status.py +0 -210
  153. package/tools/agentic-loop/parse-metric.py +0 -106
  154. package/tools/agentic-loop/record-iteration.py +0 -223
@@ -1,210 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- decide-status.py
4
-
5
- Mechanically decide what to do based on numbers alone. No LLM judgment.
6
-
7
- Usage:
8
- python3 decide-status.py \
9
- --current 94.5 \
10
- --best 92.0 \
11
- --threshold 98 \
12
- --direction higher \
13
- --consecutive-discards 2 \
14
- --pivot-count 1
15
-
16
- Output JSON:
17
- {
18
- "decision": "keep",
19
- "reason": "Metric improved from 92.0 to 94.5",
20
- "improved": true,
21
- "gap_remaining": 3.5
22
- }
23
-
24
- Decision precedence (evaluated top-to-bottom, first match wins):
25
- 1. pivot_count >= 3 → stop
26
- 2. consecutive_discards >= 5 → pivot (also a discard)
27
- 3. consecutive_discards >= 3 → refine (also a discard)
28
- 4. current meets or passes threshold → threshold_reached
29
- 5. current improved vs best (per direction) → keep
30
- 6. current same or worse → discard
31
-
32
- Exit codes:
33
- 0 success (decision emitted as JSON)
34
- 1 invalid input
35
- """
36
-
37
- import argparse
38
- import json
39
- import sys
40
-
41
-
42
- Decision = str # type alias for readability
43
-
44
-
45
- def _is_improved(current: float, best: float, direction: str) -> bool:
46
- """Return True if *current* is strictly better than *best* per direction."""
47
- if direction == "higher":
48
- return current > best
49
- return current < best # lower is better
50
-
51
-
52
- def _threshold_reached(current: float, threshold: float, direction: str) -> bool:
53
- """Return True if *current* has met or surpassed *threshold*."""
54
- if direction == "higher":
55
- return current >= threshold
56
- return current <= threshold
57
-
58
-
59
- def _gap_remaining(current: float, threshold: float, direction: str) -> float:
60
- """Absolute gap between current value and threshold."""
61
- if direction == "higher":
62
- return max(0.0, threshold - current)
63
- return max(0.0, current - threshold)
64
-
65
-
66
def decide(
    current: float,
    best: float,
    threshold: float,
    direction: str,
    consecutive_discards: int,
    pivot_count: int,
) -> dict:
    """Map the loop's numeric state to a decision dict (no I/O, no state).

    Rules are checked in order and the first match wins:

    1. ``pivot_count >= 3``          -> ``stop``
    2. ``consecutive_discards >= 5`` -> ``pivot``
    3. ``consecutive_discards >= 3`` -> ``refine``
    4. threshold met or passed       -> ``threshold_reached``
    5. current beats best            -> ``keep``
    6. otherwise                     -> ``discard``

    Returns:
        A dict with keys ``decision``, ``reason``, ``improved`` and
        ``gap_remaining`` (in that order).
    """
    distance = _gap_remaining(current, threshold, direction)
    is_better = _is_improved(current, best, direction)

    def verdict(decision: str, reason: str, gap: float = distance) -> dict:
        # Every outcome shares the same four-key shape; only the label,
        # the explanation and (for threshold_reached) the gap differ.
        return {
            "decision": decision,
            "reason": reason,
            "improved": is_better,
            "gap_remaining": gap,
        }

    # Hard stop: the pivot budget is exhausted.
    if pivot_count >= 3:
        return verdict(
            "stop",
            f"pivot_count={pivot_count} has reached the maximum of 3; halting loop",
        )

    # Streak escalations are checked before threshold/keep, so a long
    # failing streak is reported even when this run reaches the threshold.
    if consecutive_discards >= 5:
        return verdict(
            "pivot",
            f"consecutive_discards={consecutive_discards} >= 5; "
            "strategy is not working, force a pivot",
        )
    if consecutive_discards >= 3:
        return verdict(
            "refine",
            f"consecutive_discards={consecutive_discards} >= 3; "
            "current approach needs refinement before continuing",
        )

    # Goal achieved: the gap is reported as exactly zero.
    if _threshold_reached(current, threshold, direction):
        relation = "≥" if direction == "higher" else "≤"
        return verdict(
            "threshold_reached",
            f"current={current} {relation} threshold={threshold}; goal achieved",
            gap=0.0,
        )

    # Ordinary keep/discard on strict improvement.
    if is_better:
        return verdict("keep", f"Metric improved from {best} to {current}")
    return verdict(
        "discard",
        f"Metric did not improve (current={current}, best={best})",
    )
140
-
141
-
142
def main() -> None:
    """Parse CLI arguments, validate them, and print the decision as JSON.

    All six options are required. On success the decision dict returned by
    ``decide`` is printed to stdout as indented JSON.

    Exits with status 1 (one ``error: ...`` line per problem on stderr) when
    a counter is negative or when any metric argument is NaN. NaN compares
    false against everything, so without this check it would be silently
    reported as a "discard" instead of as invalid input.
    """
    import math  # function-scope import: only the validation below needs it

    parser = argparse.ArgumentParser(
        description="Compute the next agentic-loop decision from metric numbers only.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Decisions:
  keep               current improved vs best
  discard            current same or worse
  refine             3+ consecutive discards (improvement needed in approach)
  pivot              5+ consecutive discards (strategy change required)
  stop               3+ pivots already attempted
  threshold_reached  current meets or surpasses the goal threshold

Direction values:
  higher   larger numbers are better (e.g. accuracy, passing tests)
  lower    smaller numbers are better (e.g. error rate, latency ms)
""",
    )
    parser.add_argument("--current", required=True, type=float, help="Metric value for the current run")
    parser.add_argument("--best", required=True, type=float, help="Best metric seen so far (from state.json)")
    parser.add_argument("--threshold", required=True, type=float, help="Target threshold to reach")
    parser.add_argument(
        "--direction",
        required=True,
        choices=["higher", "lower"],
        help="Whether higher or lower values are better",
    )
    parser.add_argument(
        "--consecutive-discards",
        required=True,
        type=int,
        metavar="N",
        help="Number of consecutive discard outcomes so far (from state.json)",
    )
    parser.add_argument(
        "--pivot-count",
        required=True,
        type=int,
        metavar="N",
        help="Number of pivots executed so far (from state.json)",
    )
    args = parser.parse_args()

    # --- Input validation ---
    errors = []
    if args.consecutive_discards < 0:
        errors.append("--consecutive-discards must be >= 0")
    if args.pivot_count < 0:
        errors.append("--pivot-count must be >= 0")
    # float("nan") is accepted by type=float but cannot be ordered, so every
    # comparison in decide() would be False and the result meaningless.
    for flag, value in (
        ("--current", args.current),
        ("--best", args.best),
        ("--threshold", args.threshold),
    ):
        if math.isnan(value):
            errors.append(f"{flag} must be a number, got NaN")

    if errors:
        for err in errors:
            print(f"error: {err}", file=sys.stderr)
        sys.exit(1)

    result = decide(
        current=args.current,
        best=args.best,
        threshold=args.threshold,
        direction=args.direction,
        consecutive_discards=args.consecutive_discards,
        pivot_count=args.pivot_count,
    )

    print(json.dumps(result, indent=2))


if __name__ == "__main__":
    main()
@@ -1,106 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- parse-metric.py
4
-
5
- Read stdout from eval_command and extract METRIC lines.
6
-
7
- Usage:
8
- echo "output" | python3 parse-metric.py --metric accuracy
9
- python3 parse-metric.py --metric accuracy --file /tmp/eval-output.txt
10
-
11
- Input lines must match: METRIC {name}={number}
12
- Output: JSON to stdout with metric name, numeric value, and raw line.
13
- """
14
-
15
- import argparse
16
- import json
17
- import re
18
- import sys
19
- from typing import Optional
20
-
21
-
22
# One metric per line: ``METRIC <name>=<number>``.
# The number must be a well-formed, optionally signed decimal ("7", "94.5",
# "1.", ".5", "-0.25"). Malformed tokens such as "1.2.3" or "." do not match,
# so they are skipped instead of crashing the int()/float() conversion.
METRIC_PATTERN = re.compile(r"^METRIC\s+(\w+)=([+-]?(?:\d+\.?\d*|\.\d+))\s*$")


def parse_lines(lines: list[str]) -> list[dict]:
    """Extract every METRIC entry from *lines*, in input order.

    Args:
        lines: Raw text lines; a trailing newline on each is tolerated.

    Returns:
        One dict per matching line with keys ``metric`` (the name),
        ``value`` (``int`` when the source text has no decimal point,
        otherwise ``float``) and ``raw_line`` (the line without its trailing
        newline). Lines that do not match ``METRIC_PATTERN`` are ignored.
    """
    results = []
    for line in lines:
        stripped = line.rstrip("\n")
        match = METRIC_PATTERN.match(stripped)
        if match is None:
            continue
        name, raw_value = match.groups()
        # Preserve int vs float from the source text.
        value: int | float = float(raw_value) if "." in raw_value else int(raw_value)
        results.append(
            {
                "metric": name,
                "value": value,
                "raw_line": stripped,
            }
        )
    return results
48
-
49
-
50
def main() -> None:
    """Read eval output, extract METRIC lines, and print them as JSON.

    Input comes from ``--file`` when given, otherwise from stdin. With
    ``--metric NAME`` only the last entry for that name is printed (as a
    single object); without it, all entries are printed as a list.

    Exits with status 1 and an ``error: ...`` line on stderr when the input
    cannot be read or decoded, or when the requested metric is absent.
    """
    parser = argparse.ArgumentParser(
        description="Extract METRIC lines from eval_command output.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  echo "METRIC accuracy=94.5" | python3 parse-metric.py --metric accuracy
  python3 parse-metric.py --metric passing_tests --file /tmp/out.txt
  python3 parse-metric.py --file /tmp/out.txt          # returns all metrics
""",
    )
    parser.add_argument(
        "--metric",
        metavar="NAME",
        help="Return only this named metric (case-sensitive). Exits 1 if not found.",
    )
    parser.add_argument(
        "--file",
        metavar="PATH",
        help="Read from file instead of stdin.",
    )
    args = parser.parse_args()

    # --- Read input ---
    # UnicodeDecodeError is a ValueError, not an OSError; catch it as well so
    # undecodable bytes yield the documented error exit, not a traceback.
    try:
        if args.file:
            with open(args.file, "r") as fh:
                lines = fh.readlines()
        else:
            lines = sys.stdin.readlines()
    except (OSError, UnicodeDecodeError) as exc:
        print(f"error: cannot read input: {exc}", file=sys.stderr)
        sys.exit(1)

    # --- Parse ---
    all_metrics = parse_lines(lines)

    if args.metric:
        # Filter to the requested metric name.
        matches = [m for m in all_metrics if m["metric"] == args.metric]
        if not matches:
            print(
                f"error: metric '{args.metric}' not found in input",
                file=sys.stderr,
            )
            sys.exit(1)
        # Return the last occurrence if there are duplicates.
        payload = matches[-1]
    else:
        # Return all metrics as a list when no --metric filter is given.
        payload = all_metrics

    print(json.dumps(payload, indent=2))


if __name__ == "__main__":
    main()
@@ -1,223 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- record-iteration.py
4
-
5
- Atomically update state.json and append to worklog.md after each iteration.
6
- The LLM never writes state.json directly — this script is the only writer.
7
-
8
- Usage:
9
- python3 record-iteration.py \
10
- --state-file state.json \
11
- --worklog worklog.md \
12
- --iteration 5 \
13
- --metric-value 94.5 \
14
- --status keep \
15
- --description "Handle hyphenated verbs" \
16
- --insight "delete-objects splits correctly" \
17
- --next "Check camelCase+hyphen combined"
18
-
19
- Optional flags:
20
- --changed TEXT What was modified (default: same as description)
21
- --metric-name TEXT Name of the metric recorded (default: "metric")
22
-
23
- Atomic write guarantee: state.json is written to a .tmp sibling, fsynced,
24
- then renamed over the original. Either the full write lands or the original
25
- is untouched.
26
- """
27
-
28
- from __future__ import annotations
29
-
30
- import argparse
31
- import json
32
- import os
33
- import sys
34
- import tempfile
35
- from datetime import datetime, timezone
36
-
37
-
38
def load_state(path: str) -> dict:
    """Load the state dict from *path*, or return a fresh skeleton.

    Args:
        path: Location of ``state.json``.

    Returns:
        The parsed JSON object, or a skeleton with zeroed counters and
        ``None`` metrics when the file does not exist yet.

    Exits with status 1 (message on stderr) when the file exists but cannot
    be read or parsed, or when its top-level JSON value is not an object.
    """
    # Open directly rather than checking existence first: this avoids a race
    # between the check and the read.
    try:
        with open(path, "r") as fh:
            data = json.load(fh)
    except FileNotFoundError:
        return {
            "iteration": 0,
            "current_metric": None,
            "best_metric": None,
            "consecutive_discards": 0,
            "pivot_count": 0,
            "timestamp": None,
            "status": None,
        }
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"error: cannot read state file '{path}': {exc}", file=sys.stderr)
        sys.exit(1)

    # Callers assign keys on the result, so anything but an object is corrupt.
    if not isinstance(data, dict):
        print(
            f"error: cannot read state file '{path}': "
            f"expected a JSON object, got {type(data).__name__}",
            file=sys.stderr,
        )
        sys.exit(1)
    return data
57
-
58
-
59
def atomic_write_json(path: str, data: dict) -> None:
    """Replace *path* with the JSON form of *data* in one step.

    The JSON text (2-space indent, trailing newline) is written to a
    temporary ``.tmp`` file in the destination directory, flushed and
    fsynced, and then moved over *path* with ``os.replace``. If anything
    fails before the move, the temporary file is removed.

    Exits with status 1 (message on stderr) on ``OSError``; other exceptions
    propagate after the temporary file has been cleaned up.
    """
    # The scratch file must share a filesystem with the target for the
    # final rename to work, so it is created alongside it.
    target_dir = os.path.dirname(os.path.abspath(path))
    try:
        handle, scratch = tempfile.mkstemp(dir=target_dir, suffix=".tmp")
        try:
            with os.fdopen(handle, "w") as out:
                out.write(json.dumps(data, indent=2))
                out.write("\n")
                out.flush()
                os.fsync(out.fileno())
            os.replace(scratch, path)
        except Exception:
            # Best-effort removal of the leftover scratch file, then let
            # the original failure continue upward.
            try:
                os.unlink(scratch)
            except OSError:
                pass
            raise
    except OSError as exc:
        print(f"error: atomic write to '{path}' failed: {exc}", file=sys.stderr)
        sys.exit(1)
82
-
83
-
84
def append_worklog(
    path: str,
    iteration: int,
    description: str,
    metric_name: str,
    metric_value: float,
    status: str,
    changed: str,
    insight: str,
    next_step: str,
    best_metric: float | None,
    direction: str = "higher",
) -> None:
    """Append one structured run entry to the worklog at *path*.

    Args:
        path: Worklog file; opened in append mode, created if missing.
        iteration: Run number shown in the entry heading.
        description: Short summary shown in the heading.
        metric_name: Label for the metric.
        metric_value: Metric value for this run.
        status: Outcome label; rendered upper-cased in the heading.
        changed: Text for the "Changed" bullet.
        insight: Text for the "Insight" bullet.
        next_step: Text for the "Next" bullet.
        best_metric: Best value before this run, or ``None`` on a first run.
        direction: ``"higher"`` (default) when larger values are better;
            any other value is treated as lower-is-better. It only affects
            the wording "improved" vs. "regressed" in the Result bullet.

    Exits with status 1 (message on stderr) if the file cannot be written.
    """
    status_upper = status.upper()

    # Build result sentence
    if best_metric is None:
        result_text = f"{metric_name}={metric_value} (first run, no prior best)"
    else:
        if metric_value == best_metric:
            comparison = f"unchanged from {best_metric}"
        else:
            # "Better" depends on the metric's direction: for error rates or
            # latency a smaller value is the improvement.
            if direction == "higher":
                better = metric_value > best_metric
            else:
                better = metric_value < best_metric
            verb = "improved" if better else "regressed"
            comparison = f"{verb} from {best_metric}"
        result_text = f"{metric_name}={metric_value} ({comparison})"

    entry = (
        f"\n### Run {iteration}: {description} — {metric_name}={metric_value} ({status_upper})\n"
        f"- **Changed:** {changed}\n"
        f"- **Result:** {result_text}\n"
        f"- **Insight:** {insight}\n"
        f"- **Next:** {next_step}\n"
    )

    try:
        # The heading contains an em dash, so write UTF-8 explicitly rather
        # than depending on the locale's default encoding.
        with open(path, "a", encoding="utf-8") as fh:
            fh.write(entry)
    except OSError as exc:
        print(f"error: cannot append to worklog '{path}': {exc}", file=sys.stderr)
        sys.exit(1)


def main() -> None:
    """Record one loop iteration: update state.json, then append the worklog.

    The state is loaded, updated according to ``--status`` and written back
    atomically. Only then is the worklog entry appended, using the best
    metric from *before* this run for the comparison text. The updated state
    is finally printed to stdout as indented JSON.

    Counter updates by status:
        keep     best_metric := metric value, consecutive_discards := 0
        discard  consecutive_discards += 1
        pivot    consecutive_discards += 1, pivot_count += 1
        stop     no counter changes
    """
    parser = argparse.ArgumentParser(
        description="Atomically record an agentic-loop iteration into state.json and worklog.md.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Status values:
  keep     — metric improved; best is updated, consecutive_discards reset to 0
  discard  — metric did not improve; consecutive_discards incremented
  pivot    — forced strategy change (also increments pivot_count)
  stop     — terminal state; loop should halt

Exit codes:
  0  success
  1  error (message on stderr)
""",
    )
    parser.add_argument("--state-file", required=True, metavar="PATH", help="Path to state.json")
    parser.add_argument("--worklog", required=True, metavar="PATH", help="Path to worklog.md (append-only)")
    parser.add_argument("--iteration", required=True, type=int, help="Current iteration number (1-based)")
    parser.add_argument("--metric-value", required=True, type=float, metavar="NUM", help="Numeric metric value this run")
    parser.add_argument(
        "--status",
        required=True,
        choices=["keep", "discard", "pivot", "stop"],
        help="Outcome classification for this iteration",
    )
    parser.add_argument("--description", required=True, help="Short description of what changed this run")
    parser.add_argument("--insight", required=True, help="What was learned from this run")
    parser.add_argument("--next", required=True, dest="next_step", help="What to try in the next iteration")
    parser.add_argument(
        "--changed",
        default=None,
        metavar="TEXT",
        help="What was specifically modified (defaults to --description)",
    )
    parser.add_argument(
        "--metric-name",
        default="metric",
        metavar="NAME",
        help="Name label for the metric (default: metric)",
    )
    parser.add_argument(
        "--direction",
        default="higher",
        choices=["higher", "lower"],
        help="Whether higher or lower values are better; affects worklog wording only (default: higher)",
    )
    args = parser.parse_args()

    changed = args.changed if args.changed is not None else args.description

    # --- Load current state ---
    state = load_state(args.state_file)

    # Capture the previous best before it is possibly overwritten below.
    prev_best: float | None = state.get("best_metric")

    # --- Compute new state values ---
    state["iteration"] = args.iteration
    state["current_metric"] = args.metric_value
    state["status"] = args.status
    state["timestamp"] = datetime.now(tz=timezone.utc).isoformat()

    if args.status == "keep":
        # Keep: this run is better; promote to best.
        state["best_metric"] = args.metric_value
        state["consecutive_discards"] = 0
    elif args.status == "discard":
        # Do not update best; increment discard counter.
        state["consecutive_discards"] = int(state.get("consecutive_discards") or 0) + 1
    elif args.status == "pivot":
        # Pivot: counts as a discard for streak purposes, but also advances pivot_count.
        state["consecutive_discards"] = int(state.get("consecutive_discards") or 0) + 1
        state["pivot_count"] = int(state.get("pivot_count") or 0) + 1
    # "stop" is terminal: nothing beyond the fields recorded above changes.

    # --- Atomic write ---
    atomic_write_json(args.state_file, state)

    # --- Append worklog ---
    append_worklog(
        path=args.worklog,
        iteration=args.iteration,
        description=args.description,
        metric_name=args.metric_name,
        metric_value=args.metric_value,
        status=args.status,
        changed=changed,
        insight=args.insight,
        next_step=args.next_step,
        best_metric=prev_best,
        direction=args.direction,
    )

    # Emit updated state summary for easy inspection.
    print(json.dumps(state, indent=2))


if __name__ == "__main__":
    main()