claudecode-omc 5.4.0 → 5.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/.local/guidelines/CLAUDE.md +31 -0
  2. package/README.md +57 -1
  3. package/bundled/manifest.json +2 -2
  4. package/bundled/upstream/oh-my-claudecode/agents/analyst.md +1 -1
  5. package/bundled/upstream/oh-my-claudecode/agents/architect.md +1 -1
  6. package/bundled/upstream/oh-my-claudecode/agents/code-reviewer.md +1 -1
  7. package/bundled/upstream/oh-my-claudecode/agents/code-simplifier.md +1 -1
  8. package/bundled/upstream/oh-my-claudecode/agents/critic.md +1 -1
  9. package/bundled/upstream/oh-my-claudecode/agents/debugger.md +1 -1
  10. package/bundled/upstream/oh-my-claudecode/agents/designer.md +1 -1
  11. package/bundled/upstream/oh-my-claudecode/agents/document-specialist.md +1 -1
  12. package/bundled/upstream/oh-my-claudecode/agents/executor.md +1 -1
  13. package/bundled/upstream/oh-my-claudecode/agents/explore.md +1 -1
  14. package/bundled/upstream/oh-my-claudecode/agents/git-master.md +3 -3
  15. package/bundled/upstream/oh-my-claudecode/agents/planner.md +1 -1
  16. package/bundled/upstream/oh-my-claudecode/agents/qa-tester.md +1 -1
  17. package/bundled/upstream/oh-my-claudecode/agents/scientist.md +1 -1
  18. package/bundled/upstream/oh-my-claudecode/agents/security-reviewer.md +1 -1
  19. package/bundled/upstream/oh-my-claudecode/agents/test-engineer.md +1 -75
  20. package/bundled/upstream/oh-my-claudecode/agents/tracer.md +1 -1
  21. package/bundled/upstream/oh-my-claudecode/agents/verifier.md +1 -1
  22. package/bundled/upstream/oh-my-claudecode/agents/writer.md +1 -1
  23. package/bundled/upstream/oh-my-claudecode/hooks/hooks.json +21 -1
  24. package/bundled/upstream/oh-my-claudecode/skills/AGENTS.md +200 -0
  25. package/bundled/upstream/oh-my-claudecode/skills/autopilot/SKILL.md +17 -10
  26. package/bundled/upstream/oh-my-claudecode/skills/autoresearch/SKILL.md +90 -0
  27. package/bundled/upstream/oh-my-claudecode/skills/cancel/SKILL.md +15 -6
  28. package/bundled/upstream/oh-my-claudecode/skills/configure-notifications/SKILL.md +12 -12
  29. package/bundled/upstream/oh-my-claudecode/skills/debug/SKILL.md +35 -0
  30. package/bundled/upstream/oh-my-claudecode/skills/deep-dive/SKILL.md +4 -0
  31. package/bundled/upstream/oh-my-claudecode/skills/deep-interview/SKILL.md +23 -18
  32. package/bundled/upstream/oh-my-claudecode/skills/hud/SKILL.md +23 -101
  33. package/bundled/upstream/oh-my-claudecode/skills/learner/SKILL.md +27 -2
  34. package/bundled/upstream/oh-my-claudecode/skills/mcp-setup/SKILL.md +67 -8
  35. package/bundled/upstream/oh-my-claudecode/skills/omc-doctor/SKILL.md +32 -47
  36. package/bundled/upstream/oh-my-claudecode/skills/omc-setup/SKILL.md +4 -2
  37. package/bundled/upstream/oh-my-claudecode/skills/omc-setup/phases/01-install-claude-md.md +15 -4
  38. package/bundled/upstream/oh-my-claudecode/skills/omc-setup/phases/02-configure.md +9 -9
  39. package/bundled/upstream/oh-my-claudecode/skills/omc-setup/phases/03-integrations.md +13 -13
  40. package/bundled/upstream/oh-my-claudecode/skills/omc-setup/phases/04-welcome.md +3 -3
  41. package/bundled/upstream/oh-my-claudecode/skills/omc-teams/SKILL.md +28 -0
  42. package/bundled/upstream/oh-my-claudecode/skills/plan/SKILL.md +1 -0
  43. package/bundled/upstream/oh-my-claudecode/skills/project-session-manager/SKILL.md +25 -5
  44. package/bundled/upstream/oh-my-claudecode/skills/project-session-manager/lib/config.sh +2 -15
  45. package/bundled/upstream/oh-my-claudecode/skills/project-session-manager/lib/providers/github.sh +1 -1
  46. package/bundled/upstream/oh-my-claudecode/skills/project-session-manager/lib/session.sh +2 -2
  47. package/bundled/upstream/oh-my-claudecode/skills/project-session-manager/lib/tmux.sh +109 -4
  48. package/bundled/upstream/oh-my-claudecode/skills/project-session-manager/lib/worktree.sh +26 -0
  49. package/bundled/upstream/oh-my-claudecode/skills/project-session-manager/psm.sh +46 -5
  50. package/bundled/upstream/oh-my-claudecode/skills/project-session-manager/templates/pr-review.md +5 -2
  51. package/bundled/upstream/oh-my-claudecode/skills/project-session-manager/templates/projects.json +1 -1
  52. package/bundled/upstream/oh-my-claudecode/skills/project-session-manager/tests/test-psm-prompt-injection.sh +336 -0
  53. package/bundled/upstream/oh-my-claudecode/skills/ralph/SKILL.md +18 -9
  54. package/bundled/upstream/oh-my-claudecode/skills/ralplan/SKILL.md +2 -0
  55. package/bundled/upstream/oh-my-claudecode/skills/release/SKILL.md +167 -57
  56. package/bundled/upstream/oh-my-claudecode/skills/remember/SKILL.md +41 -0
  57. package/bundled/upstream/oh-my-claudecode/skills/self-improve/SKILL.md +391 -0
  58. package/bundled/upstream/oh-my-claudecode/skills/self-improve/data_contracts.md +274 -0
  59. package/bundled/upstream/oh-my-claudecode/skills/self-improve/scripts/plot_progress.py +128 -0
  60. package/bundled/upstream/oh-my-claudecode/skills/self-improve/scripts/resolve-paths.mjs +192 -0
  61. package/bundled/upstream/oh-my-claudecode/skills/self-improve/scripts/validate.sh +404 -0
  62. package/bundled/upstream/oh-my-claudecode/skills/self-improve/si-benchmark-builder.md +79 -0
  63. package/bundled/upstream/oh-my-claudecode/skills/self-improve/si-goal-clarifier.md +94 -0
  64. package/bundled/upstream/oh-my-claudecode/skills/self-improve/si-researcher.md +73 -0
  65. package/bundled/upstream/oh-my-claudecode/skills/self-improve/templates/agent-settings.json +14 -0
  66. package/bundled/upstream/oh-my-claudecode/skills/self-improve/templates/goal.md +22 -0
  67. package/bundled/upstream/oh-my-claudecode/skills/self-improve/templates/harness.md +18 -0
  68. package/bundled/upstream/oh-my-claudecode/skills/self-improve/templates/idea.md +5 -0
  69. package/bundled/upstream/oh-my-claudecode/skills/self-improve/templates/settings.json +23 -0
  70. package/bundled/upstream/oh-my-claudecode/skills/skill/SKILL.md +46 -77
  71. package/bundled/upstream/oh-my-claudecode/skills/skillify/SKILL.md +53 -0
  72. package/bundled/upstream/oh-my-claudecode/skills/team/SKILL.md +83 -11
  73. package/bundled/upstream/oh-my-claudecode/skills/trace/SKILL.md +1 -0
  74. package/bundled/upstream/oh-my-claudecode/skills/ultraqa/SKILL.md +1 -0
  75. package/bundled/upstream/oh-my-claudecode/skills/ultrawork/SKILL.md +1 -0
  76. package/bundled/upstream/oh-my-claudecode/skills/verify/SKILL.md +37 -0
  77. package/bundled/upstream/oh-my-claudecode/skills/wiki/SKILL.md +67 -0
  78. package/package.json +3 -1
  79. package/src/cli/artifact.js +47 -0
  80. package/src/cli/guidelines.js +83 -0
  81. package/src/cli/index.js +13 -1
  82. package/src/cli/setup.js +35 -17
  83. package/src/cli/source.js +35 -1
  84. package/src/config/artifact-types.js +12 -2
  85. package/src/config/paths.js +95 -4
  86. package/src/config/sources.js +29 -5
  87. package/src/guidelines/apply.js +152 -0
  88. package/src/guidelines/optimizer.js +325 -0
  89. package/src/merge/claude-md-merger.js +35 -12
  90. package/bundled/upstream/oh-my-claudecode/skills/omc-doctor/skill-debugger.md +0 -101
@@ -0,0 +1,404 @@
1
+ #!/usr/bin/env bash
2
+ # validate.sh — Sealed file enforcement + plan schema validation for self-improvement loop.
3
+ # Usage:
4
+ # ./validate.sh --worktree /path --settings /path/to/settings.json plan.json
5
+ # ./validate.sh --project-root /path/to/omc/project --topic "Improve tests" plan.json
6
+ # ./validate.sh plan.json
7
+ # ./validate.sh
8
+
9
set -euo pipefail

# Absolute directory containing this script, and its parent (the skill directory).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SKILL_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"

# Settings path may be provided directly or resolved from the scoped self-improve root.
SETTINGS=""
PROJECT_ROOT=""
TOPIC_NAME=""
TOPIC_SLUG=""

# Parse arguments.
# Every recognised option takes exactly one value; everything else is kept,
# in order, as a positional argument (plan file first, result file second).
WORKTREE_PATH=""
POSITIONAL_ARGS=()
while [[ $# -gt 0 ]]; do
  case "$1" in
    --worktree|--settings|--project-root|--topic|--slug)
      # Under `set -u`, reading "$2" when the value is missing aborts with a
      # cryptic "unbound variable" message; report the real problem instead.
      if [[ $# -lt 2 ]]; then
        echo "ERROR: option $1 requires a value" >&2
        exit 1
      fi
      case "$1" in
        --worktree)     WORKTREE_PATH="$2" ;;
        --settings)     SETTINGS="$2" ;;
        --project-root) PROJECT_ROOT="$2" ;;
        --topic)        TOPIC_NAME="$2" ;;
        --slug)         TOPIC_SLUG="$2" ;;
      esac
      shift 2
      ;;
    *)
      POSITIONAL_ARGS+=("$1")
      shift
      ;;
  esac
done
# The ${arr[@]+...} form keeps `set -u` from tripping on an empty array in older bash.
set -- "${POSITIONAL_ARGS[@]+"${POSITIONAL_ARGS[@]}"}"

# Directory every git command is run against: the worktree when given, else the cwd.
GIT_DIR="${WORKTREE_PATH:-$(pwd)}"
54
+
55
# Print the arguments, prefixed with "ERROR: ", on stderr.
err() {
  printf 'ERROR: %s\n' "$*" >&2
}

# Print the arguments, prefixed with "OK: ", on stdout.
ok() {
  printf 'OK: %s\n' "$*"
}
57
+
58
# Ensure `jq` is available on PATH.
# Returns 0 when it is; otherwise prints an installation hint and exits the
# script with status 1.
require_jq() {
  if command -v jq >/dev/null 2>&1; then
    return 0
  fi

  err "jq is not installed. Install with: brew install jq (macOS) or apt-get install jq (Linux)"
  exit 1
}
64
+
65
# Ask resolve-paths.mjs (next to this script) for the settings path of a topic.
#   $1 - project root handed to the resolver via --project-root
# The topic is selected with --slug when TOPIC_SLUG is set, otherwise with
# --topic when TOPIC_NAME is set. On success SETTINGS is set to the resolver's
# `settings_path` and 0 is returned; any failure (resolver error, empty output,
# missing or empty `settings_path`) returns 1 and leaves SETTINGS untouched.
resolve_settings_from_project_root() {
  local root_dir="$1"
  local node_args=( "${SCRIPT_DIR}/resolve-paths.mjs" --project-root "${root_dir}" )

  # The slug takes precedence over the human-readable topic name.
  if [[ -n "${TOPIC_SLUG}" ]]; then
    node_args+=( --slug "${TOPIC_SLUG}" )
  elif [[ -n "${TOPIC_NAME}" ]]; then
    node_args+=( --topic "${TOPIC_NAME}" )
  fi

  # Resolver failures are tolerated here; an empty result is the failure signal.
  local resolver_json
  resolver_json=$(node "${node_args[@]}" 2>/dev/null || true)
  [[ -n "${resolver_json}" ]] || return 1

  local settings_candidate
  settings_candidate=$(printf '%s' "${resolver_json}" | jq -r '.settings_path // ""' 2>/dev/null || true)
  [[ -n "${settings_candidate}" ]] || return 1

  SETTINGS="${settings_candidate}"
  return 0
}
91
+
92
# Walk from a directory up towards "/" looking for a self-improve settings file.
#   $1 - directory to start from (absolute or relative)
# At each level the unscoped file `.omc/self-improve/config/settings.json` wins;
# otherwise a single `.omc/self-improve/topics/<topic>/config/settings.json` is
# accepted. Sets SETTINGS and returns 0 on a hit, returns 1 when nothing is
# found, and exits 1 when several topics make the choice ambiguous.
# "/" itself is not inspected.
discover_settings_from_search_root() {
  local search_dir="$1"

  # Work on an absolute path. With a relative start ("." or "foo/bar"),
  # dirname converges on "." and never reaches "/", so the walk would spin forever.
  local abs_dir
  if abs_dir="$(cd -- "${search_dir}" >/dev/null 2>&1 && pwd)"; then
    search_dir="${abs_dir}"
  elif [[ "${search_dir}" != /* ]]; then
    search_dir="${PWD}/${search_dir}"
  fi

  local parent_dir candidate
  while [[ "${search_dir}" != "/" ]]; do
    if [[ -f "${search_dir}/.omc/self-improve/config/settings.json" ]]; then
      SETTINGS="${search_dir}/.omc/self-improve/config/settings.json"
      return 0
    fi

    # Collect glob matches without toggling nullglob, so the caller's shell
    # options are left alone: an unmatched pattern stays literal and is
    # filtered out by the existence test.
    local scoped_candidates=()
    for candidate in "${search_dir}"/.omc/self-improve/topics/*/config/settings.json; do
      if [[ -e "${candidate}" || -L "${candidate}" ]]; then
        scoped_candidates+=( "${candidate}" )
      fi
    done

    if [[ "${#scoped_candidates[@]}" -eq 1 ]]; then
      SETTINGS="${scoped_candidates[0]}"
      return 0
    fi
    if [[ "${#scoped_candidates[@]}" -gt 1 ]]; then
      err "Multiple self-improve topics exist under ${search_dir}/.omc/self-improve/topics/. Pass --settings, --project-root with --topic/--slug, or set SELF_IMPROVE_SETTINGS_PATH."
      exit 1
    fi

    parent_dir="$(dirname "${search_dir}")"
    # Belt and braces: stop if dirname makes no progress.
    if [[ "${parent_dir}" == "${search_dir}" ]]; then
      break
    fi
    search_dir="${parent_dir}"
  done
  return 1
}
116
+
117
# Populate SETTINGS using the first source that yields a path:
#   1. a value already in SETTINGS (e.g. from --settings)
#   2. the SELF_IMPROVE_SETTINGS_PATH environment variable
#   3. the resolver script, when --project-root is combined with --topic/--slug
#   4. upward discovery starting at --project-root
#   5. upward discovery starting at --worktree, or the cwd when no worktree is given
# Returns 0 when SETTINGS is set, 1 otherwise. Steps 3-5 require jq.
resolve_settings_path() {
  if [[ -n "${SETTINGS}" ]]; then
    return 0
  fi

  local env_settings="${SELF_IMPROVE_SETTINGS_PATH:-}"
  if [[ -n "${env_settings}" ]]; then
    SETTINGS="${env_settings}"
    return 0
  fi

  require_jq

  if [[ -n "${PROJECT_ROOT}" ]]; then
    # The resolver is only consulted when a topic was named.
    if [[ -n "${TOPIC_SLUG}${TOPIC_NAME}" ]] && resolve_settings_from_project_root "${PROJECT_ROOT}"; then
      return 0
    fi

    if discover_settings_from_search_root "${PROJECT_ROOT}"; then
      return 0
    fi
  fi

  local start_dir="${WORKTREE_PATH:-$(pwd)}"
  if discover_settings_from_search_root "${start_dir}"; then
    return 0
  fi

  return 1
}
146
+
147
# Fail (exit 1) when any file listed in the settings' `sealed_files` array was modified.
#
# The check is skipped with an OK message when there is no settings file, when
# `sealed_files` is absent or empty, or when GIT_DIR is not a git repository.
# With --worktree, changes are measured against the merge-base of HEAD and the
# first `improve/*` branch (falling back to origin's default branch, then
# HEAD~1); otherwise against HEAD plus the index.
# A `sealed_files` entry ending in "/" seals everything below that directory;
# any other entry must equal the modified path exactly.
check_sealed_files() {
  resolve_settings_path || true

  if [[ -z "${SETTINGS}" || ! -f "${SETTINGS}" ]]; then
    ok "No settings file found — skipping sealed file check."
    return 0
  fi

  # Fail closed: a missing jq or an unparseable settings file must not be
  # reported as "no sealed files configured", which would disable enforcement.
  require_jq
  if ! jq -e 'type == "object"' "${SETTINGS}" >/dev/null 2>&1; then
    err "Settings file is not a valid JSON object: ${SETTINGS}"
    exit 1
  fi

  local has_sealed
  has_sealed=$(jq -r 'if (.sealed_files | type) == "array" and (.sealed_files | length) > 0 then "yes" else "no" end' "${SETTINGS}" 2>/dev/null || echo "no")

  if [[ "${has_sealed}" != "yes" ]]; then
    ok "No sealed files configured — skipping."
    return 0
  fi

  if ! git -C "${GIT_DIR}" rev-parse --git-dir >/dev/null 2>&1; then
    ok "Not a git repository — skipping sealed file check."
    return 0
  fi

  local modified_files_str=""
  if [[ -n "${WORKTREE_PATH}" ]]; then
    # Find the correct baseline: the improvement branch this experiment branched from.
    # Try improve/* branches first, then fall back to main/master.
    local base_commit
    local improve_branch
    # `git branch` prefixes each line with a two-character marker: "* " for the
    # current branch, "+ " for a branch checked out in another worktree, "  "
    # otherwise. Strip only that marker so names containing "+" stay intact.
    improve_branch=$(git -C "${GIT_DIR}" branch -a --list 'improve/*' 2>/dev/null | head -n 1 | sed -e 's/^[*+ ] //' || true)
    if [[ -z "${improve_branch}" ]]; then
      improve_branch=$(git -C "${GIT_DIR}" symbolic-ref refs/remotes/origin/HEAD 2>/dev/null | sed 's|refs/remotes/origin/||' || echo "main")
    fi
    base_commit=$(git -C "${GIT_DIR}" merge-base HEAD "${improve_branch}" 2>/dev/null || echo "HEAD~1")
    modified_files_str=$(git -C "${GIT_DIR}" diff --name-only "${base_commit}" 2>/dev/null || true)
    local uncommitted
    uncommitted=$(git -C "${GIT_DIR}" diff --name-only 2>/dev/null || true)
    if [[ -n "${uncommitted}" ]]; then
      modified_files_str="${modified_files_str}"$'\n'"${uncommitted}"
    fi
  else
    modified_files_str=$(git -C "${GIT_DIR}" diff --name-only HEAD 2>/dev/null || true)
    local staged
    staged=$(git -C "${GIT_DIR}" diff --name-only --cached 2>/dev/null || true)
    if [[ -n "${staged}" ]]; then
      modified_files_str="${modified_files_str}"$'\n'"${staged}"
    fi
  fi

  if [[ -n "${modified_files_str}" ]]; then
    modified_files_str=$(printf '%s\n' "${modified_files_str}" | sort -u)
  fi

  if [[ -z "${modified_files_str}" ]]; then
    ok "No modified files detected."
    return 0
  fi

  # Load the sealed entries once. git reports paths without a leading "./",
  # so normalise entries written that way; otherwise they could never match.
  local sealed_list=() sealed
  while IFS= read -r sealed; do
    sealed="${sealed#./}"
    if [[ -n "${sealed}" ]]; then
      sealed_list+=( "${sealed}" )
    fi
  done < <(jq -r '.sealed_files[]' "${SETTINGS}" 2>/dev/null)

  # Iterate modified files on the outside so each offender is reported once,
  # even when several sealed entries cover it.
  local violations="" modified
  while IFS= read -r modified; do
    [[ -z "${modified}" ]] && continue
    for sealed in ${sealed_list[@]+"${sealed_list[@]}"}; do
      if [[ "${sealed}" == */ ]]; then
        # Directory entry: seal everything below it.
        if [[ "${modified}" == "${sealed}"* ]]; then
          violations="${violations} ${modified}"
          break
        fi
      elif [[ "${modified}" == "${sealed}" ]]; then
        violations="${violations} ${modified}"
        break
      fi
    done
  done <<< "${modified_files_str}"

  if [[ -n "${violations}" ]]; then
    err "Sealed file(s) were modified:${violations}"
    exit 1
  fi

  local modified_count
  modified_count=$(printf '%s\n' "${modified_files_str}" | wc -l | tr -d ' ')
  ok "Sealed file check passed (${modified_count} modified, none sealed)."
}
224
+
225
# Validate the structure of a plan JSON file; exits 1 on the first failed check.
#   $1 - path to the plan file
# Checks, in order: the file exists and holds a JSON object, every required
# field is present and neither null nor an empty string, `hypothesis` is a
# string, and `steps` is an array with at least one element.
check_plan_schema() {
  local plan_file="$1"
  require_jq

  if [[ ! -f "${plan_file}" ]]; then
    err "Plan file not found: ${plan_file}"
    exit 1
  fi

  # Without this, a malformed file makes the first jq call below fail with its
  # stderr discarded, and `set -e` ends the script without any explanation.
  if ! jq -e 'type == "object"' "${plan_file}" >/dev/null 2>&1; then
    err "Plan file is not a valid JSON object: ${plan_file}"
    exit 1
  fi

  local required_fields="plan_id planner_id round hypothesis approach_family critic_approved target_files steps expected_outcome history_reference"
  local missing=""
  local field val

  for field in ${required_fields}; do
    val=$(jq -r --arg f "${field}" '.[$f]' "${plan_file}" 2>/dev/null)
    if [[ "${val}" == "null" || -z "${val}" ]]; then
      missing="${missing} ${field}"
    fi
  done

  if [[ -n "${missing}" ]]; then
    err "Plan is missing required fields:${missing}"
    exit 1
  fi
  ok "Plan contains all required fields."

  # approach_family validation is handled by the critic (supports custom families
  # from harness.md). validate.sh only checks structural schema, not taxonomy.

  # Validate hypothesis is a single string
  local hypothesis_type
  hypothesis_type=$(jq -r '.hypothesis | type' "${plan_file}" 2>/dev/null)
  if [[ "${hypothesis_type}" != "string" ]]; then
    err "hypothesis must be a string (got ${hypothesis_type})"
    exit 1
  fi
  ok "One-hypothesis check passed."

  # Validate steps is a non-empty array. jq's `length` also accepts strings,
  # objects and numbers, so the type has to be checked explicitly first.
  local steps_type steps_len
  steps_type=$(jq -r '.steps | type' "${plan_file}" 2>/dev/null)
  if [[ "${steps_type}" != "array" ]]; then
    err "steps must be a non-empty array (got ${steps_type})"
    exit 1
  fi
  steps_len=$(jq '.steps | length' "${plan_file}" 2>/dev/null || echo "0")
  if [[ "${steps_len}" -eq 0 ]]; then
    err "steps must be a non-empty array"
    exit 1
  fi
  ok "Steps validated (${steps_len} step(s))."
}
269
+
270
# Validate the structure of a result JSON file; exits 1 on the first failed check.
#   $1 - path to the result file
# Checks, in order: the file exists and holds a JSON object; required fields
# are present (benchmark_score and benchmark_raw may be null as long as the key
# exists); `status` is one of the allowed values; for any non-success status,
# `failure_analysis` is an object with all of its fields and a known category;
# and `sub_scores`, when present, is null or an object whose values are numbers
# or null.
check_result_schema() {
  local result_file="$1"
  require_jq

  if [[ ! -f "${result_file}" ]]; then
    err "Result file not found: ${result_file}"
    exit 1
  fi

  # Without this, a malformed file makes the first jq call below fail with its
  # stderr discarded, and `set -e` ends the script without any explanation.
  if ! jq -e 'type == "object"' "${result_file}" >/dev/null 2>&1; then
    err "Result file is not a valid JSON object: ${result_file}"
    exit 1
  fi

  local required_fields="executor_id plan_id benchmark_score status timestamp benchmark_raw"
  local missing=""
  local field val exists

  for field in ${required_fields}; do
    val=$(jq -r --arg f "${field}" '.[$f]' "${result_file}" 2>/dev/null)
    if [[ "${val}" == "null" || -z "${val}" ]]; then
      case "${field}" in
        benchmark_raw|benchmark_score)
          # These two may legitimately be null or empty; only an absent key is an error.
          exists=$(jq --arg f "${field}" 'has($f)' "${result_file}" 2>/dev/null || echo "false")
          if [[ "${exists}" != "true" ]]; then
            missing="${missing} ${field}"
          fi
          ;;
        *)
          missing="${missing} ${field}"
          ;;
      esac
    fi
  done

  if [[ -n "${missing}" ]]; then
    err "Result is missing required fields:${missing}"
    exit 1
  fi
  ok "Result contains all required fields."

  # Validate status enum
  local status
  status=$(jq -r '.status' "${result_file}" 2>/dev/null)
  case "${status}" in
    success|regression|error|timeout) ;;
    *)
      err "Invalid status '${status}'. Must be one of: success, regression, error, timeout"
      exit 1
      ;;
  esac
  ok "Status '${status}' is valid."

  # Check failure_analysis on non-success status
  if [[ "${status}" != "success" ]]; then
    local fa_type
    fa_type=$(jq -r '.failure_analysis | type' "${result_file}" 2>/dev/null)
    if [[ "${fa_type}" != "object" ]]; then
      err "failure_analysis must be a non-null object when status is '${status}' (got ${fa_type})"
      exit 1
    fi

    local fa_fields="what why category lesson"
    local fa_missing=""
    for field in ${fa_fields}; do
      val=$(jq -r --arg f "${field}" '.failure_analysis[$f]' "${result_file}" 2>/dev/null)
      if [[ "${val}" == "null" || -z "${val}" ]]; then
        fa_missing="${fa_missing} ${field}"
      fi
    done

    if [[ -n "${fa_missing}" ]]; then
      err "failure_analysis is missing required fields:${fa_missing}"
      exit 1
    fi
    ok "failure_analysis is complete for non-success status."

    # Validate failure category enum
    local fa_category
    fa_category=$(jq -r '.failure_analysis.category' "${result_file}" 2>/dev/null)
    local valid_categories="oom timeout regression logic_error scope_error infrastructure benchmark_parse_error sealed_file_violation"
    local cat_valid=0
    local known_category
    for known_category in ${valid_categories}; do
      if [[ "${fa_category}" == "${known_category}" ]]; then
        cat_valid=1
        break
      fi
    done

    if [[ ${cat_valid} -eq 0 ]]; then
      err "failure_analysis.category '${fa_category}' is not valid. Must be one of: ${valid_categories}"
      exit 1
    fi
    ok "failure_analysis.category '${fa_category}' is valid."
  fi

  # Validate sub_scores if present
  local has_sub_scores
  has_sub_scores=$(jq 'has("sub_scores")' "${result_file}" 2>/dev/null || echo "false")
  if [[ "${has_sub_scores}" == "true" ]]; then
    local sub_scores_type
    sub_scores_type=$(jq -r '.sub_scores | type' "${result_file}" 2>/dev/null)
    if [[ "${sub_scores_type}" == "object" ]]; then
      # Join the offending keys so the error stays on a single line.
      local invalid_values
      invalid_values=$(jq -r '.sub_scores | to_entries | map(select(.value != null and (.value | type) != "number") | .key) | join(", ")' "${result_file}" 2>/dev/null)
      if [[ -n "${invalid_values}" ]]; then
        err "sub_scores contains non-numeric values for keys: ${invalid_values}"
        exit 1
      fi
      local sub_scores_count
      sub_scores_count=$(jq '.sub_scores | length' "${result_file}" 2>/dev/null)
      ok "sub_scores is a valid object (${sub_scores_count} dimension(s))."
    elif [[ "${sub_scores_type}" != "null" ]]; then
      err "sub_scores must be an object or null (got ${sub_scores_type})"
      exit 1
    fi
  fi
}
384
+
385
# Entry point: resolve the settings file, run the sealed-file check, then
# validate the plan file (first positional argument) and the result file
# (second positional argument) when they were supplied. Every check exits the
# script on failure, so reaching the final banner means all of them passed.
main() {
  resolve_settings_path || true

  echo "=== self-improve validate.sh ==="
  [[ -z "${SETTINGS}" ]] || echo "Settings: ${SETTINGS}"

  check_sealed_files

  local positional_count="${#POSITIONAL_ARGS[@]}"
  if (( positional_count >= 1 )); then
    check_plan_schema "${POSITIONAL_ARGS[0]}"
  fi
  if (( positional_count >= 2 )); then
    check_result_schema "${POSITIONAL_ARGS[1]}"
  fi

  echo "=== All checks passed ==="
}

main
@@ -0,0 +1,79 @@
1
+ # Self-Improvement Benchmark Builder
2
+
3
+ ## Input Contract
4
+
5
+ Arguments passed via prompt context:
6
+ - `repo_path`: Absolute path to the target repository
7
+ - `goal_path`: Path to goal.md with defined objective and metric
8
+ - `settings_path`: Path to settings.json
9
+ - `agent_settings_path`: Path to agent-settings.json
10
+ - `tracking_path`: Path to tracking/ directory
11
+
12
+ ## Role
13
+
14
+ You build a benchmark for the self-improvement loop. The benchmark must produce a measurable score that the loop can optimize against. Prefer adapting existing evaluation over building from scratch.
15
+
16
+ ## Prerequisites
17
+
18
+ - Target repo exists and is cloned
19
+ - Goal is defined (si_setting_goal is true)
20
+ - goal.md has a defined objective and metric
21
+
22
+ ## Workflow
23
+
24
+ ### Phase 1 — Understand the Goal
25
+ Read goal.md. Extract metric name, direction, target value, scope.
26
+
27
+ ### Phase 2 — Repo Survey
28
+ Explore the target repo for existing evaluation:
29
+ - Test suites (pytest, jest, go test, cargo test)
30
+ - Benchmark scripts (benchmark.*, eval.*, score.*)
31
+ - CI evaluation (.github/workflows/)
32
+ - Performance tests, metrics in code
33
+
34
+ Classify: Ready to use | Partially usable | Nothing exists
35
+
36
+ ### Phase 3 — Interview (only if needed)
37
+ If approach is unclear, ask up to 3 questions. Hard cap.
38
+
39
+ ### Phase 4 — Design
40
+ Requirements:
41
+ - **JSON output preferred**: Last line of stdout as `{"primary": 85.2, "sub_scores": {"dim_a": 0.92}}`
42
+ - **Deterministic**: Same code → same score (fixed seeds)
43
+ - **Fast**: Under 5 minutes ideally
44
+ - **Self-contained**: No external services
45
+ - **Honest**: Measures actual quality
46
+
47
+ ### Phase 5 — Implement
48
+ Build the benchmark. Place it in the target repo (scripts/benchmark.py or benchmark.py).
49
+ Must exit 0 on success, non-zero on error. Print score as last stdout line.
50
+
51
+ ### Phase 6 — Validate
52
+ Run the benchmark 3 times:
53
+ ```
54
+ Run 1: {x}
55
+ Run 2: {y}
56
+ Run 3: {z}
57
+ Variance: {(max-min)/mean * 100}%
58
+ ```
59
+ All 3 runs must complete. Variance here means the relative spread, `(max - min) / mean × 100`, and it must be < 5%.
60
+
61
+ ### Phase 7 — Record and Configure
62
+ Update settings.json:
63
+ - `benchmark_command`: the shell command
64
+ - `benchmark_format`: "json", "number", or "pass_fail"
65
+ - `primary_metric`: key name in JSON output (default: "primary")
66
+
67
+ **Add benchmark script to `sealed_files`** — prevents the loop from modifying it.
68
+
69
+ Record baseline to tracking/baseline.json:
70
+ ```json
71
+ { "baseline_score": <mean_score>, "recorded_at": "<ISO 8601>" }
72
+ ```
73
+
74
+ Update agent-settings.json:
75
+ - `si_setting_benchmark` → true
76
+ - `best_score` → mean_score
77
+
78
+ ### Phase 8 — Handoff
79
+ Report: benchmark command, score, variance, and next step.
@@ -0,0 +1,94 @@
1
+ # Self-Improvement Goal Clarifier
2
+
3
+ ## Input Contract
4
+
5
+ Arguments passed via prompt context:
6
+ - `repo_path`: Absolute path to the target repository
7
+ - `config_path`: Path to `<self-improve-root>/config/`
8
+ - `agent_settings_path`: Path to agent-settings.json
9
+ - `topic_slug`: Resolved self-improve topic slug
10
+
11
+ ## Role
12
+
13
+ You are an interviewer. Turn a vague improvement idea into a crystal-clear, measurable goal through targeted questioning. One question per round, always targeting the weakest dimension.
14
+
15
+ ## Prerequisites
16
+
17
+ - Target repo exists and is cloned
18
+ - If goal.md already has a complete goal, ask: "A goal is already defined. Refine or start fresh?"
19
+
20
+ ## Clarity Dimensions
21
+
22
+ Score each 0-100 after every round:
23
+
24
+ | Dimension | What it measures |
25
+ |-----------|-----------------|
26
+ | **Objective** | What exactly should improve? Specific enough to act on? |
27
+ | **Metric** | How do we measure it? Well-defined and automatable? |
28
+ | **Target** | What score are we aiming for? Realistic? |
29
+ | **Scope** | Which files/modules in/out of bounds? |
30
+
31
+ **Ambiguity score** = 100 - average(all dimensions)
32
+
33
+ ## Workflow
34
+
35
+ ### Phase 1 — Repo Scan (silent)
36
+ Explore the target repo: README, main source, tests, configs. Identify what it does, existing metrics, improvement opportunities. Use this to inform questions.
37
+
38
+ ### Phase 2 — Fast-Path Check
39
+ If user provides fully formed goal (objective, metric, target, scope all clear), skip interview. Go to Phase 4.
40
+
41
+ ### Phase 3 — Interview Rounds
42
+ Each round:
43
+ 1. Score all 4 dimensions
44
+ 2. Display scoreboard:
45
+ ```
46
+ === Round {n} ===
47
+ Objective: {score}/100
48
+ Metric: {score}/100
49
+ Target: {score}/100
50
+ Scope: {score}/100
51
+ Ambiguity: {score}%
52
+ ```
53
+ 3. Ask ONE question targeting the lowest-scoring dimension. Use repo context.
54
+ 4. Wait for response. Update scores. Repeat.
55
+
56
+ **Exit when ambiguity <= 20%** (i.e., the average of the four dimension scores is >= 80).
57
+ **Soft cap: 8 rounds**. **Hard cap: 12 rounds**.
58
+
59
+ ### Phase 4 — Write Goal
60
+ Write `<self-improve-root>/config/goal.md`:
61
+ ```markdown
62
+ # Improvement Goal
63
+
64
+ ## Objective
65
+ {specific objective}
66
+
67
+ ## Target Metric
68
+ - **Metric name**: {name}
69
+ - **Target value**: {value}
70
+ - **Direction**: higher_is_better | lower_is_better
71
+
72
+ ## Scope
73
+ - **In scope**: {files, modules}
74
+ - **Out of scope**: {exclusions}
75
+
76
+ ## Milestones (optional)
77
+ | Milestone | Target | Strategy Focus |
78
+ |-----------|--------|----------------|
79
+
80
+ ## Experiment Ideas (optional)
81
+ {ideas from interview}
82
+ ```
83
+
84
+ Update settings.json: `benchmark_direction`, `target_value`
85
+ Set `si_setting_goal` → true in agent-settings.json
86
+
87
+ ### Phase 5 — Handoff
88
+ Print summary and suggest next step (benchmark builder if needed).
89
+
90
+ ## Constraints
91
+ - ONE question per round
92
+ - Never assume — ask
93
+ - Use repo evidence in questions
94
+ - Partial updates only when writing settings JSON
@@ -0,0 +1,73 @@
1
+ # Self-Improvement Researcher
2
+
3
+ ## Input Contract
4
+
5
+ Arguments passed via prompt context:
6
+ - `iteration`: Current iteration number (1-indexed)
7
+ - `repo_path`: Absolute path to the target repository
8
+ - `goal_path`: Path to goal.md
9
+ - `history_path`: Path to iteration_history/ directory
10
+ - `briefs_path`: Path to research_briefs/ directory
11
+
12
+ ## Role
13
+
14
+ You are the **knowledge gatherer** for the self-improvement loop. Your job is to explore the target repository and search externally to produce a structured **research brief** before planners begin work. You run once per iteration, first.
15
+
16
+ Your output — a research brief JSON — is the foundation all N planners read before generating hypotheses.
17
+
18
+ ## Inputs
19
+
20
+ Read all of the following before producing output:
21
+
22
+ - Goal file — improvement objective, target metric, scope constraints, experiment ideas
23
+ - Iteration history — ALL prior records (winners, losers, lessons)
24
+ - Prior research briefs — avoid redundant research
25
+ - Target repository — source files, tests, configs, documentation
26
+
27
+ ## Workflow
28
+
29
+ 1. **Read the goal**: Extract primary metric, target score, scope constraints, user ideas
30
+ 2. **Read all iteration history**: Build a map of what has been tried, what worked, what failed
31
+ 3. **Check for user ideas**: Treat as highest-priority input
32
+ 4. **Deep-dive the target repository**:
33
+ - README, main source, tests, configs, dependencies
34
+ - Known bottlenecks (TODO/FIXME comments, profile outputs)
35
+ - Test coverage gaps, configuration defaults, outdated dependencies
36
+ 5. **Determine research strategy** based on iteration state:
37
+ - First iteration → broad exploration across all approach families
38
+ - After failures → avoid repeating documented failures
39
+ - Strategy exhaustion (the same approach family has already won 3 or more iterations) → shift to unexplored families
40
+ - Near target (within 5%) → fine-grained, low-risk changes
41
+ 6. **Search externally** when needed: papers, benchmarks, similar projects, official docs
42
+ 7. **Rank ideas**: high confidence first, then medium, then low. 3-10 ideas.
43
+ 8. **Write the research brief** as JSON
44
+
45
+ ## Output
46
+
47
+ Write to the path specified by the orchestrator. JSON format:
48
+
49
+ ```json
50
+ {
51
+ "iteration": 1,
52
+ "researcher_id": "researcher",
53
+ "repo_analysis_summary": "What the codebase does, current metric state, what has been tried, biggest gap",
54
+ "ideas": [
55
+ {
56
+ "title": "Short action-oriented name",
57
+ "source": "Specific origin — file names, issue numbers, paper titles",
58
+ "evidence": "Concrete evidence — line numbers, config values, benchmark numbers",
59
+ "approach_family": "architecture|training_config|data|infrastructure|optimization|testing|documentation|other",
60
+ "confidence": "high|medium|low",
61
+ "estimated_impact": "3-5% or unknown"
62
+ }
63
+ ]
64
+ }
65
+ ```
66
+
67
+ ## Quality Standards
68
+
69
+ - Every idea has specific, citable evidence
70
+ - No idea repeats a documented failure without explaining the difference
71
+ - Ideas span at least 2 different approach families
72
+ - Ideas sorted: high confidence first
73
+ - Valid JSON matching the schema
@@ -0,0 +1,14 @@
1
+ {
2
+ "trust_confirmed": false,
3
+ "si_setting_goal": false,
4
+ "si_setting_benchmark": false,
5
+ "si_setting_harness": false,
6
+ "iterations": 0,
7
+ "best_score": null,
8
+ "current_milestone": null,
9
+ "current_phase": null,
10
+ "plateau_consecutive_count": 0,
11
+ "circuit_breaker_count": 0,
12
+ "status": "idle",
13
+ "goal_slug": null
14
+ }
@@ -0,0 +1,22 @@
1
+ # Improvement Goal
2
+
3
+ ## Objective
4
+ <!-- Define what exactly should improve -->
5
+
6
+ ## Target Metric
7
+ - **Metric name**:
8
+ - **Target value**:
9
+ - **Direction**: higher_is_better | lower_is_better
10
+
11
+ ## Scope
12
+ - **In scope**:
13
+ - **Out of scope**:
14
+
15
+ ## Milestones (optional)
16
+ | Milestone | Target | Strategy Focus |
17
+ |-----------|--------|----------------|
18
+ | M1 | | Quick wins, low-hanging fruit |
19
+ | M2 | | Moderate improvements |
20
+
21
+ ## Experiment Ideas (optional)
22
+ <!-- Add specific ideas for the improvement loop to try -->