@jaguilar87/gaia 5.0.4 → 5.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/CHANGELOG.md +56 -0
- package/INSTALL.md +0 -2
- package/README.md +1 -6
- package/bin/README.md +0 -1
- package/bin/cli/_install_helpers.py +1 -1
- package/bin/cli/cleanup.py +0 -1
- package/bin/cli/doctor.py +1 -1
- package/bin/cli/memory.py +2 -0
- package/bin/cli/update.py +1 -1
- package/bin/pre-publish-validate.js +48 -5
- package/config/README.md +22 -44
- package/config/surface-routing.json +0 -1
- package/dist/gaia-ops/.claude-plugin/plugin.json +1 -1
- package/dist/gaia-ops/config/README.md +22 -44
- package/dist/gaia-ops/config/surface-routing.json +0 -1
- package/dist/gaia-ops/hooks/modules/agents/handoff_persister.py +2 -0
- package/dist/gaia-ops/hooks/modules/security/approval_grants.py +2 -0
- package/dist/gaia-ops/hooks/modules/tools/bash_validator.py +2 -0
- package/dist/gaia-ops/hooks/modules/validation/commit_validator.py +90 -55
- package/dist/gaia-ops/skills/README.md +1 -1
- package/dist/gaia-ops/skills/gaia-patterns/SKILL.md +1 -1
- package/dist/gaia-ops/skills/gaia-patterns/reference.md +0 -1
- package/dist/gaia-ops/skills/gaia-release/SKILL.md +60 -24
- package/dist/gaia-ops/skills/gaia-release/reference.md +35 -11
- package/dist/gaia-ops/skills/git-conventions/SKILL.md +6 -2
- package/dist/gaia-ops/skills/orchestrator-present-approval/SKILL.md +10 -2
- package/dist/gaia-ops/skills/readme-writing/SKILL.md +1 -1
- package/dist/gaia-ops/skills/readme-writing/reference.md +0 -1
- package/dist/gaia-ops/tools/scan/ui.py +20 -4
- package/dist/gaia-ops/tools/scan/verify.py +3 -3
- package/dist/gaia-ops/tools/validation/README.md +15 -24
- package/dist/gaia-security/.claude-plugin/plugin.json +1 -1
- package/dist/gaia-security/hooks/modules/agents/handoff_persister.py +2 -0
- package/dist/gaia-security/hooks/modules/security/approval_grants.py +2 -0
- package/dist/gaia-security/hooks/modules/tools/bash_validator.py +2 -0
- package/dist/gaia-security/hooks/modules/validation/commit_validator.py +90 -55
- package/hooks/modules/agents/handoff_persister.py +2 -0
- package/hooks/modules/security/approval_grants.py +2 -0
- package/hooks/modules/tools/bash_validator.py +2 -0
- package/hooks/modules/validation/commit_validator.py +90 -55
- package/index.js +2 -12
- package/package.json +4 -6
- package/pyproject.toml +3 -3
- package/scripts/bootstrap_database.sh +88 -439
- package/scripts/check_schema_drift.py +208 -0
- package/scripts/migrations/README.md +78 -28
- package/scripts/migrations/schema.checksum +8 -0
- package/scripts/release-prepare.mjs +199 -0
- package/skills/README.md +1 -1
- package/skills/gaia-patterns/SKILL.md +1 -1
- package/skills/gaia-patterns/reference.md +0 -1
- package/skills/gaia-release/SKILL.md +60 -24
- package/skills/gaia-release/reference.md +35 -11
- package/skills/git-conventions/SKILL.md +6 -2
- package/skills/orchestrator-present-approval/SKILL.md +10 -2
- package/skills/readme-writing/SKILL.md +1 -1
- package/skills/readme-writing/reference.md +0 -1
- package/tools/scan/ui.py +20 -4
- package/tools/scan/verify.py +3 -3
- package/tools/validation/README.md +15 -24
- package/commands/README.md +0 -64
- package/commands/gaia.md +0 -37
- package/commands/scan-project.md +0 -74
- package/config/crons-schema.md +0 -81
- package/config/git_standards.json +0 -72
- package/dist/gaia-ops/commands/gaia.md +0 -37
- package/dist/gaia-ops/config/crons-schema.md +0 -81
- package/dist/gaia-ops/config/git_standards.json +0 -72
- package/dist/gaia-ops/tools/agentic-loop/decide-status.py +0 -210
- package/dist/gaia-ops/tools/agentic-loop/parse-metric.py +0 -106
- package/dist/gaia-ops/tools/agentic-loop/record-iteration.py +0 -223
- package/git-hooks/commit-msg +0 -41
- package/scripts/migrations/v10_to_v11.sql +0 -170
- package/scripts/migrations/v10_to_v11_fresh.sql +0 -18
- package/scripts/migrations/v11_to_v12.sql +0 -195
- package/scripts/migrations/v11_to_v12_fresh.sql +0 -19
- package/scripts/migrations/v12_to_v13.sql +0 -48
- package/scripts/migrations/v12_to_v13_fresh.sql +0 -17
- package/scripts/migrations/v13_to_v14.sql +0 -44
- package/scripts/migrations/v13_to_v14_fresh.sql +0 -17
- package/scripts/migrations/v14_to_v15.sql +0 -71
- package/scripts/migrations/v14_to_v15_fresh.sql +0 -19
- package/scripts/migrations/v15_to_v16.sql +0 -57
- package/scripts/migrations/v15_to_v16_fresh.sql +0 -18
- package/scripts/migrations/v16_to_v17.sql +0 -51
- package/scripts/migrations/v16_to_v17_fresh.sql +0 -18
- package/scripts/migrations/v17_to_v18.sql +0 -66
- package/scripts/migrations/v17_to_v18_fresh.sql +0 -24
- package/scripts/migrations/v1_to_v2.sql +0 -97
- package/scripts/migrations/v2_to_v3.sql +0 -68
- package/scripts/migrations/v2_to_v3_merge.sql +0 -69
- package/scripts/migrations/v3_to_v4.sql +0 -67
- package/scripts/migrations/v3_to_v4_fresh.sql +0 -20
- package/scripts/migrations/v4_to_v5.sql +0 -55
- package/scripts/migrations/v4_to_v5_fresh.sql +0 -20
- package/scripts/migrations/v5_to_v6.sql +0 -48
- package/scripts/migrations/v5_to_v6_fresh.sql +0 -17
- package/scripts/migrations/v6_to_v7.sql +0 -26
- package/scripts/migrations/v6_to_v7_fresh.sql +0 -13
- package/scripts/migrations/v7_to_v8.sql +0 -44
- package/scripts/migrations/v7_to_v8_fresh.sql +0 -14
- package/scripts/migrations/v8_to_v9.sql +0 -87
- package/scripts/migrations/v8_to_v9_fresh.sql +0 -15
- package/scripts/migrations/v9_to_v10.sql +0 -109
- package/scripts/migrations/v9_to_v10_episodes_workspace.sql +0 -109
- package/scripts/migrations/v9_to_v10_fresh.sql +0 -18
- package/templates/README.md +0 -70
- package/templates/managed-settings.template.json +0 -43
- package/tools/agentic-loop/decide-status.py +0 -210
- package/tools/agentic-loop/parse-metric.py +0 -106
- package/tools/agentic-loop/record-iteration.py +0 -223
package/config/crons-schema.md
DELETED
|
@@ -1,81 +0,0 @@
|
|
|
1
|
-
# Crons Persistence Schema
|
|
2
|
-
|
|
3
|
-
**Version:** 1
|
|
4
|
-
**File location:** `.claude/crons.json`
|
|
5
|
-
**Owner:** Gaia cron persistence system
|
|
6
|
-
|
|
7
|
-
---
|
|
8
|
-
|
|
9
|
-
## Schema
|
|
10
|
-
|
|
11
|
-
```json
|
|
12
|
-
{
|
|
13
|
-
"crons": [
|
|
14
|
-
{
|
|
15
|
-
"name": "check-email",
|
|
16
|
-
"interval_minutes": 180,
|
|
17
|
-
"prompt": "Revisa el correo y haz triage según gmail-triage skill",
|
|
18
|
-
"enabled": true,
|
|
19
|
-
"created": "2026-04-13T20:00:00Z",
|
|
20
|
-
"last_run": "2026-04-13T23:00:00Z"
|
|
21
|
-
}
|
|
22
|
-
],
|
|
23
|
-
"version": 1
|
|
24
|
-
}
|
|
25
|
-
```
|
|
26
|
-
|
|
27
|
-
## Field Definitions
|
|
28
|
-
|
|
29
|
-
| Field | Type | Required | Description |
|
|
30
|
-
|-------|------|----------|-------------|
|
|
31
|
-
| `name` | string | yes | Unique identifier for the cron. Used as the dedup key during restore. Must be URL-safe (alphanumeric, hyphens). |
|
|
32
|
-
| `interval_minutes` | integer | yes | How often the cron fires, in minutes. Mirrors CronCreate interval. |
|
|
33
|
-
| `prompt` | string | yes | The exact prompt sent to the orchestrator on each tick. |
|
|
34
|
-
| `enabled` | boolean | yes | If false, the cron is skipped during restore. Allows pausing without deletion. |
|
|
35
|
-
| `created` | string (ISO 8601 UTC) | yes | Timestamp when the cron was first created. Set once, never updated. |
|
|
36
|
-
| `last_run` | string (ISO 8601 UTC) or null | yes | Timestamp of the most recent execution. Null if the cron has never run. |
|
|
37
|
-
|
|
38
|
-
## Top-level Fields
|
|
39
|
-
|
|
40
|
-
| Field | Type | Description |
|
|
41
|
-
|-------|------|-------------|
|
|
42
|
-
| `crons` | array | The list of persisted cron entries. May be empty. |
|
|
43
|
-
| `version` | integer | Schema version. Currently 1. Increment when field semantics change. |
|
|
44
|
-
|
|
45
|
-
## Constraints
|
|
46
|
-
|
|
47
|
-
- `name` must be unique within the `crons` array. Duplicate names are invalid.
|
|
48
|
-
- `interval_minutes` must be a positive integer greater than 0.
|
|
49
|
-
- `last_run` is `null` when the cron has been created but has not yet fired.
|
|
50
|
-
|
|
51
|
-
## Example: Multiple Crons
|
|
52
|
-
|
|
53
|
-
```json
|
|
54
|
-
{
|
|
55
|
-
"crons": [
|
|
56
|
-
{
|
|
57
|
-
"name": "check-email",
|
|
58
|
-
"interval_minutes": 180,
|
|
59
|
-
"prompt": "Revisa el correo y haz triage según gmail-triage skill",
|
|
60
|
-
"enabled": true,
|
|
61
|
-
"created": "2026-04-13T20:00:00Z",
|
|
62
|
-
"last_run": "2026-04-13T23:00:00Z"
|
|
63
|
-
},
|
|
64
|
-
{
|
|
65
|
-
"name": "drift-monitor",
|
|
66
|
-
"interval_minutes": 60,
|
|
67
|
-
"prompt": "Check for infrastructure drift in the current project",
|
|
68
|
-
"enabled": false,
|
|
69
|
-
"created": "2026-04-10T10:00:00Z",
|
|
70
|
-
"last_run": null
|
|
71
|
-
}
|
|
72
|
-
],
|
|
73
|
-
"version": 1
|
|
74
|
-
}
|
|
75
|
-
```
|
|
76
|
-
|
|
77
|
-
## File Location
|
|
78
|
-
|
|
79
|
-
The file lives at `.claude/crons.json`, resolved relative to the active project root (same directory where `.claude/` is found). The path module `find_claude_dir()` from `hooks/modules/core/paths.py` provides the canonical `.claude/` path.
|
|
80
|
-
|
|
81
|
-
For projects that use `CLAUDE_PLUGIN_DATA`, the file lives under that data directory instead, consistent with how other persisted data (logs, sessions, grants) is stored.
|
|
@@ -1,72 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"commit_message": {
|
|
3
|
-
"format": "conventional_commits",
|
|
4
|
-
"description": "Commit messages must follow Conventional Commits specification",
|
|
5
|
-
|
|
6
|
-
"type_allowed": [
|
|
7
|
-
"feat",
|
|
8
|
-
"fix",
|
|
9
|
-
"refactor",
|
|
10
|
-
"docs",
|
|
11
|
-
"test",
|
|
12
|
-
"chore",
|
|
13
|
-
"ci",
|
|
14
|
-
"perf",
|
|
15
|
-
"style",
|
|
16
|
-
"build"
|
|
17
|
-
],
|
|
18
|
-
|
|
19
|
-
"scope_required": false,
|
|
20
|
-
"scope_examples": ["helmrelease", "terraform", "pg-non-prod", "infrastructure"],
|
|
21
|
-
|
|
22
|
-
"subject_max_length": 72,
|
|
23
|
-
"subject_rules": {
|
|
24
|
-
"capitalize_first_letter": false,
|
|
25
|
-
"no_period_at_end": true,
|
|
26
|
-
"imperative_mood": true,
|
|
27
|
-
"no_emoji": true
|
|
28
|
-
},
|
|
29
|
-
|
|
30
|
-
"body_max_line_length": 72,
|
|
31
|
-
"body_required": false,
|
|
32
|
-
|
|
33
|
-
"footer_forbidden": [
|
|
34
|
-
"Generated with Claude Code",
|
|
35
|
-
"Co-Authored-By: Claude",
|
|
36
|
-
"🤖 Generated with"
|
|
37
|
-
],
|
|
38
|
-
|
|
39
|
-
"footer_allowed": [
|
|
40
|
-
"BREAKING CHANGE:",
|
|
41
|
-
"Refs:",
|
|
42
|
-
"Closes:",
|
|
43
|
-
"Fixes:",
|
|
44
|
-
"Implements:",
|
|
45
|
-
"See:"
|
|
46
|
-
],
|
|
47
|
-
|
|
48
|
-
"examples_valid": [
|
|
49
|
-
"feat(helmrelease): add Phase 3.3 services",
|
|
50
|
-
"fix(pg-non-prod): correct API key environment variable mappings",
|
|
51
|
-
"refactor: simplify context provider logic",
|
|
52
|
-
"docs: update README with new workflow",
|
|
53
|
-
"chore(deps): update terraform to v1.6.0"
|
|
54
|
-
],
|
|
55
|
-
|
|
56
|
-
"examples_invalid": [
|
|
57
|
-
"Added new feature",
|
|
58
|
-
"Fixed bugs",
|
|
59
|
-
"Updates",
|
|
60
|
-
"feat: add feature\n\n🤖 Generated with Claude Code",
|
|
61
|
-
"feat: add new feature 🚀",
|
|
62
|
-
"fix: 🐛 correct bug"
|
|
63
|
-
]
|
|
64
|
-
},
|
|
65
|
-
|
|
66
|
-
"enforcement": {
|
|
67
|
-
"enabled": true,
|
|
68
|
-
"block_on_failure": true,
|
|
69
|
-
"log_violations": true,
|
|
70
|
-
"log_path": ".claude/logs/commit-violations.jsonl"
|
|
71
|
-
}
|
|
72
|
-
}
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: gaia
|
|
3
|
-
description: Invoke the Gaia meta-agent for system architecture analysis, agent design, skill creation, and orchestration debugging
|
|
4
|
-
allowed-tools:
|
|
5
|
-
- Bash(*)
|
|
6
|
-
- Read
|
|
7
|
-
- Edit
|
|
8
|
-
- Write
|
|
9
|
-
- Glob
|
|
10
|
-
- Grep
|
|
11
|
-
- WebSearch
|
|
12
|
-
- WebFetch
|
|
13
|
-
- Task
|
|
14
|
-
- Agent
|
|
15
|
-
- Skill
|
|
16
|
-
---
|
|
17
|
-
|
|
18
|
-
Invoke the Gaia meta-agent (`gaia-system`) to work on the gaia-ops orchestration
|
|
19
|
-
system itself. This is the entry point for tasks that modify or analyze agents,
|
|
20
|
-
skills, hooks, or system architecture.
|
|
21
|
-
|
|
22
|
-
## When to use
|
|
23
|
-
|
|
24
|
-
- Analyze or improve the gaia-ops architecture
|
|
25
|
-
- Create or update agent definitions (`.md` files)
|
|
26
|
-
- Create or update skills (`SKILL.md` files)
|
|
27
|
-
- Write or debug Python hooks and tools
|
|
28
|
-
- Update `CLAUDE.md` or system configuration
|
|
29
|
-
- Research best practices for agent orchestration
|
|
30
|
-
|
|
31
|
-
## How it works
|
|
32
|
-
|
|
33
|
-
This command delegates to the `gaia-system` agent, which is the meta-agent
|
|
34
|
-
specialized in the orchestration system. It follows the standard agent protocol
|
|
35
|
-
and returns an `agent_contract_handoff` block with findings and status.
|
|
36
|
-
|
|
37
|
-
$ARGUMENTS
|
|
@@ -1,81 +0,0 @@
|
|
|
1
|
-
# Crons Persistence Schema
|
|
2
|
-
|
|
3
|
-
**Version:** 1
|
|
4
|
-
**File location:** `.claude/crons.json`
|
|
5
|
-
**Owner:** Gaia cron persistence system
|
|
6
|
-
|
|
7
|
-
---
|
|
8
|
-
|
|
9
|
-
## Schema
|
|
10
|
-
|
|
11
|
-
```json
|
|
12
|
-
{
|
|
13
|
-
"crons": [
|
|
14
|
-
{
|
|
15
|
-
"name": "check-email",
|
|
16
|
-
"interval_minutes": 180,
|
|
17
|
-
"prompt": "Revisa el correo y haz triage según gmail-triage skill",
|
|
18
|
-
"enabled": true,
|
|
19
|
-
"created": "2026-04-13T20:00:00Z",
|
|
20
|
-
"last_run": "2026-04-13T23:00:00Z"
|
|
21
|
-
}
|
|
22
|
-
],
|
|
23
|
-
"version": 1
|
|
24
|
-
}
|
|
25
|
-
```
|
|
26
|
-
|
|
27
|
-
## Field Definitions
|
|
28
|
-
|
|
29
|
-
| Field | Type | Required | Description |
|
|
30
|
-
|-------|------|----------|-------------|
|
|
31
|
-
| `name` | string | yes | Unique identifier for the cron. Used as the dedup key during restore. Must be URL-safe (alphanumeric, hyphens). |
|
|
32
|
-
| `interval_minutes` | integer | yes | How often the cron fires, in minutes. Mirrors CronCreate interval. |
|
|
33
|
-
| `prompt` | string | yes | The exact prompt sent to the orchestrator on each tick. |
|
|
34
|
-
| `enabled` | boolean | yes | If false, the cron is skipped during restore. Allows pausing without deletion. |
|
|
35
|
-
| `created` | string (ISO 8601 UTC) | yes | Timestamp when the cron was first created. Set once, never updated. |
|
|
36
|
-
| `last_run` | string (ISO 8601 UTC) or null | yes | Timestamp of the most recent execution. Null if the cron has never run. |
|
|
37
|
-
|
|
38
|
-
## Top-level Fields
|
|
39
|
-
|
|
40
|
-
| Field | Type | Description |
|
|
41
|
-
|-------|------|-------------|
|
|
42
|
-
| `crons` | array | The list of persisted cron entries. May be empty. |
|
|
43
|
-
| `version` | integer | Schema version. Currently 1. Increment when field semantics change. |
|
|
44
|
-
|
|
45
|
-
## Constraints
|
|
46
|
-
|
|
47
|
-
- `name` must be unique within the `crons` array. Duplicate names are invalid.
|
|
48
|
-
- `interval_minutes` must be a positive integer greater than 0.
|
|
49
|
-
- `last_run` is `null` when the cron has been created but has not yet fired.
|
|
50
|
-
|
|
51
|
-
## Example: Multiple Crons
|
|
52
|
-
|
|
53
|
-
```json
|
|
54
|
-
{
|
|
55
|
-
"crons": [
|
|
56
|
-
{
|
|
57
|
-
"name": "check-email",
|
|
58
|
-
"interval_minutes": 180,
|
|
59
|
-
"prompt": "Revisa el correo y haz triage según gmail-triage skill",
|
|
60
|
-
"enabled": true,
|
|
61
|
-
"created": "2026-04-13T20:00:00Z",
|
|
62
|
-
"last_run": "2026-04-13T23:00:00Z"
|
|
63
|
-
},
|
|
64
|
-
{
|
|
65
|
-
"name": "drift-monitor",
|
|
66
|
-
"interval_minutes": 60,
|
|
67
|
-
"prompt": "Check for infrastructure drift in the current project",
|
|
68
|
-
"enabled": false,
|
|
69
|
-
"created": "2026-04-10T10:00:00Z",
|
|
70
|
-
"last_run": null
|
|
71
|
-
}
|
|
72
|
-
],
|
|
73
|
-
"version": 1
|
|
74
|
-
}
|
|
75
|
-
```
|
|
76
|
-
|
|
77
|
-
## File Location
|
|
78
|
-
|
|
79
|
-
The file lives at `.claude/crons.json`, resolved relative to the active project root (same directory where `.claude/` is found). The path module `find_claude_dir()` from `hooks/modules/core/paths.py` provides the canonical `.claude/` path.
|
|
80
|
-
|
|
81
|
-
For projects that use `CLAUDE_PLUGIN_DATA`, the file lives under that data directory instead, consistent with how other persisted data (logs, sessions, grants) is stored.
|
|
@@ -1,72 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"commit_message": {
|
|
3
|
-
"format": "conventional_commits",
|
|
4
|
-
"description": "Commit messages must follow Conventional Commits specification",
|
|
5
|
-
|
|
6
|
-
"type_allowed": [
|
|
7
|
-
"feat",
|
|
8
|
-
"fix",
|
|
9
|
-
"refactor",
|
|
10
|
-
"docs",
|
|
11
|
-
"test",
|
|
12
|
-
"chore",
|
|
13
|
-
"ci",
|
|
14
|
-
"perf",
|
|
15
|
-
"style",
|
|
16
|
-
"build"
|
|
17
|
-
],
|
|
18
|
-
|
|
19
|
-
"scope_required": false,
|
|
20
|
-
"scope_examples": ["helmrelease", "terraform", "pg-non-prod", "infrastructure"],
|
|
21
|
-
|
|
22
|
-
"subject_max_length": 72,
|
|
23
|
-
"subject_rules": {
|
|
24
|
-
"capitalize_first_letter": false,
|
|
25
|
-
"no_period_at_end": true,
|
|
26
|
-
"imperative_mood": true,
|
|
27
|
-
"no_emoji": true
|
|
28
|
-
},
|
|
29
|
-
|
|
30
|
-
"body_max_line_length": 72,
|
|
31
|
-
"body_required": false,
|
|
32
|
-
|
|
33
|
-
"footer_forbidden": [
|
|
34
|
-
"Generated with Claude Code",
|
|
35
|
-
"Co-Authored-By: Claude",
|
|
36
|
-
"🤖 Generated with"
|
|
37
|
-
],
|
|
38
|
-
|
|
39
|
-
"footer_allowed": [
|
|
40
|
-
"BREAKING CHANGE:",
|
|
41
|
-
"Refs:",
|
|
42
|
-
"Closes:",
|
|
43
|
-
"Fixes:",
|
|
44
|
-
"Implements:",
|
|
45
|
-
"See:"
|
|
46
|
-
],
|
|
47
|
-
|
|
48
|
-
"examples_valid": [
|
|
49
|
-
"feat(helmrelease): add Phase 3.3 services",
|
|
50
|
-
"fix(pg-non-prod): correct API key environment variable mappings",
|
|
51
|
-
"refactor: simplify context provider logic",
|
|
52
|
-
"docs: update README with new workflow",
|
|
53
|
-
"chore(deps): update terraform to v1.6.0"
|
|
54
|
-
],
|
|
55
|
-
|
|
56
|
-
"examples_invalid": [
|
|
57
|
-
"Added new feature",
|
|
58
|
-
"Fixed bugs",
|
|
59
|
-
"Updates",
|
|
60
|
-
"feat: add feature\n\n🤖 Generated with Claude Code",
|
|
61
|
-
"feat: add new feature 🚀",
|
|
62
|
-
"fix: 🐛 correct bug"
|
|
63
|
-
]
|
|
64
|
-
},
|
|
65
|
-
|
|
66
|
-
"enforcement": {
|
|
67
|
-
"enabled": true,
|
|
68
|
-
"block_on_failure": true,
|
|
69
|
-
"log_violations": true,
|
|
70
|
-
"log_path": ".claude/logs/commit-violations.jsonl"
|
|
71
|
-
}
|
|
72
|
-
}
|
|
@@ -1,210 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
decide-status.py
|
|
4
|
-
|
|
5
|
-
Mechanically decide what to do based on numbers alone. No LLM judgment.
|
|
6
|
-
|
|
7
|
-
Usage:
|
|
8
|
-
python3 decide-status.py \
|
|
9
|
-
--current 94.5 \
|
|
10
|
-
--best 92.0 \
|
|
11
|
-
--threshold 98 \
|
|
12
|
-
--direction higher \
|
|
13
|
-
--consecutive-discards 2 \
|
|
14
|
-
--pivot-count 1
|
|
15
|
-
|
|
16
|
-
Output JSON:
|
|
17
|
-
{
|
|
18
|
-
"decision": "keep",
|
|
19
|
-
"reason": "Metric improved from 92.0 to 94.5",
|
|
20
|
-
"improved": true,
|
|
21
|
-
"gap_remaining": 3.5
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
Decision precedence (evaluated top-to-bottom, first match wins):
|
|
25
|
-
1. pivot_count >= 3 → stop
|
|
26
|
-
2. consecutive_discards >= 5 → pivot (also a discard)
|
|
27
|
-
3. consecutive_discards >= 3 → refine (also a discard)
|
|
28
|
-
4. current meets or passes threshold → threshold_reached
|
|
29
|
-
5. current improved vs best (per direction) → keep
|
|
30
|
-
6. current same or worse → discard
|
|
31
|
-
|
|
32
|
-
Exit codes:
|
|
33
|
-
0 success (decision emitted as JSON)
|
|
34
|
-
1 invalid input
|
|
35
|
-
"""
|
|
36
|
-
|
|
37
|
-
import argparse
|
|
38
|
-
import json
|
|
39
|
-
import sys
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
Decision = str # type alias for readability
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
def _is_improved(current: float, best: float, direction: str) -> bool:
|
|
46
|
-
"""Return True if *current* is strictly better than *best* per direction."""
|
|
47
|
-
if direction == "higher":
|
|
48
|
-
return current > best
|
|
49
|
-
return current < best # lower is better
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
def _threshold_reached(current: float, threshold: float, direction: str) -> bool:
|
|
53
|
-
"""Return True if *current* has met or surpassed *threshold*."""
|
|
54
|
-
if direction == "higher":
|
|
55
|
-
return current >= threshold
|
|
56
|
-
return current <= threshold
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
def _gap_remaining(current: float, threshold: float, direction: str) -> float:
|
|
60
|
-
"""Absolute gap between current value and threshold."""
|
|
61
|
-
if direction == "higher":
|
|
62
|
-
return max(0.0, threshold - current)
|
|
63
|
-
return max(0.0, current - threshold)
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
def decide(
    current: float,
    best: float,
    threshold: float,
    direction: str,
    consecutive_discards: int,
    pivot_count: int,
) -> dict:
    """Turn the loop's numeric state into the next decision.

    Rules are tried in order and the first match wins:

    1. ``pivot_count >= 3``           -> ``stop``
    2. ``consecutive_discards >= 5``  -> ``pivot``
    3. ``consecutive_discards >= 3``  -> ``refine``
    4. threshold met or passed        -> ``threshold_reached``
    5. current strictly beats best    -> ``keep``
    6. otherwise                      -> ``discard``

    Returns:
        A dict with the keys ``decision``, ``reason``, ``improved`` and
        ``gap_remaining``, in that order.
    """
    improved = _is_improved(current, best, direction)
    gap = _gap_remaining(current, threshold, direction)

    def outcome(decision, reason, *, improved_flag=improved, gap_left=gap):
        # Single place that fixes the key order of the emitted record.
        return {
            "decision": decision,
            "reason": reason,
            "improved": improved_flag,
            "gap_remaining": gap_left,
        }

    # Rule 1: too many pivots ends the loop regardless of the metric.
    if pivot_count >= 3:
        return outcome(
            "stop",
            f"pivot_count={pivot_count} has reached the maximum of 3; halting loop",
        )

    # Rules 2 and 3: discard-streak escalations. These are checked before the
    # threshold/keep rules, so a long failing streak is reported even when
    # this particular run reaches the threshold.
    if consecutive_discards >= 5:
        return outcome(
            "pivot",
            f"consecutive_discards={consecutive_discards} >= 5; "
            "strategy is not working, force a pivot",
        )
    if consecutive_discards >= 3:
        return outcome(
            "refine",
            f"consecutive_discards={consecutive_discards} >= 3; "
            "current approach needs refinement before continuing",
        )

    # Rule 4: goal achieved; the remaining gap is reported as exactly 0.0.
    if _threshold_reached(current, threshold, direction):
        comparator = "≥" if direction == "higher" else "≤"
        return outcome(
            "threshold_reached",
            f"current={current} {comparator} threshold={threshold}; goal achieved",
            gap_left=0.0,
        )

    # Rules 5 and 6: plain keep / discard on strict improvement.
    if improved:
        return outcome(
            "keep",
            f"Metric improved from {best} to {current}",
            improved_flag=True,
        )
    return outcome(
        "discard",
        f"Metric did not improve (current={current}, best={best})",
        improved_flag=False,
    )
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
def main() -> None:
    """Command-line entry point: parse flags, validate, print the decision.

    All six flags are required. Negative ``--consecutive-discards`` or
    ``--pivot-count`` values are reported on stderr (one line per problem)
    and the process exits with status 1. Otherwise the dict returned by
    ``decide`` is printed to stdout as indented JSON.
    """
    help_footer = """
Decisions:
keep current improved vs best
discard current same or worse
refine 3+ consecutive discards (improvement needed in approach)
pivot 5+ consecutive discards (strategy change required)
stop 3+ pivots already attempted
threshold_reached current meets or surpasses the goal threshold

Direction values:
higher larger numbers are better (e.g. accuracy, passing tests)
lower smaller numbers are better (e.g. error rate, latency ms)
"""
    cli = argparse.ArgumentParser(
        description="Compute the next agentic-loop decision from metric numbers only.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=help_footer,
    )

    # Float-valued metric flags, registered in the order shown by --help.
    float_flags = (
        ("--current", "Metric value for the current run"),
        ("--best", "Best metric seen so far (from state.json)"),
        ("--threshold", "Target threshold to reach"),
    )
    for flag, description in float_flags:
        cli.add_argument(flag, required=True, type=float, help=description)

    cli.add_argument(
        "--direction",
        required=True,
        choices=["higher", "lower"],
        help="Whether higher or lower values are better",
    )

    # Integer counters carried over from the loop state.
    counter_flags = (
        ("--consecutive-discards", "Number of consecutive discard outcomes so far (from state.json)"),
        ("--pivot-count", "Number of pivots executed so far (from state.json)"),
    )
    for flag, description in counter_flags:
        cli.add_argument(flag, required=True, type=int, metavar="N", help=description)

    options = cli.parse_args()

    # Collect every validation problem so they are all reported in one run.
    checks = (
        (options.consecutive_discards, "--consecutive-discards must be >= 0"),
        (options.pivot_count, "--pivot-count must be >= 0"),
    )
    problems = [message for value, message in checks if value < 0]
    if problems:
        for message in problems:
            print(f"error: {message}", file=sys.stderr)
        sys.exit(1)

    verdict = decide(
        current=options.current,
        best=options.best,
        threshold=options.threshold,
        direction=options.direction,
        consecutive_discards=options.consecutive_discards,
        pivot_count=options.pivot_count,
    )
    print(json.dumps(verdict, indent=2))
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
if __name__ == "__main__":
|
|
210
|
-
main()
|
|
@@ -1,106 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
parse-metric.py
|
|
4
|
-
|
|
5
|
-
Read stdout from eval_command and extract METRIC lines.
|
|
6
|
-
|
|
7
|
-
Usage:
|
|
8
|
-
echo "output" | python3 parse-metric.py --metric accuracy
|
|
9
|
-
python3 parse-metric.py --metric accuracy --file /tmp/eval-output.txt
|
|
10
|
-
|
|
11
|
-
Input lines must match: METRIC {name}={number}
|
|
12
|
-
Output: JSON to stdout with metric name, numeric value, and raw line.
|
|
13
|
-
"""
|
|
14
|
-
|
|
15
|
-
import argparse
|
|
16
|
-
import json
|
|
17
|
-
import re
|
|
18
|
-
import sys
|
|
19
|
-
from typing import Optional
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
# One metric line has the form ``METRIC <name>=<number>``.
# The number is an optionally signed decimal (``12``, ``94.5``, ``5.``,
# ``.5``) with an optional exponent (``1e-05``). Malformed values such as
# ``1.2.3`` or a bare ``.`` deliberately do not match, so they are skipped
# instead of crashing the numeric conversion below.
METRIC_PATTERN = re.compile(
    r"^METRIC\s+(\w+)=([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)\s*$"
)


def parse_lines(lines: list[str]) -> list[dict]:
    """Extract all METRIC entries from a sequence of lines.

    Lines that do not match ``METRIC_PATTERN`` are ignored. For each
    matching line the value is converted to ``int`` when the source text is
    a plain integer and to ``float`` when it contains a decimal point or an
    exponent. Values whose exponent overflows to infinity are skipped,
    because infinity cannot be written as valid JSON.

    Args:
        lines: Raw text lines; one trailing newline per line is stripped.

    Returns:
        One dict per accepted line, in input order, with the keys
        ``metric`` (name), ``value`` (number) and ``raw_line`` (the line
        without its trailing newline).
    """
    results = []
    for line in lines:
        stripped = line.rstrip("\n")
        match = METRIC_PATTERN.match(stripped)
        if not match:
            continue
        name, raw_value = match.groups()
        # Preserve int vs float from the source text.
        value: int | float
        if "." in raw_value or "e" in raw_value.lower():
            value = float(raw_value)
            if value in (float("inf"), float("-inf")):
                # e.g. "1e999": not representable, would emit invalid JSON.
                continue
        else:
            value = int(raw_value)
        results.append(
            {
                "metric": name,
                "value": value,
                "raw_line": stripped,
            }
        )
    return results
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
def main() -> None:
    """Command-line entry point: read text, extract metrics, print JSON.

    Input comes from ``--file`` when given (and non-empty), otherwise from
    stdin. With ``--metric NAME`` only the last entry with that exact name
    is printed, and the process exits with status 1 if none exists. Without
    it, every parsed entry is printed as a JSON list. A read failure
    (``OSError``) is reported on stderr and also exits with status 1.
    """
    usage_examples = """
Examples:
echo "METRIC accuracy=94.5" | python3 parse-metric.py --metric accuracy
python3 parse-metric.py --metric passing_tests --file /tmp/out.txt
python3 parse-metric.py --file /tmp/out.txt # returns all metrics
"""
    cli = argparse.ArgumentParser(
        description="Extract METRIC lines from eval_command output.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    cli.add_argument(
        "--metric",
        metavar="NAME",
        help="Return only this named metric (case-sensitive). Exits 1 if not found.",
    )
    cli.add_argument(
        "--file",
        metavar="PATH",
        help="Read from file instead of stdin.",
    )
    options = cli.parse_args()

    # Gather the raw text lines from the chosen source.
    try:
        if options.file:
            with open(options.file, "r") as handle:
                raw_lines = handle.readlines()
        else:
            raw_lines = sys.stdin.readlines()
    except OSError as exc:
        print(f"error: cannot read input: {exc}", file=sys.stderr)
        sys.exit(1)

    found = parse_lines(raw_lines)

    # No filter requested: emit everything as a list.
    if not options.metric:
        print(json.dumps(found, indent=2))
        return

    wanted = [entry for entry in found if entry["metric"] == options.metric]
    if not wanted:
        print(
            f"error: metric '{options.metric}' not found in input",
            file=sys.stderr,
        )
        sys.exit(1)

    # When a metric is reported more than once, the final report wins.
    print(json.dumps(wanted[-1], indent=2))
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
if __name__ == "__main__":
|
|
106
|
-
main()
|