atris 2.6.0 → 2.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/GETTING_STARTED.md +2 -2
- package/atris/GETTING_STARTED.md +2 -2
- package/bin/atris.js +35 -4
- package/commands/business.js +244 -2
- package/commands/context-sync.js +228 -0
- package/commands/pull.js +176 -50
- package/commands/push.js +154 -61
- package/commands/setup.js +178 -0
- package/commands/workspace-clean.js +249 -0
- package/lib/manifest.js +224 -0
- package/lib/section-merge.js +196 -0
- package/package.json +9 -4
- package/utils/api.js +9 -1
- package/utils/update-check.js +11 -11
- package/AGENT.md +0 -35
- package/atris/experiments/README.md +0 -118
- package/atris/experiments/_examples/smoke-keep-revert/README.md +0 -45
- package/atris/experiments/_examples/smoke-keep-revert/candidate.py +0 -8
- package/atris/experiments/_examples/smoke-keep-revert/loop.py +0 -129
- package/atris/experiments/_examples/smoke-keep-revert/measure.py +0 -47
- package/atris/experiments/_examples/smoke-keep-revert/program.md +0 -3
- package/atris/experiments/_examples/smoke-keep-revert/proposals/bad_patch.py +0 -19
- package/atris/experiments/_examples/smoke-keep-revert/proposals/fix_patch.py +0 -22
- package/atris/experiments/_examples/smoke-keep-revert/reset.py +0 -21
- package/atris/experiments/_examples/smoke-keep-revert/results.tsv +0 -5
- package/atris/experiments/_examples/smoke-keep-revert/visual.svg +0 -52
- package/atris/experiments/_fixtures/invalid/BadName/loop.py +0 -1
- package/atris/experiments/_fixtures/invalid/BadName/program.md +0 -3
- package/atris/experiments/_fixtures/invalid/BadName/results.tsv +0 -1
- package/atris/experiments/_fixtures/invalid/bloated-context/loop.py +0 -1
- package/atris/experiments/_fixtures/invalid/bloated-context/measure.py +0 -1
- package/atris/experiments/_fixtures/invalid/bloated-context/program.md +0 -6
- package/atris/experiments/_fixtures/invalid/bloated-context/results.tsv +0 -1
- package/atris/experiments/_fixtures/valid/good-experiment/loop.py +0 -1
- package/atris/experiments/_fixtures/valid/good-experiment/measure.py +0 -1
- package/atris/experiments/_fixtures/valid/good-experiment/program.md +0 -3
- package/atris/experiments/_fixtures/valid/good-experiment/results.tsv +0 -1
- package/atris/experiments/_template/pack/loop.py +0 -3
- package/atris/experiments/_template/pack/measure.py +0 -13
- package/atris/experiments/_template/pack/program.md +0 -3
- package/atris/experiments/_template/pack/reset.py +0 -3
- package/atris/experiments/_template/pack/results.tsv +0 -1
- package/atris/experiments/benchmark_runtime.py +0 -81
- package/atris/experiments/benchmark_validate.py +0 -70
- package/atris/experiments/validate.py +0 -92
- package/atris/team/navigator/journal/2026-02-23.md +0 -6
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Section-level three-way merge for structured markdown files.
|
|
3
|
+
*
|
|
4
|
+
* Parses markdown into sections (split on ## headers + YAML frontmatter).
|
|
5
|
+
* Merges non-conflicting section changes. Flags same-section conflicts.
|
|
6
|
+
*
|
|
7
|
+
* This is what makes us better than git for context files.
|
|
8
|
+
* Git merges by line. We merge by section.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
/**
 * Parse a markdown document into frontmatter, header and `## ` sections.
 *
 * - `frontmatter`: the leading `---` ... `---` block (delimiters included),
 *   or '' when the document does not start with a *closed* frontmatter block.
 * - `header`: everything between the frontmatter and the first `## ` heading.
 * - `sections`: one entry per `## ` heading, in document order. `name` is the
 *   trimmed heading text; `content` is the heading line plus every following
 *   line up to (not including) the next `## ` heading.
 *
 * Lines are split on '\n' only, so joining the pieces back with '\n'
 * reproduces the input. Known limitation: a header consisting of a single
 * blank line is represented as '' and is indistinguishable from "no header".
 *
 * @param {string} content - Raw markdown text (falsy input yields an empty result)
 * @returns {{ frontmatter: string, header: string, sections: Array<{name: string, content: string}> }}
 */
function parseSections(content) {
  if (!content) return { frontmatter: '', header: '', sections: [] };

  // Opening code fence: up to 3 spaces of indent, then >= 3 backticks (with no
  // further backtick on the line) or >= 3 tildes.
  const FENCE_OPEN = /^ {0,3}(`{3,}(?=[^`]*$)|~{3,})/;

  const lines = content.split('\n');

  // Frontmatter only counts when it opens on the first line AND is closed
  // later. An unclosed leading `---` (e.g. a horizontal rule) is ordinary
  // content; otherwise it would swallow the entire document.
  let frontmatter = '';
  let bodyStart = 0;
  if (lines[0].trim() === '---') {
    const closeIndex = lines.findIndex((line, i) => i > 0 && line.trim() === '---');
    if (closeIndex !== -1) {
      frontmatter = lines.slice(0, closeIndex + 1).join('\n');
      bodyStart = closeIndex + 1;
    }
  }

  const headerLines = [];
  const sections = [];
  let current = null; // section currently being accumulated
  let fence = null;   // marker of the open code fence, e.g. '```', or null

  for (let i = bodyStart; i < lines.length; i++) {
    const line = lines[i];

    if (fence) {
      // Inside a fenced code block: only a matching closing fence is special.
      const trimmed = line.trim();
      const closes =
        trimmed.length >= fence.length &&
        [...trimmed].every((ch) => ch === fence[0]);
      if (closes) fence = null;
    } else {
      const opened = FENCE_OPEN.exec(line);
      if (opened) {
        fence = opened[1];
      } else if (line.startsWith('## ')) {
        // A real section heading: flush the previous section and start anew.
        if (current) sections.push(current);
        current = { name: line.substring(3).trim(), content: line };
        continue;
      }
    }

    // Plain content belongs to the open section, or to the header before it.
    if (current) {
      current.content += '\n' + line;
    } else {
      headerLines.push(line);
    }
  }

  if (current) sections.push(current);

  return { frontmatter, header: headerLines.join('\n'), sections };
}
|
|
76
|
+
|
|
77
|
+
/**
 * Reconstruct a markdown document from its parsed pieces.
 *
 * Non-empty frontmatter, non-empty header and every section's `content` are
 * joined with a single '\n', in that order. Missing pieces are tolerated:
 * a null/undefined `parsed`, or one without `sections`, yields whatever
 * parts are present (possibly the empty string) instead of throwing.
 *
 * @param {{ frontmatter?: string, header?: string, sections?: Array<{content: string}> }} parsed
 * @returns {string} The reassembled markdown text
 */
function reconstructDocument(parsed) {
  const doc = parsed || {};
  // Empty frontmatter/header are skipped so they do not add blank lines.
  const parts = [doc.frontmatter, doc.header].filter(Boolean);
  for (const section of doc.sections || []) {
    parts.push(section.content);
  }
  return parts.join('\n');
}
|
|
89
|
+
|
|
90
|
+
/**
 * Three-way section merge.
 *
 * Frontmatter, header and each `## ` section are merged as whole units using
 * the usual three-way rule:
 *   - both sides equal            -> take that value (includes identical edits)
 *   - only one side differs from base -> take the changed side
 *   - both differ from base and from each other -> conflict
 * A section that is absent on a side counts as "deleted" (or "not added").
 * Sections are emitted in base order, followed by sections new in local,
 * then sections new in remote. Repeated section names are matched by their
 * occurrence index (first "Notes" with first "Notes", and so on).
 *
 * @param {string} base - Common ancestor content
 * @param {string} local - Your version
 * @param {string} remote - Their version
 * @returns {{ merged: string|null, conflicts: Array<{section: string, local: string|null, remote: string|null}> }}
 *
 * If merged is non-null, the merge succeeded (conflicts array is empty).
 * If merged is null, there are conflicts that need manual resolution.
 */
function sectionMerge(base, local, remote) {
  // Three-way resolution of a single value; null means "absent".
  const resolve = (b, l, r) => {
    if (l === r) return { value: l, conflict: false }; // same on both sides
    if (l === b) return { value: r, conflict: false }; // only remote changed
    if (r === b) return { value: l, conflict: false }; // only local changed
    return { value: l, conflict: true };
  };

  // Index sections by "name + occurrence number". A Map (not a plain object)
  // keeps names like "constructor" or "__proto__" safe, and the occurrence
  // number keeps repeated headings from overwriting one another.
  const indexSections = (sections) => {
    const seenCount = new Map();
    const byKey = new Map();
    for (const { name, content } of sections) {
      const occurrence = seenCount.get(name) || 0;
      seenCount.set(name, occurrence + 1);
      byKey.set(`${name}\u0000${occurrence}`, { name, content });
    }
    return byKey;
  };

  const baseParsed = parseSections(base);
  const localParsed = parseSections(local);
  const remoteParsed = parseSections(remote);

  const conflicts = [];

  // Frontmatter and header are each merged as one opaque string.
  const frontmatter = resolve(baseParsed.frontmatter, localParsed.frontmatter, remoteParsed.frontmatter);
  if (frontmatter.conflict) {
    conflicts.push({ section: 'frontmatter', local: localParsed.frontmatter, remote: remoteParsed.frontmatter });
  }

  const header = resolve(baseParsed.header, localParsed.header, remoteParsed.header);
  if (header.conflict) {
    conflicts.push({ section: 'header', local: localParsed.header, remote: remoteParsed.header });
  }

  const baseIndex = indexSections(baseParsed.sections);
  const localIndex = indexSections(localParsed.sections);
  const remoteIndex = indexSections(remoteParsed.sections);

  // Output order: base order first, then sections new in local, then remote.
  const order = new Map(); // key -> section name, insertion-ordered
  for (const index of [baseIndex, localIndex, remoteIndex]) {
    for (const [key, { name }] of index) {
      if (!order.has(key)) order.set(key, name);
    }
  }

  const mergedSections = [];
  for (const [key, name] of order) {
    const contentOf = (index) => (index.has(key) ? index.get(key).content : null);
    const l = contentOf(localIndex);
    const r = contentOf(remoteIndex);
    const outcome = resolve(contentOf(baseIndex), l, r);

    if (outcome.conflict) {
      conflicts.push({ section: name, local: l, remote: r });
    } else if (outcome.value !== null) {
      mergedSections.push({ name, content: outcome.value });
    }
    // else: the section was deleted (or never added) — emit nothing.
  }

  if (conflicts.length > 0) {
    return { merged: null, conflicts };
  }

  const merged = reconstructDocument({
    frontmatter: frontmatter.value,
    header: header.value,
    sections: mergedSections,
  });

  return { merged, conflicts: [] };
}
|
|
195
|
+
|
|
196
|
+
module.exports = { parseSections, reconstructDocument, sectionMerge };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "atris",
|
|
3
|
-
"version": "2.6.0",
|
|
3
|
+
"version": "2.6.2",
|
|
4
4
|
"description": "atrisDev (atris dev) - CLI for AI coding agents. Works with Claude Code, Cursor, Windsurf. Make any codebase AI-navigable.",
|
|
5
5
|
"main": "bin/atris.js",
|
|
6
6
|
"bin": {
|
|
@@ -12,7 +12,6 @@
|
|
|
12
12
|
"utils/",
|
|
13
13
|
"lib/",
|
|
14
14
|
"README.md",
|
|
15
|
-
"AGENT.md",
|
|
16
15
|
"AGENTS.md",
|
|
17
16
|
"atris.md",
|
|
18
17
|
"GETTING_STARTED.md",
|
|
@@ -20,8 +19,14 @@
|
|
|
20
19
|
"atris/atrisDev.md",
|
|
21
20
|
"atris/CLAUDE.md",
|
|
22
21
|
"atris/GEMINI.md",
|
|
23
|
-
"atris/
|
|
24
|
-
"atris/
|
|
22
|
+
"atris/GETTING_STARTED.md",
|
|
23
|
+
"atris/team/navigator/MEMBER.md",
|
|
24
|
+
"atris/team/executor/MEMBER.md",
|
|
25
|
+
"atris/team/validator/MEMBER.md",
|
|
26
|
+
"atris/team/brainstormer/MEMBER.md",
|
|
27
|
+
"atris/team/launcher/MEMBER.md",
|
|
28
|
+
"atris/team/researcher/MEMBER.md",
|
|
29
|
+
"atris/team/_template/MEMBER.md",
|
|
25
30
|
"atris/features/_templates/",
|
|
26
31
|
"atris/policies/",
|
|
27
32
|
"atris/skills/"
|
package/utils/api.js
CHANGED
|
@@ -41,7 +41,7 @@ function httpRequest(urlString, options) {
|
|
|
41
41
|
const parsed = new URL(urlString);
|
|
42
42
|
const isHttps = parsed.protocol === 'https:';
|
|
43
43
|
const transport = isHttps ? https : http;
|
|
44
|
-
const timeoutMs = typeof options.timeoutMs === 'number' ? options.timeoutMs :
|
|
44
|
+
const timeoutMs = typeof options.timeoutMs === 'number' ? options.timeoutMs : 30000;
|
|
45
45
|
|
|
46
46
|
const requestOptions = {
|
|
47
47
|
method: options.method || 'GET',
|
|
@@ -52,6 +52,13 @@ function httpRequest(urlString, options) {
|
|
|
52
52
|
};
|
|
53
53
|
|
|
54
54
|
const req = transport.request(requestOptions, (res) => {
|
|
55
|
+
// Follow redirects (301, 302, 307, 308)
|
|
56
|
+
if ([301, 302, 307, 308].includes(res.statusCode) && res.headers.location) {
|
|
57
|
+
const redirectUrl = new URL(res.headers.location, urlString).toString();
|
|
58
|
+
resolve(httpRequest(redirectUrl, options));
|
|
59
|
+
return;
|
|
60
|
+
}
|
|
61
|
+
|
|
55
62
|
const chunks = [];
|
|
56
63
|
res.on('data', (chunk) => chunks.push(chunk));
|
|
57
64
|
res.on('end', () => {
|
|
@@ -111,6 +118,7 @@ async function apiRequestJson(pathname, options = {}) {
|
|
|
111
118
|
method: options.method || 'GET',
|
|
112
119
|
headers,
|
|
113
120
|
body: bodyPayload,
|
|
121
|
+
timeoutMs: options.timeoutMs,
|
|
114
122
|
});
|
|
115
123
|
|
|
116
124
|
const text = result.body.toString('utf8');
|
package/utils/update-check.js
CHANGED
|
@@ -4,8 +4,9 @@ const path = require('path');
|
|
|
4
4
|
const os = require('os');
|
|
5
5
|
|
|
6
6
|
const PACKAGE_NAME = 'atris';
|
|
7
|
-
const CHECK_INTERVAL_MS =
|
|
8
|
-
const
|
|
7
|
+
const CHECK_INTERVAL_MS = 60 * 60 * 1000; // 1 hour
|
|
8
|
+
const ATRIS_DIR = path.join(os.homedir(), '.atris');
|
|
9
|
+
const CACHE_FILE = path.join(ATRIS_DIR, '.update-check');
|
|
9
10
|
|
|
10
11
|
function getInstalledVersion() {
|
|
11
12
|
try {
|
|
@@ -34,6 +35,10 @@ function getCacheData() {
|
|
|
34
35
|
|
|
35
36
|
function saveCacheData(latestVersion) {
|
|
36
37
|
try {
|
|
38
|
+
// Ensure ~/.atris/ exists
|
|
39
|
+
if (!fs.existsSync(ATRIS_DIR)) {
|
|
40
|
+
fs.mkdirSync(ATRIS_DIR, { recursive: true });
|
|
41
|
+
}
|
|
37
42
|
const data = {
|
|
38
43
|
lastCheck: new Date().toISOString(),
|
|
39
44
|
latestVersion: latestVersion,
|
|
@@ -164,15 +169,10 @@ async function checkForUpdates(force = false) {
|
|
|
164
169
|
function showUpdateNotification(updateInfo) {
|
|
165
170
|
if (!updateInfo || !updateInfo.needsUpdate) return;
|
|
166
171
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
console.log(
|
|
171
|
-
if (updateInfo.fromCache) {
|
|
172
|
-
console.log(` (checking npm registry...)`);
|
|
173
|
-
}
|
|
174
|
-
console.log('━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━');
|
|
175
|
-
console.log('');
|
|
172
|
+
// Single yellow warning line — non-intrusive
|
|
173
|
+
const yellow = '\x1b[33m';
|
|
174
|
+
const reset = '\x1b[0m';
|
|
175
|
+
console.log(`${yellow}Update available: ${updateInfo.installed} → ${updateInfo.latest}. Run: npm install -g atris${reset}`);
|
|
176
176
|
}
|
|
177
177
|
|
|
178
178
|
function autoUpdate(updateInfo) {
|
package/AGENT.md
DELETED
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
# AGENT.md
|
|
2
|
-
|
|
3
|
-
This file provides guidance to any coding agent (Claude Code, Cursor, Windsurf, etc) when working with code in this repository.
|
|
4
|
-
|
|
5
|
-
## Using Atris (If atris/ folder exists)
|
|
6
|
-
|
|
7
|
-
**You are in an Atris-managed project.**
|
|
8
|
-
|
|
9
|
-
**FIRST:** Read `atris/PERSONA.md` and adopt that personality.
|
|
10
|
-
|
|
11
|
-
**Then follow this workflow:**
|
|
12
|
-
1. **Execute first, research only if needed** — Run commands/tools directly. Don't search docs first—see what happens, then investigate if it fails. Saves context.
|
|
13
|
-
2. **Before any change:** Read `atris/MAP.md` to find relevant files/components
|
|
14
|
-
3. **When starting a task:** Check `atris/TODO.md` for existing tasks or add new one
|
|
15
|
-
4. **After completing task:** Delete task from TODO.md
|
|
16
|
-
5. **If architecture changes:** Update `atris/MAP.md` with new structure
|
|
17
|
-
6. **Follow agent workflow:** navigator (find) → executor (build) → validator (verify)
|
|
18
|
-
|
|
19
|
-
**Key files:**
|
|
20
|
-
- `atris/PERSONA.md` - How to communicate and work (READ THIS FIRST)
|
|
21
|
-
- `atris/MAP.md` - Navigation guide (where is X?)
|
|
22
|
-
- `atris/TODO.md` - Active tasks (delete when done)
|
|
23
|
-
- `atris/team/*.md` - Agent specs for reference
|
|
24
|
-
|
|
25
|
-
---
|
|
26
|
-
|
|
27
|
-
**Quick Start:**
|
|
28
|
-
1. Read PERSONA.md
|
|
29
|
-
2. Run `atris activate` to load context (no login or agent selection required)
|
|
30
|
-
3. Check TODO.md for current work
|
|
31
|
-
4. Use `atris visualize` to see plans before building
|
|
32
|
-
5. Use `atris autopilot` when you want the CLI to shepherd plan → do → review loops (optional)
|
|
33
|
-
6. Use `atris brainstorm` to generate a concise conversation starter before handing ideas to coding agents (optional)
|
|
34
|
-
|
|
35
|
-
Need to chat with Atris cloud agents? Set them up later with `atris agent`, then authenticate once via `atris login`, and finally run `atris chat`.
|
|
@@ -1,118 +0,0 @@
|
|
|
1
|
-
# experiments
|
|
2
|
-
|
|
3
|
-
Karpathy-style experiment framework for Atris workspaces.
|
|
4
|
-
|
|
5
|
-
This folder defines the schema, validation rules, and benchmark harness for self-improvement loops.
|
|
6
|
-
Live experiment packs belong directly inside `atris/experiments/`.
|
|
7
|
-
|
|
8
|
-
## What This Is
|
|
9
|
-
|
|
10
|
-
An experiment is not "the agent rewrote its prompt and said it improved."
|
|
11
|
-
|
|
12
|
-
An experiment is:
|
|
13
|
-
|
|
14
|
-
1. one bounded target
|
|
15
|
-
2. one external metric
|
|
16
|
-
3. one keep/revert loop
|
|
17
|
-
4. one append-only log
|
|
18
|
-
|
|
19
|
-
If the metric goes up, keep the change.
|
|
20
|
-
If it does not, revert it.
|
|
21
|
-
|
|
22
|
-
## Schema
|
|
23
|
-
|
|
24
|
-
```text
|
|
25
|
-
atris/experiments/
|
|
26
|
-
├── README.md
|
|
27
|
-
├── validate.py
|
|
28
|
-
├── benchmark_validate.py
|
|
29
|
-
├── benchmark_runtime.py
|
|
30
|
-
├── _template/ # packaged scaffolds
|
|
31
|
-
├── _examples/ # packaged smoke examples
|
|
32
|
-
├── _fixtures/ # validator benchmark cases
|
|
33
|
-
└── <experiment-slug>/
|
|
34
|
-
├── program.md
|
|
35
|
-
├── measure.py
|
|
36
|
-
├── loop.py
|
|
37
|
-
├── results.tsv
|
|
38
|
-
├── reset.py # preferred
|
|
39
|
-
├── proposals/ # optional
|
|
40
|
-
└── <bounded-target> # candidate.py, system_prompt.txt, etc.
|
|
41
|
-
```
|
|
42
|
-
|
|
43
|
-
## Rules
|
|
44
|
-
|
|
45
|
-
1. One bounded mutation target per experiment.
|
|
46
|
-
2. `measure.py` must use an external metric the agent cannot fake.
|
|
47
|
-
3. `loop.py` must keep only improvements and revert regressions.
|
|
48
|
-
4. `program.md` stays short and task-specific.
|
|
49
|
-
5. `results.tsv` stays append-only.
|
|
50
|
-
|
|
51
|
-
## Repo Contents
|
|
52
|
-
|
|
53
|
-
- `_template/pack/` - starter files for a new experiment
|
|
54
|
-
- `validate.py` - structural and bloat checks
|
|
55
|
-
- `benchmark_validate.py` - validator benchmark on fixed good/bad fixtures
|
|
56
|
-
- `benchmark_runtime.py` - runtime benchmark on packaged example packs
|
|
57
|
-
- `_examples/` - tiny reference implementation
|
|
58
|
-
|
|
59
|
-
## Example
|
|
60
|
-
|
|
61
|
-
Start with the smallest honest pack:
|
|
62
|
-
|
|
63
|
-
```text
|
|
64
|
-
_examples/smoke-keep-revert/
|
|
65
|
-
├── candidate.py
|
|
66
|
-
├── measure.py
|
|
67
|
-
├── loop.py
|
|
68
|
-
├── reset.py
|
|
69
|
-
├── results.tsv
|
|
70
|
-
└── proposals/
|
|
71
|
-
├── bad_patch.py
|
|
72
|
-
└── fix_patch.py
|
|
73
|
-
```
|
|
74
|
-
|
|
75
|
-
What it does:
|
|
76
|
-
|
|
77
|
-
- `candidate.py` starts broken on purpose
|
|
78
|
-
- `measure.py` scores it on a fixed word-count test
|
|
79
|
-
- `bad_patch.py` makes it worse
|
|
80
|
-
- `fix_patch.py` actually fixes it
|
|
81
|
-
- `loop.py` keeps only the fix
|
|
82
|
-
|
|
83
|
-
Run it:
|
|
84
|
-
|
|
85
|
-
```bash
|
|
86
|
-
python _examples/smoke-keep-revert/reset.py
|
|
87
|
-
python _examples/smoke-keep-revert/loop.py \
|
|
88
|
-
--proposal _examples/smoke-keep-revert/proposals/bad_patch.py \
|
|
89
|
-
--proposal _examples/smoke-keep-revert/proposals/fix_patch.py
|
|
90
|
-
```
|
|
91
|
-
|
|
92
|
-
Visual:
|
|
93
|
-
|
|
94
|
-
```text
|
|
95
|
-
broken target
|
|
96
|
-
↓
|
|
97
|
-
score = 0.2
|
|
98
|
-
↓
|
|
99
|
-
bad patch
|
|
100
|
-
↓
|
|
101
|
-
score = 0.0
|
|
102
|
-
↓
|
|
103
|
-
REVERT
|
|
104
|
-
↓
|
|
105
|
-
good patch
|
|
106
|
-
↓
|
|
107
|
-
score = 1.0
|
|
108
|
-
↓
|
|
109
|
-
KEEP
|
|
110
|
-
```
|
|
111
|
-
|
|
112
|
-
## Commands
|
|
113
|
-
|
|
114
|
-
```bash
|
|
115
|
-
python validate.py .
|
|
116
|
-
python benchmark_validate.py
|
|
117
|
-
python benchmark_runtime.py
|
|
118
|
-
```
|
|
@@ -1,45 +0,0 @@
|
|
|
1
|
-
# smoke-keep-revert
|
|
2
|
-
|
|
3
|
-
Smallest honest example of the framework.
|
|
4
|
-
|
|
5
|
-

|
|
6
|
-
|
|
7
|
-
## Files
|
|
8
|
-
|
|
9
|
-
```text
|
|
10
|
-
candidate.py -> bounded target
|
|
11
|
-
measure.py -> hard score
|
|
12
|
-
loop.py -> keep/revert engine
|
|
13
|
-
reset.py -> restore baseline
|
|
14
|
-
results.tsv -> trial log
|
|
15
|
-
proposals/ -> bad patch + good patch
|
|
16
|
-
```
|
|
17
|
-
|
|
18
|
-
## Flow
|
|
19
|
-
|
|
20
|
-
```text
|
|
21
|
-
candidate.py is wrong
|
|
22
|
-
↓
|
|
23
|
-
measure.py scores baseline
|
|
24
|
-
↓
|
|
25
|
-
loop.py applies bad_patch.py
|
|
26
|
-
↓
|
|
27
|
-
score does not improve
|
|
28
|
-
↓
|
|
29
|
-
loop.py reverts the change
|
|
30
|
-
↓
|
|
31
|
-
loop.py applies fix_patch.py
|
|
32
|
-
↓
|
|
33
|
-
score improves
|
|
34
|
-
↓
|
|
35
|
-
loop.py keeps the change
|
|
36
|
-
```
|
|
37
|
-
|
|
38
|
-
## Run
|
|
39
|
-
|
|
40
|
-
```bash
|
|
41
|
-
python reset.py
|
|
42
|
-
python loop.py \
|
|
43
|
-
--proposal proposals/bad_patch.py \
|
|
44
|
-
--proposal proposals/fix_patch.py
|
|
45
|
-
```
|
|
@@ -1,129 +0,0 @@
|
|
|
1
|
-
"""Shared keep/revert loop for a bounded local experiment."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import argparse
|
|
6
|
-
import csv
|
|
7
|
-
import json
|
|
8
|
-
import os
|
|
9
|
-
from pathlib import Path
|
|
10
|
-
import shutil
|
|
11
|
-
import subprocess
|
|
12
|
-
import sys
|
|
13
|
-
from datetime import datetime, timezone
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
EXPERIMENT_DIR = Path(__file__).resolve().parent
|
|
17
|
-
DEFAULT_TARGET = EXPERIMENT_DIR / "candidate.py"
|
|
18
|
-
DEFAULT_MEASURE = EXPERIMENT_DIR / "measure.py"
|
|
19
|
-
DEFAULT_RESULTS = EXPERIMENT_DIR / "results.tsv"
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def run_measure(measure_path: Path) -> dict:
    """Execute the measure script and return the JSON object it prints.

    The script is run with the current interpreter from EXPERIMENT_DIR.
    A non-zero exit status raises subprocess.CalledProcessError; output that
    is not valid JSON raises json.JSONDecodeError.
    """
    command = [sys.executable, str(measure_path)]
    completed = subprocess.run(
        command,
        cwd=str(EXPERIMENT_DIR),
        capture_output=True,
        text=True,
        check=True,
    )
    raw_payload = completed.stdout.strip()
    return json.loads(raw_payload)
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def append_result(results_path: Path, row: dict) -> None:
    """Append one trial row to the tab-separated results log.

    The column header is written first when the file is missing or empty.
    """
    columns = [
        "timestamp",
        "trial",
        "status",
        "old_score",
        "new_score",
        "proposal",
        "description",
    ]

    # Decide before opening: opening in append mode would create the file.
    needs_header = True
    if results_path.exists():
        needs_header = results_path.stat().st_size == 0

    with results_path.open("a", newline="", encoding="utf-8") as out:
        tsv = csv.DictWriter(out, fieldnames=columns, delimiter="\t")
        if needs_header:
            tsv.writeheader()
        tsv.writerow(row)
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
def restore_backup(backup_path: Path, target_path: Path) -> None:
    """Copy the backup over the target, then remove the backup file."""
    shutil.copy2(backup_path, target_path)
    try:
        backup_path.unlink()
    except FileNotFoundError:
        # Already gone — nothing left to clean up.
        pass
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
def main() -> int:
    """Run each --proposal script against the target and keep only improvements.

    For every proposal: back up the target, run the proposal, re-measure, and
    keep the change only when the score strictly increases. Otherwise the
    backup is restored. One row per trial is appended to the results log.

    A trial is recorded as "error" and the target is restored when the
    proposal (or the measure script) exits non-zero, or when the measure
    output is not valid JSON with a numeric "score".

    Returns:
        0 on completion.
    """
    parser = argparse.ArgumentParser(description="Run a bounded keep/revert experiment.")
    parser.add_argument("--proposal", action="append", default=[])
    args = parser.parse_args()

    target_path = DEFAULT_TARGET.resolve()
    measure_path = DEFAULT_MEASURE.resolve()
    results_path = DEFAULT_RESULTS.resolve()

    baseline = run_measure(measure_path)
    current_score = float(baseline["score"])
    print(f"BASELINE {current_score:.4f}")

    for trial_index, proposal in enumerate(args.proposal, start=1):
        proposal_path = Path(proposal).resolve()
        backup_path = target_path.with_suffix(target_path.suffix + f".trial{trial_index}.bak")
        shutil.copy2(target_path, backup_path)

        status = "error"
        old_score = current_score
        new_score = current_score
        description = ""

        try:
            proc = subprocess.run(
                [sys.executable, str(proposal_path)],
                cwd=str(EXPERIMENT_DIR),
                capture_output=True,
                text=True,
                check=True,
                env={**os.environ, "EXPERIMENT_TARGET": str(target_path)},
            )
            if proc.stdout.strip():
                # Last stdout line of the proposal doubles as its description.
                description = proc.stdout.strip().splitlines()[-1][:200]

            measured = run_measure(measure_path)
            new_score = float(measured["score"])
            if new_score > current_score:
                status = "kept"
                current_score = new_score
                backup_path.unlink(missing_ok=True)
            else:
                status = "reverted"
                restore_backup(backup_path, target_path)
        except subprocess.CalledProcessError as exc:
            restore_backup(backup_path, target_path)
            stderr = (exc.stderr or exc.stdout or "").strip()
            description = (stderr.splitlines()[-1] if stderr else "proposal failed")[:200]
            status = "error"
        except (ValueError, KeyError, TypeError) as exc:
            # Measure output was not valid JSON or lacked a numeric "score".
            # Without this branch the target would stay mutated and the .bak
            # file would be left behind while the whole loop aborts.
            restore_backup(backup_path, target_path)
            description = f"invalid measure output: {exc}"[:200]
            new_score = current_score
            status = "error"

        append_result(
            results_path,
            {
                "timestamp": datetime.now(timezone.utc).isoformat(),
                "trial": trial_index,
                "status": status,
                "old_score": f"{old_score:.4f}",
                "new_score": f"{new_score:.4f}",
                "proposal": proposal_path.name,
                "description": description,
            },
        )
        print(f"TRIAL {trial_index} {status.upper()} score={new_score:.4f} proposal={proposal_path.name}")

    final_measure = run_measure(measure_path)
    print(f"FINAL {final_measure['score']:.4f}")
    return 0
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
if __name__ == "__main__":
|
|
129
|
-
raise SystemExit(main())
|
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
"""Objective metric for the smoke keep/revert example."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import json
|
|
6
|
-
from pathlib import Path
|
|
7
|
-
import sys
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
EXPERIMENT_DIR = Path(__file__).resolve().parent
|
|
11
|
-
if str(EXPERIMENT_DIR) not in sys.path:
|
|
12
|
-
sys.path.insert(0, str(EXPERIMENT_DIR))
|
|
13
|
-
|
|
14
|
-
from candidate import count_words
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
CASES = [
|
|
18
|
-
("", 0),
|
|
19
|
-
("one", 1),
|
|
20
|
-
("two words", 2),
|
|
21
|
-
(" three spaced words ", 3),
|
|
22
|
-
("punctuation, still counts", 3),
|
|
23
|
-
]
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
def main() -> int:
    """Score count_words against CASES and print the result as one JSON line.

    The score is the fraction of cases whose output equals the expected
    count, rounded to 4 decimals (0.0 when there are no cases).
    """
    total = len(CASES)
    passed = sum(1 for text, expected in CASES if count_words(text) == expected)
    score = passed / total if total else 0.0

    print(
        json.dumps(
            {
                "score": round(score, 4),
                "passed": passed,
                "total": total,
                "status": "pass" if passed == total else "fail",
            }
        )
    )
    return 0
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
if __name__ == "__main__":
|
|
47
|
-
raise SystemExit(main())
|