atris 2.6.2 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/README.md +124 -34
  2. package/atris/CLAUDE.md +5 -1
  3. package/atris/atris.md +4 -0
  4. package/atris/features/README.md +24 -0
  5. package/atris/skills/autopilot/SKILL.md +74 -75
  6. package/atris/skills/endgame/SKILL.md +179 -0
  7. package/atris/skills/flow/SKILL.md +121 -0
  8. package/atris/skills/improve/SKILL.md +84 -0
  9. package/atris/skills/loop/SKILL.md +72 -0
  10. package/atris/skills/wiki/SKILL.md +61 -0
  11. package/atris/team/executor/MEMBER.md +10 -4
  12. package/atris/team/navigator/MEMBER.md +2 -0
  13. package/atris/team/validator/MEMBER.md +8 -5
  14. package/atris.md +33 -0
  15. package/bin/atris.js +210 -41
  16. package/commands/activate.js +28 -2
  17. package/commands/align.js +720 -0
  18. package/commands/auth.js +75 -2
  19. package/commands/autopilot.js +1213 -270
  20. package/commands/browse.js +100 -0
  21. package/commands/business.js +785 -12
  22. package/commands/clean.js +107 -2
  23. package/commands/computer.js +429 -0
  24. package/commands/context-sync.js +78 -8
  25. package/commands/experiments.js +351 -0
  26. package/commands/feedback.js +150 -0
  27. package/commands/fleet.js +395 -0
  28. package/commands/fork.js +127 -0
  29. package/commands/init.js +50 -1
  30. package/commands/learn.js +407 -0
  31. package/commands/lifecycle.js +94 -0
  32. package/commands/loop.js +114 -0
  33. package/commands/publish.js +129 -0
  34. package/commands/pull.js +434 -48
  35. package/commands/push.js +312 -164
  36. package/commands/review.js +149 -0
  37. package/commands/run.js +76 -43
  38. package/commands/serve.js +360 -0
  39. package/commands/setup.js +1 -1
  40. package/commands/soul.js +381 -0
  41. package/commands/status.js +119 -1
  42. package/commands/sync.js +147 -1
  43. package/commands/terminal.js +201 -0
  44. package/commands/wiki.js +376 -0
  45. package/commands/workflow.js +191 -74
  46. package/commands/workspace-clean.js +3 -3
  47. package/lib/endstate.js +259 -0
  48. package/lib/learnings.js +235 -0
  49. package/lib/manifest.js +1 -0
  50. package/lib/todo.js +9 -5
  51. package/lib/wiki.js +578 -0
  52. package/package.json +2 -2
  53. package/utils/api.js +48 -36
  54. package/utils/auth.js +1 -0
@@ -1,6 +1,22 @@
1
1
  const fs = require('fs');
2
2
  const path = require('path');
3
3
  const { spawnSync } = require('child_process');
4
+ const { runTaskOnce } = require('./autopilot');
5
+ const {
6
+ appendResultsRow,
7
+ buildRunId,
8
+ buildWikiArtifact,
9
+ compareEndstateArtifacts,
10
+ collectChangedFiles,
11
+ getArtifactScore,
12
+ getGitHead,
13
+ inferTestResults,
14
+ readLatestArtifact,
15
+ readTextIfExists,
16
+ scoreEndstateArtifact,
17
+ summarizeReview,
18
+ writeArtifact,
19
+ } = require('../lib/endstate');
4
20
 
5
21
  const SLUG_RE = /^[a-z0-9]+(?:-[a-z0-9]+)*$/;
6
22
  const ROOT_FILES = ['README.md', 'validate.py', 'benchmark_validate.py', 'benchmark_runtime.py'];
@@ -183,6 +199,327 @@ function experimentsBenchmark(kind = 'all') {
183
199
  }
184
200
  }
185
201
 
202
/**
 * Parse the flags that follow `atris experiments run <slug>`.
 *
 * Recognized flags:
 *   --dry-run, -n                                  sets `dryRun`
 *   --verbose                                      sets `verbose`
 *   --backend <path> | --backend=<path>            sets `backendPath` (resolved against cwd)
 *   --intervention <text> | --intervention=<text>  appends to `interventions` (repeatable)
 *
 * Anything else is ignored, including a value-taking flag that has no value after it.
 *
 * @param {string[]} args - Raw CLI arguments after the pack slug.
 * @returns {{dryRun: boolean, verbose: boolean, backendPath: string, interventions: string[]}}
 */
function parseRunOptions(args) {
  const options = {
    dryRun: false,
    verbose: false,
    backendPath: path.resolve(process.cwd(), '../atrisos-backend'),
    interventions: [],
  };

  // Everything after the FIRST "=" is the value, so values may themselves
  // contain "=" (e.g. --intervention=retry=2). Splitting on "=" and taking
  // element [1] would silently truncate such values.
  const inlineValue = (arg) => arg.slice(arg.indexOf('=') + 1);

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];

    if (arg === '--dry-run' || arg === '-n') {
      options.dryRun = true;
      continue;
    }

    if (arg === '--verbose') {
      options.verbose = true;
      continue;
    }

    if (arg === '--backend' && args[i + 1]) {
      // Consume the following argument as the flag's value.
      options.backendPath = path.resolve(process.cwd(), args[++i]);
      continue;
    }

    if (arg.startsWith('--backend=')) {
      options.backendPath = path.resolve(process.cwd(), inlineValue(arg));
      continue;
    }

    if (arg === '--intervention' && args[i + 1]) {
      options.interventions.push(args[++i]);
      continue;
    }

    if (arg.startsWith('--intervention=')) {
      options.interventions.push(inlineValue(arg));
      continue;
    }
  }

  return options;
}
246
+
247
/**
 * Tell whether a pack slug is one of the two Endstate benchmark packs.
 *
 * @param {string} name - Experiment pack slug.
 * @returns {boolean} True for `endstate-baseline` or `endstate-stack`.
 */
function isEndstatePack(name) {
  const endstatePacks = ['endstate-baseline', 'endstate-stack'];
  return endstatePacks.includes(name);
}
250
+
251
/**
 * Load the task brief for a pack from its `program.md`.
 *
 * A leading `# Program` heading (case-insensitive) and the whitespace that
 * follows it are removed, then the remaining text is trimmed.
 *
 * @param {string} packDir - Directory of the experiment pack.
 * @returns {string} The brief text.
 * @throws If `program.md` cannot be read.
 */
function readTaskBrief(packDir) {
  const raw = fs.readFileSync(path.join(packDir, 'program.md'), 'utf8');
  const withoutHeading = raw.replace(/^# Program\s*/i, '');
  return withoutHeading.trim();
}
256
+
257
/**
 * Load the runner profile for a pack from its optional `runner.json`.
 *
 * A built-in profile is chosen from the pack name (names ending in `stack`
 * get the coordinated-stack profile, everything else the single-model one).
 * When `runner.json` exists and holds a JSON object, its keys are layered on
 * top of that profile; empty or missing `name`, `strategy`, `summary`, and
 * `context_note` fall back to the built-in values.
 *
 * The built-in profile is returned unchanged when the file is missing,
 * unreadable, not valid JSON, or valid JSON that is not a plain object
 * (spreading an array or string would otherwise leak index keys into the
 * config).
 *
 * @param {string} packDir - Directory of the experiment pack.
 * @param {string} name - Pack slug, used to pick the built-in profile.
 * @returns {{name: string, strategy: string, summary: string, context_note: string}}
 */
function readRunnerConfig(packDir, name) {
  const runnerPath = path.join(packDir, 'runner.json');
  const fallback = name.endsWith('stack')
    ? {
        name: 'stack-coordinated',
        strategy: 'stack',
        summary: 'Coordinated stack benchmark run.',
        context_note: 'Runner profile: stack-coordinated\nProtocol: coordinated stack run',
      }
    : {
        name: 'baseline-single',
        strategy: 'single',
        summary: 'Single-model direct benchmark run.',
        context_note: 'Runner profile: baseline-single\nProtocol: single-model direct run',
      };

  if (!fs.existsSync(runnerPath)) return fallback;

  let parsed;
  try {
    parsed = JSON.parse(fs.readFileSync(runnerPath, 'utf8'));
  } catch {
    // Best effort: a broken runner.json must not block the benchmark run.
    return fallback;
  }

  const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
  if (!isPlainObject) return fallback;

  return {
    ...fallback,
    ...parsed,
    name: parsed.name || fallback.name,
    strategy: parsed.strategy || fallback.strategy,
    summary: parsed.summary || fallback.summary,
    context_note: parsed.context_note || fallback.context_note,
  };
}
289
+
290
/**
 * Render the `prompt_context` text stored in a benchmark artifact.
 *
 * The text starts with the pack name, runner name/strategy/summary, and the
 * task brief. When `phaseResults` is given, the prompt and output of each
 * present phase are appended in the fixed order plan, do, review. Sections
 * are separated by a blank line.
 *
 * @param {string} name - Pack slug.
 * @param {{name: string, strategy: string, summary: string}} runnerConfig - Runner profile.
 * @param {string} taskBrief - Task brief text.
 * @param {Object} [phaseResults] - Optional map of phase name to `{prompt, output}`.
 * @returns {string} The joined context text.
 */
function buildPromptContext(name, runnerConfig, taskBrief, phaseResults) {
  const header = [
    `pack: ${name}`,
    `runner: ${runnerConfig.name}`,
    `runner strategy: ${runnerConfig.strategy}`,
    `runner summary:\n${runnerConfig.summary}`,
    `task brief:\n${taskBrief}`,
  ];

  const phaseSections = !phaseResults
    ? []
    : ['plan', 'do', 'review'].flatMap((phase) => {
        const result = phaseResults[phase];
        if (!result) return [];
        return [
          `${phase} prompt:\n${result.prompt}`,
          `${phase} output:\n${result.output || ''}`,
        ];
      });

  return [...header, ...phaseSections].join('\n\n');
}
309
+
310
/**
 * Run (or dry-run) an Endstate benchmark pack and assemble its artifact.
 *
 * Records the git heads of the CLI repo (cwd) and the backend repo before and
 * after the run, the wiki STATUS.md text before and after, the files changed
 * between those heads, the review outcome, inferred test results, and any
 * interventions passed on the command line. The artifact is then scored.
 *
 * With `options.dryRun` the task is not executed: the review status is
 * `draft`, tests are `not_run`, and `changed_files` is empty.
 *
 * @param {string} name - Pack slug; names ending in `stack` select the `stack` track.
 * @param {string} packDir - Directory of the experiment pack.
 * @param {{dryRun: boolean, verbose: boolean, backendPath: string, interventions: string[]}} options
 *   Parsed run options.
 * @returns {{artifact: Object, score: Object}} The scored artifact and the raw score object.
 */
function buildBenchmarkArtifact(name, packDir, options) {
  const track = name.endsWith('stack') ? 'stack' : 'baseline';
  const cwd = process.cwd();
  const backendPath = options.backendPath;
  const taskBrief = readTaskBrief(packDir);
  const runnerConfig = readRunnerConfig(packDir, name);
  const runId = buildRunId(track);
  const beforeCli = getGitHead(cwd);
  const beforeBackend = getGitHead(backendPath);
  const wikiBefore = readTextIfExists(path.join(cwd, 'atris', 'wiki', 'STATUS.md'));

  // Backend context files follow the configured backend path (--backend)
  // instead of always pointing at ../atrisos-backend. With the default
  // backend path this produces exactly '../atrisos-backend/atris/<file>'.
  const backendRoot = backendPath || path.resolve(cwd, '../atrisos-backend');
  const backendReadFile = (...parts) =>
    path.relative(cwd, path.join(backendRoot, ...parts)).split(path.sep).join('/');

  // Pessimistic defaults: overwritten only when the run gets far enough.
  let execution = null;
  let reviewStatus = options.dryRun ? 'draft' : 'fail';
  let reviewSummary = options.dryRun ? 'dry run — benchmark task not executed' : 'benchmark run failed before review';
  let tests = [{
    command: options.dryRun ? 'benchmark dry-run' : '(no explicit test command captured)',
    status: 'not_run',
  }];
  let notes = options.dryRun ? 'dry-run artifact only' : '';

  if (!options.dryRun) {
    try {
      execution = runTaskOnce(
        { task: taskBrief, kind: 'benchmark' },
        {
          verbose: options.verbose,
          benchmarkStrategy: runnerConfig.strategy,
          runnerName: runnerConfig.name,
          extraReadFiles: [
            'atris/features/endstate/contract.md',
            'atris/features/endstate/artifact-schema.json',
            path.relative(cwd, path.join(packDir, 'program.md')),
            backendReadFile('atris', 'MAP.md'),
            backendReadFile('atris', 'TODO.md'),
          ],
          contextNote: [
            `Project Endstate track: ${track}`,
            `Pack: atris/experiments/${name}/program.md`,
            runnerConfig.context_note,
            'Stay within the exact Level 1 contract. The outer runner records the receipt.',
          ].join('\n'),
        }
      );

      reviewStatus = execution.success ? 'pass' : 'fail';
      reviewSummary = summarizeReview(execution.reviewOutput);
      tests = inferTestResults(execution.reviewOutput);
    } catch (error) {
      // A throwing runner still produces a (failed) receipt rather than crashing.
      reviewStatus = 'fail';
      reviewSummary = summarizeReview(error?.message || String(error));
      notes = 'runner threw before completion';
    }
  }

  const afterCli = getGitHead(cwd);
  const afterBackend = getGitHead(backendPath);
  // NOTE(review): the '../atrisos-backend/' prefix below is kept as-is; it
  // looks like a display prefix for backend files — confirm whether it should
  // also follow --backend.
  const changedFiles = options.dryRun
    ? []
    : [
        ...collectChangedFiles(cwd, beforeCli, afterCli),
        ...collectChangedFiles(backendPath, beforeBackend, afterBackend, '../atrisos-backend/'),
      ];
  const wikiAfter = readTextIfExists(path.join(cwd, 'atris', 'wiki', 'STATUS.md'));

  const artifact = {
    run_id: runId,
    track,
    repo_commits: {
      atris_cli: beforeCli || 'missing',
      atrisos_backend: beforeBackend || 'missing',
    },
    repo_commits_after: {
      atris_cli: afterCli || 'missing',
      atrisos_backend: afterBackend || 'missing',
    },
    task_brief: taskBrief,
    prompt_context: buildPromptContext(name, runnerConfig, taskBrief, execution?.phaseResults),
    changed_files: changedFiles,
    tests,
    review: {
      status: reviewStatus,
      summary: reviewSummary,
    },
    wiki: buildWikiArtifact(wikiBefore, wikiAfter),
    elapsed_seconds: execution?.elapsedSeconds || 0,
    interventions: {
      count: options.interventions.length,
      events: options.interventions,
    },
    notes: [notes, `runner=${runnerConfig.name}`].filter(Boolean).join(' | '),
  };

  // Scoring needs the finished artifact, so the score is attached afterwards.
  const score = scoreEndstateArtifact(artifact);
  artifact.score = score.total;
  artifact.score_breakdown = score.breakdown;

  return { artifact, score };
}
408
+
409
/**
 * Handler for `atris experiments run <slug> [flags]`.
 *
 * For ordinary packs this runs the pack's `loop.py` (or only announces it on
 * a dry run). For the Endstate packs it builds a benchmark artifact, writes
 * it into the pack directory, appends a row to the pack's `results.tsv`, and
 * prints a short summary.
 *
 * Exits the process with code 1 when the slug is missing, the pack or its
 * `loop.py` does not exist, the Endstate contract files are missing, or a
 * non-dry Endstate run ends with a failed review.
 *
 * @param {string} name - Pack slug.
 * @param {...string} args - Remaining CLI flags, parsed by `parseRunOptions`.
 * @returns {void}
 */
function experimentsRun(name, ...args) {
  const { experimentsDir } = ensureExperimentsFramework();

  if (!name) {
    console.error('Usage: atris experiments run <slug> [--dry-run] [--verbose]');
    process.exit(1);
  }

  const packDir = path.join(experimentsDir, name);
  const packExists = fs.existsSync(packDir);
  if (!packExists) {
    console.error(`✗ Experiment "${name}" not found at atris/experiments/${name}/`);
    process.exit(1);
  }

  const options = parseRunOptions(args);

  if (isEndstatePack(name)) {
    // Endstate runs require the contract and schema to be present in the repo.
    const endstateDir = ['atris', 'features', 'endstate'];
    const requiredFiles = [
      path.join(process.cwd(), ...endstateDir, 'artifact-schema.json'),
      path.join(process.cwd(), ...endstateDir, 'contract.md'),
    ];
    const contractPresent = requiredFiles.every((file) => fs.existsSync(file));
    if (!contractPresent) {
      console.error('✗ Endstate contract missing. Expected atris/features/endstate/{contract.md,artifact-schema.json}.');
      process.exit(1);
    }

    const { artifact, score } = buildBenchmarkArtifact(name, packDir, options);
    const artifactPath = writeArtifact(packDir, artifact);
    appendResultsRow(path.join(packDir, 'results.tsv'), artifactPath, artifact, score);

    const summaryLines = [
      `✓ Endstate ${artifact.track} run recorded`,
      ` artifact: ${path.relative(process.cwd(), artifactPath)}`,
      ` score: ${score.total}/100`,
      ` review: ${artifact.review.status}`,
      ` interventions: ${artifact.interventions.count}`,
    ];
    for (const line of summaryLines) {
      console.log(line);
    }

    // A real (non-dry) run with a failed review fails the command.
    const failedRealRun = artifact.review.status === 'fail' && !options.dryRun;
    if (failedRealRun) {
      process.exit(1);
    }
    return;
  }

  // Ordinary pack: execute its loop.py from inside the pack directory.
  const loopPath = path.join(packDir, 'loop.py');
  if (!fs.existsSync(loopPath)) {
    console.error(`✗ Experiment "${name}" has no loop.py to run.`);
    process.exit(1);
  }

  if (options.dryRun) {
    console.log(`✓ Dry run: would execute atris/experiments/${name}/loop.py`);
    return;
  }

  runPython(loopPath, [], packDir);
}
460
+
461
/**
 * Format one summary line of the Endstate comparison output.
 *
 * @param {string} label - Track label shown at the start of the line.
 * @param {{artifact: Object, filePath: string}} entry - Artifact entry to describe.
 * @returns {string} Line with score, review status, intervention count, and
 *   the artifact path relative to the current working directory.
 */
function formatCompareLine(label, entry) {
  const { artifact } = entry;
  const fields = [
    `${label}: ${getArtifactScore(artifact)}/100`,
    `review: ${artifact.review?.status || 'unknown'}`,
    `interventions: ${artifact.interventions?.count || 0}`,
    `artifact: ${path.relative(process.cwd(), entry.filePath)}`,
  ];
  return ` ${fields.join(' | ')}`;
}
468
+
469
/**
 * Handler for `atris experiments compare endstate`.
 *
 * Reads the latest artifact of the baseline and stack packs, compares them,
 * and prints both summary lines followed by the decision and its reason.
 * Exits with code 1 for any target other than `endstate`, or when reading or
 * comparing the artifacts throws.
 *
 * @param {string} [target='endstate'] - Comparison target; only `endstate` is supported.
 * @returns {void}
 */
function experimentsCompare(target = 'endstate') {
  const { experimentsDir } = ensureExperimentsFramework(process.cwd(), { silent: true });

  if (target !== 'endstate') {
    console.error('Usage: atris experiments compare endstate');
    process.exit(1);
  }

  const packDirFor = (slug) => path.join(experimentsDir, slug);
  const baselineDir = packDirFor('endstate-baseline');
  const stackDir = packDirFor('endstate-stack');

  try {
    const baselineEntry = readLatestArtifact(baselineDir);
    const stackEntry = readLatestArtifact(stackDir);
    const { winner, reason } = compareEndstateArtifacts(baselineEntry, stackEntry);

    console.log('✓ Endstate comparison ready');
    console.log(formatCompareLine('baseline', baselineEntry));
    console.log(formatCompareLine('stack', stackEntry));
    console.log('');
    console.log(winner === 'stack' ? 'Decision: stack wins.' : 'Decision: no winner yet.');
    console.log(`Reason: ${reason}`);
  } catch (error) {
    console.error(`✗ ${error.message || error}`);
    process.exit(1);
  }
}
500
+
501
/**
 * Handler for `atris experiments replay endstate`.
 *
 * Runs the benchmark flow end to end, in order: validate both Endstate packs,
 * dry-run both packs, then compare the latest receipts. Each step is
 * announced with a `Replay:` heading and steps are separated by a blank line.
 * Exits with code 1 for any target other than `endstate`.
 *
 * @param {string} [target='endstate'] - Replay target; only `endstate` is supported.
 * @returns {void}
 */
function experimentsReplay(target = 'endstate') {
  if (target !== 'endstate') {
    console.error('Usage: atris experiments replay endstate');
    process.exit(1);
  }

  const steps = [
    ['Replay: validate baseline pack', () => experimentsValidate('endstate-baseline')],
    ['Replay: validate stack pack', () => experimentsValidate('endstate-stack')],
    ['Replay: baseline dry run', () => experimentsRun('endstate-baseline', '--dry-run')],
    ['Replay: stack dry run', () => experimentsRun('endstate-stack', '--dry-run')],
    ['Replay: compare latest receipts', () => experimentsCompare('endstate')],
  ];

  steps.forEach(([heading, runStep], index) => {
    // Blank line between steps, but not before the first one.
    if (index > 0) {
      console.log('');
    }
    console.log(heading);
    runStep();
  });
}
522
+
186
523
  function experimentsCommand(subcommand, ...args) {
187
524
  switch (subcommand) {
188
525
  case 'init':
@@ -192,6 +529,12 @@ function experimentsCommand(subcommand, ...args) {
192
529
  return experimentsValidate(args[0]);
193
530
  case 'benchmark':
194
531
  return experimentsBenchmark(args[0] || 'all');
532
+ case 'compare':
533
+ return experimentsCompare(args[0] || 'endstate');
534
+ case 'replay':
535
+ return experimentsReplay(args[0] || 'endstate');
536
+ case 'run':
537
+ return experimentsRun(args[0], ...args.slice(1));
195
538
  default:
196
539
  console.log('');
197
540
  console.log('Usage: atris experiments <subcommand> [name]');
@@ -199,12 +542,18 @@ function experimentsCommand(subcommand, ...args) {
199
542
  console.log('Subcommands:');
200
543
  console.log(' init [slug] Prepare atris/experiments/ or scaffold a new pack');
201
544
  console.log(' validate [path|slug] Run structural validation on packs or a single pack');
545
+ console.log(' run <slug> Execute a pack or record an Endstate benchmark receipt');
546
+ console.log(' compare endstate Compare the latest baseline and stack receipts');
547
+ console.log(' replay endstate Validate, dry-run, and compare the public benchmark flow');
202
548
  console.log(' benchmark [mode] Run validate/runtime/all benchmark harness');
203
549
  console.log('');
204
550
  console.log('Examples:');
205
551
  console.log(' atris experiments init');
206
552
  console.log(' atris experiments init self-heal');
207
553
  console.log(' atris experiments validate');
554
+ console.log(' atris experiments run endstate-baseline --dry-run');
555
+ console.log(' atris experiments compare endstate');
556
+ console.log(' atris experiments replay endstate');
208
557
  console.log(' atris experiments benchmark runtime');
209
558
  console.log('');
210
559
  }
@@ -213,4 +562,6 @@ function experimentsCommand(subcommand, ...args) {
213
562
  module.exports = {
214
563
  experimentsCommand,
215
564
  ensureExperimentsFramework,
565
+ buildBenchmarkArtifact,
566
+ parseRunOptions,
216
567
  };
@@ -0,0 +1,150 @@
1
+ /**
2
+ * Feedback command for Atris CLI
3
+ *
4
+ * Usage:
5
+ * atris feedback "message here" - Submit feedback
6
+ * atris feedback - List your feedback
7
+ * atris feedback list - List your feedback
8
+ */
9
+
10
+ const fs = require('fs');
11
+ const path = require('path');
12
+ const { loadCredentials } = require('../utils/auth');
13
+ const { apiRequestJson } = require('../utils/api');
14
+
15
/**
 * Load the stored credentials, or exit when the user is not logged in.
 *
 * Prints a login hint and exits with code 1 when no credentials or no token
 * are stored.
 *
 * @returns {{token: string, email: string}} Token plus email (`'unknown'` when absent).
 */
function getAuth() {
  const creds = loadCredentials();
  const isLoggedIn = Boolean(creds && creds.token);

  if (!isLoggedIn) {
    console.error('Not logged in. Run: atris login');
    process.exit(1);
  }

  const { token, email } = creds;
  return { token, email: email || 'unknown' };
}
23
+
24
/**
 * Resolve the business id to attach to feedback requests.
 *
 * Lookup order:
 *   1. `business_id` in `.atris/business.json` under the current directory.
 *   2. `business_id` of the first entry in `~/.atris/businesses.json`.
 *
 * Missing, unreadable, or malformed files are skipped without error.
 *
 * @returns {*} The stored business id, or `null` when none is found.
 */
function getBusinessId() {
  // Best-effort JSON reader: any failure counts as "nothing stored here".
  const readJsonIfPresent = (file) => {
    if (!fs.existsSync(file)) return undefined;
    try {
      return JSON.parse(fs.readFileSync(file, 'utf8'));
    } catch {
      return undefined;
    }
  };

  // 1. Project-local business file
  const localBusiness = readJsonIfPresent(path.join(process.cwd(), '.atris', 'business.json'));
  if (localBusiness?.business_id) {
    return localBusiness.business_id;
  }

  // 2. Global registry: only the first listed business is considered
  const home = require('os').homedir();
  const businesses = readJsonIfPresent(path.join(home, '.atris', 'businesses.json'));
  if (businesses != null) {
    const [firstSlug] = Object.keys(businesses);
    const firstId = firstSlug === undefined ? undefined : businesses[firstSlug]?.business_id;
    if (firstId) {
      return firstId;
    }
  }

  return null;
}
49
+
50
/**
 * Submit a feedback message through `POST /feedback`.
 *
 * The request body carries the message, `source: 'cli'`, and the business id
 * when one can be resolved. Prints a confirmation, plus the feedback id when
 * the response contains one. Exits with code 1 when the message is empty or
 * the request is not ok.
 *
 * @param {string} message - Feedback text.
 * @returns {Promise<void>}
 */
async function submitFeedback(message) {
  if (!message) {
    console.error('Usage: atris feedback "your message here"');
    process.exit(1);
  }

  const { token } = getAuth();
  const businessId = getBusinessId();

  // business_id is only included when a business could be resolved.
  const body = {
    message,
    source: 'cli',
    ...(businessId ? { business_id: businessId } : {}),
  };

  const result = await apiRequestJson('/feedback', { method: 'POST', token, body });

  if (!result.ok) {
    console.error(`Error: ${result.error || 'Failed to submit feedback'}`);
    process.exit(1);
  }

  console.log('Feedback submitted.');
  const feedbackId = result.data?.feedback_id;
  if (feedbackId) {
    console.log(` ID: ${result.data.feedback_id}`);
  }
}
83
+
84
/**
 * List up to 20 feedback items through `GET /feedback` and print them.
 *
 * The request is scoped to the resolved business id when one exists. Each
 * item is printed as its status (default `open`) and a preview of at most 80
 * characters, followed by a line with the creation date and the first eight
 * characters of the id when either is present. Exits with code 1 when the
 * request is not ok.
 *
 * @returns {Promise<void>}
 */
async function listFeedback() {
  const { token } = getAuth();
  const businessId = getBusinessId();

  let url = '/feedback?limit=20';
  if (businessId) {
    // The id comes from a local JSON file; encode it so characters such as
    // "&", "#" or spaces cannot corrupt the query string.
    url += `&business_id=${encodeURIComponent(businessId)}`;
  }

  const result = await apiRequestJson(url, {
    method: 'GET',
    token,
  });

  if (!result.ok) {
    console.error(`Error: ${result.error || 'Failed to fetch feedback'}`);
    process.exit(1);
  }

  const items = result.data?.feedback || [];

  if (items.length === 0) {
    console.log('No feedback found.');
    return;
  }

  console.log(`${items.length} feedback item${items.length !== 1 ? 's' : ''}:\n`);

  for (const item of items) {
    const date = item.created_at
      ? new Date(item.created_at).toLocaleDateString('en-US', { month: 'short', day: 'numeric', year: 'numeric' })
      : '';
    const status = item.status || 'open';
    const msg = item.message || '';
    const preview = msg.length > 80 ? msg.substring(0, 80) + '...' : msg;

    console.log(` [${status}] ${preview}`);
    if (date || item.id) {
      const parts = [];
      if (date) parts.push(date);
      // String() keeps numeric ids from throwing on substring().
      if (item.id) parts.push(String(item.id).substring(0, 8));
      console.log(` ${parts.join(' ')}`);
    }
    console.log('');
  }
}
130
+
131
/**
 * Entry point for `atris feedback`.
 *
 * Dispatches on the first argument after the command name: nothing or `list`
 * lists feedback, `--help` / `-h` prints usage, and anything else is treated
 * as a feedback message made of all remaining arguments joined by spaces.
 *
 * @returns {Promise<void>}
 */
async function feedbackCommand() {
  const rest = process.argv.slice(3);
  const [subcommand] = rest;

  if (!subcommand || subcommand === 'list') {
    await listFeedback();
    return;
  }

  if (subcommand === '--help' || subcommand === '-h') {
    const usage = [
      '',
      'Usage:',
      ' atris feedback "message" Submit feedback',
      ' atris feedback List your feedback',
      ' atris feedback list List your feedback',
      '',
    ];
    for (const line of usage) {
      console.log(line);
    }
    return;
  }

  // Everything else is a feedback message
  await submitFeedback(rest.join(' '));
}
149
+
150
+ module.exports = { feedbackCommand };