@arvorco/relentless 0.3.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/.claude/commands/relentless.constitution.md +1 -1
  2. package/.claude/commands/relentless.convert.md +25 -0
  3. package/.claude/commands/relentless.specify.md +1 -1
  4. package/.claude/skills/analyze/SKILL.md +113 -40
  5. package/.claude/skills/analyze/templates/analysis-report.md +138 -0
  6. package/.claude/skills/checklist/SKILL.md +143 -51
  7. package/.claude/skills/checklist/templates/checklist.md +43 -11
  8. package/.claude/skills/clarify/SKILL.md +70 -11
  9. package/.claude/skills/constitution/SKILL.md +61 -3
  10. package/.claude/skills/constitution/templates/constitution.md +241 -160
  11. package/.claude/skills/constitution/templates/prompt.md +150 -20
  12. package/.claude/skills/convert/SKILL.md +248 -0
  13. package/.claude/skills/implement/SKILL.md +82 -34
  14. package/.claude/skills/plan/SKILL.md +136 -27
  15. package/.claude/skills/plan/templates/plan.md +92 -9
  16. package/.claude/skills/specify/SKILL.md +110 -19
  17. package/.claude/skills/specify/scripts/bash/create-new-feature.sh +2 -2
  18. package/.claude/skills/specify/scripts/bash/setup-plan.sh +1 -1
  19. package/.claude/skills/specify/templates/spec.md +40 -5
  20. package/.claude/skills/tasks/SKILL.md +75 -1
  21. package/.claude/skills/tasks/templates/tasks.md +5 -4
  22. package/CHANGELOG.md +63 -1
  23. package/MANUAL.md +40 -0
  24. package/README.md +263 -11
  25. package/bin/relentless.ts +292 -5
  26. package/package.json +2 -2
  27. package/relentless/config.json +46 -2
  28. package/relentless/constitution.md +2 -2
  29. package/relentless/prompt.md +97 -18
  30. package/src/agents/amp.ts +53 -13
  31. package/src/agents/claude.ts +70 -15
  32. package/src/agents/codex.ts +73 -14
  33. package/src/agents/droid.ts +68 -14
  34. package/src/agents/exec.ts +96 -0
  35. package/src/agents/gemini.ts +59 -16
  36. package/src/agents/opencode.ts +188 -9
  37. package/src/cli/fallback-order.ts +210 -0
  38. package/src/cli/index.ts +63 -0
  39. package/src/cli/mode-flag.ts +198 -0
  40. package/src/cli/review-flags.ts +192 -0
  41. package/src/config/loader.ts +16 -1
  42. package/src/config/schema.ts +157 -2
  43. package/src/execution/runner.ts +144 -21
  44. package/src/init/scaffolder.ts +285 -25
  45. package/src/prd/parser.ts +92 -1
  46. package/src/prd/types.ts +136 -0
  47. package/src/review/index.ts +92 -0
  48. package/src/review/prompt.ts +293 -0
  49. package/src/review/runner.ts +337 -0
  50. package/src/review/tasks/docs.ts +529 -0
  51. package/src/review/tasks/index.ts +80 -0
  52. package/src/review/tasks/lint.ts +436 -0
  53. package/src/review/tasks/quality.ts +760 -0
  54. package/src/review/tasks/security.ts +452 -0
  55. package/src/review/tasks/test.ts +456 -0
  56. package/src/review/tasks/typecheck.ts +323 -0
  57. package/src/review/types.ts +139 -0
  58. package/src/routing/cascade.ts +310 -0
  59. package/src/routing/classifier.ts +338 -0
  60. package/src/routing/estimate.ts +270 -0
  61. package/src/routing/fallback.ts +512 -0
  62. package/src/routing/index.ts +124 -0
  63. package/src/routing/registry.ts +501 -0
  64. package/src/routing/report.ts +570 -0
  65. package/src/routing/router.ts +287 -0
  66. package/src/tui/App.tsx +2 -0
  67. package/src/tui/TUIRunner.tsx +103 -8
  68. package/src/tui/components/CurrentStory.tsx +23 -1
  69. package/src/tui/hooks/useTUI.ts +1 -0
  70. package/src/tui/types.ts +9 -0
  71. package/.claude/skills/specify/scripts/bash/update-agent-context.sh +0 -799
@@ -0,0 +1,501 @@
1
+ /**
2
+ * Model Registry Module
3
+ *
4
+ * Contains model profiles for all supported AI coding agents and harnesses.
5
+ * Provides data structures and query functions for smart model routing.
6
+ *
7
+ * @module src/routing/registry
8
+ */
9
+
10
+ import { z } from "zod";
11
+ import { HarnessNameSchema, type HarnessName } from "../config/schema";
12
+
/**
 * Cost tier used to classify models for routing.
 *
 * Values, listed from lowest to highest tier:
 * - `free`     — zero-cost models (e.g. OpenCode Zen)
 * - `cheap`    — low-cost models (e.g. Haiku, GPT-5.2 reasoning-effort low, Gemini Flash)
 * - `standard` — balanced cost/performance (e.g. Sonnet, GPT-5.2 reasoning-effort medium)
 * - `premium`  — high-quality models (e.g. Gemini Pro)
 * - `sota`     — state-of-the-art models (e.g. Opus, GPT-5.2 reasoning-effort high)
 */
export const ModelTierSchema = z.enum([
  "free",
  "cheap",
  "standard",
  "premium",
  "sota",
]);

/** Union of the tier literals accepted by {@link ModelTierSchema}. */
export type ModelTier = z.infer<typeof ModelTierSchema>;
23
+
/**
 * Schema describing a single model: identity, owning harness, pricing,
 * capability hints, and the CLI arguments used to select it.
 */
export const ModelProfileSchema = z.object({
  // --- Identity ---
  /** Registry-unique model identifier, e.g. "opus-4.5". */
  id: z.string(),
  /** Name shown to humans, e.g. "Claude Opus 4.5". */
  displayName: z.string(),
  /** The harness through which this model is invoked. */
  harness: HarnessNameSchema,
  /** Cost tier consulted when making routing decisions. */
  tier: ModelTierSchema,

  // --- Pricing (USD per million tokens) ---
  /** Price of input tokens. */
  inputCost: z.number(),
  /** Price of output tokens. */
  outputCost: z.number(),

  // --- Capability data ---
  /** SWE-bench score, when one is available. */
  sweBenchScore: z.number().optional(),
  /** Largest context window the model accepts, in tokens. */
  contextWindow: z.number(),
  /** Generation speed in tokens per second, when known. */
  tokensPerSecond: z.number().optional(),
  /** Task types the model is particularly good at. */
  strengths: z.array(z.string()),
  /** Known weaknesses of the model. */
  limitations: z.array(z.string()),

  // --- CLI selection ---
  /** Flag passed to the harness CLI to choose a model. */
  cliFlag: z.string(),
  /** Value supplied alongside {@link cliFlag}. */
  cliValue: z.string(),
  /** Extra CLI arguments needed to select this model, if any. */
  cliArgs: z.array(z.string()).optional(),
});

/** Static type inferred from {@link ModelProfileSchema}. */
export type ModelProfile = z.infer<typeof ModelProfileSchema>;
58
+
/**
 * Schema describing a harness (an AI coding agent) together with the
 * models it exposes and how a model is chosen on it.
 */
export const HarnessProfileSchema = z.object({
  /** Identifier of the harness. */
  name: HarnessNameSchema,
  /** Name shown to humans. */
  displayName: z.string(),
  /** Model profiles offered by this harness. */
  models: z.array(ModelProfileSchema),
  /** Model used when the caller does not pick one. */
  defaultModel: z.string(),
  /** True when the harness lets the caller choose a model. */
  supportsModelSelection: z.boolean(),
  /** Mechanism by which a model is chosen: CLI flag, environment, or config. */
  modelSelectionMethod: z.enum(["flag", "env", "config"]),
});

/** Static type inferred from {@link HarnessProfileSchema}. */
export type HarnessProfile = z.infer<typeof HarnessProfileSchema>;
77
+
/**
 * Complete registry of all available models across all harnesses.
 *
 * Entries are grouped by harness (Claude, Codex, Droid, OpenCode Zen, Amp,
 * Gemini). Order *within* a group is not strictly by tier, so callers that
 * need a tier-sorted list should use `getModelsByHarness` instead of relying
 * on declaration order.
 *
 * Costs are USD per million tokens; `contextWindow` is in tokens.
 */
export const MODEL_REGISTRY: ModelProfile[] = [
  // ============== Claude Models ==============
  {
    id: "opus-4.5",
    displayName: "Claude Opus 4.5",
    harness: "claude",
    tier: "sota",
    inputCost: 5.0,
    outputCost: 25.0,
    sweBenchScore: 80.9,
    contextWindow: 200000,
    strengths: ["code_review", "architecture", "debugging", "final_review", "complex_reasoning"],
    limitations: ["expensive", "slower_start"],
    cliFlag: "--model",
    cliValue: "claude-opus-4-5-20251101",
  },
  {
    id: "sonnet-4.5",
    displayName: "Claude Sonnet 4.5",
    harness: "claude",
    tier: "standard",
    inputCost: 3.0,
    outputCost: 15.0,
    contextWindow: 200000,
    strengths: ["frontend", "refactoring", "daily_coding", "balanced"],
    limitations: [],
    cliFlag: "--model",
    // Same dated snapshot ID as the Droid entry for Claude Sonnet 4.5 below.
    cliValue: "claude-sonnet-4-5-20250929",
  },
  {
    id: "haiku-4.5",
    displayName: "Claude Haiku 4.5",
    harness: "claude",
    tier: "cheap",
    inputCost: 1.0,
    outputCost: 5.0,
    sweBenchScore: 73.0,
    contextWindow: 200000,
    tokensPerSecond: 200,
    strengths: ["prototyping", "scaffolding", "simple_tasks", "fast"],
    limitations: ["less_reasoning"],
    cliFlag: "--model",
    cliValue: "claude-haiku-4-5-20251001",
  },

  // ============== Codex (OpenAI) Models ==============
  // All Codex entries select the same model value ("gpt-5.2"); they differ
  // only in the reasoning_effort override passed through `cliArgs`.
  {
    id: "gpt-5.2-high",
    displayName: "GPT-5.2 (reasoning-effort high)",
    harness: "codex",
    tier: "sota",
    inputCost: 1.75,
    outputCost: 14.0,
    sweBenchScore: 80.0,
    contextWindow: 128000,
    strengths: ["reasoning", "control_flow", "overnight_runs", "complex_logic"],
    limitations: [],
    cliFlag: "--model",
    cliValue: "gpt-5.2",
    cliArgs: ["-c", "reasoning_effort=\"high\""],
  },
  {
    id: "gpt-5.2-xhigh",
    displayName: "GPT-5.2 (reasoning-effort xhigh)",
    harness: "codex",
    tier: "sota",
    inputCost: 1.75,
    outputCost: 14.0,
    sweBenchScore: 80.0,
    contextWindow: 128000,
    strengths: ["deep_reasoning", "complex_logic", "long_horizon_tasks"],
    limitations: [],
    cliFlag: "--model",
    cliValue: "gpt-5.2",
    cliArgs: ["-c", "reasoning_effort=\"xhigh\""],
  },
  {
    id: "gpt-5.2-medium",
    displayName: "GPT-5.2 (reasoning-effort medium)",
    harness: "codex",
    tier: "standard",
    inputCost: 1.25,
    outputCost: 10.0,
    contextWindow: 128000,
    strengths: ["balanced", "good_review", "general_coding"],
    limitations: [],
    cliFlag: "--model",
    cliValue: "gpt-5.2",
    cliArgs: ["-c", "reasoning_effort=\"medium\""],
  },
  {
    id: "gpt-5.2-low",
    displayName: "GPT-5.2 (reasoning-effort low)",
    harness: "codex",
    tier: "cheap",
    inputCost: 0.75,
    outputCost: 6.0,
    contextWindow: 128000,
    strengths: ["fast", "simple_tasks", "cost_effective"],
    limitations: ["less_accuracy"],
    cliFlag: "--model",
    cliValue: "gpt-5.2",
    cliArgs: ["-c", "reasoning_effort=\"low\""],
  },

  // ============== Droid Models ==============
  // Droid entries use the CLI model value itself as the registry id.
  {
    id: "claude-opus-4-5-20251101",
    displayName: "Claude Opus 4.5 (via Droid)",
    harness: "droid",
    tier: "sota",
    inputCost: 5.0,
    outputCost: 25.0,
    contextWindow: 200000,
    strengths: ["architecture", "debugging", "complex_reasoning"],
    limitations: ["expensive"],
    cliFlag: "-m",
    cliValue: "claude-opus-4-5-20251101",
  },
  {
    id: "claude-sonnet-4-5-20250929",
    displayName: "Claude Sonnet 4.5 (via Droid)",
    harness: "droid",
    tier: "standard",
    inputCost: 3.0,
    outputCost: 15.0,
    contextWindow: 200000,
    strengths: ["balanced", "daily_coding"],
    limitations: [],
    cliFlag: "-m",
    cliValue: "claude-sonnet-4-5-20250929",
  },
  {
    id: "claude-haiku-4-5-20251001",
    displayName: "Claude Haiku 4.5 (via Droid)",
    harness: "droid",
    tier: "cheap",
    inputCost: 1.0,
    outputCost: 5.0,
    contextWindow: 200000,
    strengths: ["fast", "simple_tasks"],
    limitations: ["less_reasoning"],
    cliFlag: "-m",
    cliValue: "claude-haiku-4-5-20251001",
  },
  {
    id: "gpt-5.2",
    displayName: "GPT-5.2 (via Droid)",
    harness: "droid",
    tier: "standard",
    inputCost: 1.25,
    outputCost: 10.0,
    contextWindow: 128000,
    strengths: ["balanced", "general_coding"],
    limitations: [],
    cliFlag: "-m",
    cliValue: "gpt-5.2",
  },
  {
    id: "gpt-5.1",
    displayName: "GPT-5.1 (via Droid)",
    harness: "droid",
    tier: "standard",
    inputCost: 1.0,
    outputCost: 8.0,
    contextWindow: 128000,
    strengths: ["general_tasks"],
    limitations: [],
    cliFlag: "-m",
    cliValue: "gpt-5.1",
  },
  {
    id: "gpt-5.1-codex",
    displayName: "GPT-5.1 Codex (via Droid)",
    harness: "droid",
    tier: "standard",
    inputCost: 1.0,
    outputCost: 8.0,
    contextWindow: 128000,
    strengths: ["coding", "refactoring"],
    limitations: [],
    cliFlag: "-m",
    cliValue: "gpt-5.1-codex",
  },
  {
    id: "gpt-5.1-codex-max",
    displayName: "GPT-5.1 Codex Max (via Droid)",
    harness: "droid",
    tier: "premium",
    inputCost: 1.5,
    outputCost: 12.0,
    contextWindow: 128000,
    strengths: ["long_context", "complex_tasks"],
    limitations: [],
    cliFlag: "-m",
    cliValue: "gpt-5.1-codex-max",
  },
  {
    id: "gemini-3-pro-preview",
    displayName: "Gemini 3 Pro Preview (via Droid)",
    harness: "droid",
    tier: "premium",
    inputCost: 3.0,
    outputCost: 15.0,
    contextWindow: 1000000,
    strengths: ["long_context", "frontend_ui"],
    limitations: [],
    cliFlag: "-m",
    cliValue: "gemini-3-pro-preview",
  },

  // ============== OpenCode Zen Models (Free Tier) ==============
  {
    id: "glm-4.7",
    displayName: "GLM-4.7",
    harness: "opencode",
    tier: "free",
    inputCost: 0.0,
    outputCost: 0.0,
    sweBenchScore: 73.8,
    contextWindow: 128000,
    strengths: ["multilingual", "backend", "tool_use", "agentic", "free"],
    limitations: ["complex_ui"],
    cliFlag: "--model",
    cliValue: "opencode/glm-4.7-free",
  },
  {
    id: "grok-code-fast-1",
    displayName: "Grok Code Fast 1",
    harness: "opencode",
    tier: "free",
    inputCost: 0.0,
    outputCost: 0.0,
    contextWindow: 128000,
    tokensPerSecond: 92,
    strengths: ["speed", "tool_calling", "agentic", "bug_fixes", "fastest"],
    limitations: ["tailwind_v3"],
    cliFlag: "--model",
    cliValue: "opencode/grok-code",
  },
  {
    id: "minimax-m2.1",
    displayName: "MiniMax M2.1",
    harness: "opencode",
    tier: "free",
    inputCost: 0.0,
    outputCost: 0.0,
    contextWindow: 128000,
    strengths: ["fullstack", "web_mobile", "reviews", "free"],
    limitations: ["newer_less_docs"],
    cliFlag: "--model",
    cliValue: "opencode/minimax-m2.1-free",
  },

  // ============== Amp Models ==============
  {
    id: "amp-free",
    displayName: "Amp Free",
    harness: "amp",
    tier: "free",
    inputCost: 0.0,
    outputCost: 0.0,
    contextWindow: 128000,
    strengths: ["interactive", "refactoring", "smart_mode", "free_daily_grant"],
    limitations: ["context_caps", "no_execute_mode", "ads"],
    cliFlag: "-m",
    cliValue: "free",
  },
  {
    id: "amp-smart",
    displayName: "Amp Smart",
    harness: "amp",
    tier: "standard",
    inputCost: 2.0,
    outputCost: 10.0,
    contextWindow: 128000,
    strengths: ["intelligent_routing", "best_available", "smart_mode"],
    limitations: [],
    cliFlag: "-m",
    cliValue: "smart",
  },

  // ============== Gemini Models ==============
  {
    id: "gemini-3-pro",
    displayName: "Gemini 3 Pro",
    harness: "gemini",
    tier: "premium",
    inputCost: 3.0,
    outputCost: 15.0,
    contextWindow: 1000000,
    strengths: ["frontend_ui", "webdev_arena_leader", "algorithms", "long_context"],
    limitations: [],
    cliFlag: "--model",
    cliValue: "gemini-3-pro",
  },
  {
    id: "gemini-3-flash",
    displayName: "Gemini 3 Flash",
    harness: "gemini",
    tier: "cheap",
    inputCost: 0.5,
    outputCost: 3.0,
    contextWindow: 1000000,
    strengths: ["fast", "long_context", "simple_tasks", "affordable"],
    limitations: [],
    cliFlag: "--model",
    cliValue: "gemini-3-flash",
  },
];
392
+
/**
 * One profile per supported harness.
 *
 * Each profile's `models` list is the subset of MODEL_REGISTRY whose
 * `harness` matches, in registry order. Every harness listed here supports
 * model selection, and all of them select the model via a CLI flag.
 */
export const HARNESS_PROFILES: HarnessProfile[] = (
  [
    // [harness name, display name, default model id]
    ["claude", "Claude Code", "sonnet-4.5"],
    ["codex", "Codex CLI", "gpt-5.2-medium"],
    ["droid", "Droid", "claude-opus-4-5-20251101"],
    ["opencode", "OpenCode Zen", "glm-4.7"],
    ["amp", "Amp", "amp-free"],
    ["gemini", "Gemini CLI", "gemini-3-flash"],
  ] as const
).map(
  ([name, displayName, defaultModel]): HarnessProfile => ({
    name,
    displayName,
    models: MODEL_REGISTRY.filter((model) => model.harness === name),
    defaultModel,
    supportsModelSelection: true,
    modelSelectionMethod: "flag",
  }),
);
446
+
/**
 * Look up a model profile by its registry identifier.
 *
 * @param id - Model identifier to search for (e.g., "opus-4.5")
 * @returns The first registry entry whose `id` equals `id`, or `undefined`
 *          when no entry matches
 */
export function getModelById(id: string): ModelProfile | undefined {
  for (const candidate of MODEL_REGISTRY) {
    if (candidate.id === id) {
      return candidate;
    }
  }
  return undefined;
}
456
+
/**
 * List every model offered by a harness, highest tier first.
 *
 * The result is a new array sorted sota, premium, standard, cheap, free;
 * MODEL_REGISTRY itself is not reordered.
 *
 * @param harness - The harness name
 * @returns Model profiles belonging to the harness, sorted by tier
 */
export function getModelsByHarness(harness: HarnessName): ModelProfile[] {
  const tierOrder: ModelTier[] = ["sota", "premium", "standard", "cheap", "free"];
  const rankOf = (model: ModelProfile): number => tierOrder.indexOf(model.tier);

  // Collect into a fresh array so the in-place sort below never touches the registry.
  const matching: ModelProfile[] = [];
  for (const model of MODEL_REGISTRY) {
    if (model.harness === harness) {
      matching.push(model);
    }
  }

  matching.sort((left, right) => rankOf(left) - rankOf(right));
  return matching;
}
470
+
/**
 * List every model in a given tier, regardless of harness.
 *
 * @param tier - The model tier to select
 * @returns Matching model profiles, in registry order
 */
export function getModelsByTier(tier: ModelTier): ModelProfile[] {
  const matching: ModelProfile[] = [];
  for (const model of MODEL_REGISTRY) {
    if (model.tier === tier) {
      matching.push(model);
    }
  }
  return matching;
}
480
+
/**
 * Look up the default model ID configured for a harness.
 *
 * @param harness - The harness name
 * @returns The harness profile's `defaultModel`, or an empty string when no
 *          profile exists for `harness`
 */
export function getDefaultModelForHarness(harness: HarnessName): string {
  const match = HARNESS_PROFILES.find((candidate) => candidate.name === harness);
  if (match === undefined) {
    return "";
  }
  return match.defaultModel;
}
491
+
/**
 * Find which harness provides a given model.
 *
 * @param modelId - The model identifier
 * @returns The `harness` of the first registry entry whose `id` equals
 *          `modelId`, or `undefined` when no entry matches
 */
export function getHarnessForModel(modelId: string): HarnessName | undefined {
  for (const { id, harness } of MODEL_REGISTRY) {
    if (id === modelId) {
      return harness;
    }
  }
  return undefined;
}