ag-cortex 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent/commands/test-browser.md +339 -0
- package/.agent/rules/00-constitution.md +46 -0
- package/.agent/rules/project-rules.md +49 -0
- package/.agent/skills/agent-browser/SKILL.md +223 -0
- package/.agent/skills/agent-native-architecture/SKILL.md +435 -0
- package/.agent/skills/agent-native-architecture/references/action-parity-discipline.md +409 -0
- package/.agent/skills/agent-native-architecture/references/agent-execution-patterns.md +467 -0
- package/.agent/skills/agent-native-architecture/references/agent-native-testing.md +582 -0
- package/.agent/skills/agent-native-architecture/references/architecture-patterns.md +478 -0
- package/.agent/skills/agent-native-architecture/references/dynamic-context-injection.md +338 -0
- package/.agent/skills/agent-native-architecture/references/files-universal-interface.md +301 -0
- package/.agent/skills/agent-native-architecture/references/from-primitives-to-domain-tools.md +359 -0
- package/.agent/skills/agent-native-architecture/references/mcp-tool-design.md +506 -0
- package/.agent/skills/agent-native-architecture/references/mobile-patterns.md +871 -0
- package/.agent/skills/agent-native-architecture/references/product-implications.md +443 -0
- package/.agent/skills/agent-native-architecture/references/refactoring-to-prompt-native.md +317 -0
- package/.agent/skills/agent-native-architecture/references/self-modification.md +269 -0
- package/.agent/skills/agent-native-architecture/references/shared-workspace-architecture.md +680 -0
- package/.agent/skills/agent-native-architecture/references/system-prompt-design.md +250 -0
- package/.agent/skills/agent-native-reviewer/SKILL.md +246 -0
- package/.agent/skills/andrew-kane-gem-writer/SKILL.md +184 -0
- package/.agent/skills/andrew-kane-gem-writer/references/database-adapters.md +231 -0
- package/.agent/skills/andrew-kane-gem-writer/references/module-organization.md +121 -0
- package/.agent/skills/andrew-kane-gem-writer/references/rails-integration.md +183 -0
- package/.agent/skills/andrew-kane-gem-writer/references/resources.md +119 -0
- package/.agent/skills/andrew-kane-gem-writer/references/testing-patterns.md +261 -0
- package/.agent/skills/ankane-readme-writer/SKILL.md +50 -0
- package/.agent/skills/architecture-strategist/SKILL.md +52 -0
- package/.agent/skills/best-practices-researcher/SKILL.md +100 -0
- package/.agent/skills/bug-reproduction-validator/SKILL.md +67 -0
- package/.agent/skills/code-simplicity-reviewer/SKILL.md +85 -0
- package/.agent/skills/coding-tutor/.claude-plugin/plugin.json +9 -0
- package/.agent/skills/coding-tutor/README.md +37 -0
- package/.agent/skills/coding-tutor/commands/quiz-me.md +1 -0
- package/.agent/skills/coding-tutor/commands/sync-tutorials.md +25 -0
- package/.agent/skills/coding-tutor/commands/teach-me.md +1 -0
- package/.agent/skills/coding-tutor/skills/coding-tutor/SKILL.md +214 -0
- package/.agent/skills/coding-tutor/skills/coding-tutor/scripts/create_tutorial.py +202 -0
- package/.agent/skills/coding-tutor/skills/coding-tutor/scripts/index_tutorials.py +203 -0
- package/.agent/skills/coding-tutor/skills/coding-tutor/scripts/quiz_priority.py +190 -0
- package/.agent/skills/coding-tutor/skills/coding-tutor/scripts/setup_tutorials.py +132 -0
- package/.agent/skills/compound-docs/SKILL.md +510 -0
- package/.agent/skills/compound-docs/assets/critical-pattern-template.md +34 -0
- package/.agent/skills/compound-docs/assets/resolution-template.md +93 -0
- package/.agent/skills/compound-docs/references/yaml-schema.md +65 -0
- package/.agent/skills/compound-docs/schema.yaml +176 -0
- package/.agent/skills/create-agent-skills/SKILL.md +299 -0
- package/.agent/skills/create-agent-skills/references/api-security.md +226 -0
- package/.agent/skills/create-agent-skills/references/be-clear-and-direct.md +531 -0
- package/.agent/skills/create-agent-skills/references/best-practices.md +404 -0
- package/.agent/skills/create-agent-skills/references/common-patterns.md +595 -0
- package/.agent/skills/create-agent-skills/references/core-principles.md +437 -0
- package/.agent/skills/create-agent-skills/references/executable-code.md +175 -0
- package/.agent/skills/create-agent-skills/references/iteration-and-testing.md +474 -0
- package/.agent/skills/create-agent-skills/references/official-spec.md +185 -0
- package/.agent/skills/create-agent-skills/references/recommended-structure.md +168 -0
- package/.agent/skills/create-agent-skills/references/skill-structure.md +372 -0
- package/.agent/skills/create-agent-skills/references/using-scripts.md +113 -0
- package/.agent/skills/create-agent-skills/references/using-templates.md +112 -0
- package/.agent/skills/create-agent-skills/references/workflows-and-validation.md +510 -0
- package/.agent/skills/create-agent-skills/templates/router-skill.md +73 -0
- package/.agent/skills/create-agent-skills/templates/simple-skill.md +33 -0
- package/.agent/skills/create-agent-skills/workflows/add-reference.md +96 -0
- package/.agent/skills/create-agent-skills/workflows/add-script.md +93 -0
- package/.agent/skills/create-agent-skills/workflows/add-template.md +74 -0
- package/.agent/skills/create-agent-skills/workflows/add-workflow.md +120 -0
- package/.agent/skills/create-agent-skills/workflows/audit-skill.md +138 -0
- package/.agent/skills/create-agent-skills/workflows/create-domain-expertise-skill.md +605 -0
- package/.agent/skills/create-agent-skills/workflows/create-new-skill.md +191 -0
- package/.agent/skills/create-agent-skills/workflows/get-guidance.md +121 -0
- package/.agent/skills/create-agent-skills/workflows/upgrade-to-router.md +161 -0
- package/.agent/skills/create-agent-skills/workflows/verify-skill.md +204 -0
- package/.agent/skills/data-integrity-guardian/SKILL.md +70 -0
- package/.agent/skills/data-migration-expert/SKILL.md +97 -0
- package/.agent/skills/deployment-verification-agent/SKILL.md +159 -0
- package/.agent/skills/design-implementation-reviewer/SKILL.md +85 -0
- package/.agent/skills/design-iterator/SKILL.md +197 -0
- package/.agent/skills/dhh-rails-reviewer/SKILL.md +45 -0
- package/.agent/skills/dhh-rails-style/SKILL.md +184 -0
- package/.agent/skills/dhh-rails-style/references/architecture.md +653 -0
- package/.agent/skills/dhh-rails-style/references/controllers.md +303 -0
- package/.agent/skills/dhh-rails-style/references/frontend.md +510 -0
- package/.agent/skills/dhh-rails-style/references/gems.md +266 -0
- package/.agent/skills/dhh-rails-style/references/models.md +359 -0
- package/.agent/skills/dhh-rails-style/references/testing.md +338 -0
- package/.agent/skills/dspy-ruby/SKILL.md +594 -0
- package/.agent/skills/dspy-ruby/assets/config-template.rb +359 -0
- package/.agent/skills/dspy-ruby/assets/module-template.rb +326 -0
- package/.agent/skills/dspy-ruby/assets/signature-template.rb +143 -0
- package/.agent/skills/dspy-ruby/references/core-concepts.md +265 -0
- package/.agent/skills/dspy-ruby/references/optimization.md +623 -0
- package/.agent/skills/dspy-ruby/references/providers.md +305 -0
- package/.agent/skills/every-style-editor/SKILL.md +134 -0
- package/.agent/skills/every-style-editor/references/EVERY_WRITE_STYLE.md +529 -0
- package/.agent/skills/figma-design-sync/SKILL.md +166 -0
- package/.agent/skills/file-todos/SKILL.md +251 -0
- package/.agent/skills/file-todos/assets/todo-template.md +155 -0
- package/.agent/skills/framework-docs-researcher/SKILL.md +83 -0
- package/.agent/skills/frontend-design/SKILL.md +42 -0
- package/.agent/skills/gemini-imagegen/SKILL.md +237 -0
- package/.agent/skills/gemini-imagegen/requirements.txt +2 -0
- package/.agent/skills/gemini-imagegen/scripts/compose_images.py +168 -0
- package/.agent/skills/gemini-imagegen/scripts/edit_image.py +157 -0
- package/.agent/skills/gemini-imagegen/scripts/gemini_images.py +265 -0
- package/.agent/skills/gemini-imagegen/scripts/generate_image.py +147 -0
- package/.agent/skills/gemini-imagegen/scripts/multi_turn_chat.py +215 -0
- package/.agent/skills/git-history-analyzer/SKILL.md +42 -0
- package/.agent/skills/git-worktree/SKILL.md +302 -0
- package/.agent/skills/git-worktree/scripts/worktree-manager.sh +345 -0
- package/.agent/skills/julik-frontend-races-reviewer/SKILL.md +222 -0
- package/.agent/skills/kieran-python-reviewer/SKILL.md +104 -0
- package/.agent/skills/kieran-rails-reviewer/SKILL.md +86 -0
- package/.agent/skills/kieran-typescript-reviewer/SKILL.md +95 -0
- package/.agent/skills/lint/SKILL.md +16 -0
- package/.agent/skills/pattern-recognition-specialist/SKILL.md +57 -0
- package/.agent/skills/performance-oracle/SKILL.md +110 -0
- package/.agent/skills/pr-comment-resolver/SKILL.md +69 -0
- package/.agent/skills/rclone/SKILL.md +150 -0
- package/.agent/skills/rclone/scripts/check_setup.sh +60 -0
- package/.agent/skills/repo-research-analyst/SKILL.md +113 -0
- package/.agent/skills/security-sentinel/SKILL.md +93 -0
- package/.agent/skills/skill-creator/SKILL.md +209 -0
- package/.agent/skills/skill-creator/scripts/init_skill.py +304 -0
- package/.agent/skills/skill-creator/scripts/package_skill.py +112 -0
- package/.agent/skills/skill-creator/scripts/quick_validate.py +72 -0
- package/.agent/skills/spec-flow-analyzer/SKILL.md +113 -0
- package/.agent/skills/test-agent/SKILL.md +4 -0
- package/.agent/workflows/agent-native-audit.md +277 -0
- package/.agent/workflows/ask-user-question.md +21 -0
- package/.agent/workflows/changelog.md +137 -0
- package/.agent/workflows/compound.md +202 -0
- package/.agent/workflows/create-agent-skill.md +8 -0
- package/.agent/workflows/deepen-plan-research.md +334 -0
- package/.agent/workflows/deepen-plan-synthesis.md +182 -0
- package/.agent/workflows/deepen-plan.md +79 -0
- package/.agent/workflows/feature-video.md +342 -0
- package/.agent/workflows/generate-command.md +162 -0
- package/.agent/workflows/heal-skill.md +142 -0
- package/.agent/workflows/lfg.md +20 -0
- package/.agent/workflows/plan-analysis.md +67 -0
- package/.agent/workflows/plan-next-steps.md +63 -0
- package/.agent/workflows/plan-review.md +33 -0
- package/.agent/workflows/plan-synthesis.md +106 -0
- package/.agent/workflows/plan.md +49 -0
- package/.agent/workflows/report-bug.md +150 -0
- package/.agent/workflows/reproduce-bug.md +99 -0
- package/.agent/workflows/resolve-parallel.md +34 -0
- package/.agent/workflows/resolve-pr-parallel.md +49 -0
- package/.agent/workflows/resolve-todo-parallel.md +35 -0
- package/.agent/workflows/review-analysis.md +145 -0
- package/.agent/workflows/review-synthesis.md +262 -0
- package/.agent/workflows/review.md +64 -0
- package/.agent/workflows/ship.md +90 -0
- package/.agent/workflows/test-command.md +3 -0
- package/.agent/workflows/triage.md +310 -0
- package/.agent/workflows/work.md +157 -0
- package/.agent/workflows/xcode-test.md +332 -0
- package/LICENSE +22 -0
- package/README.md +49 -0
- package/bin/ag-cortex.js +54 -0
- package/lib/core.js +165 -0
- package/package.json +31 -0
|
@@ -0,0 +1,443 @@
|
|
|
1
|
+
<overview>
|
|
2
|
+
Agent-native architecture has consequences for how products feel, not just how they're built. This document covers progressive disclosure of complexity, discovering latent demand through agent usage, designing approval flows that match stakes and reversibility, making the agent's capabilities visible to users, and designing for trust.
|
|
3
|
+
</overview>
|
|
4
|
+
|
|
5
|
+
<progressive_disclosure>
|
|
6
|
+
## Progressive Disclosure of Complexity
|
|
7
|
+
|
|
8
|
+
The best agent-native applications are simple to start but endlessly powerful.
|
|
9
|
+
|
|
10
|
+
### The Excel Analogy
|
|
11
|
+
|
|
12
|
+
Excel is the canonical example: you can use it for a grocery list, or you can build complex financial models. The same tool, radically different depths of use.
|
|
13
|
+
|
|
14
|
+
Antigravity has this quality: you can ask it to fix a typo, or you can ask it to refactor an entire codebase. The interface is the same—natural language—but the capability scales with the ask.
|
|
15
|
+
|
|
16
|
+
### The Pattern
|
|
17
|
+
|
|
18
|
+
Agent-native applications should aspire to this:
|
|
19
|
+
|
|
20
|
+
**Simple entry:** Basic requests work immediately with no learning curve
|
|
21
|
+
```
|
|
22
|
+
User: "Organize my downloads"
|
|
23
|
+
Agent: [Does it immediately, no configuration needed]
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
**Discoverable depth:** Users find they can do more as they explore
|
|
27
|
+
```
|
|
28
|
+
User: "Organize my downloads by project"
|
|
29
|
+
Agent: [Adapts to preference]
|
|
30
|
+
|
|
31
|
+
User: "Every Monday, review last week's downloads"
|
|
32
|
+
Agent: [Sets up recurring workflow]
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
**No ceiling:** Power users can push the system in ways you didn't anticipate
|
|
36
|
+
```
|
|
37
|
+
User: "Cross-reference my downloads with my calendar and flag
|
|
38
|
+
anything I downloaded during a meeting that I haven't
|
|
39
|
+
followed up on"
|
|
40
|
+
Agent: [Composes capabilities to accomplish this]
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
### How This Emerges
|
|
44
|
+
|
|
45
|
+
This isn't something you design directly. It **emerges naturally from the architecture:**
|
|
46
|
+
|
|
47
|
+
1. When features are prompts and tools are composable...
|
|
48
|
+
2. Users can start simple ("organize my downloads")...
|
|
49
|
+
3. And gradually discover complexity ("every Monday, review last week's...")...
|
|
50
|
+
4. Without you having to build each level explicitly
|
|
51
|
+
|
|
52
|
+
The agent meets users where they are.
|
|
53
|
+
|
|
54
|
+
### Design Implications
|
|
55
|
+
|
|
56
|
+
- **Don't force configuration upfront** - Let users start immediately
|
|
57
|
+
- **Don't hide capabilities** - Make them discoverable through use
|
|
58
|
+
- **Don't cap complexity** - If the agent can do it, let users ask for it
|
|
59
|
+
- **Do provide hints** - Help users discover what's possible
|
|
60
|
+
</progressive_disclosure>
|
|
61
|
+
|
|
62
|
+
<latent_demand_discovery>
|
|
63
|
+
## Latent Demand Discovery
|
|
64
|
+
|
|
65
|
+
Traditional product development: imagine what users want, build it, see if you're right.
|
|
66
|
+
|
|
67
|
+
Agent-native product development: build a capable foundation, observe what users ask the agent to do, formalize the patterns that emerge.
|
|
68
|
+
|
|
69
|
+
### The Shift
|
|
70
|
+
|
|
71
|
+
**Traditional approach:**
|
|
72
|
+
```
|
|
73
|
+
1. Imagine features users might want
|
|
74
|
+
2. Build them
|
|
75
|
+
3. Ship
|
|
76
|
+
4. Hope you guessed right
|
|
77
|
+
5. If wrong, rebuild
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
**Agent-native approach:**
|
|
81
|
+
```
|
|
82
|
+
1. Build capable foundation (atomic tools, parity)
|
|
83
|
+
2. Ship
|
|
84
|
+
3. Users ask agent for things
|
|
85
|
+
4. Observe what they're asking for
|
|
86
|
+
5. Patterns emerge
|
|
87
|
+
6. Formalize patterns into domain tools or prompts
|
|
88
|
+
7. Repeat
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### The Flywheel
|
|
92
|
+
|
|
93
|
+
```
|
|
94
|
+
Build with atomic tools and parity
|
|
95
|
+
↓
|
|
96
|
+
Users ask for things you didn't anticipate
|
|
97
|
+
↓
|
|
98
|
+
Agent composes tools to accomplish them
|
|
99
|
+
(or fails, revealing a capability gap)
|
|
100
|
+
↓
|
|
101
|
+
You observe patterns in what's being requested
|
|
102
|
+
↓
|
|
103
|
+
Add domain tools or prompts to optimize common patterns
|
|
104
|
+
↓
|
|
105
|
+
(Repeat)
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### What You Learn
|
|
109
|
+
|
|
110
|
+
**When users ask and the agent succeeds:**
|
|
111
|
+
- This is a real need
|
|
112
|
+
- Your architecture supports it
|
|
113
|
+
- Consider optimizing with a domain tool if it's common
|
|
114
|
+
|
|
115
|
+
**When users ask and the agent fails:**
|
|
116
|
+
- This is a real need
|
|
117
|
+
- You have a capability gap
|
|
118
|
+
- Fix the gap: add a tool, fix parity, or improve context
|
|
119
|
+
|
|
120
|
+
**When users don't ask for something:**
|
|
121
|
+
- Maybe they don't need it
|
|
122
|
+
- Or maybe they don't know it's possible (capability hiding)
|
|
123
|
+
|
|
124
|
+
### Implementation
|
|
125
|
+
|
|
126
|
+
**Log agent requests:**
|
|
127
|
+
```typescript
|
|
128
|
+
async function handleAgentRequest(request: string) {
|
|
129
|
+
// Log what users are asking for
|
|
130
|
+
await analytics.log({
|
|
131
|
+
type: 'agent_request',
|
|
132
|
+
request: request,
|
|
133
|
+
timestamp: Date.now(),
|
|
134
|
+
});
|
|
135
|
+
|
|
136
|
+
// Process request...
|
|
137
|
+
}
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
**Track success/failure:**
|
|
141
|
+
```typescript
|
|
142
|
+
async function completeAgentSession(session: AgentSession) {
|
|
143
|
+
await analytics.log({
|
|
144
|
+
type: 'agent_session',
|
|
145
|
+
request: session.initialRequest,
|
|
146
|
+
succeeded: session.status === 'completed',
|
|
147
|
+
toolsUsed: session.toolCalls.map(t => t.name),
|
|
148
|
+
iterations: session.iterationCount,
|
|
149
|
+
});
|
|
150
|
+
}
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
**Review patterns:**
|
|
154
|
+
- What are users asking for most?
|
|
155
|
+
- What's failing? Why?
|
|
156
|
+
- What would benefit from a domain tool?
|
|
157
|
+
- What needs better context injection?
|
|
158
|
+
|
|
159
|
+
### Example: Discovering "Weekly Review"
|
|
160
|
+
|
|
161
|
+
```
|
|
162
|
+
Week 1: Users start asking "summarize my activity this week"
|
|
163
|
+
Agent: Composes list_files + read_file, works but slow
|
|
164
|
+
|
|
165
|
+
Week 2: More users asking similar things
|
|
166
|
+
Pattern emerges: weekly review is common
|
|
167
|
+
|
|
168
|
+
Week 3: Add prompt section for weekly review
|
|
169
|
+
Faster, more consistent, still flexible
|
|
170
|
+
|
|
171
|
+
Week 4: If still common and performance matters
|
|
172
|
+
Add domain tool: generate_weekly_summary
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
You didn't have to guess that weekly review would be popular. You discovered it.
|
|
176
|
+
</latent_demand_discovery>
|
|
177
|
+
|
|
178
|
+
<approval_and_agency>
|
|
179
|
+
## Approval and User Agency
|
|
180
|
+
|
|
181
|
+
When agents take unsolicited actions—doing things on their own rather than responding to explicit requests—you need to decide how much autonomy to grant.
|
|
182
|
+
|
|
183
|
+
> **Note:** This framework applies to unsolicited agent actions. If the user explicitly asks the agent to do something ("send that email"), that's already approval—the agent just does it.
|
|
184
|
+
|
|
185
|
+
### The Stakes/Reversibility Matrix
|
|
186
|
+
|
|
187
|
+
Consider two dimensions:
|
|
188
|
+
- **Stakes:** How much does it matter if this goes wrong?
|
|
189
|
+
- **Reversibility:** How easy is it to undo?
|
|
190
|
+
|
|
191
|
+
| Stakes | Reversibility | Pattern | Example |
|
|
192
|
+
|--------|---------------|---------|---------|
|
|
193
|
+
| Low | Easy | **Auto-apply** | Organizing files |
|
|
194
|
+
| Low | Hard | **Quick confirm** | Publishing to a private feed |
|
|
195
|
+
| High | Easy | **Suggest + apply** | Code changes with undo |
|
|
196
|
+
| High | Hard | **Explicit approval** | Sending emails, payments |
|
|
197
|
+
|
|
198
|
+
### Patterns in Detail
|
|
199
|
+
|
|
200
|
+
**Auto-apply (low stakes, easy reversal):**
|
|
201
|
+
```
|
|
202
|
+
Agent: [Organizes files into folders]
|
|
203
|
+
Agent: "I organized your downloads into folders by type.
|
|
204
|
+
You can undo with Cmd+Z or move them back."
|
|
205
|
+
```
|
|
206
|
+
The user doesn't need to approve—the action is easy to undo and the stakes are low.
|
|
207
|
+
|
|
208
|
+
**Quick confirm (low stakes, hard reversal):**
|
|
209
|
+
```
|
|
210
|
+
Agent: "I've drafted a post about your reading insights.
|
|
211
|
+
Publish to your feed?"
|
|
212
|
+
[Publish] [Edit first] [Cancel]
|
|
213
|
+
```
|
|
214
|
+
A one-tap confirmation is enough because the stakes are low, but some confirmation is still required because publishing is hard to undo.
|
|
215
|
+
|
|
216
|
+
**Suggest + apply (high stakes, easy reversal):**
|
|
217
|
+
```
|
|
218
|
+
Agent: "I recommend these code changes to fix the bug:
|
|
219
|
+
[Shows diff]
|
|
220
|
+
Apply? Changes can be reverted with git."
|
|
221
|
+
[Apply] [Modify] [Cancel]
|
|
222
|
+
```
|
|
223
|
+
This shows what will happen before anything changes and makes the reversal path clear.
|
|
224
|
+
|
|
225
|
+
**Explicit approval (high stakes, hard reversal):**
|
|
226
|
+
```
|
|
227
|
+
Agent: "I've drafted this email to your team about the deadline change:
|
|
228
|
+
[Shows full email]
|
|
229
|
+
This will send immediately and cannot be unsent.
|
|
230
|
+
Type 'send' to confirm."
|
|
231
|
+
```
|
|
232
|
+
Requires explicit action, makes consequences clear.
|
|
233
|
+
|
|
234
|
+
### Implementation
|
|
235
|
+
|
|
236
|
+
```swift
|
|
237
|
+
enum ApprovalLevel {
|
|
238
|
+
case autoApply // Just do it
|
|
239
|
+
case quickConfirm // One-tap approval
|
|
240
|
+
case suggestApply // Show preview, ask to apply
|
|
241
|
+
case explicitApproval // Require explicit confirmation
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
func approvalLevelFor(action: AgentAction) -> ApprovalLevel {
|
|
245
|
+
let stakes = assessStakes(action)
|
|
246
|
+
let reversibility = assessReversibility(action)
|
|
247
|
+
|
|
248
|
+
switch (stakes, reversibility) {
|
|
249
|
+
case (.low, .easy): return .autoApply
|
|
250
|
+
case (.low, .hard): return .quickConfirm
|
|
251
|
+
case (.high, .easy): return .suggestApply
|
|
252
|
+
case (.high, .hard): return .explicitApproval
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
func assessStakes(_ action: AgentAction) -> Stakes {
|
|
257
|
+
switch action {
|
|
258
|
+
case .organizeFiles: return .low
|
|
259
|
+
case .publishToFeed: return .low
|
|
260
|
+
case .modifyCode: return .high
|
|
261
|
+
case .sendEmail: return .high
|
|
262
|
+
case .makePayment: return .high
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
func assessReversibility(_ action: AgentAction) -> Reversibility {
|
|
267
|
+
switch action {
|
|
268
|
+
case .organizeFiles: return .easy // Can move back
|
|
269
|
+
case .publishToFeed: return .hard // People might see it
|
|
270
|
+
case .modifyCode: return .easy // Git revert
|
|
271
|
+
case .sendEmail: return .hard // Can't unsend
|
|
272
|
+
case .makePayment: return .hard // Money moved
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
### Self-Modification Considerations
|
|
278
|
+
|
|
279
|
+
When agents can modify their own behavior—changing prompts, updating preferences, adjusting workflows—the goals are:
|
|
280
|
+
|
|
281
|
+
1. **Visibility:** User can see what changed
|
|
282
|
+
2. **Understanding:** User understands the effects
|
|
283
|
+
3. **Rollback:** User can undo changes
|
|
284
|
+
|
|
285
|
+
Approval flows are one way to achieve this. Audit logs with easy rollback could be another. **The principle is: make it legible.**
|
|
286
|
+
|
|
287
|
+
```swift
|
|
288
|
+
// When agent modifies its own prompt
|
|
289
|
+
func agentSelfModify(change: PromptChange) async {
|
|
290
|
+
// Log the change
|
|
291
|
+
await auditLog.record(change)
|
|
292
|
+
|
|
293
|
+
// Create checkpoint for rollback
|
|
294
|
+
await createCheckpoint(currentState)
|
|
295
|
+
|
|
296
|
+
// Notify user (could be async/batched)
|
|
297
|
+
await notifyUser("I've adjusted my approach: \(change.summary)")
|
|
298
|
+
|
|
299
|
+
// Apply change
|
|
300
|
+
await applyChange(change)
|
|
301
|
+
}
|
|
302
|
+
```
|
|
303
|
+
</approval_and_agency>
|
|
304
|
+
|
|
305
|
+
<capability_visibility>
|
|
306
|
+
## Capability Visibility
|
|
307
|
+
|
|
308
|
+
Users need to discover what the agent can do. Hidden capabilities lead to underutilization.
|
|
309
|
+
|
|
310
|
+
### The Problem
|
|
311
|
+
|
|
312
|
+
```
|
|
313
|
+
User: "Help me with my reading"
|
|
314
|
+
Agent: "What would you like help with?"
|
|
315
|
+
// Agent doesn't mention it can publish to feed, research books,
|
|
316
|
+
// generate introductions, analyze themes...
|
|
317
|
+
```
|
|
318
|
+
|
|
319
|
+
The agent can do these things, but the user doesn't know.
|
|
320
|
+
|
|
321
|
+
### Solutions
|
|
322
|
+
|
|
323
|
+
**Onboarding hints:**
|
|
324
|
+
```
|
|
325
|
+
Agent: "I can help you with your reading in several ways:
|
|
326
|
+
- Research any book (web search + save findings)
|
|
327
|
+
- Generate personalized introductions
|
|
328
|
+
- Publish insights to your reading feed
|
|
329
|
+
- Analyze themes across your library
|
|
330
|
+
What interests you?"
|
|
331
|
+
```
|
|
332
|
+
|
|
333
|
+
**Contextual suggestions:**
|
|
334
|
+
```
|
|
335
|
+
User: "I just finished reading 1984"
|
|
336
|
+
Agent: "Great choice! Would you like me to:
|
|
337
|
+
- Research historical context?
|
|
338
|
+
- Compare it to other books in your library?
|
|
339
|
+
- Publish an insight about it to your feed?"
|
|
340
|
+
```
|
|
341
|
+
|
|
342
|
+
**Progressive revelation:**
|
|
343
|
+
```
|
|
344
|
+
// After user uses basic features
|
|
345
|
+
Agent: "By the way, you can also ask me to set up
|
|
346
|
+
recurring tasks, like 'every Monday, review my
|
|
347
|
+
reading progress.' Just let me know!"
|
|
348
|
+
```
|
|
349
|
+
|
|
350
|
+
### Balance
|
|
351
|
+
|
|
352
|
+
- **Don't overwhelm** with all capabilities upfront
|
|
353
|
+
- **Do reveal** capabilities naturally through use
|
|
354
|
+
- **Don't assume** users will discover things on their own
|
|
355
|
+
- **Do make** capabilities visible when relevant
|
|
356
|
+
</capability_visibility>
|
|
357
|
+
|
|
358
|
+
<designing_for_trust>
|
|
359
|
+
## Designing for Trust
|
|
360
|
+
|
|
361
|
+
Agent-native apps require trust. Users are giving an AI significant capability. Build trust through:
|
|
362
|
+
|
|
363
|
+
### Transparency
|
|
364
|
+
|
|
365
|
+
- Show what the agent is doing (tool calls, progress)
|
|
366
|
+
- Explain reasoning when it matters
|
|
367
|
+
- Make all agent work inspectable (files, logs)
|
|
368
|
+
|
|
369
|
+
### Predictability
|
|
370
|
+
|
|
371
|
+
- Consistent behavior for similar requests
|
|
372
|
+
- Clear patterns for when approval is needed
|
|
373
|
+
- No surprises in what the agent can access
|
|
374
|
+
|
|
375
|
+
### Reversibility
|
|
376
|
+
|
|
377
|
+
- Easy undo for agent actions
|
|
378
|
+
- Checkpoints before significant changes
|
|
379
|
+
- Clear rollback paths
|
|
380
|
+
|
|
381
|
+
### Control
|
|
382
|
+
|
|
383
|
+
- User can stop agent at any time
|
|
384
|
+
- User can adjust agent behavior (prompts, preferences)
|
|
385
|
+
- User can restrict capabilities if desired
|
|
386
|
+
|
|
387
|
+
### Implementation
|
|
388
|
+
|
|
389
|
+
```swift
|
|
390
|
+
struct AgentTransparency {
|
|
391
|
+
// Show what's happening
|
|
392
|
+
func onToolCall(_ tool: ToolCall) {
|
|
393
|
+
showInUI("Using \(tool.name)...")
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
// Explain reasoning
|
|
397
|
+
func onDecision(_ decision: AgentDecision) {
|
|
398
|
+
if decision.needsExplanation {
|
|
399
|
+
showInUI("I chose this because: \(decision.reasoning)")
|
|
400
|
+
}
|
|
401
|
+
}
|
|
402
|
+
|
|
403
|
+
// Make work inspectable
|
|
404
|
+
func onOutput(_ output: AgentOutput) {
|
|
405
|
+
// All output is in files user can see
|
|
406
|
+
// Or in visible UI state
|
|
407
|
+
}
|
|
408
|
+
}
|
|
409
|
+
```
|
|
410
|
+
</designing_for_trust>
|
|
411
|
+
|
|
412
|
+
<checklist>
|
|
413
|
+
## Product Design Checklist
|
|
414
|
+
|
|
415
|
+
### Progressive Disclosure
|
|
416
|
+
- [ ] Basic requests work immediately (no config)
|
|
417
|
+
- [ ] Depth is discoverable through use
|
|
418
|
+
- [ ] No artificial ceiling on complexity
|
|
419
|
+
- [ ] Capability hints provided
|
|
420
|
+
|
|
421
|
+
### Latent Demand Discovery
|
|
422
|
+
- [ ] Agent requests are logged
|
|
423
|
+
- [ ] Success/failure is tracked
|
|
424
|
+
- [ ] Patterns are reviewed regularly
|
|
425
|
+
- [ ] Common patterns formalized into tools/prompts
|
|
426
|
+
|
|
427
|
+
### Approval & Agency
|
|
428
|
+
- [ ] Stakes assessed for each action type
|
|
429
|
+
- [ ] Reversibility assessed for each action type
|
|
430
|
+
- [ ] Approval pattern matches stakes/reversibility
|
|
431
|
+
- [ ] Self-modification is legible (visible, understandable, reversible)
|
|
432
|
+
|
|
433
|
+
### Capability Visibility
|
|
434
|
+
- [ ] Onboarding reveals key capabilities
|
|
435
|
+
- [ ] Contextual suggestions provided
|
|
436
|
+
- [ ] Users aren't expected to guess what's possible
|
|
437
|
+
|
|
438
|
+
### Trust
|
|
439
|
+
- [ ] Agent actions are transparent
|
|
440
|
+
- [ ] Behavior is predictable
|
|
441
|
+
- [ ] Actions are reversible
|
|
442
|
+
- [ ] User has control
|
|
443
|
+
</checklist>
|