flyee 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -9
- package/bin/install.js +9 -0
- package/core/skills/cost-tracking/SKILL.md +68 -0
- package/core/skills/doctor/SKILL.md +171 -0
- package/core/skills/hallucination-guard/SKILL.md +110 -0
- package/core/skills/knowledge-persistence/SKILL.md +123 -0
- package/core/skills/quality-gates/SKILL.md +109 -0
- package/core/skills/roadmap-reassessment/SKILL.md +134 -0
- package/core/skills/skill-discovery/SKILL.md +152 -0
- package/core/skills/sprint-validation/SKILL.md +125 -0
- package/core/skills/stuck-detection/SKILL.md +176 -0
- package/core/skills/token-profiles/SKILL.md +150 -0
- package/core/skills/unique-ids/SKILL.md +112 -0
- package/core/templates/RUNTIME.template.md +41 -0
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Flyee
|
|
2
2
|
|
|
3
|
-
> AI Agent Framework —
|
|
3
|
+
> AI Agent Framework — 22 specialist agents, 81 skills, 44 workflows. All runtimes.
|
|
4
4
|
|
|
5
5
|
[](https://opensource.org/licenses/MIT)
|
|
6
6
|
|
|
@@ -46,7 +46,7 @@ npx flyee --all # All detected runtimes
|
|
|
46
46
|
| `game-developer` | Game development |
|
|
47
47
|
| ... and 13 more | Various domains |
|
|
48
48
|
|
|
49
|
-
### 🧩
|
|
49
|
+
### 🧩 81 Modular Skills
|
|
50
50
|
|
|
51
51
|
Skills are knowledge modules that enhance your agent's capabilities:
|
|
52
52
|
|
|
@@ -54,12 +54,14 @@ Skills are knowledge modules that enhance your agent's capabilities:
|
|
|
54
54
|
- **Testing Patterns** — Unit, integration, E2E strategies
|
|
55
55
|
- **Design System Enforcement** — Token-based UI consistency
|
|
56
56
|
- **SEO Fundamentals** — Core Web Vitals, E-E-A-T
|
|
57
|
-
- **
|
|
58
|
-
- **
|
|
59
|
-
- **
|
|
60
|
-
-
|
|
57
|
+
- **Quality Gates** — 8-question structured quality gates
|
|
58
|
+
- **Hallucination Guard** — Detects empty completions
|
|
59
|
+
- **Stuck Detection** — Breaks infinite agent loops
|
|
60
|
+
- **Knowledge Persistence** — Cross-session memory
|
|
61
|
+
- **Skill Discovery** — Auto-detect relevant skills
|
|
62
|
+
- ... and 70 more
|
|
61
63
|
|
|
62
|
-
### 🔄
|
|
64
|
+
### 🔄 44 Automated Workflows
|
|
63
65
|
|
|
64
66
|
```bash
|
|
65
67
|
/new-task # Add features with tracking
|
|
@@ -78,8 +80,8 @@ Skills are knowledge modules that enhance your agent's capabilities:
|
|
|
78
80
|
your-project/
|
|
79
81
|
├── .agent/ ← Installed by flyee (runtime-specific)
|
|
80
82
|
│ ├── agents/ ← 22 specialist agents
|
|
81
|
-
│ ├── skills/ ←
|
|
82
|
-
│ ├── workflows/ ←
|
|
83
|
+
│ ├── skills/ ← 81 modular skills
|
|
84
|
+
│ ├── workflows/ ← 44 automated workflows
|
|
83
85
|
│ ├── scripts/ ← Automation scripts
|
|
84
86
|
│ └── bridge/ ← Optional: Flyee SaaS sync
|
|
85
87
|
├── .flyee/ ← Runtime state (auto-created)
|
package/bin/install.js
CHANGED
|
@@ -268,6 +268,15 @@ async function installForRuntime(runtimeKey, projectDir, dryRun) {
|
|
|
268
268
|
copyDirRecursive(bridgeSrc, bridgeDest, adapter, config, dryRun, stats);
|
|
269
269
|
}
|
|
270
270
|
|
|
271
|
+
// Merge global user skills from ~/.flyee/skills/ (F-11)
|
|
272
|
+
const homeDir = process.env.HOME || process.env.USERPROFILE || '';
|
|
273
|
+
const globalSkillsDir = join(homeDir, '.flyee', 'skills');
|
|
274
|
+
if (existsSync(globalSkillsDir)) {
|
|
275
|
+
const globalSkillsDest = join(targetDir, 'skills');
|
|
276
|
+
console.log(` 🌐 Merging global skills from ~/.flyee/skills/`);
|
|
277
|
+
copyDirRecursive(globalSkillsDir, globalSkillsDest, adapter, config, dryRun, stats);
|
|
278
|
+
}
|
|
279
|
+
|
|
271
280
|
// Generate engine file (always use GEMINI.md as canonical source, adapter converts)
|
|
272
281
|
const engineFileSrc = join(ENGINE_FILES_DIR, 'GEMINI.md');
|
|
273
282
|
const engineFileDest = join(projectDir, config.engineFileTarget);
|
|
@@ -192,6 +192,73 @@ Output:
|
|
|
192
192
|
|
|
193
193
|
---
|
|
194
194
|
|
|
195
|
+
## 📈 COST PROJECTIONS (F-07)
|
|
196
|
+
|
|
197
|
+
Estimate remaining sprint cost based on completed work.
|
|
198
|
+
|
|
199
|
+
### Formula
|
|
200
|
+
|
|
201
|
+
```
|
|
202
|
+
avg_cost_per_task = total_spent / completed_tasks
|
|
203
|
+
remaining_cost = avg_cost_per_task × remaining_tasks
|
|
204
|
+
projected_total = total_spent + remaining_cost
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
### Weighted Projection
|
|
208
|
+
|
|
209
|
+
A simple average can be misleading (planning is cheap, execution is expensive). Use a weighted projection instead:
|
|
210
|
+
|
|
211
|
+
```python
|
|
212
|
+
# Weight by operation type
|
|
213
|
+
weights = {
|
|
214
|
+
"plan": 0.15, # Planning is ~15% of task cost
|
|
215
|
+
"execute": 0.60, # Execution is ~60%
|
|
216
|
+
"verify": 0.10, # Verification is ~10%
|
|
217
|
+
"debug": 0.15, # Debug is ~15%
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
# Calculate cost profile from completed tasks
|
|
221
|
+
cost_profile = calculate_profile(completed_tasks)
|
|
222
|
+
|
|
223
|
+
# Project remaining based on profile
|
|
224
|
+
remaining = sum(
|
|
225
|
+
cost_profile[op] * remaining_tasks * weights[op]
|
|
226
|
+
for op in weights
|
|
227
|
+
)
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
### Projection Event
|
|
231
|
+
|
|
232
|
+
```python
|
|
233
|
+
bridge.emit_event("cost.projection", {
|
|
234
|
+
"sprint": "S09",
|
|
235
|
+
"completed_tasks": 12,
|
|
236
|
+
"remaining_tasks": 4,
|
|
237
|
+
"spent_usd": 14.20,
|
|
238
|
+
"projected_remaining_usd": 4.72,
|
|
239
|
+
"projected_total_usd": 18.92,
|
|
240
|
+
"budget_usd": 15.00,
|
|
241
|
+
"over_budget": True,
|
|
242
|
+
"confidence": "medium"
|
|
243
|
+
})
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
### Report Output
|
|
247
|
+
|
|
248
|
+
```
|
|
249
|
+
📈 PROJECTION:
|
|
250
|
+
Completed: 12/16 tasks (75%)
|
|
251
|
+
Avg cost/task: $1.18
|
|
252
|
+
Remaining 4 tasks: ~$4.72
|
|
253
|
+
Projected total: $18.92 (⚠️ over budget by $3.92)
|
|
254
|
+
Confidence: medium
|
|
255
|
+
|
|
256
|
+
Recommendation: Switch to cheaper model for remaining tasks
|
|
257
|
+
OR reduce scope by 1 task to stay in budget
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
---
|
|
261
|
+
|
|
195
262
|
## ⚡ QUICK REFERENCE
|
|
196
263
|
|
|
197
264
|
```bash
|
|
@@ -204,3 +271,4 @@ tail -20 .flyee/cost-log.jsonl | python3 -m json.tool
|
|
|
204
271
|
# Set budget (in CONTEXT.md)
|
|
205
272
|
echo "Budget: $15.00" >> .flyee/sprints/S09/CONTEXT.md
|
|
206
273
|
```
|
|
274
|
+
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: doctor
|
|
3
|
+
description: Auto-diagnostic health check for the flyee framework. Validates .flyee/ directory integrity, checks skill loading, verifies bridge connectivity, detects orphaned state, and reports actionable issues. Run manually or triggered by session-resilience on crash recovery.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Doctor (Health Check)
|
|
7
|
+
|
|
8
|
+
> Auto-diagnose the flyee framework state. Find and fix issues.
|
|
9
|
+
|
|
10
|
+
## Purpose
|
|
11
|
+
|
|
12
|
+
Things break silently. Stale lock files, corrupted state, missing skills, broken bridge connections. Doctor finds these issues before they cause failures during execution.
|
|
13
|
+
|
|
14
|
+
## When to Run
|
|
15
|
+
|
|
16
|
+
| Trigger | Automatic? |
|
|
17
|
+
|---------|-----------|
|
|
18
|
+
| Session start | Optional (configurable) |
|
|
19
|
+
| Crash recovery | Yes (always) |
|
|
20
|
+
| User runs `/doctor` or asks "check health" | Manual |
|
|
21
|
+
| Before `/deploy` | Yes (always) |
|
|
22
|
+
| After `npx flyee` install | Yes (first run) |
|
|
23
|
+
|
|
24
|
+
## Health Checks
|
|
25
|
+
|
|
26
|
+
### 1. `.flyee/` Directory Integrity
|
|
27
|
+
|
|
28
|
+
```markdown
|
|
29
|
+
## Check: .flyee/ Integrity
|
|
30
|
+
|
|
31
|
+
- [ ] `.flyee/` directory exists
|
|
32
|
+
- [ ] `STATE.md` exists and is parseable
|
|
33
|
+
- [ ] `DECISIONS.md` exists
|
|
34
|
+
- [ ] `config.json` exists and is valid JSON
|
|
35
|
+
- [ ] `tasks.json` exists and is valid JSON (if tasks were created)
|
|
36
|
+
- [ ] `cost-log.jsonl` has no malformed lines
|
|
37
|
+
- [ ] `events.jsonl` has no malformed lines
|
|
38
|
+
- [ ] No stale lock files (`auto.lock` with dead PID)
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### 2. Skills Loading
|
|
42
|
+
|
|
43
|
+
```markdown
|
|
44
|
+
## Check: Skills
|
|
45
|
+
|
|
46
|
+
- [ ] `core/skills/` directory exists
|
|
47
|
+
- [ ] All skills have valid `SKILL.md` with frontmatter (name, description)
|
|
48
|
+
- [ ] No broken symlinks in skills directory
|
|
49
|
+
- [ ] Skill count matches expected (81 for v0.2.0)
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### 3. Agent Integrity
|
|
53
|
+
|
|
54
|
+
```markdown
|
|
55
|
+
## Check: Agents
|
|
56
|
+
|
|
57
|
+
- [ ] `core/agents/` directory exists
|
|
58
|
+
- [ ] All agents have valid frontmatter with `skills:` field
|
|
59
|
+
- [ ] Skills referenced in frontmatter exist in `core/skills/`
|
|
60
|
+
- [ ] No circular agent references
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### 4. Bridge Connectivity
|
|
64
|
+
|
|
65
|
+
```markdown
|
|
66
|
+
## Check: Bridge
|
|
67
|
+
|
|
68
|
+
- [ ] `bridge/local_tracker.py` exists and is executable
|
|
69
|
+
- [ ] Python 3 is available
|
|
70
|
+
- [ ] `flyee.json` status:
|
|
71
|
+
- Exists + valid → "Connected to Flyee SaaS"
|
|
72
|
+
- Exists + `opted_out: true` → "Opted out of SaaS"
|
|
73
|
+
- Not exists → "Not configured"
|
|
74
|
+
- [ ] If connected: API health check (GET /health)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### 5. State Consistency
|
|
78
|
+
|
|
79
|
+
```markdown
|
|
80
|
+
## Check: State Consistency
|
|
81
|
+
|
|
82
|
+
- [ ] STATE.md phase matches tasks.json status
|
|
83
|
+
- [ ] No tasks in "in_progress" for > 24 hours (likely abandoned)
|
|
84
|
+
- [ ] No orphaned sprint directories in `.flyee/sprints/`
|
|
85
|
+
- [ ] DECISIONS.md entries have valid dates
|
|
86
|
+
- [ ] cost-log.jsonl timestamps are sequential (no time travel)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### 6. Git Health (if git repo)
|
|
90
|
+
|
|
91
|
+
```markdown
|
|
92
|
+
## Check: Git
|
|
93
|
+
|
|
94
|
+
- [ ] `.git/` exists
|
|
95
|
+
- [ ] Current branch is clean or has tracked changes
|
|
96
|
+
- [ ] `.flyee/` entries in `.gitignore` are correct
|
|
97
|
+
- [ ] No merge conflicts in tracked files
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## Output Format
|
|
101
|
+
|
|
102
|
+
```
|
|
103
|
+
🏥 Flyee Doctor — Health Report
|
|
104
|
+
═══════════════════════════════
|
|
105
|
+
|
|
106
|
+
✅ .flyee/ directory: OK (8/8 checks passed)
|
|
107
|
+
✅ Skills: OK (81 skills loaded)
|
|
108
|
+
✅ Agents: OK (22 agents, all skills resolved)
|
|
109
|
+
⚠️ Bridge: WARNING — flyee.json not found (offline mode)
|
|
110
|
+
✅ State: OK (consistent)
|
|
111
|
+
✅ Git: OK (clean, on branch main)
|
|
112
|
+
|
|
113
|
+
Summary: 5/6 passed, 1 warning
|
|
114
|
+
No action required.
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### With Issues:
|
|
118
|
+
|
|
119
|
+
```
|
|
120
|
+
🏥 Flyee Doctor — Health Report
|
|
121
|
+
═══════════════════════════════
|
|
122
|
+
|
|
123
|
+
✅ .flyee/ directory: OK
|
|
124
|
+
❌ Skills: FAIL
|
|
125
|
+
→ Missing SKILL.md in: core/skills/broken-skill/
|
|
126
|
+
→ Fix: Remove directory or add SKILL.md
|
|
127
|
+
⚠️ Bridge: WARNING
|
|
128
|
+
→ Stale lock file: .flyee/auto.lock (PID 12345 is dead)
|
|
129
|
+
→ Fix: rm .flyee/auto.lock
|
|
130
|
+
❌ State: FAIL
|
|
131
|
+
→ Task T-003 in "in_progress" for 48 hours
|
|
132
|
+
→ Fix: Update status or mark abandoned
|
|
133
|
+
|
|
134
|
+
Summary: 3/6 passed, 1 warning, 2 failures
|
|
135
|
+
|
|
136
|
+
Run `flyee doctor --fix` to auto-fix recoverable issues.
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## Auto-Fix
|
|
140
|
+
|
|
141
|
+
When `--fix` is passed (or during crash recovery):
|
|
142
|
+
|
|
143
|
+
| Issue | Auto-Fix Action |
|
|
144
|
+
|-------|----------------|
|
|
145
|
+
| Stale lock file | Delete lock file |
|
|
146
|
+
| Malformed JSONL line | Move to `.flyee/corrupted/` |
|
|
147
|
+
| Missing STATE.md | Regenerate from tasks.json |
|
|
148
|
+
| Abandoned tasks (>24h in_progress) | Mark as "blocked" |
|
|
149
|
+
| Missing config.json | Create with defaults |
|
|
150
|
+
|
|
151
|
+
## Event Emission
|
|
152
|
+
|
|
153
|
+
```python
|
|
154
|
+
bridge.emit_event("health.check", {
|
|
155
|
+
"checks_total": 6,
|
|
156
|
+
"checks_passed": 5,
|
|
157
|
+
"checks_warned": 1,
|
|
158
|
+
"checks_failed": 0,
|
|
159
|
+
"issues": [
|
|
160
|
+
{"check": "bridge", "level": "warning", "message": "flyee.json not found"}
|
|
161
|
+
],
|
|
162
|
+
"auto_fixed": []
|
|
163
|
+
})
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
## Integration
|
|
167
|
+
|
|
168
|
+
- **session-resilience**: Doctor runs on crash recovery
|
|
169
|
+
- **verification-gate**: Doctor is part of pre-deploy verification
|
|
170
|
+
- **state-machine**: Doctor validates STATE.md consistency
|
|
171
|
+
- **Flyee SaaS**: Health panel consumes doctor events (S-05)
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: hallucination-guard
|
|
3
|
+
description: Detects and rejects task completions where the agent produced no real work. Catches fabricated summaries, empty diffs, and zero-tool-call completions. Prevents wasted budget on phantom tasks.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Hallucination Guard
|
|
7
|
+
|
|
8
|
+
> Rejects task completions where no real work was done.
|
|
9
|
+
|
|
10
|
+
## Problem
|
|
11
|
+
|
|
12
|
+
Agents can produce detailed, confident-sounding summaries without actually writing any code or making any changes. This wastes budget and creates false progress.
|
|
13
|
+
|
|
14
|
+
Common hallucination patterns:
|
|
15
|
+
1. **Zero tool calls** — Agent describes what it "did" but never used any tools
|
|
16
|
+
2. **Read-only session** — Agent read files but never wrote/modified anything
|
|
17
|
+
3. **Self-referential completion** — Agent says "I've completed the task" without evidence
|
|
18
|
+
4. **Summary fabrication** — Detailed summary that describes non-existent changes
|
|
19
|
+
|
|
20
|
+
## Detection Protocol
|
|
21
|
+
|
|
22
|
+
### Before Marking Task Complete
|
|
23
|
+
|
|
24
|
+
Run the following checks:
|
|
25
|
+
|
|
26
|
+
```markdown
|
|
27
|
+
## Hallucination Guard Checklist
|
|
28
|
+
|
|
29
|
+
1. [ ] **Files modified?** — At least ONE file was created or modified during this task
|
|
30
|
+
2. [ ] **Meaningful diff?** — The diff contains substantive changes (not just whitespace/comments)
|
|
31
|
+
3. [ ] **Tool calls made?** — Agent used write/create/edit tools (not just read/search)
|
|
32
|
+
4. [ ] **Artifacts exist?** — Files mentioned in the summary actually exist on disk
|
|
33
|
+
5. [ ] **Verification ran?** — At least one verification command was executed
|
|
34
|
+
|
|
35
|
+
❌ Any unchecked → REJECT completion. Task needs actual work.
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### Check 1: File Modification Check
|
|
39
|
+
|
|
40
|
+
```
|
|
41
|
+
Scan the session for tool calls that WRITE:
|
|
42
|
+
- write_to_file / create_file
|
|
43
|
+
- replace_file_content / multi_replace_file_content
|
|
44
|
+
- run_command (that modifies files: mv, cp, sed, etc.)
|
|
45
|
+
|
|
46
|
+
If ZERO write operations → HALLUCINATION DETECTED
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### Check 2: Meaningful Diff Check
|
|
50
|
+
|
|
51
|
+
```
|
|
52
|
+
After task "completion", verify:
|
|
53
|
+
- git diff shows substantive changes
|
|
54
|
+
- Changed files are relevant to the task (not random files)
|
|
55
|
+
- Changes are not just adding/removing blank lines or comments
|
|
56
|
+
|
|
57
|
+
If diff is empty or trivial → HALLUCINATION DETECTED
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Check 3: Summary Cross-Reference
|
|
61
|
+
|
|
62
|
+
```
|
|
63
|
+
Parse the task summary for claims:
|
|
64
|
+
- "Created file X" → verify X exists
|
|
65
|
+
- "Modified function Y" → verify Y was actually changed
|
|
66
|
+
- "Added test for Z" → verify test file exists and contains Z
|
|
67
|
+
- "Fixed bug in W" → verify W has a diff
|
|
68
|
+
|
|
69
|
+
If claims don't match reality → HALLUCINATION DETECTED
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## Response Protocol
|
|
73
|
+
|
|
74
|
+
| Detection | Action |
|
|
75
|
+
|-----------|--------|
|
|
76
|
+
| Zero tool calls | REJECT. Log incident. Retry task with explicit instruction: "You must use tools to make changes." |
|
|
77
|
+
| Read-only session | REJECT. Log incident. Retry with: "Reading is not implementing. Write the code." |
|
|
78
|
+
| Summary fabrication | REJECT. Log incident. Show discrepancy to user. |
|
|
79
|
+
| Trivial diff | WARN. Ask user: "Only minor changes were made. Is this task actually done?" |
|
|
80
|
+
|
|
81
|
+
## Event Emission
|
|
82
|
+
|
|
83
|
+
When hallucination is detected, emit event for Flyee SaaS:
|
|
84
|
+
|
|
85
|
+
```json
|
|
86
|
+
{
|
|
87
|
+
"event_type": "hallucination.detected",
|
|
88
|
+
"payload": {
|
|
89
|
+
"task_id": "T-001",
|
|
90
|
+
"pattern": "zero_tool_calls",
|
|
91
|
+
"summary_length": 450,
|
|
92
|
+
"tool_calls_write": 0,
|
|
93
|
+
"tool_calls_read": 12,
|
|
94
|
+
"action": "rejected"
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## Integration
|
|
100
|
+
|
|
101
|
+
- **task-complete workflow**: Run hallucination guard BEFORE quality gates
|
|
102
|
+
- **cost-tracking**: Log wasted cost when hallucination is detected
|
|
103
|
+
- **KNOWLEDGE.md**: If pattern recurs, add to Lessons Learned
|
|
104
|
+
- **Flyee SaaS**: Dashboard shows hallucination rate per project (S-03)
|
|
105
|
+
|
|
106
|
+
## Anti-Patterns
|
|
107
|
+
|
|
108
|
+
- ❌ Don't count research/investigation tasks as hallucinations — some tasks are legitimately read-only
|
|
109
|
+
- ❌ Don't flag documentation-only tasks — writing docs IS real work
|
|
110
|
+
- ❌ Don't block on first detection — retry once before escalating to user
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: knowledge-persistence
|
|
3
|
+
description: Cross-session knowledge persistence. Global + per-project KNOWLEDGE.md that accumulates rules, patterns, and lessons learned across sessions. Prevents the agent from repeating mistakes or forgetting decisions.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Knowledge Persistence
|
|
7
|
+
|
|
8
|
+
> Persistent cross-session memory. Accumulates rules, patterns, and lessons learned.
|
|
9
|
+
|
|
10
|
+
## Purpose
|
|
11
|
+
|
|
12
|
+
Without knowledge persistence, every new session starts from zero. The agent re-discovers the same patterns, re-makes the same mistakes, and ignores previous decisions.
|
|
13
|
+
|
|
14
|
+
KNOWLEDGE.md solves this by providing a structured, append-only file that accumulates context across sessions.
|
|
15
|
+
|
|
16
|
+
## Files
|
|
17
|
+
|
|
18
|
+
| File | Scope | Location |
|
|
19
|
+
|------|-------|----------|
|
|
20
|
+
| `~/.flyee/KNOWLEDGE.md` | **Global** — applies to ALL projects | User home |
|
|
21
|
+
| `.flyee/KNOWLEDGE.md` | **Project** — applies to THIS project only | Project root |
|
|
22
|
+
|
|
23
|
+
Both files are loaded. Project-level overrides global when there's a conflict.
|
|
24
|
+
|
|
25
|
+
## Protocol
|
|
26
|
+
|
|
27
|
+
### 1. Load (Session Start)
|
|
28
|
+
|
|
29
|
+
At session start, the agent MUST:
|
|
30
|
+
|
|
31
|
+
```
|
|
32
|
+
1. Check ~/.flyee/KNOWLEDGE.md → load if exists
|
|
33
|
+
2. Check .flyee/KNOWLEDGE.md → load if exists
|
|
34
|
+
3. Merge: project rules take priority over global rules
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
### 2. Detect (During Work)
|
|
38
|
+
|
|
39
|
+
Knowledge entries are created when the agent:
|
|
40
|
+
|
|
41
|
+
- Discovers a **non-obvious pattern** in the codebase
|
|
42
|
+
- Makes a **mistake and corrects it** (lesson learned)
|
|
43
|
+
- The user provides a **rule or preference** that should persist
|
|
44
|
+
- A **decision** has broad impact beyond the current task
|
|
45
|
+
- A **workaround** is needed for a tool/library quirk
|
|
46
|
+
|
|
47
|
+
### 3. Write (Append-Only)
|
|
48
|
+
|
|
49
|
+
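KNOWLEDGE.md is **append-only**. Never delete entries; mark obsolete entries with `[OBSOLETE]` instead. The only exception is archiving old entries when the file grows too large (see Maintenance below).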
|
|
50
|
+
|
|
51
|
+
### Format
|
|
52
|
+
|
|
53
|
+
```markdown
|
|
54
|
+
# KNOWLEDGE
|
|
55
|
+
|
|
56
|
+
> Auto-maintained by flyee. Append-only.
|
|
57
|
+
|
|
58
|
+
## Rules
|
|
59
|
+
|
|
60
|
+
- Always use `pnpm` in this project, never `npm`
|
|
61
|
+
- API responses must follow the `{ data, error, meta }` envelope
|
|
62
|
+
- [OBSOLETE] Use v2 API endpoints → v3 is now the standard
|
|
63
|
+
|
|
64
|
+
## Patterns
|
|
65
|
+
|
|
66
|
+
- Database migrations use `alembic` with auto-generated revisions
|
|
67
|
+
- Components follow atomic design: atoms/ → molecules/ → organisms/
|
|
68
|
+
- Error handling uses Result<T, E> pattern, never throw
|
|
69
|
+
|
|
70
|
+
## Lessons Learned
|
|
71
|
+
|
|
72
|
+
- [2026-03-15] Prisma generates types in node_modules/.prisma — don't add to tsconfig paths
|
|
73
|
+
- [2026-03-20] React Server Components can't use useEffect — caught twice, now documented
|
|
74
|
+
- [2026-03-28] Bridge API returns 502 under load — implemented retry with exponential backoff
|
|
75
|
+
|
|
76
|
+
## Workarounds
|
|
77
|
+
|
|
78
|
+
- Tailwind v4 doesn't support `@apply` in CSS modules — use inline styles or cn() utility
|
|
79
|
+
- SQLite WAL mode required for concurrent reads during auto-mode
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### 4. Prompt Integration
|
|
83
|
+
|
|
84
|
+
When constructing a prompt for a task, inject relevant KNOWLEDGE.md entries:
|
|
85
|
+
|
|
86
|
+
```
|
|
87
|
+
## Active Knowledge
|
|
88
|
+
|
|
89
|
+
The following rules and patterns apply to this project:
|
|
90
|
+
{filtered entries from KNOWLEDGE.md relevant to the current task domain}
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
**Filtering rules:**
|
|
94
|
+
- If task is frontend → inject Patterns tagged with frontend/UI/CSS/React
|
|
95
|
+
- If task is backend → inject Patterns tagged with API/database/server
|
|
96
|
+
- Always inject Rules (they apply globally)
|
|
97
|
+
- Always inject recent Lessons Learned (last 10)
|
|
98
|
+
|
|
99
|
+
### 5. Maintenance
|
|
100
|
+
|
|
101
|
+
| Trigger | Action |
|
|
102
|
+
|---------|--------|
|
|
103
|
+
| User says "remember that..." | Append to Rules |
|
|
104
|
+
| Agent discovers non-obvious pattern | Append to Patterns |
|
|
105
|
+
| Agent makes a mistake and fixes it | Append to Lessons Learned |
|
|
106
|
+
| User corrects agent behavior | Append to Rules with `[USER]` tag |
|
|
107
|
+
| Entry is outdated | Mark `[OBSOLETE]`, don't delete |
|
|
108
|
+
| File exceeds 200 entries | Summarize old entries, archive to `KNOWLEDGE-ARCHIVE.md` |
|
|
109
|
+
|
|
110
|
+
## Anti-Patterns
|
|
111
|
+
|
|
112
|
+
- ❌ Don't store task-specific details (that's for DECISIONS.md)
|
|
113
|
+
- ❌ Don't store temporary workarounds without a date
|
|
114
|
+
- ❌ Don't duplicate what's already in ARCHITECTURE.md or PROJECT.md
|
|
115
|
+
- ❌ Don't store code snippets — reference files instead
|
|
116
|
+
|
|
117
|
+
## Integration Points
|
|
118
|
+
|
|
119
|
+
- **Session start**: Load and inject into context
|
|
120
|
+
- **Task completion**: Check if new knowledge was discovered
|
|
121
|
+
- **Error recovery**: Auto-generate Lessons Learned entry
|
|
122
|
+
- **User correction**: Auto-append to Rules
|
|
123
|
+
- **Bridge sync**: When connected to Flyee SaaS, knowledge syncs to Knowledge Hub (S-07)
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: quality-gates
|
|
3
|
+
description: Structured quality gates for planning and completion phases. 8-question evaluation protocol that ensures tasks meet minimum quality bar before being marked complete. Prevents premature completion and catches common oversights.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Quality Gates
|
|
7
|
+
|
|
8
|
+
> Structured quality gates with 8 mechanically verifiable questions.
|
|
9
|
+
|
|
10
|
+
## Purpose
|
|
11
|
+
|
|
12
|
+
Tasks often get marked "complete" when they're merely "code was written." Quality gates enforce a minimum bar by asking 8 specific questions that catch common oversights.
|
|
13
|
+
|
|
14
|
+
## When to Apply
|
|
15
|
+
|
|
16
|
+
| Phase | Gate | Trigger |
|
|
17
|
+
|-------|------|---------|
|
|
18
|
+
| Planning | **Planning Gate** | Before starting implementation |
|
|
19
|
+
| Task completion | **Completion Gate** | Before marking task done |
|
|
20
|
+
| Sprint completion | **Sprint Gate** | Before marking sprint/milestone done |
|
|
21
|
+
|
|
22
|
+
## The 8 Questions
|
|
23
|
+
|
|
24
|
+
### Planning Gate (Before Implementation)
|
|
25
|
+
|
|
26
|
+
```markdown
|
|
27
|
+
## Planning Quality Gate
|
|
28
|
+
|
|
29
|
+
1. [ ] **Scope bounded?** — Can this task complete in ONE context window?
|
|
30
|
+
2. [ ] **Must-haves defined?** — Are there mechanically verifiable outcomes (not vibes)?
|
|
31
|
+
3. [ ] **Dependencies clear?** — Do I know what files/modules this touches?
|
|
32
|
+
4. [ ] **Risk identified?** — What could go wrong? Is there a fallback?
|
|
33
|
+
5. [ ] **Knowledge checked?** — Did I check KNOWLEDGE.md for relevant patterns/lessons?
|
|
34
|
+
6. [ ] **Existing code reviewed?** — Did I READ the code I'm about to modify?
|
|
35
|
+
7. [ ] **Tests planned?** — Do I know HOW I'll verify this works?
|
|
36
|
+
8. [ ] **No gold-plating?** — Am I building ONLY what's needed?
|
|
37
|
+
|
|
38
|
+
❌ Any unchecked → STOP. Address before proceeding.
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### Completion Gate (Before Marking Task Done)
|
|
42
|
+
|
|
43
|
+
```markdown
|
|
44
|
+
## Completion Quality Gate
|
|
45
|
+
|
|
46
|
+
1. [ ] **Truths verified?** — All must-haves are mechanically confirmed (not "I think it works")?
|
|
47
|
+
2. [ ] **Artifacts exist?** — All files that should exist DO exist with real implementation?
|
|
48
|
+
3. [ ] **Key links wired?** — Imports, routes, and integrations are connected?
|
|
49
|
+
4. [ ] **No mock data?** — Production code uses real data sources, not hardcoded values?
|
|
50
|
+
5. [ ] **Tests pass?** — All configured verification commands succeed?
|
|
51
|
+
6. [ ] **No regressions?** — Existing tests still pass?
|
|
52
|
+
7. [ ] **Knowledge captured?** — Did I learn something that should go in KNOWLEDGE.md?
|
|
53
|
+
8. [ ] **Clean diff?** — Only intended changes in the diff? No debug logs, console.logs, TODOs?
|
|
54
|
+
|
|
55
|
+
❌ Any unchecked → STOP. Fix before completing.
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### Sprint Gate (Before Marking Sprint/Milestone Done)
|
|
59
|
+
|
|
60
|
+
```markdown
|
|
61
|
+
## Sprint Quality Gate
|
|
62
|
+
|
|
63
|
+
1. [ ] **All tasks complete?** — Every task passed its Completion Gate?
|
|
64
|
+
2. [ ] **Success criteria met?** — Roadmap success criteria are satisfied?
|
|
65
|
+
3. [ ] **Integration tested?** — Components work TOGETHER, not just individually?
|
|
66
|
+
4. [ ] **No orphaned code?** — No dead code, unused imports, or abandoned experiments?
|
|
67
|
+
5. [ ] **Documentation updated?** — README, API docs, ARCHITECTURE.md reflect changes?
|
|
68
|
+
6. [ ] **Cost within budget?** — Sprint cost didn't exceed ceiling?
|
|
69
|
+
7. [ ] **Knowledge persisted?** — KNOWLEDGE.md updated with sprint lessons?
|
|
70
|
+
8. [ ] **Stakeholder verifiable?** — A human can verify the sprint outcomes without reading code?
|
|
71
|
+
|
|
72
|
+
❌ Any unchecked → STOP. Address before sealing sprint.
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Evaluation Protocol
|
|
76
|
+
|
|
77
|
+
Gate questions are evaluated **in parallel** when possible:
|
|
78
|
+
|
|
79
|
+
```
|
|
80
|
+
Questions 1-4 → Can be checked mechanically (automated)
|
|
81
|
+
Questions 5-6 → Require running commands (semi-automated)
|
|
82
|
+
Questions 7-8 → Require judgment (agent evaluates)
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Scoring
|
|
86
|
+
|
|
87
|
+
| Score | Result | Action |
|
|
88
|
+
|-------|--------|--------|
|
|
89
|
+
| 8/8 | ✅ PASS | Proceed |
|
|
90
|
+
| 6-7/8 | 🟡 WARN | Proceed with documented exceptions |
|
|
91
|
+
| ≤5/8 | ❌ FAIL | STOP. Address failures before proceeding. |
|
|
92
|
+
|
|
93
|
+
### Exception Protocol
|
|
94
|
+
|
|
95
|
+
If a gate question genuinely doesn't apply:
|
|
96
|
+
|
|
97
|
+
```markdown
|
|
98
|
+
- Q7 (Knowledge captured): N/A — routine change, no new patterns discovered
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
Mark N/A with justification. N/A counts as passed. But **never mark more than 2 questions as N/A** — if 3+ don't apply, the gate itself may be wrong for this context.
|
|
102
|
+
|
|
103
|
+
## Integration
|
|
104
|
+
|
|
105
|
+
- **task-complete workflow**: Insert Completion Gate before `--update-task completed`
|
|
106
|
+
- **verification-gate skill**: Quality gates complement, not replace, verification
|
|
107
|
+
- **cost-tracking skill**: Q6 of Sprint Gate uses cost data
|
|
108
|
+
- **knowledge-persistence skill**: Q7 feeds into KNOWLEDGE.md
|
|
109
|
+
- **Flyee SaaS**: Gate results are emitted as events for dashboard visualization
|