special-agents 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +69 -0
- package/content/agents/builder.yaml +25 -0
- package/content/agents/planner.yaml +13 -0
- package/content/agents/qa.yaml +16 -0
- package/content/agents/ticket-maker.yaml +11 -0
- package/content/defaults.yaml +13 -0
- package/content/docs/README.md +42 -0
- package/content/docs/admins.md +46 -0
- package/content/docs/ai-costs.md +38 -0
- package/content/docs/ai-evals.md +55 -0
- package/content/docs/ai.md +141 -0
- package/content/docs/api.md +51 -0
- package/content/docs/architecture.md +61 -0
- package/content/docs/business.md +49 -0
- package/content/docs/data-governance.md +67 -0
- package/content/docs/decisions/0000-template.md +29 -0
- package/content/docs/decisions/README.md +30 -0
- package/content/docs/docs.index.yaml +25 -0
- package/content/docs/features.md +41 -0
- package/content/docs/local-cloud.md +58 -0
- package/content/docs/operations.md +69 -0
- package/content/docs/release-checklist.md +56 -0
- package/content/docs/scalability.md +81 -0
- package/content/docs/security.md +82 -0
- package/content/docs/tickets.md +45 -0
- package/content/docs/users.md +43 -0
- package/content/preamble.md +13 -0
- package/content/rules/base/code-quality.md +20 -0
- package/content/rules/base/core.md +17 -0
- package/content/rules/base/definition-of-done.md +21 -0
- package/content/rules/base/git-safety.md +16 -0
- package/content/rules/base/response-expectations.md +18 -0
- package/content/rules/domain/accessibility.md +14 -0
- package/content/rules/domain/ai-cost.md +21 -0
- package/content/rules/domain/ai-evals.md +25 -0
- package/content/rules/domain/ai-governance.md +16 -0
- package/content/rules/domain/ai-reproducibility.md +19 -0
- package/content/rules/domain/ai-safety.md +19 -0
- package/content/rules/domain/data-governance.md +17 -0
- package/content/rules/domain/observability.md +18 -0
- package/content/rules/domain/robustness.md +21 -0
- package/content/rules/domain/scalability.md +18 -0
- package/content/rules/domain/security.md +28 -0
- package/content/rules/packs.index.yaml +177 -0
- package/content/rules/process/api-docs.md +16 -0
- package/content/rules/process/architecture.md +14 -0
- package/content/rules/process/business-docs.md +13 -0
- package/content/rules/process/ci.md +18 -0
- package/content/rules/process/dependencies.md +17 -0
- package/content/rules/process/project-docs.md +35 -0
- package/content/rules/process/release.md +16 -0
- package/content/rules/process/tdd.md +16 -0
- package/content/rules/process/testing.md +28 -0
- package/content/rules/process/tickets.md +17 -0
- package/content/rules/templated/database.md +16 -0
- package/content/rules/templated/infra.md +18 -0
- package/content/rules/templated/stack.md +19 -0
- package/content/skills/better-sqlite3-rebuild/SKILL.md +14 -0
- package/content/skills/grill-me/SKILL.md +10 -0
- package/content/skills/improve-codebase-architecture/REFERENCE.md +78 -0
- package/content/skills/improve-codebase-architecture/SKILL.md +76 -0
- package/content/skills/prd-to-issues/SKILL.md +92 -0
- package/content/skills/tdd/SKILL.md +107 -0
- package/content/skills/tdd/deep-modules.md +33 -0
- package/content/skills/tdd/interface-design.md +31 -0
- package/content/skills/tdd/mocking.md +59 -0
- package/content/skills/tdd/refactoring.md +10 -0
- package/content/skills/tdd/tests.md +61 -0
- package/content/skills/write-a-prd/SKILL.md +74 -0
- package/dist/agents.d.ts +11 -0
- package/dist/agents.js +31 -0
- package/dist/compile.d.ts +79 -0
- package/dist/compile.js +113 -0
- package/dist/content.d.ts +49 -0
- package/dist/content.js +73 -0
- package/dist/index.d.ts +12 -0
- package/dist/index.js +12 -0
- package/dist/resolve.d.ts +46 -0
- package/dist/resolve.js +54 -0
- package/dist/skills.d.ts +11 -0
- package/dist/skills.js +45 -0
- package/dist/template.d.ts +22 -0
- package/dist/template.js +34 -0
- package/node_modules/rafi-spec/dist/index.d.ts +4 -0
- package/node_modules/rafi-spec/dist/index.js +4 -0
- package/node_modules/rafi-spec/dist/schemas.d.ts +185 -0
- package/node_modules/rafi-spec/dist/schemas.js +95 -0
- package/node_modules/rafi-spec/dist/types.d.ts +111 -0
- package/node_modules/rafi-spec/dist/types.js +6 -0
- package/node_modules/rafi-spec/dist/validate.d.ts +16 -0
- package/node_modules/rafi-spec/dist/validate.js +40 -0
- package/node_modules/rafi-spec/package.json +35 -0
- package/node_modules/rafi-spec/src/index.ts +19 -0
- package/node_modules/rafi-spec/src/schemas.ts +102 -0
- package/node_modules/rafi-spec/src/types.ts +134 -0
- package/node_modules/rafi-spec/src/validate.ts +60 -0
- package/package.json +39 -0
package/README.md
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# special-agents
|
|
2
|
+
|
|
3
|
+
29 composable best-practice rule packs, 6 skills, and 4 agent roles for Claude Code and Codex.
|
|
4
|
+
|
|
5
|
+
`special-agents` is the content layer of [Rafi](https://github.com/ttante/foreman). It ships both the authoring source (`content/`) and prebuilt composition logic, so it can be used as a library, consumed by `rafi compile`, or extended directly.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```sh
|
|
10
|
+
npm install special-agents
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Usage
|
|
14
|
+
|
|
15
|
+
```ts
|
|
16
|
+
import { getAgent, getSkill, emitCompiledBundles } from "special-agents";
|
|
17
|
+
|
|
18
|
+
// Get a composed role bundle (system prompt + skills list)
|
|
19
|
+
const { system, skills } = getAgent("builder");
|
|
20
|
+
// system → assembled prompt with all applicable rule packs rendered
|
|
21
|
+
// skills → ["tdd", "improve-codebase-architecture"]
|
|
22
|
+
|
|
23
|
+
// Write compiled role bundles + AGENTS.md + CLAUDE.md to a target repo
|
|
24
|
+
emitCompiledBundles("./my-repo", {
|
|
25
|
+
defaults: {
|
|
26
|
+
stack: { frontend: "React", backend: "Node.js", database: "PostgreSQL", cloud: "AWS", packageManager: "pnpm" },
|
|
27
|
+
flags: { usesAI: false, hasFrontend: true, runsInCloud: true },
|
|
28
|
+
},
|
|
29
|
+
});
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Roles
|
|
33
|
+
|
|
34
|
+
| Role | Description |
|
|
35
|
+
|---|---|
|
|
36
|
+
| `builder` | Implements one ticket/step per turn |
|
|
37
|
+
| `qa` | Reviews and verifies completed work |
|
|
38
|
+
| `planner` | Produces the project plan and ticket list |
|
|
39
|
+
| `ticket-maker` | Converts requirements into structured tickets |
|
|
40
|
+
|
|
41
|
+
## Rule packs
|
|
42
|
+
|
|
43
|
+
29 packs across four categories. Conditional packs are included only when the matching project flag (`hasFrontend`, `usesAI`, or `runsInCloud`) is `true`.
|
|
44
|
+
|
|
45
|
+
| Category | Packs | Condition |
|
|
46
|
+
|---|---|---|
|
|
47
|
+
| base | core, git-safety, code-quality, definition-of-done, response-expectations | always |
|
|
48
|
+
| process | testing, tdd, ci, tickets, api-docs, release, dependencies, architecture, project-docs, business-docs | always |
|
|
49
|
+
| domain | security, robustness, scalability, observability, data-governance | always |
|
|
50
|
+
| domain | accessibility | `hasFrontend` |
|
|
51
|
+
| domain | ai-safety, ai-governance, ai-evals, ai-reproducibility, ai-cost | `usesAI` |
|
|
52
|
+
| templated | stack, database, infra | always / `runsInCloud` |
|
|
53
|
+
|
|
54
|
+
## Content structure
|
|
55
|
+
|
|
56
|
+
```
|
|
57
|
+
content/
|
|
58
|
+
rules/ rule packs (base/, process/, domain/, templated/)
|
|
59
|
+
skills/ SKILL.md units (tdd, grill-me, improve-codebase-architecture, ...)
|
|
60
|
+
agents/ role manifests (builder, qa, planner, ticket-maker .yaml)
|
|
61
|
+
docs/ starter doc templates for new repos
|
|
62
|
+
defaults.yaml default stack values and flags
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Part of Rafi
|
|
66
|
+
|
|
67
|
+
- **`special-agents`** — this library
|
|
68
|
+
- **`ai-foreman`** — runtime that drives agents through a ticket loop
|
|
69
|
+
- **`@rafi-ai/cli`** — CLI for `rafi create` and `rafi compile`
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
name: builder
|
|
2
|
+
description: Implements one ticket or step per turn, test-first, to a senior standard.
|
|
3
|
+
role: builder
|
|
4
|
+
packs:
|
|
5
|
+
- base/*
|
|
6
|
+
- process/tdd
|
|
7
|
+
- process/testing
|
|
8
|
+
- process/api-docs
|
|
9
|
+
- domain/security
|
|
10
|
+
- domain/robustness
|
|
11
|
+
- templated/*
|
|
12
|
+
skills:
|
|
13
|
+
- tdd
|
|
14
|
+
- improve-codebase-architecture
|
|
15
|
+
conditionalPacks:
|
|
16
|
+
ai:
|
|
17
|
+
- domain/ai-safety
|
|
18
|
+
- domain/ai-evals
|
|
19
|
+
- domain/ai-cost
|
|
20
|
+
- domain/ai-reproducibility
|
|
21
|
+
- domain/ai-governance
|
|
22
|
+
frontend:
|
|
23
|
+
- domain/accessibility
|
|
24
|
+
model: null
|
|
25
|
+
effort: null
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
name: planner
|
|
2
|
+
description: Plans the next N tickets or steps before any implementation. Outputs an ordered list, implements nothing.
|
|
3
|
+
role: planner
|
|
4
|
+
packs:
|
|
5
|
+
- base/*
|
|
6
|
+
- process/architecture
|
|
7
|
+
- process/tickets
|
|
8
|
+
- process/project-docs
|
|
9
|
+
skills:
|
|
10
|
+
- write-a-prd
|
|
11
|
+
- prd-to-issues
|
|
12
|
+
model: null
|
|
13
|
+
effort: null
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
name: qa
|
|
2
|
+
description: Reviews the just-completed ticket or step for accuracy, tests, and security. Skeptical, not a rubber stamp.
|
|
3
|
+
role: qa
|
|
4
|
+
packs:
|
|
5
|
+
- base/*
|
|
6
|
+
- process/tdd
|
|
7
|
+
- process/testing
|
|
8
|
+
- domain/security
|
|
9
|
+
skills:
|
|
10
|
+
- grill-me
|
|
11
|
+
- tdd
|
|
12
|
+
conditionalPacks:
|
|
13
|
+
frontend:
|
|
14
|
+
- domain/accessibility
|
|
15
|
+
model: null
|
|
16
|
+
effort: null
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
name: ticket-maker
|
|
2
|
+
description: Populates the ticket backlog with well-formed tickets (ID, value, acceptance criteria, test expectations); asks the user for format/style preferences.
|
|
3
|
+
role: ticket-maker
|
|
4
|
+
packs:
|
|
5
|
+
- base/*
|
|
6
|
+
- process/tickets
|
|
7
|
+
- process/project-docs
|
|
8
|
+
- process/architecture
|
|
9
|
+
skills: []
|
|
10
|
+
model: null
|
|
11
|
+
effort: null
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Default stack values + flags. These reproduce the original hardcoded rules.md
|
|
2
|
+
# guidance: rendering the templated packs with these defaults yields the original rules.md text.
|
|
3
|
+
# `rafi create` writes a project.yaml that overrides these per project.
|
|
4
|
+
stack:
|
|
5
|
+
frontend: "React with TypeScript"
|
|
6
|
+
backend: "Node.js, Python, or both, based on the project needs"
|
|
7
|
+
database: "PostgreSQL"
|
|
8
|
+
cloud: "AWS"
|
|
9
|
+
packageManager: "pnpm"
|
|
10
|
+
flags:
|
|
11
|
+
hasFrontend: true
|
|
12
|
+
usesAI: false
|
|
13
|
+
runsInCloud: true
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# Starter Documentation Templates
|
|
2
|
+
|
|
3
|
+
Copy these files into new app repos when bootstrapping AI-agent-friendly project documentation.
|
|
4
|
+
|
|
5
|
+
## Baseline Templates
|
|
6
|
+
|
|
7
|
+
Use these for most application repos:
|
|
8
|
+
|
|
9
|
+
- `architecture.md`: system overview, modules, integrations, tradeoffs, and architecture digest.
|
|
10
|
+
- `features.md`: user-facing capabilities, roles, permissions, and workflows.
|
|
11
|
+
- `api.md`: API documentation index and contract notes.
|
|
12
|
+
- `users.md`: normal user documentation.
|
|
13
|
+
- `admins.md`: admin/operator documentation.
|
|
14
|
+
- `business.md`: costs, pricing, product assumptions, risks, and business notes.
|
|
15
|
+
- `operations.md`: deployment, monitoring, incidents, backups, and runbooks.
|
|
16
|
+
- `security.md`: auth, permissions, threat model, secrets, abuse controls, and incident response.
|
|
17
|
+
- `data-governance.md`: PII, consent, retention, exports, deletion, and training/eval data rules.
|
|
18
|
+
- `local-cloud.md`: local and cloud runtime paths, parity, and environment differences.
|
|
19
|
+
- `scalability.md`: scaling plan for server, cloud, frontend, databases, AI/model usage, and architecture.
|
|
20
|
+
- `tickets.md`: ticket log, roadmap, backlog, status, acceptance criteria, and future ideas.
|
|
21
|
+
- `release-checklist.md`: release readiness, smoke tests, migrations, rollback, and post-release checks.
|
|
22
|
+
- `decisions/README.md`: ADR index and decision history.
|
|
23
|
+
- `decisions/0000-template.md`: reusable ADR template.
|
|
24
|
+
|
|
25
|
+
## AI Templates
|
|
26
|
+
|
|
27
|
+
Use these when the app includes LLMs, AI generation, model calls, AI-assisted decisions, or future model-training plans:
|
|
28
|
+
|
|
29
|
+
- `ai.md`: AI workflows, models, prompts, safety controls, replayability, and training-data strategy.
|
|
30
|
+
- `ai-evals.md`: eval suites, golden examples, adversarial cases, quality gates, and regression results.
|
|
31
|
+
- `ai-costs.md`: AI cost per task, provider/model spend, high-cost workflows, and optimization notes.
|
|
32
|
+
|
|
33
|
+
## Usage
|
|
34
|
+
|
|
35
|
+
The easiest path is the `rafi` CLI:
|
|
36
|
+
|
|
37
|
+
```sh
|
|
38
|
+
rafi create /path/to/new-repo
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
By default, `rafi create` skips doc files that already exist. Use `--force` to overwrite existing files.
|
|
42
|
+
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# Admin And Operator Guide
|
|
2
|
+
|
|
3
|
+
Use this document for admin, support, and operator workflows.
|
|
4
|
+
|
|
5
|
+
## Audience
|
|
6
|
+
|
|
7
|
+
- Admin roles:
|
|
8
|
+
- Operator roles:
|
|
9
|
+
- Last reviewed:
|
|
10
|
+
|
|
11
|
+
## Admin Capabilities
|
|
12
|
+
|
|
13
|
+
| Capability | Role Required | Risk Level | Audit Logged? | Notes |
|
|
14
|
+
|---|---|---|---|---|
|
|
15
|
+
| `<capability>` | `<role>` | `<low/medium/high>` | `<yes/no>` | `<notes>` |
|
|
16
|
+
|
|
17
|
+
## Common Admin Workflows
|
|
18
|
+
|
|
19
|
+
### `<Workflow Name>`
|
|
20
|
+
|
|
21
|
+
- Goal:
|
|
22
|
+
- Required role:
|
|
23
|
+
- Steps:
|
|
24
|
+
- Verification:
|
|
25
|
+
- Rollback/recovery:
|
|
26
|
+
|
|
27
|
+
## User And Permission Management
|
|
28
|
+
|
|
29
|
+
- Invite users:
|
|
30
|
+
- Change roles:
|
|
31
|
+
- Disable users:
|
|
32
|
+
- Review access:
|
|
33
|
+
|
|
34
|
+
## Operational Tasks
|
|
35
|
+
|
|
36
|
+
- Data export:
|
|
37
|
+
- Billing/support actions:
|
|
38
|
+
- Background job actions:
|
|
39
|
+
- Manual recovery steps:
|
|
40
|
+
|
|
41
|
+
## Audit And Compliance
|
|
42
|
+
|
|
43
|
+
- Audit log location:
|
|
44
|
+
- Sensitive actions:
|
|
45
|
+
- Review cadence:
|
|
46
|
+
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# AI Cost Tracking
|
|
2
|
+
|
|
3
|
+
Use this document to track AI cost per task, model/provider spend, high-cost workflows, and cost optimization notes.
|
|
4
|
+
|
|
5
|
+
## Cost Strategy
|
|
6
|
+
|
|
7
|
+
- Cost owner:
|
|
8
|
+
- Monthly budget target:
|
|
9
|
+
- Alert threshold:
|
|
10
|
+
- Primary cost drivers:
|
|
11
|
+
- Last reviewed:
|
|
12
|
+
|
|
13
|
+
## Cost Per AI Task
|
|
14
|
+
|
|
15
|
+
| Task | Workflow | Provider | Model | Avg Input Tokens | Avg Output Tokens | Avg Retries | Avg Cost | Target Cost | Notes |
|
|
16
|
+
|---|---|---|---|---:|---:|---:|---:|---:|---|
|
|
17
|
+
| `<task>` | `<workflow>` | `<provider>` | `<model>` | `<tokens>` | `<tokens>` | `<count>` | `<cost>` | `<cost>` | `<notes>` |
|
|
18
|
+
|
|
19
|
+
## High-Cost App Operations
|
|
20
|
+
|
|
21
|
+
Track non-AI costs that may become significant early or at scale.
|
|
22
|
+
|
|
23
|
+
| Operation | Cost Source | Cost Driver | Current Estimate | Scale Risk | Mitigation |
|
|
24
|
+
|---|---|---|---:|---|---|
|
|
25
|
+
| `<operation>` | `<cloud/api/storage/email/sms/db/etc>` | `<driver>` | `<estimate>` | `<low/medium/high>` | `<plan>` |
|
|
26
|
+
|
|
27
|
+
## Cost Events
|
|
28
|
+
|
|
29
|
+
| Date | Event | Impact | Root Cause | Fix | Follow-Up Ticket |
|
|
30
|
+
|---|---|---|---|---|---|
|
|
31
|
+
| `<date>` | `<event>` | `<cost impact>` | `<cause>` | `<fix>` | `<ticket>` |
|
|
32
|
+
|
|
33
|
+
## Optimization Ideas
|
|
34
|
+
|
|
35
|
+
| Idea | Area | Expected Savings | Risk | Status | Ticket |
|
|
36
|
+
|---|---|---:|---|---|---|
|
|
37
|
+
| `<idea>` | `<workflow>` | `<estimate>` | `<risk>` | `<status>` | `<ticket>` |
|
|
38
|
+
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# AI Eval Log
|
|
2
|
+
|
|
3
|
+
Use this document to track AI quality gates, eval sets, golden examples, adversarial cases, and prompt/model regression results.
|
|
4
|
+
|
|
5
|
+
## Eval Strategy
|
|
6
|
+
|
|
7
|
+
- Primary AI workflows covered:
|
|
8
|
+
- Quality goals:
|
|
9
|
+
- Minimum passing threshold:
|
|
10
|
+
- Human review requirements:
|
|
11
|
+
- Regression policy:
|
|
12
|
+
- Prompt/model promotion policy:
|
|
13
|
+
- Last reviewed:
|
|
14
|
+
|
|
15
|
+
## Eval Suites
|
|
16
|
+
|
|
17
|
+
| Eval Suite | Workflow | Purpose | Cases | Passing Threshold | Owner | Status |
|
|
18
|
+
|---|---|---|---:|---|---|---|
|
|
19
|
+
| `<suite>` | `<workflow>` | `<purpose>` | `<count>` | `<threshold>` | `<owner>` | `<planned/live>` |
|
|
20
|
+
|
|
21
|
+
## Golden Examples
|
|
22
|
+
|
|
23
|
+
Golden examples are reviewed, known-good input/output pairs that define what the desired output looks like.
|
|
24
|
+
|
|
25
|
+
| Example ID | Workflow | Input Summary | Expected Output Summary | Why It Matters | Status |
|
|
26
|
+
|---|---|---|---|---|---|
|
|
27
|
+
| `<example-id>` | `<workflow>` | `<input>` | `<output>` | `<reason>` | `<active>` |
|
|
28
|
+
|
|
29
|
+
## Edge And Failure Cases
|
|
30
|
+
|
|
31
|
+
| Case ID | Workflow | Scenario | Expected Handling | Risk | Status |
|
|
32
|
+
|---|---|---|---|---|---|
|
|
33
|
+
| `<case-id>` | `<workflow>` | `<scenario>` | `<behavior>` | `<low/medium/high>` | `<active>` |
|
|
34
|
+
|
|
35
|
+
## Adversarial Cases
|
|
36
|
+
|
|
37
|
+
| Case ID | Attack Type | Input Summary | Expected Defense | Status |
|
|
38
|
+
|---|---|---|---|---|
|
|
39
|
+
| `<case-id>` | `<prompt injection/jailbreak/data exfiltration/tool misuse>` | `<input>` | `<defense>` | `<active>` |
|
|
40
|
+
|
|
41
|
+
## Eval Runs
|
|
42
|
+
|
|
43
|
+
| Date | Change Tested | Model/Prompt Version | Suite | Score | Result | Notes | Follow-Up Ticket |
|
|
44
|
+
|---|---|---|---|---:|---|---|---|
|
|
45
|
+
| `<date>` | `<change>` | `<version>` | `<suite>` | `<score>` | `<pass/fail>` | `<notes>` | `<ticket>` |
|
|
46
|
+
|
|
47
|
+
## Promotion Decisions
|
|
48
|
+
|
|
49
|
+
| Date | Prompt/Model Change | Eval Result | Decision | Approver | Rollback Plan |
|
|
50
|
+
|---|---|---|---|---|---|
|
|
51
|
+
| `<date>` | `<change>` | `<result>` | `<promote/hold/rollback>` | `<approver>` | `<plan>` |
|
|
52
|
+
|
|
53
|
+
## Regression Notes
|
|
54
|
+
|
|
55
|
+
- `<date>`: `<what regressed, why, and what changed next>`
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# AI System Plan
|
|
2
|
+
|
|
3
|
+
Use this document for apps that include LLMs, AI generation, model calls, AI-assisted decisions, or future model-training plans.
|
|
4
|
+
|
|
5
|
+
## Current Status
|
|
6
|
+
|
|
7
|
+
- AI enabled: `<yes/no>`
|
|
8
|
+
- Primary AI use cases: `<summary>`
|
|
9
|
+
- Current model/provider stack: `<models/providers>`
|
|
10
|
+
- Current risk level: `<low/medium/high>`
|
|
11
|
+
- Last reviewed: `<date>`
|
|
12
|
+
|
|
13
|
+
## AI Workflows
|
|
14
|
+
|
|
15
|
+
| Workflow | User/admin value | Trigger | Inputs | Outputs | Human review? | Status |
|
|
16
|
+
|---|---|---|---|---|---|---|
|
|
17
|
+
| `<workflow>` | `<value>` | `<event>` | `<data>` | `<result>` | `<yes/no>` | `<planned/live>` |
|
|
18
|
+
|
|
19
|
+
## Model And Provider Choices
|
|
20
|
+
|
|
21
|
+
| Use case | Provider | Model | Why this model | Alternatives considered | Fallback | Owner |
|
|
22
|
+
|---|---|---|---|---|---|---|
|
|
23
|
+
| `<use case>` | `<provider>` | `<model>` | `<reason>` | `<options>` | `<fallback>` | `<owner>` |
|
|
24
|
+
|
|
25
|
+
## Model Governance
|
|
26
|
+
|
|
27
|
+
- Approved providers/models:
|
|
28
|
+
- Model-change approval rules:
|
|
29
|
+
- Fallback model rules:
|
|
30
|
+
- Required evals before promotion:
|
|
31
|
+
- Cost/latency thresholds:
|
|
32
|
+
- Safety thresholds:
|
|
33
|
+
- Rollback process:
|
|
34
|
+
|
|
35
|
+
## Prompt Inventory
|
|
36
|
+
|
|
37
|
+
| Prompt ID | Version | Purpose | Owner | Eval coverage | Last changed | Rollback version |
|
|
38
|
+
|---|---|---|---|---|---|---|
|
|
39
|
+
| `<prompt-id>` | `<v1>` | `<purpose>` | `<owner>` | `<eval link>` | `<date>` | `<version>` |
|
|
40
|
+
|
|
41
|
+
## Safety Controls
|
|
42
|
+
|
|
43
|
+
Document how the app prevents unsafe or abusive AI behavior.
|
|
44
|
+
|
|
45
|
+
- Prompt injection protection:
|
|
46
|
+
- Jailbreak protection:
|
|
47
|
+
- Data exfiltration protection:
|
|
48
|
+
- Tool-use boundaries:
|
|
49
|
+
- Human review requirements:
|
|
50
|
+
- Content safety checks:
|
|
51
|
+
- Abuse monitoring:
|
|
52
|
+
- Escalation path:
|
|
53
|
+
|
|
54
|
+
## Red-Team Testing
|
|
55
|
+
|
|
56
|
+
| Scenario | Attack Type | Expected Defense | Eval/Test Link | Status |
|
|
57
|
+
|---|---|---|---|---|
|
|
58
|
+
| `<scenario>` | `<prompt injection/jailbreak/data exfiltration/tool misuse/cost abuse>` | `<defense>` | `<link>` | `<status>` |
|
|
59
|
+
|
|
60
|
+
## AI Incident Response
|
|
61
|
+
|
|
62
|
+
- Harmful output response:
|
|
63
|
+
- Wrong/low-quality output response:
|
|
64
|
+
- Private data exposure response:
|
|
65
|
+
- High-cost/abusive usage response:
|
|
66
|
+
- Policy-violating output response:
|
|
67
|
+
- Owner/escalation path:
|
|
68
|
+
|
|
69
|
+
## Quality And Confidence
|
|
70
|
+
|
|
71
|
+
- Definition of high-quality output:
|
|
72
|
+
- Required confidence signals:
|
|
73
|
+
- Model self-checking default: `3 checks`
|
|
74
|
+
- Model self-checking configurable count:
|
|
75
|
+
- Model self-checking toggle:
|
|
76
|
+
- AI steps that use self-checking:
|
|
77
|
+
- AI steps where self-checking is disabled and why:
|
|
78
|
+
- QA checks:
|
|
79
|
+
- Auto-approval threshold:
|
|
80
|
+
- Admin approval threshold:
|
|
81
|
+
- Rejection criteria:
|
|
82
|
+
- Known weak spots:
|
|
83
|
+
|
|
84
|
+
## Reproducibility And Replayability
|
|
85
|
+
|
|
86
|
+
Record enough information to replay important generations without storing sensitive data unnecessarily.
|
|
87
|
+
|
|
88
|
+
- Prompt version:
|
|
89
|
+
- Rendered prompt storage policy:
|
|
90
|
+
- Input data reference policy:
|
|
91
|
+
- Model/provider and parameters:
|
|
92
|
+
- Retrieval context:
|
|
93
|
+
- Tool calls:
|
|
94
|
+
- Output:
|
|
95
|
+
- Validation results:
|
|
96
|
+
- Cost:
|
|
97
|
+
- Latency:
|
|
98
|
+
- User/admin decision:
|
|
99
|
+
- Retention policy:
|
|
100
|
+
|
|
101
|
+
## Learning Loop
|
|
102
|
+
|
|
103
|
+
Use this section to describe how failed generations become future improvements.
|
|
104
|
+
|
|
105
|
+
- Failed-generation capture:
|
|
106
|
+
- Correction generation:
|
|
107
|
+
- QA review:
|
|
108
|
+
- Auto-approval rules:
|
|
109
|
+
- Admin approval rules:
|
|
110
|
+
- Approved correction storage:
|
|
111
|
+
- Eval updates:
|
|
112
|
+
- Prompt updates:
|
|
113
|
+
- Product changes:
|
|
114
|
+
|
|
115
|
+
## Dataset Governance
|
|
116
|
+
|
|
117
|
+
- Dataset sources:
|
|
118
|
+
- Consent requirements:
|
|
119
|
+
- Labeling process:
|
|
120
|
+
- Quality thresholds:
|
|
121
|
+
- Access controls:
|
|
122
|
+
- Retention rules:
|
|
123
|
+
- PII redaction/tokenization:
|
|
124
|
+
- Allowed use for evals:
|
|
125
|
+
- Allowed use for fine-tuning/custom training:
|
|
126
|
+
- Versioning strategy:
|
|
127
|
+
|
|
128
|
+
## Future Custom Model Training
|
|
129
|
+
|
|
130
|
+
- Why custom training may be useful:
|
|
131
|
+
- Data needed:
|
|
132
|
+
- Data quality threshold:
|
|
133
|
+
- Privacy and consent constraints:
|
|
134
|
+
- Retention rules:
|
|
135
|
+
- Estimated cost:
|
|
136
|
+
- When training is worth it:
|
|
137
|
+
- Lighter alternative if full training is too heavy:
|
|
138
|
+
|
|
139
|
+
## Open Questions
|
|
140
|
+
|
|
141
|
+
- `<question>`
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# API Documentation
|
|
2
|
+
|
|
3
|
+
Use this document to summarize API contracts and link to generated API references.
|
|
4
|
+
|
|
5
|
+
## API Overview
|
|
6
|
+
|
|
7
|
+
- API type: `<HTTP/library/events/etc>`
|
|
8
|
+
- Base URL:
|
|
9
|
+
- Auth model:
|
|
10
|
+
- Versioning strategy:
|
|
11
|
+
- Generated docs:
|
|
12
|
+
- Last reviewed:
|
|
13
|
+
|
|
14
|
+
## Contract Sources
|
|
15
|
+
|
|
16
|
+
| Contract | Source | Generated? | Command | Notes |
|
|
17
|
+
|---|---|---|---|---|
|
|
18
|
+
| `<OpenAPI/TypeDoc/etc>` | `<path>` | `<yes/no>` | `<command>` | `<notes>` |
|
|
19
|
+
|
|
20
|
+
## Authentication And Authorization
|
|
21
|
+
|
|
22
|
+
- Authentication:
|
|
23
|
+
- Authorization:
|
|
24
|
+
- Token/session behavior:
|
|
25
|
+
- Rate limits:
|
|
26
|
+
- Error behavior:
|
|
27
|
+
|
|
28
|
+
## Endpoints Or Public Interfaces
|
|
29
|
+
|
|
30
|
+
| Method/Interface | Path/Name | Purpose | Auth | Request | Response | Errors |
|
|
31
|
+
|---|---|---|---|---|---|---|
|
|
32
|
+
| `<method>` | `<path>` | `<purpose>` | `<auth>` | `<schema>` | `<schema>` | `<errors>` |
|
|
33
|
+
|
|
34
|
+
## Error Format
|
|
35
|
+
|
|
36
|
+
```json
|
|
37
|
+
{
|
|
38
|
+
"error": {
|
|
39
|
+
"code": "<code>",
|
|
40
|
+
"message": "<safe message>",
|
|
41
|
+
"requestId": "<request id>"
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Breaking Changes And Deprecations
|
|
47
|
+
|
|
48
|
+
| Change | Version/Date | Migration Path | Owner |
|
|
49
|
+
|---|---|---|---|
|
|
50
|
+
| `<change>` | `<version/date>` | `<steps>` | `<owner>` |
|
|
51
|
+
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# Architecture Digest
|
|
2
|
+
|
|
3
|
+
Use this document to explain how the app works now. Keep it current as the architecture changes.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
- Product purpose:
|
|
8
|
+
- Primary users:
|
|
9
|
+
- Core workflows:
|
|
10
|
+
- Architecture status:
|
|
11
|
+
- Last reviewed:
|
|
12
|
+
|
|
13
|
+
## System Diagram
|
|
14
|
+
|
|
15
|
+
Describe or link to the current system diagram.
|
|
16
|
+
|
|
17
|
+
```text
|
|
18
|
+
<client> -> <api> -> <database/services>
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Runtime Components
|
|
22
|
+
|
|
23
|
+
| Component | Responsibility | Runtime | Owner | Notes |
|
|
24
|
+
|---|---|---|---|---|
|
|
25
|
+
| `<component>` | `<responsibility>` | `<node/python/etc>` | `<owner>` | `<notes>` |
|
|
26
|
+
|
|
27
|
+
## Data Flow
|
|
28
|
+
|
|
29
|
+
| Flow | Source | Destination | Data | Security/Privacy Notes |
|
|
30
|
+
|---|---|---|---|---|
|
|
31
|
+
| `<flow>` | `<source>` | `<destination>` | `<data>` | `<notes>` |
|
|
32
|
+
|
|
33
|
+
## Integrations
|
|
34
|
+
|
|
35
|
+
| Integration | Purpose | Direction | Auth | Failure Handling | Owner |
|
|
36
|
+
|---|---|---|---|---|---|
|
|
37
|
+
| `<service>` | `<purpose>` | `<inbound/outbound>` | `<auth>` | `<handling>` | `<owner>` |
|
|
38
|
+
|
|
39
|
+
## Infrastructure
|
|
40
|
+
|
|
41
|
+
- Local runtime:
|
|
42
|
+
- Cloud runtime:
|
|
43
|
+
- Default cloud provider:
|
|
44
|
+
- Deployment approach:
|
|
45
|
+
- Secrets:
|
|
46
|
+
- Observability:
|
|
47
|
+
|
|
48
|
+
## Key Tradeoffs
|
|
49
|
+
|
|
50
|
+
| Decision | Tradeoff | Link |
|
|
51
|
+
|---|---|---|
|
|
52
|
+
| `<decision>` | `<tradeoff>` | `<ADR/link>` |
|
|
53
|
+
|
|
54
|
+
## Known Limits
|
|
55
|
+
|
|
56
|
+
- `<limit>`
|
|
57
|
+
|
|
58
|
+
## Open Questions
|
|
59
|
+
|
|
60
|
+
- `<question>`
|
|
61
|
+
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# Business Notes
|
|
2
|
+
|
|
3
|
+
Use this document to track product, pricing, cost, packaging, operational, and business-risk context.
|
|
4
|
+
|
|
5
|
+
## Business Summary
|
|
6
|
+
|
|
7
|
+
- Product:
|
|
8
|
+
- Target users/customers:
|
|
9
|
+
- Value proposition:
|
|
10
|
+
- Business model:
|
|
11
|
+
- Last reviewed:
|
|
12
|
+
|
|
13
|
+
## Features And Rationale
|
|
14
|
+
|
|
15
|
+
| Feature | Business Reason | User Value | Status | Notes |
|
|
16
|
+
|---|---|---|---|---|
|
|
17
|
+
| `<feature>` | `<reason>` | `<value>` | `<status>` | `<notes>` |
|
|
18
|
+
|
|
19
|
+
## Pricing And Packaging
|
|
20
|
+
|
|
21
|
+
| Plan/Package | Target User | Price | Limits | Notes |
|
|
22
|
+
|---|---|---:|---|---|
|
|
23
|
+
| `<plan>` | `<user>` | `<price>` | `<limits>` | `<notes>` |
|
|
24
|
+
|
|
25
|
+
## Cost Drivers
|
|
26
|
+
|
|
27
|
+
| Cost Driver | Source | Current Estimate | Scale Risk | Mitigation |
|
|
28
|
+
|---|---|---:|---|---|
|
|
29
|
+
| `<cost>` | `<vendor/service>` | `<estimate>` | `<low/medium/high>` | `<plan>` |
|
|
30
|
+
|
|
31
|
+
## Operational Concerns
|
|
32
|
+
|
|
33
|
+
- Support load:
|
|
34
|
+
- Admin effort:
|
|
35
|
+
- Compliance:
|
|
36
|
+
- Vendor risk:
|
|
37
|
+
- AI/model cost:
|
|
38
|
+
- Reliability risk:
|
|
39
|
+
|
|
40
|
+
## Things To Watch
|
|
41
|
+
|
|
42
|
+
- `<watch item>`
|
|
43
|
+
|
|
44
|
+
## Open Business Questions
|
|
45
|
+
|
|
46
|
+
| Question | Why It Matters | Owner | Status |
|
|
47
|
+
|---|---|---|---|
|
|
48
|
+
| `<question>` | `<impact>` | `<owner>` | `<status>` |
|
|
49
|
+
|