@nomos-arc/arc 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. package/.claude/settings.local.json +10 -0
  2. package/.nomos-config.json +5 -0
  3. package/CLAUDE.md +108 -0
  4. package/LICENSE +190 -0
  5. package/README.md +569 -0
  6. package/dist/cli.js +21120 -0
  7. package/docs/auth/googel_plan.yaml +1093 -0
  8. package/docs/auth/google_task.md +235 -0
  9. package/docs/auth/hardened_blueprint.yaml +1658 -0
  10. package/docs/auth/red_team_report.yaml +336 -0
  11. package/docs/auth/session_state.yaml +162 -0
  12. package/docs/certificate/cer_enhance_plan.md +605 -0
  13. package/docs/certificate/certificate_report.md +338 -0
  14. package/docs/dev_overview.md +419 -0
  15. package/docs/feature_assessment.md +156 -0
  16. package/docs/how_it_works.md +78 -0
  17. package/docs/infrastructure/map.md +867 -0
  18. package/docs/init/master_plan.md +3581 -0
  19. package/docs/init/red_team_report.md +215 -0
  20. package/docs/init/report_phase_1a.md +304 -0
  21. package/docs/integrity-gate/enhance_drift.md +703 -0
  22. package/docs/integrity-gate/overview.md +108 -0
  23. package/docs/management/manger-task.md +99 -0
  24. package/docs/management/scafffold.md +76 -0
  25. package/docs/map/ATOMIC_BLUEPRINT.md +1349 -0
  26. package/docs/map/RED_TEAM_REPORT.md +159 -0
  27. package/docs/map/map_task.md +147 -0
  28. package/docs/map/semantic_graph_task.md +792 -0
  29. package/docs/map/semantic_master_plan.md +705 -0
  30. package/docs/phase7/TEAM_RED.md +249 -0
  31. package/docs/phase7/plan.md +1682 -0
  32. package/docs/phase7/task.md +275 -0
  33. package/docs/prompts/USAGE.md +312 -0
  34. package/docs/prompts/architect.md +165 -0
  35. package/docs/prompts/executer.md +190 -0
  36. package/docs/prompts/hardener.md +190 -0
  37. package/docs/prompts/red_team.md +146 -0
  38. package/docs/verification/goveranance-overview.md +396 -0
  39. package/docs/verification/governance-overview.md +245 -0
  40. package/docs/verification/verification-arc-ar.md +560 -0
  41. package/docs/verification/verification-architecture.md +560 -0
  42. package/docs/very_next.md +52 -0
  43. package/docs/whitepaper.md +89 -0
  44. package/overview.md +1469 -0
  45. package/package.json +63 -0
  46. package/src/adapters/__tests__/git.test.ts +296 -0
  47. package/src/adapters/__tests__/stdio.test.ts +70 -0
  48. package/src/adapters/git.ts +226 -0
  49. package/src/adapters/pty.ts +159 -0
  50. package/src/adapters/stdio.ts +113 -0
  51. package/src/cli.ts +83 -0
  52. package/src/commands/apply.ts +47 -0
  53. package/src/commands/auth.ts +301 -0
  54. package/src/commands/certificate.ts +89 -0
  55. package/src/commands/discard.ts +24 -0
  56. package/src/commands/drift.ts +116 -0
  57. package/src/commands/index.ts +78 -0
  58. package/src/commands/init.ts +121 -0
  59. package/src/commands/list.ts +75 -0
  60. package/src/commands/map.ts +55 -0
  61. package/src/commands/plan.ts +30 -0
  62. package/src/commands/review.ts +58 -0
  63. package/src/commands/run.ts +63 -0
  64. package/src/commands/search.ts +147 -0
  65. package/src/commands/show.ts +63 -0
  66. package/src/commands/status.ts +59 -0
  67. package/src/core/__tests__/budget.test.ts +213 -0
  68. package/src/core/__tests__/certificate.test.ts +385 -0
  69. package/src/core/__tests__/config.test.ts +191 -0
  70. package/src/core/__tests__/preflight.test.ts +24 -0
  71. package/src/core/__tests__/prompt.test.ts +358 -0
  72. package/src/core/__tests__/review.test.ts +161 -0
  73. package/src/core/__tests__/state.test.ts +362 -0
  74. package/src/core/auth/__tests__/manager.test.ts +166 -0
  75. package/src/core/auth/__tests__/server.test.ts +220 -0
  76. package/src/core/auth/gcp-projects.ts +160 -0
  77. package/src/core/auth/manager.ts +114 -0
  78. package/src/core/auth/server.ts +141 -0
  79. package/src/core/budget.ts +119 -0
  80. package/src/core/certificate.ts +502 -0
  81. package/src/core/config.ts +212 -0
  82. package/src/core/errors.ts +54 -0
  83. package/src/core/factory.ts +49 -0
  84. package/src/core/graph/__tests__/builder.test.ts +272 -0
  85. package/src/core/graph/__tests__/contract-writer.test.ts +175 -0
  86. package/src/core/graph/__tests__/enricher.test.ts +299 -0
  87. package/src/core/graph/__tests__/parser.test.ts +200 -0
  88. package/src/core/graph/__tests__/pipeline.test.ts +202 -0
  89. package/src/core/graph/__tests__/renderer.test.ts +128 -0
  90. package/src/core/graph/__tests__/resolver.test.ts +185 -0
  91. package/src/core/graph/__tests__/scanner.test.ts +231 -0
  92. package/src/core/graph/__tests__/show.test.ts +134 -0
  93. package/src/core/graph/builder.ts +303 -0
  94. package/src/core/graph/constraints.ts +94 -0
  95. package/src/core/graph/contract-writer.ts +93 -0
  96. package/src/core/graph/drift/__tests__/classifier.test.ts +215 -0
  97. package/src/core/graph/drift/__tests__/comparator.test.ts +335 -0
  98. package/src/core/graph/drift/__tests__/drift.test.ts +453 -0
  99. package/src/core/graph/drift/__tests__/reporter.test.ts +203 -0
  100. package/src/core/graph/drift/classifier.ts +165 -0
  101. package/src/core/graph/drift/comparator.ts +205 -0
  102. package/src/core/graph/drift/reporter.ts +77 -0
  103. package/src/core/graph/enricher.ts +251 -0
  104. package/src/core/graph/grammar-paths.ts +30 -0
  105. package/src/core/graph/html-template.ts +493 -0
  106. package/src/core/graph/map-schema.ts +137 -0
  107. package/src/core/graph/parser.ts +336 -0
  108. package/src/core/graph/pipeline.ts +209 -0
  109. package/src/core/graph/renderer.ts +92 -0
  110. package/src/core/graph/resolver.ts +195 -0
  111. package/src/core/graph/scanner.ts +145 -0
  112. package/src/core/logger.ts +46 -0
  113. package/src/core/orchestrator.ts +792 -0
  114. package/src/core/plan-file-manager.ts +66 -0
  115. package/src/core/preflight.ts +64 -0
  116. package/src/core/prompt.ts +173 -0
  117. package/src/core/review.ts +95 -0
  118. package/src/core/state.ts +294 -0
  119. package/src/core/worktree-coordinator.ts +77 -0
  120. package/src/search/__tests__/chunk-extractor.test.ts +339 -0
  121. package/src/search/__tests__/embedder-auth.test.ts +124 -0
  122. package/src/search/__tests__/embedder.test.ts +267 -0
  123. package/src/search/__tests__/graph-enricher.test.ts +178 -0
  124. package/src/search/__tests__/indexer.test.ts +518 -0
  125. package/src/search/__tests__/integration.test.ts +649 -0
  126. package/src/search/__tests__/query-engine.test.ts +334 -0
  127. package/src/search/__tests__/similarity.test.ts +78 -0
  128. package/src/search/__tests__/vector-store.test.ts +281 -0
  129. package/src/search/chunk-extractor.ts +167 -0
  130. package/src/search/embedder.ts +209 -0
  131. package/src/search/graph-enricher.ts +95 -0
  132. package/src/search/indexer.ts +483 -0
  133. package/src/search/lexical-searcher.ts +190 -0
  134. package/src/search/query-engine.ts +225 -0
  135. package/src/search/vector-store.ts +311 -0
  136. package/src/types/index.ts +572 -0
  137. package/src/utils/__tests__/ansi.test.ts +54 -0
  138. package/src/utils/__tests__/frontmatter.test.ts +79 -0
  139. package/src/utils/__tests__/sanitize.test.ts +229 -0
  140. package/src/utils/ansi.ts +19 -0
  141. package/src/utils/context.ts +44 -0
  142. package/src/utils/frontmatter.ts +27 -0
  143. package/src/utils/sanitize.ts +78 -0
  144. package/test/e2e/lifecycle.test.ts +330 -0
  145. package/test/fixtures/mock-planner-hang.ts +5 -0
  146. package/test/fixtures/mock-planner.ts +26 -0
  147. package/test/fixtures/mock-reviewer-bad.ts +8 -0
  148. package/test/fixtures/mock-reviewer-retry.ts +34 -0
  149. package/test/fixtures/mock-reviewer.ts +18 -0
  150. package/test/fixtures/sample-project/src/circular-a.ts +6 -0
  151. package/test/fixtures/sample-project/src/circular-b.ts +6 -0
  152. package/test/fixtures/sample-project/src/config.ts +15 -0
  153. package/test/fixtures/sample-project/src/main.ts +19 -0
  154. package/test/fixtures/sample-project/src/services/product-service.ts +20 -0
  155. package/test/fixtures/sample-project/src/services/user-service.ts +18 -0
  156. package/test/fixtures/sample-project/src/types.ts +14 -0
  157. package/test/fixtures/sample-project/src/utils/index.ts +14 -0
  158. package/test/fixtures/sample-project/src/utils/validate.ts +12 -0
  159. package/tsconfig.json +20 -0
  160. package/vitest.config.ts +12 -0
@@ -0,0 +1,275 @@
1
+ # Phase 7: Global Semantic Search (Vector-Based)
2
+
3
+ ## Overview
4
+
5
+ Build a semantic search engine for the codebase that understands **meaning**, not just keywords. Instead of searching for the symbol `Payment`, a developer can ask: *"Where is the refund logic handled in this project?"* — and get accurate, context-aware results across the entire codebase.
6
+
7
+ This phase transforms `project_map.json` and `.semantic.md` files into vector embeddings stored in a local database, enabling natural language queries over code architecture.
8
+
9
+ ---
10
+
11
+ ## Problem Statement
12
+
13
+ Traditional code search (grep, ripgrep, IDE find) is limited to exact or fuzzy keyword matching. This fails when:
14
+
15
+ - The developer doesn't know the exact symbol name or terminology used in the codebase.
16
+ - The concept spans multiple files with no shared keyword (e.g., "error recovery flow" touches handlers, middleware, and retry logic).
17
+ - Non-technical stakeholders (PMs, architects) need to locate functionality using domain language rather than code symbols.
18
+
19
+ **Semantic search closes this gap** by matching on intent and meaning rather than lexical tokens.
20
+
21
+ ---
22
+
23
+ ## Core Concepts
24
+
25
+ ### Embeddings
26
+
27
+ Embeddings are numerical vectors in a high-dimensional space, produced by converting text (e.g., contents of `.semantic.md` files) with an embedding model. Texts with similar meanings are mathematically "close" to each other, regardless of the specific words used.
28
+
29
+ ### Vector Database
30
+
31
+ A local-first storage engine (e.g., LanceDB) that indexes these vectors for fast similarity search. This avoids reprocessing the entire project on every query and supports incremental updates.
32
+
33
+ ### Similarity Search
34
+
35
+ When a user issues a query, it is converted to a vector using the same embedding model, then compared against the stored vectors using cosine similarity (or a similar distance metric) to find the closest matches.
36
+
37
+ ---
38
+
39
+ ## Execution Flow
40
+
41
+ ### Stage 1 — Indexing
42
+
43
+ **Input:** `project_map.json` + `.semantic.md` files from the mapping phase.
44
+
45
+ **Process:**
46
+ 1. Extract structured fields from each file entry: `purpose`, `key_logic`, `exports`, `dependencies`.
47
+ 2. Compose a search-optimized text chunk per file by concatenating these fields with clear delimiters.
48
+ 3. **Symbol-level chunking:** For each file, also extract individual classes and significant functions (using data from `project_map.json` exports/symbols). Each symbol gets its own embedding with a reference back to the parent file and line range. This enables search results that point to a specific function, not just a file.
49
+ 4. Send each chunk (file-level + symbol-level) to the embedding API (Gemini `text-embedding-004` or equivalent) to generate vectors.
50
+ 5. Store each vector alongside metadata (file path, module name, symbol name, line range, last modified timestamp).
51
+
52
+ **Output:** A local vector index (`.nomos/vectors/`) containing file-level and symbol-level embeddings.
53
+
54
+ ### Stage 2 — Storage
55
+
56
+ **Technology:** LanceDB (embedded, local-first, no server required).
57
+
58
+ **Schema (file-level entry):**
59
+ ```json
60
+ {
61
+ "id": "src/services/payment.ts",
62
+ "type": "file",
63
+ "vector": [0.012, -0.034, ...],
64
+ "file_path": "src/services/payment.ts",
65
+ "module": "payment-service",
66
+ "purpose": "Handles payment processing and refund logic",
67
+ "key_logic": "Stripe integration, idempotency keys, retry with backoff",
68
+ "graph_depth": 5,
69
+ "dependents_count": 10,
70
+ "last_indexed": "2026-04-06T12:00:00Z"
71
+ }
72
+ ```
73
+
74
+ **Schema (symbol-level entry):**
75
+ ```json
76
+ {
77
+ "id": "src/services/payment.ts::processRefund",
78
+ "type": "symbol",
79
+ "vector": [0.008, -0.041, ...],
80
+ "file_path": "src/services/payment.ts",
81
+ "symbol_name": "processRefund",
82
+ "symbol_type": "function",
83
+ "line_start": 45,
84
+ "line_end": 82,
85
+ "purpose": "Processes a refund request via Stripe, validates eligibility, and updates order state",
86
+ "parent_file_id": "src/services/payment.ts",
87
+ "last_indexed": "2026-04-06T12:00:00Z"
88
+ }
89
+ ```
90
+
91
+ **Why local-first:** No external infrastructure dependency. The vector DB lives inside the project (`.nomos/vectors/`), is version-controllable, and works offline.
92
+
93
+ ### Stage 3 — Querying
94
+
95
+ **Trigger:** `arc search "<natural language query>"`
96
+
97
+ **Process:**
98
+ 1. Convert the user's query string into a vector using the same embedding model.
99
+ 2. Perform a nearest-neighbor search against the stored vectors.
100
+ 3. Rank results by similarity score (0.0–1.0), merging file-level and symbol-level matches.
101
+ 4. **Dependency-aware enrichment:** For each result, look up the file's graph metadata from Phase 3 (`project_map.json`) and attach impact context — graph depth, number of dependents, and core/leaf classification.
102
+ 5. Return the top-K results with file path, symbol (if applicable), line range, purpose summary, similarity score, and dependency impact.
103
+
104
+ **Example:**
105
+ ```bash
106
+ $ arc search "how is refund handled?"
107
+
108
+ Results (top 3):
109
+
110
+ 1. src/services/payment.ts :: processRefund() [0.96] L45-82
111
+ "Processes a refund request via Stripe, validates eligibility, updates order state"
112
+ ⚠ Core Module (depth 5) — modifying this affects 10 dependents
113
+
114
+ 2. src/services/payment.ts [0.91]
115
+ "Handles payment processing and refund logic"
116
+ ⚠ Core Module (depth 5) — modifying this affects 10 dependents
117
+
118
+ 3. src/middleware/billing.ts [0.84]
119
+ "Validates billing state before checkout"
120
+ Leaf Module (depth 1) — 2 dependents
121
+ ```
122
+
123
+ ---
124
+
125
+ ## CLI Commands
126
+
127
+ | Command | Description |
128
+ |---|---|
129
+ | `arc index` | Build or rebuild the vector index from `project_map.json` and `.semantic.md` files |
130
+ | `arc index --incremental` | Only re-index files modified since the last indexing run |
131
+ | `arc search "<query>"` | Perform a semantic search and display ranked results |
132
+ | `arc search "<query>" --top <N>` | Limit results to top N matches (default: 5) |
133
+ | `arc search "<query>" --threshold <score>` | Only show results above the similarity threshold (default: 0.7) |
134
+ | `arc search "<query>" --json` | Output results as JSON for programmatic consumption |
135
+
136
+ ---
137
+
138
+ ## Architecture
139
+
140
+ ```
141
+ arc search "query"
142
+
143
+
144
+ ┌─────────────┐ ┌──────────────────┐
145
+ │ Query │────▶│ Embedding API │
146
+ │ Processor │ │ (Gemini/OpenAI) │
147
+ └─────────────┘ └──────────────────┘
148
+ │ │
149
+ │ query vector │
150
+ ▼ │
151
+ ┌─────────────┐ │
152
+ │ Vector DB │◀──────────────┘
153
+ │ (LanceDB) │ file + symbol vectors (at index time)
154
+ └─────────────┘
155
+
156
+ │ ranked results
157
+
158
+ ┌──────────────────┐ ┌──────────────────┐
159
+ │ Graph Enricher │────▶│ project_map.json │
160
+ │ (Phase 3 data) │ │ (dependency graph)│
161
+ └──────────────────┘ └──────────────────┘
162
+
163
+ │ enriched results (+ depth, dependents, impact)
164
+
165
+ ┌─────────────┐
166
+ │ Formatter │──▶ CLI output / JSON
167
+ └─────────────┘
168
+ ```
169
+
170
+ ### Module Breakdown
171
+
172
+ | Module | Responsibility |
173
+ |---|---|
174
+ | `src/search/indexer.ts` | Reads project map + semantic files, chunks text at file-level and symbol-level, calls embedding API, writes to vector DB |
175
+ | `src/search/symbol-extractor.ts` | Extracts individual classes/functions from project map exports for symbol-level indexing |
176
+ | `src/search/embedder.ts` | Wraps the embedding API (Gemini/OpenAI), handles batching and rate limits |
177
+ | `src/search/vector-store.ts` | LanceDB interface — create table, upsert vectors, query by similarity |
178
+ | `src/search/graph-enricher.ts` | Reads Phase 3 dependency graph from `project_map.json`, attaches depth/dependents/impact metadata to search results |
179
+ | `src/search/query-engine.ts` | Orchestrates the search flow: embed query → search → enrich → rank → format |
180
+ | `src/commands/search.ts` | CLI command handler for `arc search` |
181
+ | `src/commands/index.ts` | CLI command handler for `arc index` |
182
+
183
+ ---
184
+
185
+ ## State Management
186
+
187
+ Following the project convention: **JSON is the source of truth**.
188
+
189
+ **Index metadata** is stored at `.nomos/vectors/index-meta.json`:
190
+ ```json
191
+ {
192
+ "last_full_index": "2026-04-06T12:00:00Z",
193
+ "total_files_indexed": 142,
194
+ "embedding_model": "text-embedding-004",
195
+ "vector_dimensions": 768,
196
+ "files": {
197
+ "src/services/payment.ts": {
198
+ "last_indexed": "2026-04-06T12:00:00Z",
199
+ "content_hash": "sha256:abc123..."
200
+ }
201
+ }
202
+ }
203
+ ```
204
+
205
+ **Incremental indexing** computes the current content hash of each file and compares it against the `content_hash` stored in `index-meta.json`. Only files whose hash has changed are re-embedded, reducing API calls and indexing time.
206
+
207
+ ---
208
+
209
+ ## Configuration
210
+
211
+ Added to `.nomos-config.json`:
212
+ ```json
213
+ {
214
+ "search": {
215
+ "embedding_provider": "gemini",
216
+ "embedding_model": "text-embedding-004",
217
+ "vector_db": "lancedb",
218
+ "vector_store_path": ".nomos/vectors",
219
+ "default_top_k": 5,
220
+ "default_threshold": 0.7,
221
+ "batch_size": 50,
222
+ "max_concurrent_requests": 5
223
+ }
224
+ }
225
+ ```
226
+
227
+ ---
228
+
229
+ ## Edge Cases and Constraints
230
+
231
+ | Concern | Mitigation |
232
+ |---|---|
233
+ | **Large projects (1000+ files)** | Batch embedding requests (50 per batch), incremental indexing, progress bar in CLI |
234
+ | **API rate limits** | Exponential backoff with jitter, configurable concurrency limit |
235
+ | **Stale index** | Warn the user if the index is older than the latest file modification; suggest running `arc index --incremental` |
236
+ | **No `.semantic.md` files** | Fall back to indexing raw `project_map.json` entries only; warn that results may be less accurate |
237
+ | **Offline usage** | If the embedding API is unreachable, search against the existing local index (no re-indexing) |
238
+ | **Cost control** | Track token usage per indexing run, log to `.nomos/vectors/usage.json`, respect budget limits from `.nomos-config.json` |
239
+
240
+ ---
241
+
242
+ ## Success Criteria
243
+
244
+ - [ ] `arc index` builds a complete vector index from project map and semantic files
245
+ - [ ] `arc index` generates both file-level and symbol-level embeddings
246
+ - [ ] `arc index --incremental` only re-indexes changed files (verified by content hash)
247
+ - [ ] `arc search` returns semantically relevant results for natural language queries
248
+ - [ ] Results can point to a specific function/class with line range, not just the file
249
+ - [ ] Results include dependency impact context (graph depth, dependents count, core/leaf classification)
250
+ - [ ] Results include file path, similarity score, and purpose summary
251
+ - [ ] Search completes in under 2 seconds for projects with up to 500 indexed files
252
+ - [ ] JSON output mode (`--json`) produces valid, parseable output
253
+ - [ ] Graceful degradation when API is unavailable (search existing index, skip re-indexing)
254
+
255
+ ---
256
+
257
+ ## Dependencies
258
+
259
+ | Dependency | Purpose |
260
+ |---|---|
261
+ | `lancedb` | Local embedded vector database |
262
+ | `@google/generative-ai` or equivalent | Embedding API client |
263
+ | Existing `project_map.json` | Source data (from mapping phase) — used for file content, symbol exports, AND dependency graph |
264
+ | Existing `.semantic.md` files | Enriched semantic descriptions (from mapping phase) |
265
+ | Phase 3 dependency graph | Graph depth, dependents count, and impact data for dependency-aware enrichment |
266
+
267
+ ---
268
+
269
+ ## Out of Scope (Phase 7)
270
+
271
+ - Cross-project search (searching across multiple repositories)
272
+ - Real-time index updates (watching file changes via filesystem events)
273
+ - Custom embedding model training or fine-tuning
274
+ - UI/dashboard for search results (CLI only)
275
+ - Hybrid search (combining keyword + semantic results)
@@ -0,0 +1,312 @@
1
+ # AI Agent Pipeline — Usage Guide
2
+
3
+ ## The 4-Agent Pipeline
4
+
5
+ ```
6
+ ┌─────────────────────────────────────────────────────────────────┐
7
+ │ │
8
+ │ TASK.md / TASK.yaml │
9
+ │ │ │
10
+ │ ▼ │
11
+ │ ┌─────────────┐ plan.yaml ┌─────────────┐ │
12
+ │ │ ARCHITECT │ ───────────────► │ RED TEAM │ │
13
+ │ │ architect │ │ red_team │ │
14
+ │ └─────────────┘ └──────┬──────┘ │
15
+ │ │ audit.yaml │
16
+ │ ▼ │
17
+ │ ┌─────────────┐ │
18
+ │ │ RESOLVER │ │
19
+ │ ┌─────────│ hardener │ │
20
+ │ │ REVISE └──────┬──────┘ │
21
+ │ │ │ blueprint.yaml │
22
+ │ │ ▼ │
23
+ │ │ ┌─────────────┐ │
24
+ │ │ │ OPERATOR │ │
25
+ │ │ │ executer │ │
26
+ │ │ └──────┬──────┘ │
27
+ │ │ │ │
28
+ │ │ ┌──────────▼──────────┐ │
29
+ │ │ │ Phase 1 → Session 1 │ │
30
+ │ │ │ Phase 2 → Session 2 │ │
31
+ │ │ │ Phase N → Session N │ │
32
+ │ │ └─────────────────────┘ │
33
+ │ │ │
34
+ │ └── (if audit verdict = REVISE, │
35
+ │ loop back to Architect) │
36
+ └─────────────────────────────────────────────────────────────────┘
37
+ ```
38
+
39
+ ---
40
+
41
+ ## Agent 1: Architect
42
+
43
+ **File:** `docs/prompts/architect.md`
44
+ **Role:** Analyzes the task and produces the initial atomic execution plan.
45
+ **Output:** `plan.yaml`
46
+
47
+ ### What to send
48
+
49
+ ```
50
+ Use this prompt @docs/prompts/architect.md to plan this task.
51
+
52
+ <task>
53
+ [Full task description, requirements, and acceptance criteria]
54
+ </task>
55
+
56
+ <codebase>
57
+ @src/relevant-file-1.ts
58
+ @src/relevant-file-2.ts
59
+ </codebase>
60
+
61
+ <tree>
62
+ [Output of: tree -L 3, or paste the directory structure]
63
+ </tree>
64
+
65
+ <config>
66
+ @package.json
67
+ @tsconfig.json
68
+ </config>
69
+
70
+ <rules>
71
+ @rules/core.json
72
+ @rules/backend.json
73
+ </rules>
74
+ ```
75
+
76
+ ### What you get back
77
+
78
+ A `plan.yaml` file with:
79
+ - `context_analysis` — tech stack, touch zone, fragile zone
80
+ - `steps[]` — atomic steps with `step_id`, `action`, `file_path`, `validation`, `rollback`
81
+ - `risk_assessment` — overall risk and failure scenarios
82
+ - `compliance` — rules check results
83
+
84
+ ### Save the output as
85
+
86
+ ```
87
+ tasks-management/plans/TASK-001/plan.yaml
88
+ ```
89
+
90
+ ---
91
+
92
+ ## Agent 2: Red Team
93
+
94
+ **File:** `docs/prompts/red_team.md`
95
+ **Role:** Audits the plan for security vulnerabilities, architectural flaws, AI divergence risks, and rollback failures.
96
+ **Input:** `plan.yaml` from Architect
97
+ **Output:** `audit.yaml`
98
+
99
+ ### What to send
100
+
101
+ ```
102
+ Use this prompt @docs/prompts/red_team.md to audit this plan.
103
+
104
+ <proposed_plan>
105
+ @tasks-management/plans/TASK-001/plan.yaml
106
+ </proposed_plan>
107
+
108
+ <codebase>
109
+ @src/relevant-file-1.ts
110
+ @src/relevant-file-2.ts
111
+ </codebase>
112
+
113
+ <tree>
114
+ [directory structure]
115
+ </tree>
116
+
117
+ <config>
118
+ @package.json
119
+ </config>
120
+
121
+ <rules>
122
+ @rules/core.json
123
+ </rules>
124
+ ```
125
+
126
+ ### What you get back
127
+
128
+ An `audit.yaml` file with:
129
+ - `verdict` — `APPROVE` | `APPROVE_WITH_NOTES` | `REVISE`
130
+ - `system_integrity_score` — 0-100
131
+ - `findings[]` — each finding with `severity`, `step_id`, `description`, `recommendation`
132
+ - `negative_constraints[]` — list of hard "DO NOT" rules
133
+ - `rollback_assessment` — which rollbacks are weak or broken
134
+
135
+ ### Save the output as
136
+
137
+ ```
138
+ tasks-management/plans/TASK-001/audit.yaml
139
+ ```
140
+
141
+ ### Decision after audit
142
+
143
+ | Verdict | Next Step |
144
+ |---------|-----------|
145
+ | `APPROVE` | Skip Resolver → go directly to Operator |
146
+ | `APPROVE_WITH_NOTES` | Send to Resolver (optional improvements) |
147
+ | `REVISE` | Send to Resolver (mandatory) |
148
+
149
+ ---
150
+
151
+ ## Agent 3: Resolver
152
+
153
+ **File:** `docs/prompts/hardener.md`
154
+ **Role:** Takes the original plan + audit findings and produces the Final Hardened Blueprint. Resolves every finding, injects constraints, strengthens rollbacks, and eliminates ambiguity.
155
+ **Input:** `plan.yaml` + `audit.yaml`
156
+ **Output:** `blueprint.yaml`
157
+
158
+ ### What to send
159
+
160
+ ```
161
+ Use this prompt @docs/prompts/hardener.md to resolve this audit.
162
+
163
+ <original_plan>
164
+ @tasks-management/plans/TASK-001/plan.yaml
165
+ </original_plan>
166
+
167
+ <audit_report>
168
+ @tasks-management/plans/TASK-001/audit.yaml
169
+ </audit_report>
170
+
171
+ <codebase>
172
+ @src/relevant-file-1.ts
173
+ @src/relevant-file-2.ts
174
+ </codebase>
175
+
176
+ <tree>
177
+ [directory structure]
178
+ </tree>
179
+
180
+ <rules>
181
+ @rules/core.json
182
+ </rules>
183
+ ```
184
+
185
+ ### What you get back
186
+
187
+ A `blueprint.yaml` file with:
188
+ - `version: "2.0-HARDENED"` (or `"1.0-APPROVED"` if audit was clean)
189
+ - `resolution_log` — what was changed and why
190
+ - `steps[]` — same structure as plan but hardened, with `CONSTRAINT:` injections
191
+ - `integrity_check` — confirms all findings resolved, dependency chain valid
192
+ - `changelog` — diff: steps modified/added/removed
193
+
194
+ ### Save the output as
195
+
196
+ ```
197
+ tasks-management/plans/TASK-001/blueprint.yaml
198
+ ```
199
+
200
+ ---
201
+
202
+ ## Agent 4: Operator
203
+
204
+ **File:** `docs/prompts/executer.md`
205
+ **Role:** Executes one phase of the blueprint per session. Stops after the phase is complete or if any step fails. Outputs a session state for the next session to resume.
206
+ **Input:** `blueprint.yaml` + phase number + (optional) previous session state
207
+ **Output:** `execution_report.yaml` + updated `session_state.yaml`
208
+
209
+ ### First session (Phase 1)
210
+
211
+ ```
212
+ Use this prompt @docs/prompts/executer.md to execute phase 1
213
+ from this blueprint @tasks-management/plans/TASK-001/blueprint.yaml
214
+
215
+ Environment:
216
+ - OS: Ubuntu 22.04
217
+ - Node: 20.x
218
+ - DB: connected (postgres://localhost:5432/nomos_dev)
219
+ ```
220
+
221
+ ### Next sessions (Phase 2, 3, ...)
222
+
223
+ ```
224
+ Use this prompt @docs/prompts/executer.md to execute phase 2
225
+ from this blueprint @tasks-management/plans/TASK-001/blueprint.yaml
226
+
227
+ <session_state>
228
+ @tasks-management/plans/TASK-001/session_state.yaml
229
+ </session_state>
230
+
231
+ Environment:
232
+ - OS: Ubuntu 22.04
233
+ - Node: 20.x
234
+ - DB: connected (postgres://localhost:5432/nomos_dev)
235
+ ```
236
+
237
+ ### What you get back
238
+
239
+ An `execution_report.yaml` with:
240
+ - `status` — `SUCCESS` | `PARTIAL_FAILURE` | `ROLLED_BACK` | `ABORTED`
241
+ - `steps_log[]` — step-by-step results with validation status
242
+ - `session_state` — save this for the next session
243
+ - `state_delta` — files created/modified/deleted
244
+ - `post_mortem` — (only on failure) root cause + recommended action
245
+ - `phase_summary.ready_for_next_phase` — `true` / `false`
246
+
247
+ ### Save the outputs as
248
+
249
+ ```
250
+ tasks-management/plans/TASK-001/execution_report_phase1.yaml
251
+ tasks-management/plans/TASK-001/session_state.yaml ← pass this to next session
252
+ ```
253
+
254
+ ---
255
+
256
+ ## Full Example — End to End
257
+
258
+ ```
259
+ TASK: Add JWT authentication to the API
260
+
261
+ Step 1 — Architect
262
+ Input: task description + src/ + package.json + rules/
263
+ Output: plan.yaml (12 atomic steps across 3 phases)
264
+
265
+ Step 2 — Red Team
266
+ Input: plan.yaml + src/ + rules/
267
+ Output: audit.yaml (verdict: REVISE — 1 critical finding: JWT secret in config)
268
+
269
+ Step 3 — Resolver
270
+ Input: plan.yaml + audit.yaml + src/ + rules/
271
+ Output: blueprint.yaml v2.0-HARDENED (secret moved to env var, rollback strengthened)
272
+
273
+ Step 4a — Operator Session 1
274
+ Input: blueprint.yaml + phase 1 + environment
275
+ Output: execution_report_phase1.yaml (SUCCESS) + session_state.yaml
276
+
277
+ Step 4b — Operator Session 2
278
+ Input: blueprint.yaml + phase 2 + session_state.yaml + environment
279
+ Output: execution_report_phase2.yaml (SUCCESS) + session_state.yaml
280
+
281
+ Step 4c — Operator Session 3
282
+ Input: blueprint.yaml + phase 3 + session_state.yaml + environment
283
+ Output: execution_report_phase3.yaml (SUCCESS) — ready_for_next_phase: false ✓
284
+ ```
285
+
286
+ ---
287
+
288
+ ## File Naming Convention
289
+
290
+ ```
291
+ tasks-management/
292
+ └── plans/
293
+ └── TASK-001/
294
+ ├── plan.yaml ← Architect output
295
+ ├── audit.yaml ← Red Team output
296
+ ├── blueprint.yaml ← Resolver output
297
+ ├── session_state.yaml ← Operator: updated each session
298
+ ├── execution_report_phase1.yaml ← Operator: one per session
299
+ ├── execution_report_phase2.yaml
300
+ └── execution_report_phase3.yaml
301
+ ```
302
+
303
+ ---
304
+
305
+ ## Quick Reference
306
+
307
+ | Agent | Prompt File | Takes | Produces | Key Field |
308
+ |-------|-------------|-------|----------|-----------|
309
+ | Architect | `architect.md` | task + code + rules | `plan.yaml` | `steps[]` |
310
+ | Red Team | `red_team.md` | `plan.yaml` + code | `audit.yaml` | `verdict` |
311
+ | Resolver | `hardener.md` | `plan.yaml` + `audit.yaml` | `blueprint.yaml` | `integrity_check` |
312
+ | Operator | `executer.md` | `blueprint.yaml` + phase# | `execution_report.yaml` | `ready_for_next_phase` |