@dzhechkov/skills-feature-adr 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/cli.js CHANGED
File without changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@dzhechkov/skills-feature-adr",
3
- "version": "1.3.0",
3
+ "version": "1.3.1",
4
4
  "description": "Adaptive Feature Development skill pack for Claude Code — 11-step pipeline with Complexity Router (S/M/L/XL), ADR-driven architecture, 15 agentic-qe skills, multi-agent fleet QE. Supports --full-qe, --full-qe-extended, --with-learning, and --knowledge-extractor modes.",
5
5
  "main": "src/cli.js",
6
6
  "bin": {
@@ -167,14 +167,21 @@ async function run(options) {
167
167
  installedFiles.push(...files);
168
168
  }
169
169
 
170
- // ── e2) Install optional components ────────────────────────────────────
170
+ // ── e2) Install optional components — track which actually installed ──
171
+ // Components whose template source is missing return [] from installComponent;
172
+ // we must NOT record them in the manifest, otherwise `update` reports
173
+ // "Unknown component" on subsequent runs.
174
+ const installedOptionalKeys = [];
171
175
  for (const key of optionalKeys) {
172
176
  stepNum++;
173
177
  const comp = OPTIONAL_COMPONENTS[key];
174
178
  step(stepNum, totalComponents, `Installing ${comp.label}...`);
175
179
 
176
180
  const files = installComponent(key, comp, templatesDir, targetDir);
177
- installedFiles.push(...files);
181
+ if (files.length > 0) {
182
+ installedFiles.push(...files);
183
+ installedOptionalKeys.push(key);
184
+ }
178
185
  }
179
186
 
180
187
  // ── f) Write manifest ──────────────────────────────────────────────────
@@ -182,13 +189,14 @@ async function run(options) {
182
189
  const pkg = readJSON(pkgPath);
183
190
  const version = pkg ? pkg.version : '0.0.0';
184
191
 
185
- const allKeys = [...componentKeys, ...optionalKeys];
192
+ // Only manifest components whose templates were actually present on disk
193
+ const allKeys = [...componentKeys, ...installedOptionalKeys];
186
194
  const manifest = createManifest(version, allKeys, installedFiles.sort());
187
195
 
188
- // Track optional features in manifest
196
+ // Track optional features in manifest based on actual installs
189
197
  manifest.optional = {
190
- withLearning: optionalKeys.some((k) => OPTIONAL_COMPONENTS[k]?.group === 'learning'),
191
- knowledgeExtractor: optionalKeys.some((k) => OPTIONAL_COMPONENTS[k]?.group === 'knowledge-extractor'),
198
+ withLearning: installedOptionalKeys.some((k) => OPTIONAL_COMPONENTS[k]?.group === 'learning'),
199
+ knowledgeExtractor: installedOptionalKeys.some((k) => OPTIONAL_COMPONENTS[k]?.group === 'knowledge-extractor'),
192
200
  };
193
201
 
194
202
  writeManifest(targetDir, manifest);
@@ -204,7 +212,7 @@ async function run(options) {
204
212
  const comp = COMPONENTS[key];
205
213
  console.log(` ${green('\u2713')} ${comp.label}`);
206
214
  }
207
- for (const key of optionalKeys) {
215
+ for (const key of installedOptionalKeys) {
208
216
  const comp = OPTIONAL_COMPONENTS[key];
209
217
  console.log(` ${green('\u2713')} ${comp.label} ${yellow('[optional]')}`);
210
218
  }
@@ -0,0 +1,49 @@
1
+ # Harvest — Ритуал извлечения знаний из проекта
2
+
3
+ ## Использование
4
+ ```
5
+ /harvest [путь к директории или "all"] [only категория1,категория2]
6
+ ```
7
+
8
+ ## Аргумент
9
+ $ARGUMENTS
10
+
11
+ ## Скилл
12
+
13
+ ```
14
+ Read: .claude/skills/knowledge-extractor/SKILL.md
15
+ ```
16
+
17
+ Загрузи knowledge-extractor skill и следуй его Pipeline Protocol.
18
+
19
+ ## Парсинг аргументов
20
+
21
+ 1. **Путь:** Первый аргумент — путь к директории исследования или `"all"` для всех директорий в `researches/`
22
+ 2. **Scope filter:** Если после пути есть `only X,Y` — передай как `{SCOPE_FILTER}` в skill
23
+ - Допустимые значения: `skills`, `commands`, `hooks`, `rules`, `templates`, `patterns`, `snippets`
24
+ - Множественные через запятую: `only rules,templates`
25
+ 3. Если аргументов нет — запроси путь у пользователя
26
+
27
+ ## Примеры
28
+
29
+ ```
30
+ /harvest researches/bank-kc-automation/ ← полный harvest одной директории
31
+ /harvest researches/bank-kc-automation/ only patterns ← только паттерны
32
+ /harvest all ← все исследования
33
+ /harvest all only rules,templates ← все исследования, только правила и шаблоны
34
+ /harvest features/add-user-auth/ ← harvest фичи (не только исследования)
35
+ ```
36
+
37
+ ## Параллелизация
38
+
39
+ Если путь = `"all"`:
40
+ 1. Получи список всех директорий в `researches/`
41
+ 2. Для каждой директории запусти отдельную harvest сессию
42
+ 3. Каждая сессия создаёт свой findings JSON и проходит полный pipeline
43
+ 4. По завершении всех сессий — объедини отчёты
44
+
45
+ ## Суть
46
+
47
+ Систематический процесс извлечения и организации полезных знаний после завершения проекта. Использует 5 параллельных агентов-экстракторов, 7-категорийную классификацию, 8 блокирующих quality gates и автоматическое размещение артефактов.
48
+
49
+ Pipeline: Extract (5 agents) → Classify (7 categories) → User Checkpoint → Gate (8 checks) → Integrate (auto-place)
@@ -0,0 +1,124 @@
1
+ # Reward Learning Rules
2
+
3
+ ## Purpose
4
+
5
+ Govern how the Keysarium pipeline integrates with the Reward-Calibrated Learning System. These rules define when and how to call `memory_query()` and `memory_store()`, how reward scores are assigned, and how historical patterns influence phase execution.
6
+
7
+ ## Core Protocol
8
+
9
+ Read `lib/memory-protocol.md` for the full protocol specification.
10
+ Read `lib/reward-tracker.md` for analytics and pattern detection.
11
+
12
+ ## When to Call memory_query()
13
+
14
+ **Trigger:** At the START of every pipeline phase (Phase 0 through Phase 5).
15
+
16
+ **Protocol:**
17
+ 1. Before loading the phase's governance shard, check if `.keysarium/memory/` exists.
18
+ 2. If it exists, call `memory_query()` with the current context:
19
+ - `phase`: Current phase identifier (e.g., "phase-2")
20
+ - `domain`: Detected domain from Phase 0 (or "unknown" if Phase 0 has not run yet)
21
+ - `slug`: Current case slug
22
+ - `skill`: Skill about to be loaded for this phase
23
+ 3. Log the number of patterns loaded.
24
+ 4. If relevant patterns are found, incorporate the top 3 into the phase brief:
25
+ - Include high-reward approaches from similar past cases
26
+ - Apply actionable advice from domain patterns
27
+ - Note any bottleneck warnings for the current phase/domain combination
28
+ 5. If no patterns are found (first run or no relevant data), proceed normally.
29
+
30
+ **Exception:** Phase 0 (Discovery) may not have a domain yet. Query with `domain: "unknown"` to load cross-domain patterns.
31
+
32
+ ## When to Call memory_store()
33
+
34
+ **Trigger:** At every CHECKPOINT, after the user responds.
35
+
36
+ **Protocol:**
37
+ 1. After displaying the checkpoint banner and receiving user response.
38
+ 2. Classify the user response into a reward level (see Reward Assignment below).
39
+ 3. Call `memory_store()` with:
40
+ - The phase result (artifacts created, promise tag emitted)
41
+ - The reward score and label
42
+ - Context metadata (upstream promises, patterns loaded, agent count)
43
+ - Outcome metadata (user response, iteration count)
44
+ 4. Log the stored reward.
45
+ 5. Continue with the checkpoint protocol as normal (proceed / adjust / redo).
46
+
47
+ **Exception:** If the session ends without a user response at the checkpoint, do NOT store a record. Only explicit user responses generate reward data.
48
+
49
+ ## Reward Assignment
50
+
51
+ Map the user's checkpoint response to a reward score:
52
+
53
+ | User Response Pattern | Reward | Label | Examples |
54
+ |----------------------|--------|-------|----------|
55
+ | Immediate approval | 1.0 | excellent | "ok", "ок", "next", "продолжай", "good", single-word approval |
56
+ | Minor adjustments | 0.7 | good | "углуби X", "expand section Y", "add one more competitor", feedback on 1 area |
57
+ | Significant rework | 0.3 | needs_work | "rework the approach", "this misses the point of X and Y", feedback on 2+ areas |
58
+ | Complete restart | 0.0 | failed | "start over", "this is wrong", "redo this phase completely" |
59
+
60
+ ### Classification Rules
61
+
62
+ 1. **Count the areas of feedback:** 0 areas = 1.0, 1 area = 0.7, 2+ areas = 0.3, full restart = 0.0
63
+ 2. **Positive feedback with minor note** counts as 1.0, as an exception to rule 1, even though one area is mentioned (e.g., "great work, just fix the typo")
64
+ 3. **Multiple rounds:** If user gives feedback more than twice on the same phase, cap reward at 0.3
65
+ 4. **Ambiguous responses:** Default to 0.7 (assume minor adjustment)
66
+
67
+ ## Integration with Promise Tags
68
+
69
+ Every `memory_store()` call includes the promise tag emitted at the checkpoint:
70
+
71
+ | Phase | Promise Tag in Record |
72
+ |-------|----------------------|
73
+ | Phase 0 | `DISCOVERY_COMPLETE` |
74
+ | Phase 1 | `CASE_EXPLORED` |
75
+ | Phase 2 | `RESEARCH_PARANOID_PASSED` |
76
+ | Phase 2.5 | `CJM_VALIDATED` |
77
+ | Phase 3 | `SOLUTION_DESIGNED` |
78
+ | Phase 4 | `ARCHITECTURE_DEFINED` |
79
+ | Phase 5 | `PRESENTATION_READY` |
80
+
81
+ If the promise tag emitted at the checkpoint is an `_INCOMPLETE` variant (the phase did not fully complete), the reward should be 0.3 or lower.
82
+
83
+ ## Integration with Feedback Loops
84
+
85
+ This system adds a new feedback loop to the Variable Registry (see `.claude/rules/feedback-loops.md`):
86
+
87
+ ### Loop 7: Memory -> All Phases
88
+
89
+ | Property | Value |
90
+ |----------|-------|
91
+ | Direction | `.keysarium/memory/` -> Phase 0-5 start |
92
+ | Variable | `{MEMORY_PATTERNS}` |
93
+ | Payload | Top reward records + domain patterns from memory_query() |
94
+ | Consumers | All phases (loaded at phase start) |
95
+ | Persistence | `.keysarium/memory/{domain}/{slug}/` |
96
+
97
+ **Contract:** `memory_query()` is called before each phase starts. Empty result is acceptable (first run).
98
+
99
+ ## Retention Policy
100
+
101
+ - **Default:** 90 days from record creation
102
+ - **Configuration:** Set in `.keysarium/memory/config.json` via `retention_days`
103
+ - **Enforcement:** Expired records are purged during `memory_query()` calls
104
+ - **Override:** Set `retention_days: 0` to disable expiration (keep forever)
105
+ - **Manual purge:** Delete `.keysarium/memory/` directory to reset all memory
106
+
107
+ ## Error Handling
108
+
109
+ | Situation | Behavior |
110
+ |-----------|----------|
111
+ | `.keysarium/memory/` does not exist | `memory_query()` returns empty; `memory_store()` creates it |
112
+ | `config.json` missing | Use defaults (90 days, 10 max results, enabled) |
113
+ | Corrupted JSON file during query | Skip file, log warning, continue with other records |
114
+ | Write failure during store | Log error, continue pipeline (memory is non-blocking) |
115
+ | Unknown domain | Store under `unknown/` directory |
116
+
117
+ ## Rules Summary
118
+
119
+ 1. **ALWAYS** call `memory_query()` at the start of every phase
120
+ 2. **ALWAYS** call `memory_store()` at every checkpoint after user responds
121
+ 3. **NEVER** block the pipeline if memory operations fail
122
+ 4. **NEVER** store a record without an explicit user response
123
+ 5. **ALWAYS** log memory operations (patterns loaded, reward stored)
124
+ 6. Memory operations are **advisory** -- they enhance quality but are not mandatory gates
@@ -0,0 +1,254 @@
1
+ ---
2
+ name: knowledge-extractor
3
+ description: >
4
+ Multi-agent knowledge harvesting system that extracts reusable patterns, commands,
5
+ rules, templates, and snippets from any project directory. Uses 5 parallel extractor
6
+ agents, 7-category classification, 8 blocking quality gates, and auto-placement.
7
+ Triggers on "harvest", "extract knowledge", "извлеки знания", "/harvest".
8
+ trust_tier: 2
9
+ trust_tier_label: "Validated"
10
+ bto_score: 7.5
11
+ bto_date: "2026-03-03"
12
+ trust_tier_path: "Run /bto-test, score >= 8.5 + eval tests for Tier 3"
13
+ ---
14
+
15
+ # Knowledge Extractor
16
+
17
+ > Domain-agnostic multi-agent system for extracting, classifying, gating, and placing
18
+ > reusable knowledge from completed projects into the toolkit.
19
+
20
+ ## When To Activate
21
+
22
+ Trigger on:
23
+ - `/harvest [path]` or `/harvest all`
24
+ - "extract knowledge from [path]"
25
+ - "извлеки знания из [path]"
26
+ - "harvest patterns from [project]"
27
+ - "что можно переиспользовать из [project]"
28
+
29
+ ## What You Get
30
+
31
+ After a successful harvest session:
32
+ - **Findings JSON** — structured extraction results at `.keysarium/harvest/findings-{SESSION_ID}.json`
33
+ - **Placed artifacts** — skills, commands, and rules placed in `.claude/skills/`, `.claude/commands/`, `.claude/rules/`
34
+ - **TOOLKIT_HARVEST.md** — updated with new patterns, templates, snippets, and hooks
35
+ - **Harvest report** — session summary with metrics (extracted → approved → placed counts, gate pass rates)
36
+ - **Memory record** — reward data stored via `memory_store()` for future session optimization
37
+
38
+ ## Architecture
39
+
40
+ ```
41
+ .claude/skills/knowledge-extractor/
42
+ ├── SKILL.md ← This file (orchestrator)
43
+ ├── modules/
44
+ │ ├── 01-extract.md ← 5 parallel extractor agents
45
+ │ ├── 02-classify.md ← 7-category classification + dedup
46
+ │ ├── 03-gate.md ← 8 quality gates (2-pass)
47
+ │ └── 04-integrate.md ← Auto-placement + harvest report
48
+ ├── references/
49
+ │ ├── quality-gates.md ← Gate definitions (G1-G8)
50
+ │ ├── artifact-categories.md ← 7 categories with destinations
51
+ │ └── maturity-model.md ← Alpha → Beta → Stable
52
+ └── templates/
53
+ ├── artifact-card.md ← Per-finding output template
54
+ └── harvest-report.md ← Session summary template
55
+ ```
56
+
57
+ ## Modules
58
+
59
+ | Module | Purpose | Agents | Model |
60
+ |--------|---------|--------|-------|
61
+ | 01-extract | Scan project via 5 focused lenses | 5 parallel | sonnet |
62
+ | 02-classify | Assign categories, deduplicate, cross-ref | 0 (inline) | — |
63
+ | 03-gate | Evaluate 8 quality gates in 2 passes | 1 (semantic) | haiku |
64
+ | 04-integrate | Place artifacts, generate report | 0 (inline) | — |
65
+
66
+ ## Pipeline Protocol
67
+
68
+ ### Step 0: Initialize Session
69
+
70
+ 1. Parse arguments: `{TARGET_PATH}` (directory or "all"), `{SCOPE_FILTER}` (optional `only X,Y`)
71
+ 2. **Resolve target path:**
72
+ - If `{TARGET_PATH}` is a specific directory → validate it exists, set `{slug}` = directory basename
73
+ - If `{TARGET_PATH}` is `"all"` → list all subdirectories in `researches/`. For each directory, run a separate harvest session (each gets its own findings JSON, session ID, and pipeline run). Merge reports at the end.
74
+ - If `{TARGET_PATH}` does not exist → report error and stop
75
+ 3. **Resolve domain:** If running inside a `/casarium` pipeline, inherit `{domain}` from Phase 0 discovery. If standalone, detect domain from file content keywords (banking/retail/enterprise/healthcare). If unable to detect, set `{domain}` = `"unknown"`.
76
+ 4. Generate session ID: `{slug}-{YYYYMMDDHHmmss}`
77
+ 5. If `TOOLKIT_HARVEST.md` does not exist at repo root → create it with a standard skeleton (title + empty section headers for each category).
78
+ 6. Call `memory_query({phase: "harvest", domain, slug, skill: "knowledge-extractor"})`
79
+ 7. Log loaded patterns count
80
+
81
+ ### Step 1: Extract (Module 01)
82
+
83
+ ```
84
+ Read: modules/01-extract.md
85
+ ```
86
+
87
+ Spawn 5 parallel extractor agents (sonnet), each with a focused lens:
88
+
89
+ | Agent | Lens | Seeks |
90
+ |-------|------|-------|
91
+ | extractor-patterns | patterns | Architecture patterns, design patterns, agent topologies |
92
+ | extractor-commands | commands | Scripts, CLI workflows, pipeline stages, automation |
93
+ | extractor-rules | rules | Constraints, anti-patterns, lessons learned, domain rules |
94
+ | extractor-templates | templates | Document structures, config templates, diagram templates |
95
+ | extractor-snippets | snippets | Reusable code fragments, hooks, utility functions |
96
+
97
+ Each agent returns numbered findings with reusability confidence (0.0-1.0).
98
+ Orchestrator merges all findings into a single numbered list and writes to JSON:
99
+ `.keysarium/harvest/findings-{SESSION_ID}.json`
100
+
101
+ **Scope filtering:** If `{SCOPE_FILTER}` is set, only spawn matching agents. See mapping in `references/artifact-categories.md`.
102
+
103
+ ### Step 2: Classify (Module 02)
104
+
105
+ ```
106
+ Read: modules/02-classify.md
107
+ Read: references/artifact-categories.md
108
+ ```
109
+
110
+ 1. Read findings from JSON file
111
+ 2. Assign each finding to one of 7 categories
112
+ 3. Deduplicate: check content similarity across findings and against TOOLKIT_HARVEST.md
113
+ 4. Cross-reference: link findings that reference existing skills/commands
114
+ 5. Write classified findings back to JSON
115
+
116
+ ### Step 3: User Checkpoint (Interactive)
117
+
118
+ Display all findings grouped by category as a numbered list:
119
+
120
+ ```
121
+ ═══════════════════════════════════════════════════════
122
+ 📋 HARVEST FINDINGS: {N} items extracted
123
+
124
+ ## patterns (K items)
125
+ #1 [0.85] Agent Swarm Topology — parallel agents with merge pattern
126
+ #4 [0.72] Event-Driven Pipeline — phase-to-phase data flow via files
127
+
128
+ ## rules (M items)
129
+ #2 [0.91] Domain Detection Rule — auto-detect domain from keywords
130
+ #7 [0.65] Regulatory Constraint — ФЗ-152 data isolation requirement
131
+
132
+ ## snippets (P items)
133
+ #3 [0.78] CJM Renderer — universal React component for CJM display
134
+ ...
135
+
136
+ Commands:
137
+ • "убери #N" — remove finding
138
+ • "переклассифицируй #N в X" — reclassify to category X
139
+ • "доработай #N" — expand/improve finding
140
+ • "объедини #N и #M" — merge two findings
141
+ • "покажи только X" — filter by category
142
+ • "ок" — proceed to quality gates
143
+ ═══════════════════════════════════════════════════════
144
+ ```
145
+
146
+ Apply user mutations to JSON file. Numbers are stable (removed items leave gaps).
147
+ Loop until user says "ок".
148
+
149
+ ### Step 4: Gate (Module 03)
150
+
151
+ ```
152
+ Read: modules/03-gate.md
153
+ Read: references/quality-gates.md
154
+ ```
155
+
156
+ Evaluate each active finding against 8 quality gates in 2 passes:
157
+
158
+ **Pass 1 — Deterministic (zero LLM cost):**
159
+ - G1: Has "When to use" section
160
+ - G2: Has "When NOT to use" section
161
+ - G5: Reusability confidence >= 0.5
162
+ - G6: Not a duplicate of existing toolkit entry
163
+ - G7: Has maturity label (alpha/beta/stable)
164
+
165
+ **Pass 2 — Semantic (haiku agent):**
166
+ - G3: Properly decontextualized (no project-specific refs)
167
+ - G4: Has at least one concrete example
168
+ - G8: Passes brutal-honesty review (self-critical assessment)
169
+
170
+ Findings passing all gates → status `approved`.
171
+ Findings failing any gate → status `blocked` with gate IDs.
172
+
173
+ Display blocked findings and allow overrides:
174
+ ```
175
+ ⚠️ BLOCKED: #7 — failed G3 (project-specific references remain)
176
+ • "пропусти G3 для #7" — override gate
177
+ • "ок" — accept blocks, proceed with approved only
178
+ ```
179
+
180
+ ### Step 5: Integrate (Module 04)
181
+
182
+ ```
183
+ Read: modules/04-integrate.md
184
+ Read: templates/artifact-card.md
185
+ Read: templates/harvest-report.md
186
+ ```
187
+
188
+ 1. Render artifact cards for each approved finding
189
+ 2. Auto-place by category:
190
+ - `skills` → create `.claude/skills/<name>/SKILL.md` (skeleton)
191
+ - `commands` → create `.claude/commands/<name>.md`
192
+ - `hooks` → document in TOOLKIT_HARVEST.md (hooks section)
193
+ - `rules` → create `.claude/rules/<name>.md`
194
+ - `templates` → document in TOOLKIT_HARVEST.md (templates section)
195
+ - `patterns` → document in TOOLKIT_HARVEST.md (patterns section)
196
+ - `snippets` → document in TOOLKIT_HARVEST.md (snippets section)
197
+ 3. Update TOOLKIT_HARVEST.md "Обработанные проекты" table
198
+ 4. Generate harvest report
199
+ 5. Call `memory_store()` with harvest results and user reward
200
+ 6. Update `.keysarium/insights/trigger-state.json` (case_completion event)
201
+ 7. Clean up findings JSON file
202
+
203
+ ## Scope Filtering
204
+
205
+ The `only` keyword filters which extractor agents are spawned:
206
+
207
+ ```
208
+ /harvest path/ → all 5 agents
209
+ /harvest path/ only patterns → extractor-patterns only
210
+ /harvest path/ only rules,templates → extractor-rules + extractor-templates
211
+ /harvest all only snippets → extractor-snippets for each directory
212
+ ```
213
+
214
+ Category-to-agent mapping (see `references/artifact-categories.md`):
215
+ - patterns → extractor-patterns
216
+ - commands → extractor-commands
217
+ - hooks → extractor-commands (shared lens)
218
+ - rules → extractor-rules
219
+ - templates → extractor-templates
220
+ - snippets → extractor-snippets
221
+ - skills → all agents (skills are cross-cutting)
222
+
223
+ ## Integration Points
224
+
225
+ | System | When | Operation |
226
+ |--------|------|-----------|
227
+ | memory_query() | Pipeline start | Load historical harvest patterns |
228
+ | memory_store() | Step 5 (Integrate), after the user checkpoint | Store harvest reward data |
229
+ | brain-export | N/A (passive) | Findings JSON is brain-compatible schema |
230
+ | BTO | Post-harvest | Placed skills can be evaluated via `/bto-test` |
231
+ | dream cycles | Pipeline end | Update trigger-state.json |
232
+ | /harvest command | Entry point | Thin wrapper loads this skill |
233
+
234
+ ## Anti-Patterns
235
+
236
+ | Anti-Pattern | Detection | Fix |
237
+ |-------------|-----------|-----|
238
+ | Extracting project-specific details | Finding contains project names, slugs, dates | Decontextualize: replace specifics with generic placeholders |
239
+ | Duplicate extraction | content_hash matches existing TOOLKIT_HARVEST.md entry | Skip or merge; gate G6 blocks duplicates |
240
+ | Extracting from incomplete phases | Artifact lacks promise tag or checkpoint | Only harvest from completed projects |
241
+ | Over-classifying snippets as patterns | Single-use code without abstraction | Require reuse evidence before promoting to pattern |
242
+ | Overstating maturity | All findings marked "stable" on first run | First extraction is always "alpha" or "beta" |
243
+ | Skipping user checkpoint | Proceeding to gates without user review | BLOCK — user checkpoint is mandatory |
244
+ | Running all agents when scope is filtered | `only patterns` but 5 agents spawned | Check SCOPE_FILTER before spawning |
245
+
246
+ ## Dependencies
247
+
248
+ | Resource | Path | Purpose |
249
+ |----------|------|---------|
250
+ | quality-gates.md | references/quality-gates.md | Gate definitions for Module 03 |
251
+ | artifact-categories.md | references/artifact-categories.md | 7-category taxonomy |
252
+ | maturity-model.md | references/maturity-model.md | Alpha/Beta/Stable criteria |
253
+ | artifact-card.md | templates/artifact-card.md | Per-finding output format |
254
+ | harvest-report.md | templates/harvest-report.md | Session report format |
@@ -0,0 +1,188 @@
1
+ # Extract Module — Parallel Knowledge Extraction
2
+
3
+ ## Purpose
4
+
5
+ Spawn 5 focused extractor agents to scan a project directory through different lenses, collecting reusable knowledge findings with confidence scores.
6
+
7
+ ## Input
8
+
9
+ - `{TARGET_PATH}` — directory to scan (e.g., `researches/bank-kc-automation/`)
10
+ - `{SCOPE_FILTER}` — optional list of lenses to activate (null = all 5)
11
+ - `{MEMORY_PATTERNS}` — historical patterns from memory_query() (may be empty)
12
+ - `{SESSION_ID}` — harvest session identifier
13
+
14
+ ## Protocol
15
+
16
+ ### Step 1: Inventory Target Directory
17
+
18
+ List all files in `{TARGET_PATH}`. Build a file manifest with paths and sizes.
19
+ Skip binary files, images, and files > 100KB.
20
+
21
+ ### Step 2: Determine Active Agents
22
+
23
+ If `{SCOPE_FILTER}` is set, map categories to lenses:
24
+
25
+ | Requested Category | Agent Lens to Spawn |
26
+ |-------------------|---------------------|
27
+ | patterns | extractor-patterns |
28
+ | commands | extractor-commands |
29
+ | hooks | extractor-commands (shared) |
30
+ | rules | extractor-rules |
31
+ | templates | extractor-templates |
32
+ | snippets | extractor-snippets |
33
+ | skills | ALL agents (cross-cutting) |
34
+
35
+ If `{SCOPE_FILTER}` is null, spawn all 5 agents.
36
+
37
+ ### Step 3: Spawn Extractor Agents
38
+
39
+ Spawn selected agents in parallel using the Agent tool:
40
+
41
+ ```
42
+ Agent(
43
+ subagent_type="general-purpose",
44
+ model="sonnet",
45
+ description="Harvest Extractor — {LENS_NAME}",
46
+ prompt="""
47
+ You are a knowledge extraction agent with a focused lens: {LENS_DESCRIPTION}.
48
+
49
+ ## Your Task
50
+ Scan the following project files and extract reusable knowledge through your lens.
51
+ For each finding, provide:
52
+ 1. A short descriptive title (3-8 words)
53
+ 2. Content: the reusable knowledge (decontextualized where possible)
54
+ 3. "When to use" guidance (1-2 sentences)
55
+ 4. "When NOT to use" guidance (1-2 sentences)
56
+ 5. A concrete example of usage
57
+ 6. Reusability confidence (0.0 to 1.0):
58
+ - 0.9-1.0: Universal, works in any project
59
+ - 0.7-0.89: Works in most similar projects
60
+ - 0.5-0.69: Works in some contexts
61
+ - < 0.5: Probably too project-specific
62
+ 7. Suggested maturity: alpha (first time seen) or beta (if evidence of reuse)
63
+
64
+ ## Project Files
65
+ Read the following directory: {TARGET_PATH}
66
+
67
+ ## Historical Patterns (from previous harvests)
68
+ {MEMORY_PATTERNS_OR_NONE}
69
+
70
+ Avoid extracting findings that overlap with these historical patterns.
71
+
72
+ ## Extraction Rules
73
+ - Extract ONLY genuinely reusable knowledge
74
+ - DO NOT extract project-specific implementation details
75
+ - DO NOT extract trivial or obvious patterns
76
+ - Each finding must be independently useful outside this project
77
+ - Prefer actionable knowledge over abstract observations
78
+ - If in doubt about reusability, include it with a lower confidence score
79
+
80
+ ## Output Format
81
+ Return a numbered list of findings in this exact format:
82
+
83
+ ### Finding 1
84
+ **Title:** [short title]
85
+ **Confidence:** [0.0-1.0]
86
+ **Maturity:** [alpha|beta]
87
+ **Content:** [the reusable knowledge]
88
+ **When to use:** [guidance]
89
+ **When NOT to use:** [anti-guidance]
90
+ **Example:** [concrete example]
91
+
92
+ ### Finding 2
93
+ ...
94
+ """
95
+ )
96
+ ```
97
+
98
+ ### Step 4: Handle Agent Failures
99
+
100
+ If any extractor agent fails, times out, or returns malformed output:
101
+
102
+ 1. **Partial success:** Proceed with results from successful agents. Do NOT block the entire pipeline.
103
+ 2. **Log failures:** Record which lenses failed and why:
104
+ ```
105
+ ⚠️ Extractor agent failed: extractor-snippets (timeout after 120s)
106
+ Proceeding with 4/5 lenses. Snippets will not be harvested.
107
+ ```
108
+ 3. **Surface in report:** Add failed lenses to the harvest report's "Coverage Gaps" section.
109
+ 4. **Retry option:** Offer the user: `"повтори snippets"` — retry the failed lens only.
110
+ 5. **Malformed output:** If an agent returns output that cannot be parsed into the Finding format, discard that agent's results and log: `"Extractor {lens} returned unparseable output — discarded."`
111
+ 6. **Zero findings:** If an agent returns no findings, this is normal (not all lenses find relevant content). Log: `"Extractor {lens}: 0 findings (no relevant content detected)."`
112
+
113
+ ### Step 5: Report Skipped Files
114
+
115
+ Before merging, surface any files that were skipped during inventory:
116
+
117
+ ```
118
+ 📋 File inventory: {total_files} files scanned, {skipped_count} skipped
119
+ Skipped: {file1} (112KB, exceeds 100KB limit), {file2} (binary)
120
+ ```
121
+
122
+ Include this in the harvest report under "Coverage Gaps" so the user knows what was NOT examined.
123
+
124
+ ### Step 6: Merge Results
125
+
126
+ After all agents complete (or after handling failures):
127
+
128
+ 1. Collect all findings from all agents
129
+ 2. Assign sequential numbers starting from 1 (global numbering across all agents)
130
+ 3. Tag each finding with its source lens
131
+ 4. Set initial status to `raw`
132
+ 5. Write to findings JSON file at `.keysarium/harvest/findings-{SESSION_ID}.json`
133
+
134
+ ### 5 Agent Lens Descriptions
135
+
136
+ | Agent | Lens Description |
137
+ |-------|-----------------|
138
+ | extractor-patterns | Architecture patterns, design patterns, agent topologies, data flow patterns, orchestration strategies, parallelism approaches. Look for structural solutions that could be applied to other systems. |
139
+ | extractor-commands | CLI commands, scripts, pipeline stages, automation workflows, slash commands, build processes. Look for reusable command patterns and workflow automations. |
140
+ | extractor-rules | Constraints, quality gates, anti-patterns, domain rules, naming conventions, regulatory requirements, lessons learned. Look for guardrails and restrictions that prevent mistakes. |
141
+ | extractor-templates | Document structures, config file formats, diagram templates, report formats, checklist templates. Look for reusable document and configuration scaffolding. |
142
+ | extractor-snippets | Reusable code fragments, utility functions, React components, bash one-liners, prompt templates, hook implementations. Look for copy-paste-ready code blocks. |
143
+
144
+ ## Output Format
145
+
146
+ The findings JSON file structure:
147
+
148
+ ```json
149
+ {
150
+ "session_id": "{SESSION_ID}",
151
+ "status": "extracting",
152
+ "target_paths": ["{TARGET_PATH}"],
153
+ "scope_filter": null,
154
+ "created_at": "ISO-8601",
155
+ "next_number": 16,
156
+ "extractors_spawned": ["patterns", "commands", "rules", "templates", "snippets"],
157
+ "findings": {
158
+ "1": {
159
+ "number": 1,
160
+ "title": "Agent Swarm Topology",
161
+ "content": "...",
162
+ "when_to_use": "...",
163
+ "when_not_to_use": "...",
164
+ "example": "...",
165
+ "confidence": 0.85,
166
+ "maturity": "alpha",
167
+ "source_lens": "patterns",
168
+ "category": null,
169
+ "status": "raw",
170
+ "gate_results": {},
171
+ "gate_overrides": [],
172
+ "merged_from": null
173
+ }
174
+ }
175
+ }
176
+ ```
177
+
178
+ ## Anti-Patterns
179
+
180
+ | Anti-Pattern | Detection | Fix |
181
+ |-------------|-----------|-----|
182
+ | Agent extracts trivial findings | Findings like "use git for version control" | Prompt emphasizes non-obvious, actionable knowledge |
183
+ | Agent reads files outside target | File paths outside {TARGET_PATH} | Prompt constrains to target directory only |
184
+ | Duplicate findings across agents | Two agents extract the same pattern | Handled by Module 02 (dedup), not here |
185
+ | Agent returns unstructured text | Missing required fields in output | Strict output format in prompt; orchestrator validates |
186
+ | All findings have confidence 0.9+ | Agent is not being self-critical | Prompt calibration: explain the confidence scale |
187
+ | Agent timeout with no fallback | Pipeline stalls waiting for failed agent | Use Step 4 failure protocol: proceed with partial results |
188
+ | Large files silently skipped | User unaware of coverage gaps | Step 5 surfaces skipped files; harvest report includes coverage gaps |