@wazir-dev/cli 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +54 -44
- package/README.md +13 -13
- package/assets/demo.cast +47 -0
- package/assets/demo.gif +0 -0
- package/docs/anti-patterns/AP-23-skipping-enabled-workflows.md +28 -0
- package/docs/anti-patterns/AP-24-clarifier-deciding-scope.md +34 -0
- package/docs/concepts/architecture.md +1 -1
- package/docs/concepts/why-wazir.md +1 -1
- package/docs/readmes/INDEX.md +1 -1
- package/docs/readmes/features/expertise/README.md +1 -1
- package/docs/readmes/features/hooks/pre-compact-summary.md +1 -1
- package/docs/reference/hooks.md +1 -0
- package/docs/reference/launch-checklist.md +3 -3
- package/docs/reference/review-loop-pattern.md +3 -2
- package/docs/reference/skill-tiers.md +2 -2
- package/docs/research/2026-03-20-agents/a18fb002157904af5.txt +187 -0
- package/docs/research/2026-03-20-agents/a1d0ac79ac2f11e6f.txt +2 -0
- package/docs/research/2026-03-20-agents/a324079de037abd7c.txt +198 -0
- package/docs/research/2026-03-20-agents/a357586bccfafb0e5.txt +256 -0
- package/docs/research/2026-03-20-agents/a4365394e4d753105.txt +137 -0
- package/docs/research/2026-03-20-agents/a492af28bc52d3613.txt +136 -0
- package/docs/research/2026-03-20-agents/a4984db0b6a8eee07.txt +124 -0
- package/docs/research/2026-03-20-agents/a5b30e59d34bbb062.txt +214 -0
- package/docs/research/2026-03-20-agents/a5cf7829dab911586.txt +165 -0
- package/docs/research/2026-03-20-agents/a607157c30dd97c9e.txt +96 -0
- package/docs/research/2026-03-20-agents/a60b68b1e19d1e16b.txt +115 -0
- package/docs/research/2026-03-20-agents/a722af01c5594aba0.txt +166 -0
- package/docs/research/2026-03-20-agents/a787bdc516faa5829.txt +181 -0
- package/docs/research/2026-03-20-agents/a7c46d1bba1056ed2.txt +132 -0
- package/docs/research/2026-03-20-agents/a7e5abbab2b281a0d.txt +100 -0
- package/docs/research/2026-03-20-agents/a8dbadc66cd0d7d5a.txt +95 -0
- package/docs/research/2026-03-20-agents/a904d9f45d6b86a6d.txt +75 -0
- package/docs/research/2026-03-20-agents/a927659a942ee7f60.txt +102 -0
- package/docs/research/2026-03-20-agents/a962cb569191f7583.txt +125 -0
- package/docs/research/2026-03-20-agents/aab6decea538aac41.txt +148 -0
- package/docs/research/2026-03-20-agents/abd58b853dd938a1b.txt +295 -0
- package/docs/research/2026-03-20-agents/ac009da573eff7f65.txt +100 -0
- package/docs/research/2026-03-20-agents/ac1bc783364405e5f.txt +190 -0
- package/docs/research/2026-03-20-agents/aca5e2b57fde152a0.txt +132 -0
- package/docs/research/2026-03-20-agents/ad849b8c0a7e95b8b.txt +176 -0
- package/docs/research/2026-03-20-agents/adc2b12a4da32c962.txt +258 -0
- package/docs/research/2026-03-20-agents/af97caaaa9a80e4cb.txt +146 -0
- package/docs/research/2026-03-20-agents/afc5faceee368b3ca.txt +111 -0
- package/docs/research/2026-03-20-agents/afdb282d866e3c1e4.txt +164 -0
- package/docs/research/2026-03-20-agents/afe9d1f61c02b1e8d.txt +299 -0
- package/docs/research/2026-03-20-agents/b4hmkwril.txt +1856 -0
- package/docs/research/2026-03-20-agents/b80ptk89g.txt +1856 -0
- package/docs/research/2026-03-20-agents/bf54s1jss.txt +1150 -0
- package/docs/research/2026-03-20-agents/bhd6kq2kx.txt +1856 -0
- package/docs/research/2026-03-20-agents/bmb2fodyr.txt +988 -0
- package/docs/research/2026-03-20-agents/bmmsrij8i.txt +826 -0
- package/docs/research/2026-03-20-agents/bn4t2ywpu.txt +2175 -0
- package/docs/research/2026-03-20-agents/bu22t9f1z.txt +0 -0
- package/docs/research/2026-03-20-agents/bwvl98v2p.txt +738 -0
- package/docs/research/2026-03-20-agents/psych-a3697a7fd06eb64fd.txt +135 -0
- package/docs/research/2026-03-20-agents/psych-a37776fabc870feae.txt +123 -0
- package/docs/research/2026-03-20-agents/psych-a5b1fe05c0589efaf.txt +2 -0
- package/docs/research/2026-03-20-agents/psych-a95c15b1f29424435.txt +76 -0
- package/docs/research/2026-03-20-agents/psych-a9c26f4d9172dde7c.txt +2 -0
- package/docs/research/2026-03-20-agents/psych-aa19c69f0ca2c5ad3.txt +2 -0
- package/docs/research/2026-03-20-agents/psych-aa4e4cb70e1be5ecb.txt +95 -0
- package/docs/research/2026-03-20-agents/psych-ab5b302f26a554663.txt +102 -0
- package/docs/research/2026-03-20-deep-research-complete.md +101 -0
- package/docs/research/2026-03-20-deep-research-status.md +38 -0
- package/docs/research/2026-03-20-enforcement-research.md +107 -0
- package/expertise/antipatterns/process/ai-coding-antipatterns.md +117 -0
- package/expertise/composition-map.yaml +27 -8
- package/expertise/digests/reviewer/ai-coding-digest.md +83 -0
- package/expertise/digests/reviewer/architectural-thinking-digest.md +63 -0
- package/expertise/digests/reviewer/architecture-antipatterns-digest.md +49 -0
- package/expertise/digests/reviewer/code-smells-digest.md +53 -0
- package/expertise/digests/reviewer/coupling-cohesion-digest.md +54 -0
- package/expertise/digests/reviewer/ddd-digest.md +60 -0
- package/expertise/digests/reviewer/dependency-risk-digest.md +40 -0
- package/expertise/digests/reviewer/error-handling-digest.md +55 -0
- package/expertise/digests/reviewer/review-methodology-digest.md +49 -0
- package/exports/hosts/claude/.claude/commands/learn.md +61 -8
- package/exports/hosts/claude/.claude/commands/plan-review.md +3 -1
- package/exports/hosts/claude/.claude/commands/verify.md +30 -1
- package/exports/hosts/claude/.claude/settings.json +7 -6
- package/exports/hosts/claude/export.manifest.json +8 -5
- package/exports/hosts/claude/host-package.json +3 -0
- package/exports/hosts/codex/export.manifest.json +8 -5
- package/exports/hosts/codex/host-package.json +3 -0
- package/exports/hosts/cursor/.cursor/hooks.json +6 -6
- package/exports/hosts/cursor/export.manifest.json +8 -5
- package/exports/hosts/cursor/host-package.json +3 -0
- package/exports/hosts/gemini/export.manifest.json +8 -5
- package/exports/hosts/gemini/host-package.json +3 -0
- package/hooks/definitions/pretooluse_dispatcher.yaml +26 -0
- package/hooks/definitions/pretooluse_pipeline_guard.yaml +22 -0
- package/hooks/definitions/stop_pipeline_gate.yaml +22 -0
- package/hooks/hooks.json +7 -6
- package/hooks/pretooluse-dispatcher +84 -0
- package/hooks/pretooluse-pipeline-guard +9 -0
- package/hooks/stop-pipeline-gate +9 -0
- package/llms-full.txt +48 -18
- package/package.json +2 -3
- package/schemas/decision.schema.json +15 -0
- package/schemas/hook.schema.json +4 -1
- package/schemas/phase-report.schema.json +9 -0
- package/skills/TEMPLATE-3-ZONE.md +160 -0
- package/skills/brainstorming/SKILL.md +137 -21
- package/skills/clarifier/SKILL.md +364 -53
- package/skills/claude-cli/SKILL.md +91 -12
- package/skills/codex-cli/SKILL.md +91 -12
- package/skills/debugging/SKILL.md +133 -38
- package/skills/design/SKILL.md +173 -37
- package/skills/dispatching-parallel-agents/SKILL.md +129 -31
- package/skills/executing-plans/SKILL.md +113 -25
- package/skills/executor/SKILL.md +252 -21
- package/skills/finishing-a-development-branch/SKILL.md +107 -18
- package/skills/gemini-cli/SKILL.md +91 -12
- package/skills/humanize/SKILL.md +92 -13
- package/skills/init-pipeline/SKILL.md +90 -18
- package/skills/prepare-next/SKILL.md +93 -24
- package/skills/receiving-code-review/SKILL.md +90 -16
- package/skills/requesting-code-review/SKILL.md +100 -24
- package/skills/requesting-code-review/code-reviewer.md +29 -17
- package/skills/reviewer/SKILL.md +270 -57
- package/skills/run-audit/SKILL.md +92 -15
- package/skills/scan-project/SKILL.md +93 -14
- package/skills/self-audit/SKILL.md +133 -39
- package/skills/skill-research/SKILL.md +275 -0
- package/skills/subagent-driven-development/SKILL.md +129 -30
- package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +30 -2
- package/skills/subagent-driven-development/implementer-prompt.md +40 -27
- package/skills/subagent-driven-development/spec-reviewer-prompt.md +25 -12
- package/skills/tdd/SKILL.md +125 -20
- package/skills/using-git-worktrees/SKILL.md +118 -28
- package/skills/using-skills/SKILL.md +116 -29
- package/skills/verification/SKILL.md +160 -17
- package/skills/wazir/SKILL.md +750 -120
- package/skills/writing-plans/SKILL.md +134 -28
- package/skills/writing-skills/SKILL.md +91 -13
- package/skills/writing-skills/anthropic-best-practices.md +104 -64
- package/skills/writing-skills/persuasion-principles.md +100 -34
- package/tooling/src/capture/command.js +46 -2
- package/tooling/src/capture/decision.js +40 -0
- package/tooling/src/capture/store.js +33 -0
- package/tooling/src/capture/user-input.js +66 -0
- package/tooling/src/checks/security-sensitivity.js +69 -0
- package/tooling/src/cli.js +28 -26
- package/tooling/src/config/depth-table.js +60 -0
- package/tooling/src/export/compiler.js +7 -8
- package/tooling/src/guards/guardrail-functions.js +131 -0
- package/tooling/src/guards/phase-prerequisite-guard.js +97 -3
- package/tooling/src/hooks/pretooluse-dispatcher.js +300 -0
- package/tooling/src/hooks/pretooluse-pipeline-guard.js +141 -0
- package/tooling/src/hooks/stop-pipeline-gate.js +92 -0
- package/tooling/src/init/auto-detect.js +0 -2
- package/tooling/src/init/command.js +3 -95
- package/tooling/src/learn/pipeline.js +177 -0
- package/tooling/src/state/db.js +251 -2
- package/tooling/src/state/pipeline-state.js +262 -0
- package/tooling/src/status/command.js +6 -1
- package/tooling/src/verify/proof-collector.js +299 -0
- package/wazir.manifest.yaml +3 -0
- package/workflows/learn.md +61 -8
- package/workflows/plan-review.md +3 -1
- package/workflows/verify.md +30 -1
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
{"parentUuid":null,"isSidechain":true,"promptId":"67fcde4a-7483-4526-a2da-9ae9514890c8","agentId":"a1d0ac79ac2f11e6f","type":"user","message":{"role":"user","content":"Research ONLY — no code. How do systems turn runtime findings into permanent rules?\n\nResearch:\n1. SonarQube — how do quality profiles evolve? Can findings become new rules?\n2. ESLint — how are custom rules created from recurring patterns?\n3. GitHub Advanced Security — how do vulnerability patterns become detection rules?\n4. Snyk — how do dependency vulnerability patterns get added to the database?\n\nFor Wazir: the learning loop is:\n- Review finding: \"executor missed error handling in async code\"\n- Recurs 3+ times across runs\n- Becomes antipattern: AP-XX in expertise/antipatterns/\n- Loaded into future reviewer context\n\nResearch: how should this promotion work?\n- Automatic (risky — bad patterns could become rules)?\n- Human-approved (current design)?\n- Semi-automatic (auto-propose, auto-accept after N confirmations)?\n\nOutput: Finding-to-antipattern promotion pipeline design with concrete criteria."},"uuid":"a5799425-d9aa-406f-a824-99da4ed014c2","timestamp":"2026-03-20T13:34:11.863Z","userType":"external","entrypoint":"cli","cwd":"/Users/mohamedabdallah/Work/Wazir","sessionId":"0c5a9bfa-6816-42b1-a97d-37ae7dad26c5","version":"2.1.80","gitBranch":"feat/remaining-6-fixes","slug":"mighty-wondering-giraffe"}
|
|
2
|
+
{"parentUuid":"a5799425-d9aa-406f-a824-99da4ed014c2","isSidechain":true,"agentId":"a1d0ac79ac2f11e6f","message":{"model":"claude-opus-4-6","id":"msg_01F29tUsish8VbKJ6bqWuCKj","type":"message","role":"assistant","content":[{"type":"text","text":"\n\n# Research: Finding-to-Rule Promotion Pipelines\n\nI'll research how major tools handle the promotion of runtime findings into permanent rules, then design a concrete pipeline for Wazir.\n\n---\n\n## 1. SonarQube — Quality Profile Evolution\n\n**How it works:**\n\nSonarQube ships with a large built-in rule catalog per language. Organizations customize behavior through **Quality Profiles**, which are named collections of activated rules with configured severities and parameters.\n\n**Can findings become new rules?**\n\n- **Not directly.** SonarQube does not auto-generate rules from detected issues. The rule catalog is maintained by SonarSource engineers and the community.\n- **Custom rules** can be written as plugins (Java-based for most analyzers) or via XPath/regex patterns for XML-based languages.\n- The closest mechanism is **\"issue templates\"** — parameterized rules that admins can instantiate with specific patterns (e.g., \"ban calls to `Thread.sleep()` in production code\").\n- Quality Profiles evolve through: (a) SonarSource releasing new rules in upgrades, (b) admins activating/deactivating rules, (c) admins adjusting severity thresholds.\n\n**Key insight:** SonarQube treats rule creation as a **human-curated, release-gated process**. There is no automated promotion from \"frequently seen issue\" to \"new rule.\" The signal that a pattern deserves a rule comes from human analysis of recurring findings across projects.\n\n---\n\n## 2. ESLint — Custom Rules from Recurring Patterns\n\n**How it works:**\n\nESLint's architecture explicitly supports rule creation by end users:\n\n- Each rule is a single JS/TS module that exports a `create()` function receiving an AST context.\n- Rules are packaged as **plugins** (`eslint-plugin-*`) and activated in config files.\n- The ecosystem has hundreds of community plugins born from recurring patterns people noticed in code review.\n\n**The typical lifecycle of a pattern becoming a rule:**\n\n1. A team notices the same code review comment appearing repeatedly (e.g., \"don't use `forEach` with `async` callbacks\").\n2. Someone writes a custom rule that walks the AST looking for that pattern.\n3. The rule is tested against the existing codebase to calibrate false-positive rates.\n4. It's added to the team's shared config.\n5. If broadly useful, it gets published as a plugin or proposed to eslint-core.\n\n**Key insight:** ESLint's model is **human-initiated, community-validated**. The tooling makes rule authoring easy (AST visitor pattern, test harness), but a human must decide \"this pattern is worth codifying.\" The quality gate is: does it have a low false-positive rate across real codebases?\n\n---\n\n## 3. GitHub Advanced Security — Vulnerability Pattern Detection\n\n**How it works:**\n\nGitHub Advanced Security uses two systems:\n\n- **CodeQL** — a semantic code analysis engine where vulnerability patterns are expressed as **queries** (declarative logic over a relational database extracted from code).\n- **Secret scanning** — regex-based pattern matching for credentials, with patterns contributed by partner programs.\n\n**How patterns become detection rules:**\n\n- **CodeQL queries** are maintained in the `github/codeql` repository. New queries follow a strict process:\n 1. A security researcher identifies a vulnerability class.\n 2. They write a CodeQL query with precision/recall metadata.\n 3. The query goes through peer review with mandatory **false-positive benchmarking** against open-source corpora.\n 4. Queries are tagged with precision levels: `high`, `medium`, `low`. Only `high` and `medium` precision queries ship in the default suite.\n 5. Community contributions go through the same review bar.\n\n- **Secret scanning patterns** are added when a service provider registers their token format with GitHub's partner program, or when GitHub's security team identifies a new credential pattern.\n\n**Key insight:** GitHub's model is **expert-authored, precision-gated**. The critical quality gate is false-positive rate measured against real-world benchmarks. Queries with >5% false-positive rate on the benchmark corpus are not included in the default suite.\n\n---\n\n## 4. Snyk — Vulnerability Database Evolution\n\n**How it works:**\n\nSnyk maintains a proprietary vulnerability database that aggregates from multiple sources:\n\n- **Upstream feeds:** NVD (CVE), vendor advisories, language-specific advisory databases (RubyGems, npm, PyPI advisories).\n- **Snyk Security Research Team:** Manually discovers, validates, and enriches vulnerability records.\n- **Community reports:** Users can report vulnerabilities through Snyk's disclosure program.\n\n**The promotion pipeline:**\n\n1. **Signal detection:** Automated monitors watch upstream feeds, commit messages, changelogs, and issue trackers for vulnerability signals.\n2. **Triage:** Automated classifiers sort signals by likely severity and relevance.\n3. **Human validation:** Security analysts verify the vulnerability, write the advisory, determine affected version ranges, and assign severity (CVSS).\n4. **Enrichment:** Analysts add exploit maturity ratings, fix guidance, and remediation paths.\n5. **Publication:** The entry enters the live database and triggers alerts for affected users.\n\n**Key insight:** Snyk's model is **machine-detected, human-validated**. Automation handles the high-volume signal gathering, but every entry that affects user decisions goes through human review. The quality gate is analyst verification of exploitability and affected versions.\n\n---\n\n## Synthesis: The Three Models\n\n| Model | Example | Signal Source | Gate | Risk |\n|---|---|---|---|---|\n| **Fully human** | SonarQube rules | Human observation | Expert authorship + release cycle | Slow to adapt |\n| **Human-initiated, community-validated** | ESLint plugins | Recurring review comments | FP rate on real code | Inconsistent quality across plugins |\n| **Machine-detected, human-validated** | Snyk DB, GitHub CodeQL | Automated scanning + heuristics | Human analyst verification | Bottleneck at review stage |\n\nNo major system uses **fully automatic** promotion. The universal lesson: **false positives are more damaging than false negatives** in rule systems, because every false positive erodes trust and causes rule fatigue.\n\n---\n\n## Wazir Design: Finding-to-Antipattern Promotion Pipeline\n\n### The Problem\n\nWazir's review loop generates findings like:\n\n> \"executor missed error handling in async code\"\n\nWhen this recurs 3+ times, it should become a codified antipattern (`AP-XX`) loaded into future reviewer context. The question is how to manage that promotion reliably.\n\n### Recommended Model: Semi-Automatic (Auto-Propose, Human-Approve)\n\nThis is the Snyk/CodeQL hybrid model adapted for Wazir's context. Fully automatic is too risky (bad pattern becomes a permanent rule). Fully manual is too slow (findings accumulate without action). Semi-automatic captures the best of both.\n\n### Pipeline Stages\n\n```\nStage 1: TALLY Stage 2: CANDIDATE Stage 3: PROMOTE Stage 4: ACTIVE\n(automatic) (automatic) (human gate) (automatic)\n\nFinding logged --> Recurrence detected --> Candidate proposed --> Antipattern loaded\nin run state and grouped for human review into reviewer context\n```\n\n### Stage 1 — TALLY (Automatic)\n\n**What happens:** Every review finding is logged to the run's state DB with structured metadata.\n\n**Schema per finding:**\n\n```yaml\nfinding_id: \"f-20260320-001\"\nrun_id: \"run-20260320-010128\"\nphase: \"implement\"\nrole: \"reviewer\"\ncategory: \"error-handling\" # from a fixed taxonomy\npattern_text: \"missing error handling in async code\"\nfile_path: \"src/executor/runner.ts\"\ncode_snippet_hash: \"<sha256 of normalized snippet>\"\nseverity: \"major\"\n```\n\n**Normalization:** Before storage, the finding text is normalized — file paths, variable names, and line numbers are stripped to produce a **canonical pattern string**. This enables matching across runs.\n\n### Stage 2 — CANDIDATE (Automatic)\n\n**What happens:** After each run completes, a post-run analysis step groups findings by canonical pattern and checks recurrence thresholds.\n\n**Promotion criteria (all must be met):**\n\n| Criterion | Threshold | Rationale |\n|---|---|---|\n| Recurrence count | >= 3 distinct runs | Filters one-off mistakes |\n| Distinct files | >= 2 different files | Confirms it's a pattern, not a single-file issue |\n| Time span | >= 2 distinct calendar days | Prevents burst-counting from a single session |\n| Not already an antipattern | No existing AP with >0.8 similarity | Avoids duplicates |\n| Not already rejected | Not in the rejection log | Respects prior human decisions |\n\n**Output:** A candidate record written to `~/.wazir/projects/<slug>/candidates/`:\n\n```yaml\ncandidate_id: \"cand-20260320-001\"\ncanonical_pattern: \"missing error handling in async code\"\ncategory: \"error-handling\"\noccurrences:\n - { run: \"run-20260318-...\", file: \"src/a.ts\", finding: \"f-...\" }\n - { run: \"run-20260319-...\", file: \"src/b.ts\", finding: \"f-...\" }\n - { run: \"run-20260320-...\", file: \"src/c.ts\", finding: \"f-...\" }\nproposed_severity: \"major\" # mode of occurrence severities\nproposed_title: \"AP-XX: Unhandled errors in async code paths\"\nproposed_body: | # auto-drafted from occurrences\n ## Antipattern\n Async functions or promise chains without try/catch or .catch() handlers.\n \n ## Why it matters\n Unhandled rejections crash the process or silently swallow errors.\n \n ## Correct pattern\n Wrap async calls in try/catch; attach .catch() to every promise chain.\n \n ## Evidence\n Found in 3 runs across 3 files over 2 days.\nstatus: \"pending_review\"\ncreated_at: \"2026-03-20T12:00:00Z\"\n```\n\n### Stage 3 — PROMOTE (Human Gate)\n\n**What happens:** The human (or a designated approver role) reviews candidates.\n\n**Review interface:** A CLI command:\n\n```\nwazir antipatterns review\n```\n\nThis presents each pending candidate with its evidence and offers three actions:\n\n| Action | Effect |\n|---|---|\n| **approve** | Candidate becomes an antipattern file in `expertise/antipatterns/` |\n| **reject** | Candidate is logged to rejection list; same pattern won't be re-proposed |\n| **defer** | Candidate stays pending; re-surfaces after N more occurrences (default: +2) |\n\n**On approve:**\n- A new file `expertise/antipatterns/AP-XX-<slug>.md` is created from the candidate body.\n- The antipattern is added to the antipattern index.\n- The candidate is marked `promoted`.\n\n**On reject:**\n- The candidate's canonical pattern is added to `~/.wazir/projects/<slug>/candidates/rejected.yaml`.\n- A reason field is recorded (optional but encouraged).\n\n### Stage 4 — ACTIVE (Automatic)\n\n**What happens:** Antipatterns in `expertise/antipatterns/` are loaded into reviewer context at the start of every review phase. No additional action needed — this already works in Wazir's current design.\n\n**Feedback loop:** If a promoted antipattern stops appearing in findings for N consecutive runs (suggested: 10), it is flagged as \"possibly resolved\" for the human to archive or keep.\n\n### Quality Safeguards\n\n1. **No fully automatic promotion.** The human gate at Stage 3 prevents bad patterns from becoming permanent rules. This is the universal lesson from SonarQube, ESLint, CodeQL, and Snyk.\n\n2. **Rejection memory.** Rejected patterns are remembered so the system doesn't nag. This avoids the anti-pattern of every tool that re-suggests the same dismissed finding.\n\n3. **Similarity matching at proposal time.** New candidates are compared against existing antipatterns (simple text similarity on canonical pattern + category match). Threshold of 0.8 prevents near-duplicate antipatterns from accumulating.\n\n4. **Decay/archival.** Antipatterns that stop triggering are surfaced for human review. Rules that outlive their usefulness get cleaned up instead of accumulating forever.\n\n5. **Evidence-linked.** Every antipattern traces back to specific findings in specific runs. This supports auditing (\"why does this rule exist?\") and makes it possible to revoke a rule if its evidence turns out to be flawed.\n\n### Comparison to Alternatives\n\n**Why not fully automatic?**\n\nThe risk is a feedback loop: a false finding recurs because of a systematic misunderstanding by the reviewer LLM (not an actual code problem), gets promoted to an antipattern, then reinforces the same misunderstanding in future reviews. This is the \"model training on its own errors\" problem. The human gate breaks this loop.\n\n**Why not fully manual?**\n\nWazir generates findings at a volume where manual tracking is impractical. The auto-tally and auto-candidate stages handle the bookkeeping. The human only needs to make a yes/no/defer decision on pre-filtered, pre-drafted candidates — a much lighter cognitive load than \"go through all findings and decide which ones deserve codification.\"\n\n**Why 3 occurrences and not 2 or 5?**\n\n- At 2: too many false promotions from coincidental pairs.\n- At 5: too slow to codify genuine patterns, especially in smaller projects.\n- 3 across 2+ files and 2+ days is the sweet spot from the CodeQL and Snyk research — it filters noise while remaining responsive.\n\n### Summary\n\nThe recommended design is a four-stage pipeline: automatic tallying, automatic candidate extraction at threshold, human-gated promotion, and automatic loading. This mirrors the architecture used by every mature finding-to-rule system studied, where the universal principle is: **automate the detection and drafting, but gate the codification on human judgment**."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":3,"cache_creation_input_tokens":2658,"cache_read_input_tokens":7745,"output_tokens":3358,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":0,"ephemeral_5m_input_tokens":2658},"inference_geo":"","iterations":[],"speed":"standard"}},"requestId":"req_011CZEREoxWi3KVqkrEHwDfH","type":"assistant","uuid":"05d13811-3d96-427c-a51f-5b5ccf16a157","timestamp":"2026-03-20T13:35:36.006Z","userType":"external","entrypoint":"cli","cwd":"/Users/mohamedabdallah/Work/Wazir","sessionId":"0c5a9bfa-6816-42b1-a97d-37ae7dad26c5","version":"2.1.80","gitBranch":"feat/remaining-6-fixes","slug":"mighty-wondering-giraffe"}
|