@fenixforce/edition-pro 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173) hide show
  1. package/dist/api/approval-routes.d.ts +15 -0
  2. package/dist/api/fleet-routes.d.ts +23 -0
  3. package/dist/api/integration-routes.d.ts +17 -0
  4. package/dist/api/middleware.d.ts +37 -0
  5. package/dist/boot.d.ts +37 -0
  6. package/dist/business/approval-queue.d.ts +94 -0
  7. package/dist/business/arena.d.ts +71 -0
  8. package/dist/business/best-of-n.d.ts +68 -0
  9. package/dist/business/brainstorm.d.ts +42 -0
  10. package/dist/business/compile-checker.d.ts +50 -0
  11. package/dist/business/debate.d.ts +38 -0
  12. package/dist/business/fleet-budget.d.ts +69 -0
  13. package/dist/business/fleet-config.d.ts +125 -0
  14. package/dist/business/fleet.d.ts +85 -0
  15. package/dist/business/handoff.d.ts +56 -0
  16. package/dist/business/hat-system.d.ts +57 -0
  17. package/dist/business/index.d.ts +44 -0
  18. package/dist/business/integration-registry.d.ts +66 -0
  19. package/dist/business/node-pipeline.d.ts +62 -0
  20. package/dist/business/oracle.d.ts +64 -0
  21. package/dist/business/roles/index.d.ts +7 -0
  22. package/dist/business/roles/judge.d.ts +24 -0
  23. package/dist/business/roles/planner.d.ts +30 -0
  24. package/dist/business/roles/types.d.ts +37 -0
  25. package/dist/business/roles/worker.d.ts +25 -0
  26. package/dist/business/router.d.ts +75 -0
  27. package/dist/business/shared-memory.d.ts +85 -0
  28. package/dist/business/status-detector.d.ts +52 -0
  29. package/dist/business/swarm-registry.d.ts +63 -0
  30. package/dist/business/templates.d.ts +55 -0
  31. package/dist/business/workspace-manager.d.ts +105 -0
  32. package/dist/index.d.ts +21 -0
  33. package/dist/index.js +638 -0
  34. package/dist/infrastructure/pty-agent.d.ts +74 -0
  35. package/dist/migrations/migrate.d.ts +7 -0
  36. package/dist/migrations/runner.d.ts +49 -0
  37. package/dist/workspace/worktree.d.ts +69 -0
  38. package/package.json +37 -0
  39. package/skills/builtin/academic-researcher/SKILL.md +51 -0
  40. package/skills/builtin/advanced-recon/SKILL.md +75 -0
  41. package/skills/builtin/agent-governance/SKILL.md +122 -0
  42. package/skills/builtin/algorithmic-art/SKILL.md +55 -0
  43. package/skills/builtin/api-attack-surface-mapper/SKILL.md +88 -0
  44. package/skills/builtin/api-development/SKILL.md +147 -0
  45. package/skills/builtin/api-exploit-prover/SKILL.md +74 -0
  46. package/skills/builtin/api-integration/SKILL.md +73 -0
  47. package/skills/builtin/api-security-tester/SKILL.md +82 -0
  48. package/skills/builtin/api-test-executor/SKILL.md +62 -0
  49. package/skills/builtin/app-store-optimization/SKILL.md +46 -0
  50. package/skills/builtin/audio-tour-guide/SKILL.md +18 -0
  51. package/skills/builtin/auth-flow-operator/SKILL.md +70 -0
  52. package/skills/builtin/autonomous-rag/SKILL.md +21 -0
  53. package/skills/builtin/backend-development/SKILL.md +265 -0
  54. package/skills/builtin/binary-analysis-analyst/SKILL.md +61 -0
  55. package/skills/builtin/binary-analysis-core/SKILL.md +65 -0
  56. package/skills/builtin/binary-recon/SKILL.md +64 -0
  57. package/skills/builtin/blackboard-coordination/SKILL.md +56 -0
  58. package/skills/builtin/blog-to-podcast/SKILL.md +18 -0
  59. package/skills/builtin/blog-writing/SKILL.md +36 -0
  60. package/skills/builtin/brainstorming/SKILL.md +69 -0
  61. package/skills/builtin/brand-design/SKILL.md +42 -0
  62. package/skills/builtin/ci-cd-pipelines/SKILL.md +210 -0
  63. package/skills/builtin/cloud-infrastructure/SKILL.md +140 -0
  64. package/skills/builtin/code-review/SKILL.md +88 -0
  65. package/skills/builtin/code-review-analyst/SKILL.md +96 -0
  66. package/skills/builtin/code-review-recon/SKILL.md +64 -0
  67. package/skills/builtin/code-review-verifier/SKILL.md +55 -0
  68. package/skills/builtin/coding-agent-team/SKILL.md +13 -0
  69. package/skills/builtin/competitor-intelligence/SKILL.md +39 -0
  70. package/skills/builtin/content-engine/SKILL.md +82 -0
  71. package/skills/builtin/context7-docs/SKILL.md +145 -0
  72. package/skills/builtin/copywriting/SKILL.md +38 -0
  73. package/skills/builtin/corrective-rag/SKILL.md +19 -0
  74. package/skills/builtin/cost-optimization/SKILL.md +131 -0
  75. package/skills/builtin/crypto-vulnerability-analyst/SKILL.md +64 -0
  76. package/skills/builtin/customer-support/SKILL.md +48 -0
  77. package/skills/builtin/customer-voice-support/SKILL.md +43 -0
  78. package/skills/builtin/data-analysis/SKILL.md +57 -0
  79. package/skills/builtin/data-visualization/SKILL.md +33 -0
  80. package/skills/builtin/database-design/SKILL.md +119 -0
  81. package/skills/builtin/decision-helper/SKILL.md +84 -0
  82. package/skills/builtin/deep-research/SKILL.md +68 -0
  83. package/skills/builtin/deepwiki-research/SKILL.md +115 -0
  84. package/skills/builtin/dependency-audit/SKILL.md +46 -0
  85. package/skills/builtin/doc-coauthoring/SKILL.md +48 -0
  86. package/skills/builtin/docker-deployment/SKILL.md +243 -0
  87. package/skills/builtin/docx-generation/SKILL.md +135 -0
  88. package/skills/builtin/dry-run-harness/SKILL.md +61 -0
  89. package/skills/builtin/editor/SKILL.md +44 -0
  90. package/skills/builtin/email-drafter/SKILL.md +42 -0
  91. package/skills/builtin/error-handling/SKILL.md +82 -0
  92. package/skills/builtin/eval-harness/SKILL.md +197 -0
  93. package/skills/builtin/evaluation-framework/SKILL.md +51 -0
  94. package/skills/builtin/exploit-writer/SKILL.md +63 -0
  95. package/skills/builtin/fact-checker/SKILL.md +51 -0
  96. package/skills/builtin/filesystem-context/SKILL.md +47 -0
  97. package/skills/builtin/financial-coach/SKILL.md +18 -0
  98. package/skills/builtin/finding-chain-correlator/SKILL.md +70 -0
  99. package/skills/builtin/finding-verifier/SKILL.md +65 -0
  100. package/skills/builtin/frontend-design/SKILL.md +104 -0
  101. package/skills/builtin/frontend-development/SKILL.md +227 -0
  102. package/skills/builtin/frontend-slides/SKILL.md +155 -0
  103. package/skills/builtin/fullstack-project/SKILL.md +286 -0
  104. package/skills/builtin/game-development/SKILL.md +60 -0
  105. package/skills/builtin/git-workflow/SKILL.md +44 -0
  106. package/skills/builtin/i18n-localization/SKILL.md +38 -0
  107. package/skills/builtin/image-prompt-engineering/SKILL.md +37 -0
  108. package/skills/builtin/investment-research/SKILL.md +33 -0
  109. package/skills/builtin/investor-materials/SKILL.md +90 -0
  110. package/skills/builtin/javascript-surface-analyzer/SKILL.md +66 -0
  111. package/skills/builtin/markdown-reports/SKILL.md +68 -0
  112. package/skills/builtin/market-research/SKILL.md +69 -0
  113. package/skills/builtin/mcp-builder/SKILL.md +86 -0
  114. package/skills/builtin/meeting-notes/SKILL.md +47 -0
  115. package/skills/builtin/memory-safety-analyst/SKILL.md +61 -0
  116. package/skills/builtin/meta-controller/SKILL.md +44 -0
  117. package/skills/builtin/mixture-of-agents/SKILL.md +53 -0
  118. package/skills/builtin/monitoring-observability/SKILL.md +169 -0
  119. package/skills/builtin/negotiation-simulator/SKILL.md +24 -0
  120. package/skills/builtin/nestjs-development/SKILL.md +56 -0
  121. package/skills/builtin/nextjs-development/SKILL.md +55 -0
  122. package/skills/builtin/parallel-dispatch/SKILL.md +83 -0
  123. package/skills/builtin/pdf-generation/SKILL.md +169 -0
  124. package/skills/builtin/personal-finance/SKILL.md +17 -0
  125. package/skills/builtin/pev-workflow/SKILL.md +62 -0
  126. package/skills/builtin/planning-with-files/SKILL.md +59 -0
  127. package/skills/builtin/pptx-generation/SKILL.md +117 -0
  128. package/skills/builtin/prisma-orm/SKILL.md +48 -0
  129. package/skills/builtin/rag-database-routing/SKILL.md +38 -0
  130. package/skills/builtin/rapid-prototyping/SKILL.md +152 -0
  131. package/skills/builtin/react-development/SKILL.md +244 -0
  132. package/skills/builtin/react-native-mobile/SKILL.md +113 -0
  133. package/skills/builtin/refactoring/SKILL.md +39 -0
  134. package/skills/builtin/reflexive-metacognition/SKILL.md +29 -0
  135. package/skills/builtin/riper-workflow/SKILL.md +214 -0
  136. package/skills/builtin/security-audit/SKILL.md +113 -0
  137. package/skills/builtin/security-self-audit/SKILL.md +311 -0
  138. package/skills/builtin/self-evolving-agent/SKILL.md +28 -0
  139. package/skills/builtin/self-improvement-loop/SKILL.md +58 -0
  140. package/skills/builtin/semantic-search/SKILL.md +93 -0
  141. package/skills/builtin/seo-audit-team/SKILL.md +27 -0
  142. package/skills/builtin/seo-optimization/SKILL.md +49 -0
  143. package/skills/builtin/server-management/SKILL.md +190 -0
  144. package/skills/builtin/social-media-content/SKILL.md +50 -0
  145. package/skills/builtin/sprint-planner/SKILL.md +49 -0
  146. package/skills/builtin/strategic-compact/SKILL.md +61 -0
  147. package/skills/builtin/strategy-advisor/SKILL.md +51 -0
  148. package/skills/builtin/structured-thinking/SKILL.md +70 -0
  149. package/skills/builtin/subagent-development/SKILL.md +105 -0
  150. package/skills/builtin/system-design/SKILL.md +66 -0
  151. package/skills/builtin/systematic-debugging/SKILL.md +87 -0
  152. package/skills/builtin/tailwind-css/SKILL.md +55 -0
  153. package/skills/builtin/taint-flow-tracer/SKILL.md +89 -0
  154. package/skills/builtin/teaching-agent-team/SKILL.md +32 -0
  155. package/skills/builtin/tech-debt-manager/SKILL.md +67 -0
  156. package/skills/builtin/technical-documentation/SKILL.md +47 -0
  157. package/skills/builtin/test-driven-development/SKILL.md +70 -0
  158. package/skills/builtin/theme-factory/SKILL.md +244 -0
  159. package/skills/builtin/threat-model-generator/SKILL.md +105 -0
  160. package/skills/builtin/trust-layer/SKILL.md +43 -0
  161. package/skills/builtin/typescript-patterns/SKILL.md +61 -0
  162. package/skills/builtin/ui-ux-design/SKILL.md +75 -0
  163. package/skills/builtin/verification-before-completion/SKILL.md +41 -0
  164. package/skills/builtin/verification-loop/SKILL.md +120 -0
  165. package/skills/builtin/waf-bypass-agent/SKILL.md +97 -0
  166. package/skills/builtin/web-artifacts-builder/SKILL.md +117 -0
  167. package/skills/builtin/web-assessment-executor/SKILL.md +66 -0
  168. package/skills/builtin/web-exploit-prover/SKILL.md +58 -0
  169. package/skills/builtin/web-scraping/SKILL.md +63 -0
  170. package/skills/builtin/webapp-testing/SKILL.md +86 -0
  171. package/skills/builtin/webhook-development/SKILL.md +62 -0
  172. package/skills/builtin/writing-skills/SKILL.md +67 -0
  173. package/skills/builtin/xlsx-generation/SKILL.md +116 -0
@@ -0,0 +1,197 @@
1
+ # Eval Harness
2
+
3
+ A formal evaluation framework for Claude Code sessions, implementing eval-driven development (EDD) principles.
4
+
5
+ ## When to Activate
6
+
7
+ - Setting up eval-driven development (EDD) for AI-assisted workflows
8
+ - Defining pass/fail criteria for Claude Code task completion
9
+ - Measuring agent reliability with pass@k metrics
10
+ - Creating regression test suites for prompt or agent changes
11
+ - Benchmarking agent performance across model versions
12
+
13
+ ## Philosophy
14
+
15
+ Eval-Driven Development treats evals as the "unit tests of AI development":
16
+ - Define expected behavior BEFORE implementation
17
+ - Run evals continuously during development
18
+ - Track regressions with each change
19
+ - Use pass@k metrics for reliability measurement
20
+
21
+ ## Eval Types
22
+
23
+ ### Capability Evals
24
+ Test if Claude can do something it couldn't before:
25
+ ```markdown
26
+ [CAPABILITY EVAL: feature-name]
27
+ Task: Description of what Claude should accomplish
28
+ Success Criteria:
29
+ - [ ] Criterion 1
30
+ - [ ] Criterion 2
31
+ - [ ] Criterion 3
32
+ Expected Output: Description of expected result
33
+ ```
34
+
35
+ ### Regression Evals
36
+ Ensure changes don't break existing functionality:
37
+ ```markdown
38
+ [REGRESSION EVAL: feature-name]
39
+ Baseline: SHA or checkpoint name
40
+ Tests:
41
+ - existing-test-1: PASS/FAIL
42
+ - existing-test-2: PASS/FAIL
43
+ - existing-test-3: PASS/FAIL
44
+ Result: X/Y passed (previously Y/Y)
45
+ ```
46
+
47
+ ## Grader Types
48
+
49
+ ### 1. Code-Based Grader
50
+ Deterministic checks using code:
51
+ ```bash
52
+ # Check if file contains expected pattern
53
+ grep -q "export function handleAuth" src/auth.ts && echo "PASS" || echo "FAIL"
54
+
55
+ # Check if tests pass
56
+ npm test -- --testPathPattern="auth" && echo "PASS" || echo "FAIL"
57
+
58
+ # Check if build succeeds
59
+ npm run build && echo "PASS" || echo "FAIL"
60
+ ```
61
+
62
+ ### 2. Model-Based Grader
63
+ Use Claude to evaluate open-ended outputs:
64
+ ```markdown
65
+ [MODEL GRADER PROMPT]
66
+ Evaluate the following code change:
67
+ 1. Does it solve the stated problem?
68
+ 2. Is it well-structured?
69
+ 3. Are edge cases handled?
70
+ 4. Is error handling appropriate?
71
+
72
+ Score: 1-5 (1=poor, 5=excellent)
73
+ Reasoning: [explanation]
74
+ ```
75
+
76
+ ### 3. Human Grader
77
+ Flag for manual review:
78
+ ```markdown
79
+ [HUMAN REVIEW REQUIRED]
80
+ Change: Description of what changed
81
+ Reason: Why human review is needed
82
+ Risk Level: LOW/MEDIUM/HIGH
83
+ ```
84
+
85
+ ## Metrics
86
+
87
+ ### pass@k
88
+ "At least one success in k attempts"
89
+ - pass@1: First attempt success rate
90
+ - pass@3: Success within 3 attempts
91
+ - Typical target: pass@3 > 90%
92
+
93
+ ### pass^k
94
+ "All k trials succeed"
95
+ - Higher bar for reliability
96
+ - pass^3: 3 consecutive successes
97
+ - Use for critical paths
98
+
99
+ ## Eval Workflow
100
+
101
+ ### 1. Define (Before Coding)
102
+ ```markdown
103
+ ## EVAL DEFINITION: feature-xyz
104
+
105
+ ### Capability Evals
106
+ 1. Can create new user account
107
+ 2. Can validate email format
108
+ 3. Can hash password securely
109
+
110
+ ### Regression Evals
111
+ 1. Existing login still works
112
+ 2. Session management unchanged
113
+ 3. Logout flow intact
114
+
115
+ ### Success Metrics
116
+ - pass@3 > 90% for capability evals
117
+ - pass^3 = 100% for regression evals
118
+ ```
119
+
120
+ ### 2. Implement
121
+ Write code to pass the defined evals.
122
+
123
+ ### 3. Evaluate
124
+ ```bash
125
+ # Run capability evals
126
+ # (run each capability eval and record PASS/FAIL)
127
+
128
+ # Run regression evals
129
+ npm test -- --testPathPattern="existing"
130
+
131
+ # Generate report
132
+ ```
133
+
134
+ ### 4. Report
135
+ ```markdown
136
+ EVAL REPORT: feature-xyz
137
+ ========================
138
+
139
+ Capability Evals:
140
+ create-user: PASS (pass@1)
141
+ validate-email: PASS (pass@2)
142
+ hash-password: PASS (pass@1)
143
+ Overall: 3/3 passed
144
+
145
+ Regression Evals:
146
+ login-flow: PASS
147
+ session-mgmt: PASS
148
+ logout-flow: PASS
149
+ Overall: 3/3 passed
150
+
151
+ Metrics:
152
+ pass@1: 67% (2/3)
153
+ pass@3: 100% (3/3)
154
+
155
+ Status: READY FOR REVIEW
156
+ ```
157
+
158
+ ## Integration Patterns
159
+
160
+ ### Pre-Implementation
161
+ ```
162
+ /eval define feature-name
163
+ ```
164
+ Creates eval definition file at `.claude/evals/feature-name.md`
165
+
166
+ ### During Implementation
167
+ ```
168
+ /eval check feature-name
169
+ ```
170
+ Runs current evals and reports status
171
+
172
+ ### Post-Implementation
173
+ ```
174
+ /eval report feature-name
175
+ ```
176
+ Generates full eval report
177
+
178
+ ## Eval Storage
179
+
180
+ Store evals in project:
181
+ ```
182
+ .claude/
183
+ evals/
184
+ feature-xyz.md # Eval definition
185
+ feature-xyz.log # Eval run history
186
+ baseline.json # Regression baselines
187
+ ```
188
+
189
+ ## Best Practices
190
+
191
+ 1. **Define evals BEFORE coding** — Forces clear thinking about success criteria
192
+ 2. **Run evals frequently** — Catch regressions early
193
+ 3. **Track pass@k over time** — Monitor reliability trends
194
+ 4. **Use code graders when possible** — Deterministic > probabilistic
195
+ 5. **Human review for security** — Never fully automate security checks
196
+ 6. **Keep evals fast** — Slow evals don't get run
197
+ 7. **Version evals with code** — Evals are first-class artifacts
@@ -0,0 +1,51 @@
1
+ ---
2
+ name: evaluation-framework
3
+ description: "Use this skill when evaluating output quality, comparing approaches, scoring responses, or setting up quality benchmarks. Triggers: 'evaluate', 'assess', 'score', 'compare quality', 'judge', 'benchmark', 'quality check', or requests to measure how good an output is."
4
+ license: MIT
5
+ ---
6
+
7
+ # Evaluation Framework
8
+
9
+ ## What This Skill Does
10
+
11
+ Evaluate LLM outputs and agent work products using structured rubrics. Covers the LLM-as-judge pattern, pairwise comparison, and bias mitigation.
12
+
13
+ ## Rubric Dimensions
14
+
15
+ | Dimension | 1 (Poor) | 3 (Adequate) | 5 (Excellent) |
16
+ |-----------|----------|--------------|---------------|
17
+ | Accuracy | Factual errors | Mostly correct | Completely correct |
18
+ | Completeness | Missing major elements | Covers basics | Comprehensive |
19
+ | Clarity | Confusing, unclear | Understandable | Clear and well-organized |
20
+ | Relevance | Off-topic | Generally relevant | Precisely targeted |
21
+ | Actionability | No clear next steps | Some guidance | Specific, implementable |
22
+
23
+ ## LLM-as-Judge Pattern
24
+
25
+ ```
26
+ System: You are an expert evaluator. Score the following output on each dimension from 1-5.
27
+ Provide your reasoning BEFORE your score to avoid anchoring.
28
+
29
+ [Output to evaluate]
30
+
31
+ Score each dimension:
32
+ 1. Accuracy: [reasoning] → [score]
33
+ 2. Completeness: [reasoning] → [score]
34
+ 3. Clarity: [reasoning] → [score]
35
+ 4. Relevance: [reasoning] → [score]
36
+ 5. Actionability: [reasoning] → [score]
37
+ ```
38
+
39
+ ## Bias Mitigation
40
+
41
+ - **Position bias:** Randomize order when comparing options
42
+ - **Length bias:** Longer responses aren't automatically better
43
+ - **Self-enhancement:** Don't let the same model evaluate its own output
44
+ - **Anchoring:** Generate reasoning before scores, not after
45
+
46
+ ## Rules
47
+
48
+ - Define evaluation criteria BEFORE looking at outputs
49
+ - Use blind comparison when comparing two approaches
50
+ - Multiple evaluators are better than one (even if all are LLMs)
51
+ - Document the evaluation methodology so that results can be reproduced
@@ -0,0 +1,63 @@
1
+ # Exploit Writer
2
+
3
+ ## Purpose
4
+ Transform confirmed primitives into reproducible proof-of-exploit artifacts and stepwise execution plans.
5
+
6
+ ## Inputs
7
+ - `validated_primitive`
8
+ - `target_context`
9
+ - `environment_constraints`
10
+ - `success_criteria`
11
+
12
+ ## Workflow
13
+ ### Phase 1: Objective and Boundaries
14
+ 1. Define exploit goal (data read, privilege gain, state change).
15
+ 2. Define explicit stop condition.
16
+ 3. Define prohibited actions and safety constraints.
17
+
18
+ ### Phase 2: Chain Design
19
+ 1. Break exploit into stages: setup, trigger, control gain, impact verification.
20
+ 2. Include fallback branches for unstable stages.
21
+
22
+ ### Phase 3: Procedure Authoring
23
+ 1. Write deterministic steps with required inputs.
24
+ 2. Include expected output per step.
25
+ 3. Include failure diagnostics per step.
26
+
27
+ ### Phase 4: Robustness Checks
28
+ 1. Re-run in fresh session/environment.
29
+ 2. Validate whether exploit is deterministic or probabilistic.
30
+ 3. Capture conditions that break reliability.
31
+
32
+ ### Phase 5: Reporting Package
33
+ 1. Provide concise replay instructions.
34
+ 2. Provide artifact index.
35
+ 3. Provide impact statement tied to observed behavior.
36
+
37
+ ## Exploit Procedure Template
38
+ - Preconditions
39
+ - Setup commands/actions
40
+ - Trigger sequence
41
+ - Verification checks
42
+ - Cleanup and rollback
43
+ - Failure troubleshooting
44
+
45
+ ## Output Contract
46
+ ```json
47
+ {
48
+ "exploit_plan": [],
49
+ "stepwise_procedure": [],
50
+ "success_signals": [],
51
+ "failure_diagnostics": [],
52
+ "safety_notes": []
53
+ }
54
+ ```
55
+
56
+ ## Constraints
57
+ - Build only from validated primitives.
58
+ - Do not fabricate impact or reliability.
59
+
60
+ ## Quality Checklist
61
+ - [ ] Another tester can replay from instructions.
62
+ - [ ] Preconditions are explicit.
63
+ - [ ] Impact claim matches observed result.
@@ -0,0 +1,51 @@
1
+ # Fact Checker
2
+
3
+ ## Rating Scale
4
+
5
+ - **TRUE**: accurate, supported by reliable evidence
6
+ - **MOSTLY TRUE**: accurate but missing important context
7
+ - **MIXED**: contains both true and false elements
8
+ - **MOSTLY FALSE**: misleading or largely inaccurate
9
+ - **FALSE**: demonstrably wrong
10
+ - **UNVERIFIABLE**: cannot be confirmed or denied
11
+
12
+ ## Verification Process
13
+
14
+ 1. **Extract the claim**: isolate the specific factual assertion, separate fact from opinion
15
+ 2. **Determine evidence needed**: what would prove/disprove this?
16
+ 3. **Evaluate evidence**: check authoritative sources, primary data, publication dates
17
+ 4. **Rate the claim**: assess accuracy, note confidence, explain reasoning
18
+ 5. **Provide context**: why it matters, common misconceptions, proper interpretation
19
+
20
+ ## Manipulation Patterns to Watch
21
+
22
+ - **Cherry-picking**: selective data that supports a predetermined conclusion
23
+ - **Context removal**: quotes taken out of context, missing qualifiers
24
+ - **False equivalences**: treating unequal sources as equally valid
25
+ - **Correlation as causation**: two things happen together therefore one caused the other
26
+ - **Misleading scales**: graphs with truncated axes, inconsistent intervals
27
+
28
+ ## Output Format
29
+
30
+ ```markdown
31
+ ## Claim
32
+ [Exact statement being verified]
33
+
34
+ ## Verdict: [RATING]
35
+
36
+ ## Analysis
37
+ [Why this rating. Evidence for and against.]
38
+
39
+ ## Correct Information
40
+ [If claim is false, what's actually true]
41
+
42
+ ## Sources
43
+ [Numbered with credibility notes]
44
+ ```
45
+
46
+ ## Rules
47
+
48
+ - Always search for counter-evidence, not just confirming evidence
49
+ - Rate the claim, not the person making it
50
+ - "UNVERIFIABLE" is a valid and honest answer
51
+ - Never present absence of evidence as evidence of absence
@@ -0,0 +1,47 @@
1
+ ---
2
+ name: filesystem-context
3
+ description: "Use this skill for long-running tasks, multi-session work, or any task where context might be lost. Triggers: 'long task', 'context limit', 'multi-session', 'persist', 'remember across sessions', 'pick up where I left off', or any task spanning multiple context windows."
4
+ license: MIT
5
+ ---
6
+
7
+ # Filesystem Context
8
+
9
+ ## What This Skill Does
10
+
11
+ Use the filesystem as extended agent memory. Persist state, decisions, and progress across context windows and sessions.
12
+
13
+ ## Directory Convention
14
+
15
+ ```
16
+ workspace/
17
+ ├── session-state.md # Current task, progress, what to do next
18
+ ├── decisions.md # Decisions made and their rationale
19
+ ├── open-questions.md # Unresolved questions needing answers
20
+ └── scratchpad.md # Rough notes, temporary data
21
+ ```
22
+
23
+ ## Session Start (Orient)
24
+
25
+ ```
26
+ 1. Read session-state.md → understand current task and progress
27
+ 2. Read decisions.md → recall prior decisions (don't re-decide)
28
+ 3. Read open-questions.md → identify what needs resolution
29
+ 4. Resume work from where it left off
30
+ ```
31
+
32
+ ## Session End (Persist)
33
+
34
+ ```
35
+ 1. Update session-state.md with current progress
36
+ 2. Add any new decisions to decisions.md
37
+ 3. Update open-questions.md (add new, mark resolved)
38
+ 4. Clear scratchpad of obsolete notes
39
+ ```
40
+
41
+ ## Rules
42
+
43
+ - Always read state files at session start before doing anything
44
+ - Always update state files before session ends
45
+ - Decisions are durable: once recorded, don't re-debate them unless new information emerges.
46
+ - Keep session-state.md under 50 lines (summary, not transcript)
47
+ - Use scratchpad.md for temporary reasoning (can be deleted).
@@ -0,0 +1,18 @@
1
+ # Financial Coach
2
+ ## Multi-Agent Pipeline
3
+ 1. **Data Analyzer**: parse financial documents (CSV, statements), compute metrics
4
+ 2. **Risk Assessor**: evaluate risk tolerance, current exposure, concentration
5
+ 3. **Recommendation Generator**: actionable advice grounded in analyzed data
6
+
7
+ ## Visualization Output
8
+ Generate Plotly chart specifications for:
9
+ - Income vs expenses over time (line chart)
10
+ - Expense breakdown (donut chart)
11
+ - Net worth trajectory (area chart)
12
+ - Debt payoff projections (stacked bar)
13
+
14
+ ## Rules
15
+ - Every recommendation must reference specific data from the analysis
16
+ - Include both optimistic and conservative projections
17
+ - State clearly that the output is informational guidance, not professional financial advice
18
+ - Never recommend specific securities or funds.
@@ -0,0 +1,70 @@
1
+ # Finding Chain Correlator
2
+
3
+ ## Purpose
4
+ Reveal compounding risk that isolated findings understate.
5
+
6
+ ## Inputs
7
+ - `finding_set`
8
+ - `application_context`
9
+ - `auth_model`
10
+ - `data_sensitivity_map`
11
+
12
+ ## Workflow
13
+ ### Phase 1: Normalization
14
+ 1. Standardize findings into capability statements.
15
+ 2. Extract prerequisites, required role, and affected assets.
16
+
17
+ ### Phase 2: Link Construction
18
+ 1. Connect findings where the output of one enables the next.
19
+ 2. Identify dependency order and branching options.
20
+ 3. Reject links lacking technical preconditions.
21
+
22
+ ### Phase 3: Chain Validation
23
+ 1. Validate each step is feasible in target context.
24
+ 2. Validate session and state transitions between steps.
25
+ 3. Validate operational reliability of full chain.
26
+
27
+ ### Phase 4: Impact Aggregation
28
+ 1. Evaluate confidentiality, integrity, and availability impact.
29
+ 2. Estimate blast radius and tenant crossover risk.
30
+ 3. Rank by attacker effort vs outcome.
31
+
32
+ ### Phase 5: Defensive Breakpoints
33
+ 1. Identify minimal controls that break the chain.
34
+ 2. Prioritize controls by implementation cost and risk reduction.
35
+
36
+ ## Chain Scoring Factors
37
+ - prerequisite complexity
38
+ - execution reliability
39
+ - privilege needed
40
+ - detectability
41
+ - business impact
42
+
43
+ ## Output Contract
44
+ ```json
45
+ {
46
+ "attack_chains": [],
47
+ "prerequisite_graph": [],
48
+ "aggregate_impact": [],
49
+ "defensive_breakpoints": [],
50
+ "priority_order": []
51
+ }
52
+ ```
53
+
54
+ ## Constraints
55
+ - No speculative chain links.
56
+ - No additive severity without chain feasibility.
57
+
58
+ ## Quality Checklist
59
+ - [ ] Every link has evidence.
60
+ - [ ] Chain order is technically valid.
61
+ - [ ] Defensive breakpoints are practical.
62
+
63
+ ## Conditional Decision Matrix
64
+ | Condition | Action | Evidence Requirement |
65
+ |---|---|---|
66
+ | Finding signal unstable | downgrade confidence and add retest plan | repeated run variance log |
67
+ | Chain link missing prerequisite | split chain and mark dependency blocker | prerequisite graph |
68
+ | Impact appears low in isolation | evaluate chain amplification paths | chain-level impact narrative |
69
+ | Mitigation claim is partial | verify alternate path and state variants | mitigation bypass check |
70
+ | Environment blocker dominates | classify inconclusive with unblock requests | blocker evidence |
@@ -0,0 +1,65 @@
1
+ # Finding Verifier
2
+
3
+ ## Purpose
4
+ Ensure reported findings are accurate, reproducible, and correctly classified.
5
+
6
+ ## Inputs
7
+ - `finding_report`
8
+ - `evidence_bundle`
9
+ - `environment_notes`
10
+
11
+ ## Verification Workflow
12
+ ### Phase 1: Evidence Integrity
13
+ 1. Verify artifact completeness and timestamps.
14
+ 2. Verify request-response pairing and context consistency.
15
+
16
+ ### Phase 2: Independent Replay
17
+ 1. Reproduce with original method.
18
+ 2. Reproduce with alternate method when possible.
19
+ 3. Compare behavior consistency.
20
+
21
+ ### Phase 3: Confounder Analysis
22
+ 1. Caching and stale session effects.
23
+ 2. Timing and infrastructure noise.
24
+ 3. Seed-data drift and race artifacts.
25
+
26
+ ### Phase 4: Final Status
27
+ 1. `confirmed` if replayable with clear impact.
28
+ 2. `disputed` if strong counter-evidence exists.
29
+ 3. `inconclusive` if unresolved blockers remain.
30
+
31
+ ## Acceptance Criteria by Class
32
+ | Class | Confirmed Requires |
33
+ |---|---|
34
+ | Injection | parser/engine effect + attacker control |
35
+ | XSS | controlled script execution in target context |
36
+ | Authz | unauthorized action/object access proven |
37
+ | SSRF | outbound request influence or protected target reach |
38
+
39
+ ## Output Contract
40
+ ```json
41
+ {
42
+ "verification_status": [],
43
+ "replay_results": [],
44
+ "confounder_notes": [],
45
+ "required_follow_up": []
46
+ }
47
+ ```
48
+
49
+ ## Constraints
50
+ - Do not confirm from a single unstable run.
51
+ - Do not dispute on intuition alone.
52
+
53
+ ## Quality Checklist
54
+ - [ ] Independent replay attempted.
55
+ - [ ] Confounders addressed.
56
+ - [ ] Status rationale is explicit.
57
+
58
+ ## Conditional Decision Matrix
59
+ | Condition | Action | Evidence Requirement |
60
+ |---|---|---|
61
+ | Finding signal unstable | downgrade confidence and add retest plan | repeated run variance log |
62
+ | Chain link missing prerequisite | split chain and mark dependency blocker | prerequisite graph |
63
+ | Impact appears low in isolation | evaluate chain amplification paths | chain-level impact narrative |
64
+ | Mitigation claim is partial | verify alternate path and state variants | mitigation bypass check |
65
+ | Environment blocker dominates | classify inconclusive with unblock requests | blocker evidence |
@@ -0,0 +1,104 @@
1
+ ---
2
+ name: frontend-design
3
+ description: "Use this skill when the user asks to build a website, web page, landing page, web UI, HTML/CSS/JS project, or any browser-based interface. Triggers: 'build me a website', 'create a landing page', 'make a web app', 'HTML page', 'frontend', 'web UI', 'responsive layout', 'CSS', styling tasks, accessibility fixes, or any request to create something that runs in a browser. Covers HTML, CSS, JavaScript, responsive design, accessibility, and modern web standards."
4
+ license: MIT
5
+ ---
6
+
7
+ # Frontend Design
8
+
9
+ ## What This Skill Does
10
+
11
+ Build complete, production-quality web interfaces with HTML, CSS, and JavaScript, including responsive layouts, accessibility, and modern patterns. Applies to everything from single landing pages to multi-page sites.
12
+
13
+ ## Before You Start
14
+
15
+ 1. **Fetch current docs** via Context7 if using any CSS framework (Tailwind, Bootstrap) or JS library
16
+ 2. **Check DeepWiki** if integrating with an unfamiliar frontend framework or build tool
17
+ 3. **Ask the user** about target browsers, mobile requirements, and any existing design system
18
+
19
+ ## HTML Standards
20
+
21
+ Write semantic HTML5. Every page needs:
22
+
23
+ ```html
24
+ <!DOCTYPE html>
25
+ <html lang="en">
26
+ <head>
27
+ <meta charset="UTF-8">
28
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
29
+ <meta name="description" content="Page description for SEO">
30
+ <title>Page Title</title>
31
+ </head>
32
+ <body>
33
+ <header>...</header>
34
+ <nav>...</nav>
35
+ <main>...</main>
36
+ <footer>...</footer>
37
+ </body>
38
+ </html>
39
+ ```
40
+
41
+ Semantic elements over divs: `<header>`, `<nav>`, `<main>`, `<section>`, `<article>`, `<aside>`, `<footer>`. Use `<div>` only when no semantic element fits.
42
+
43
+ ## CSS Patterns
44
+
45
+ ### Layout (use modern CSS)
46
+ - **Flexbox** for one-dimensional layouts (navbars, card rows, centering)
47
+ - **CSS Grid** for two-dimensional layouts (page layouts, dashboards, galleries)
48
+ - **Container queries** for component-level responsiveness
49
+ - Never use floats for layout
50
+
51
+ ### Responsive Design
52
+ ```css
53
+ /* Mobile-first breakpoints */
54
+ /* Base styles = mobile */
55
+ @media (min-width: 768px) { /* Tablet */ }
56
+ @media (min-width: 1024px) { /* Desktop */ }
57
+ @media (min-width: 1440px) { /* Large desktop */ }
58
+ ```
59
+
60
+ Use `clamp()` for fluid typography: `font-size: clamp(1rem, 2.5vw, 2rem);`
61
+
62
+ ### CSS Variables for Theming
63
+ ```css
64
+ :root {
65
+ --color-primary: #2563eb;
66
+ --color-surface: #ffffff;
67
+ --color-text: #1a1a2e;
68
+ --radius: 8px;
69
+ --shadow: 0 2px 8px rgba(0,0,0,0.08);
70
+ --transition: 200ms ease;
71
+ }
72
+
73
+ @media (prefers-color-scheme: dark) {
74
+ :root {
75
+ --color-surface: #0f0f1a;
76
+ --color-text: #e2e2e8;
77
+ }
78
+ }
79
+ ```
80
+
81
+ ## Accessibility Checklist
82
+
83
+ - All images have `alt` text (decorative images get `alt=""`)
84
+ - All interactive elements are keyboard-accessible (Tab, Enter, Escape)
85
+ - Color contrast ratio meets WCAG AA (4.5:1 for normal text, 3:1 for large text)
86
+ - Form inputs have associated `<label>` elements
87
+ - Page has a single `<h1>` and heading hierarchy doesn't skip levels
88
+ - Focus states are visible (never `outline: none` without a replacement)
89
+ - ARIA attributes used correctly: `aria-label`, `aria-expanded`, `aria-hidden`
90
+
91
+ ## Performance
92
+
93
+ - Images: use `loading="lazy"`, provide `width` and `height`, use modern formats (WebP, AVIF)
94
+ - Fonts: `font-display: swap`, preload critical fonts, limit to 2 families max
95
+ - CSS: critical styles inline in `<head>`, rest loaded async
96
+ - JS: `defer` attribute on scripts, avoid render-blocking
97
+
98
+ ## Rules
99
+
100
+ - Design for the most common use case, accommodate edge cases
101
+ - Every interactive element needs: default, hover, active, focus, disabled states
102
+ - Error messages must tell the user what went wrong AND how to fix it
103
+ - Test with real content, not lorem ipsum
104
+ - Mobile-first: write base styles for mobile, enhance for larger screens