@rfxlamia/skillkit 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (269) hide show
  1. package/agents/agents/creative-copywriter.md +212 -0
  2. package/agents/agents/dario-amodei.md +135 -0
  3. package/agents/agents/doc-simplifier.md +63 -0
  4. package/agents/agents/kotlin-pro.md +433 -0
  5. package/agents/agents/red-team.md +136 -0
  6. package/agents/agents/sam-altman.md +121 -0
  7. package/agents/agents/seo-manager.md +184 -0
  8. package/package.json +7 -2
  9. package/skills/quick-spec/tests/__pycache__/test_skill.cpython-314-pytest-9.0.2.pyc +0 -0
  10. package/skills/skillkit/.claude/settings.local.json +7 -0
  11. package/skills/skillkit/scripts/__pycache__/decision_helper.cpython-314.pyc +0 -0
  12. package/skills/skillkit/scripts/__pycache__/quick_validate.cpython-312.pyc +0 -0
  13. package/skills/skillkit/scripts/__pycache__/quick_validate.cpython-314.pyc +0 -0
  14. package/skills/skillkit/scripts/__pycache__/test_generator.cpython-314-pytest-9.0.2.pyc +0 -0
  15. package/skills/skillkit/scripts/utils/__pycache__/__init__.cpython-312.pyc +0 -0
  16. package/skills/skillkit/scripts/utils/__pycache__/__init__.cpython-314.pyc +0 -0
  17. package/skills/skillkit/scripts/utils/__pycache__/budget_tracker.cpython-312.pyc +0 -0
  18. package/skills/skillkit/scripts/utils/__pycache__/budget_tracker.cpython-314.pyc +0 -0
  19. package/skills/skillkit/scripts/utils/__pycache__/output_formatter.cpython-312.pyc +0 -0
  20. package/skills/skillkit/scripts/utils/__pycache__/output_formatter.cpython-314.pyc +0 -0
  21. package/skills/skillkit/scripts/utils/__pycache__/reference_validator.cpython-312.pyc +0 -0
  22. package/skills/skillkit/scripts/utils/__pycache__/reference_validator.cpython-314.pyc +0 -0
  23. package/skills/skillkit-help/SKILL.md +81 -0
  24. package/skills/skillkit-help/knowledge/application/09-case-studies.md +257 -0
  25. package/skills/skillkit-help/knowledge/application/12-testing-and-validation.md +276 -0
  26. package/skills/skillkit-help/knowledge/foundation/01-why-skills-exist.md +246 -0
  27. package/skills/skillkit-help/knowledge/foundation/02-skills-vs-subagents-comparison.md +312 -0
  28. package/skills/skillkit-help/knowledge/foundation/03-skills-vs-subagents-decision-tree.md +346 -0
  29. package/skills/skillkit-help/knowledge/foundation/06-platform-constraints.md +237 -0
  30. package/skills/skillkit-help/knowledge/foundation/08-when-not-to-use-skills.md +270 -0
  31. package/skills/skillkit-help/template/SKILL.md +52 -0
  32. package/skills/skills/adversarial-review/SKILL.md +219 -0
  33. package/skills/skills/baby-education/SKILL.md +260 -0
  34. package/skills/skills/baby-education/references/advanced-techniques.md +323 -0
  35. package/skills/skills/baby-education/references/transformations.md +345 -0
  36. package/skills/skills/been-there-done-that/SKILL.md +455 -0
  37. package/skills/skills/been-there-done-that/references/analysis-patterns.md +162 -0
  38. package/skills/skills/been-there-done-that/references/git-commands.md +132 -0
  39. package/skills/skills/been-there-done-that/references/tree-insertion-logic.md +145 -0
  40. package/skills/skills/coolhunter/SKILL.md +270 -0
  41. package/skills/skills/coolhunter/assets/elicitation-methods.csv +51 -0
  42. package/skills/skills/coolhunter/knowledge/elicitation-methods.md +312 -0
  43. package/skills/skills/coolhunter/references/workflow-execution.md +238 -0
  44. package/skills/skills/coolhunter/workflow-plan-coolhunter.md +232 -0
  45. package/skills/skills/creative-copywriting/SKILL.md +324 -0
  46. package/skills/skills/creative-copywriting/databases/README.md +60 -0
  47. package/skills/skills/creative-copywriting/databases/carousel-structures.csv +16 -0
  48. package/skills/skills/creative-copywriting/databases/emotional-arcs.csv +11 -0
  49. package/skills/skills/creative-copywriting/databases/hook-formulas.csv +51 -0
  50. package/skills/skills/creative-copywriting/databases/power-words.csv +201 -0
  51. package/skills/skills/creative-copywriting/databases/psychological-triggers.csv +21 -0
  52. package/skills/skills/creative-copywriting/databases/read-more-patterns.csv +26 -0
  53. package/skills/skills/creative-copywriting/databases/swipe-triggers.csv +31 -0
  54. package/skills/skills/creative-copywriting/references/carousel-psychology.md +223 -0
  55. package/skills/skills/creative-copywriting/references/hook-anatomy.md +169 -0
  56. package/skills/skills/creative-copywriting/references/power-word-science.md +134 -0
  57. package/skills/skills/creative-copywriting/references/storytelling-frameworks.md +157 -0
  58. package/skills/skills/diverse-content-gen/SKILL.md +201 -0
  59. package/skills/skills/diverse-content-gen/references/advanced-techniques.md +320 -0
  60. package/skills/skills/diverse-content-gen/references/research-findings.md +379 -0
  61. package/skills/skills/diverse-content-gen/references/task-workflows.md +241 -0
  62. package/skills/skills/diverse-content-gen/references/tool-integration.md +419 -0
  63. package/skills/skills/diverse-content-gen/references/troubleshooting.md +426 -0
  64. package/skills/skills/diverse-content-gen/references/vs-core-technique.md +240 -0
  65. package/skills/skills/framework-critical-thinking/SKILL.md +220 -0
  66. package/skills/skills/framework-critical-thinking/references/bias_detector.md +375 -0
  67. package/skills/skills/framework-critical-thinking/references/fallback_handler.md +239 -0
  68. package/skills/skills/framework-critical-thinking/references/memory_curator.md +161 -0
  69. package/skills/skills/framework-critical-thinking/references/metacognitive_monitor.md +297 -0
  70. package/skills/skills/framework-critical-thinking/references/producer_critic_orchestrator.md +333 -0
  71. package/skills/skills/framework-critical-thinking/references/reasoning_router.md +235 -0
  72. package/skills/skills/framework-critical-thinking/references/reasoning_validator.md +97 -0
  73. package/skills/skills/framework-critical-thinking/references/reflection_trigger.md +78 -0
  74. package/skills/skills/framework-critical-thinking/references/self_verification.md +388 -0
  75. package/skills/skills/framework-critical-thinking/references/uncertainty_quantifier.md +207 -0
  76. package/skills/skills/framework-initiative/SKILL.md +231 -0
  77. package/skills/skills/framework-initiative/references/examples.md +150 -0
  78. package/skills/skills/framework-initiative/references/impact-analysis.md +157 -0
  79. package/skills/skills/framework-initiative/references/intent-patterns.md +145 -0
  80. package/skills/skills/framework-initiative/references/star-framework.md +165 -0
  81. package/skills/skills/humanize-docs/SKILL.md +203 -0
  82. package/skills/skills/humanize-docs/references/advanced-techniques.md +13 -0
  83. package/skills/skills/humanize-docs/references/core-transformations.md +368 -0
  84. package/skills/skills/humanize-docs/references/detection-patterns.md +400 -0
  85. package/skills/skills/humanize-docs/references/examples-gallery.md +374 -0
  86. package/skills/skills/imagine/SKILL.md +190 -0
  87. package/skills/skills/imagine/references/artstyle-corporate-memphis.md +625 -0
  88. package/skills/skills/imagine/references/artstyle-crewdson-hyperrealism.md +295 -0
  89. package/skills/skills/imagine/references/artstyle-iphone-social-media.md +426 -0
  90. package/skills/skills/imagine/references/artstyle-sciencesaru.md +276 -0
  91. package/skills/skills/pre-deploy-checklist/README.md +26 -0
  92. package/skills/skills/pre-deploy-checklist/SKILL.md +153 -0
  93. package/skills/skills/pre-deploy-checklist/references/checklist-categories.md +174 -0
  94. package/skills/skills/pre-deploy-checklist/references/domain-prompts.md +216 -0
  95. package/skills/skills/prompt-engineering/SKILL.md +209 -0
  96. package/skills/skills/prompt-engineering/references/advanced-combinations.md +444 -0
  97. package/skills/skills/prompt-engineering/references/chain-of-thought.md +140 -0
  98. package/skills/skills/prompt-engineering/references/decision_matrix.md +220 -0
  99. package/skills/skills/prompt-engineering/references/few-shot.md +346 -0
  100. package/skills/skills/prompt-engineering/references/json-format.md +270 -0
  101. package/skills/skills/prompt-engineering/references/natural-language.md +420 -0
  102. package/skills/skills/prompt-engineering/references/pitfalls.md +365 -0
  103. package/skills/skills/prompt-engineering/references/prompt-chaining.md +498 -0
  104. package/skills/skills/prompt-engineering/references/react.md +108 -0
  105. package/skills/skills/prompt-engineering/references/self-consistency.md +322 -0
  106. package/skills/skills/prompt-engineering/references/tree-of-thoughts.md +386 -0
  107. package/skills/skills/prompt-engineering/references/xml-format.md +220 -0
  108. package/skills/skills/prompt-engineering/references/yaml-format.md +488 -0
  109. package/skills/skills/prompt-engineering/references/zero-shot.md +74 -0
  110. package/skills/skills/quick-spec/SKILL.md +280 -0
  111. package/skills/skills/quick-spec/assets/tech-spec-template.md +74 -0
  112. package/skills/skills/quick-spec/references/step-01-understand.md +189 -0
  113. package/skills/skills/quick-spec/references/step-02-investigate.md +144 -0
  114. package/skills/skills/quick-spec/references/step-03-generate.md +128 -0
  115. package/skills/skills/quick-spec/references/step-04-review.md +173 -0
  116. package/skills/skills/quick-spec/tests/__pycache__/test_skill.cpython-314-pytest-9.0.2.pyc +0 -0
  117. package/skills/skills/quick-spec/tests/test_scenarios.md +83 -0
  118. package/skills/skills/quick-spec/tests/test_skill.py +136 -0
  119. package/skills/skills/readme-expert/SKILL.md +538 -0
  120. package/skills/skills/readme-expert/knowledge/INDEX.md +192 -0
  121. package/skills/skills/readme-expert/knowledge/application/quality-standards.md +470 -0
  122. package/skills/skills/readme-expert/knowledge/application/script-executor.md +604 -0
  123. package/skills/skills/readme-expert/knowledge/application/template-library.md +822 -0
  124. package/skills/skills/readme-expert/knowledge/foundation/codebase-scanner.md +361 -0
  125. package/skills/skills/readme-expert/knowledge/foundation/validation-checklist.md +481 -0
  126. package/skills/skills/red-teaming/SKILL.md +321 -0
  127. package/skills/skills/red-teaming/references/ai-llm-redteam.md +517 -0
  128. package/skills/skills/red-teaming/references/attack-techniques.md +410 -0
  129. package/skills/skills/red-teaming/references/cybersecurity-redteam.md +383 -0
  130. package/skills/skills/red-teaming/references/tools-frameworks.md +446 -0
  131. package/skills/skills/releasing/.skillkit-mode +1 -0
  132. package/skills/skills/releasing/SKILL.md +225 -0
  133. package/skills/skills/releasing/references/version-detection.md +108 -0
  134. package/skills/skills/screenwriter/SKILL.md +273 -0
  135. package/skills/skills/screenwriter/references/advanced-techniques.md +216 -0
  136. package/skills/skills/screenwriter/references/pipeline-integration.md +266 -0
  137. package/skills/skills/skillkit/.claude/settings.local.json +7 -0
  138. package/skills/skills/skillkit/.claude-plugin/plugin.json +27 -0
  139. package/skills/skills/skillkit/CHANGELOG.md +484 -0
  140. package/skills/skills/skillkit/SKILL.md +511 -0
  141. package/skills/skills/skillkit/commands/skillkit.md +6 -0
  142. package/skills/skills/skillkit/commands/validate-plan.md +6 -0
  143. package/skills/skills/skillkit/commands/verify.md +6 -0
  144. package/skills/skills/skillkit/knowledge/INDEX.md +352 -0
  145. package/skills/skills/skillkit/knowledge/application/09-case-studies.md +257 -0
  146. package/skills/skills/skillkit/knowledge/application/10-technical-architecture.md +324 -0
  147. package/skills/skills/skillkit/knowledge/application/11-adoption-strategy.md +267 -0
  148. package/skills/skills/skillkit/knowledge/application/12-testing-and-validation.md +276 -0
  149. package/skills/skills/skillkit/knowledge/application/13-competitive-landscape.md +198 -0
  150. package/skills/skills/skillkit/knowledge/foundation/01-why-skills-exist.md +246 -0
  151. package/skills/skills/skillkit/knowledge/foundation/02-skills-vs-subagents-comparison.md +312 -0
  152. package/skills/skills/skillkit/knowledge/foundation/03-skills-vs-subagents-decision-tree.md +346 -0
  153. package/skills/skills/skillkit/knowledge/foundation/04-hybrid-patterns.md +308 -0
  154. package/skills/skills/skillkit/knowledge/foundation/05-token-economics.md +275 -0
  155. package/skills/skills/skillkit/knowledge/foundation/06-platform-constraints.md +237 -0
  156. package/skills/skills/skillkit/knowledge/foundation/07-security-concerns.md +322 -0
  157. package/skills/skills/skillkit/knowledge/foundation/08-when-not-to-use-skills.md +270 -0
  158. package/skills/skills/skillkit/knowledge/plugin-guide.md +614 -0
  159. package/skills/skills/skillkit/knowledge/tools/14-validation-tools-guide.md +150 -0
  160. package/skills/skills/skillkit/knowledge/tools/15-cost-tools-guide.md +157 -0
  161. package/skills/skills/skillkit/knowledge/tools/16-security-tools-guide.md +122 -0
  162. package/skills/skills/skillkit/knowledge/tools/17-pattern-tools-guide.md +161 -0
  163. package/skills/skills/skillkit/knowledge/tools/18-decision-helper-guide.md +243 -0
  164. package/skills/skills/skillkit/knowledge/tools/19-test-generator-guide.md +275 -0
  165. package/skills/skills/skillkit/knowledge/tools/20-split-skill-guide.md +149 -0
  166. package/skills/skills/skillkit/knowledge/tools/21-quality-scorer-guide.md +226 -0
  167. package/skills/skills/skillkit/knowledge/tools/22-migration-helper-guide.md +356 -0
  168. package/skills/skills/skillkit/knowledge/tools/23-subagent-creation-guide.md +448 -0
  169. package/skills/skills/skillkit/knowledge/tools/24-behavioral-testing-guide.md +122 -0
  170. package/skills/skills/skillkit/references/proposal-generation.md +982 -0
  171. package/skills/skills/skillkit/references/rationalization-catalog.md +75 -0
  172. package/skills/skills/skillkit/references/research-methodology.md +661 -0
  173. package/skills/skills/skillkit/references/section-2-full-creation-workflow.md +452 -0
  174. package/skills/skills/skillkit/references/section-3-validation-workflow-existing-skill.md +63 -0
  175. package/skills/skills/skillkit/references/section-4-decision-workflow-skills-vs-subagents.md +64 -0
  176. package/skills/skills/skillkit/references/section-5-migration-workflow-doc-to-skill.md +58 -0
  177. package/skills/skills/skillkit/references/section-6-subagent-creation-workflow.md +499 -0
  178. package/skills/skills/skillkit/references/section-7-knowledge-reference-map.md +72 -0
  179. package/skills/skills/skillkit/scripts/__pycache__/decision_helper.cpython-314.pyc +0 -0
  180. package/skills/skills/skillkit/scripts/__pycache__/quick_validate.cpython-312.pyc +0 -0
  181. package/skills/skills/skillkit/scripts/__pycache__/quick_validate.cpython-314.pyc +0 -0
  182. package/skills/skills/skillkit/scripts/__pycache__/test_generator.cpython-314-pytest-9.0.2.pyc +0 -0
  183. package/skills/skills/skillkit/scripts/decision_helper.py +799 -0
  184. package/skills/skills/skillkit/scripts/init_skill.py +400 -0
  185. package/skills/skills/skillkit/scripts/init_subagent.py +231 -0
  186. package/skills/skills/skillkit/scripts/migration_helper.py +669 -0
  187. package/skills/skills/skillkit/scripts/package_skill.py +211 -0
  188. package/skills/skills/skillkit/scripts/pattern_detector.py +381 -0
  189. package/skills/skills/skillkit/scripts/pattern_detector_new.py +382 -0
  190. package/skills/skills/skillkit/scripts/pressure_tester.py +157 -0
  191. package/skills/skills/skillkit/scripts/quality_scorer.py +999 -0
  192. package/skills/skills/skillkit/scripts/quick_validate.py +100 -0
  193. package/skills/skills/skillkit/scripts/security_scanner.py +474 -0
  194. package/skills/skills/skillkit/scripts/split_skill.py +540 -0
  195. package/skills/skills/skillkit/scripts/test_generator.py +695 -0
  196. package/skills/skills/skillkit/scripts/token_estimator.py +493 -0
  197. package/skills/skills/skillkit/scripts/utils/__init__.py +49 -0
  198. package/skills/skills/skillkit/scripts/utils/__pycache__/__init__.cpython-312.pyc +0 -0
  199. package/skills/skills/skillkit/scripts/utils/__pycache__/__init__.cpython-314.pyc +0 -0
  200. package/skills/skills/skillkit/scripts/utils/__pycache__/budget_tracker.cpython-312.pyc +0 -0
  201. package/skills/skills/skillkit/scripts/utils/__pycache__/budget_tracker.cpython-314.pyc +0 -0
  202. package/skills/skills/skillkit/scripts/utils/__pycache__/output_formatter.cpython-312.pyc +0 -0
  203. package/skills/skills/skillkit/scripts/utils/__pycache__/output_formatter.cpython-314.pyc +0 -0
  204. package/skills/skills/skillkit/scripts/utils/__pycache__/reference_validator.cpython-312.pyc +0 -0
  205. package/skills/skills/skillkit/scripts/utils/__pycache__/reference_validator.cpython-314.pyc +0 -0
  206. package/skills/skills/skillkit/scripts/utils/budget_tracker.py +388 -0
  207. package/skills/skills/skillkit/scripts/utils/output_formatter.py +263 -0
  208. package/skills/skills/skillkit/scripts/utils/reference_validator.py +401 -0
  209. package/skills/skills/skillkit/scripts/validate_skill.py +594 -0
  210. package/skills/skills/skillkit/tests/test_behavioral.py +39 -0
  211. package/skills/skills/skillkit/tests/test_scenarios.md +83 -0
  212. package/skills/skills/skillkit/tests/test_skill.py +136 -0
  213. package/skills/skills/skillkit-help/SKILL.md +81 -0
  214. package/skills/skills/skillkit-help/knowledge/application/09-case-studies.md +257 -0
  215. package/skills/skills/skillkit-help/knowledge/application/12-testing-and-validation.md +276 -0
  216. package/skills/skills/skillkit-help/knowledge/foundation/01-why-skills-exist.md +246 -0
  217. package/skills/skills/skillkit-help/knowledge/foundation/02-skills-vs-subagents-comparison.md +312 -0
  218. package/skills/skills/skillkit-help/knowledge/foundation/03-skills-vs-subagents-decision-tree.md +346 -0
  219. package/skills/skills/skillkit-help/knowledge/foundation/06-platform-constraints.md +237 -0
  220. package/skills/skills/skillkit-help/knowledge/foundation/08-when-not-to-use-skills.md +270 -0
  221. package/skills/skills/skillkit-help/template/SKILL.md +52 -0
  222. package/skills/skills/social-media-seo/SKILL.md +278 -0
  223. package/skills/skills/social-media-seo/databases/caption-styles.csv +31 -0
  224. package/skills/skills/social-media-seo/databases/engagement-tactics.csv +16 -0
  225. package/skills/skills/social-media-seo/databases/hashtag-strategies.csv +21 -0
  226. package/skills/skills/social-media-seo/databases/hook-formulas.csv +26 -0
  227. package/skills/skills/social-media-seo/databases/keyword-clusters.csv +11 -0
  228. package/skills/skills/social-media-seo/databases/thread-structures.csv +26 -0
  229. package/skills/skills/social-media-seo/databases/viral-patterns.csv +21 -0
  230. package/skills/skills/social-media-seo/references/analytics-guide.md +321 -0
  231. package/skills/skills/social-media-seo/references/instagram-seo.md +235 -0
  232. package/skills/skills/social-media-seo/references/threads-seo.md +305 -0
  233. package/skills/skills/social-media-seo/references/x-twitter-seo.md +337 -0
  234. package/skills/skills/social-media-seo/scripts/query_database.py +191 -0
  235. package/skills/skills/storyteller/SKILL.md +241 -0
  236. package/skills/skills/storyteller/references/transformation-methodology.md +293 -0
  237. package/skills/skills/storyteller/references/visual-vocabulary.md +177 -0
  238. package/skills/skills/thread-pro/SKILL.md +162 -0
  239. package/skills/skills/thread-pro/anti-ai-patterns.md +120 -0
  240. package/skills/skills/thread-pro/hook-formulas.md +138 -0
  241. package/skills/skills/thread-pro/references/anti-ai-patterns.md +120 -0
  242. package/skills/skills/thread-pro/references/hook-formulas.md +138 -0
  243. package/skills/skills/thread-pro/references/thread-structures.md +240 -0
  244. package/skills/skills/thread-pro/references/voice-injection.md +130 -0
  245. package/skills/skills/thread-pro/thread-structures.md +240 -0
  246. package/skills/skills/thread-pro/voice-injection.md +130 -0
  247. package/skills/skills/tinkering/SKILL.md +251 -0
  248. package/skills/skills/tinkering/references/graduation-checklist.md +100 -0
  249. package/skills/skills/validate-plan/.skillkit-mode +1 -0
  250. package/skills/skills/validate-plan/SKILL.md +406 -0
  251. package/skills/skills/validate-plan/references/dry-principles.md +251 -0
  252. package/skills/skills/validate-plan/references/gap-analysis-guide.md +320 -0
  253. package/skills/skills/validate-plan/references/tdd-patterns.md +413 -0
  254. package/skills/skills/validate-plan/references/yagni-checklist.md +330 -0
  255. package/skills/skills/verify-before-ship/.skillkit-mode +1 -0
  256. package/skills/skills/verify-before-ship/SKILL.md +116 -0
  257. package/skills/skills/verify-before-ship/references/anti-rationalization.md +212 -0
  258. package/skills/skills/verify-before-ship/references/verification-gates.md +305 -0
  259. package/skills-manifest.json +8 -2
  260. package/src/banner.js +1 -1
  261. package/src/cli.js +15 -4
  262. package/src/install.js +45 -29
  263. package/src/install.test.js +75 -7
  264. package/src/picker.js +15 -4
  265. package/src/picker.test.js +36 -1
  266. package/src/scope.js +8 -39
  267. package/src/scope.test.js +9 -13
  268. package/src/tools.js +76 -0
  269. package/src/tools.test.js +80 -0
@@ -0,0 +1,78 @@
1
+ # Reflection Trigger
2
+
3
+ ## Purpose
4
+
5
+ The Reflection Trigger provides rule-based conditions for activating self-correction loops. It determines when an agent should pause, evaluate its progress, and potentially adjust its approach.
6
+
7
+ ## Trigger Conditions
8
+
9
+ ### 1. Confidence Threshold Violations
10
+
11
+ ```python
12
class ConfidenceTrigger:
    """Request reflection when the reported confidence is critically low."""

    # Fallback cut-offs used for every level the caller does not override.
    DEFAULT_THRESHOLDS = {
        'critical': 0.3,
        'warning': 0.5,
        'caution': 0.7
    }

    def __init__(self, thresholds=None):
        """Store the confidence cut-offs.

        Args:
            thresholds: Optional mapping of level name to cut-off value.
                Entries given here override the defaults; levels that are
                not mentioned keep their default value.
        """
        # Merge instead of discarding the argument, so partial overrides
        # such as {'critical': 0.2} are honoured.
        self.thresholds = {**self.DEFAULT_THRESHOLDS, **(thresholds or {})}

    def check(self, current_state):
        """Return a TriggerResult for ``current_state``.

        A state without a 'confidence' key is treated as confidence 1.0.
        Only the 'critical' threshold is consulted here; the 'warning' and
        'caution' levels are stored but not acted on by this method.
        """
        confidence = current_state.get('confidence', 1.0)
        if confidence < self.thresholds['critical']:
            return TriggerResult(
                should_reflect=True,
                priority='critical',
                reason=f"Confidence {confidence:.2f} critically low"
            )
        return TriggerResult(should_reflect=False)
29
+ ```
30
+
31
+ ### 2. Repeated Action Patterns
32
+
33
+ ```python
34
class PatternTrigger:
    """Flag an agent whose last three actions are all the same."""

    def check(self, action_history):
        """Return a TriggerResult describing whether reflection is needed.

        Histories shorter than three entries never trigger. Otherwise the
        final three entries are handed to ``self.are_identical``.
        """
        # NOTE(review): are_identical is not defined in this snippet; it is
        # presumably supplied elsewhere - confirm.
        stuck = (
            len(action_history) >= 3
            and self.are_identical(action_history[-3:])
        )
        if not stuck:
            return TriggerResult(should_reflect=False)
        return TriggerResult(
            should_reflect=True,
            priority='high',
            reason="Same action repeated 3 times"
        )
47
+ ```
48
+
49
+ ### 3. Latency Spikes
50
+
51
+ ```python
52
class LatencyTrigger:
    """Flag a step that has run far longer than expected."""

    def check(self, elapsed_time, expected_time):
        """Return a TriggerResult; triggers once elapsed exceeds 5x expected."""
        ceiling = expected_time * 5
        if not (elapsed_time > ceiling):
            return TriggerResult(should_reflect=False)
        return TriggerResult(
            should_reflect=True,
            priority='critical',
            reason="Latency 5x expected - possible infinite loop"
        )
61
+ ```
62
+
63
+ ## Usage Example
64
+
65
+ ```python
66
# Build the trigger once, then consult it at every agent step.
trigger = ReflectionTrigger(config={'confidence': {'critical': 0.3}})

for step_number, state in enumerate(agent_states):
    result = trigger.should_reflect(state, step_number)
    if not result.should_reflect:
        continue
    # Reflection requested: generate one and apply it to the current state.
    reflection = generate_reflection(state)
    state = apply_reflection(state, reflection)
73
+ ```
74
+
75
+ ---
76
+
77
+ **Sources:**
78
+ - [Self-Corrective Agent Architecture](https://www.emergentmind.com/topics/self-corrective-agent-architecture)
@@ -0,0 +1,388 @@
1
+ # Self-Verification
2
+
3
+ ## Table of Contents
4
+
5
+ - [Purpose](#purpose)
6
+ - [Core Techniques](#core-techniques)
7
+ - [Chain-of-Verification (CoVe)](#1-chain-of-verification-cove)
8
+ - [Backward Verification](#2-backward-verification)
9
+ - [Self-Consistency Check](#3-self-consistency-check)
10
+ - [Cross-Verification](#4-cross-verification-with-external-sources)
11
+ - [Verification Strategies by Task Type](#verification-strategies-by-task-type)
12
+ - [Self-Refine Loop](#self-refine-loop)
13
+ - [Implementation Example](#implementation-example)
14
+ - [Best Practices](#best-practices)
15
+ - [Common Pitfalls](#common-pitfalls)
16
+
17
+ ## Purpose
18
+
19
+ Self-Verification provides mechanisms for AI agents to validate their own outputs before delivery. It implements techniques like Chain-of-Verification (CoVe), backward verification, and cross-verification to catch errors and hallucinations.
20
+
21
+ ## Core Techniques
22
+
23
+ ### 1. Chain-of-Verification (CoVe)
24
+
25
+ A multi-stage verification process where the model generates verification questions and answers them to validate its initial output.
26
+
27
+ ```
28
+ ┌─────────────────────────────────────────────────────────────┐
29
+ │ Stage 1: Baseline Response │
30
+ │ Generate initial answer to the query │
31
+ └──────────────────────┬──────────────────────────────────────┘
32
+ │
33
+ ▼
34
+ ┌─────────────────────────────────────────────────────────────┐
35
+ │ Stage 2: Plan Verification │
36
+ │ Generate verification questions based on baseline │
37
+ └──────────────────────┬──────────────────────────────────────┘
38
+ │
39
+ ▼
40
+ ┌─────────────────────────────────────────────────────────────┐
41
+ │ Stage 3: Execute Verification │
42
+ │ Answer verification questions independently │
43
+ └──────────────────────┬──────────────────────────────────────┘
44
+ │
45
+ ▼
46
+ ┌─────────────────────────────────────────────────────────────┐
47
+ │ Stage 4: Generate Final Verified Response │
48
+ │ Incorporate verification results into final output │
49
+ └─────────────────────────────────────────────────────────────┘
50
+ ```
51
+
52
+ **Implementation:**
53
+
54
+ ```python
55
class ChainOfVerification:
    """Validate a baseline response by questioning its claims (stages 2-4)."""

    def verify(self, query, baseline_response):
        """Return the baseline if every check agrees with it, else a revision.

        Args:
            query: The original query; forwarded to question generation.
            baseline_response: The initial answer being verified.
        """
        # Stage 2: plan the verification.
        pending = self.generate_verification_questions(query, baseline_response)

        # Stage 3: answer each question on its own and compare to the baseline.
        findings = [self._check(item, baseline_response) for item in pending]

        # Stage 4: keep the baseline only when nothing contradicts it.
        contradicted = not all(entry['matches_baseline'] for entry in findings)
        if contradicted:
            return self.revise_response(baseline_response, findings)
        return baseline_response

    def _check(self, question, baseline_response):
        """Answer one question independently, then record its consistency."""
        independent = self.answer_independently(question)
        return {
            'question': question,
            'answer': independent,
            'matches_baseline': self.check_consistency(
                independent, baseline_response
            )
        }

    def generate_verification_questions(self, query, response):
        """Extract claims from the response and turn each into questions."""
        return [
            question
            for claim in self.extract_claims(response)
            for question in self.claim_to_questions(claim)
        ]
88
+ ```
89
+
90
+ **Template for Verification Planning:**
91
+ ```
92
+ Given the following claim from a response:
93
+ "{claim}"
94
+
95
+ Generate 2-3 specific questions that would verify the accuracy of this claim.
96
+ Questions should be answerable independently without referencing the original claim.
97
+
98
+ Verification questions:
99
+ 1.
100
+ 2.
101
+ 3.
102
+ ```
103
+
104
+ ### 2. Backward Verification
105
+
106
+ For mathematical or logical problems, verify by working backward from the conclusion to see if it leads to the original problem conditions.
107
+
108
+ ```python
109
class BackwardVerification:
    """Check a solution by undoing its steps, starting from the final answer."""

    # Operation name -> name of the operation that undoes it. The arithmetic
    # pairs are listed in both directions so that a forward 'subtract' or
    # 'divide' step can be reversed as well.
    INVERSES = {
        'add': 'subtract',
        'subtract': 'add',
        'multiply': 'divide',
        'divide': 'multiply',
        'square': 'square_root',
        'square_root': 'square',
        'append': 'remove_last'
    }

    # Operation name -> callable(value, operand) that performs it. Unary
    # operations ignore the operand.
    OPERATIONS = {
        'add': lambda value, operand: value + operand,
        'subtract': lambda value, operand: value - operand,
        'multiply': lambda value, operand: value * operand,
        'divide': lambda value, operand: value / operand,
        'square': lambda value, operand: value * value,
        'square_root': lambda value, operand: value ** 0.5,
        'remove_last': lambda value, operand: value[:-1]
    }

    def verify(self, problem, solution_steps, final_answer):
        """Verify by reversing the solution.

        Args:
            problem: The original problem conditions, passed to
                ``self.matches_original`` for the final comparison.
            solution_steps: Sequence of steps, each exposing ``operation``
                (a name) and ``operand`` attributes.
            final_answer: The value the forward solution arrived at.

        Returns:
            Whatever ``self.matches_original(current, problem)`` returns.

        Raises:
            ValueError: If a step's operation has no known inverse.
        """
        # Start from final answer
        current = final_answer

        # Apply inverse operations in reverse order
        for step in reversed(solution_steps):
            inverse_name = self.get_inverse_operation(step.operation)
            # The inverse is a name, so resolve it to a callable instead of
            # calling .apply() on a string.
            undo = self.OPERATIONS.get(inverse_name)
            if undo is None:
                raise ValueError(
                    f"No inverse known for operation {step.operation!r}"
                )
            current = undo(current, step.operand)

        # Check if we return to original problem conditions
        # NOTE(review): matches_original is not defined in this snippet.
        return self.matches_original(current, problem)

    def get_inverse_operation(self, operation):
        """Return the name of the inverse of ``operation``, or None if unknown."""
        return self.INVERSES.get(operation)
131
+ ```
132
+
133
+ **Example:**
134
+ ```
135
+ Problem: "If x + 5 = 12, what is x?"
136
+ Forward: x = 12 - 5 = 7
137
+ Backward: 7 + 5 = 12 ✓ (matches original equation)
138
+ ```
139
+
140
+ ### 3. Self-Consistency Check
141
+
142
+ Generate multiple reasoning paths and check if they converge to the same answer.
143
+
144
+ ```python
145
class SelfConsistency:
    """Sample several reasoning paths and report the majority answer."""

    def __init__(self, sample_count=10, temperature=0.7):
        """Store how many paths to sample and at which temperature."""
        self.sample_count = sample_count
        self.temperature = temperature

    def verify(self, problem):
        """Sample ``sample_count`` answers and aggregate them.

        Returns:
            A dict with the most frequent answer ('final_answer'), the share
            of samples that produced it ('consistency_score'), every sample
            ('all_answers') and the value of
            ``self.compute_aggregate_confidence`` ('confidence').

        Raises:
            ValueError: If no samples were produced (``sample_count`` < 1).
        """
        # Imported locally so the snippet runs without a module-level import.
        from collections import Counter

        # Generate multiple reasoning paths
        answers = []
        for _ in range(self.sample_count):
            reasoning, answer = self.generate_with_temperature(
                problem, self.temperature
            )
            answers.append({
                'reasoning': reasoning,
                'answer': answer,
                'confidence': self.assess_reasoning_quality(reasoning)
            })

        # Without samples there is no majority to report; fail with a clear
        # message rather than an IndexError from most_common(1)[0].
        if not answers:
            raise ValueError(
                "sample_count must be at least 1 to check consistency"
            )

        # Aggregate answers; for tied counts Counter keeps the answer that
        # was encountered first.
        answer_counts = Counter(a['answer'] for a in answers)
        top_answer, votes = answer_counts.most_common(1)[0]

        # Calculate consistency score
        consistency_score = votes / len(answers)

        return {
            'final_answer': top_answer,
            'consistency_score': consistency_score,
            'all_answers': answers,
            'confidence': self.compute_aggregate_confidence(answers)
        }
176
+ ```
177
+
178
+ ### 4. Cross-Verification with External Sources
179
+
180
+ When external knowledge bases or tools are available, verify claims against them.
181
+
182
+ ```python
183
class CrossVerification:
    """Check a claim against a knowledge base and, when possible, tools."""

    def __init__(self, knowledge_base, tools):
        """Keep references to the knowledge base and the available tools."""
        self.tools = tools
        self.kb = knowledge_base

    def verify_claim(self, claim):
        """Return a report dict describing how ``claim`` was verified.

        The report always holds 'claim', 'verified', 'sources' and
        'confidence'. 'kb_match' is added only when the knowledge base
        returned results; 'tool_result' and 'tool_match' only when
        ``self.is_computable(claim)`` is truthy.
        """
        report = dict(claim=claim, verified=False, sources=[], confidence=0.0)

        # Knowledge-base evidence.
        hits = self.kb.query(claim)
        if hits:
            report['sources'].extend(hits)
            report['kb_match'] = self.check_match(claim, hits)

        # Tool-based evidence, for claims that can be computed.
        if self.is_computable(claim):
            computed = self.compute_with_tools(claim)
            report.update(
                tool_result=computed,
                tool_match=self.check_match(claim, computed),
            )

        # The final verdict is delegated to the aggregation hook.
        report['verified'] = self.aggregate_verification(report)
        return report
218
+ ```
219
+
220
+ ## Verification Strategies by Task Type
221
+
222
+ ### Factual Claims
223
+
224
+ Use CoVe with web search or knowledge base verification:
225
+
226
+ ```python
227
# Settings for verifying factual claims: CoVe driven by claim extraction,
# checked against the listed sources.
factual_verification = dict(
    technique='CoVe',
    question_generation='claim_extraction',
    verification_source=['knowledge_base', 'web_search'],
    revision_strategy='correct_or_hedge',
)
233
+ ```
234
+
235
+ ### Mathematical Reasoning
236
+
237
+ Use backward verification + computation tools:
238
+
239
+ ```python
240
# Settings for verifying mathematical reasoning: backward verification
# supported by the listed computation tools.
math_verification = dict(
    technique='backward_verification',
    tools=['calculator', 'sympy', 'wolfram_alpha'],
    check_steps=True,
    precision_tolerance=1e-10,
)
246
+ ```
247
+
248
+ ### Logical Arguments
249
+
250
+ Use consistency checking and counter-example search:
251
+
252
+ ```python
253
# Verification settings for logical arguments.
logic_verification = dict(
    technique='consistency_checking',
    methods=['truth_table', 'counter_example_search'],
    validate_premises=True,
    check_entailment=True,
)
259
+ ```
260
+
261
+ ### Code Generation
262
+
263
+ Use execution and test validation:
264
+
265
+ ```python
266
# Verification settings for generated code; the steps are listed in order.
_CODE_CHECK_STEPS = [
    'syntax_check',
    'static_analysis',
    'test_execution',
    'edge_case_testing',
]
code_verification = dict(
    technique='execution_based',
    steps=_CODE_CHECK_STEPS,
    timeout=30,
)
276
+ ```
277
+
278
+ ## Self-Refine Loop
279
+
280
+ Iterative improvement through self-critique:
281
+
282
+ ```python
283
class SelfRefine:
    """Iteratively improve an output by critiquing and refining it.

    ``apply_refinement`` is called by :meth:`refine` but is not defined in
    this snippet; an implementation must provide it.
    """

    def __init__(self, max_iterations=3, llm=None):
        """Store the loop budget and the model used to produce critiques.

        Args:
            max_iterations: Upper bound on critique/refine rounds.
            llm: Object exposing ``generate(prompt)``. Optional so existing
                ``SelfRefine()`` call sites keep working; it must be set
                before :meth:`generate_critique` is used.
        """
        self.max_iterations = max_iterations
        self.llm = llm

    def refine(self, initial_output, task_description):
        """Run the critique/refine loop and return the outcome.

        Returns:
            dict with ``final_output``, ``iterations`` (number of critiques
            generated, 0 when ``max_iterations`` is 0) and ``critiques``
            (every critique produced, in order).
        """
        current = initial_output
        # Previously referenced without being defined, which raised NameError.
        critique_history = []

        for _ in range(self.max_iterations):
            critique = self.generate_critique(current, task_description)
            critique_history.append(critique)

            # Stop as soon as the critique accepts the current output.
            if critique['is_satisfactory']:
                break

            current = self.apply_refinement(current, critique)

        return {
            'final_output': current,
            # One critique per round, so this is also correct for zero rounds,
            # where the loop variable would never have been bound.
            'iterations': len(critique_history),
            'critiques': critique_history
        }

    def generate_critique(self, output, task):
        """Ask the model to critique ``output`` for ``task``.

        Returns whatever ``self.llm.generate`` returns; :meth:`refine`
        indexes that value with ``'is_satisfactory'``.

        Raises:
            RuntimeError: If no ``llm`` was supplied.
        """
        if self.llm is None:
            raise RuntimeError("SelfRefine requires an llm with a generate() method")

        critique_prompt = f"""
        Task: {task}
        Output: {output}

        Critique this output. Identify:
        1. Factual errors or hallucinations
        2. Logical inconsistencies
        3. Missing information
        4. Unclear explanations
        5. Areas for improvement

        Is this output satisfactory? (Yes/No with explanation)
        """
        return self.llm.generate(critique_prompt)
322
+ ```
323
+
324
+ ## Implementation Example
325
+
326
+ ```python
327
class SelfVerificationSystem:
    """Run the verification techniques selected for a task and refine if needed.

    ``get_pipeline`` and ``aggregate_confidence`` are called here but not
    defined in this snippet; an implementation must provide them.
    """

    def __init__(self):
        self.cove = ChainOfVerification()
        self.backward = BackwardVerification()
        self.consistency = SelfConsistency()
        self.refine = SelfRefine()

    def verify_output(self, task_type, problem, output):
        """Verify ``output`` and return it, refined when confidence is low.

        Returns:
            dict with ``verified_output``, ``confidence``,
            ``verification_details`` and ``was_refined``.
        """
        pipeline = self.get_pipeline(task_type)

        # technique name -> (result key, deferred call); unknown names are ignored.
        runners = {
            'CoVe': ('cove', lambda: self.cove.verify(problem, output)),
            'backward': ('backward', lambda: self.backward.verify(
                problem, output['steps'], output['answer']
            )),
            'consistency': ('consistency', lambda: self.consistency.verify(problem)),
        }

        results = {}
        for technique in pipeline:
            if technique in runners:
                key, run = runners[technique]
                results[key] = run()

        final_confidence = self.aggregate_confidence(results)

        # Below the 0.8 threshold the output goes through the refine loop.
        verified, was_refined = output, False
        if final_confidence < 0.8:
            verified = self.refine.refine(output, problem)['final_output']
            was_refined = True

        return {
            'verified_output': verified,
            'confidence': final_confidence,
            'verification_details': results,
            'was_refined': was_refined
        }
367
+ ```
368
+
369
+ ## Best Practices
370
+
371
+ 1. **Independent Verification:** Answer verification questions without reference to the baseline response
372
+ 2. **Multiple Techniques:** Combine different verification methods for robust validation
373
+ 3. **Graceful Degradation:** If verification fails, hedge claims or request human review
374
+ 4. **Budget Management:** Limit verification iterations to control computational cost
375
+ 5. **Calibration:** Track verification success rates and adjust confidence thresholds
376
+
377
+ ## Common Pitfalls
378
+
379
+ - **Verification Hallucination:** The model hallucinates during the verification phase itself
380
+ - **Confirmation Bias:** Verification favors the baseline response
381
+ - **Over-Verification:** Spending too much compute on simple, reliable outputs
382
+ - **False Confidence:** High verification scores for incorrect but internally consistent outputs
383
+
384
+ ---
385
+
386
+ **Sources:**
387
+ - [Chain-of-Verification Reduces Hallucination in LLMs](https://arxiv.org/abs/2309.11495)
388
+ - [Self-Verification Prompting](https://learnprompting.org/docs/advanced/self_criticism/self_verification)
@@ -0,0 +1,207 @@
1
+ # Uncertainty Quantifier
2
+
3
+ ## Purpose
4
+
5
+ The Uncertainty Quantifier calibrates confidence scores to match actual accuracy, ensuring that stated confidence levels are reliable indicators of correctness.
6
+
7
+ ## Confidence Calibration
8
+
9
+ ### The Calibration Problem
10
+
11
+ Models often exhibit:
12
+ - **Overconfidence:** Stating high confidence for incorrect answers
13
+ - **Underconfidence:** Stating low confidence for correct answers
14
+
15
+ ### Calibration Methods
16
+
17
+ #### 1. Temperature Scaling
18
+
19
+ ```python
20
class TemperatureScaler:
    """Calibrate confidences by dividing their logit by a learned temperature."""

    # Keeps probabilities away from 0 and 1 so logit and log stay finite.
    _EPS = 1e-12

    def __init__(self):
        self.temperature = 1.0

    def fit(self, confidences, accuracies):
        """Learn the temperature that minimises NLL on a validation set.

        Args:
            confidences: Sequence or array of raw confidences in [0, 1].
            accuracies: Matching sequence of 0/1 (or bool) correctness flags.
        """
        from scipy.optimize import minimize

        conf = np.asarray(confidences, dtype=float)
        acc = np.asarray(accuracies, dtype=float)

        def nll_loss(params):
            scaled = np.clip(
                self.scale_confidences(conf, params[0]),
                self._EPS, 1 - self._EPS,
            )
            return -np.mean(
                acc * np.log(scaled) +
                (1 - acc) * np.log(1 - scaled)
            )

        # The lower bound keeps the optimiser away from T <= 0, where the
        # scaling is undefined or inverts the confidence ordering.
        result = minimize(nll_loss, x0=[1.0], bounds=[(1e-3, None)])
        self.temperature = float(result.x[0])

    def calibrate(self, confidence):
        """Apply temperature scaling with the fitted temperature."""
        return self.scale_confidences(confidence, self.temperature)

    def scale_confidences(self, confidences, temperature):
        """Return sigmoid(logit(confidences) / temperature).

        Inputs are clipped to [_EPS, 1 - _EPS] first, so confidences of
        exactly 0 or 1 no longer divide by zero or take log(0).
        """
        from scipy.special import expit

        p = np.clip(np.asarray(confidences, dtype=float), self._EPS, 1 - self._EPS)
        logits = np.log(p / (1 - p))
        # expit is the logistic sigmoid and does not overflow for large logits.
        return expit(logits / temperature)
41
+ ```
42
+
43
+ #### 2. Platt Scaling
44
+
45
+ ```python
46
class PlattScaler:
    """Calibrate confidences with a fitted sigmoid: sigmoid(a * c + b)."""

    def __init__(self):
        self.a = 1.0
        self.b = 0.0

    def fit(self, confidences, accuracies):
        """Fit the sigmoid parameters ``a`` and ``b`` by minimising NLL.

        Args:
            confidences: Sequence or array of raw confidences.
            accuracies: Matching sequence of 0/1 (or bool) correctness flags.
        """
        from scipy.optimize import minimize

        # Plain lists (including lists of bools) would otherwise fail on
        # ``1 - accuracies``; convert once, outside the optimiser loop.
        conf = np.asarray(confidences, dtype=float)
        acc = np.asarray(accuracies, dtype=float)

        def loss(params):
            a, b = params
            calibrated = 1 / (1 + np.exp(-(a * conf + b)))
            # The 1e-10 offset guards against log(0).
            return -np.mean(
                acc * np.log(calibrated + 1e-10) +
                (1 - acc) * np.log(1 - calibrated + 1e-10)
            )

        result = minimize(loss, x0=[1.0, 0.0])
        self.a, self.b = result.x

    def calibrate(self, confidence):
        """Apply Platt scaling with the fitted parameters."""
        return 1 / (1 + np.exp(-(self.a * confidence + self.b)))
69
+ ```
70
+
71
+ #### 3. Isotonic Regression
72
+
73
+ ```python
74
class IsotonicCalibrator:
    """Calibrate confidences through a fitted isotonic regression model."""

    def __init__(self):
        # Imported here, so scikit-learn is needed only when an instance is created.
        from sklearn.isotonic import IsotonicRegression
        regressor = IsotonicRegression(out_of_bounds='clip')
        self.iso = regressor

    def fit(self, confidences, accuracies):
        """Fit the underlying regressor on (confidence, accuracy) pairs."""
        self.iso.fit(confidences, accuracies)

    def calibrate(self, confidence):
        """Return the calibrated value for a single confidence."""
        # predict() takes a batch, so wrap the scalar and unwrap the result.
        batch = [confidence]
        calibrated = self.iso.predict(batch)
        return calibrated[0]
86
+ ```
87
+
88
+ ## Calibration Metrics
89
+
90
+ ### Expected Calibration Error (ECE)
91
+
92
+ ```python
93
def expected_calibration_error(confidences, predictions, labels, n_bins=10):
    """Calculate ECE - lower is better.

    Samples are grouped into ``n_bins`` equal-width confidence bins over
    [0, 1]; ECE is the sample-weighted mean of |avg confidence - accuracy|
    per bin.

    Args:
        confidences: Sequence or array of confidences in [0, 1].
        predictions: Predicted labels, same length as ``confidences``.
        labels: True labels, same length as ``confidences``.
        n_bins: Number of equal-width bins.

    Returns:
        The ECE as a float; 0.0 for empty input.
    """
    conf = np.asarray(confidences, dtype=float)
    correct = np.asarray(predictions) == np.asarray(labels)
    if conf.size == 0:
        return 0.0

    bin_boundaries = np.linspace(0, 1, n_bins + 1)

    ece = 0.0
    for i, (lower, upper) in enumerate(zip(bin_boundaries[:-1], bin_boundaries[1:])):
        # Bins are (lower, upper], except the first, which is closed on the
        # left so that a confidence of exactly 0.0 is not dropped.
        above_lower = conf >= lower if i == 0 else conf > lower
        in_bin = above_lower & (conf <= upper)
        prop_in_bin = in_bin.mean()

        if prop_in_bin > 0:
            accuracy_in_bin = correct[in_bin].mean()
            avg_confidence_in_bin = conf[in_bin].mean()
            ece += abs(avg_confidence_in_bin - accuracy_in_bin) * prop_in_bin

    return float(ece)
110
+ ```
111
+
112
+ ### Maximum Calibration Error (MCE)
113
+
114
+ ```python
115
def maximum_calibration_error(confidences, predictions, labels, n_bins=10):
    """Calculate MCE - worst-case calibration error.

    Samples are grouped into ``n_bins`` equal-width confidence bins over
    [0, 1]; MCE is the largest |avg confidence - accuracy| over the
    non-empty bins.

    Args:
        confidences: Sequence or array of confidences in [0, 1].
        predictions: Predicted labels, same length as ``confidences``.
        labels: True labels, same length as ``confidences``.
        n_bins: Number of equal-width bins.

    Returns:
        The MCE as a float; 0.0 when no bin contains a sample.
    """
    conf = np.asarray(confidences, dtype=float)
    correct = np.asarray(predictions) == np.asarray(labels)
    bin_boundaries = np.linspace(0, 1, n_bins + 1)
    errors = []

    for i in range(n_bins):
        lower, upper = bin_boundaries[i], bin_boundaries[i + 1]
        # Bins are (lower, upper], except the first, which is closed on the
        # left so that a confidence of exactly 0.0 is not dropped.
        above_lower = conf >= lower if i == 0 else conf > lower
        in_bin = above_lower & (conf <= upper)

        if in_bin.any():
            accuracy = correct[in_bin].mean()
            confidence = conf[in_bin].mean()
            errors.append(abs(confidence - accuracy))

    return float(max(errors)) if errors else 0.0
130
+ ```
131
+
132
+ ### Brier Score
133
+
134
+ ```python
135
def brier_score(confidences, outcomes):
    """Calculate Brier score - lower is better.

    Args:
        confidences: Sequence or array of predicted probabilities.
        outcomes: Matching sequence of 0/1 (or bool) outcomes.

    Returns:
        Mean squared difference between confidences and outcomes.
    """
    # Convert to float arrays so lists and boolean outcomes are accepted.
    conf = np.asarray(confidences, dtype=float)
    actual = np.asarray(outcomes, dtype=float)
    return np.mean((conf - actual) ** 2)
138
+ ```
139
+
140
+ ## Confidence Intervals
141
+
142
+ ```python
143
class ConfidenceIntervalEstimator:
    """Estimate normal-approximation intervals and MC-dropout uncertainty."""

    def __init__(self, confidence_level=0.95):
        self.confidence_level = confidence_level

    def estimate_interval(self, predictions, variances):
        """Estimate confidence intervals.

        Computes ``predictions +/- z * sqrt(variances)``, where ``z`` is the
        two-sided normal quantile for ``self.confidence_level``.

        Returns:
            dict with ``lower``, ``upper`` and ``confidence``.
        """
        from scipy import stats

        z_score = stats.norm.ppf((1 + self.confidence_level) / 2)
        half_width = z_score * np.sqrt(variances)

        return {
            'lower': predictions - half_width,
            'upper': predictions + half_width,
            'confidence': self.confidence_level
        }

    def monte_carlo_dropout(self, model, input_data, n_samples=100):
        """Estimate uncertainty using MC dropout.

        Runs ``n_samples`` forward passes with the model in training mode
        and returns the per-element mean and variance across passes.

        NOTE(review): ``model.train()`` switches every submodule to training
        mode, not only dropout layers - confirm that is acceptable for
        models containing layers such as batch norm.

        Returns:
            dict with ``mean`` and ``variance`` tensors.
        """
        import torch

        # Remember the caller's mode so the model is not left in train mode.
        was_training = model.training
        model.train()  # Enable dropout
        try:
            # Gradients are not needed for sampling predictions.
            with torch.no_grad():
                samples = torch.stack(
                    [model(input_data) for _ in range(n_samples)]
                )
        finally:
            model.train(was_training)

        return {'mean': samples.mean(dim=0), 'variance': samples.var(dim=0)}
172
+ ```
173
+
174
+ ## Usage Example
175
+
176
+ ```python
177
import numpy as np

# Collect calibration data: one (confidence, correct?) pair per validation task
confidences = []
accuracies = []

for task in validation_set:
    confidence, prediction, actual = agent.solve_with_confidence(task)
    confidences.append(confidence)
    accuracies.append(prediction == actual)

# Fit calibrator. The scaler does element-wise arithmetic on its inputs,
# so pass float arrays rather than Python lists.
calibrator = TemperatureScaler()
calibrator.fit(
    np.asarray(confidences, dtype=float),
    np.asarray(accuracies, dtype=float),
)

# Use calibrated confidence
raw_confidence = agent.get_confidence(new_task)
calibrated = calibrator.calibrate(raw_confidence)

print(f"Raw: {raw_confidence:.2f}, Calibrated: {calibrated:.2f}")
195
+ ```
196
+
197
+ ## Best Practices
198
+
199
+ 1. **Use a held-out calibration set** - Don't calibrate on training data
200
+ 2. **Monitor ECE regularly** - Recalibrate if drift is detected
201
+ 3. **Consider task-specific calibration** - Different tasks may need different calibrators
202
+ 4. **Report uncertainty** - Always provide confidence intervals, not just point estimates
203
+
204
+ ---
205
+
206
+ **Sources:**
207
+ - [On Calibration of Modern Neural Networks](https://arxiv.org/abs/1706.04599)