@rfxlamia/skillkit 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents/agents/creative-copywriter.md +212 -0
- package/agents/agents/dario-amodei.md +135 -0
- package/agents/agents/doc-simplifier.md +63 -0
- package/agents/agents/kotlin-pro.md +433 -0
- package/agents/agents/red-team.md +136 -0
- package/agents/agents/sam-altman.md +121 -0
- package/agents/agents/seo-manager.md +184 -0
- package/package.json +7 -2
- package/skills/quick-spec/tests/__pycache__/test_skill.cpython-314-pytest-9.0.2.pyc +0 -0
- package/skills/skillkit/.claude/settings.local.json +7 -0
- package/skills/skillkit/scripts/__pycache__/decision_helper.cpython-314.pyc +0 -0
- package/skills/skillkit/scripts/__pycache__/quick_validate.cpython-312.pyc +0 -0
- package/skills/skillkit/scripts/__pycache__/quick_validate.cpython-314.pyc +0 -0
- package/skills/skillkit/scripts/__pycache__/test_generator.cpython-314-pytest-9.0.2.pyc +0 -0
- package/skills/skillkit/scripts/utils/__pycache__/__init__.cpython-312.pyc +0 -0
- package/skills/skillkit/scripts/utils/__pycache__/__init__.cpython-314.pyc +0 -0
- package/skills/skillkit/scripts/utils/__pycache__/budget_tracker.cpython-312.pyc +0 -0
- package/skills/skillkit/scripts/utils/__pycache__/budget_tracker.cpython-314.pyc +0 -0
- package/skills/skillkit/scripts/utils/__pycache__/output_formatter.cpython-312.pyc +0 -0
- package/skills/skillkit/scripts/utils/__pycache__/output_formatter.cpython-314.pyc +0 -0
- package/skills/skillkit/scripts/utils/__pycache__/reference_validator.cpython-312.pyc +0 -0
- package/skills/skillkit/scripts/utils/__pycache__/reference_validator.cpython-314.pyc +0 -0
- package/skills/skillkit-help/SKILL.md +81 -0
- package/skills/skillkit-help/knowledge/application/09-case-studies.md +257 -0
- package/skills/skillkit-help/knowledge/application/12-testing-and-validation.md +276 -0
- package/skills/skillkit-help/knowledge/foundation/01-why-skills-exist.md +246 -0
- package/skills/skillkit-help/knowledge/foundation/02-skills-vs-subagents-comparison.md +312 -0
- package/skills/skillkit-help/knowledge/foundation/03-skills-vs-subagents-decision-tree.md +346 -0
- package/skills/skillkit-help/knowledge/foundation/06-platform-constraints.md +237 -0
- package/skills/skillkit-help/knowledge/foundation/08-when-not-to-use-skills.md +270 -0
- package/skills/skillkit-help/template/SKILL.md +52 -0
- package/skills/skills/adversarial-review/SKILL.md +219 -0
- package/skills/skills/baby-education/SKILL.md +260 -0
- package/skills/skills/baby-education/references/advanced-techniques.md +323 -0
- package/skills/skills/baby-education/references/transformations.md +345 -0
- package/skills/skills/been-there-done-that/SKILL.md +455 -0
- package/skills/skills/been-there-done-that/references/analysis-patterns.md +162 -0
- package/skills/skills/been-there-done-that/references/git-commands.md +132 -0
- package/skills/skills/been-there-done-that/references/tree-insertion-logic.md +145 -0
- package/skills/skills/coolhunter/SKILL.md +270 -0
- package/skills/skills/coolhunter/assets/elicitation-methods.csv +51 -0
- package/skills/skills/coolhunter/knowledge/elicitation-methods.md +312 -0
- package/skills/skills/coolhunter/references/workflow-execution.md +238 -0
- package/skills/skills/coolhunter/workflow-plan-coolhunter.md +232 -0
- package/skills/skills/creative-copywriting/SKILL.md +324 -0
- package/skills/skills/creative-copywriting/databases/README.md +60 -0
- package/skills/skills/creative-copywriting/databases/carousel-structures.csv +16 -0
- package/skills/skills/creative-copywriting/databases/emotional-arcs.csv +11 -0
- package/skills/skills/creative-copywriting/databases/hook-formulas.csv +51 -0
- package/skills/skills/creative-copywriting/databases/power-words.csv +201 -0
- package/skills/skills/creative-copywriting/databases/psychological-triggers.csv +21 -0
- package/skills/skills/creative-copywriting/databases/read-more-patterns.csv +26 -0
- package/skills/skills/creative-copywriting/databases/swipe-triggers.csv +31 -0
- package/skills/skills/creative-copywriting/references/carousel-psychology.md +223 -0
- package/skills/skills/creative-copywriting/references/hook-anatomy.md +169 -0
- package/skills/skills/creative-copywriting/references/power-word-science.md +134 -0
- package/skills/skills/creative-copywriting/references/storytelling-frameworks.md +157 -0
- package/skills/skills/diverse-content-gen/SKILL.md +201 -0
- package/skills/skills/diverse-content-gen/references/advanced-techniques.md +320 -0
- package/skills/skills/diverse-content-gen/references/research-findings.md +379 -0
- package/skills/skills/diverse-content-gen/references/task-workflows.md +241 -0
- package/skills/skills/diverse-content-gen/references/tool-integration.md +419 -0
- package/skills/skills/diverse-content-gen/references/troubleshooting.md +426 -0
- package/skills/skills/diverse-content-gen/references/vs-core-technique.md +240 -0
- package/skills/skills/framework-critical-thinking/SKILL.md +220 -0
- package/skills/skills/framework-critical-thinking/references/bias_detector.md +375 -0
- package/skills/skills/framework-critical-thinking/references/fallback_handler.md +239 -0
- package/skills/skills/framework-critical-thinking/references/memory_curator.md +161 -0
- package/skills/skills/framework-critical-thinking/references/metacognitive_monitor.md +297 -0
- package/skills/skills/framework-critical-thinking/references/producer_critic_orchestrator.md +333 -0
- package/skills/skills/framework-critical-thinking/references/reasoning_router.md +235 -0
- package/skills/skills/framework-critical-thinking/references/reasoning_validator.md +97 -0
- package/skills/skills/framework-critical-thinking/references/reflection_trigger.md +78 -0
- package/skills/skills/framework-critical-thinking/references/self_verification.md +388 -0
- package/skills/skills/framework-critical-thinking/references/uncertainty_quantifier.md +207 -0
- package/skills/skills/framework-initiative/SKILL.md +231 -0
- package/skills/skills/framework-initiative/references/examples.md +150 -0
- package/skills/skills/framework-initiative/references/impact-analysis.md +157 -0
- package/skills/skills/framework-initiative/references/intent-patterns.md +145 -0
- package/skills/skills/framework-initiative/references/star-framework.md +165 -0
- package/skills/skills/humanize-docs/SKILL.md +203 -0
- package/skills/skills/humanize-docs/references/advanced-techniques.md +13 -0
- package/skills/skills/humanize-docs/references/core-transformations.md +368 -0
- package/skills/skills/humanize-docs/references/detection-patterns.md +400 -0
- package/skills/skills/humanize-docs/references/examples-gallery.md +374 -0
- package/skills/skills/imagine/SKILL.md +190 -0
- package/skills/skills/imagine/references/artstyle-corporate-memphis.md +625 -0
- package/skills/skills/imagine/references/artstyle-crewdson-hyperrealism.md +295 -0
- package/skills/skills/imagine/references/artstyle-iphone-social-media.md +426 -0
- package/skills/skills/imagine/references/artstyle-sciencesaru.md +276 -0
- package/skills/skills/pre-deploy-checklist/README.md +26 -0
- package/skills/skills/pre-deploy-checklist/SKILL.md +153 -0
- package/skills/skills/pre-deploy-checklist/references/checklist-categories.md +174 -0
- package/skills/skills/pre-deploy-checklist/references/domain-prompts.md +216 -0
- package/skills/skills/prompt-engineering/SKILL.md +209 -0
- package/skills/skills/prompt-engineering/references/advanced-combinations.md +444 -0
- package/skills/skills/prompt-engineering/references/chain-of-thought.md +140 -0
- package/skills/skills/prompt-engineering/references/decision_matrix.md +220 -0
- package/skills/skills/prompt-engineering/references/few-shot.md +346 -0
- package/skills/skills/prompt-engineering/references/json-format.md +270 -0
- package/skills/skills/prompt-engineering/references/natural-language.md +420 -0
- package/skills/skills/prompt-engineering/references/pitfalls.md +365 -0
- package/skills/skills/prompt-engineering/references/prompt-chaining.md +498 -0
- package/skills/skills/prompt-engineering/references/react.md +108 -0
- package/skills/skills/prompt-engineering/references/self-consistency.md +322 -0
- package/skills/skills/prompt-engineering/references/tree-of-thoughts.md +386 -0
- package/skills/skills/prompt-engineering/references/xml-format.md +220 -0
- package/skills/skills/prompt-engineering/references/yaml-format.md +488 -0
- package/skills/skills/prompt-engineering/references/zero-shot.md +74 -0
- package/skills/skills/quick-spec/SKILL.md +280 -0
- package/skills/skills/quick-spec/assets/tech-spec-template.md +74 -0
- package/skills/skills/quick-spec/references/step-01-understand.md +189 -0
- package/skills/skills/quick-spec/references/step-02-investigate.md +144 -0
- package/skills/skills/quick-spec/references/step-03-generate.md +128 -0
- package/skills/skills/quick-spec/references/step-04-review.md +173 -0
- package/skills/skills/quick-spec/tests/__pycache__/test_skill.cpython-314-pytest-9.0.2.pyc +0 -0
- package/skills/skills/quick-spec/tests/test_scenarios.md +83 -0
- package/skills/skills/quick-spec/tests/test_skill.py +136 -0
- package/skills/skills/readme-expert/SKILL.md +538 -0
- package/skills/skills/readme-expert/knowledge/INDEX.md +192 -0
- package/skills/skills/readme-expert/knowledge/application/quality-standards.md +470 -0
- package/skills/skills/readme-expert/knowledge/application/script-executor.md +604 -0
- package/skills/skills/readme-expert/knowledge/application/template-library.md +822 -0
- package/skills/skills/readme-expert/knowledge/foundation/codebase-scanner.md +361 -0
- package/skills/skills/readme-expert/knowledge/foundation/validation-checklist.md +481 -0
- package/skills/skills/red-teaming/SKILL.md +321 -0
- package/skills/skills/red-teaming/references/ai-llm-redteam.md +517 -0
- package/skills/skills/red-teaming/references/attack-techniques.md +410 -0
- package/skills/skills/red-teaming/references/cybersecurity-redteam.md +383 -0
- package/skills/skills/red-teaming/references/tools-frameworks.md +446 -0
- package/skills/skills/releasing/.skillkit-mode +1 -0
- package/skills/skills/releasing/SKILL.md +225 -0
- package/skills/skills/releasing/references/version-detection.md +108 -0
- package/skills/skills/screenwriter/SKILL.md +273 -0
- package/skills/skills/screenwriter/references/advanced-techniques.md +216 -0
- package/skills/skills/screenwriter/references/pipeline-integration.md +266 -0
- package/skills/skills/skillkit/.claude/settings.local.json +7 -0
- package/skills/skills/skillkit/.claude-plugin/plugin.json +27 -0
- package/skills/skills/skillkit/CHANGELOG.md +484 -0
- package/skills/skills/skillkit/SKILL.md +511 -0
- package/skills/skills/skillkit/commands/skillkit.md +6 -0
- package/skills/skills/skillkit/commands/validate-plan.md +6 -0
- package/skills/skills/skillkit/commands/verify.md +6 -0
- package/skills/skills/skillkit/knowledge/INDEX.md +352 -0
- package/skills/skills/skillkit/knowledge/application/09-case-studies.md +257 -0
- package/skills/skills/skillkit/knowledge/application/10-technical-architecture.md +324 -0
- package/skills/skills/skillkit/knowledge/application/11-adoption-strategy.md +267 -0
- package/skills/skills/skillkit/knowledge/application/12-testing-and-validation.md +276 -0
- package/skills/skills/skillkit/knowledge/application/13-competitive-landscape.md +198 -0
- package/skills/skills/skillkit/knowledge/foundation/01-why-skills-exist.md +246 -0
- package/skills/skills/skillkit/knowledge/foundation/02-skills-vs-subagents-comparison.md +312 -0
- package/skills/skills/skillkit/knowledge/foundation/03-skills-vs-subagents-decision-tree.md +346 -0
- package/skills/skills/skillkit/knowledge/foundation/04-hybrid-patterns.md +308 -0
- package/skills/skills/skillkit/knowledge/foundation/05-token-economics.md +275 -0
- package/skills/skills/skillkit/knowledge/foundation/06-platform-constraints.md +237 -0
- package/skills/skills/skillkit/knowledge/foundation/07-security-concerns.md +322 -0
- package/skills/skills/skillkit/knowledge/foundation/08-when-not-to-use-skills.md +270 -0
- package/skills/skills/skillkit/knowledge/plugin-guide.md +614 -0
- package/skills/skills/skillkit/knowledge/tools/14-validation-tools-guide.md +150 -0
- package/skills/skills/skillkit/knowledge/tools/15-cost-tools-guide.md +157 -0
- package/skills/skills/skillkit/knowledge/tools/16-security-tools-guide.md +122 -0
- package/skills/skills/skillkit/knowledge/tools/17-pattern-tools-guide.md +161 -0
- package/skills/skills/skillkit/knowledge/tools/18-decision-helper-guide.md +243 -0
- package/skills/skills/skillkit/knowledge/tools/19-test-generator-guide.md +275 -0
- package/skills/skills/skillkit/knowledge/tools/20-split-skill-guide.md +149 -0
- package/skills/skills/skillkit/knowledge/tools/21-quality-scorer-guide.md +226 -0
- package/skills/skills/skillkit/knowledge/tools/22-migration-helper-guide.md +356 -0
- package/skills/skills/skillkit/knowledge/tools/23-subagent-creation-guide.md +448 -0
- package/skills/skills/skillkit/knowledge/tools/24-behavioral-testing-guide.md +122 -0
- package/skills/skills/skillkit/references/proposal-generation.md +982 -0
- package/skills/skills/skillkit/references/rationalization-catalog.md +75 -0
- package/skills/skills/skillkit/references/research-methodology.md +661 -0
- package/skills/skills/skillkit/references/section-2-full-creation-workflow.md +452 -0
- package/skills/skills/skillkit/references/section-3-validation-workflow-existing-skill.md +63 -0
- package/skills/skills/skillkit/references/section-4-decision-workflow-skills-vs-subagents.md +64 -0
- package/skills/skills/skillkit/references/section-5-migration-workflow-doc-to-skill.md +58 -0
- package/skills/skills/skillkit/references/section-6-subagent-creation-workflow.md +499 -0
- package/skills/skills/skillkit/references/section-7-knowledge-reference-map.md +72 -0
- package/skills/skills/skillkit/scripts/__pycache__/decision_helper.cpython-314.pyc +0 -0
- package/skills/skills/skillkit/scripts/__pycache__/quick_validate.cpython-312.pyc +0 -0
- package/skills/skills/skillkit/scripts/__pycache__/quick_validate.cpython-314.pyc +0 -0
- package/skills/skills/skillkit/scripts/__pycache__/test_generator.cpython-314-pytest-9.0.2.pyc +0 -0
- package/skills/skills/skillkit/scripts/decision_helper.py +799 -0
- package/skills/skills/skillkit/scripts/init_skill.py +400 -0
- package/skills/skills/skillkit/scripts/init_subagent.py +231 -0
- package/skills/skills/skillkit/scripts/migration_helper.py +669 -0
- package/skills/skills/skillkit/scripts/package_skill.py +211 -0
- package/skills/skills/skillkit/scripts/pattern_detector.py +381 -0
- package/skills/skills/skillkit/scripts/pattern_detector_new.py +382 -0
- package/skills/skills/skillkit/scripts/pressure_tester.py +157 -0
- package/skills/skills/skillkit/scripts/quality_scorer.py +999 -0
- package/skills/skills/skillkit/scripts/quick_validate.py +100 -0
- package/skills/skills/skillkit/scripts/security_scanner.py +474 -0
- package/skills/skills/skillkit/scripts/split_skill.py +540 -0
- package/skills/skills/skillkit/scripts/test_generator.py +695 -0
- package/skills/skills/skillkit/scripts/token_estimator.py +493 -0
- package/skills/skills/skillkit/scripts/utils/__init__.py +49 -0
- package/skills/skills/skillkit/scripts/utils/__pycache__/__init__.cpython-312.pyc +0 -0
- package/skills/skills/skillkit/scripts/utils/__pycache__/__init__.cpython-314.pyc +0 -0
- package/skills/skills/skillkit/scripts/utils/__pycache__/budget_tracker.cpython-312.pyc +0 -0
- package/skills/skills/skillkit/scripts/utils/__pycache__/budget_tracker.cpython-314.pyc +0 -0
- package/skills/skills/skillkit/scripts/utils/__pycache__/output_formatter.cpython-312.pyc +0 -0
- package/skills/skills/skillkit/scripts/utils/__pycache__/output_formatter.cpython-314.pyc +0 -0
- package/skills/skills/skillkit/scripts/utils/__pycache__/reference_validator.cpython-312.pyc +0 -0
- package/skills/skills/skillkit/scripts/utils/__pycache__/reference_validator.cpython-314.pyc +0 -0
- package/skills/skills/skillkit/scripts/utils/budget_tracker.py +388 -0
- package/skills/skills/skillkit/scripts/utils/output_formatter.py +263 -0
- package/skills/skills/skillkit/scripts/utils/reference_validator.py +401 -0
- package/skills/skills/skillkit/scripts/validate_skill.py +594 -0
- package/skills/skills/skillkit/tests/test_behavioral.py +39 -0
- package/skills/skills/skillkit/tests/test_scenarios.md +83 -0
- package/skills/skills/skillkit/tests/test_skill.py +136 -0
- package/skills/skills/skillkit-help/SKILL.md +81 -0
- package/skills/skills/skillkit-help/knowledge/application/09-case-studies.md +257 -0
- package/skills/skills/skillkit-help/knowledge/application/12-testing-and-validation.md +276 -0
- package/skills/skills/skillkit-help/knowledge/foundation/01-why-skills-exist.md +246 -0
- package/skills/skills/skillkit-help/knowledge/foundation/02-skills-vs-subagents-comparison.md +312 -0
- package/skills/skills/skillkit-help/knowledge/foundation/03-skills-vs-subagents-decision-tree.md +346 -0
- package/skills/skills/skillkit-help/knowledge/foundation/06-platform-constraints.md +237 -0
- package/skills/skills/skillkit-help/knowledge/foundation/08-when-not-to-use-skills.md +270 -0
- package/skills/skills/skillkit-help/template/SKILL.md +52 -0
- package/skills/skills/social-media-seo/SKILL.md +278 -0
- package/skills/skills/social-media-seo/databases/caption-styles.csv +31 -0
- package/skills/skills/social-media-seo/databases/engagement-tactics.csv +16 -0
- package/skills/skills/social-media-seo/databases/hashtag-strategies.csv +21 -0
- package/skills/skills/social-media-seo/databases/hook-formulas.csv +26 -0
- package/skills/skills/social-media-seo/databases/keyword-clusters.csv +11 -0
- package/skills/skills/social-media-seo/databases/thread-structures.csv +26 -0
- package/skills/skills/social-media-seo/databases/viral-patterns.csv +21 -0
- package/skills/skills/social-media-seo/references/analytics-guide.md +321 -0
- package/skills/skills/social-media-seo/references/instagram-seo.md +235 -0
- package/skills/skills/social-media-seo/references/threads-seo.md +305 -0
- package/skills/skills/social-media-seo/references/x-twitter-seo.md +337 -0
- package/skills/skills/social-media-seo/scripts/query_database.py +191 -0
- package/skills/skills/storyteller/SKILL.md +241 -0
- package/skills/skills/storyteller/references/transformation-methodology.md +293 -0
- package/skills/skills/storyteller/references/visual-vocabulary.md +177 -0
- package/skills/skills/thread-pro/SKILL.md +162 -0
- package/skills/skills/thread-pro/anti-ai-patterns.md +120 -0
- package/skills/skills/thread-pro/hook-formulas.md +138 -0
- package/skills/skills/thread-pro/references/anti-ai-patterns.md +120 -0
- package/skills/skills/thread-pro/references/hook-formulas.md +138 -0
- package/skills/skills/thread-pro/references/thread-structures.md +240 -0
- package/skills/skills/thread-pro/references/voice-injection.md +130 -0
- package/skills/skills/thread-pro/thread-structures.md +240 -0
- package/skills/skills/thread-pro/voice-injection.md +130 -0
- package/skills/skills/tinkering/SKILL.md +251 -0
- package/skills/skills/tinkering/references/graduation-checklist.md +100 -0
- package/skills/skills/validate-plan/.skillkit-mode +1 -0
- package/skills/skills/validate-plan/SKILL.md +406 -0
- package/skills/skills/validate-plan/references/dry-principles.md +251 -0
- package/skills/skills/validate-plan/references/gap-analysis-guide.md +320 -0
- package/skills/skills/validate-plan/references/tdd-patterns.md +413 -0
- package/skills/skills/validate-plan/references/yagni-checklist.md +330 -0
- package/skills/skills/verify-before-ship/.skillkit-mode +1 -0
- package/skills/skills/verify-before-ship/SKILL.md +116 -0
- package/skills/skills/verify-before-ship/references/anti-rationalization.md +212 -0
- package/skills/skills/verify-before-ship/references/verification-gates.md +305 -0
- package/skills-manifest.json +8 -2
- package/src/banner.js +1 -1
- package/src/cli.js +15 -4
- package/src/install.js +45 -29
- package/src/install.test.js +75 -7
- package/src/picker.js +15 -4
- package/src/picker.test.js +36 -1
- package/src/scope.js +8 -39
- package/src/scope.test.js +9 -13
- package/src/tools.js +76 -0
- package/src/tools.test.js +80 -0
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
# Producer-Critic Orchestrator
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
|
|
5
|
+
The Producer-Critic Orchestrator implements the Generate-Critique-Refine pattern for iterative improvement of AI outputs. It coordinates multiple specialized agents to produce higher quality results through structured feedback loops.
|
|
6
|
+
|
|
7
|
+
## Architecture
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
┌─────────────────────────────────────────────────────────────────┐
│                   Master Agent (Orchestrator)                   │
└──────────────┬──────────────────────────────────────┬───────────┘
               │                                      │
               ▼                                      ▼
┌────────────────────────┐              ┌────────────────────────┐
│     Producer Agent     │              │      Critic Agent      │
│                        │              │                        │
│  - Generates initial   │─────────────►│  - Evaluates quality   │
│    output              │              │  - Identifies errors   │
│  - Incorporates        │◄─────────────│  - Suggests            │
│    feedback            │   critique   │    improvements        │
└────────────────────────┘              └────────────────────────┘
              ▲                                      │
              │                                      ▼
              │                         ┌────────────────────────┐
              │                         │  Refinement Decision   │
              └─────────────────────────│                        │
                (if needs refinement)   │  - Satisfactory?       │
                                        │  - Budget exhausted?   │
                                        │  - Max iterations?     │
                                        └────────────────────────┘
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Components
|
|
35
|
+
|
|
36
|
+
### Master Agent (Orchestrator)
|
|
37
|
+
|
|
38
|
+
Coordinates the overall process:
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
class ProducerCriticOrchestrator:
    """Run a generate-critique-refine loop between a producer and a critic.

    Configuration keys read by ``__init__``:
        producer: settings passed to ``ProducerAgent``.
        critic: settings passed to ``CriticAgent``.
        max_iterations: upper bound on critique passes per call (default 3).
        budget: upper bound on refinements per call (default 5).
        threshold: stored as ``satisfaction_threshold`` (default 0.8). The
            loop below does not consult it; it relies on the critic's
            ``'satisfactory'`` flag.
    """

    def __init__(self, config):
        self.producer = ProducerAgent(config['producer'])
        self.critic = CriticAgent(config['critic'])
        self.max_iterations = config.get('max_iterations', 3)
        self.refinement_budget = config.get('budget', 5)
        self.satisfaction_threshold = config.get('threshold', 0.8)

    async def generate(self, task, context):
        """Produce an output for ``task`` and refine it until a stop condition.

        The loop stops when the critic marks the output satisfactory, when
        the per-call refinement budget is spent, or after ``max_iterations``
        critique passes.

        Returns:
            A dict with ``final_output``, ``iterations`` (initial generation
            plus the number of refinements), ``history`` (one entry per
            critique pass) and ``was_refined``.
        """
        history = []
        refinements = 0
        # Track the budget per call. Decrementing the instance attribute
        # would leave later calls on the same orchestrator with no budget.
        remaining_budget = self.refinement_budget

        # Initial generation
        current_output = await self.producer.generate(task, context)

        while refinements < self.max_iterations:
            critique = await self.critic.evaluate(current_output, task)
            history.append({
                'iteration': refinements,
                'output': current_output,
                'critique': critique,
            })

            if critique['satisfactory'] or remaining_budget <= 0:
                break

            current_output = await self.producer.refine(
                current_output,
                critique,
                context,
            )
            remaining_budget -= 1
            refinements += 1

        # NOTE: when the loop ends because max_iterations was reached, the
        # last refined output is returned without a further critique pass.
        return {
            'final_output': current_output,
            'iterations': refinements + 1,
            'history': history,
            'was_refined': refinements > 0,
        }
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### Producer Agent
|
|
92
|
+
|
|
93
|
+
Responsible for generation and refinement:
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
class ProducerAgent:
    """Generate an initial output and revise it in response to a critique.

    ``config['model']`` must expose an awaitable
    ``generate(prompt, temperature=...)``. ``config['temperature']`` is
    optional and defaults to 0.7.
    """

    def __init__(self, config):
        self.model = config['model']
        self.temperature = config.get('temperature', 0.7)

    async def generate(self, task, context):
        """Return the model's first attempt at ``task``."""
        # NOTE(review): build_generation_prompt is not defined in this
        # snippet; presumably it is supplied elsewhere. Confirm.
        prompt = self.build_generation_prompt(task, context)
        return await self.model.generate(prompt, temperature=self.temperature)

    async def refine(self, current_output, critique, context):
        """Return a revised output that addresses ``critique``.

        ``critique`` must provide ``'feedback'`` and ``'issues'``.
        ``context`` may carry the original task under ``'task'``. A missing
        key or an empty context yields an empty task line instead of a
        ``KeyError``.
        """
        task = context.get('task', '') if context else ''
        refinement_prompt = f"""
        Original task: {task}

        Current output:
        {current_output}

        Critique:
        {critique['feedback']}

        Issues identified:
        {critique['issues']}

        Please refine the output addressing all identified issues.
        Maintain the strengths while fixing the weaknesses.
        """
        return await self.model.generate(
            refinement_prompt,
            temperature=self.temperature * 0.9,  # Slightly lower for refinement
        )
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
### Critic Agent
|
|
128
|
+
|
|
129
|
+
Evaluates outputs and provides structured feedback:
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
class CriticAgent:
    """Evaluate an output against a task and return structured feedback.

    ``config['model']`` must expose an awaitable ``generate(prompt)``.
    ``config['criteria']`` optionally overrides the scored criteria, which
    default to accuracy, completeness, clarity and relevance.
    """

    def __init__(self, config):
        self.model = config['model']
        self.evaluation_criteria = config.get('criteria', [
            'accuracy', 'completeness', 'clarity', 'relevance'
        ])

    async def evaluate(self, output, task):
        """Ask the model to critique ``output`` and return the parsed result.

        Returns:
            A dict with ``satisfactory``, ``scores``, ``average_score``,
            ``strengths``, ``issues`` and ``feedback`` (the parsed
            improvement suggestions).
        """
        # List the configured criteria so a custom 'criteria' setting is
        # actually requested from the model, not only the four defaults.
        score_lines = "\n".join(
            f"           - {criterion.capitalize()}:"
            for criterion in self.evaluation_criteria
        )
        critique_prompt = f"""
        Evaluate the following output for the given task.

        Task: {task}

        Output to evaluate:
        {output}

        Provide evaluation in this format:
        1. Overall assessment: (satisfactory/needs improvement)
        2. Scores (0-10):
{score_lines}
        3. Strengths:
        4. Issues identified:
        5. Specific improvement suggestions:

        Evaluation:
        """

        evaluation = await self.model.generate(critique_prompt)
        # NOTE(review): parse_evaluation is not defined in this snippet;
        # presumably it is supplied elsewhere. Confirm.
        parsed = self.parse_evaluation(evaluation)

        scores = parsed['scores']
        # An evaluation with no parsed scores must not divide by zero.
        average_score = sum(scores.values()) / len(scores) if scores else 0.0

        return {
            'satisfactory': parsed['overall'] == 'satisfactory',
            'scores': scores,
            'average_score': average_score,
            'strengths': parsed['strengths'],
            'issues': parsed['issues'],
            'feedback': parsed['suggestions'],
        }
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
## Refinement Strategies
|
|
176
|
+
|
|
177
|
+
### 1. Full Regeneration
|
|
178
|
+
|
|
179
|
+
Discard current output and generate fresh with critique as context.
|
|
180
|
+
|
|
181
|
+
**When to use:** Major structural issues or fundamental errors
|
|
182
|
+
|
|
183
|
+
```python
|
|
184
|
+
def full_regenerate(task, critique_history):
    """Regenerate from scratch, passing the synthesized critiques as context."""
    return generate_with_enhanced_context(
        task,
        synthesize_critiques(critique_history),
    )
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
### 2. Targeted Revision
|
|
190
|
+
|
|
191
|
+
Keep valid portions, only revise problematic sections.
|
|
192
|
+
|
|
193
|
+
**When to use:** Localized errors or specific improvements needed
|
|
194
|
+
|
|
195
|
+
```python
|
|
196
|
+
def targeted_revision(output, critique):
    """Revise only the sections of ``output`` that a located issue points at.

    Issues with no ``'location'`` entry, and issues that are not dicts (for
    example plain strings), are skipped instead of raising.
    """
    for issue in critique['issues']:
        location = issue.get('location') if isinstance(issue, dict) else None
        if location:  # Only issues with a specific location can be targeted
            output = revise_section(output, issue)
    return output
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
### 3. Progressive Enhancement
|
|
205
|
+
|
|
206
|
+
Build upon previous output incrementally.
|
|
207
|
+
|
|
208
|
+
**When to use:** Minor improvements needed
|
|
209
|
+
|
|
210
|
+
```python
|
|
211
|
+
def progressive_enhancement(output, critique):
    """Apply each suggestion from ``critique`` to ``output`` in order.

    Suggestions are read from ``critique['suggestions']``. If that key is
    absent, ``critique['feedback']`` is used, because that is where
    ``CriticAgent.evaluate`` places them. A single string is treated as one
    suggestion.
    """
    suggestions = critique.get('suggestions')
    if suggestions is None:
        suggestions = critique.get('feedback', [])
    if isinstance(suggestions, str):
        suggestions = [suggestions]

    for enhancement in suggestions:
        output = apply_enhancement(output, enhancement)
    return output
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
## Budget Management
|
|
219
|
+
|
|
220
|
+
Control computational cost:
|
|
221
|
+
|
|
222
|
+
```python
|
|
223
|
+
import time


class RefinementBudget:
    """Track token, wall-clock and iteration limits for a refinement run.

    Args:
        max_tokens: total tokens that may be consumed.
        max_time: allowed elapsed time, in the units of ``time.monotonic()``
            (seconds).
        max_iterations: number of ``consume`` calls allowed.
    """

    def __init__(self, max_tokens, max_time, max_iterations):
        self.max_tokens = max_tokens
        self.max_time = max_time
        self.max_iterations = max_iterations
        self.used_tokens = 0
        # The time module has no now(); monotonic() is the clock meant for
        # measuring elapsed intervals.
        self.start_time = time.monotonic()
        self.iterations = 0

    def can_continue(self):
        """Return ``(allowed, reason)`` for starting another iteration."""
        if self.iterations >= self.max_iterations:
            return False, "Max iterations reached"

        if self.used_tokens >= self.max_tokens:
            return False, "Token budget exhausted"

        elapsed = time.monotonic() - self.start_time
        if elapsed >= self.max_time:
            return False, "Time budget exhausted"

        return True, "Can continue"

    def consume(self, tokens_used):
        """Record one finished iteration that used ``tokens_used`` tokens."""
        self.used_tokens += tokens_used
        self.iterations += 1
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
## Early Stopping Conditions
|
|
251
|
+
|
|
252
|
+
Stop refinement when:
|
|
253
|
+
|
|
254
|
+
```python
|
|
255
|
+
# Named predicates for ending refinement early. Each one takes the object
# named by its parameter: a critique (s), the iteration history (h), or a
# budget (b).
# NOTE(review): SATISFACTION_THRESHOLD, score_improvement and
# detect_oscillation are not defined in this snippet. Confirm the threshold
# uses the same scale as 'average_score'.
STOP_CONDITIONS = {
    'satisfactory': lambda s: s['average_score'] >= SATISFACTION_THRESHOLD,
    'diminishing_returns': lambda h: score_improvement(h) < 0.05,
    'oscillation': lambda h: detect_oscillation(h),
    # can_continue() returns (allowed, reason); stop when not allowed.
    'budget_exhausted': lambda b: not b.can_continue()[0],
}
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
## Usage Patterns
|
|
264
|
+
|
|
265
|
+
### Pattern 1: Quality-First
|
|
266
|
+
|
|
267
|
+
Maximize quality regardless of cost:
|
|
268
|
+
|
|
269
|
+
```python
|
|
270
|
+
# Quality-first settings: more iterations, a larger budget, a stricter threshold.
config = dict(
    max_iterations=5,
    budget=10,
    threshold=0.9,
    stop_on_satisfaction=True,
)
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
### Pattern 2: Budget-Constrained
|
|
279
|
+
|
|
280
|
+
Best quality within budget:
|
|
281
|
+
|
|
282
|
+
```python
|
|
283
|
+
# Budget-constrained settings: moderate iteration count, budget and threshold.
config = dict(
    max_iterations=3,
    budget=5,
    threshold=0.8,
    early_stop=True,
)
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
### Pattern 3: Fast Iteration
|
|
292
|
+
|
|
293
|
+
Quick improvements, accept "good enough":
|
|
294
|
+
|
|
295
|
+
```python
|
|
296
|
+
# Fast-iteration settings: few iterations, a small budget, a relaxed threshold.
config = dict(
    max_iterations=2,
    budget=3,
    threshold=0.7,
    parallel_critiques=True,
)
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
## Evaluation Metrics
|
|
305
|
+
|
|
306
|
+
Track orchestration effectiveness:
|
|
307
|
+
|
|
308
|
+
- **Refinement Success Rate:** % of iterations that improve score
|
|
309
|
+
- **Average Iterations:** Mean iterations before satisfaction or stop
|
|
310
|
+
- **Quality Improvement:** Score delta from initial to final
|
|
311
|
+
- **Cost Efficiency:** Quality gain per token spent
|
|
312
|
+
- **Satisfaction Rate:** % of tasks reaching threshold
|
|
313
|
+
|
|
314
|
+
## Best Practices
|
|
315
|
+
|
|
316
|
+
1. **Start with strict criteria** and relax based on performance
|
|
317
|
+
2. **Log all critiques** for post-hoc analysis and pattern detection
|
|
318
|
+
3. **Vary temperatures** between generation and refinement
|
|
319
|
+
4. **Set hard limits** to prevent infinite loops
|
|
320
|
+
5. **Parallelize critiques** when multiple critics available
|
|
321
|
+
6. **Cache successful patterns** for similar future tasks
|
|
322
|
+
|
|
323
|
+
## Common Pitfalls
|
|
324
|
+
|
|
325
|
+
- **Over-refinement:** Excessive iterations with diminishing returns
|
|
326
|
+
- **Critique drift:** Critic becomes increasingly harsh over iterations
|
|
327
|
+
- **Oscillation:** Alternating between two imperfect solutions
|
|
328
|
+
- **Budget explosion:** Underestimating computational cost
|
|
329
|
+
|
|
330
|
+
---
|
|
331
|
+
|
|
332
|
+
**Sources:**
|
|
333
|
+
- [Self-Refine: Iterative Refinement with Self-Feedback](https://arxiv.org/abs/2303.17651)
|
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
# Reasoning Router
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
|
|
5
|
+
The Reasoning Router analyzes task characteristics and automatically selects the optimal reasoning method (CoT, ToT, GoT, or Self-Consistency) based on the complexity, structure, and criticality of the task.
|
|
6
|
+
|
|
7
|
+
## Decision Matrix
|
|
8
|
+
|
|
9
|
+
### Selection Criteria
|
|
10
|
+
|
|
11
|
+
| Factor | CoT | ToT-BFS | ToT-DFS | GoT | Self-Consistency |
|
|
12
|
+
|--------|-----|---------|---------|-----|------------------|
|
|
13
|
+
| **Complexity** | Low | High | High | Very High | Medium |
|
|
14
|
+
| **Path Count** | Single | Multiple | Multiple | Interconnected | Single |
|
|
15
|
+
| **Backtracking** | No | Yes | Yes | Yes | No |
|
|
16
|
+
| **Exploration** | Linear | Breadth | Depth | Graph | Parallel |
|
|
17
|
+
| **Cost** | Low | High | High | Very High | Medium |
|
|
18
|
+
| **Latency** | Fast | Slow | Slow | Very Slow | Medium |
|
|
19
|
+
|
|
20
|
+
### Selection Algorithm
|
|
21
|
+
|
|
22
|
+
```
|
|
23
|
+
function select_reasoning_method(problem):
|
|
24
|
+
complexity_score = analyze_complexity(problem)
|
|
25
|
+
path_diversity = estimate_path_count(problem)
|
|
26
|
+
criticality = assess_criticality(problem)
|
|
27
|
+
interconnectivity = check_interconnections(problem)
|
|
28
|
+
|
|
29
|
+
if criticality == HIGH and path_diversity == SINGLE:
|
|
30
|
+
return SELF_CONSISTENCY
|
|
31
|
+
|
|
32
|
+
if interconnectivity == HIGH:
|
|
33
|
+
return GRAPH_OF_THOUGHTS
|
|
34
|
+
|
|
35
|
+
if path_diversity == MULTIPLE:
|
|
36
|
+
if requires_deep_exploration(problem):
|
|
37
|
+
return TREE_OF_THOUGHTS_DFS
|
|
38
|
+
else:
|
|
39
|
+
return TREE_OF_THOUGHTS_BFS
|
|
40
|
+
|
|
41
|
+
return CHAIN_OF_THOUGHT
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Implementation
|
|
45
|
+
|
|
46
|
+
### Complexity Analysis
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
def analyze_complexity(problem):
    """Bucket a problem into COMPLEXITY.LOW, MEDIUM or HIGH.

    Four signals are collected from the problem (reasoning step count,
    domain-knowledge requirements, ambiguity, constraint density) and
    blended into a single weighted score, which is then compared against
    the 0.3 and 0.7 cut-offs.

    NOTE(review): the cut-offs suggest each signal is expected on a 0-1
    scale -- confirm against the helper implementations.
    """
    # Gather the signals in the same order the score combines them.
    steps = estimate_reasoning_steps(problem)
    domain = assess_domain_requirements(problem)
    ambiguity = detect_ambiguity(problem)
    constraints = count_constraints(problem)

    # Step count and ambiguity carry 0.3 each; domain knowledge and
    # constraint density carry 0.2 each.
    weighted = steps * 0.3 + domain * 0.2 + ambiguity * 0.3 + constraints * 0.2

    if weighted < 0.3:
        return COMPLEXITY.LOW
    if weighted < 0.7:
        return COMPLEXITY.MEDIUM
    return COMPLEXITY.HIGH
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### Method Configurations
|
|
74
|
+
|
|
75
|
+
#### Chain-of-Thought (CoT)
|
|
76
|
+
|
|
77
|
+
**Use when:**
|
|
78
|
+
- Straightforward arithmetic or logic
|
|
79
|
+
- Single solution path
|
|
80
|
+
- Well-defined problem space
|
|
81
|
+
- Low ambiguity
|
|
82
|
+
|
|
83
|
+
**Template:**
|
|
84
|
+
```
|
|
85
|
+
Let's solve this step by step:
|
|
86
|
+
1. [First step with explanation]
|
|
87
|
+
2. [Second step with explanation]
|
|
88
|
+
...
|
|
89
|
+
Therefore, [final answer]
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
#### Tree-of-Thoughts - BFS (ToT-BFS)
|
|
93
|
+
|
|
94
|
+
**Use when:**
|
|
95
|
+
- Multiple viable approaches exist
|
|
96
|
+
- Need to compare alternatives at each step
|
|
97
|
+
- Problem requires exploration of options
|
|
98
|
+
|
|
99
|
+
**Configuration:**
|
|
100
|
+
```python
|
|
101
|
+
ToTConfig(
|
|
102
|
+
branching_factor=3, # Number of candidates per step
|
|
103
|
+
beam_width=2, # Keep top N candidates
|
|
104
|
+
max_depth=5, # Maximum reasoning depth
|
|
105
|
+
search_algorithm='BFS',
|
|
106
|
+
evaluation_criteria='progress_toward_goal'
|
|
107
|
+
)
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
#### Tree-of-Thoughts - DFS (ToT-DFS)
|
|
111
|
+
|
|
112
|
+
**Use when:**
|
|
113
|
+
- Deep reasoning chains needed
|
|
114
|
+
- Early decisions strongly influence outcome
|
|
115
|
+
- Problem has hierarchical structure
|
|
116
|
+
|
|
117
|
+
**Configuration:**
|
|
118
|
+
```python
|
|
119
|
+
ToTConfig(
|
|
120
|
+
branching_factor=2,
|
|
121
|
+
max_depth=10,
|
|
122
|
+
search_algorithm='DFS',
|
|
123
|
+
backtrack_threshold=0.3, # Backtrack if score below this
|
|
124
|
+
evaluation_criteria='path_viability'
|
|
125
|
+
)
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
#### Graph-of-Thoughts (GoT)
|
|
129
|
+
|
|
130
|
+
**Use when:**
|
|
131
|
+
- Interconnected reasoning required
|
|
132
|
+
- Thoughts can merge or aggregate
|
|
133
|
+
- Non-linear problem structure
|
|
134
|
+
|
|
135
|
+
**Configuration:**
|
|
136
|
+
```python
|
|
137
|
+
GoTConfig(
|
|
138
|
+
node_types=['premise', 'inference', 'conclusion'],
|
|
139
|
+
edge_types=['supports', 'contradicts', 'aggregates'],
|
|
140
|
+
aggregation_functions=['sum', 'max', 'graph_attention'],
|
|
141
|
+
max_nodes=50
|
|
142
|
+
)
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
#### Self-Consistency
|
|
146
|
+
|
|
147
|
+
**Use when:**
|
|
148
|
+
- High-stakes decisions
|
|
149
|
+
- Single answer required
|
|
150
|
+
- Need confidence estimation
|
|
151
|
+
|
|
152
|
+
**Configuration:**
|
|
153
|
+
```python
|
|
154
|
+
SelfConsistencyConfig(
|
|
155
|
+
sample_count=10, # Number of reasoning paths
|
|
156
|
+
temperature=0.7, # Variation in sampling
|
|
157
|
+
aggregation_method='majority_vote',
|
|
158
|
+
confidence_threshold=0.8
|
|
159
|
+
)
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## Usage Examples
|
|
163
|
+
|
|
164
|
+
### Example 1: Simple Math Problem
|
|
165
|
+
|
|
166
|
+
**Problem:** "What is 25 * 48?"
|
|
167
|
+
|
|
168
|
+
**Analysis:**
|
|
169
|
+
- Complexity: Low (single arithmetic operation)
|
|
170
|
+
- Path diversity: Single
|
|
171
|
+
- Criticality: Low
|
|
172
|
+
|
|
173
|
+
**Selection:** Chain-of-Thought
|
|
174
|
+
|
|
175
|
+
**Reasoning:**
|
|
176
|
+
```
|
|
177
|
+
25 * 48 = 25 * (50 - 2)
|
|
178
|
+
= 25 * 50 - 25 * 2
|
|
179
|
+
= 1250 - 50
|
|
180
|
+
= 1200
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
### Example 2: Game of 24
|
|
184
|
+
|
|
185
|
+
**Problem:** "Make 24 using 4, 9, 10, 13"
|
|
186
|
+
|
|
187
|
+
**Analysis:**
|
|
188
|
+
- Complexity: High (multiple operations needed)
|
|
189
|
+
- Path diversity: Multiple (different operation combinations)
|
|
190
|
+
- Criticality: Medium
|
|
191
|
+
|
|
192
|
+
**Selection:** Tree-of-Thoughts (BFS)
|
|
193
|
+
|
|
194
|
+
**Exploration:**
|
|
195
|
+
```
|
|
196
|
+
Level 0: [4, 9, 10, 13]
|
|
197
|
+
Level 1: [(4+9), 10, 13], [(13-9), 4, 10], [(10*4), 9, 13], ...
|
|
198
|
+
Level 2: ... continue until 24 reached or max depth
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
### Example 3: Complex Planning
|
|
202
|
+
|
|
203
|
+
**Problem:** "Plan a 5-day trip to Japan considering budget, weather, and preferences"
|
|
204
|
+
|
|
205
|
+
**Analysis:**
|
|
206
|
+
- Complexity: Very High
|
|
207
|
+
- Interconnectivity: High (decisions affect each other)
|
|
208
|
+
- Multiple constraints
|
|
209
|
+
|
|
210
|
+
**Selection:** Graph-of-Thoughts
|
|
211
|
+
|
|
212
|
+
**Graph Structure:**
|
|
213
|
+
- Nodes: Destinations, activities, accommodations, transport
|
|
214
|
+
- Edges: Temporal dependencies, budget constraints, preferences
|
|
215
|
+
|
|
216
|
+
## Best Practices
|
|
217
|
+
|
|
218
|
+
1. **Start Simple:** Default to CoT unless complexity indicators are present
|
|
219
|
+
2. **Monitor Performance:** Track success rates per method for your domain
|
|
220
|
+
3. **Adaptive Switching:** Allow runtime method switching if initial choice underperforms
|
|
221
|
+
4. **Budget Awareness:** Consider computational cost vs accuracy trade-offs
|
|
222
|
+
5. **Hybrid Approaches:** Combine methods (e.g., ToT with Self-Consistency for final answer)
|
|
223
|
+
|
|
224
|
+
## Common Pitfalls
|
|
225
|
+
|
|
226
|
+
- **Over-engineering:** Using ToT for simple problems wastes compute
|
|
227
|
+
- **Under-exploring:** Using CoT for complex multi-path problems leads to dead ends
|
|
228
|
+
- **Fixed Configuration:** Not adapting parameters to problem specifics
|
|
229
|
+
- **Ignoring Latency:** ToT/GoT may be too slow for real-time applications
|
|
230
|
+
|
|
231
|
+
---
|
|
232
|
+
|
|
233
|
+
**Sources:**
|
|
234
|
+
- [Tree of Thoughts: Deliberate Problem Solving with Large Language Models](https://arxiv.org/abs/2305.10601)
|
|
235
|
+
- [Chain-of-Thought Prompting Elicits Reasoning in Large Language Models](https://arxiv.org/abs/2201.11903)
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# Reasoning Validator
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
|
|
5
|
+
The Reasoning Validator provides logical consistency checking and structural validation for reasoning chains to detect contradictions, gaps, and logical fallacies.
|
|
6
|
+
|
|
7
|
+
## Validation Types
|
|
8
|
+
|
|
9
|
+
### 1. Logical Consistency Checks
|
|
10
|
+
|
|
11
|
+
Verify that the reasoning does not contain contradictions.
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
class LogicalConsistencyChecker:
    """Reports pairs of statements in a reasoning chain that contradict each other."""

    def check(self, reasoning_chain):
        """Run the contradiction scan and summarise the outcome.

        Returns a dict with 'valid' (True when nothing was found) and
        'issues' (the list of contradiction records).
        """
        problems = list(self.find_contradictions(reasoning_chain))
        return {'valid': not problems, 'issues': problems}

    def find_contradictions(self, chain):
        """Compare every statement with each later one and collect the clashes.

        Statements come from extract_statements(chain); the pairwise test is
        delegated to self.are_contradictory. Each clash is recorded as a dict
        with 'type', 'statement1' and 'statement2'.
        """
        extracted = extract_statements(chain)
        return [
            {
                'type': 'contradiction',
                'statement1': earlier,
                'statement2': later
            }
            for position, earlier in enumerate(extracted)
            # Only look forward so each unordered pair is tested once.
            for later in extracted[position + 1:]
            if self.are_contradictory(earlier, later)
        ]
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
### 2. Structural Completeness
|
|
36
|
+
|
|
37
|
+
Verify that the reasoning has all necessary components.
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
class StructuralValidator:
    """Checks that a reasoning chain contains every required structural part."""

    # Parts looked up, in order, by validate().
    REQUIRED_COMPONENTS = [
        'problem_statement',
        'initial_assumptions',
        'reasoning_steps',
        'intermediate_conclusions',
        'final_conclusion'
    ]

    def validate(self, reasoning_chain):
        """Return which required components the chain lacks.

        The result dict has 'complete' (True when nothing is missing) and
        'missing_components' (names in REQUIRED_COMPONENTS order). Presence
        is decided by self.has_component(reasoning_chain, name).
        """
        absent = [
            name
            for name in self.REQUIRED_COMPONENTS
            if not self.has_component(reasoning_chain, name)
        ]
        return {'complete': not absent, 'missing_components': absent}
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### 3. Fallacy Detection
|
|
58
|
+
|
|
59
|
+
Identify common logical fallacies.
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
class FallacyDetector:
    """Runs a table of fallacy checks against a reasoning chain.

    Each entry in FALLACY_PATTERNS maps a fallacy name to a dict whose
    'check' is either a callable taking the chain, or the *name* of a
    method on the detector. Method names are used instead of
    ``self.<method>`` because ``self`` does not exist while the class body
    is being executed; referencing it there raises NameError as soon as
    the class is defined.

    The methods check_false_dichotomy and check_generalization are not
    defined here; they must be supplied by the full implementation or a
    subclass.
    """

    FALLACY_PATTERNS = {
        'circular_reasoning': {
            # The conclusion appears verbatim among the premises.
            'check': lambda chain: chain['conclusion'] in chain['premises']
        },
        'false_dichotomy': {
            'check': 'check_false_dichotomy'
        },
        'hasty_generalization': {
            'check': 'check_generalization'
        }
    }

    def detect(self, reasoning_chain):
        """Apply every registered check and report the fallacies that fire.

        Returns a dict with 'has_fallacies' (bool) and 'fallacies' (a list
        of {'type': name} records in FALLACY_PATTERNS order).

        Raises AttributeError if a pattern names a method the instance
        does not provide.
        """
        detected = []
        for fallacy_name, pattern in self.FALLACY_PATTERNS.items():
            check = pattern['check']
            if isinstance(check, str):
                # Resolve at call time so the bound method sees this instance.
                check = getattr(self, check)
            if check(reasoning_chain):
                detected.append({'type': fallacy_name})
        return {'has_fallacies': bool(detected), 'fallacies': detected}
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Usage Example
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
validator = ReasoningValidator()
|
|
87
|
+
result = validator.validate(reasoning_chain)
|
|
88
|
+
|
|
89
|
+
if not result['valid']:
|
|
90
|
+
for rec in result['recommendations']:
|
|
91
|
+
print(f"Fix: {rec['action']}")
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
**Sources:**
|
|
97
|
+
- [Logical Reasoning in AI Systems](https://www.emergentmind.com/topics/logical-reasoning)
|