crca 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- CRCA.py +172 -7
- MODEL_CARD.md +53 -0
- PKG-INFO +8 -2
- RELEASE_NOTES.md +17 -0
- STABILITY.md +19 -0
- architecture/hybrid/consistency_engine.py +362 -0
- architecture/hybrid/conversation_manager.py +421 -0
- architecture/hybrid/explanation_generator.py +452 -0
- architecture/hybrid/few_shot_learner.py +533 -0
- architecture/hybrid/graph_compressor.py +286 -0
- architecture/hybrid/hybrid_agent.py +4398 -0
- architecture/hybrid/language_compiler.py +623 -0
- architecture/hybrid/main,py +0 -0
- architecture/hybrid/reasoning_tracker.py +322 -0
- architecture/hybrid/self_verifier.py +524 -0
- architecture/hybrid/task_decomposer.py +567 -0
- architecture/hybrid/text_corrector.py +341 -0
- benchmark_results/crca_core_benchmarks.json +178 -0
- branches/crca_sd/crca_sd_realtime.py +6 -2
- branches/general_agent/__init__.py +102 -0
- branches/general_agent/general_agent.py +1400 -0
- branches/general_agent/personality.py +169 -0
- branches/general_agent/utils/__init__.py +19 -0
- branches/general_agent/utils/prompt_builder.py +170 -0
- {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/METADATA +8 -2
- {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/RECORD +303 -20
- crca_core/__init__.py +35 -0
- crca_core/benchmarks/__init__.py +14 -0
- crca_core/benchmarks/synthetic_scm.py +103 -0
- crca_core/core/__init__.py +23 -0
- crca_core/core/api.py +120 -0
- crca_core/core/estimate.py +208 -0
- crca_core/core/godclass.py +72 -0
- crca_core/core/intervention_design.py +174 -0
- crca_core/core/lifecycle.py +48 -0
- crca_core/discovery/__init__.py +9 -0
- crca_core/discovery/tabular.py +193 -0
- crca_core/identify/__init__.py +171 -0
- crca_core/identify/backdoor.py +39 -0
- crca_core/identify/frontdoor.py +48 -0
- crca_core/identify/graph.py +106 -0
- crca_core/identify/id_algorithm.py +43 -0
- crca_core/identify/iv.py +48 -0
- crca_core/models/__init__.py +67 -0
- crca_core/models/provenance.py +56 -0
- crca_core/models/refusal.py +39 -0
- crca_core/models/result.py +83 -0
- crca_core/models/spec.py +151 -0
- crca_core/models/validation.py +68 -0
- crca_core/scm/__init__.py +9 -0
- crca_core/scm/linear_gaussian.py +198 -0
- crca_core/timeseries/__init__.py +6 -0
- crca_core/timeseries/pcmci.py +181 -0
- crca_llm/__init__.py +12 -0
- crca_llm/client.py +85 -0
- crca_llm/coauthor.py +118 -0
- crca_llm/orchestrator.py +289 -0
- crca_llm/types.py +21 -0
- crca_reasoning/__init__.py +16 -0
- crca_reasoning/critique.py +54 -0
- crca_reasoning/godclass.py +206 -0
- crca_reasoning/memory.py +24 -0
- crca_reasoning/rationale.py +10 -0
- crca_reasoning/react_controller.py +81 -0
- crca_reasoning/tool_router.py +97 -0
- crca_reasoning/types.py +40 -0
- crca_sd/__init__.py +15 -0
- crca_sd/crca_sd_core.py +2 -0
- crca_sd/crca_sd_governance.py +2 -0
- crca_sd/crca_sd_mpc.py +2 -0
- crca_sd/crca_sd_realtime.py +2 -0
- crca_sd/crca_sd_tui.py +2 -0
- cuda-keyring_1.1-1_all.deb +0 -0
- cuda-keyring_1.1-1_all.deb.1 +0 -0
- docs/IMAGE_ANNOTATION_USAGE.md +539 -0
- docs/INSTALL_DEEPSPEED.md +125 -0
- docs/api/branches/crca-cg.md +19 -0
- docs/api/branches/crca-q.md +27 -0
- docs/api/branches/crca-sd.md +37 -0
- docs/api/branches/general-agent.md +24 -0
- docs/api/branches/overview.md +19 -0
- docs/api/crca/agent-methods.md +62 -0
- docs/api/crca/operations.md +79 -0
- docs/api/crca/overview.md +32 -0
- docs/api/image-annotation/engine.md +52 -0
- docs/api/image-annotation/overview.md +17 -0
- docs/api/schemas/annotation.md +34 -0
- docs/api/schemas/core-schemas.md +82 -0
- docs/api/schemas/overview.md +32 -0
- docs/api/schemas/policy.md +30 -0
- docs/api/utils/conversation.md +22 -0
- docs/api/utils/graph-reasoner.md +32 -0
- docs/api/utils/overview.md +21 -0
- docs/api/utils/router.md +19 -0
- docs/api/utils/utilities.md +97 -0
- docs/architecture/causal-graphs.md +41 -0
- docs/architecture/data-flow.md +29 -0
- docs/architecture/design-principles.md +33 -0
- docs/architecture/hybrid-agent/components.md +38 -0
- docs/architecture/hybrid-agent/consistency.md +26 -0
- docs/architecture/hybrid-agent/overview.md +44 -0
- docs/architecture/hybrid-agent/reasoning.md +22 -0
- docs/architecture/llm-integration.md +26 -0
- docs/architecture/modular-structure.md +37 -0
- docs/architecture/overview.md +69 -0
- docs/architecture/policy-engine-arch.md +29 -0
- docs/branches/crca-cg/corposwarm.md +39 -0
- docs/branches/crca-cg/esg-scoring.md +30 -0
- docs/branches/crca-cg/multi-agent.md +35 -0
- docs/branches/crca-cg/overview.md +40 -0
- docs/branches/crca-q/alternative-data.md +55 -0
- docs/branches/crca-q/architecture.md +71 -0
- docs/branches/crca-q/backtesting.md +45 -0
- docs/branches/crca-q/causal-engine.md +33 -0
- docs/branches/crca-q/execution.md +39 -0
- docs/branches/crca-q/market-data.md +60 -0
- docs/branches/crca-q/overview.md +58 -0
- docs/branches/crca-q/philosophy.md +60 -0
- docs/branches/crca-q/portfolio-optimization.md +66 -0
- docs/branches/crca-q/risk-management.md +102 -0
- docs/branches/crca-q/setup.md +65 -0
- docs/branches/crca-q/signal-generation.md +61 -0
- docs/branches/crca-q/signal-validation.md +43 -0
- docs/branches/crca-sd/core.md +84 -0
- docs/branches/crca-sd/governance.md +53 -0
- docs/branches/crca-sd/mpc-solver.md +65 -0
- docs/branches/crca-sd/overview.md +59 -0
- docs/branches/crca-sd/realtime.md +28 -0
- docs/branches/crca-sd/tui.md +20 -0
- docs/branches/general-agent/overview.md +37 -0
- docs/branches/general-agent/personality.md +36 -0
- docs/branches/general-agent/prompt-builder.md +30 -0
- docs/changelog/index.md +79 -0
- docs/contributing/code-style.md +69 -0
- docs/contributing/documentation.md +43 -0
- docs/contributing/overview.md +29 -0
- docs/contributing/testing.md +29 -0
- docs/core/crcagent/async-operations.md +65 -0
- docs/core/crcagent/automatic-extraction.md +107 -0
- docs/core/crcagent/batch-prediction.md +80 -0
- docs/core/crcagent/bayesian-inference.md +60 -0
- docs/core/crcagent/causal-graph.md +92 -0
- docs/core/crcagent/counterfactuals.md +96 -0
- docs/core/crcagent/deterministic-simulation.md +78 -0
- docs/core/crcagent/dual-mode-operation.md +82 -0
- docs/core/crcagent/initialization.md +88 -0
- docs/core/crcagent/optimization.md +65 -0
- docs/core/crcagent/overview.md +63 -0
- docs/core/crcagent/time-series.md +57 -0
- docs/core/schemas/annotation.md +30 -0
- docs/core/schemas/core-schemas.md +82 -0
- docs/core/schemas/overview.md +30 -0
- docs/core/schemas/policy.md +41 -0
- docs/core/templates/base-agent.md +31 -0
- docs/core/templates/feature-mixins.md +31 -0
- docs/core/templates/overview.md +29 -0
- docs/core/templates/templates-guide.md +75 -0
- docs/core/tools/mcp-client.md +34 -0
- docs/core/tools/overview.md +24 -0
- docs/core/utils/conversation.md +27 -0
- docs/core/utils/graph-reasoner.md +29 -0
- docs/core/utils/overview.md +27 -0
- docs/core/utils/router.md +27 -0
- docs/core/utils/utilities.md +97 -0
- docs/css/custom.css +84 -0
- docs/examples/basic-usage.md +57 -0
- docs/examples/general-agent/general-agent-examples.md +50 -0
- docs/examples/hybrid-agent/hybrid-agent-examples.md +56 -0
- docs/examples/image-annotation/image-annotation-examples.md +54 -0
- docs/examples/integration/integration-examples.md +58 -0
- docs/examples/overview.md +37 -0
- docs/examples/trading/trading-examples.md +46 -0
- docs/features/causal-reasoning/advanced-topics.md +101 -0
- docs/features/causal-reasoning/counterfactuals.md +43 -0
- docs/features/causal-reasoning/do-calculus.md +50 -0
- docs/features/causal-reasoning/overview.md +47 -0
- docs/features/causal-reasoning/structural-models.md +52 -0
- docs/features/hybrid-agent/advanced-components.md +55 -0
- docs/features/hybrid-agent/core-components.md +64 -0
- docs/features/hybrid-agent/overview.md +34 -0
- docs/features/image-annotation/engine.md +82 -0
- docs/features/image-annotation/features.md +113 -0
- docs/features/image-annotation/integration.md +75 -0
- docs/features/image-annotation/overview.md +53 -0
- docs/features/image-annotation/quickstart.md +73 -0
- docs/features/policy-engine/doctrine-ledger.md +105 -0
- docs/features/policy-engine/monitoring.md +44 -0
- docs/features/policy-engine/mpc-control.md +89 -0
- docs/features/policy-engine/overview.md +46 -0
- docs/getting-started/configuration.md +225 -0
- docs/getting-started/first-agent.md +164 -0
- docs/getting-started/installation.md +144 -0
- docs/getting-started/quickstart.md +137 -0
- docs/index.md +118 -0
- docs/js/mathjax.js +13 -0
- docs/lrm/discovery_proof_notes.md +25 -0
- docs/lrm/finetune_full.md +83 -0
- docs/lrm/math_appendix.md +120 -0
- docs/lrm/overview.md +32 -0
- docs/mkdocs.yml +238 -0
- docs/stylesheets/extra.css +21 -0
- docs_generated/crca_core/CounterfactualResult.md +12 -0
- docs_generated/crca_core/DiscoveryHypothesisResult.md +13 -0
- docs_generated/crca_core/DraftSpec.md +13 -0
- docs_generated/crca_core/EstimateResult.md +13 -0
- docs_generated/crca_core/IdentificationResult.md +17 -0
- docs_generated/crca_core/InterventionDesignResult.md +12 -0
- docs_generated/crca_core/LockedSpec.md +15 -0
- docs_generated/crca_core/RefusalResult.md +12 -0
- docs_generated/crca_core/ValidationReport.md +9 -0
- docs_generated/crca_core/index.md +13 -0
- examples/general_agent_example.py +277 -0
- examples/general_agent_quickstart.py +202 -0
- examples/general_agent_simple.py +92 -0
- examples/hybrid_agent_auto_extraction.py +84 -0
- examples/hybrid_agent_dictionary_demo.py +104 -0
- examples/hybrid_agent_enhanced.py +179 -0
- examples/hybrid_agent_general_knowledge.py +107 -0
- examples/image_annotation_quickstart.py +328 -0
- examples/test_hybrid_fixes.py +77 -0
- image_annotation/__init__.py +27 -0
- image_annotation/annotation_engine.py +2593 -0
- install_cuda_wsl2.sh +59 -0
- install_deepspeed.sh +56 -0
- install_deepspeed_simple.sh +87 -0
- mkdocs.yml +252 -0
- ollama/Modelfile +8 -0
- prompts/__init__.py +2 -1
- prompts/default_crca.py +9 -1
- prompts/general_agent.py +227 -0
- prompts/image_annotation.py +56 -0
- pyproject.toml +17 -2
- requirements-docs.txt +10 -0
- requirements.txt +21 -2
- schemas/__init__.py +26 -1
- schemas/annotation.py +222 -0
- schemas/conversation.py +193 -0
- schemas/hybrid.py +211 -0
- schemas/reasoning.py +276 -0
- schemas_export/crca_core/CounterfactualResult.schema.json +108 -0
- schemas_export/crca_core/DiscoveryHypothesisResult.schema.json +113 -0
- schemas_export/crca_core/DraftSpec.schema.json +635 -0
- schemas_export/crca_core/EstimateResult.schema.json +113 -0
- schemas_export/crca_core/IdentificationResult.schema.json +145 -0
- schemas_export/crca_core/InterventionDesignResult.schema.json +111 -0
- schemas_export/crca_core/LockedSpec.schema.json +646 -0
- schemas_export/crca_core/RefusalResult.schema.json +90 -0
- schemas_export/crca_core/ValidationReport.schema.json +62 -0
- scripts/build_lrm_dataset.py +80 -0
- scripts/export_crca_core_schemas.py +54 -0
- scripts/export_hf_lrm.py +37 -0
- scripts/export_ollama_gguf.py +45 -0
- scripts/generate_changelog.py +157 -0
- scripts/generate_crca_core_docs_from_schemas.py +86 -0
- scripts/run_crca_core_benchmarks.py +163 -0
- scripts/run_full_finetune.py +198 -0
- scripts/run_lrm_eval.py +31 -0
- templates/graph_management.py +29 -0
- tests/conftest.py +9 -0
- tests/test_core.py +2 -3
- tests/test_crca_core_discovery_tabular.py +15 -0
- tests/test_crca_core_estimate_dowhy.py +36 -0
- tests/test_crca_core_identify.py +18 -0
- tests/test_crca_core_intervention_design.py +36 -0
- tests/test_crca_core_linear_gaussian_scm.py +69 -0
- tests/test_crca_core_spec.py +25 -0
- tests/test_crca_core_timeseries_pcmci.py +15 -0
- tests/test_crca_llm_coauthor.py +12 -0
- tests/test_crca_llm_orchestrator.py +80 -0
- tests/test_hybrid_agent_llm_enhanced.py +556 -0
- tests/test_image_annotation_demo.py +376 -0
- tests/test_image_annotation_operational.py +408 -0
- tests/test_image_annotation_unit.py +551 -0
- tests/test_training_moe.py +13 -0
- training/__init__.py +42 -0
- training/datasets.py +140 -0
- training/deepspeed_zero2_0_5b.json +22 -0
- training/deepspeed_zero2_1_5b.json +22 -0
- training/deepspeed_zero3_0_5b.json +28 -0
- training/deepspeed_zero3_14b.json +28 -0
- training/deepspeed_zero3_h100_3gpu.json +20 -0
- training/deepspeed_zero3_offload.json +28 -0
- training/eval.py +92 -0
- training/finetune.py +516 -0
- training/public_datasets.py +89 -0
- training_data/react_train.jsonl +7473 -0
- utils/agent_discovery.py +311 -0
- utils/batch_processor.py +317 -0
- utils/conversation.py +78 -0
- utils/edit_distance.py +118 -0
- utils/formatter.py +33 -0
- utils/graph_reasoner.py +530 -0
- utils/rate_limiter.py +283 -0
- utils/router.py +2 -2
- utils/tool_discovery.py +307 -0
- webui/__init__.py +10 -0
- webui/app.py +229 -0
- webui/config.py +104 -0
- webui/static/css/style.css +332 -0
- webui/static/js/main.js +284 -0
- webui/templates/index.html +42 -0
- tests/test_crca_excel.py +0 -166
- tests/test_data_broker.py +0 -424
- tests/test_palantir.py +0 -349
- {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/WHEEL +0 -0
- {crca-1.4.0.dist-info → crca-1.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from crca_llm import LLMCoauthor
|
|
2
|
+
from crca_core.models.spec import DraftSpec, LockedSpec
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def test_llm_coauthor_returns_draft_specs_only() -> None:
    """Check that the coauthor's bundle is non-empty and holds only DraftSpec objects.

    A LockedSpec must never appear among the returned drafts.
    """
    bundle = LLMCoauthor().draft_specs(
        user_text="Study effect of X on Y",
        observed_columns=["X", "Y", "Z"],
    )
    assert bundle.drafts
    for candidate in bundle.drafts:
        assert isinstance(candidate, DraftSpec)
    # Ensure it never returns a locked spec
    for candidate in bundle.drafts:
        assert not isinstance(candidate, LockedSpec)
|
|
12
|
+
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
|
|
5
|
+
from crca_core.core.lifecycle import lock_spec
|
|
6
|
+
from crca_core.models.spec import CausalGraphSpec, DraftSpec, EdgeSpec, NodeSpec, RoleSpec
|
|
7
|
+
from crca_core.models.refusal import RefusalResult
|
|
8
|
+
from crca_llm.orchestrator import LLMOrchestrator
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class FakeClient:
    """Stand-in chat client that always answers with one pre-set string."""

    def __init__(self, content: str) -> None:
        # Canned reply handed back by every chat_completion call.
        self._content = content

    def chat_completion(self, **kwargs) -> str:
        """Return the canned reply; all keyword arguments are accepted and ignored."""
        canned_reply = self._content
        return canned_reply
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_orchestrator_refuses_without_api_key() -> None:
    """With OPENAI_API_KEY unset, the run yields refusals and no drafts.

    The variable is removed for the duration of the test and put back
    afterwards if it was set beforehand.
    """
    saved_key = os.environ.pop("OPENAI_API_KEY", None)
    try:
        outcome = LLMOrchestrator().run(user_text="Test", observed_columns=["X", "Y"])
        assert outcome.refusals
        assert not outcome.draft_bundle.drafts
        first_refusal = outcome.refusals[0]
        assert isinstance(first_refusal, RefusalResult)
    finally:
        # Restore only when a key existed before the test started.
        if saved_key is not None:
            os.environ["OPENAI_API_KEY"] = saved_key
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_orchestrator_never_emits_locked_spec() -> None:
    """Check that every draft produced from a canned client reply has status "draft".

    The fake client returns ``payload`` serialized as JSON text; the
    orchestrator is then run and its draft bundle inspected.
    """
    # Local import keeps this block self-contained; json.dumps replaces the
    # previous str(dict).replace("'", '"') trick, which produced invalid JSON
    # as soon as a value contained an apostrophe or a bool/None.
    import json

    payload = {
        "drafts": [
            {
                "nodes": ["X", "Y"],
                "edges": [["X", "Y"]],
                "treatments": ["X"],
                "outcomes": ["Y"],
                "columns": ["X", "Y"],
            }
        ],
        "review_checklist": ["Confirm time ordering"],
    }
    orch = LLMOrchestrator(client=FakeClient(content=json.dumps(payload)))
    res = orch.run(user_text="Test", observed_columns=["X", "Y"])
    assert res.draft_bundle.drafts
    assert all(d.status.value == "draft" for d in res.draft_bundle.drafts)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_orchestrator_refuses_estimate_without_identification() -> None:
    """Check that requesting only the "estimate" action produces refusals.

    A hand-built draft spec (X -> Y) is locked with a human approval and
    passed to the orchestrator together with a small data frame and
    ``actions=["estimate"]``; no identification step is requested.
    """
    # Local import keeps this block self-contained; json.dumps replaces the
    # previous str(dict).replace("'", '"') trick, which produced invalid JSON
    # as soon as a value contained an apostrophe or a bool/None.
    import json

    payload = {
        "drafts": [
            {
                "nodes": ["X", "Y"],
                "edges": [["X", "Y"]],
                "treatments": ["X"],
                "outcomes": ["Y"],
                "columns": ["X", "Y"],
            }
        ],
        "review_checklist": [],
    }
    orch = LLMOrchestrator(client=FakeClient(content=json.dumps(payload)))

    draft = DraftSpec(
        graph=CausalGraphSpec(
            nodes=[NodeSpec(name="X"), NodeSpec(name="Y")],
            edges=[EdgeSpec(source="X", target="Y")],
        ),
        roles=RoleSpec(treatments=["X"], outcomes=["Y"]),
    )
    locked = lock_spec(draft, approvals=["human"])
    df = pd.DataFrame({"X": [1, 2, 3], "Y": [2, 3, 4]})

    res = orch.run(
        user_text="Test",
        observed_columns=["X", "Y"],
        locked_spec=locked,
        data=df,
        actions=["estimate"],
    )
    assert res.refusals
|
|
@@ -0,0 +1,556 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Comprehensive tests for LLM-enhanced hybrid agent capabilities.
|
|
3
|
+
|
|
4
|
+
Tests all new features:
|
|
5
|
+
- Conversation memory and context management
|
|
6
|
+
- Chain-of-thought reasoning
|
|
7
|
+
- Few-shot learning
|
|
8
|
+
- Task decomposition
|
|
9
|
+
- Explanation generation
|
|
10
|
+
- Self-verification
|
|
11
|
+
- Consistency guarantees
|
|
12
|
+
- Causal validation
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
import sys
|
|
17
|
+
import pytest
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
|
|
20
|
+
# Add parent directory to path
|
|
21
|
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
22
|
+
|
|
23
|
+
try:
|
|
24
|
+
from architecture.hybrid.hybrid_agent import HybridAgent
|
|
25
|
+
from architecture.hybrid.conversation_manager import ConversationHistory, ContextTracker
|
|
26
|
+
from architecture.hybrid.reasoning_tracker import ReasoningTracker
|
|
27
|
+
from architecture.hybrid.few_shot_learner import ExampleStore, PatternLearner, AdaptiveExtractor
|
|
28
|
+
from architecture.hybrid.task_decomposer import TaskAnalyzer, SubTaskExecutor, PlanGenerator
|
|
29
|
+
from architecture.hybrid.explanation_generator import ExplanationBuilder, TransparencyLayer
|
|
30
|
+
from architecture.hybrid.self_verifier import ConsistencyChecker, ErrorDetector, SelfCorrector
|
|
31
|
+
from architecture.hybrid.consistency_engine import ConsistencyEngine, DeterministicProcessor, StateSnapshot
|
|
32
|
+
from schemas.conversation import ConversationContext, MessageRole, GraphSnapshot
|
|
33
|
+
from schemas.reasoning import ReasoningChain, StepType, InferenceRule, Evidence
|
|
34
|
+
HYBRID_AGENT_AVAILABLE = True
|
|
35
|
+
except ImportError as e:
|
|
36
|
+
HYBRID_AGENT_AVAILABLE = False
|
|
37
|
+
pytest.skip(f"Hybrid agent not available: {e}", allow_module_level=True)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class TestConversationManagement:
    """Test conversation memory and context management."""

    def test_conversation_history_creation(self):
        """A new history exposes a conversation id and starts with no messages."""
        history = ConversationHistory()
        assert history.conversation_id is not None
        assert len(history.context.messages) == 0

    def test_add_message(self):
        """add_message returns the stored message and grows the context by one."""
        history = ConversationHistory()
        message = history.add_message(MessageRole.USER, "What affects price?")
        assert message.role == MessageRole.USER
        assert message.content == "What affects price?"
        assert len(history.context.messages) == 1

    def test_attention_weights(self):
        """Three messages give three weights, the newest outweighing the oldest."""
        history = ConversationHistory(decay_lambda=0.1)
        history.add_message(MessageRole.USER, "Message 1")
        history.add_message(MessageRole.AGENT, "Response 1")
        history.add_message(MessageRole.USER, "Message 2")

        weights = history.context.compute_attention_weights()
        assert len(weights) == 3
        # Most recent message should have highest weight
        assert weights[2] > weights[0]

    def test_context_retrieval(self):
        """retrieve_context(k=2) returns at most two items of the stored message type."""
        history = ConversationHistory()
        history.add_message(MessageRole.USER, "What affects price?")
        history.add_message(MessageRole.AGENT, "Price depends on demand and supply")
        history.add_message(MessageRole.USER, "How about demand?")

        context = history.retrieve_context(k=2)
        assert len(context) <= 2
        assert all(isinstance(msg, type(history.context.messages[0])) for msg in context)

    def test_context_tracker(self):
        """get_relevant_context honours the k upper bound."""
        history = ConversationHistory()
        tracker = ContextTracker(history)

        history.add_message(MessageRole.USER, "What affects price?")
        history.add_message(MessageRole.AGENT, "Price depends on demand")

        relevant = tracker.get_relevant_context("price", k=2)
        assert len(relevant) <= 2

    def test_reference_resolution(self):
        """Smoke-test resolve_reference on a pronoun after a short exchange.

        The previous assertion (``resolved is not None or resolved is None``)
        was a tautology and could never fail, so it has been removed.
        Resolution may or may not succeed (ideally to "price" or "demand"),
        and the return type is not pinned down here, so this test only
        requires that the call completes without raising.
        TODO: assert on the resolved value once the contract is confirmed.
        """
        history = ConversationHistory()
        tracker = ContextTracker(history)

        history.add_message(MessageRole.USER, "What affects price?")
        history.add_message(MessageRole.AGENT, "Price depends on demand")
        history.add_message(MessageRole.USER, "How about it?")

        tracker.resolve_reference("it", history.context.current_turn)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class TestReasoningTracking:
    """Test chain-of-thought reasoning tracking."""

    def test_reasoning_tracker_creation(self):
        """A new tracker holds no chains and has no current chain."""
        fresh = ReasoningTracker()
        assert fresh.chains == {}
        assert fresh.current_chain is None

    def test_create_chain(self):
        """create_chain returns a chain with an id and makes it current."""
        tracker = ReasoningTracker()
        new_chain = tracker.create_chain()
        assert new_chain is not None
        assert new_chain.chain_id is not None
        assert tracker.current_chain == new_chain

    def test_add_step(self):
        """add_step returns a step with an id and appends it to the current chain."""
        tracker = ReasoningTracker()
        tracker.create_chain()

        step_fields = {
            "step_type": StepType.EXTRACTION,
            "operation": "extract_variables",
            "input_state": {'task': 'test'},
            "output_state": {'variables': ['x', 'y']},
            "conclusion": "Extracted 2 variables",
        }
        recorded = tracker.add_step(**step_fields)

        assert recorded is not None
        assert recorded.step_id is not None
        assert len(tracker.current_chain.steps) == 1

    def test_chain_validation(self):
        """validate_chain either reports valid or supplies an error value."""
        tracker = ReasoningTracker()
        tracker.create_chain()

        # Add valid step
        supporting = [Evidence(source="test", content="evidence")]
        tracker.add_step(
            step_type=StepType.EXTRACTION,
            operation="extract",
            input_state={},
            output_state={},
            conclusion="test",
            evidence=supporting,
        )

        verdict, problem = tracker.validate_chain()
        # May be valid or have error
        assert verdict or problem is not None
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
class TestFewShotLearning:
    """Test few-shot learning capabilities."""

    def test_example_store(self):
        """Adding one example leaves exactly one entry in the store."""
        store = ExampleStore()
        expected_output = {"variables": ["price", "demand"], "edges": [("demand", "price")]}
        store.add_example("price depends on demand", expected_output)
        assert len(store.examples) == 1

    def test_find_similar_examples(self):
        """find_similar_examples(k=1) returns at most one hit, which mentions price or demand."""
        store = ExampleStore()
        store.add_example("price depends on demand", {"variables": ["price", "demand"]})
        store.add_example("cost affects profit", {"variables": ["cost", "profit"]})

        hits = store.find_similar_examples("price relates to demand", k=1)
        assert len(hits) <= 1
        if hits:
            best_text = hits[0][0].lower()
            assert "price" in best_text or "demand" in best_text

    def test_pattern_learning(self):
        """Learning from two explicit examples yields at least one pattern."""
        learner = PatternLearner(ExampleStore())

        training_pairs = [
            ("price depends on demand", {"variables": ["price", "demand"], "edges": [("demand", "price")]}),
            ("cost affects profit", {"variables": ["cost", "profit"], "edges": [("cost", "profit")]}),
        ]

        learned = learner.learn_from_examples(training_pairs)
        assert len(learned) > 0

    def test_adaptive_extraction(self):
        """adapt_extraction returns a mapping containing 'variables' or 'edges'."""
        store = ExampleStore()
        learner = PatternLearner(store)
        extractor = AdaptiveExtractor(learner, store)

        # Add examples
        store.add_example(
            "price depends on demand",
            {"variables": ["price", "demand"], "edges": [("demand", "price")]},
        )
        learner.learn_from_examples()

        # Try extraction
        extracted = extractor.adapt_extraction("cost affects revenue")
        assert 'variables' in extracted or 'edges' in extracted
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
class TestTaskDecomposition:
    """Test task decomposition."""

    def test_task_analyzer(self):
        """analyze_task returns a non-None complexity and a boolean decompose flag."""
        complexity, should_decompose = TaskAnalyzer().analyze_task("Analyze the system")
        assert complexity is not None
        assert isinstance(should_decompose, bool)

    def test_task_decomposition(self):
        """decompose_task returns at least one subtask, each carrying a task_id."""
        pieces = TaskAnalyzer().decompose_task("Analyze price and demand, then compare results")
        assert len(pieces) > 0
        for piece in pieces:
            assert hasattr(piece, 'task_id')

    def test_dependency_graph(self):
        """With more than one subtask, build_dependency_graph returns a dict."""
        analyzer = TaskAnalyzer()
        pieces = analyzer.decompose_task("Analyze X and Y")

        # Nothing to check when the task was not split into several subtasks.
        if len(pieces) <= 1:
            return
        graph = analyzer.build_dependency_graph(pieces)
        assert isinstance(graph, dict)

    def test_plan_generation(self):
        """generate_plan returns a mapping with subtasks, execution order and dependencies."""
        planner = PlanGenerator(TaskAnalyzer())

        plan = planner.generate_plan("Analyze the system")
        for required_key in ('subtasks', 'execution_order', 'dependencies'):
            assert required_key in plan
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
class TestExplanationGeneration:
    """Test explanation generation."""

    def test_explanation_builder(self):
        """generate_explanation on a one-step chain yields 'steps' and 'summary'."""
        builder = ExplanationBuilder()

        # Create mock reasoning chain
        from schemas.reasoning import ReasoningChain, ReasoningStep
        chain = ReasoningChain(chain_id="test")
        step_fields = {
            "step_id": "step1",
            "step_type": StepType.EXTRACTION,
            "operation": "extract",
            "input_state": {},
            "output_state": {},
            "conclusion": "test",
        }
        chain.add_step(ReasoningStep(**step_fields))

        explanation = builder.generate_explanation(chain)
        for expected_key in ('steps', 'summary'):
            assert expected_key in explanation

    def test_transparency_layer(self):
        """The transparency layer exposes a reasoning trace and a confidence summary."""
        layer = TransparencyLayer()

        from schemas.reasoning import ReasoningChain, ReasoningStep
        chain = ReasoningChain(chain_id="test")
        step_fields = {
            "step_id": "step1",
            "step_type": StepType.EXTRACTION,
            "operation": "extract",
            "input_state": {},
            "output_state": {},
            "conclusion": "test",
            "confidence": 0.8,
        }
        chain.add_step(ReasoningStep(**step_fields))

        trace = layer.show_reasoning_trace(chain)
        assert 'chain_id' in trace
        assert 'steps' in trace

        confidence_view = layer.visualize_confidence(chain)
        assert 'mean_confidence' in confidence_view
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
class TestSelfVerification:
    """Test self-verification and error detection."""

    def test_consistency_checker(self):
        """verify_consistency returns a pair whose first element is a bool."""
        chain_graph = {
            'nodes': ['A', 'B', 'C'],
            'edges': [('A', 'B'), ('B', 'C')],
        }

        is_consistent, _error = ConsistencyChecker().verify_consistency(chain_graph)
        assert isinstance(is_consistent, bool)

    def test_epistemic_grounding(self):
        """verify_epistemic_grounding returns a bool and a list."""
        checker = ConsistencyChecker()

        chain_graph = {
            'nodes': ['A', 'B', 'C'],
            'edges': [('A', 'B'), ('B', 'C')],
        }
        observed = {'A'}

        all_grounded, ungrounded = checker.verify_epistemic_grounding(chain_graph, observed)
        assert isinstance(all_grounded, bool)
        assert isinstance(ungrounded, list)

    def test_error_detector(self):
        """detect_errors returns a list for a chain holding one low-confidence step."""
        detector = ErrorDetector()

        from schemas.reasoning import ReasoningChain, ReasoningStep
        chain = ReasoningChain(chain_id="test")
        weak_step = ReasoningStep(
            step_id="step1",
            step_type=StepType.EXTRACTION,
            operation="extract",
            input_state={},
            output_state={},
            conclusion="test",
            confidence=0.3,  # Low confidence
        )
        chain.add_step(weak_step)

        single_node_graph = {'nodes': ['A'], 'edges': []}
        found = detector.detect_errors(chain, single_node_graph)
        assert isinstance(found, list)
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
class TestConsistencyEngine:
    """Test consistency engine."""

    def test_deterministic_processor(self):
        """Re-seeding with the same seed must replay the same sequence of draws.

        Two draws are taken, the seed is reset to its initial value, and two
        more draws are taken. Both positions are compared; previously the
        second draw was computed but never checked, so only the first value
        of the sequence was actually verified.
        """
        processor = DeterministicProcessor(seed=42)

        # First pass: two consecutive draws
        r1 = processor.get_random()
        r2 = processor.get_random()

        # Reset seed and draw the same number of values again
        processor.reset_seed(42)
        r3 = processor.get_random()
        r4 = processor.get_random()

        # Should be deterministic (same seed -> same sequence)
        assert r1 == r3
        assert r2 == r4

    def test_state_snapshot(self):
        """A stored state can be fetched back unchanged via its snapshot id."""
        # StateSnapshot is already imported at module level, so no local
        # re-import is needed here.
        snapshot_manager = StateSnapshot()

        state = {'nodes': ['A', 'B'], 'edges': [('A', 'B')]}
        snapshot_id = snapshot_manager.snapshot(state)

        assert snapshot_id is not None
        retrieved = snapshot_manager.get_snapshot(snapshot_id)
        assert retrieved == state

    def test_consistency_engine(self):
        """process_with_snapshots applies the operations and returns snapshot ids."""
        engine = ConsistencyEngine(seed=42)

        initial_state = {'value': 0}
        # Each operation is called with the state and a second argument
        # (named rng here) and returns the next state.
        operations = [lambda s, rng: {'value': s['value'] + 1}]

        result, snapshot_ids = engine.process_with_snapshots(initial_state, operations)
        assert result['value'] == 1
        assert len(snapshot_ids) > 0
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
class TestCausalValidation:
    """Checks for validation of causal relationships by the symbolic reasoner."""

    def test_causal_relationship_validation(self):
        """Validating the A -> B edge returns a pair whose first item is a bool."""
        from architecture.hybrid.hybrid_agent import SymbolicReasoner
        from templates.graph_management import GraphManager

        manager = GraphManager()
        symbolic_reasoner = SymbolicReasoner(manager)

        causal_graph = {
            'nodes': ['A', 'B', 'C'],
            'edges': [('A', 'B')],
        }

        verdict = symbolic_reasoner.validate_causal_relationship('A', 'B', causal_graph)
        # The second element (the error value) is not inspected by this test.
        is_valid, _error = verdict
        assert isinstance(is_valid, bool)
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
class TestIntegration:
    """End-to-end checks that drive a fully assembled HybridAgent."""

    def test_hybrid_agent_creation(self):
        """With every feature flag enabled, the listed components are present."""
        feature_flags = {
            'enable_conversation': True,
            'enable_reasoning_tracking': True,
            'enable_few_shot_learning': True,
            'enable_task_decomposition': True,
            'enable_explanations': True,
            'enable_verification': True,
            'enable_consistency': True,
        }
        agent = HybridAgent(**feature_flags)

        expected_components = (
            'conversation_history',
            'reasoning_tracker',
            'example_store',
            'explanation_builder',
            'consistency_checker',
            'consistency_engine',
        )
        for component_name in expected_components:
            assert getattr(agent, component_name) is not None

    def test_simple_task(self):
        """A plain-language statement yields a non-empty string reply."""
        reply = HybridAgent().run("price depends on demand")
        assert isinstance(reply, str)
        assert len(reply) > 0

    def test_conversation_context(self):
        """A second turn run with the carried context leaves >= 2 messages."""
        agent = HybridAgent(enable_conversation=True)

        # First turn; its reply is not inspected.
        agent.run("What affects price?")
        carried_context = agent.conversation_history.context

        follow_up = agent.run("How about demand?", context=carried_context)
        assert isinstance(follow_up, str)
        message_count = len(agent.conversation_history.context.messages)
        assert message_count >= 2

    def test_chain_of_thought(self):
        """Running with show_reasoning=True returns a string reply."""
        agent = HybridAgent(enable_reasoning_tracking=True)

        reply = agent.run("price depends on demand", show_reasoning=True)
        assert isinstance(reply, str)

        # The step-count check only runs when a tracker and a current chain
        # exist; otherwise the test passes without it.
        tracker = agent.reasoning_tracker
        if tracker and tracker.current_chain:
            assert len(tracker.current_chain.steps) > 0

    def test_few_shot_learning(self):
        """Two taught examples are stored, and a later run returns a string."""
        agent = HybridAgent(enable_few_shot_learning=True)

        # (input text, expected structure) pairs used as training examples.
        training_pairs = [
            ("price depends on demand", {"variables": ["price", "demand"], "edges": [("demand", "price")]}),
            ("cost affects profit", {"variables": ["cost", "profit"], "edges": [("cost", "profit")]}),
        ]

        agent.learn_from_examples(training_pairs)
        stored_examples = agent.example_store.examples
        assert len(stored_examples) == 2

        # Run on a sentence that was not among the examples.
        reply = agent.run("quality influences satisfaction")
        assert isinstance(reply, str)

    def test_scm_parsing(self):
        """A JSON SCM embedded in the prompt does not produce an epistemic error."""
        agent = HybridAgent()

        # The payload lines below are part of the string literal and are
        # reproduced exactly; do not re-indent them.
        scm_task = """
{
"task_id": "test",
"variables": [
{ "id": "S", "role": "state", "domain": "real" },
{ "id": "C", "role": "state", "domain": "real" }
],
"equations": [
{
"id": "S_next",
"defines": "S[t+1]",
"parents": ["S[t]", "C[t]"],
"expr": "S[t] + C[t]"
}
]
}
Parse this SCM.
"""

        reply = agent.run(scm_task)
        assert isinstance(reply, str)
        # Fails only when the reply mentions both "error" and "epistemic".
        lowered = reply.lower()
        assert not ("error" in lowered and "epistemic" in lowered)
|
|
500
|
+
|
|
501
|
+
|
|
502
|
+
class TestDeterministicOperations:
    """Reproducibility checks for seeded agents."""

    def test_deterministic_reproducibility(self):
        """Two agents built with the same seed give identical replies."""
        prompt = "price depends on demand"

        # Build both agents first, then run them in the same order.
        seeded_agents = [HybridAgent(seed=42, enable_consistency=True) for _ in range(2)]
        replies = [seeded_agent.run(prompt) for seeded_agent in seeded_agents]

        # Identical seed and prompt should give identical output.
        assert replies[0] == replies[1]

    def test_consistency_engine_integration(self):
        """The agent's consistency engine applies an operation and records snapshots."""
        agent = HybridAgent(enable_consistency=True, seed=42)

        engine = agent.consistency_engine
        if not engine:
            # Nothing is asserted when the agent exposes no engine.
            return

        start_state = {'test': 0}
        steps = [lambda s, rng: {'test': s['test'] + 1}]

        final_state, snapshots = engine.process_with_snapshots(start_state, steps)
        assert final_state['test'] == 1
        assert len(snapshots) > 0
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
class TestErrorHandling:
    """Checks for error detection and self-correction on a verifying agent."""

    def test_error_detection(self):
        """A potentially problematic prompt still yields a string reply."""
        agent = HybridAgent(enable_verification=True)

        reply = agent.run("identify past policy")
        assert isinstance(reply, str)

        # NOTE(review): this disjunction holds for any non-empty reply, so it
        # does not actually require "epistemic" to appear in the output.
        lowered = reply.lower()
        assert "epistemic" in lowered or len(reply) > 0

    def test_self_correction(self):
        """The self-corrector, when present, returns a list of corrections."""
        agent = HybridAgent(enable_verification=True)

        corrector = agent.self_corrector
        if not corrector:
            # Nothing is asserted when the agent exposes no self-corrector.
            return

        reported_errors = [{'type': 'low_confidence', 'step_id': 'test'}]
        empty_graph = {'nodes': [], 'edges': []}

        corrections = corrector.correct_errors(reported_errors, empty_graph)
        assert isinstance(corrections, list)
|
|
553
|
+
|
|
554
|
+
|
|
555
|
+
if __name__ == "__main__":
    # pytest.main returns the run's exit code; raise it as the process exit
    # status so that running this file directly fails when a test fails.
    raise SystemExit(pytest.main([__file__, "-v"]))
|