PyPI - aiecs - Versions diffs - 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl - Mend

aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of aiecs might be problematic. Click here for more details.

Files changed (340) hide show

aiecs/__init__.py +13 -16
aiecs/__main__.py +7 -7
aiecs/aiecs_client.py +269 -75
aiecs/application/executors/operation_executor.py +79 -54
aiecs/application/knowledge_graph/__init__.py +7 -0
aiecs/application/knowledge_graph/builder/__init__.py +37 -0
aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
aiecs/application/knowledge_graph/extractors/base.py +98 -0
aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
aiecs/application/knowledge_graph/search/__init__.py +59 -0
aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
aiecs/application/knowledge_graph/search/reranker.py +293 -0
aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
aiecs/application/knowledge_graph/validators/__init__.py +13 -0
aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
aiecs/common/__init__.py +9 -0
aiecs/common/knowledge_graph/__init__.py +17 -0
aiecs/common/knowledge_graph/runnable.py +471 -0
aiecs/config/__init__.py +20 -5
aiecs/config/config.py +762 -31
aiecs/config/graph_config.py +131 -0
aiecs/config/tool_config.py +399 -0
aiecs/core/__init__.py +29 -13
aiecs/core/interface/__init__.py +2 -2
aiecs/core/interface/execution_interface.py +22 -22
aiecs/core/interface/storage_interface.py +37 -88
aiecs/core/registry/__init__.py +31 -0
aiecs/core/registry/service_registry.py +92 -0
aiecs/domain/__init__.py +270 -1
aiecs/domain/agent/__init__.py +191 -0
aiecs/domain/agent/base_agent.py +3870 -0
aiecs/domain/agent/exceptions.py +99 -0
aiecs/domain/agent/graph_aware_mixin.py +569 -0
aiecs/domain/agent/hybrid_agent.py +1435 -0
aiecs/domain/agent/integration/__init__.py +29 -0
aiecs/domain/agent/integration/context_compressor.py +216 -0
aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
aiecs/domain/agent/integration/protocols.py +281 -0
aiecs/domain/agent/integration/retry_policy.py +218 -0
aiecs/domain/agent/integration/role_config.py +213 -0
aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
aiecs/domain/agent/lifecycle.py +291 -0
aiecs/domain/agent/llm_agent.py +692 -0
aiecs/domain/agent/memory/__init__.py +12 -0
aiecs/domain/agent/memory/conversation.py +1124 -0
aiecs/domain/agent/migration/__init__.py +14 -0
aiecs/domain/agent/migration/conversion.py +163 -0
aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
aiecs/domain/agent/models.py +884 -0
aiecs/domain/agent/observability.py +479 -0
aiecs/domain/agent/persistence.py +449 -0
aiecs/domain/agent/prompts/__init__.py +29 -0
aiecs/domain/agent/prompts/builder.py +159 -0
aiecs/domain/agent/prompts/formatters.py +187 -0
aiecs/domain/agent/prompts/template.py +255 -0
aiecs/domain/agent/registry.py +253 -0
aiecs/domain/agent/tool_agent.py +444 -0
aiecs/domain/agent/tools/__init__.py +15 -0
aiecs/domain/agent/tools/schema_generator.py +364 -0
aiecs/domain/community/__init__.py +155 -0
aiecs/domain/community/agent_adapter.py +469 -0
aiecs/domain/community/analytics.py +432 -0
aiecs/domain/community/collaborative_workflow.py +648 -0
aiecs/domain/community/communication_hub.py +634 -0
aiecs/domain/community/community_builder.py +320 -0
aiecs/domain/community/community_integration.py +796 -0
aiecs/domain/community/community_manager.py +803 -0
aiecs/domain/community/decision_engine.py +849 -0
aiecs/domain/community/exceptions.py +231 -0
aiecs/domain/community/models/__init__.py +33 -0
aiecs/domain/community/models/community_models.py +234 -0
aiecs/domain/community/resource_manager.py +461 -0
aiecs/domain/community/shared_context_manager.py +589 -0
aiecs/domain/context/__init__.py +40 -10
aiecs/domain/context/context_engine.py +1910 -0
aiecs/domain/context/conversation_models.py +87 -53
aiecs/domain/context/graph_memory.py +582 -0
aiecs/domain/execution/model.py +12 -4
aiecs/domain/knowledge_graph/__init__.py +19 -0
aiecs/domain/knowledge_graph/models/__init__.py +52 -0
aiecs/domain/knowledge_graph/models/entity.py +148 -0
aiecs/domain/knowledge_graph/models/evidence.py +178 -0
aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
aiecs/domain/knowledge_graph/models/path.py +171 -0
aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
aiecs/domain/knowledge_graph/models/query.py +261 -0
aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
aiecs/domain/knowledge_graph/models/relation.py +202 -0
aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
aiecs/domain/task/dsl_processor.py +172 -56
aiecs/domain/task/model.py +20 -8
aiecs/domain/task/task_context.py +27 -24
aiecs/infrastructure/__init__.py +0 -2
aiecs/infrastructure/graph_storage/__init__.py +11 -0
aiecs/infrastructure/graph_storage/base.py +837 -0
aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
aiecs/infrastructure/graph_storage/cache.py +424 -0
aiecs/infrastructure/graph_storage/distributed.py +223 -0
aiecs/infrastructure/graph_storage/error_handling.py +380 -0
aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
aiecs/infrastructure/graph_storage/health_checks.py +378 -0
aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
aiecs/infrastructure/graph_storage/metrics.py +344 -0
aiecs/infrastructure/graph_storage/migration.py +400 -0
aiecs/infrastructure/graph_storage/pagination.py +483 -0
aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
aiecs/infrastructure/graph_storage/postgres.py +1563 -0
aiecs/infrastructure/graph_storage/property_storage.py +353 -0
aiecs/infrastructure/graph_storage/protocols.py +76 -0
aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
aiecs/infrastructure/graph_storage/streaming.py +487 -0
aiecs/infrastructure/graph_storage/tenant.py +412 -0
aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
aiecs/infrastructure/messaging/websocket_manager.py +51 -35
aiecs/infrastructure/monitoring/__init__.py +22 -0
aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
aiecs/infrastructure/monitoring/structured_logger.py +3 -7
aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
aiecs/infrastructure/persistence/__init__.py +14 -1
aiecs/infrastructure/persistence/context_engine_client.py +184 -0
aiecs/infrastructure/persistence/database_manager.py +67 -43
aiecs/infrastructure/persistence/file_storage.py +180 -103
aiecs/infrastructure/persistence/redis_client.py +74 -21
aiecs/llm/__init__.py +73 -25
aiecs/llm/callbacks/__init__.py +11 -0
aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
aiecs/llm/client_factory.py +224 -36
aiecs/llm/client_resolver.py +155 -0
aiecs/llm/clients/__init__.py +38 -0
aiecs/llm/clients/base_client.py +324 -0
aiecs/llm/clients/google_function_calling_mixin.py +457 -0
aiecs/llm/clients/googleai_client.py +241 -0
aiecs/llm/clients/openai_client.py +158 -0
aiecs/llm/clients/openai_compatible_mixin.py +367 -0
aiecs/llm/clients/vertex_client.py +897 -0
aiecs/llm/clients/xai_client.py +201 -0
aiecs/llm/config/__init__.py +51 -0
aiecs/llm/config/config_loader.py +272 -0
aiecs/llm/config/config_validator.py +206 -0
aiecs/llm/config/model_config.py +143 -0
aiecs/llm/protocols.py +149 -0
aiecs/llm/utils/__init__.py +10 -0
aiecs/llm/utils/validate_config.py +89 -0
aiecs/main.py +140 -121
aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
aiecs/scripts/aid/__init__.py +19 -0
aiecs/scripts/aid/module_checker.py +499 -0
aiecs/scripts/aid/version_manager.py +235 -0
aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
aiecs/scripts/dependance_check/__init__.py +15 -0
aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
aiecs/scripts/dependance_patch/__init__.py +7 -0
aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
aiecs/scripts/knowledge_graph/__init__.py +3 -0
aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
aiecs/scripts/tools_develop/README.md +671 -0
aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
aiecs/scripts/tools_develop/__init__.py +21 -0
aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
aiecs/scripts/tools_develop/schema_coverage.py +511 -0
aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
aiecs/scripts/tools_develop/verify_tools.py +352 -0
aiecs/tasks/__init__.py +0 -1
aiecs/tasks/worker.py +115 -47
aiecs/tools/__init__.py +194 -72
aiecs/tools/apisource/__init__.py +99 -0
aiecs/tools/apisource/intelligence/__init__.py +19 -0
aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
aiecs/tools/apisource/monitoring/__init__.py +9 -0
aiecs/tools/apisource/monitoring/metrics.py +330 -0
aiecs/tools/apisource/providers/__init__.py +112 -0
aiecs/tools/apisource/providers/base.py +671 -0
aiecs/tools/apisource/providers/census.py +397 -0
aiecs/tools/apisource/providers/fred.py +535 -0
aiecs/tools/apisource/providers/newsapi.py +409 -0
aiecs/tools/apisource/providers/worldbank.py +352 -0
aiecs/tools/apisource/reliability/__init__.py +12 -0
aiecs/tools/apisource/reliability/error_handler.py +363 -0
aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
aiecs/tools/apisource/tool.py +832 -0
aiecs/tools/apisource/utils/__init__.py +9 -0
aiecs/tools/apisource/utils/validators.py +334 -0
aiecs/tools/base_tool.py +415 -21
aiecs/tools/docs/__init__.py +121 -0
aiecs/tools/docs/ai_document_orchestrator.py +607 -0
aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
aiecs/tools/docs/content_insertion_tool.py +1320 -0
aiecs/tools/docs/document_creator_tool.py +1323 -0
aiecs/tools/docs/document_layout_tool.py +1160 -0
aiecs/tools/docs/document_parser_tool.py +1011 -0
aiecs/tools/docs/document_writer_tool.py +1829 -0
aiecs/tools/knowledge_graph/__init__.py +17 -0
aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
aiecs/tools/langchain_adapter.py +300 -138
aiecs/tools/schema_generator.py +455 -0
aiecs/tools/search_tool/__init__.py +100 -0
aiecs/tools/search_tool/analyzers.py +581 -0
aiecs/tools/search_tool/cache.py +264 -0
aiecs/tools/search_tool/constants.py +128 -0
aiecs/tools/search_tool/context.py +224 -0
aiecs/tools/search_tool/core.py +778 -0
aiecs/tools/search_tool/deduplicator.py +119 -0
aiecs/tools/search_tool/error_handler.py +242 -0
aiecs/tools/search_tool/metrics.py +343 -0
aiecs/tools/search_tool/rate_limiter.py +172 -0
aiecs/tools/search_tool/schemas.py +275 -0
aiecs/tools/statistics/__init__.py +80 -0
aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
aiecs/tools/statistics/data_loader_tool.py +555 -0
aiecs/tools/statistics/data_profiler_tool.py +638 -0
aiecs/tools/statistics/data_transformer_tool.py +580 -0
aiecs/tools/statistics/data_visualizer_tool.py +498 -0
aiecs/tools/statistics/model_trainer_tool.py +507 -0
aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
aiecs/tools/task_tools/__init__.py +49 -36
aiecs/tools/task_tools/chart_tool.py +200 -184
aiecs/tools/task_tools/classfire_tool.py +268 -267
aiecs/tools/task_tools/image_tool.py +175 -131
aiecs/tools/task_tools/office_tool.py +226 -146
aiecs/tools/task_tools/pandas_tool.py +477 -121
aiecs/tools/task_tools/report_tool.py +390 -142
aiecs/tools/task_tools/research_tool.py +149 -79
aiecs/tools/task_tools/scraper_tool.py +339 -145
aiecs/tools/task_tools/stats_tool.py +448 -209
aiecs/tools/temp_file_manager.py +26 -24
aiecs/tools/tool_executor/__init__.py +18 -16
aiecs/tools/tool_executor/tool_executor.py +364 -52
aiecs/utils/LLM_output_structor.py +74 -48
aiecs/utils/__init__.py +14 -3
aiecs/utils/base_callback.py +0 -3
aiecs/utils/cache_provider.py +696 -0
aiecs/utils/execution_utils.py +50 -31
aiecs/utils/prompt_loader.py +1 -0
aiecs/utils/token_usage_repository.py +37 -11
aiecs/ws/socket_server.py +14 -4
{aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
aiecs-1.7.6.dist-info/RECORD +337 -0
aiecs-1.7.6.dist-info/entry_points.txt +13 -0
aiecs/config/registry.py +0 -19
aiecs/domain/context/content_engine.py +0 -982
aiecs/llm/base_client.py +0 -99
aiecs/llm/openai_client.py +0 -125
aiecs/llm/vertex_client.py +0 -186
aiecs/llm/xai_client.py +0 -184
aiecs/scripts/dependency_checker.py +0 -857
aiecs/scripts/quick_dependency_check.py +0 -269
aiecs/tools/task_tools/search_api.py +0 -7
aiecs-1.0.1.dist-info/RECORD +0 -90
aiecs-1.0.1.dist-info/entry_points.txt +0 -7
/aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
/aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
/aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
/aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
{aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
{aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
{aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0

aiecs/application/knowledge_graph/fusion/ab_testing.py ADDED Viewed

@@ -0,0 +1,395 @@
+"""
+A/B Testing Framework for Knowledge Fusion Threshold Validation.
+Provides tools for comparing different threshold configurations and
+evaluating matching performance across different parameter sets.
+"""
+import asyncio
+import logging
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, Tuple
+from aiecs.application.knowledge_graph.fusion.evaluation_dataset import (
+    EntityPair,
+    EvaluationDataset,
+)
+from aiecs.application.knowledge_graph.fusion.matching_config import (
+    FusionMatchingConfig,
+)
+from aiecs.application.knowledge_graph.fusion.similarity_pipeline import (
+    MatchStage,
+    PipelineResult,
+    SimilarityPipeline,
+)
+logger = logging.getLogger(__name__)
+@dataclass
+class EvaluationMetrics:
+    """
+    Confusion-matrix counters for one matching experiment.
+    Only the four counts are stored; every ratio is derived on access and
+    evaluates to 0.0 whenever its denominator is zero.
+    Attributes:
+        true_positives: Pairs correctly reported as matches
+        false_positives: Pairs reported as matches that should not match
+        false_negatives: Pairs that should match but were not reported
+        true_negatives: Pairs correctly reported as non-matches
+        precision: TP / (TP + FP)
+        recall: TP / (TP + FN)
+        f1_score: Harmonic mean of precision and recall
+        accuracy: (TP + TN) / number of counted pairs
+    """
+    true_positives: int = 0
+    false_positives: int = 0
+    false_negatives: int = 0
+    true_negatives: int = 0
+    @property
+    def precision(self) -> float:
+        """Share of reported matches that are correct."""
+        reported = self.true_positives + self.false_positives
+        return self.true_positives / reported if reported != 0 else 0.0
+    @property
+    def recall(self) -> float:
+        """Share of expected matches that were actually reported."""
+        expected = self.true_positives + self.false_negatives
+        return self.true_positives / expected if expected != 0 else 0.0
+    @property
+    def f1_score(self) -> float:
+        """Harmonic mean of precision and recall."""
+        p, r = self.precision, self.recall
+        combined = p + r
+        return 2 * (p * r) / combined if combined != 0 else 0.0
+    @property
+    def accuracy(self) -> float:
+        """Share of all counted pairs that were classified correctly."""
+        correct = self.true_positives + self.true_negatives
+        counted = correct + self.false_positives + self.false_negatives
+        return correct / counted if counted != 0 else 0.0
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the raw counts followed by the derived scores."""
+        # Tuple order defines the key order of the resulting dictionary.
+        exported = (
+            "true_positives",
+            "false_positives",
+            "false_negatives",
+            "true_negatives",
+            "precision",
+            "recall",
+            "f1_score",
+            "accuracy",
+        )
+        return {name: getattr(self, name) for name in exported}
+@dataclass
+class ExperimentResult:
+    """
+    Result from a single threshold configuration experiment.
+    Produced by ABTestingFramework.evaluate_config, one instance per
+    evaluated configuration.
+    Attributes:
+        config_name: Name/identifier for this configuration
+        config: The FusionMatchingConfig used
+        metrics: Evaluation metrics (confusion-matrix counts and derived scores)
+        stage_breakdown: Breakdown of matches by stage; maps the matched
+            stage's value to the number of pairs that matched at that stage
+        errors: List of errors encountered during evaluation, one message
+            per pair whose evaluation raised
+    """
+    config_name: str
+    config: FusionMatchingConfig
+    metrics: EvaluationMetrics
+    stage_breakdown: Dict[str, int] = field(default_factory=dict)
+    errors: List[str] = field(default_factory=list)
+class ABTestingFramework:
+    """
+    A/B Testing framework for comparing threshold configurations.
+    Each evaluation runs every labelled pair of the dataset through the
+    similarity pipeline under one configuration and tallies the outcome
+    into an ExperimentResult.
+    Example:
+        ```python
+        framework = ABTestingFramework(
+            pipeline=pipeline,
+            dataset=evaluation_dataset
+        )
+        # Test default config
+        default_result = await framework.evaluate_config(
+            "default",
+            FusionMatchingConfig()
+        )
+        # Test custom config
+        custom_config = FusionMatchingConfig(
+            semantic_threshold=0.80,
+            alias_match_score=0.95
+        )
+        custom_result = await framework.evaluate_config(
+            "custom",
+            custom_config
+        )
+        # Compare results
+        comparison = framework.compare_results([default_result, custom_result])
+        ```
+    """
+    # Config attributes that threshold_sweep is allowed to vary.
+    _SWEEPABLE_THRESHOLDS = (
+        "alias_match_score",
+        "abbreviation_match_score",
+        "normalization_match_score",
+        "semantic_threshold",
+        "string_similarity_threshold",
+    )
+    # (comparison key, metrics attribute) pairs ranked by compare_results.
+    _RANKED_METRICS = (
+        ("best_precision", "precision"),
+        ("best_recall", "recall"),
+        ("best_f1", "f1_score"),
+        ("best_accuracy", "accuracy"),
+    )
+    def __init__(
+        self,
+        pipeline: SimilarityPipeline,
+        dataset: EvaluationDataset,
+    ):
+        """
+        Initialize A/B testing framework.
+        Args:
+            pipeline: SimilarityPipeline instance for matching
+            dataset: EvaluationDataset with test cases
+        """
+        self._pipeline = pipeline
+        self._dataset = dataset
+    async def evaluate_config(
+        self,
+        config_name: str,
+        config: FusionMatchingConfig,
+        entity_type: Optional[str] = None,
+    ) -> ExperimentResult:
+        """
+        Evaluate a threshold configuration against the dataset.
+        The configuration is installed on the shared pipeline with
+        set_config and is NOT restored afterwards, so the pipeline keeps
+        the last evaluated configuration. Pairs are evaluated one after
+        another.
+        Args:
+            config_name: Name/identifier for this configuration
+            config: FusionMatchingConfig to evaluate
+            entity_type: Optional entity type filter; also used as the
+                entity type for pairs that do not carry their own
+        Returns:
+            ExperimentResult with metrics and breakdown. A pair whose
+            evaluation raises is recorded in ``errors`` and counted as a
+            misclassification (see the except branch below).
+        """
+        logger.info("Evaluating configuration: %s", config_name)
+        # Update pipeline config
+        self._pipeline.set_config(config)
+        # Filter dataset if entity type specified
+        test_dataset = self._dataset
+        if entity_type:
+            test_dataset = self._dataset.get_by_type(entity_type)
+        # Initialize metrics
+        metrics = EvaluationMetrics()
+        stage_breakdown: Dict[str, int] = {}
+        errors: List[str] = []
+        # Evaluate each pair
+        for pair in test_dataset.pairs:
+            try:
+                result = await self._pipeline.compute_similarity(
+                    name1=pair.name1,
+                    name2=pair.name2,
+                    entity_type=pair.entity_type or entity_type,
+                )
+                # Track which stage matched
+                if result.is_match and result.matched_stage:
+                    stage_name = result.matched_stage.value
+                    stage_breakdown[stage_name] = (
+                        stage_breakdown.get(stage_name, 0) + 1
+                    )
+                # Update the confusion matrix from expected vs. actual outcome
+                if pair.should_match:
+                    if result.is_match:
+                        metrics.true_positives += 1
+                    else:
+                        metrics.false_negatives += 1
+                else:
+                    if result.is_match:
+                        metrics.false_positives += 1
+                    else:
+                        metrics.true_negatives += 1
+            except Exception as e:
+                # Best-effort evaluation: one failing pair must not abort the
+                # whole experiment, so the failure is recorded and scored.
+                error_msg = f"Error evaluating pair ({pair.name1}, {pair.name2}): {e}"
+                logger.warning(error_msg)
+                errors.append(error_msg)
+                # Count errors as false negatives if should match, false positives if shouldn't
+                if pair.should_match:
+                    metrics.false_negatives += 1
+                else:
+                    metrics.false_positives += 1
+        return ExperimentResult(
+            config_name=config_name,
+            config=config,
+            metrics=metrics,
+            stage_breakdown=stage_breakdown,
+            errors=errors,
+        )
+    async def threshold_sweep(
+        self,
+        threshold_name: str,
+        threshold_range: List[float],
+        base_config: Optional[FusionMatchingConfig] = None,
+        entity_type: Optional[str] = None,
+    ) -> List[ExperimentResult]:
+        """
+        Perform threshold sweep for a specific threshold parameter.
+        Tests multiple values of a threshold to find optimal value. Every
+        value gets its own FusionMatchingConfig copied from the base
+        configuration; the base configuration itself is not modified.
+        Args:
+            threshold_name: Name of threshold to sweep (e.g., "semantic_threshold")
+            threshold_range: List of threshold values to test
+            base_config: Base configuration (uses default if not provided)
+            entity_type: Optional entity type filter
+        Returns:
+            List of ExperimentResult for each threshold value, in the order
+            of ``threshold_range``
+        Raises:
+            ValueError: If ``threshold_name`` is not a sweepable threshold.
+                Raised before any work is done, even for an empty range.
+        """
+        # Fail fast: reject a bad name once, before building any config.
+        if threshold_name not in self._SWEEPABLE_THRESHOLDS:
+            raise ValueError(f"Unknown threshold name: {threshold_name}")
+        if base_config is None:
+            base_config = FusionMatchingConfig()
+        results: List[ExperimentResult] = []
+        for threshold_value in threshold_range:
+            # Create config with modified threshold
+            config = FusionMatchingConfig(
+                alias_match_score=base_config.alias_match_score,
+                abbreviation_match_score=base_config.abbreviation_match_score,
+                normalization_match_score=base_config.normalization_match_score,
+                semantic_threshold=base_config.semantic_threshold,
+                string_similarity_threshold=base_config.string_similarity_threshold,
+                enabled_stages=base_config.enabled_stages.copy(),
+                semantic_enabled=base_config.semantic_enabled,
+                entity_type_configs=base_config.entity_type_configs.copy(),
+            )
+            # Set the threshold being swept (name validated above)
+            setattr(config, threshold_name, threshold_value)
+            config_name = f"{threshold_name}_{threshold_value:.3f}"
+            result = await self.evaluate_config(config_name, config, entity_type)
+            results.append(result)
+        return results
+    def compare_results(
+        self, results: List[ExperimentResult]
+    ) -> Dict[str, Any]:
+        """
+        Compare multiple experiment results.
+        Args:
+            results: List of ExperimentResult to compare
+        Returns:
+            Dictionary with comparison metrics: ``configs`` holds one summary
+            per result (name, metrics dict, stage breakdown, error count) and
+            each ``best_*`` key holds the name of the configuration with the
+            highest score for that metric. On a tie the earliest result
+            wins. An empty input yields an empty dictionary.
+        """
+        if not results:
+            return {}
+        comparison: Dict[str, Any] = {
+            "configs": [],
+            "best_precision": None,
+            "best_recall": None,
+            "best_f1": None,
+            "best_accuracy": None,
+        }
+        # Start below any valid score so the first result always registers.
+        best_scores: Dict[str, float] = {
+            key: -1 for key, _ in self._RANKED_METRICS
+        }
+        for result in results:
+            metrics = result.metrics
+            comparison["configs"].append(
+                {
+                    "name": result.config_name,
+                    "metrics": metrics.to_dict(),
+                    "stage_breakdown": result.stage_breakdown,
+                    "errors": len(result.errors),
+                }
+            )
+            # Track best scores; strict ">" keeps the earliest result on ties
+            for key, attribute in self._RANKED_METRICS:
+                score = getattr(metrics, attribute)
+                if score > best_scores[key]:
+                    best_scores[key] = score
+                    comparison[key] = result.config_name
+        return comparison
+    def validate_thresholds(
+        self,
+        result: ExperimentResult,
+        min_recall: float = 0.90,
+        min_precision: float = 0.75,
+    ) -> Tuple[bool, Dict[str, Any]]:
+        """
+        Validate that thresholds meet minimum performance requirements.
+        Args:
+            result: ExperimentResult to validate
+            min_recall: Minimum recall requirement (default: 0.90)
+            min_precision: Minimum precision requirement (default: 0.75)
+        Returns:
+            Tuple of (is_valid, validation_details); ``is_valid`` is True
+            only when both the recall and the precision requirement are met
+        """
+        metrics = result.metrics
+        # The scores are computed properties; read each one only once.
+        recall = metrics.recall
+        precision = metrics.precision
+        recall_met = recall >= min_recall
+        precision_met = precision >= min_precision
+        is_valid = recall_met and precision_met
+        validation_details = {
+            "is_valid": is_valid,
+            "recall": recall,
+            "precision": precision,
+            "f1_score": metrics.f1_score,
+            "min_recall": min_recall,
+            "min_precision": min_precision,
+            "recall_met": recall_met,
+            "precision_met": precision_met,
+        }
+        return is_valid, validation_details

aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl

Potentially problematic release.

aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl