aiecs 1.0.1__py3-none-any.whl → 1.7.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiecs might be problematic. Click here for more details.
- aiecs/__init__.py +13 -16
- aiecs/__main__.py +7 -7
- aiecs/aiecs_client.py +269 -75
- aiecs/application/executors/operation_executor.py +79 -54
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
- aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
- aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
- aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +98 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
- aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
- aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
- aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
- aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
- aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
- aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
- aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
- aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
- aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
- aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
- aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
- aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
- aiecs/application/knowledge_graph/search/reranker.py +293 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +471 -0
- aiecs/config/__init__.py +20 -5
- aiecs/config/config.py +762 -31
- aiecs/config/graph_config.py +131 -0
- aiecs/config/tool_config.py +435 -0
- aiecs/core/__init__.py +29 -13
- aiecs/core/interface/__init__.py +2 -2
- aiecs/core/interface/execution_interface.py +22 -22
- aiecs/core/interface/storage_interface.py +37 -88
- aiecs/core/registry/__init__.py +31 -0
- aiecs/core/registry/service_registry.py +92 -0
- aiecs/domain/__init__.py +270 -1
- aiecs/domain/agent/__init__.py +191 -0
- aiecs/domain/agent/base_agent.py +3949 -0
- aiecs/domain/agent/exceptions.py +99 -0
- aiecs/domain/agent/graph_aware_mixin.py +569 -0
- aiecs/domain/agent/hybrid_agent.py +1731 -0
- aiecs/domain/agent/integration/__init__.py +29 -0
- aiecs/domain/agent/integration/context_compressor.py +216 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
- aiecs/domain/agent/integration/protocols.py +281 -0
- aiecs/domain/agent/integration/retry_policy.py +218 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
- aiecs/domain/agent/lifecycle.py +291 -0
- aiecs/domain/agent/llm_agent.py +692 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +1124 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +163 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
- aiecs/domain/agent/models.py +894 -0
- aiecs/domain/agent/observability.py +479 -0
- aiecs/domain/agent/persistence.py +449 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +159 -0
- aiecs/domain/agent/prompts/formatters.py +187 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +253 -0
- aiecs/domain/agent/tool_agent.py +444 -0
- aiecs/domain/agent/tools/__init__.py +15 -0
- aiecs/domain/agent/tools/schema_generator.py +377 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +469 -0
- aiecs/domain/community/analytics.py +432 -0
- aiecs/domain/community/collaborative_workflow.py +648 -0
- aiecs/domain/community/communication_hub.py +634 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +796 -0
- aiecs/domain/community/community_manager.py +803 -0
- aiecs/domain/community/decision_engine.py +849 -0
- aiecs/domain/community/exceptions.py +231 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +234 -0
- aiecs/domain/community/resource_manager.py +461 -0
- aiecs/domain/community/shared_context_manager.py +589 -0
- aiecs/domain/context/__init__.py +40 -10
- aiecs/domain/context/context_engine.py +1910 -0
- aiecs/domain/context/conversation_models.py +87 -53
- aiecs/domain/context/graph_memory.py +582 -0
- aiecs/domain/execution/model.py +12 -4
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +148 -0
- aiecs/domain/knowledge_graph/models/evidence.py +178 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
- aiecs/domain/knowledge_graph/models/path.py +171 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
- aiecs/domain/knowledge_graph/models/query.py +261 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
- aiecs/domain/knowledge_graph/models/relation.py +202 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
- aiecs/domain/task/dsl_processor.py +172 -56
- aiecs/domain/task/model.py +20 -8
- aiecs/domain/task/task_context.py +27 -24
- aiecs/infrastructure/__init__.py +0 -2
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +837 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
- aiecs/infrastructure/graph_storage/cache.py +424 -0
- aiecs/infrastructure/graph_storage/distributed.py +223 -0
- aiecs/infrastructure/graph_storage/error_handling.py +380 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
- aiecs/infrastructure/graph_storage/metrics.py +344 -0
- aiecs/infrastructure/graph_storage/migration.py +400 -0
- aiecs/infrastructure/graph_storage/pagination.py +483 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
- aiecs/infrastructure/graph_storage/postgres.py +1563 -0
- aiecs/infrastructure/graph_storage/property_storage.py +353 -0
- aiecs/infrastructure/graph_storage/protocols.py +76 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
- aiecs/infrastructure/graph_storage/streaming.py +487 -0
- aiecs/infrastructure/graph_storage/tenant.py +412 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
- aiecs/infrastructure/messaging/websocket_manager.py +51 -35
- aiecs/infrastructure/monitoring/__init__.py +22 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
- aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
- aiecs/infrastructure/monitoring/structured_logger.py +3 -7
- aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
- aiecs/infrastructure/persistence/__init__.py +14 -1
- aiecs/infrastructure/persistence/context_engine_client.py +184 -0
- aiecs/infrastructure/persistence/database_manager.py +67 -43
- aiecs/infrastructure/persistence/file_storage.py +180 -103
- aiecs/infrastructure/persistence/redis_client.py +74 -21
- aiecs/llm/__init__.py +73 -25
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
- aiecs/llm/client_factory.py +230 -37
- aiecs/llm/client_resolver.py +155 -0
- aiecs/llm/clients/__init__.py +38 -0
- aiecs/llm/clients/base_client.py +328 -0
- aiecs/llm/clients/google_function_calling_mixin.py +415 -0
- aiecs/llm/clients/googleai_client.py +314 -0
- aiecs/llm/clients/openai_client.py +158 -0
- aiecs/llm/clients/openai_compatible_mixin.py +367 -0
- aiecs/llm/clients/vertex_client.py +1186 -0
- aiecs/llm/clients/xai_client.py +201 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +272 -0
- aiecs/llm/config/config_validator.py +206 -0
- aiecs/llm/config/model_config.py +143 -0
- aiecs/llm/protocols.py +149 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +89 -0
- aiecs/main.py +140 -121
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/module_checker.py +499 -0
- aiecs/scripts/aid/version_manager.py +235 -0
- aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
- aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
- aiecs/scripts/dependance_check/__init__.py +15 -0
- aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
- aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
- aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
- aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
- aiecs/scripts/knowledge_graph/__init__.py +3 -0
- aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
- aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
- aiecs/scripts/tools_develop/README.md +671 -0
- aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
- aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
- aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
- aiecs/scripts/tools_develop/schema_coverage.py +511 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
- aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
- aiecs/scripts/tools_develop/verify_tools.py +352 -0
- aiecs/tasks/__init__.py +0 -1
- aiecs/tasks/worker.py +115 -47
- aiecs/tools/__init__.py +194 -72
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +330 -0
- aiecs/tools/apisource/providers/__init__.py +112 -0
- aiecs/tools/apisource/providers/base.py +671 -0
- aiecs/tools/apisource/providers/census.py +397 -0
- aiecs/tools/apisource/providers/fred.py +535 -0
- aiecs/tools/apisource/providers/newsapi.py +409 -0
- aiecs/tools/apisource/providers/worldbank.py +352 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +363 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
- aiecs/tools/apisource/tool.py +832 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +334 -0
- aiecs/tools/base_tool.py +415 -21
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +607 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
- aiecs/tools/docs/content_insertion_tool.py +1320 -0
- aiecs/tools/docs/document_creator_tool.py +1464 -0
- aiecs/tools/docs/document_layout_tool.py +1160 -0
- aiecs/tools/docs/document_parser_tool.py +1016 -0
- aiecs/tools/docs/document_writer_tool.py +2008 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
- aiecs/tools/langchain_adapter.py +300 -138
- aiecs/tools/schema_generator.py +455 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +581 -0
- aiecs/tools/search_tool/cache.py +264 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +224 -0
- aiecs/tools/search_tool/core.py +778 -0
- aiecs/tools/search_tool/deduplicator.py +119 -0
- aiecs/tools/search_tool/error_handler.py +242 -0
- aiecs/tools/search_tool/metrics.py +343 -0
- aiecs/tools/search_tool/rate_limiter.py +172 -0
- aiecs/tools/search_tool/schemas.py +275 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
- aiecs/tools/statistics/data_loader_tool.py +555 -0
- aiecs/tools/statistics/data_profiler_tool.py +638 -0
- aiecs/tools/statistics/data_transformer_tool.py +580 -0
- aiecs/tools/statistics/data_visualizer_tool.py +498 -0
- aiecs/tools/statistics/model_trainer_tool.py +507 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
- aiecs/tools/task_tools/__init__.py +49 -36
- aiecs/tools/task_tools/chart_tool.py +200 -184
- aiecs/tools/task_tools/classfire_tool.py +268 -267
- aiecs/tools/task_tools/image_tool.py +220 -141
- aiecs/tools/task_tools/office_tool.py +226 -146
- aiecs/tools/task_tools/pandas_tool.py +477 -121
- aiecs/tools/task_tools/report_tool.py +390 -142
- aiecs/tools/task_tools/research_tool.py +149 -79
- aiecs/tools/task_tools/scraper_tool.py +339 -145
- aiecs/tools/task_tools/stats_tool.py +448 -209
- aiecs/tools/temp_file_manager.py +26 -24
- aiecs/tools/tool_executor/__init__.py +18 -16
- aiecs/tools/tool_executor/tool_executor.py +364 -52
- aiecs/utils/LLM_output_structor.py +74 -48
- aiecs/utils/__init__.py +14 -3
- aiecs/utils/base_callback.py +0 -3
- aiecs/utils/cache_provider.py +696 -0
- aiecs/utils/execution_utils.py +50 -31
- aiecs/utils/prompt_loader.py +1 -0
- aiecs/utils/token_usage_repository.py +37 -11
- aiecs/ws/socket_server.py +14 -4
- {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/METADATA +52 -15
- aiecs-1.7.17.dist-info/RECORD +337 -0
- aiecs-1.7.17.dist-info/entry_points.txt +13 -0
- aiecs/config/registry.py +0 -19
- aiecs/domain/context/content_engine.py +0 -982
- aiecs/llm/base_client.py +0 -99
- aiecs/llm/openai_client.py +0 -125
- aiecs/llm/vertex_client.py +0 -186
- aiecs/llm/xai_client.py +0 -184
- aiecs/scripts/dependency_checker.py +0 -857
- aiecs/scripts/quick_dependency_check.py +0 -269
- aiecs/tools/task_tools/search_api.py +0 -7
- aiecs-1.0.1.dist-info/RECORD +0 -90
- aiecs-1.0.1.dist-info/entry_points.txt +0 -7
- /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
- /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
- /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
- /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/WHEEL +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/top_level.txt +0 -0
|
@@ -1,46 +1,27 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import Dict, Any, List, Optional,
|
|
2
|
+
from typing import Dict, Any, List, Optional, Set
|
|
3
3
|
import spacy
|
|
4
4
|
from spacy.language import Language
|
|
5
|
-
from pydantic import BaseModel,
|
|
6
|
-
from pydantic_settings import BaseSettings
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
7
7
|
from collections import Counter
|
|
8
|
-
from scipy.stats import pearsonr
|
|
8
|
+
from scipy.stats import pearsonr # type: ignore[import-untyped]
|
|
9
9
|
import os
|
|
10
10
|
|
|
11
11
|
from aiecs.tools.base_tool import BaseTool
|
|
12
12
|
from aiecs.tools import register_tool
|
|
13
13
|
|
|
14
|
-
# Configuration for ResearchTool
|
|
15
|
-
class ResearchSettings(BaseSettings):
|
|
16
|
-
"""
|
|
17
|
-
Configuration for ResearchTool.
|
|
18
|
-
|
|
19
|
-
Attributes:
|
|
20
|
-
max_workers (int): Maximum number of thread pool workers.
|
|
21
|
-
spacy_model (str): Default spaCy model to use.
|
|
22
|
-
max_text_length (int): Maximum text length for inputs.
|
|
23
|
-
allowed_spacy_models (List[str]): Allowed spaCy models.
|
|
24
|
-
env_prefix (str): Environment variable prefix.
|
|
25
|
-
"""
|
|
26
|
-
max_workers: int = min(32, (os.cpu_count() or 4) * 2)
|
|
27
|
-
spacy_model: str = "en_core_web_sm"
|
|
28
|
-
max_text_length: int = 10_000
|
|
29
|
-
allowed_spacy_models: List[str] = ["en_core_web_sm", "zh_core_web_sm"]
|
|
30
|
-
env_prefix: str = 'RESEARCH_TOOL_'
|
|
31
|
-
|
|
32
|
-
model_config = ConfigDict(env_prefix='RESEARCH_TOOL_')
|
|
33
14
|
|
|
34
15
|
# Exceptions
|
|
35
16
|
class ResearchToolError(Exception):
|
|
36
17
|
"""Base exception for ResearchTool errors."""
|
|
37
|
-
|
|
18
|
+
|
|
38
19
|
|
|
39
20
|
class FileOperationError(ResearchToolError):
|
|
40
21
|
"""Raised when file operations fail."""
|
|
41
|
-
pass
|
|
42
22
|
|
|
43
|
-
|
|
23
|
+
|
|
24
|
+
@register_tool("research")
|
|
44
25
|
class ResearchTool(BaseTool):
|
|
45
26
|
"""
|
|
46
27
|
Tool for causal inference using Mill's methods, advanced induction, deduction, and text summarization.
|
|
@@ -57,34 +38,112 @@ class ResearchTool(BaseTool):
|
|
|
57
38
|
|
|
58
39
|
Inherits from BaseTool.
|
|
59
40
|
"""
|
|
60
|
-
|
|
41
|
+
|
|
42
|
+
# Configuration schema
|
|
43
|
+
class Config(BaseSettings):
|
|
44
|
+
"""Configuration for the research tool
|
|
45
|
+
|
|
46
|
+
Automatically reads from environment variables with RESEARCH_TOOL_ prefix.
|
|
47
|
+
Example: RESEARCH_TOOL_SPACY_MODEL -> spacy_model
|
|
48
|
+
"""
|
|
49
|
+
|
|
50
|
+
model_config = SettingsConfigDict(env_prefix="RESEARCH_TOOL_")
|
|
51
|
+
|
|
52
|
+
max_workers: int = Field(
|
|
53
|
+
default=min(32, (os.cpu_count() or 4) * 2),
|
|
54
|
+
description="Maximum number of worker threads",
|
|
55
|
+
)
|
|
56
|
+
spacy_model: str = Field(default="en_core_web_sm", description="Default spaCy model to use")
|
|
57
|
+
max_text_length: int = Field(default=10_000, description="Maximum text length for inputs")
|
|
58
|
+
allowed_spacy_models: List[str] = Field(
|
|
59
|
+
default=["en_core_web_sm", "zh_core_web_sm"],
|
|
60
|
+
description="Allowed spaCy models",
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
# Schema definitions
|
|
64
|
+
class Mill_agreementSchema(BaseModel):
|
|
65
|
+
"""Schema for mill_agreement operation"""
|
|
66
|
+
|
|
67
|
+
cases: List[Dict[str, Any]] = Field(description="List of cases with attributes and outcomes. Each case should have 'attrs' (dict of attributes) and 'outcome' (boolean)")
|
|
68
|
+
|
|
69
|
+
class Mill_differenceSchema(BaseModel):
|
|
70
|
+
"""Schema for mill_difference operation"""
|
|
71
|
+
|
|
72
|
+
positive_case: Dict[str, Any] = Field(description="Positive case with attributes and outcome. Should have 'attrs' (dict of attributes) and 'outcome' (boolean)")
|
|
73
|
+
negative_case: Dict[str, Any] = Field(description="Negative case with attributes and outcome. Should have 'attrs' (dict of attributes) and 'outcome' (boolean)")
|
|
74
|
+
|
|
75
|
+
class Mill_jointSchema(BaseModel):
|
|
76
|
+
"""Schema for mill_joint operation"""
|
|
77
|
+
|
|
78
|
+
positive_cases: List[Dict[str, Any]] = Field(description="List of positive cases. Each case should have 'attrs' (dict of attributes) and 'outcome' (boolean)")
|
|
79
|
+
negative_cases: List[Dict[str, Any]] = Field(description="List of negative cases. Each case should have 'attrs' (dict of attributes) and 'outcome' (boolean)")
|
|
80
|
+
|
|
81
|
+
class Mill_residuesSchema(BaseModel):
|
|
82
|
+
"""Schema for mill_residues operation"""
|
|
83
|
+
|
|
84
|
+
cases: List[Dict[str, Any]] = Field(description="List of cases with attributes and effects. Each case should have 'attrs' (dict of attributes) and 'effects' (list of effect names)")
|
|
85
|
+
known_causes: Dict[str, List[str]] = Field(description="Dictionary mapping effect names to lists of known cause attribute names")
|
|
86
|
+
|
|
87
|
+
class Mill_concomitantSchema(BaseModel):
|
|
88
|
+
"""Schema for mill_concomitant operation"""
|
|
89
|
+
|
|
90
|
+
cases: List[Dict[str, Any]] = Field(description="List of cases with attributes. Each case should have 'attrs' (dict of attributes with numeric values)")
|
|
91
|
+
factor: str = Field(description="Name of the factor attribute to analyze")
|
|
92
|
+
effect: str = Field(description="Name of the effect attribute to analyze")
|
|
93
|
+
|
|
94
|
+
class InductionSchema(BaseModel):
|
|
95
|
+
"""Schema for induction operation"""
|
|
96
|
+
|
|
97
|
+
examples: List[str] = Field(description="List of example text strings to generalize patterns from")
|
|
98
|
+
max_keywords: int = Field(default=10, description="Maximum number of keywords/patterns to extract from the examples")
|
|
99
|
+
|
|
100
|
+
class DeductionSchema(BaseModel):
|
|
101
|
+
"""Schema for deduction operation"""
|
|
102
|
+
|
|
103
|
+
premises: List[str] = Field(description="List of premise statement strings to validate against")
|
|
104
|
+
conclusion: Optional[str] = Field(default=None, description="Optional conclusion statement string to validate. If None, validation will fail")
|
|
105
|
+
|
|
106
|
+
class SummarizeSchema(BaseModel):
|
|
107
|
+
"""Schema for summarize operation"""
|
|
108
|
+
|
|
109
|
+
text: str = Field(description="Text string to summarize")
|
|
110
|
+
max_length: int = Field(default=150, description="Maximum length of the summary in words")
|
|
111
|
+
language: Optional[str] = Field(default=None, description="Optional language code for the text. If None, uses the default spaCy model language")
|
|
112
|
+
|
|
113
|
+
def __init__(self, config: Optional[Dict[str, Any]] = None, **kwargs):
|
|
61
114
|
"""
|
|
62
115
|
Initialize ResearchTool with settings and resources.
|
|
63
116
|
|
|
64
117
|
Args:
|
|
65
|
-
config (Dict, optional): Configuration overrides for
|
|
118
|
+
config (Dict, optional): Configuration overrides for ResearchTool.
|
|
119
|
+
**kwargs: Additional arguments passed to BaseTool (e.g., tool_name)
|
|
66
120
|
|
|
67
121
|
Raises:
|
|
68
122
|
ValueError: If config contains invalid settings.
|
|
123
|
+
|
|
124
|
+
Configuration is automatically loaded by BaseTool from:
|
|
125
|
+
1. Explicit config dict (highest priority)
|
|
126
|
+
2. YAML config files (config/tools/research.yaml)
|
|
127
|
+
3. Environment variables (via dotenv from .env files)
|
|
128
|
+
4. Tool defaults (lowest priority)
|
|
69
129
|
"""
|
|
70
|
-
super().__init__(config)
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
raise ValueError(f"Invalid configuration: {e}")
|
|
130
|
+
super().__init__(config, **kwargs)
|
|
131
|
+
|
|
132
|
+
# Configuration is automatically loaded by BaseTool into self._config_obj
|
|
133
|
+
# Access config via self._config_obj (BaseSettings instance)
|
|
134
|
+
self.config = self._config_obj if self._config_obj else self.Config()
|
|
135
|
+
|
|
77
136
|
self.logger = logging.getLogger(__name__)
|
|
78
137
|
if not self.logger.handlers:
|
|
79
138
|
handler = logging.StreamHandler()
|
|
80
|
-
handler.setFormatter(logging.Formatter(
|
|
139
|
+
handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
|
|
81
140
|
self.logger.addHandler(handler)
|
|
82
141
|
self.logger.setLevel(logging.INFO)
|
|
83
142
|
self._spacy_nlp: Optional[Language] = None
|
|
84
143
|
|
|
85
144
|
def __del__(self):
|
|
86
145
|
"""Clean up resources when the object is destroyed."""
|
|
87
|
-
if hasattr(self,
|
|
146
|
+
if hasattr(self, "_spacy_nlp") and self._spacy_nlp is not None:
|
|
88
147
|
self._spacy_nlp = None
|
|
89
148
|
|
|
90
149
|
def _get_spacy(self) -> Language:
|
|
@@ -98,9 +157,9 @@ class ResearchTool(BaseTool):
|
|
|
98
157
|
ResearchToolError: If the spaCy model is invalid.
|
|
99
158
|
"""
|
|
100
159
|
if self._spacy_nlp is None:
|
|
101
|
-
if self.
|
|
102
|
-
raise ResearchToolError(f"Invalid spaCy model '{self.
|
|
103
|
-
self._spacy_nlp = spacy.load(self.
|
|
160
|
+
if self.config.spacy_model not in self.config.allowed_spacy_models:
|
|
161
|
+
raise ResearchToolError(f"Invalid spaCy model '{self.config.spacy_model}', expected {self.config.allowed_spacy_models}")
|
|
162
|
+
self._spacy_nlp = spacy.load(self.config.spacy_model, disable=["textcat"])
|
|
104
163
|
return self._spacy_nlp
|
|
105
164
|
|
|
106
165
|
def mill_agreement(self, cases: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
@@ -117,13 +176,13 @@ class ResearchTool(BaseTool):
|
|
|
117
176
|
FileOperationError: If processing fails.
|
|
118
177
|
"""
|
|
119
178
|
try:
|
|
120
|
-
truthy = [c[
|
|
179
|
+
truthy = [c["attrs"] for c in cases if c.get("outcome")]
|
|
121
180
|
if not truthy:
|
|
122
|
-
return {
|
|
181
|
+
return {"common_factors": []}
|
|
123
182
|
common = set(k for k, v in truthy[0].items() if v)
|
|
124
183
|
for attrs in truthy[1:]:
|
|
125
184
|
common &= set(k for k, v in attrs.items() if v)
|
|
126
|
-
return {
|
|
185
|
+
return {"common_factors": list(common)}
|
|
127
186
|
except Exception as e:
|
|
128
187
|
raise FileOperationError(f"Failed to process mill_agreement: {str(e)}")
|
|
129
188
|
|
|
@@ -142,14 +201,18 @@ class ResearchTool(BaseTool):
|
|
|
142
201
|
FileOperationError: If processing fails.
|
|
143
202
|
"""
|
|
144
203
|
try:
|
|
145
|
-
pos = {k for k, v in positive_case.get(
|
|
146
|
-
neg = {k for k, v in negative_case.get(
|
|
204
|
+
pos = {k for k, v in positive_case.get("attrs", {}).items() if v}
|
|
205
|
+
neg = {k for k, v in negative_case.get("attrs", {}).items() if v}
|
|
147
206
|
diff = pos - neg
|
|
148
|
-
return {
|
|
207
|
+
return {"difference_factors": list(diff)}
|
|
149
208
|
except Exception as e:
|
|
150
209
|
raise FileOperationError(f"Failed to process mill_difference: {str(e)}")
|
|
151
210
|
|
|
152
|
-
def mill_joint(
|
|
211
|
+
def mill_joint(
|
|
212
|
+
self,
|
|
213
|
+
positive_cases: List[Dict[str, Any]],
|
|
214
|
+
negative_cases: List[Dict[str, Any]],
|
|
215
|
+
) -> Dict[str, Any]:
|
|
153
216
|
"""
|
|
154
217
|
Combine Mill's Method of Agreement and Difference to identify causal factors.
|
|
155
218
|
|
|
@@ -164,18 +227,18 @@ class ResearchTool(BaseTool):
|
|
|
164
227
|
FileOperationError: If processing fails.
|
|
165
228
|
"""
|
|
166
229
|
try:
|
|
167
|
-
truthy = [c[
|
|
230
|
+
truthy = [c["attrs"] for c in positive_cases if c.get("outcome")]
|
|
168
231
|
if not truthy:
|
|
169
|
-
return {
|
|
232
|
+
return {"causal_factors": []}
|
|
170
233
|
common = set(k for k, v in truthy[0].items() if v)
|
|
171
234
|
for attrs in truthy[1:]:
|
|
172
235
|
common &= set(k for k, v in attrs.items() if v)
|
|
173
|
-
falsy = [c[
|
|
236
|
+
falsy = [c["attrs"] for c in negative_cases if not c.get("outcome")]
|
|
174
237
|
if not falsy:
|
|
175
|
-
return {
|
|
238
|
+
return {"causal_factors": list(common)}
|
|
176
239
|
for attrs in falsy:
|
|
177
240
|
common -= set(k for k, v in attrs.items() if v)
|
|
178
|
-
return {
|
|
241
|
+
return {"causal_factors": list(common)}
|
|
179
242
|
except Exception as e:
|
|
180
243
|
raise FileOperationError(f"Failed to process mill_joint: {str(e)}")
|
|
181
244
|
|
|
@@ -196,15 +259,15 @@ class ResearchTool(BaseTool):
|
|
|
196
259
|
try:
|
|
197
260
|
residual = {}
|
|
198
261
|
for case in cases:
|
|
199
|
-
effects = case.get(
|
|
200
|
-
attrs = set(k for k, v in case.get(
|
|
262
|
+
effects = case.get("effects", {})
|
|
263
|
+
attrs = set(k for k, v in case.get("attrs", {}).items() if v)
|
|
201
264
|
for effect in effects:
|
|
202
265
|
if effect in known_causes:
|
|
203
266
|
known = set(known_causes[effect])
|
|
204
267
|
residual[effect] = list(attrs - known)
|
|
205
268
|
else:
|
|
206
269
|
residual[effect] = list(attrs)
|
|
207
|
-
return {
|
|
270
|
+
return {"residual_causes": residual}
|
|
208
271
|
except Exception as e:
|
|
209
272
|
raise FileOperationError(f"Failed to process mill_residues: {str(e)}")
|
|
210
273
|
|
|
@@ -224,16 +287,17 @@ class ResearchTool(BaseTool):
|
|
|
224
287
|
FileOperationError: If processing fails.
|
|
225
288
|
"""
|
|
226
289
|
try:
|
|
227
|
-
factor_vals = [case[
|
|
228
|
-
effect_vals = [case[
|
|
290
|
+
factor_vals = [case["attrs"].get(factor, 0) for case in cases]
|
|
291
|
+
effect_vals = [case["attrs"].get(effect, 0) for case in cases]
|
|
229
292
|
if len(factor_vals) < 2:
|
|
230
|
-
return {
|
|
231
|
-
|
|
293
|
+
return {"correlation": 0.0, "pvalue": 1.0}
|
|
294
|
+
|
|
232
295
|
# Convert to numpy arrays to avoid PyTorch compatibility issues
|
|
233
296
|
import numpy as np
|
|
297
|
+
|
|
234
298
|
factor_array = np.array(factor_vals, dtype=np.float64)
|
|
235
299
|
effect_array = np.array(effect_vals, dtype=np.float64)
|
|
236
|
-
|
|
300
|
+
|
|
237
301
|
# Calculate correlation using numpy if scipy fails
|
|
238
302
|
try:
|
|
239
303
|
corr, pval = pearsonr(factor_array, effect_array)
|
|
@@ -241,17 +305,19 @@ class ResearchTool(BaseTool):
|
|
|
241
305
|
# Fallback to numpy correlation calculation
|
|
242
306
|
self.logger.warning(f"scipy pearsonr failed ({e}), using numpy fallback")
|
|
243
307
|
corr = np.corrcoef(factor_array, effect_array)[0, 1]
|
|
244
|
-
# Simple p-value approximation (not statistically rigorous but
|
|
308
|
+
# Simple p-value approximation (not statistically rigorous but
|
|
309
|
+
# functional)
|
|
245
310
|
n = len(factor_array)
|
|
246
311
|
if n <= 2:
|
|
247
312
|
pval = 1.0
|
|
248
313
|
else:
|
|
249
314
|
# Approximate p-value using t-distribution
|
|
250
315
|
t_stat = corr * np.sqrt((n - 2) / (1 - corr**2 + 1e-10))
|
|
251
|
-
from scipy.stats import t
|
|
316
|
+
from scipy.stats import t # type: ignore[import-untyped]
|
|
317
|
+
|
|
252
318
|
pval = 2 * (1 - t.cdf(abs(t_stat), n - 2))
|
|
253
|
-
|
|
254
|
-
return {
|
|
319
|
+
|
|
320
|
+
return {"correlation": float(corr), "pvalue": float(pval)}
|
|
255
321
|
except Exception as e:
|
|
256
322
|
raise FileOperationError(f"Failed to process mill_concomitant: {str(e)}")
|
|
257
323
|
|
|
@@ -275,10 +341,10 @@ class ResearchTool(BaseTool):
|
|
|
275
341
|
patterns = []
|
|
276
342
|
for doc in docs:
|
|
277
343
|
patterns.extend([chunk.text.lower() for chunk in doc.noun_chunks])
|
|
278
|
-
patterns.extend([token.lemma_.lower() for token in doc if token.pos_ ==
|
|
344
|
+
patterns.extend([token.lemma_.lower() for token in doc if token.pos_ == "VERB"])
|
|
279
345
|
counter = Counter(patterns)
|
|
280
346
|
common = [word for word, count in counter.most_common() if count > 1][:max_keywords]
|
|
281
|
-
return {
|
|
347
|
+
return {"patterns": common}
|
|
282
348
|
except Exception as e:
|
|
283
349
|
raise FileOperationError(f"Failed to process induction: {str(e)}")
|
|
284
350
|
|
|
@@ -301,14 +367,18 @@ class ResearchTool(BaseTool):
|
|
|
301
367
|
premises_docs = [nlp(p) for p in premises]
|
|
302
368
|
conclusion_doc = nlp(conclusion) if conclusion else None
|
|
303
369
|
if not conclusion_doc:
|
|
304
|
-
return {
|
|
305
|
-
|
|
306
|
-
|
|
370
|
+
return {
|
|
371
|
+
"valid": False,
|
|
372
|
+
"conclusion": None,
|
|
373
|
+
"reason": "No conclusion provided",
|
|
374
|
+
}
|
|
375
|
+
premise_entities: Set[str] = set()
|
|
376
|
+
premise_predicates: Set[str] = set()
|
|
307
377
|
for doc in premises_docs:
|
|
308
378
|
premise_entities.update(ent.text.lower() for ent in doc.ents)
|
|
309
|
-
premise_predicates.update(token.lemma_.lower() for token in doc if token.pos_ ==
|
|
379
|
+
premise_predicates.update(token.lemma_.lower() for token in doc if token.pos_ == "VERB")
|
|
310
380
|
conclusion_entities = set(ent.text.lower() for ent in conclusion_doc.ents)
|
|
311
|
-
conclusion_predicates = set(token.lemma_.lower() for token in conclusion_doc if token.pos_ ==
|
|
381
|
+
conclusion_predicates = set(token.lemma_.lower() for token in conclusion_doc if token.pos_ == "VERB")
|
|
312
382
|
entities_valid = conclusion_entities.issubset(premise_entities)
|
|
313
383
|
predicates_valid = conclusion_predicates.issubset(premise_predicates)
|
|
314
384
|
valid = entities_valid and predicates_valid
|
|
@@ -316,10 +386,10 @@ class ResearchTool(BaseTool):
|
|
|
316
386
|
"Conclusion matches premise patterns."
|
|
317
387
|
if valid
|
|
318
388
|
else f"Conclusion contains unmatched {'entities' if not entities_valid else ''} "
|
|
319
|
-
|
|
320
|
-
|
|
389
|
+
f"{'and ' if not entities_valid and not predicates_valid else ''}"
|
|
390
|
+
f"{'predicates' if not predicates_valid else ''}."
|
|
321
391
|
)
|
|
322
|
-
return {
|
|
392
|
+
return {"valid": valid, "conclusion": conclusion, "reason": reason}
|
|
323
393
|
except Exception as e:
|
|
324
394
|
raise FileOperationError(f"Failed to process deduction: {str(e)}")
|
|
325
395
|
|
|
@@ -344,20 +414,20 @@ class ResearchTool(BaseTool):
|
|
|
344
414
|
sentences = [sent.text for sent in doc.sents]
|
|
345
415
|
if not sentences:
|
|
346
416
|
return ""
|
|
347
|
-
keywords = [token.lemma_.lower() for token in doc if token.pos_ in (
|
|
417
|
+
keywords = [token.lemma_.lower() for token in doc if token.pos_ in ("NOUN", "VERB", "ADJ") and not token.is_stop]
|
|
348
418
|
keyword_freq = Counter(keywords)
|
|
349
419
|
scores = []
|
|
350
420
|
for sent in sentences:
|
|
351
421
|
sent_doc = nlp(sent)
|
|
352
|
-
sent_keywords = [token.lemma_.lower() for token in sent_doc if token.pos_ in (
|
|
422
|
+
sent_keywords = [token.lemma_.lower() for token in sent_doc if token.pos_ in ("NOUN", "VERB", "ADJ")]
|
|
353
423
|
score = sum(keyword_freq.get(k, 0) for k in sent_keywords) / (len(sent_keywords) + 1)
|
|
354
424
|
scores.append((sent, score))
|
|
355
425
|
scores.sort(key=lambda x: x[1], reverse=True)
|
|
356
|
-
selected = [sent for sent, _ in scores[:max(1, max_length // 50)]]
|
|
357
|
-
summary =
|
|
426
|
+
selected = [sent for sent, _ in scores[: max(1, max_length // 50)]]
|
|
427
|
+
summary = " ".join(selected)
|
|
358
428
|
words = summary.split()
|
|
359
429
|
if len(words) > max_length:
|
|
360
|
-
summary =
|
|
430
|
+
summary = " ".join(words[:max_length]) + "..."
|
|
361
431
|
return summary
|
|
362
432
|
except Exception as e:
|
|
363
433
|
raise FileOperationError(f"Failed to process summarize: {str(e)}")
|