aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiecs might be problematic. Click here for more details.
- aiecs/__init__.py +13 -16
- aiecs/__main__.py +7 -7
- aiecs/aiecs_client.py +269 -75
- aiecs/application/executors/operation_executor.py +79 -54
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
- aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
- aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
- aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +98 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
- aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
- aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
- aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
- aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
- aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
- aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
- aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
- aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
- aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
- aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
- aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
- aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
- aiecs/application/knowledge_graph/search/reranker.py +293 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +471 -0
- aiecs/config/__init__.py +20 -5
- aiecs/config/config.py +762 -31
- aiecs/config/graph_config.py +131 -0
- aiecs/config/tool_config.py +399 -0
- aiecs/core/__init__.py +29 -13
- aiecs/core/interface/__init__.py +2 -2
- aiecs/core/interface/execution_interface.py +22 -22
- aiecs/core/interface/storage_interface.py +37 -88
- aiecs/core/registry/__init__.py +31 -0
- aiecs/core/registry/service_registry.py +92 -0
- aiecs/domain/__init__.py +270 -1
- aiecs/domain/agent/__init__.py +191 -0
- aiecs/domain/agent/base_agent.py +3870 -0
- aiecs/domain/agent/exceptions.py +99 -0
- aiecs/domain/agent/graph_aware_mixin.py +569 -0
- aiecs/domain/agent/hybrid_agent.py +1435 -0
- aiecs/domain/agent/integration/__init__.py +29 -0
- aiecs/domain/agent/integration/context_compressor.py +216 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
- aiecs/domain/agent/integration/protocols.py +281 -0
- aiecs/domain/agent/integration/retry_policy.py +218 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
- aiecs/domain/agent/lifecycle.py +291 -0
- aiecs/domain/agent/llm_agent.py +692 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +1124 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +163 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
- aiecs/domain/agent/models.py +884 -0
- aiecs/domain/agent/observability.py +479 -0
- aiecs/domain/agent/persistence.py +449 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +159 -0
- aiecs/domain/agent/prompts/formatters.py +187 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +253 -0
- aiecs/domain/agent/tool_agent.py +444 -0
- aiecs/domain/agent/tools/__init__.py +15 -0
- aiecs/domain/agent/tools/schema_generator.py +364 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +469 -0
- aiecs/domain/community/analytics.py +432 -0
- aiecs/domain/community/collaborative_workflow.py +648 -0
- aiecs/domain/community/communication_hub.py +634 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +796 -0
- aiecs/domain/community/community_manager.py +803 -0
- aiecs/domain/community/decision_engine.py +849 -0
- aiecs/domain/community/exceptions.py +231 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +234 -0
- aiecs/domain/community/resource_manager.py +461 -0
- aiecs/domain/community/shared_context_manager.py +589 -0
- aiecs/domain/context/__init__.py +40 -10
- aiecs/domain/context/context_engine.py +1910 -0
- aiecs/domain/context/conversation_models.py +87 -53
- aiecs/domain/context/graph_memory.py +582 -0
- aiecs/domain/execution/model.py +12 -4
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +148 -0
- aiecs/domain/knowledge_graph/models/evidence.py +178 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
- aiecs/domain/knowledge_graph/models/path.py +171 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
- aiecs/domain/knowledge_graph/models/query.py +261 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
- aiecs/domain/knowledge_graph/models/relation.py +202 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
- aiecs/domain/task/dsl_processor.py +172 -56
- aiecs/domain/task/model.py +20 -8
- aiecs/domain/task/task_context.py +27 -24
- aiecs/infrastructure/__init__.py +0 -2
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +837 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
- aiecs/infrastructure/graph_storage/cache.py +424 -0
- aiecs/infrastructure/graph_storage/distributed.py +223 -0
- aiecs/infrastructure/graph_storage/error_handling.py +380 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
- aiecs/infrastructure/graph_storage/metrics.py +344 -0
- aiecs/infrastructure/graph_storage/migration.py +400 -0
- aiecs/infrastructure/graph_storage/pagination.py +483 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
- aiecs/infrastructure/graph_storage/postgres.py +1563 -0
- aiecs/infrastructure/graph_storage/property_storage.py +353 -0
- aiecs/infrastructure/graph_storage/protocols.py +76 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
- aiecs/infrastructure/graph_storage/streaming.py +487 -0
- aiecs/infrastructure/graph_storage/tenant.py +412 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
- aiecs/infrastructure/messaging/websocket_manager.py +51 -35
- aiecs/infrastructure/monitoring/__init__.py +22 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
- aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
- aiecs/infrastructure/monitoring/structured_logger.py +3 -7
- aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
- aiecs/infrastructure/persistence/__init__.py +14 -1
- aiecs/infrastructure/persistence/context_engine_client.py +184 -0
- aiecs/infrastructure/persistence/database_manager.py +67 -43
- aiecs/infrastructure/persistence/file_storage.py +180 -103
- aiecs/infrastructure/persistence/redis_client.py +74 -21
- aiecs/llm/__init__.py +73 -25
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
- aiecs/llm/client_factory.py +224 -36
- aiecs/llm/client_resolver.py +155 -0
- aiecs/llm/clients/__init__.py +38 -0
- aiecs/llm/clients/base_client.py +324 -0
- aiecs/llm/clients/google_function_calling_mixin.py +457 -0
- aiecs/llm/clients/googleai_client.py +241 -0
- aiecs/llm/clients/openai_client.py +158 -0
- aiecs/llm/clients/openai_compatible_mixin.py +367 -0
- aiecs/llm/clients/vertex_client.py +897 -0
- aiecs/llm/clients/xai_client.py +201 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +272 -0
- aiecs/llm/config/config_validator.py +206 -0
- aiecs/llm/config/model_config.py +143 -0
- aiecs/llm/protocols.py +149 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +89 -0
- aiecs/main.py +140 -121
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/module_checker.py +499 -0
- aiecs/scripts/aid/version_manager.py +235 -0
- aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
- aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
- aiecs/scripts/dependance_check/__init__.py +15 -0
- aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
- aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
- aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
- aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
- aiecs/scripts/knowledge_graph/__init__.py +3 -0
- aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
- aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
- aiecs/scripts/tools_develop/README.md +671 -0
- aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
- aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
- aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
- aiecs/scripts/tools_develop/schema_coverage.py +511 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
- aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
- aiecs/scripts/tools_develop/verify_tools.py +352 -0
- aiecs/tasks/__init__.py +0 -1
- aiecs/tasks/worker.py +115 -47
- aiecs/tools/__init__.py +194 -72
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +330 -0
- aiecs/tools/apisource/providers/__init__.py +112 -0
- aiecs/tools/apisource/providers/base.py +671 -0
- aiecs/tools/apisource/providers/census.py +397 -0
- aiecs/tools/apisource/providers/fred.py +535 -0
- aiecs/tools/apisource/providers/newsapi.py +409 -0
- aiecs/tools/apisource/providers/worldbank.py +352 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +363 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
- aiecs/tools/apisource/tool.py +832 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +334 -0
- aiecs/tools/base_tool.py +415 -21
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +607 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
- aiecs/tools/docs/content_insertion_tool.py +1320 -0
- aiecs/tools/docs/document_creator_tool.py +1323 -0
- aiecs/tools/docs/document_layout_tool.py +1160 -0
- aiecs/tools/docs/document_parser_tool.py +1011 -0
- aiecs/tools/docs/document_writer_tool.py +1829 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
- aiecs/tools/langchain_adapter.py +300 -138
- aiecs/tools/schema_generator.py +455 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +581 -0
- aiecs/tools/search_tool/cache.py +264 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +224 -0
- aiecs/tools/search_tool/core.py +778 -0
- aiecs/tools/search_tool/deduplicator.py +119 -0
- aiecs/tools/search_tool/error_handler.py +242 -0
- aiecs/tools/search_tool/metrics.py +343 -0
- aiecs/tools/search_tool/rate_limiter.py +172 -0
- aiecs/tools/search_tool/schemas.py +275 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
- aiecs/tools/statistics/data_loader_tool.py +555 -0
- aiecs/tools/statistics/data_profiler_tool.py +638 -0
- aiecs/tools/statistics/data_transformer_tool.py +580 -0
- aiecs/tools/statistics/data_visualizer_tool.py +498 -0
- aiecs/tools/statistics/model_trainer_tool.py +507 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
- aiecs/tools/task_tools/__init__.py +49 -36
- aiecs/tools/task_tools/chart_tool.py +200 -184
- aiecs/tools/task_tools/classfire_tool.py +268 -267
- aiecs/tools/task_tools/image_tool.py +175 -131
- aiecs/tools/task_tools/office_tool.py +226 -146
- aiecs/tools/task_tools/pandas_tool.py +477 -121
- aiecs/tools/task_tools/report_tool.py +390 -142
- aiecs/tools/task_tools/research_tool.py +149 -79
- aiecs/tools/task_tools/scraper_tool.py +339 -145
- aiecs/tools/task_tools/stats_tool.py +448 -209
- aiecs/tools/temp_file_manager.py +26 -24
- aiecs/tools/tool_executor/__init__.py +18 -16
- aiecs/tools/tool_executor/tool_executor.py +364 -52
- aiecs/utils/LLM_output_structor.py +74 -48
- aiecs/utils/__init__.py +14 -3
- aiecs/utils/base_callback.py +0 -3
- aiecs/utils/cache_provider.py +696 -0
- aiecs/utils/execution_utils.py +50 -31
- aiecs/utils/prompt_loader.py +1 -0
- aiecs/utils/token_usage_repository.py +37 -11
- aiecs/ws/socket_server.py +14 -4
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
- aiecs-1.7.6.dist-info/RECORD +337 -0
- aiecs-1.7.6.dist-info/entry_points.txt +13 -0
- aiecs/config/registry.py +0 -19
- aiecs/domain/context/content_engine.py +0 -982
- aiecs/llm/base_client.py +0 -99
- aiecs/llm/openai_client.py +0 -125
- aiecs/llm/vertex_client.py +0 -186
- aiecs/llm/xai_client.py +0 -184
- aiecs/scripts/dependency_checker.py +0 -857
- aiecs/scripts/quick_dependency_check.py +0 -269
- aiecs/tools/task_tools/search_api.py +0 -7
- aiecs-1.0.1.dist-info/RECORD +0 -90
- aiecs-1.0.1.dist-info/entry_points.txt +0 -7
- /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
- /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
- /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
- /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,422 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LLM-based Entity Extractor
|
|
3
|
+
|
|
4
|
+
Extracts entities from text using Large Language Models (GPT-4, Gemini, etc.).
|
|
5
|
+
Uses AIECS's LLM client infrastructure for provider-agnostic extraction.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import uuid
|
|
10
|
+
from typing import List, Optional, Dict, Any, Union, TYPE_CHECKING
|
|
11
|
+
from aiecs.application.knowledge_graph.extractors.base import EntityExtractor
|
|
12
|
+
from aiecs.domain.knowledge_graph.models.entity import Entity
|
|
13
|
+
from aiecs.domain.knowledge_graph.schema.graph_schema import GraphSchema
|
|
14
|
+
from aiecs.llm import get_llm_manager, AIProvider, LLMClientManager, LLMClientFactory
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from aiecs.llm.protocols import LLMClientProtocol
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class LLMEntityExtractor(EntityExtractor):
|
|
21
|
+
"""
|
|
22
|
+
Extract entities using Large Language Models
|
|
23
|
+
|
|
24
|
+
This extractor uses LLMs (like GPT-4, Gemini) to identify and extract entities
|
|
25
|
+
from text. It's schema-aware and can extract custom entity types with properties.
|
|
26
|
+
|
|
27
|
+
Features:
|
|
28
|
+
- Schema-guided extraction (tells LLM what entity types to look for)
|
|
29
|
+
- Property extraction (not just entity names, but also attributes)
|
|
30
|
+
- Confidence scoring (LLM provides confidence for each entity)
|
|
31
|
+
- Configurable LLM provider (Vertex AI default, configurable)
|
|
32
|
+
|
|
33
|
+
Example:
|
|
34
|
+
```python
|
|
35
|
+
from aiecs.llm import AIProvider
|
|
36
|
+
|
|
37
|
+
extractor = LLMEntityExtractor(
|
|
38
|
+
schema=graph_schema,
|
|
39
|
+
provider=AIProvider.VERTEX,
|
|
40
|
+
model="gemini-pro"
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
entities = await extractor.extract_entities(
|
|
44
|
+
"Alice, a 30-year-old data scientist, works at Tech Corp."
|
|
45
|
+
)
|
|
46
|
+
# Returns: [
|
|
47
|
+
# Entity(type="Person", properties={"name": "Alice", "age": 30, "occupation": "data scientist"}),
|
|
48
|
+
# Entity(type="Company", properties={"name": "Tech Corp"})
|
|
49
|
+
# ]
|
|
50
|
+
```
|
|
51
|
+
"""
|
|
52
|
+
|
|
53
|
+
def __init__(
|
|
54
|
+
self,
|
|
55
|
+
schema: Optional[GraphSchema] = None,
|
|
56
|
+
provider: Optional[Union[AIProvider, str]] = None,
|
|
57
|
+
model: Optional[str] = None,
|
|
58
|
+
temperature: float = 0.1, # Low temperature for more deterministic extraction
|
|
59
|
+
max_tokens: Optional[int] = 2000,
|
|
60
|
+
llm_client: Optional["LLMClientProtocol"] = None,
|
|
61
|
+
):
|
|
62
|
+
"""
|
|
63
|
+
Initialize LLM entity extractor
|
|
64
|
+
|
|
65
|
+
Args:
|
|
66
|
+
schema: Optional GraphSchema to guide extraction (provides entity types and properties)
|
|
67
|
+
provider: LLM provider to use (default: Vertex AI via AIECS configuration)
|
|
68
|
+
Can be AIProvider enum or custom provider name string
|
|
69
|
+
model: Specific model to use (default: from AIECS configuration)
|
|
70
|
+
temperature: LLM temperature (0.1 = more deterministic, good for extraction)
|
|
71
|
+
max_tokens: Maximum tokens in response
|
|
72
|
+
llm_client: Optional custom LLM client implementing LLMClientProtocol
|
|
73
|
+
If provided, this client will be used directly instead of creating one via provider
|
|
74
|
+
"""
|
|
75
|
+
self.schema = schema
|
|
76
|
+
self.provider = provider
|
|
77
|
+
self.model = model
|
|
78
|
+
self.temperature = temperature
|
|
79
|
+
self.max_tokens = max_tokens
|
|
80
|
+
self.llm_client = llm_client
|
|
81
|
+
self._llm_manager: Optional[LLMClientManager] = None # Lazy-loaded in async methods
|
|
82
|
+
|
|
83
|
+
@staticmethod
def from_config(
    schema: Optional[GraphSchema] = None,
    provider: Optional[str] = None,
    model: Optional[str] = None,
    temperature: Optional[float] = None,
    max_tokens: Optional[int] = None,
) -> "LLMEntityExtractor":
    """
    Build an LLMEntityExtractor, filling unset arguments from AIECS settings.

    Any argument left as None is replaced by the matching
    ``kg_entity_extraction_*`` value from ``get_settings()``. When the
    resulting provider is truthy, a client is obtained through
    ``LLMClientFactory.get_client`` and handed to the extractor.

    Args:
        schema: Optional GraphSchema to guide extraction
        provider: LLM provider name; None falls back to the configured value
        model: Model name; None falls back to the configured value
        temperature: LLM temperature; None falls back to the configured value
        max_tokens: Maximum response tokens; None falls back to the configured value

    Returns:
        LLMEntityExtractor instance. Its ``llm_client`` is set only when a
        provider was resolved and the factory returned a truthy client.

    Example:
        ```python
        extractor = LLMEntityExtractor.from_config(
            provider="OpenAI",
            model="gpt-4",
            temperature=0.1
        )
        ```
    """
    from aiecs.config import get_settings

    cfg = get_settings()

    # Explicit arguments win; only None values are replaced by configuration.
    if provider is None:
        # An empty configured provider is normalised to None
        provider = cfg.kg_entity_extraction_llm_provider or None
    if model is None:
        model = cfg.kg_entity_extraction_llm_model or None
    if temperature is None:
        temperature = cfg.kg_entity_extraction_temperature
    if max_tokens is None:
        max_tokens = cfg.kg_entity_extraction_max_tokens

    # Stays None unless a provider is known and the factory yields a client.
    resolved_client = None
    if provider:
        factory_client = LLMClientFactory.get_client(provider)
        # cast() only informs the type checker; the object is passed through unchanged
        from typing import cast
        from aiecs.llm.protocols import LLMClientProtocol

        if factory_client:
            resolved_client = cast(LLMClientProtocol, factory_client)

    return LLMEntityExtractor(
        schema=schema,
        provider=provider,
        model=model,
        temperature=temperature,
        max_tokens=max_tokens,
        llm_client=resolved_client,
    )
|
|
157
|
+
|
|
158
|
+
async def extract_entities(self, text: str, entity_types: Optional[List[str]] = None, **kwargs) -> List[Entity]:
    """
    Ask the LLM for entities in ``text`` and return them as Entity objects.

    Args:
        text: Input text to extract entities from
        entity_types: Optional filter for specific entity types
        **kwargs: Accepted for interface compatibility; not read by this method

    Returns:
        The list produced by parsing the LLM response content

    Raises:
        ValueError: If text is empty or only whitespace
        RuntimeError: If the LLM call or the response parsing raises
    """
    if not (text and text.strip()):
        raise ValueError("Input text cannot be empty")

    extraction_prompt = self._build_extraction_prompt(text, entity_types)

    try:
        if self.llm_client is None:
            # No injected client: go through the shared manager, fetched once and cached
            if self._llm_manager is None:
                self._llm_manager = await get_llm_manager()

            llm_response = await self._llm_manager.generate_text(
                messages=extraction_prompt,
                provider=self.provider,
                model=self.model,
                temperature=self.temperature,
                max_tokens=self.max_tokens,
            )
        else:
            # Injected client: it is given the prompt as a single user message
            from aiecs.llm.clients.base_client import LLMMessage

            user_messages = [LLMMessage(role="user", content=extraction_prompt)]
            llm_response = await self.llm_client.generate_text(
                messages=user_messages,
                model=self.model,
                temperature=self.temperature,
                max_tokens=self.max_tokens,
            )

        return self._parse_llm_response(llm_response.content)

    except Exception as exc:
        # Every failure inside the LLM round-trip surfaces as RuntimeError, with the cause chained
        raise RuntimeError(f"LLM entity extraction failed: {str(exc)}") from exc
|
|
214
|
+
|
|
215
|
+
def _build_extraction_prompt(self, text: str, entity_types: Optional[List[str]] = None) -> str:
|
|
216
|
+
"""
|
|
217
|
+
Build prompt for LLM entity extraction
|
|
218
|
+
|
|
219
|
+
The prompt is structured to:
|
|
220
|
+
1. Explain the task (entity extraction)
|
|
221
|
+
2. Provide entity type schemas (if available)
|
|
222
|
+
3. Request JSON output format
|
|
223
|
+
4. Include the text to extract from
|
|
224
|
+
|
|
225
|
+
Args:
|
|
226
|
+
text: Input text
|
|
227
|
+
entity_types: Optional filter for entity types
|
|
228
|
+
|
|
229
|
+
Returns:
|
|
230
|
+
Formatted prompt string
|
|
231
|
+
"""
|
|
232
|
+
# Determine which entity types to extract
|
|
233
|
+
types_to_extract = []
|
|
234
|
+
if self.schema:
|
|
235
|
+
available_types = self.schema.get_entity_type_names()
|
|
236
|
+
if entity_types:
|
|
237
|
+
# Filter to requested types that exist in schema
|
|
238
|
+
types_to_extract = [t for t in entity_types if t in available_types]
|
|
239
|
+
else:
|
|
240
|
+
# Use all types from schema
|
|
241
|
+
types_to_extract = available_types
|
|
242
|
+
elif entity_types:
|
|
243
|
+
# No schema, but user specified types
|
|
244
|
+
types_to_extract = entity_types
|
|
245
|
+
else:
|
|
246
|
+
# No schema and no filter - use common types
|
|
247
|
+
types_to_extract = [
|
|
248
|
+
"Person",
|
|
249
|
+
"Organization",
|
|
250
|
+
"Location",
|
|
251
|
+
"Event",
|
|
252
|
+
"Product",
|
|
253
|
+
]
|
|
254
|
+
|
|
255
|
+
# Build entity type descriptions
|
|
256
|
+
type_descriptions = []
|
|
257
|
+
for entity_type in types_to_extract:
|
|
258
|
+
if self.schema and self.schema.has_entity_type(entity_type):
|
|
259
|
+
# Use schema definition
|
|
260
|
+
schema_type = self.schema.get_entity_type(entity_type)
|
|
261
|
+
if schema_type is not None:
|
|
262
|
+
properties = list(schema_type.properties.keys()) if schema_type.properties else []
|
|
263
|
+
prop_str = ", ".join(properties) if properties else "any relevant properties"
|
|
264
|
+
desc = f"- {entity_type}: {schema_type.description or 'Extract properties: ' + prop_str}"
|
|
265
|
+
type_descriptions.append(desc)
|
|
266
|
+
else:
|
|
267
|
+
type_descriptions.append(f"- {entity_type}: Extract name and any relevant properties")
|
|
268
|
+
else:
|
|
269
|
+
# Generic description
|
|
270
|
+
type_descriptions.append(f"- {entity_type}: Extract name and any relevant properties")
|
|
271
|
+
|
|
272
|
+
types_description = "\n".join(type_descriptions)
|
|
273
|
+
|
|
274
|
+
# Build prompt
|
|
275
|
+
prompt = f"""You are an expert at extracting structured entities from text.
|
|
276
|
+
|
|
277
|
+
Extract entities of the following types from the text:
|
|
278
|
+
{types_description}
|
|
279
|
+
|
|
280
|
+
For each entity, provide:
|
|
281
|
+
1. type: The entity type (one of the types listed above)
|
|
282
|
+
2. properties: A dictionary of properties (e.g., name, age, location, etc.)
|
|
283
|
+
3. confidence: Your confidence in this extraction (0.0 to 1.0)
|
|
284
|
+
|
|
285
|
+
Return ONLY a valid JSON array with this structure:
|
|
286
|
+
[
|
|
287
|
+
{{
|
|
288
|
+
"type": "EntityType",
|
|
289
|
+
"properties": {{"property1": "value1", "property2": "value2"}},
|
|
290
|
+
"confidence": 0.95
|
|
291
|
+
}}
|
|
292
|
+
]
|
|
293
|
+
|
|
294
|
+
Important:
|
|
295
|
+
- Extract ALL entities you find of the specified types
|
|
296
|
+
- Include as many relevant properties as you can find
|
|
297
|
+
- Use consistent property names (e.g., "name" not "title" or "full_name")
|
|
298
|
+
- If unsure about a property, omit it rather than guessing
|
|
299
|
+
- Confidence should reflect how certain you are about the entity and its properties
|
|
300
|
+
|
|
301
|
+
Text to extract from:
|
|
302
|
+
\"\"\"{text}\"\"\"
|
|
303
|
+
|
|
304
|
+
JSON output:"""
|
|
305
|
+
|
|
306
|
+
return prompt
|
|
307
|
+
|
|
308
|
+
def _parse_llm_response(self, response_text: str) -> List[Entity]:
    """
    Parse an LLM response into Entity objects.

    Expected JSON format:
    [
        {"type": "Person", "properties": {"name": "Alice", "age": 30}, "confidence": 0.95},
        {"type": "Company", "properties": {"name": "Tech Corp"}, "confidence": 0.90}
    ]

    Parsing is best-effort: a response that is not valid JSON yields an
    empty list (after printing a warning), and individual malformed items
    are skipped or normalised instead of aborting the whole batch.

    Args:
        response_text: LLM response string (should be JSON, possibly
            surrounded by extra text)

    Returns:
        List of Entity objects. Each entity carries the extraction
        confidence (a float clamped to [0.0, 1.0]) under the
        "_extraction_confidence" property.
    """
    try:
        # Extract JSON from response (LLM might include extra text)
        json_str = self._extract_json_from_text(response_text)
        extracted_data = json.loads(json_str)
    except json.JSONDecodeError as e:
        # Report the problem but don't fail completely
        print(f"Warning: Failed to parse LLM response as JSON: {e}")
        print(f"Response was: {response_text[:200]}...")
        return []

    if not isinstance(extracted_data, list):
        # Sometimes LLM returns single object instead of array
        extracted_data = [extracted_data]

    entities = []
    for item in extracted_data:
        # Valid JSON can still contain strings, numbers or nulls where an
        # entity object was expected; those cannot be turned into entities.
        if not isinstance(item, dict):
            continue

        # A missing, null or empty type falls back to "Unknown".
        entity_type = item.get("type") or "Unknown"
        if not isinstance(entity_type, str):
            entity_type = str(entity_type)

        # "properties": null (or any non-object) is treated as no properties.
        properties = item.get("properties")
        if not isinstance(properties, dict):
            properties = {}

        # Confidence must be numeric; fall back to the neutral default
        # when the model returns something that cannot be read as a float.
        try:
            confidence = float(item.get("confidence", 0.5))
        except (TypeError, ValueError):
            confidence = 0.5
        confidence = max(0.0, min(1.0, confidence))

        # Generate unique ID
        entity_id = self._generate_entity_id(entity_type, properties)

        entity = Entity(
            id=entity_id,
            entity_type=entity_type,
            properties=properties,
        )

        # Store confidence in properties for later use
        entity.properties["_extraction_confidence"] = confidence

        entities.append(entity)

    return entities
|
|
366
|
+
|
|
367
|
+
def _extract_json_from_text(self, text: str) -> str:
|
|
368
|
+
"""
|
|
369
|
+
Extract JSON array from text (handles cases where LLM includes extra text)
|
|
370
|
+
|
|
371
|
+
Args:
|
|
372
|
+
text: Response text that may contain JSON
|
|
373
|
+
|
|
374
|
+
Returns:
|
|
375
|
+
Extracted JSON string
|
|
376
|
+
"""
|
|
377
|
+
# Find JSON array boundaries
|
|
378
|
+
start = text.find("[")
|
|
379
|
+
end = text.rfind("]") + 1
|
|
380
|
+
|
|
381
|
+
if start != -1 and end > start:
|
|
382
|
+
return text[start:end]
|
|
383
|
+
|
|
384
|
+
# Try to find JSON object (single entity)
|
|
385
|
+
start = text.find("{")
|
|
386
|
+
end = text.rfind("}") + 1
|
|
387
|
+
|
|
388
|
+
if start != -1 and end > start:
|
|
389
|
+
return text[start:end]
|
|
390
|
+
|
|
391
|
+
# No JSON found, return original
|
|
392
|
+
return text
|
|
393
|
+
|
|
394
|
+
def _generate_entity_id(self, entity_type: str, properties: Dict[str, Any]) -> str:
|
|
395
|
+
"""
|
|
396
|
+
Generate a unique ID for an entity
|
|
397
|
+
|
|
398
|
+
Uses entity type + key property (usually "name") to create deterministic ID,
|
|
399
|
+
with fallback to UUID for uniqueness.
|
|
400
|
+
|
|
401
|
+
Args:
|
|
402
|
+
entity_type: Entity type name
|
|
403
|
+
properties: Entity properties
|
|
404
|
+
|
|
405
|
+
Returns:
|
|
406
|
+
Unique entity ID string
|
|
407
|
+
"""
|
|
408
|
+
# Try to use name for deterministic ID
|
|
409
|
+
name = properties.get("name") or properties.get("title") or properties.get("id")
|
|
410
|
+
|
|
411
|
+
if name:
|
|
412
|
+
# Create deterministic ID from type + name
|
|
413
|
+
# Normalize to lowercase and remove spaces
|
|
414
|
+
normalized = f"{entity_type}_{name}".lower().replace(" ", "_")
|
|
415
|
+
# Add short hash for uniqueness
|
|
416
|
+
import hashlib
|
|
417
|
+
|
|
418
|
+
hash_suffix = hashlib.md5(normalized.encode()).hexdigest()[:8]
|
|
419
|
+
return f"{normalized}_{hash_suffix}"
|
|
420
|
+
else:
|
|
421
|
+
# No name property, use UUID
|
|
422
|
+
return f"{entity_type.lower()}_{uuid.uuid4().hex[:12]}"
|