aiecs 1.0.1__py3-none-any.whl → 1.7.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiecs might be problematic. Click here for more details.
- aiecs/__init__.py +13 -16
- aiecs/__main__.py +7 -7
- aiecs/aiecs_client.py +269 -75
- aiecs/application/executors/operation_executor.py +79 -54
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
- aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
- aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
- aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +98 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
- aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
- aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
- aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
- aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
- aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
- aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
- aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
- aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
- aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
- aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
- aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
- aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
- aiecs/application/knowledge_graph/search/reranker.py +293 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +471 -0
- aiecs/config/__init__.py +20 -5
- aiecs/config/config.py +762 -31
- aiecs/config/graph_config.py +131 -0
- aiecs/config/tool_config.py +435 -0
- aiecs/core/__init__.py +29 -13
- aiecs/core/interface/__init__.py +2 -2
- aiecs/core/interface/execution_interface.py +22 -22
- aiecs/core/interface/storage_interface.py +37 -88
- aiecs/core/registry/__init__.py +31 -0
- aiecs/core/registry/service_registry.py +92 -0
- aiecs/domain/__init__.py +270 -1
- aiecs/domain/agent/__init__.py +191 -0
- aiecs/domain/agent/base_agent.py +3949 -0
- aiecs/domain/agent/exceptions.py +99 -0
- aiecs/domain/agent/graph_aware_mixin.py +569 -0
- aiecs/domain/agent/hybrid_agent.py +1731 -0
- aiecs/domain/agent/integration/__init__.py +29 -0
- aiecs/domain/agent/integration/context_compressor.py +216 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
- aiecs/domain/agent/integration/protocols.py +281 -0
- aiecs/domain/agent/integration/retry_policy.py +218 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
- aiecs/domain/agent/lifecycle.py +291 -0
- aiecs/domain/agent/llm_agent.py +692 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +1124 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +163 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
- aiecs/domain/agent/models.py +894 -0
- aiecs/domain/agent/observability.py +479 -0
- aiecs/domain/agent/persistence.py +449 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +159 -0
- aiecs/domain/agent/prompts/formatters.py +187 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +253 -0
- aiecs/domain/agent/tool_agent.py +444 -0
- aiecs/domain/agent/tools/__init__.py +15 -0
- aiecs/domain/agent/tools/schema_generator.py +377 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +469 -0
- aiecs/domain/community/analytics.py +432 -0
- aiecs/domain/community/collaborative_workflow.py +648 -0
- aiecs/domain/community/communication_hub.py +634 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +796 -0
- aiecs/domain/community/community_manager.py +803 -0
- aiecs/domain/community/decision_engine.py +849 -0
- aiecs/domain/community/exceptions.py +231 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +234 -0
- aiecs/domain/community/resource_manager.py +461 -0
- aiecs/domain/community/shared_context_manager.py +589 -0
- aiecs/domain/context/__init__.py +40 -10
- aiecs/domain/context/context_engine.py +1910 -0
- aiecs/domain/context/conversation_models.py +87 -53
- aiecs/domain/context/graph_memory.py +582 -0
- aiecs/domain/execution/model.py +12 -4
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +148 -0
- aiecs/domain/knowledge_graph/models/evidence.py +178 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
- aiecs/domain/knowledge_graph/models/path.py +171 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
- aiecs/domain/knowledge_graph/models/query.py +261 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
- aiecs/domain/knowledge_graph/models/relation.py +202 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
- aiecs/domain/task/dsl_processor.py +172 -56
- aiecs/domain/task/model.py +20 -8
- aiecs/domain/task/task_context.py +27 -24
- aiecs/infrastructure/__init__.py +0 -2
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +837 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
- aiecs/infrastructure/graph_storage/cache.py +424 -0
- aiecs/infrastructure/graph_storage/distributed.py +223 -0
- aiecs/infrastructure/graph_storage/error_handling.py +380 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
- aiecs/infrastructure/graph_storage/metrics.py +344 -0
- aiecs/infrastructure/graph_storage/migration.py +400 -0
- aiecs/infrastructure/graph_storage/pagination.py +483 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
- aiecs/infrastructure/graph_storage/postgres.py +1563 -0
- aiecs/infrastructure/graph_storage/property_storage.py +353 -0
- aiecs/infrastructure/graph_storage/protocols.py +76 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
- aiecs/infrastructure/graph_storage/streaming.py +487 -0
- aiecs/infrastructure/graph_storage/tenant.py +412 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
- aiecs/infrastructure/messaging/websocket_manager.py +51 -35
- aiecs/infrastructure/monitoring/__init__.py +22 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
- aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
- aiecs/infrastructure/monitoring/structured_logger.py +3 -7
- aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
- aiecs/infrastructure/persistence/__init__.py +14 -1
- aiecs/infrastructure/persistence/context_engine_client.py +184 -0
- aiecs/infrastructure/persistence/database_manager.py +67 -43
- aiecs/infrastructure/persistence/file_storage.py +180 -103
- aiecs/infrastructure/persistence/redis_client.py +74 -21
- aiecs/llm/__init__.py +73 -25
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
- aiecs/llm/client_factory.py +230 -37
- aiecs/llm/client_resolver.py +155 -0
- aiecs/llm/clients/__init__.py +38 -0
- aiecs/llm/clients/base_client.py +328 -0
- aiecs/llm/clients/google_function_calling_mixin.py +415 -0
- aiecs/llm/clients/googleai_client.py +314 -0
- aiecs/llm/clients/openai_client.py +158 -0
- aiecs/llm/clients/openai_compatible_mixin.py +367 -0
- aiecs/llm/clients/vertex_client.py +1186 -0
- aiecs/llm/clients/xai_client.py +201 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +272 -0
- aiecs/llm/config/config_validator.py +206 -0
- aiecs/llm/config/model_config.py +143 -0
- aiecs/llm/protocols.py +149 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +89 -0
- aiecs/main.py +140 -121
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/module_checker.py +499 -0
- aiecs/scripts/aid/version_manager.py +235 -0
- aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
- aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
- aiecs/scripts/dependance_check/__init__.py +15 -0
- aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
- aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
- aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
- aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
- aiecs/scripts/knowledge_graph/__init__.py +3 -0
- aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
- aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
- aiecs/scripts/tools_develop/README.md +671 -0
- aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
- aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
- aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
- aiecs/scripts/tools_develop/schema_coverage.py +511 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
- aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
- aiecs/scripts/tools_develop/verify_tools.py +352 -0
- aiecs/tasks/__init__.py +0 -1
- aiecs/tasks/worker.py +115 -47
- aiecs/tools/__init__.py +194 -72
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +330 -0
- aiecs/tools/apisource/providers/__init__.py +112 -0
- aiecs/tools/apisource/providers/base.py +671 -0
- aiecs/tools/apisource/providers/census.py +397 -0
- aiecs/tools/apisource/providers/fred.py +535 -0
- aiecs/tools/apisource/providers/newsapi.py +409 -0
- aiecs/tools/apisource/providers/worldbank.py +352 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +363 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
- aiecs/tools/apisource/tool.py +832 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +334 -0
- aiecs/tools/base_tool.py +415 -21
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +607 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
- aiecs/tools/docs/content_insertion_tool.py +1320 -0
- aiecs/tools/docs/document_creator_tool.py +1464 -0
- aiecs/tools/docs/document_layout_tool.py +1160 -0
- aiecs/tools/docs/document_parser_tool.py +1016 -0
- aiecs/tools/docs/document_writer_tool.py +2008 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
- aiecs/tools/langchain_adapter.py +300 -138
- aiecs/tools/schema_generator.py +455 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +581 -0
- aiecs/tools/search_tool/cache.py +264 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +224 -0
- aiecs/tools/search_tool/core.py +778 -0
- aiecs/tools/search_tool/deduplicator.py +119 -0
- aiecs/tools/search_tool/error_handler.py +242 -0
- aiecs/tools/search_tool/metrics.py +343 -0
- aiecs/tools/search_tool/rate_limiter.py +172 -0
- aiecs/tools/search_tool/schemas.py +275 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
- aiecs/tools/statistics/data_loader_tool.py +555 -0
- aiecs/tools/statistics/data_profiler_tool.py +638 -0
- aiecs/tools/statistics/data_transformer_tool.py +580 -0
- aiecs/tools/statistics/data_visualizer_tool.py +498 -0
- aiecs/tools/statistics/model_trainer_tool.py +507 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
- aiecs/tools/task_tools/__init__.py +49 -36
- aiecs/tools/task_tools/chart_tool.py +200 -184
- aiecs/tools/task_tools/classfire_tool.py +268 -267
- aiecs/tools/task_tools/image_tool.py +220 -141
- aiecs/tools/task_tools/office_tool.py +226 -146
- aiecs/tools/task_tools/pandas_tool.py +477 -121
- aiecs/tools/task_tools/report_tool.py +390 -142
- aiecs/tools/task_tools/research_tool.py +149 -79
- aiecs/tools/task_tools/scraper_tool.py +339 -145
- aiecs/tools/task_tools/stats_tool.py +448 -209
- aiecs/tools/temp_file_manager.py +26 -24
- aiecs/tools/tool_executor/__init__.py +18 -16
- aiecs/tools/tool_executor/tool_executor.py +364 -52
- aiecs/utils/LLM_output_structor.py +74 -48
- aiecs/utils/__init__.py +14 -3
- aiecs/utils/base_callback.py +0 -3
- aiecs/utils/cache_provider.py +696 -0
- aiecs/utils/execution_utils.py +50 -31
- aiecs/utils/prompt_loader.py +1 -0
- aiecs/utils/token_usage_repository.py +37 -11
- aiecs/ws/socket_server.py +14 -4
- {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/METADATA +52 -15
- aiecs-1.7.17.dist-info/RECORD +337 -0
- aiecs-1.7.17.dist-info/entry_points.txt +13 -0
- aiecs/config/registry.py +0 -19
- aiecs/domain/context/content_engine.py +0 -982
- aiecs/llm/base_client.py +0 -99
- aiecs/llm/openai_client.py +0 -125
- aiecs/llm/vertex_client.py +0 -186
- aiecs/llm/xai_client.py +0 -184
- aiecs/scripts/dependency_checker.py +0 -857
- aiecs/scripts/quick_dependency_check.py +0 -269
- aiecs/tools/task_tools/search_api.py +0 -7
- aiecs-1.0.1.dist-info/RECORD +0 -90
- aiecs-1.0.1.dist-info/entry_points.txt +0 -7
- /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
- /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
- /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
- /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/WHEEL +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Search Result Enhancement and Ranking
|
|
3
|
+
|
|
4
|
+
Intelligently scores and filters search results:
|
|
5
|
+
- Calculate relevance scores using keyword matching
|
|
6
|
+
- Compute popularity scores
|
|
7
|
+
- Calculate recency/freshness scores
|
|
8
|
+
- Apply composite scoring with configurable weights
|
|
9
|
+
- Filter by quality, relevance, and date ranges
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import logging
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
from typing import Any, Dict, List, Optional
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class SearchEnhancer:
    """
    Enhances search results with relevance scoring and intelligent filtering.

    Each result dict is scored on three axes (relevance, popularity, recency),
    the scores are blended into a composite using the normalized weights given
    to the constructor, and the outcome is attached to a shallow copy of the
    result under the ``_search_metadata`` key. Results can then be filtered,
    sorted and truncated via the ``options`` passed to
    :meth:`enhance_search_results`.

    All dates handled internally are naive datetimes expressed in UTC, so
    timestamps with and without an explicit offset can be compared safely.
    """

    # Fields inspected when scoring recency; the most recent parseable one wins.
    _RECENCY_DATE_FIELDS = (
        "updated",
        "last_updated",
        "observation_end",
        "date",
        "publishedAt",
        "last_modified",
    )

    # Fields inspected (in priority order) by the date-range filter. This list
    # is intentionally kept as in the original implementation; it is narrower
    # than the recency list above.
    _FILTER_DATE_FIELDS = (
        "date",
        "observation_end",
        "last_updated",
        "publishedAt",
    )

    # Numeric popularity indicators, checked in this order.
    # NOTE(review): "popularity_rank" is scored as "higher is better" like the
    # other fields -- confirm that matches what the providers emit.
    _POPULARITY_FIELDS = (
        "popularity",
        "usage_count",
        "frequency",
        "popularity_rank",
    )

    # Textual update frequency -> popularity score.
    _FREQUENCY_SCORES = {"Daily": 1.0, "Weekly": 0.8, "Monthly": 0.6}

    # (exclusive upper bound on age in days, recency score)
    _RECENCY_BANDS = (
        (7, 1.0),  # Very recent
        (30, 0.9),  # Recent
        (90, 0.7),  # Somewhat recent
        (365, 0.5),  # This year
        (365 * 2, 0.3),  # Last 2 years
    )

    # Public ``sort_by`` value -> key inside ``_search_metadata``.
    _SORT_KEYS = {
        "relevance": "relevance_score",
        "popularity": "popularity_score",
        "recency": "recency_score",
        "composite": "composite_score",
    }

    def __init__(
        self,
        relevance_weight: float = 0.5,
        popularity_weight: float = 0.3,
        recency_weight: float = 0.2,
    ):
        """
        Initialize search enhancer.

        The three weights are normalized so that they sum to 1.

        Args:
            relevance_weight: Weight for relevance score in composite score
            popularity_weight: Weight for popularity score in composite score
            recency_weight: Weight for recency score in composite score

        Raises:
            ValueError: If the weights do not sum to a positive number
                (normalization would divide by zero or invert the ranking).
        """
        total_weight = relevance_weight + popularity_weight + recency_weight
        if total_weight <= 0:
            raise ValueError(
                "relevance_weight, popularity_weight and recency_weight must sum to a positive number"
            )

        # Normalize weights
        self.relevance_weight = relevance_weight / total_weight
        self.popularity_weight = popularity_weight / total_weight
        self.recency_weight = recency_weight / total_weight

    def enhance_search_results(
        self,
        query: str,
        results: List[Dict[str, Any]],
        options: Optional[Dict[str, Any]] = None,
    ) -> List[Dict[str, Any]]:
        """
        Enhance search results with scoring and filtering.

        The input dicts are not modified; each returned item is a shallow copy
        carrying an extra ``_search_metadata`` dict.

        Args:
            query: Original search query
            results: Raw search results
            options: Enhancement options:
                - relevance_threshold: Minimum composite score (0-1)
                - sort_by: Sort method ('relevance', 'popularity', 'recency', 'composite')
                - date_range: {'start': 'YYYY-MM-DD', 'end': 'YYYY-MM-DD'} (both inclusive)
                - min_quality_score: Minimum quality score (0-1)
                - max_results: Maximum number of results to return

        Returns:
            Enhanced and filtered results
        """
        if not results:
            return []

        options = options or {}
        enhanced = []

        for result in results:
            # Calculate scores
            relevance = self._calculate_relevance(query, result)
            popularity = self._get_popularity_score(result)
            recency = self._calculate_recency(result)

            # Calculate composite score
            composite_score = (
                relevance * self.relevance_weight
                + popularity * self.popularity_weight
                + recency * self.recency_weight
            )

            # Add search metadata
            result_copy = result.copy()
            result_copy["_search_metadata"] = {
                "relevance_score": round(relevance, 3),
                "popularity_score": round(popularity, 3),
                "recency_score": round(recency, 3),
                "composite_score": round(composite_score, 3),
                "match_type": self._get_match_type(query, result),
            }

            # Apply filters
            if self._passes_filters(result_copy, options):
                enhanced.append(result_copy)

        # Sort results
        enhanced = self._sort_results(enhanced, options.get("sort_by", "composite"))

        # Apply max results limit
        max_results = options.get("max_results")
        if max_results and max_results > 0:
            enhanced = enhanced[:max_results]

        return enhanced

    def _calculate_relevance(self, query: str, result: Dict[str, Any]) -> float:
        """
        Calculate relevance score using keyword matching.

        Query terms are matched as case-insensitive substrings against the
        title-like fields (weight 0.7) and the description-like fields
        (weight 0.3). A whole-phrase hit in the title boosts the score by 50%,
        capped at 1.0.

        Args:
            query: Search query
            result: Result item

        Returns:
            Relevance score (0-1)
        """
        query_terms = set(query.lower().split())
        if not query_terms:
            return 0.0

        # Extract searchable text from result (missing / None fields are skipped
        # so that str(None) == "none" can never produce a spurious match)
        title_text = " ".join(
            str(result[field]).lower() for field in ("title", "name") if result.get(field) is not None
        )
        description_text = " ".join(
            str(result[field]).lower()
            for field in ("description", "notes", "sourceNote")
            if result.get(field) is not None
        )

        term_count = len(query_terms)

        # Fraction of query terms found in title / description
        title_score = sum(1 for term in query_terms if term in title_text) / term_count
        desc_score = sum(1 for term in query_terms if term in description_text) / term_count

        # Weight title matches more heavily
        relevance = title_score * 0.7 + desc_score * 0.3

        # Boost for exact phrase match; whitespace in the query is collapsed so
        # stray leading/trailing/double spaces do not defeat the phrase check
        phrase = " ".join(query.lower().split())
        if phrase in title_text:
            relevance = min(relevance * 1.5, 1.0)

        return relevance

    def _get_popularity_score(self, result: Dict[str, Any]) -> float:
        """
        Calculate popularity score based on usage indicators.

        The first numeric popularity field found is scaled by 1/100 and clamped
        to the 0-1 range. Otherwise a textual ``frequency`` of Daily / Weekly /
        Monthly maps to a fixed score, and anything else yields 0.5.

        Args:
            result: Result item

        Returns:
            Popularity score (0-1)
        """
        # Look for popularity indicators
        for field in self._POPULARITY_FIELDS:
            value = result.get(field)
            # bool is a subclass of int but is not a meaningful popularity value
            if isinstance(value, (int, float)) and not isinstance(value, bool):
                # Normalize to 0-1 range (assumes max popularity of 100);
                # clamp on both sides so negative inputs cannot go below 0
                return max(0.0, min(value / 100, 1.0))

        # More frequent updates = more popular
        frequency = result.get("frequency")
        if isinstance(frequency, str) and frequency in self._FREQUENCY_SCORES:
            return self._FREQUENCY_SCORES[frequency]

        # Default: medium popularity
        return 0.5

    def _calculate_recency(self, result: Dict[str, Any]) -> float:
        """
        Calculate recency/freshness score.

        The most recent parseable date among the known date fields is used.
        Dates in the future are treated as "very recent".

        Args:
            result: Result item

        Returns:
            Recency score (0-1); 0.5 when the result carries no parseable date
        """
        # Local import: the module-level import only brings in ``datetime``.
        from datetime import timezone

        parsed_dates = [self._parse_date(result[field]) for field in self._RECENCY_DATE_FIELDS if field in result]
        valid_dates = [parsed for parsed in parsed_dates if parsed is not None]

        if not valid_dates:
            # No date found, assume moderate recency
            return 0.5

        latest_date = max(valid_dates)

        # Calculate age in days. Both operands are naive UTC, so the
        # subtraction cannot fail with a naive/aware mismatch.
        now = datetime.now(timezone.utc).replace(tzinfo=None)
        age_days = (now - latest_date).days

        # Score based on age
        for max_age_days, score in self._RECENCY_BANDS:
            if age_days < max_age_days:
                return score

        # Older data, score decreases slowly down to a floor of 0.1
        return max(0.1, 0.3 - (age_days - 365 * 2) / (365 * 10))

    def _get_match_type(self, query: str, result: Dict[str, Any]) -> str:
        """
        Determine the type of match.

        All identifier fields are inspected: an exact match in any of them wins
        over a partial match in another. Empty queries and empty field values
        never count as a match.

        Args:
            query: Search query
            result: Result item

        Returns:
            Match type string ('exact', 'partial', 'fuzzy')
        """
        query_lower = query.lower().strip()
        if not query_lower:
            return "fuzzy"

        found_partial = False

        # Check title/name fields
        for field in ("title", "name", "id", "series_id"):
            raw_value = result.get(field)
            if raw_value is None:
                continue

            value = str(raw_value).lower().strip()
            if not value:
                # "" is a substring of every string; skip to avoid a bogus partial
                continue

            if value == query_lower:
                return "exact"
            if query_lower in value or value in query_lower:
                found_partial = True

        return "partial" if found_partial else "fuzzy"

    def _passes_filters(self, result: Dict[str, Any], options: Dict[str, Any]) -> bool:
        """
        Check if result passes filter criteria.

        Results lacking quality metadata or a parseable date are not rejected
        by the corresponding filter. An unparseable ``date_range`` is logged
        and ignored.

        Args:
            result: Result with _search_metadata
            options: Filter options

        Returns:
            True if result passes all filters
        """
        # Relevance threshold (an explicit None means "no threshold")
        threshold = options.get("relevance_threshold") or 0.0
        if result["_search_metadata"]["composite_score"] < threshold:
            return False

        # Quality score threshold
        min_quality = options.get("min_quality_score")
        if min_quality is not None:
            quality_score = self._extract_quality_score(result)
            if quality_score is not None and quality_score < min_quality:
                return False

        # Date range filter
        date_range = options.get("date_range")
        if date_range:
            result_date = self._extract_date(result)
            if result_date is not None:
                # Compare calendar days so that both bounds are inclusive even
                # when the result carries a time-of-day component.
                result_day = result_date.date()
                try:
                    start = date_range.get("start")
                    if start and result_day < datetime.strptime(start, "%Y-%m-%d").date():
                        return False

                    end = date_range.get("end")
                    if end and result_day > datetime.strptime(end, "%Y-%m-%d").date():
                        return False
                except (ValueError, TypeError):
                    logger.warning("Invalid date range format: %s", date_range)

        return True

    @staticmethod
    def _extract_quality_score(result: Dict[str, Any]) -> Optional[float]:
        """Return the quality score from ``_quality`` or ``metadata.quality``, if any."""
        quality = result.get("_quality")
        if isinstance(quality, dict) and quality.get("score") is not None:
            return quality["score"]

        metadata = result.get("metadata")
        if isinstance(metadata, dict):
            nested_quality = metadata.get("quality")
            if isinstance(nested_quality, dict):
                return nested_quality.get("score")

        return None

    @staticmethod
    def _parse_date(value: Any) -> Optional[datetime]:
        """
        Parse a date-like value into a naive UTC datetime.

        Values containing "T" are parsed as ISO-8601 timestamps (a trailing
        "Z" is accepted); anything else is parsed from its first ten
        characters as ``YYYY-MM-DD``.

        Args:
            value: Raw field value (typically a string)

        Returns:
            Naive datetime in UTC, or None if the value cannot be parsed
        """
        if value is None:
            return None

        text = str(value).strip()
        try:
            if "T" in text:
                # ISO format
                parsed = datetime.fromisoformat(text.replace("Z", "+00:00"))
            else:
                # Simple date format
                parsed = datetime.strptime(text[:10], "%Y-%m-%d")

            offset = parsed.utcoffset()
            if offset is not None:
                # Shift to UTC wall time before dropping the offset
                parsed = parsed - offset
            return parsed.replace(tzinfo=None)
        except (ValueError, TypeError, OverflowError):
            return None

    def _extract_date(self, result: Dict[str, Any]) -> Optional[datetime]:
        """
        Extract date from result.

        Args:
            result: Result item

        Returns:
            The first parseable date among the filter date fields as a naive
            UTC datetime, or None if there is none
        """
        for field in self._FILTER_DATE_FIELDS:
            if field in result:
                parsed = self._parse_date(result[field])
                if parsed is not None:
                    return parsed

        return None

    def _sort_results(self, results: List[Dict[str, Any]], sort_by: str) -> List[Dict[str, Any]]:
        """
        Sort results by specified criteria.

        Args:
            results: Results with _search_metadata
            sort_by: Sort method ('relevance', 'popularity', 'recency',
                'composite'); unknown values fall back to 'composite'

        Returns:
            New list sorted by descending score (ties keep their input order)
        """
        metadata_key = self._SORT_KEYS.get(sort_by, "composite_score")
        return sorted(
            results,
            key=lambda item: item["_search_metadata"][metadata_key],
            reverse=True,
        )
|