aiecs 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. aiecs/__init__.py +72 -0
  2. aiecs/__main__.py +41 -0
  3. aiecs/aiecs_client.py +469 -0
  4. aiecs/application/__init__.py +10 -0
  5. aiecs/application/executors/__init__.py +10 -0
  6. aiecs/application/executors/operation_executor.py +363 -0
  7. aiecs/application/knowledge_graph/__init__.py +7 -0
  8. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
  11. aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
  12. aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
  13. aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
  14. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  15. aiecs/application/knowledge_graph/extractors/base.py +100 -0
  16. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
  17. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
  18. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
  19. aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
  20. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
  21. aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
  22. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
  23. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
  24. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  25. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
  26. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
  27. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  28. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
  29. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  30. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  31. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
  32. aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
  33. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
  34. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  35. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  36. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
  37. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
  38. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
  39. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
  40. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
  41. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
  42. aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
  43. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
  44. aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
  45. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
  46. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  47. aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
  48. aiecs/application/knowledge_graph/search/reranker.py +295 -0
  49. aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
  50. aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
  51. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  52. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
  53. aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
  54. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  55. aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
  56. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  57. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
  58. aiecs/common/__init__.py +9 -0
  59. aiecs/common/knowledge_graph/__init__.py +17 -0
  60. aiecs/common/knowledge_graph/runnable.py +484 -0
  61. aiecs/config/__init__.py +16 -0
  62. aiecs/config/config.py +498 -0
  63. aiecs/config/graph_config.py +137 -0
  64. aiecs/config/registry.py +23 -0
  65. aiecs/core/__init__.py +46 -0
  66. aiecs/core/interface/__init__.py +34 -0
  67. aiecs/core/interface/execution_interface.py +152 -0
  68. aiecs/core/interface/storage_interface.py +171 -0
  69. aiecs/domain/__init__.py +289 -0
  70. aiecs/domain/agent/__init__.py +189 -0
  71. aiecs/domain/agent/base_agent.py +697 -0
  72. aiecs/domain/agent/exceptions.py +103 -0
  73. aiecs/domain/agent/graph_aware_mixin.py +559 -0
  74. aiecs/domain/agent/hybrid_agent.py +490 -0
  75. aiecs/domain/agent/integration/__init__.py +26 -0
  76. aiecs/domain/agent/integration/context_compressor.py +222 -0
  77. aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
  78. aiecs/domain/agent/integration/retry_policy.py +219 -0
  79. aiecs/domain/agent/integration/role_config.py +213 -0
  80. aiecs/domain/agent/knowledge_aware_agent.py +646 -0
  81. aiecs/domain/agent/lifecycle.py +296 -0
  82. aiecs/domain/agent/llm_agent.py +300 -0
  83. aiecs/domain/agent/memory/__init__.py +12 -0
  84. aiecs/domain/agent/memory/conversation.py +197 -0
  85. aiecs/domain/agent/migration/__init__.py +14 -0
  86. aiecs/domain/agent/migration/conversion.py +160 -0
  87. aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
  88. aiecs/domain/agent/models.py +317 -0
  89. aiecs/domain/agent/observability.py +407 -0
  90. aiecs/domain/agent/persistence.py +289 -0
  91. aiecs/domain/agent/prompts/__init__.py +29 -0
  92. aiecs/domain/agent/prompts/builder.py +161 -0
  93. aiecs/domain/agent/prompts/formatters.py +189 -0
  94. aiecs/domain/agent/prompts/template.py +255 -0
  95. aiecs/domain/agent/registry.py +260 -0
  96. aiecs/domain/agent/tool_agent.py +257 -0
  97. aiecs/domain/agent/tools/__init__.py +12 -0
  98. aiecs/domain/agent/tools/schema_generator.py +221 -0
  99. aiecs/domain/community/__init__.py +155 -0
  100. aiecs/domain/community/agent_adapter.py +477 -0
  101. aiecs/domain/community/analytics.py +481 -0
  102. aiecs/domain/community/collaborative_workflow.py +642 -0
  103. aiecs/domain/community/communication_hub.py +645 -0
  104. aiecs/domain/community/community_builder.py +320 -0
  105. aiecs/domain/community/community_integration.py +800 -0
  106. aiecs/domain/community/community_manager.py +813 -0
  107. aiecs/domain/community/decision_engine.py +879 -0
  108. aiecs/domain/community/exceptions.py +225 -0
  109. aiecs/domain/community/models/__init__.py +33 -0
  110. aiecs/domain/community/models/community_models.py +268 -0
  111. aiecs/domain/community/resource_manager.py +457 -0
  112. aiecs/domain/community/shared_context_manager.py +603 -0
  113. aiecs/domain/context/__init__.py +58 -0
  114. aiecs/domain/context/context_engine.py +989 -0
  115. aiecs/domain/context/conversation_models.py +354 -0
  116. aiecs/domain/context/graph_memory.py +467 -0
  117. aiecs/domain/execution/__init__.py +12 -0
  118. aiecs/domain/execution/model.py +57 -0
  119. aiecs/domain/knowledge_graph/__init__.py +19 -0
  120. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  121. aiecs/domain/knowledge_graph/models/entity.py +130 -0
  122. aiecs/domain/knowledge_graph/models/evidence.py +194 -0
  123. aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
  124. aiecs/domain/knowledge_graph/models/path.py +179 -0
  125. aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
  126. aiecs/domain/knowledge_graph/models/query.py +272 -0
  127. aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
  128. aiecs/domain/knowledge_graph/models/relation.py +136 -0
  129. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  130. aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
  131. aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
  132. aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
  133. aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
  134. aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
  135. aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
  136. aiecs/domain/task/__init__.py +13 -0
  137. aiecs/domain/task/dsl_processor.py +613 -0
  138. aiecs/domain/task/model.py +62 -0
  139. aiecs/domain/task/task_context.py +268 -0
  140. aiecs/infrastructure/__init__.py +24 -0
  141. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  142. aiecs/infrastructure/graph_storage/base.py +601 -0
  143. aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
  144. aiecs/infrastructure/graph_storage/cache.py +429 -0
  145. aiecs/infrastructure/graph_storage/distributed.py +226 -0
  146. aiecs/infrastructure/graph_storage/error_handling.py +390 -0
  147. aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
  148. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  149. aiecs/infrastructure/graph_storage/in_memory.py +514 -0
  150. aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
  151. aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
  152. aiecs/infrastructure/graph_storage/metrics.py +357 -0
  153. aiecs/infrastructure/graph_storage/migration.py +413 -0
  154. aiecs/infrastructure/graph_storage/pagination.py +471 -0
  155. aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
  156. aiecs/infrastructure/graph_storage/postgres.py +871 -0
  157. aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
  158. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  159. aiecs/infrastructure/graph_storage/sqlite.py +623 -0
  160. aiecs/infrastructure/graph_storage/streaming.py +495 -0
  161. aiecs/infrastructure/messaging/__init__.py +13 -0
  162. aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
  163. aiecs/infrastructure/messaging/websocket_manager.py +298 -0
  164. aiecs/infrastructure/monitoring/__init__.py +34 -0
  165. aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
  166. aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
  167. aiecs/infrastructure/monitoring/structured_logger.py +48 -0
  168. aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
  169. aiecs/infrastructure/persistence/__init__.py +24 -0
  170. aiecs/infrastructure/persistence/context_engine_client.py +187 -0
  171. aiecs/infrastructure/persistence/database_manager.py +333 -0
  172. aiecs/infrastructure/persistence/file_storage.py +754 -0
  173. aiecs/infrastructure/persistence/redis_client.py +220 -0
  174. aiecs/llm/__init__.py +86 -0
  175. aiecs/llm/callbacks/__init__.py +11 -0
  176. aiecs/llm/callbacks/custom_callbacks.py +264 -0
  177. aiecs/llm/client_factory.py +420 -0
  178. aiecs/llm/clients/__init__.py +33 -0
  179. aiecs/llm/clients/base_client.py +193 -0
  180. aiecs/llm/clients/googleai_client.py +181 -0
  181. aiecs/llm/clients/openai_client.py +131 -0
  182. aiecs/llm/clients/vertex_client.py +437 -0
  183. aiecs/llm/clients/xai_client.py +184 -0
  184. aiecs/llm/config/__init__.py +51 -0
  185. aiecs/llm/config/config_loader.py +275 -0
  186. aiecs/llm/config/config_validator.py +236 -0
  187. aiecs/llm/config/model_config.py +151 -0
  188. aiecs/llm/utils/__init__.py +10 -0
  189. aiecs/llm/utils/validate_config.py +91 -0
  190. aiecs/main.py +363 -0
  191. aiecs/scripts/__init__.py +3 -0
  192. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  193. aiecs/scripts/aid/__init__.py +19 -0
  194. aiecs/scripts/aid/version_manager.py +215 -0
  195. aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
  196. aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
  197. aiecs/scripts/dependance_check/__init__.py +17 -0
  198. aiecs/scripts/dependance_check/dependency_checker.py +938 -0
  199. aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
  200. aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
  201. aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
  202. aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
  203. aiecs/scripts/dependance_patch/__init__.py +7 -0
  204. aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
  205. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  206. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
  207. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
  208. aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
  209. aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
  210. aiecs/scripts/tools_develop/README.md +449 -0
  211. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  212. aiecs/scripts/tools_develop/__init__.py +21 -0
  213. aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
  214. aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
  215. aiecs/scripts/tools_develop/verify_tools.py +356 -0
  216. aiecs/tasks/__init__.py +1 -0
  217. aiecs/tasks/worker.py +172 -0
  218. aiecs/tools/__init__.py +299 -0
  219. aiecs/tools/apisource/__init__.py +99 -0
  220. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  221. aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
  222. aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
  223. aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
  224. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  225. aiecs/tools/apisource/monitoring/metrics.py +303 -0
  226. aiecs/tools/apisource/providers/__init__.py +115 -0
  227. aiecs/tools/apisource/providers/base.py +664 -0
  228. aiecs/tools/apisource/providers/census.py +401 -0
  229. aiecs/tools/apisource/providers/fred.py +564 -0
  230. aiecs/tools/apisource/providers/newsapi.py +412 -0
  231. aiecs/tools/apisource/providers/worldbank.py +357 -0
  232. aiecs/tools/apisource/reliability/__init__.py +12 -0
  233. aiecs/tools/apisource/reliability/error_handler.py +375 -0
  234. aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
  235. aiecs/tools/apisource/tool.py +850 -0
  236. aiecs/tools/apisource/utils/__init__.py +9 -0
  237. aiecs/tools/apisource/utils/validators.py +338 -0
  238. aiecs/tools/base_tool.py +201 -0
  239. aiecs/tools/docs/__init__.py +121 -0
  240. aiecs/tools/docs/ai_document_orchestrator.py +599 -0
  241. aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
  242. aiecs/tools/docs/content_insertion_tool.py +1333 -0
  243. aiecs/tools/docs/document_creator_tool.py +1317 -0
  244. aiecs/tools/docs/document_layout_tool.py +1166 -0
  245. aiecs/tools/docs/document_parser_tool.py +994 -0
  246. aiecs/tools/docs/document_writer_tool.py +1818 -0
  247. aiecs/tools/knowledge_graph/__init__.py +17 -0
  248. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
  249. aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
  250. aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
  251. aiecs/tools/langchain_adapter.py +542 -0
  252. aiecs/tools/schema_generator.py +275 -0
  253. aiecs/tools/search_tool/__init__.py +100 -0
  254. aiecs/tools/search_tool/analyzers.py +589 -0
  255. aiecs/tools/search_tool/cache.py +260 -0
  256. aiecs/tools/search_tool/constants.py +128 -0
  257. aiecs/tools/search_tool/context.py +216 -0
  258. aiecs/tools/search_tool/core.py +749 -0
  259. aiecs/tools/search_tool/deduplicator.py +123 -0
  260. aiecs/tools/search_tool/error_handler.py +271 -0
  261. aiecs/tools/search_tool/metrics.py +371 -0
  262. aiecs/tools/search_tool/rate_limiter.py +178 -0
  263. aiecs/tools/search_tool/schemas.py +277 -0
  264. aiecs/tools/statistics/__init__.py +80 -0
  265. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
  266. aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
  267. aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
  268. aiecs/tools/statistics/data_loader_tool.py +564 -0
  269. aiecs/tools/statistics/data_profiler_tool.py +658 -0
  270. aiecs/tools/statistics/data_transformer_tool.py +573 -0
  271. aiecs/tools/statistics/data_visualizer_tool.py +495 -0
  272. aiecs/tools/statistics/model_trainer_tool.py +487 -0
  273. aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
  274. aiecs/tools/task_tools/__init__.py +86 -0
  275. aiecs/tools/task_tools/chart_tool.py +732 -0
  276. aiecs/tools/task_tools/classfire_tool.py +922 -0
  277. aiecs/tools/task_tools/image_tool.py +447 -0
  278. aiecs/tools/task_tools/office_tool.py +684 -0
  279. aiecs/tools/task_tools/pandas_tool.py +635 -0
  280. aiecs/tools/task_tools/report_tool.py +635 -0
  281. aiecs/tools/task_tools/research_tool.py +392 -0
  282. aiecs/tools/task_tools/scraper_tool.py +715 -0
  283. aiecs/tools/task_tools/stats_tool.py +688 -0
  284. aiecs/tools/temp_file_manager.py +130 -0
  285. aiecs/tools/tool_executor/__init__.py +37 -0
  286. aiecs/tools/tool_executor/tool_executor.py +881 -0
  287. aiecs/utils/LLM_output_structor.py +445 -0
  288. aiecs/utils/__init__.py +34 -0
  289. aiecs/utils/base_callback.py +47 -0
  290. aiecs/utils/cache_provider.py +695 -0
  291. aiecs/utils/execution_utils.py +184 -0
  292. aiecs/utils/logging.py +1 -0
  293. aiecs/utils/prompt_loader.py +14 -0
  294. aiecs/utils/token_usage_repository.py +323 -0
  295. aiecs/ws/__init__.py +0 -0
  296. aiecs/ws/socket_server.py +52 -0
  297. aiecs-1.5.1.dist-info/METADATA +608 -0
  298. aiecs-1.5.1.dist-info/RECORD +302 -0
  299. aiecs-1.5.1.dist-info/WHEEL +5 -0
  300. aiecs-1.5.1.dist-info/entry_points.txt +10 -0
  301. aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
  302. aiecs-1.5.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,388 @@
1
+ """
2
+ Search Result Enhancement and Ranking
3
+
4
+ Intelligently scores and filters search results:
5
+ - Calculate relevance scores using keyword matching
6
+ - Compute popularity scores
7
+ - Calculate recency/freshness scores
8
+ - Apply composite scoring with configurable weights
9
+ - Filter by quality, relevance, and date ranges
10
+ """
11
+
12
import logging
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class SearchEnhancer:
    """
    Enhances search results with relevance scoring and intelligent filtering.

    Every result dict is scored on three axes (relevance, popularity and
    recency), the scores are blended into a composite score using the
    normalized weights given at construction time, and the scored copies are
    then filtered, sorted and optionally truncated.
    """

    # Result fields whose text counts as the "title" when scoring relevance.
    _TITLE_FIELDS = ("title", "name")
    # Result fields whose text counts as the "description".
    _DESCRIPTION_FIELDS = ("description", "notes", "sourceNote")
    # Result fields inspected when classifying the match type.
    _MATCH_FIELDS = ("title", "name", "id", "series_id")
    # Result fields that may carry a numeric popularity indicator.
    _POPULARITY_FIELDS = ("popularity", "usage_count", "frequency", "popularity_rank")
    # Popularity assigned to textual update frequencies.
    _FREQUENCY_SCORES = {"Daily": 1.0, "Weekly": 0.8, "Monthly": 0.6}
    # Date fields considered for recency; the most recent parseable one wins.
    _RECENCY_DATE_FIELDS = (
        "updated",
        "last_updated",
        "observation_end",
        "date",
        "publishedAt",
        "last_modified",
    )
    # Date fields considered for the date-range filter; first parseable wins.
    _FILTER_DATE_FIELDS = ("date", "observation_end", "last_updated", "publishedAt")
    # Maps a ``sort_by`` option to the ``_search_metadata`` key to sort on.
    _SORT_KEYS = {
        "relevance": "relevance_score",
        "popularity": "popularity_score",
        "recency": "recency_score",
        "composite": "composite_score",
    }

    def __init__(
        self,
        relevance_weight: float = 0.5,
        popularity_weight: float = 0.3,
        recency_weight: float = 0.2,
    ):
        """
        Initialize search enhancer.

        The three weights are normalized so that they sum to 1.

        Args:
            relevance_weight: Weight for relevance score in composite score
            popularity_weight: Weight for popularity score in composite score
            recency_weight: Weight for recency score in composite score

        Raises:
            ValueError: If any weight is negative or the weights sum to zero
                (normalization would otherwise divide by zero).
        """
        weights = (relevance_weight, popularity_weight, recency_weight)
        total_weight = sum(weights)
        if min(weights) < 0 or total_weight <= 0:
            raise ValueError(
                "Score weights must be non-negative and sum to a positive value"
            )

        # Normalize weights
        self.relevance_weight = relevance_weight / total_weight
        self.popularity_weight = popularity_weight / total_weight
        self.recency_weight = recency_weight / total_weight

    def enhance_search_results(
        self,
        query: str,
        results: List[Dict[str, Any]],
        options: Optional[Dict[str, Any]] = None,
    ) -> List[Dict[str, Any]]:
        """
        Enhance search results with scoring and filtering.

        The input dicts are not modified; each returned item is a shallow
        copy carrying an extra ``_search_metadata`` entry.

        Args:
            query: Original search query
            results: Raw search results
            options: Enhancement options:
                - relevance_threshold: Minimum composite score (0-1)
                - sort_by: Sort method ('relevance', 'popularity', 'recency', 'composite')
                - date_range: {'start': 'YYYY-MM-DD', 'end': 'YYYY-MM-DD'}
                - min_quality_score: Minimum quality score (0-1)
                - max_results: Maximum number of results to return

        Returns:
            Enhanced and filtered results
        """
        if not results:
            return []

        options = options or {}
        enhanced = []

        for result in results:
            # Calculate scores
            relevance = self._calculate_relevance(query, result)
            popularity = self._get_popularity_score(result)
            recency = self._calculate_recency(result)

            # Calculate composite score
            composite_score = (
                relevance * self.relevance_weight
                + popularity * self.popularity_weight
                + recency * self.recency_weight
            )

            # Add search metadata to a copy so the caller's dict stays untouched
            result_copy = result.copy()
            result_copy["_search_metadata"] = {
                "relevance_score": round(relevance, 3),
                "popularity_score": round(popularity, 3),
                "recency_score": round(recency, 3),
                "composite_score": round(composite_score, 3),
                "match_type": self._get_match_type(query, result),
            }

            # Apply filters
            if self._passes_filters(result_copy, options):
                enhanced.append(result_copy)

        # Sort results
        enhanced = self._sort_results(enhanced, options.get("sort_by", "composite"))

        # Apply max results limit
        max_results = options.get("max_results")
        if max_results and max_results > 0:
            enhanced = enhanced[:max_results]

        return enhanced

    @staticmethod
    def _collect_text(result: Dict[str, Any], fields: Any) -> str:
        """Join the lower-cased text of the given fields, skipping missing/None values."""
        return " ".join(
            str(result[field]).lower()
            for field in fields
            if result.get(field) is not None
        )

    def _calculate_relevance(self, query: str, result: Dict[str, Any]) -> float:
        """
        Calculate relevance score using keyword matching.

        Each query term is looked up as a substring of the title text and of
        the description text. Title coverage counts for 70% and description
        coverage for 30%; a full-phrase hit in the title boosts the score by
        50%, capped at 1.0.

        Args:
            query: Search query
            result: Result item

        Returns:
            Relevance score (0-1)
        """
        # Collapse whitespace so padded or double-spaced queries can still
        # earn the exact-phrase boost below.
        normalized_query = " ".join(query.lower().split())
        query_terms = set(normalized_query.split())
        if not query_terms:
            return 0.0

        # Extract searchable text from result
        title_text = self._collect_text(result, self._TITLE_FIELDS)
        description_text = self._collect_text(result, self._DESCRIPTION_FIELDS)

        term_count = len(query_terms)
        title_score = sum(1 for term in query_terms if term in title_text) / term_count
        desc_score = sum(1 for term in query_terms if term in description_text) / term_count

        # Weight title matches more heavily
        relevance = title_score * 0.7 + desc_score * 0.3

        # Boost for exact phrase match
        if normalized_query in title_text:
            relevance = min(relevance * 1.5, 1.0)

        return relevance

    def _get_popularity_score(self, result: Dict[str, Any]) -> float:
        """
        Calculate popularity score based on usage indicators.

        The first numeric popularity field found is scaled by 1/100 and
        clamped to [0, 1]. Otherwise a textual ``frequency`` of Daily,
        Weekly or Monthly maps to a fixed score. With no indicator at all
        the score is a neutral 0.5.

        Args:
            result: Result item

        Returns:
            Popularity score (0-1)
        """
        for field in self._POPULARITY_FIELDS:
            value = result.get(field)
            if isinstance(value, (int, float)):
                # Normalize to 0-1 range (assumes max popularity of 100);
                # clamp below as well so negative inputs cannot produce a
                # negative score.
                return max(0.0, min(value / 100, 1.0))

        # More frequent updates = more popular
        frequency = result.get("frequency")
        if isinstance(frequency, str) and frequency in self._FREQUENCY_SCORES:
            return self._FREQUENCY_SCORES[frequency]

        # Default: medium popularity
        return 0.5

    @staticmethod
    def _parse_date(value: Any) -> Optional[datetime]:
        """
        Parse a date-like value into a timezone-aware UTC datetime.

        Values containing "T" are tried as ISO-8601 timestamps (a trailing
        "Z" is accepted); anything else, or an ISO string that fails to
        parse, falls back to reading the first ten characters as
        ``YYYY-MM-DD``. Naive values are assumed to be UTC, so every
        returned datetime can be safely compared with or subtracted from
        any other.

        Returns:
            Aware UTC datetime, or None when the value cannot be parsed
        """
        if value is None:
            return None

        parsed: Optional[datetime] = None
        if isinstance(value, datetime):
            parsed = value
        else:
            text = str(value).strip()
            if "T" in text:
                try:
                    parsed = datetime.fromisoformat(text.replace("Z", "+00:00"))
                except ValueError:
                    parsed = None
            if parsed is None:
                try:
                    parsed = datetime.strptime(text[:10], "%Y-%m-%d")
                except ValueError:
                    return None

        if parsed.tzinfo is None:
            return parsed.replace(tzinfo=timezone.utc)
        return parsed.astimezone(timezone.utc)

    def _calculate_recency(self, result: Dict[str, Any]) -> float:
        """
        Calculate recency/freshness score.

        The most recent parseable date among the known date fields is
        compared with the current UTC time and bucketed by age.

        Args:
            result: Result item

        Returns:
            Recency score (0-1)
        """
        latest_date = None
        for field in self._RECENCY_DATE_FIELDS:
            if field not in result:
                continue
            candidate = self._parse_date(result[field])
            if candidate is not None and (latest_date is None or candidate > latest_date):
                latest_date = candidate

        if latest_date is None:
            # No date found, assume moderate recency
            return 0.5

        # Both operands are aware UTC datetimes, so the subtraction cannot
        # fail on mixed naive/aware values.
        age_days = (datetime.now(timezone.utc) - latest_date).days

        # Score based on age
        if age_days < 7:
            return 1.0  # Very recent
        if age_days < 30:
            return 0.9  # Recent
        if age_days < 90:
            return 0.7  # Somewhat recent
        if age_days < 365:
            return 0.5  # This year
        if age_days < 365 * 2:
            return 0.3  # Last 2 years
        # Older data, score decreases slowly
        return max(0.1, 0.3 - (age_days - 365 * 2) / (365 * 10))

    def _get_match_type(self, query: str, result: Dict[str, Any]) -> str:
        """
        Determine the type of match.

        An exact match on any identifying field takes precedence over a
        partial match on another; empty or missing field values never count
        as a match.

        Args:
            query: Search query
            result: Result item

        Returns:
            Match type string ('exact', 'partial', 'fuzzy')
        """
        query_lower = query.lower().strip()
        if not query_lower:
            return "fuzzy"

        # Check title/name/id fields, ignoring missing and blank values
        # (an empty string is a substring of every query).
        candidates = [
            str(result[field]).lower().strip()
            for field in self._MATCH_FIELDS
            if result.get(field) is not None
        ]
        candidates = [value for value in candidates if value]

        if any(value == query_lower for value in candidates):
            return "exact"
        if any(query_lower in value or value in query_lower for value in candidates):
            return "partial"
        return "fuzzy"

    @staticmethod
    def _get_quality_score(result: Dict[str, Any]) -> Any:
        """Return the quality score from ``_quality`` or ``metadata.quality``, if any."""
        quality = result.get("_quality")
        if isinstance(quality, dict) and quality.get("score") is not None:
            return quality["score"]

        metadata = result.get("metadata")
        if isinstance(metadata, dict):
            nested = metadata.get("quality")
            if isinstance(nested, dict):
                return nested.get("score")
        return None

    def _passes_filters(self, result: Dict[str, Any], options: Dict[str, Any]) -> bool:
        """
        Check if result passes filter criteria.

        Results lacking quality or date information are never rejected by
        the corresponding filter. The date range is inclusive on both ends
        and is evaluated on calendar days in UTC.

        Args:
            result: Result with _search_metadata
            options: Filter options

        Returns:
            True if result passes all filters
        """
        # Relevance threshold
        threshold = options.get("relevance_threshold", 0.0)
        if result["_search_metadata"]["composite_score"] < threshold:
            return False

        # Quality score threshold
        min_quality = options.get("min_quality_score")
        if min_quality is not None:
            quality_score = self._get_quality_score(result)
            if quality_score is not None and quality_score < min_quality:
                return False

        # Date range filter
        date_range = options.get("date_range")
        if date_range:
            result_date = self._extract_date(result)
            if result_date:
                # Compare calendar days so a timestamp late on the end day
                # is still inside the range.
                result_day = result_date.date()
                start = date_range.get("start")
                end = date_range.get("end")

                try:
                    if start and result_day < datetime.strptime(start, "%Y-%m-%d").date():
                        return False
                    if end and result_day > datetime.strptime(end, "%Y-%m-%d").date():
                        return False
                except (ValueError, TypeError):
                    logger.warning("Invalid date range format: %s", date_range)

        return True

    def _extract_date(self, result: Dict[str, Any]) -> Optional[datetime]:
        """Extract the first parseable date from result as an aware UTC datetime."""
        for field in self._FILTER_DATE_FIELDS:
            if field in result:
                parsed = self._parse_date(result[field])
                if parsed is not None:
                    return parsed
        return None

    def _sort_results(self, results: List[Dict[str, Any]], sort_by: str) -> List[Dict[str, Any]]:
        """
        Sort results by specified criteria.

        Args:
            results: Results with _search_metadata
            sort_by: Sort method ('relevance', 'popularity', 'recency' or
                'composite'); any other value falls back to composite

        Returns:
            Sorted results (highest score first)
        """
        score_key = self._SORT_KEYS.get(sort_by, "composite_score")
        return sorted(
            results,
            key=lambda item: item["_search_metadata"][score_key],
            reverse=True,
        )
@@ -0,0 +1,9 @@
1
+ """
2
+ Monitoring Module
3
+
4
+ Contains metrics and health monitoring components.
5
+ """
6
+
7
+ from aiecs.tools.apisource.monitoring.metrics import DetailedMetrics
8
+
9
+ __all__ = ["DetailedMetrics"]
@@ -0,0 +1,303 @@
1
+ """
2
+ Detailed Metrics and Health Monitoring for API Providers
3
+
4
+ This module provides comprehensive performance tracking including:
5
+ - Response time percentiles
6
+ - Data volume statistics
7
+ - Error type distribution
8
+ - Rate limiting events
9
+ - Cache hit rates
10
+ - Overall health scoring
11
+ """
12
+
13
+ import logging
14
+ from collections import defaultdict
15
+ from datetime import datetime
16
+ from threading import Lock
17
+ from typing import Any, Dict, Optional
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class DetailedMetrics:
    """
    Tracks detailed performance metrics for API providers.

    Records per-request outcomes, a sliding window of response times with
    derived percentiles, data volumes, error details and rate-limit waits,
    and derives an overall health score from them.

    Every public method serializes access to ``self.metrics`` through
    ``self.lock``. That lock is a non-reentrant ``threading.Lock``, so code
    that already holds it must use the ``*_unlocked`` helper rather than a
    public accessor that would try to acquire the lock a second time.
    """

    # Maximum number of entries kept in metrics["errors"]["recent_errors"].
    _MAX_RECENT_ERRORS = 10

    def __init__(self, max_response_times: int = 100):
        """
        Initialize metrics tracker.

        Args:
            max_response_times: Maximum number of response times to keep in memory
        """
        self.max_response_times = max_response_times
        self.lock = Lock()
        self.metrics: Dict[str, Any] = self._new_metrics()

    @staticmethod
    def _new_metrics() -> Dict[str, Any]:
        """Build a fresh, zeroed metrics dictionary."""
        return {
            "requests": {
                "total": 0,
                "successful": 0,
                "failed": 0,
                "cached": 0,
            },
            "performance": {
                "response_times": [],  # Last N response times
                "avg_response_time_ms": 0.0,
                "p50_response_time_ms": 0.0,
                "p95_response_time_ms": 0.0,
                "p99_response_time_ms": 0.0,
                "min_response_time_ms": 0.0,
                "max_response_time_ms": 0.0,
            },
            "data_volume": {
                "total_records_fetched": 0,
                "total_bytes_transferred": 0,
                "avg_records_per_request": 0.0,
                "avg_bytes_per_request": 0.0,
            },
            "errors": {
                "by_type": defaultdict(int),  # {error_type: count}
                "recent_errors": [],  # Most recent errors with details
            },
            "rate_limiting": {
                "throttled_requests": 0,
                "total_wait_time_ms": 0.0,
                "avg_wait_time_ms": 0.0,
            },
            "timestamps": {
                "first_request": None,
                "last_request": None,
                "last_success": None,
                "last_failure": None,
            },
        }

    def record_request(
        self,
        success: bool,
        response_time_ms: float,
        record_count: int = 0,
        bytes_transferred: int = 0,
        cached: bool = False,
        error_type: Optional[str] = None,
        error_message: Optional[str] = None,
    ):
        """
        Record a request with its metrics.

        Args:
            success: Whether the request was successful
            response_time_ms: Response time in milliseconds
            record_count: Number of records returned
            bytes_transferred: Bytes transferred in the response
            cached: Whether the response was cached
            error_type: Type of error if failed (e.g., 'timeout', 'auth', 'rate_limit')
            error_message: Error message if failed
        """
        with self.lock:
            # Naive UTC timestamp in ISO-8601 form.
            now = datetime.utcnow().isoformat()

            requests = self.metrics["requests"]
            timestamps = self.metrics["timestamps"]
            performance = self.metrics["performance"]
            data_volume = self.metrics["data_volume"]

            # Update request counts
            requests["total"] += 1
            if success:
                requests["successful"] += 1
                timestamps["last_success"] = now
            else:
                requests["failed"] += 1
                timestamps["last_failure"] = now

            if cached:
                requests["cached"] += 1

            # Update timestamps
            if timestamps["first_request"] is None:
                timestamps["first_request"] = now
            timestamps["last_request"] = now

            # Update performance metrics, keeping only the newest samples
            performance["response_times"].append(response_time_ms)
            if len(performance["response_times"]) > self.max_response_times:
                performance["response_times"].pop(0)

            self._calculate_percentiles()

            # Update data volume metrics; total is at least 1 at this point
            data_volume["total_records_fetched"] += record_count
            data_volume["total_bytes_transferred"] += bytes_transferred

            total_requests = requests["total"]
            data_volume["avg_records_per_request"] = (
                data_volume["total_records_fetched"] / total_requests
            )
            data_volume["avg_bytes_per_request"] = (
                data_volume["total_bytes_transferred"] / total_requests
            )

            # Record errors (only failures that carry an error type)
            if not success and error_type:
                errors = self.metrics["errors"]
                errors["by_type"][error_type] += 1
                errors["recent_errors"].append(
                    {
                        "type": error_type,
                        "message": error_message or "Unknown error",
                        "timestamp": now,
                        "response_time_ms": response_time_ms,
                    }
                )
                if len(errors["recent_errors"]) > self._MAX_RECENT_ERRORS:
                    errors["recent_errors"].pop(0)

    def record_rate_limit_wait(self, wait_time_ms: float):
        """
        Record a rate limit wait event.

        Args:
            wait_time_ms: Time waited in milliseconds
        """
        with self.lock:
            rate_limiting = self.metrics["rate_limiting"]
            rate_limiting["throttled_requests"] += 1
            rate_limiting["total_wait_time_ms"] += wait_time_ms
            # throttled_requests is at least 1 here, so the division is safe
            rate_limiting["avg_wait_time_ms"] = (
                rate_limiting["total_wait_time_ms"] / rate_limiting["throttled_requests"]
            )

    def _calculate_percentiles(self):
        """
        Recompute response time statistics from the retained samples.

        Must be called while holding self.lock. Leaves the stored values
        untouched when no samples are retained.
        """
        performance = self.metrics["performance"]
        times = sorted(performance["response_times"])
        if not times:
            return

        n = len(times)
        last = n - 1
        performance["avg_response_time_ms"] = sum(times) / n
        performance["min_response_time_ms"] = times[0]
        performance["max_response_time_ms"] = times[last]
        performance["p50_response_time_ms"] = times[n // 2]
        # Clamp both upper percentiles to the last valid index
        performance["p95_response_time_ms"] = times[min(int(n * 0.95), last)]
        performance["p99_response_time_ms"] = times[min(int(n * 0.99), last)]

    def _calculate_health_score_unlocked(self) -> float:
        """
        Calculate health score without acquiring lock (internal use only).
        Must be called while holding self.lock.

        Returns:
            1.0 when no requests were recorded, otherwise the weighted sum
            of the success, performance, cache and error-diversity scores
        """
        total = self.metrics["requests"]["total"]
        if total == 0:
            return 1.0

        # Success rate score (40%)
        success_rate = self.metrics["requests"]["successful"] / total
        success_score = success_rate * 0.4

        # Performance score (30%)
        avg_time = self.metrics["performance"]["avg_response_time_ms"]
        # Assume < 200ms is excellent, > 2000ms is poor, linear in between
        if avg_time < 200:
            performance_score = 0.3
        elif avg_time > 2000:
            performance_score = 0.0
        else:
            performance_score = max(0, min(1, (2000 - avg_time) / 1800)) * 0.3

        # Cache hit rate score (20%)
        cache_rate = self.metrics["requests"]["cached"] / total
        cache_score = cache_rate * 0.2

        # Error diversity score (10%) - fewer error types is better
        error_types = len(self.metrics["errors"]["by_type"])
        error_score = max(0, (5 - error_types) / 5) * 0.1

        return success_score + performance_score + cache_score + error_score

    def get_health_score(self) -> float:
        """
        Calculate overall health score (0-1).

        The health score considers:
        - Success rate (40%)
        - Performance (30%)
        - Cache hit rate (20%)
        - Error diversity (10%)

        Returns:
            Health score between 0 and 1
        """
        with self.lock:
            return self._calculate_health_score_unlocked()

    def get_stats(self) -> Dict[str, Any]:
        """
        Get all metrics as a dictionary.

        Returns:
            Snapshot of the metrics plus a "health_score" entry. The raw
            response_times list is omitted to keep the output clean.
        """
        with self.lock:
            performance = {
                key: value
                for key, value in self.metrics["performance"].items()
                if key != "response_times"
            }
            errors = self.metrics["errors"]

            return {
                "requests": dict(self.metrics["requests"]),
                "performance": performance,
                "data_volume": dict(self.metrics["data_volume"]),
                "errors": {
                    # Convert defaultdict to regular dict for JSON serialization
                    "by_type": dict(errors["by_type"]),
                    "recent_errors": [dict(entry) for entry in errors["recent_errors"]],
                },
                "rate_limiting": dict(self.metrics["rate_limiting"]),
                "timestamps": dict(self.metrics["timestamps"]),
                # self.lock is already held and is not reentrant, so use the
                # unlocked helper; get_health_score() would deadlock here.
                "health_score": self._calculate_health_score_unlocked(),
            }

    def get_summary(self) -> Dict[str, Any]:
        """
        Get a concise summary of key metrics.

        Returns:
            Summary dictionary with key metrics, or a "no_activity" summary
            when no requests were recorded
        """
        with self.lock:
            total = self.metrics["requests"]["total"]
            if total == 0:
                return {"status": "no_activity", "health_score": 1.0}

            success_rate = self.metrics["requests"]["successful"] / total
            cache_hit_rate = self.metrics["requests"]["cached"] / total
            # Use unlocked version to avoid deadlock
            health_score = self._calculate_health_score_unlocked()

            return {
                "status": "healthy" if health_score > 0.7 else "degraded",
                "health_score": round(health_score, 3),
                "total_requests": total,
                "success_rate": round(success_rate, 3),
                "cache_hit_rate": round(cache_hit_rate, 3),
                "avg_response_time_ms": round(
                    self.metrics["performance"]["avg_response_time_ms"], 2
                ),
                "p95_response_time_ms": round(
                    self.metrics["performance"]["p95_response_time_ms"], 2
                ),
                "total_errors": self.metrics["requests"]["failed"],
                "error_types": len(self.metrics["errors"]["by_type"]),
            }

    def reset(self):
        """
        Reset all metrics.

        Only the metrics dictionary is rebuilt. self.lock is deliberately
        kept: replacing it while other threads are blocked on the old lock
        would let them run concurrently with threads using the new one.
        """
        with self.lock:
            self.metrics = self._new_metrics()