aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only.

Potentially problematic release.

This version of aiecs might be problematic.

Files changed (340)
  1. aiecs/__init__.py +13 -16
  2. aiecs/__main__.py +7 -7
  3. aiecs/aiecs_client.py +269 -75
  4. aiecs/application/executors/operation_executor.py +79 -54
  5. aiecs/application/knowledge_graph/__init__.py +7 -0
  6. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  7. aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
  8. aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
  11. aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
  12. aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
  13. aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
  14. aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
  15. aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
  16. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  17. aiecs/application/knowledge_graph/extractors/base.py +98 -0
  18. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
  19. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
  20. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
  21. aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
  22. aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
  23. aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
  24. aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
  25. aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
  26. aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
  27. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
  28. aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
  29. aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
  30. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
  31. aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
  32. aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
  33. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
  34. aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
  35. aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
  36. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  37. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
  38. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
  39. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  40. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
  41. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  42. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  43. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
  44. aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
  45. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
  46. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  47. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  48. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
  49. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
  50. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
  51. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
  52. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
  53. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
  54. aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
  55. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
  56. aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
  57. aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
  58. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
  59. aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
  60. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  61. aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
  62. aiecs/application/knowledge_graph/search/reranker.py +293 -0
  63. aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
  64. aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
  65. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  66. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
  67. aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
  68. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  69. aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
  70. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  71. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
  72. aiecs/common/__init__.py +9 -0
  73. aiecs/common/knowledge_graph/__init__.py +17 -0
  74. aiecs/common/knowledge_graph/runnable.py +471 -0
  75. aiecs/config/__init__.py +20 -5
  76. aiecs/config/config.py +762 -31
  77. aiecs/config/graph_config.py +131 -0
  78. aiecs/config/tool_config.py +399 -0
  79. aiecs/core/__init__.py +29 -13
  80. aiecs/core/interface/__init__.py +2 -2
  81. aiecs/core/interface/execution_interface.py +22 -22
  82. aiecs/core/interface/storage_interface.py +37 -88
  83. aiecs/core/registry/__init__.py +31 -0
  84. aiecs/core/registry/service_registry.py +92 -0
  85. aiecs/domain/__init__.py +270 -1
  86. aiecs/domain/agent/__init__.py +191 -0
  87. aiecs/domain/agent/base_agent.py +3870 -0
  88. aiecs/domain/agent/exceptions.py +99 -0
  89. aiecs/domain/agent/graph_aware_mixin.py +569 -0
  90. aiecs/domain/agent/hybrid_agent.py +1435 -0
  91. aiecs/domain/agent/integration/__init__.py +29 -0
  92. aiecs/domain/agent/integration/context_compressor.py +216 -0
  93. aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
  94. aiecs/domain/agent/integration/protocols.py +281 -0
  95. aiecs/domain/agent/integration/retry_policy.py +218 -0
  96. aiecs/domain/agent/integration/role_config.py +213 -0
  97. aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
  98. aiecs/domain/agent/lifecycle.py +291 -0
  99. aiecs/domain/agent/llm_agent.py +692 -0
  100. aiecs/domain/agent/memory/__init__.py +12 -0
  101. aiecs/domain/agent/memory/conversation.py +1124 -0
  102. aiecs/domain/agent/migration/__init__.py +14 -0
  103. aiecs/domain/agent/migration/conversion.py +163 -0
  104. aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
  105. aiecs/domain/agent/models.py +884 -0
  106. aiecs/domain/agent/observability.py +479 -0
  107. aiecs/domain/agent/persistence.py +449 -0
  108. aiecs/domain/agent/prompts/__init__.py +29 -0
  109. aiecs/domain/agent/prompts/builder.py +159 -0
  110. aiecs/domain/agent/prompts/formatters.py +187 -0
  111. aiecs/domain/agent/prompts/template.py +255 -0
  112. aiecs/domain/agent/registry.py +253 -0
  113. aiecs/domain/agent/tool_agent.py +444 -0
  114. aiecs/domain/agent/tools/__init__.py +15 -0
  115. aiecs/domain/agent/tools/schema_generator.py +364 -0
  116. aiecs/domain/community/__init__.py +155 -0
  117. aiecs/domain/community/agent_adapter.py +469 -0
  118. aiecs/domain/community/analytics.py +432 -0
  119. aiecs/domain/community/collaborative_workflow.py +648 -0
  120. aiecs/domain/community/communication_hub.py +634 -0
  121. aiecs/domain/community/community_builder.py +320 -0
  122. aiecs/domain/community/community_integration.py +796 -0
  123. aiecs/domain/community/community_manager.py +803 -0
  124. aiecs/domain/community/decision_engine.py +849 -0
  125. aiecs/domain/community/exceptions.py +231 -0
  126. aiecs/domain/community/models/__init__.py +33 -0
  127. aiecs/domain/community/models/community_models.py +234 -0
  128. aiecs/domain/community/resource_manager.py +461 -0
  129. aiecs/domain/community/shared_context_manager.py +589 -0
  130. aiecs/domain/context/__init__.py +40 -10
  131. aiecs/domain/context/context_engine.py +1910 -0
  132. aiecs/domain/context/conversation_models.py +87 -53
  133. aiecs/domain/context/graph_memory.py +582 -0
  134. aiecs/domain/execution/model.py +12 -4
  135. aiecs/domain/knowledge_graph/__init__.py +19 -0
  136. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  137. aiecs/domain/knowledge_graph/models/entity.py +148 -0
  138. aiecs/domain/knowledge_graph/models/evidence.py +178 -0
  139. aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
  140. aiecs/domain/knowledge_graph/models/path.py +171 -0
  141. aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
  142. aiecs/domain/knowledge_graph/models/query.py +261 -0
  143. aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
  144. aiecs/domain/knowledge_graph/models/relation.py +202 -0
  145. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  146. aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
  147. aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
  148. aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
  149. aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
  150. aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
  151. aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
  152. aiecs/domain/task/dsl_processor.py +172 -56
  153. aiecs/domain/task/model.py +20 -8
  154. aiecs/domain/task/task_context.py +27 -24
  155. aiecs/infrastructure/__init__.py +0 -2
  156. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  157. aiecs/infrastructure/graph_storage/base.py +837 -0
  158. aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
  159. aiecs/infrastructure/graph_storage/cache.py +424 -0
  160. aiecs/infrastructure/graph_storage/distributed.py +223 -0
  161. aiecs/infrastructure/graph_storage/error_handling.py +380 -0
  162. aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
  163. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  164. aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
  165. aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
  166. aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
  167. aiecs/infrastructure/graph_storage/metrics.py +344 -0
  168. aiecs/infrastructure/graph_storage/migration.py +400 -0
  169. aiecs/infrastructure/graph_storage/pagination.py +483 -0
  170. aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
  171. aiecs/infrastructure/graph_storage/postgres.py +1563 -0
  172. aiecs/infrastructure/graph_storage/property_storage.py +353 -0
  173. aiecs/infrastructure/graph_storage/protocols.py +76 -0
  174. aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
  175. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  176. aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
  177. aiecs/infrastructure/graph_storage/streaming.py +487 -0
  178. aiecs/infrastructure/graph_storage/tenant.py +412 -0
  179. aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
  180. aiecs/infrastructure/messaging/websocket_manager.py +51 -35
  181. aiecs/infrastructure/monitoring/__init__.py +22 -0
  182. aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
  183. aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
  184. aiecs/infrastructure/monitoring/structured_logger.py +3 -7
  185. aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
  186. aiecs/infrastructure/persistence/__init__.py +14 -1
  187. aiecs/infrastructure/persistence/context_engine_client.py +184 -0
  188. aiecs/infrastructure/persistence/database_manager.py +67 -43
  189. aiecs/infrastructure/persistence/file_storage.py +180 -103
  190. aiecs/infrastructure/persistence/redis_client.py +74 -21
  191. aiecs/llm/__init__.py +73 -25
  192. aiecs/llm/callbacks/__init__.py +11 -0
  193. aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
  194. aiecs/llm/client_factory.py +224 -36
  195. aiecs/llm/client_resolver.py +155 -0
  196. aiecs/llm/clients/__init__.py +38 -0
  197. aiecs/llm/clients/base_client.py +324 -0
  198. aiecs/llm/clients/google_function_calling_mixin.py +457 -0
  199. aiecs/llm/clients/googleai_client.py +241 -0
  200. aiecs/llm/clients/openai_client.py +158 -0
  201. aiecs/llm/clients/openai_compatible_mixin.py +367 -0
  202. aiecs/llm/clients/vertex_client.py +897 -0
  203. aiecs/llm/clients/xai_client.py +201 -0
  204. aiecs/llm/config/__init__.py +51 -0
  205. aiecs/llm/config/config_loader.py +272 -0
  206. aiecs/llm/config/config_validator.py +206 -0
  207. aiecs/llm/config/model_config.py +143 -0
  208. aiecs/llm/protocols.py +149 -0
  209. aiecs/llm/utils/__init__.py +10 -0
  210. aiecs/llm/utils/validate_config.py +89 -0
  211. aiecs/main.py +140 -121
  212. aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
  213. aiecs/scripts/aid/__init__.py +19 -0
  214. aiecs/scripts/aid/module_checker.py +499 -0
  215. aiecs/scripts/aid/version_manager.py +235 -0
  216. aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
  217. aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
  218. aiecs/scripts/dependance_check/__init__.py +15 -0
  219. aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
  220. aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
  221. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
  222. aiecs/scripts/dependance_patch/__init__.py +7 -0
  223. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  224. aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
  225. aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
  226. aiecs/scripts/knowledge_graph/__init__.py +3 -0
  227. aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
  228. aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
  229. aiecs/scripts/tools_develop/README.md +671 -0
  230. aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
  231. aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
  232. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  233. aiecs/scripts/tools_develop/__init__.py +21 -0
  234. aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
  235. aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
  236. aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
  237. aiecs/scripts/tools_develop/schema_coverage.py +511 -0
  238. aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
  239. aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
  240. aiecs/scripts/tools_develop/verify_tools.py +352 -0
  241. aiecs/tasks/__init__.py +0 -1
  242. aiecs/tasks/worker.py +115 -47
  243. aiecs/tools/__init__.py +194 -72
  244. aiecs/tools/apisource/__init__.py +99 -0
  245. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  246. aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
  247. aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
  248. aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
  249. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  250. aiecs/tools/apisource/monitoring/metrics.py +330 -0
  251. aiecs/tools/apisource/providers/__init__.py +112 -0
  252. aiecs/tools/apisource/providers/base.py +671 -0
  253. aiecs/tools/apisource/providers/census.py +397 -0
  254. aiecs/tools/apisource/providers/fred.py +535 -0
  255. aiecs/tools/apisource/providers/newsapi.py +409 -0
  256. aiecs/tools/apisource/providers/worldbank.py +352 -0
  257. aiecs/tools/apisource/reliability/__init__.py +12 -0
  258. aiecs/tools/apisource/reliability/error_handler.py +363 -0
  259. aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
  260. aiecs/tools/apisource/tool.py +832 -0
  261. aiecs/tools/apisource/utils/__init__.py +9 -0
  262. aiecs/tools/apisource/utils/validators.py +334 -0
  263. aiecs/tools/base_tool.py +415 -21
  264. aiecs/tools/docs/__init__.py +121 -0
  265. aiecs/tools/docs/ai_document_orchestrator.py +607 -0
  266. aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
  267. aiecs/tools/docs/content_insertion_tool.py +1320 -0
  268. aiecs/tools/docs/document_creator_tool.py +1323 -0
  269. aiecs/tools/docs/document_layout_tool.py +1160 -0
  270. aiecs/tools/docs/document_parser_tool.py +1011 -0
  271. aiecs/tools/docs/document_writer_tool.py +1829 -0
  272. aiecs/tools/knowledge_graph/__init__.py +17 -0
  273. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
  274. aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
  275. aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
  276. aiecs/tools/langchain_adapter.py +300 -138
  277. aiecs/tools/schema_generator.py +455 -0
  278. aiecs/tools/search_tool/__init__.py +100 -0
  279. aiecs/tools/search_tool/analyzers.py +581 -0
  280. aiecs/tools/search_tool/cache.py +264 -0
  281. aiecs/tools/search_tool/constants.py +128 -0
  282. aiecs/tools/search_tool/context.py +224 -0
  283. aiecs/tools/search_tool/core.py +778 -0
  284. aiecs/tools/search_tool/deduplicator.py +119 -0
  285. aiecs/tools/search_tool/error_handler.py +242 -0
  286. aiecs/tools/search_tool/metrics.py +343 -0
  287. aiecs/tools/search_tool/rate_limiter.py +172 -0
  288. aiecs/tools/search_tool/schemas.py +275 -0
  289. aiecs/tools/statistics/__init__.py +80 -0
  290. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
  291. aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
  292. aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
  293. aiecs/tools/statistics/data_loader_tool.py +555 -0
  294. aiecs/tools/statistics/data_profiler_tool.py +638 -0
  295. aiecs/tools/statistics/data_transformer_tool.py +580 -0
  296. aiecs/tools/statistics/data_visualizer_tool.py +498 -0
  297. aiecs/tools/statistics/model_trainer_tool.py +507 -0
  298. aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
  299. aiecs/tools/task_tools/__init__.py +49 -36
  300. aiecs/tools/task_tools/chart_tool.py +200 -184
  301. aiecs/tools/task_tools/classfire_tool.py +268 -267
  302. aiecs/tools/task_tools/image_tool.py +175 -131
  303. aiecs/tools/task_tools/office_tool.py +226 -146
  304. aiecs/tools/task_tools/pandas_tool.py +477 -121
  305. aiecs/tools/task_tools/report_tool.py +390 -142
  306. aiecs/tools/task_tools/research_tool.py +149 -79
  307. aiecs/tools/task_tools/scraper_tool.py +339 -145
  308. aiecs/tools/task_tools/stats_tool.py +448 -209
  309. aiecs/tools/temp_file_manager.py +26 -24
  310. aiecs/tools/tool_executor/__init__.py +18 -16
  311. aiecs/tools/tool_executor/tool_executor.py +364 -52
  312. aiecs/utils/LLM_output_structor.py +74 -48
  313. aiecs/utils/__init__.py +14 -3
  314. aiecs/utils/base_callback.py +0 -3
  315. aiecs/utils/cache_provider.py +696 -0
  316. aiecs/utils/execution_utils.py +50 -31
  317. aiecs/utils/prompt_loader.py +1 -0
  318. aiecs/utils/token_usage_repository.py +37 -11
  319. aiecs/ws/socket_server.py +14 -4
  320. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
  321. aiecs-1.7.6.dist-info/RECORD +337 -0
  322. aiecs-1.7.6.dist-info/entry_points.txt +13 -0
  323. aiecs/config/registry.py +0 -19
  324. aiecs/domain/context/content_engine.py +0 -982
  325. aiecs/llm/base_client.py +0 -99
  326. aiecs/llm/openai_client.py +0 -125
  327. aiecs/llm/vertex_client.py +0 -186
  328. aiecs/llm/xai_client.py +0 -184
  329. aiecs/scripts/dependency_checker.py +0 -857
  330. aiecs/scripts/quick_dependency_check.py +0 -269
  331. aiecs/tools/task_tools/search_api.py +0 -7
  332. aiecs-1.0.1.dist-info/RECORD +0 -90
  333. aiecs-1.0.1.dist-info/entry_points.txt +0 -7
  334. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  335. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  336. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  337. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  338. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
  339. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
  340. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
aiecs/tools/task_tools/scraper_tool.py

@@ -2,23 +2,24 @@ import os
  import json
  import time
  import logging
- import asyncio
  import tempfile
  import subprocess
- from typing import Dict, Any, List, Optional, Union, Tuple
+ from typing import Dict, Any, List, Optional, Tuple, Union
+ import csv
  from enum import Enum
- from urllib.parse import urlparse, urljoin

  import httpx
  from bs4 import BeautifulSoup
  from urllib import request as urllib_request
- from pydantic import BaseModel, ValidationError, ConfigDict
- from pydantic_settings import BaseSettings
+ from pydantic import BaseModel, Field
+ from pydantic_settings import BaseSettings, SettingsConfigDict

  from aiecs.tools.base_tool import BaseTool
  from aiecs.tools import register_tool

  # Enums for configuration options
+
+
  class HttpMethod(str, Enum):
  GET = "get"
  POST = "post"
@@ -28,12 +29,14 @@ class HttpMethod(str, Enum):
  OPTIONS = "options"
  PATCH = "patch"

+
  class ContentType(str, Enum):
  HTML = "html"
  JSON = "json"
  TEXT = "text"
  BINARY = "binary"

+
  class OutputFormat(str, Enum):
  TEXT = "text"
  JSON = "json"
@@ -41,68 +44,44 @@ class OutputFormat(str, Enum):
  MARKDOWN = "markdown"
  CSV = "csv"

+
  class RenderEngine(str, Enum):
  NONE = "none"
  PLAYWRIGHT = "playwright"

- # Global settings
- class ScraperSettings(BaseSettings):
- """
- Configuration for ScraperTool.
-
- Attributes:
- user_agent (str): User agent for HTTP requests.
- max_content_length (int): Maximum content length in bytes.
- output_dir (str): Directory for output files.
- scrapy_command (str): Command to run Scrapy.
- allowed_domains (List[str]): Allowed domains for scraping.
- blocked_domains (List[str]): Blocked domains for scraping.
- playwright_available (bool): Whether Playwright is available.
- env_prefix (str): Environment variable prefix.
- """
- user_agent: str = "PythonMiddlewareScraper/2.0"
- max_content_length: int = 10 * 1024 * 1024 # 10MB
- output_dir: str = os.path.join(tempfile.gettempdir(), 'scraper_outputs')
- scrapy_command: str = "scrapy"
- allowed_domains: List[str] = []
- blocked_domains: List[str] = []
- playwright_available: bool = False
- env_prefix: str = "SCRAPER_TOOL_"
-
- model_config = ConfigDict(env_prefix="SCRAPER_TOOL_")

  # Exceptions
  class ScraperToolError(Exception):
  """Base exception for ScraperTool errors."""
- pass
+

  class HttpError(ScraperToolError):
  """Raised when HTTP requests fail."""
- pass
+

  class TimeoutError(ScraperToolError):
  """Raised when operations time out."""
- pass
+

  class RateLimitError(ScraperToolError):
  """Raised when rate limits are exceeded."""
- pass
+

  class ParsingError(ScraperToolError):
  """Raised when HTML parsing fails."""
- pass
+

  class RenderingError(ScraperToolError):
  """Raised when rendering fails."""
- pass
+

  class ExternalToolError(ScraperToolError):
  """Raised when external tools fail."""
- pass
+

  class FileOperationError(ScraperToolError):
  """Raised when file operations fail."""
- pass
+

  @register_tool("scraper")
  class ScraperTool(BaseTool):
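
The hunk above keeps the ScraperToolError exception hierarchy (only the redundant pass bodies are dropped) and the @register_tool("scraper") registration. A minimal sketch of how calling code might catch these errors; the import path assumes this diff is the aiecs/tools/task_tools/scraper_tool.py entry from the file list, and the URL is a placeholder:

    import asyncio
    from aiecs.tools.task_tools.scraper_tool import ScraperTool, HttpError, ParsingError

    async def fetch(url: str):
        scraper = ScraperTool()  # default configuration; see the Config class added below
        try:
            return await scraper.get_httpx(url)
        except HttpError as exc:
            # raised for request failures, HTTP error statuses, or oversized responses
            print(f"request failed: {exc}")
        except ParsingError as exc:
            # raised by parse_html when a selector cannot be applied
            print(f"parse failed: {exc}")

    asyncio.run(fetch("https://example.com"))
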
@@ -117,65 +96,128 @@ class ScraperTool(BaseTool):
  - Scrapy integration for advanced crawling
  - Output in various formats: text, JSON, HTML, Markdown, CSV
  """
- def __init__(self, config: Optional[Dict] = None):
+
+ # Configuration schema
+ class Config(BaseSettings):
+ """Configuration for the scraper tool
+
+ Automatically reads from environment variables with SCRAPER_TOOL_ prefix.
+ Example: SCRAPER_TOOL_USER_AGENT -> user_agent
+ """
+
+ model_config = SettingsConfigDict(env_prefix="SCRAPER_TOOL_")
+
+ user_agent: str = Field(
+ default="PythonMiddlewareScraper/2.0",
+ description="User agent for HTTP requests",
+ )
+ max_content_length: int = Field(
+ default=10 * 1024 * 1024,
+ description="Maximum content length in bytes",
+ )
+ output_dir: str = Field(
+ default=os.path.join(tempfile.gettempdir(), "scraper_outputs"),
+ description="Directory for output files",
+ )
+ scrapy_command: str = Field(default="scrapy", description="Command to run Scrapy")
+ allowed_domains: List[str] = Field(default=[], description="Allowed domains for scraping")
+ blocked_domains: List[str] = Field(default=[], description="Blocked domains for scraping")
+ playwright_available: bool = Field(
+ default=False,
+ description="Whether Playwright is available (auto-detected)",
+ )
+
+ # Schema definitions
+ class Get_httpxSchema(BaseModel):
+ """Schema for get_httpx operation"""
+
+ url: str = Field(description="URL to scrape")
+ method: HttpMethod = Field(default=HttpMethod.GET, description="HTTP method to use: GET, POST, PUT, DELETE, HEAD, OPTIONS, or PATCH")
+ params: Optional[Dict[str, str]] = Field(default=None, description="Optional query parameters as dictionary")
+ data: Optional[Dict[str, Any]] = Field(default=None, description="Optional form data as dictionary. Mutually exclusive with json_data")
+ json_data: Optional[Dict[str, Any]] = Field(default=None, description="Optional JSON data as dictionary. Mutually exclusive with data")
+ cookies: Optional[Dict[str, str]] = Field(default=None, description="Optional cookies as dictionary")
+ auth: Optional[Tuple[str, str]] = Field(default=None, description="Optional authentication credentials as (username, password) tuple")
+ verify_ssl: Optional[bool] = Field(default=None, description="Optional SSL certificate verification. If None, defaults to True")
+ allow_redirects: bool = Field(default=True, description="Whether to allow HTTP redirects")
+ content_type: ContentType = Field(default=ContentType.TEXT, description="Expected content type: TEXT, JSON, HTML, or BINARY")
+ headers: Optional[Dict[str, str]] = Field(default=None, description="Optional custom HTTP headers as dictionary")
+ output_format: Optional[OutputFormat] = Field(default=None, description="Optional output format for saving: TEXT, JSON, HTML, MARKDOWN, or CSV")
+ output_path: Optional[str] = Field(default=None, description="Optional path to save output file. Requires output_format to be specified")
+ async_mode: bool = Field(default=True, description="Whether to use async HTTP client. If False, uses synchronous client")
+
+ class Parse_htmlSchema(BaseModel):
+ """Schema for parse_html operation"""
+
+ html: str = Field(description="HTML content string to parse")
+ selector: str = Field(description="CSS selector or XPath expression to find elements")
+ selector_type: str = Field(default="css", description="Selector type: 'css' for CSS selectors or 'xpath' for XPath expressions")
+ extract_attr: Optional[str] = Field(default=None, description="Optional attribute name to extract from matched elements (e.g., 'href', 'src')")
+ extract_text: bool = Field(default=True, description="Whether to extract text content from matched elements. Ignored if extract_attr is specified")
+
+ def __init__(self, config: Optional[Dict] = None, **kwargs):
  """
  Initialize ScraperTool with settings and resources.

  Args:
- config (Dict, optional): Configuration overrides for ScraperSettings.
+ config (Dict, optional): Configuration overrides for ScraperTool.
+ **kwargs: Additional arguments passed to BaseTool (e.g., tool_name)

  Raises:
  ValueError: If config contains invalid settings.
+
+ Configuration is automatically loaded by BaseTool from:
+ 1. Explicit config dict (highest priority)
+ 2. YAML config files (config/tools/scraper.yaml)
+ 3. Environment variables (via dotenv from .env files)
+ 4. Tool defaults (lowest priority)
  """
- super().__init__(config)
- self.settings = ScraperSettings()
- if config:
- try:
- self.settings = self.settings.model_validate({**self.settings.model_dump(), **config})
- except ValidationError as e:
- raise ValueError(f"Invalid settings: {e}")
+ super().__init__(config, **kwargs)
+
+ # Configuration is automatically loaded by BaseTool into self._config_obj
+ # Access config via self._config_obj (BaseSettings instance)
+ self.config = self._config_obj if self._config_obj else self.Config()
+
  self.logger = logging.getLogger(__name__)
  if not self.logger.handlers:
  handler = logging.StreamHandler()
- handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
+ handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
  self.logger.addHandler(handler)
  self.logger.setLevel(logging.INFO)
- os.makedirs(self.settings.output_dir, exist_ok=True)
+ os.makedirs(self.config.output_dir, exist_ok=True)
  self._check_external_tools()

  def _check_external_tools(self):
  """Check if external tools are available."""
  try:
- import playwright
- self.settings.playwright_available = True
+ self.config.playwright_available = True
  except ImportError:
- self.settings.playwright_available = False
-
+ self.config.playwright_available = False

  async def _save_output(self, content: Any, path: str, format: OutputFormat) -> None:
  """Save content to file in the specified format."""
  try:
  os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
  if format == OutputFormat.TEXT:
- with open(path, 'w', encoding='utf-8') as f:
+ with open(path, "w", encoding="utf-8") as f:
  if isinstance(content, dict):
  f.write(json.dumps(content, indent=2))
  else:
  f.write(str(content))
  elif format == OutputFormat.JSON:
- with open(path, 'w', encoding='utf-8') as f:
+ with open(path, "w", encoding="utf-8") as f:
  if isinstance(content, dict):
  json.dump(content, f, indent=2)
  else:
  json.dump({"content": content}, f, indent=2)
  elif format == OutputFormat.HTML:
- with open(path, 'w', encoding='utf-8') as f:
- if isinstance(content, dict) and 'html' in content:
- f.write(content['html'])
+ with open(path, "w", encoding="utf-8") as f:
+ if isinstance(content, dict) and "html" in content:
+ f.write(content["html"])
  else:
  f.write(str(content))
  elif format == OutputFormat.MARKDOWN:
- with open(path, 'w', encoding='utf-8') as f:
+ with open(path, "w", encoding="utf-8") as f:
  if isinstance(content, dict):
  f.write("# Scraper Results\n\n")
  for key, value in content.items():
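
The nested Config class added in the hunk above replaces the old module-level ScraperSettings and is read through pydantic-settings, so SCRAPER_TOOL_-prefixed environment variables map onto its fields. A rough sketch of that mapping and of the documented override order; the values are illustrative, and the explicit-dict override relies on the BaseTool loading described in the new __init__ docstring:

    import os
    from aiecs.tools.task_tools.scraper_tool import ScraperTool

    # SettingsConfigDict(env_prefix="SCRAPER_TOOL_") maps SCRAPER_TOOL_USER_AGENT -> user_agent
    os.environ["SCRAPER_TOOL_USER_AGENT"] = "MyCrawler/1.0"
    os.environ["SCRAPER_TOOL_MAX_CONTENT_LENGTH"] = str(5 * 1024 * 1024)

    cfg = ScraperTool.Config()  # reads the environment at instantiation time
    assert cfg.user_agent == "MyCrawler/1.0"
    assert cfg.max_content_length == 5 * 1024 * 1024

    # An explicit config dict should take precedence over environment values,
    # per the priority list documented in __init__.
    tool = ScraperTool(config={"user_agent": "Override/2.0"})
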
@@ -186,7 +228,9 @@ class ScraperTool(BaseTool):
  f.write(str(content))
  elif format == OutputFormat.CSV:
  import csv
- with open(path, 'w', newline='', encoding='utf-8') as f:
+
+ with open(path, "w", newline="", encoding="utf-8") as f:
+ writer: Union[Any, Any] # csv.writer or csv.DictWriter instance
  if isinstance(content, dict):
  writer = csv.writer(f)
  writer.writerow(content.keys())
@@ -203,7 +247,23 @@ class ScraperTool(BaseTool):
  except Exception as e:
  raise FileOperationError(f"Error saving output: {str(e)}")

- async def get_httpx(self, url: str, method: HttpMethod = HttpMethod.GET, params: Optional[Dict[str, str]] = None, data: Optional[Dict[str, Any]] = None, json_data: Optional[Dict[str, Any]] = None, cookies: Optional[Dict[str, str]] = None, auth: Optional[Tuple[str, str]] = None, verify_ssl: Optional[bool] = None, allow_redirects: bool = True, content_type: ContentType = ContentType.TEXT, headers: Optional[Dict[str, str]] = None, output_format: Optional[OutputFormat] = None, output_path: Optional[str] = None, async_mode: bool = True) -> Any:
+ async def get_httpx(
+ self,
+ url: str,
+ method: HttpMethod = HttpMethod.GET,
+ params: Optional[Dict[str, str]] = None,
+ data: Optional[Dict[str, Any]] = None,
+ json_data: Optional[Dict[str, Any]] = None,
+ cookies: Optional[Dict[str, str]] = None,
+ auth: Optional[Tuple[str, str]] = None,
+ verify_ssl: Optional[bool] = None,
+ allow_redirects: bool = True,
+ content_type: ContentType = ContentType.TEXT,
+ headers: Optional[Dict[str, str]] = None,
+ output_format: Optional[OutputFormat] = None,
+ output_path: Optional[str] = None,
+ async_mode: bool = True,
+ ) -> Any:
  """
  Execute HTTP request using httpx library (supports both sync and async).
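
A hedged usage sketch of the reworked get_httpx shown in this hunk; the parameter names come from Get_httpxSchema above, and the URLs and output path are placeholders:

    import asyncio
    from aiecs.tools.task_tools.scraper_tool import ScraperTool, ContentType, OutputFormat

    async def main():
        scraper = ScraperTool()
        # JSON API call with query parameters and a custom header
        items = await scraper.get_httpx(
            "https://example.com/api/items",
            params={"page": "1"},
            headers={"Accept": "application/json"},
            content_type=ContentType.JSON,
        )
        # HTML fetch persisted to disk; the result dict then carries "saved_to"
        page = await scraper.get_httpx(
            "https://example.com",
            content_type=ContentType.HTML,
            output_format=OutputFormat.HTML,
            output_path="/tmp/example.html",
        )
        print(items, page["saved_to"])

    asyncio.run(main())
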

@@ -231,21 +291,21 @@ class ScraperTool(BaseTool):
  """
  try:
  headers = headers or {}
- if 'User-Agent' not in headers:
- headers['User-Agent'] = self.settings.user_agent
- kwargs = {
- 'params': params,
- 'headers': headers,
- 'follow_redirects': allow_redirects,
+ if "User-Agent" not in headers:
+ headers["User-Agent"] = self.config.user_agent
+ kwargs: Dict[str, Any] = {
+ "params": params,
+ "headers": headers,
+ "follow_redirects": allow_redirects,
  }
  if auth:
- kwargs['auth'] = auth
+ kwargs["auth"] = auth # httpx accepts Tuple[str, str] for auth
  if cookies:
- kwargs['cookies'] = cookies
+ kwargs["cookies"] = cookies
  if json_data:
- kwargs['json'] = json_data
+ kwargs["json"] = json_data
  elif data:
- kwargs['data'] = data
+ kwargs["data"] = data

  if async_mode:
  async with httpx.AsyncClient(verify=verify_ssl if verify_ssl is not None else True) as client:
@@ -260,30 +320,47 @@ class ScraperTool(BaseTool):
  resp.raise_for_status()
  except httpx.HTTPStatusError as e:
  raise HttpError(f"HTTP {e.response.status_code}: {e.response.reason_phrase} for {url}")
-
- if len(resp.content) > self.settings.max_content_length:
+
+ if len(resp.content) > self.config.max_content_length:
  raise HttpError(f"Response content too large: {len(resp.content)} bytes")

  if content_type == ContentType.JSON:
  result = resp.json()
  elif content_type == ContentType.HTML:
- result = {'html': resp.text, 'url': str(resp.url), 'status': resp.status_code}
+ result = {
+ "html": resp.text,
+ "url": str(resp.url),
+ "status": resp.status_code,
+ }
  elif content_type == ContentType.BINARY:
- result = {'content': resp.content, 'url': str(resp.url), 'status': resp.status_code}
+ result = {
+ "content": resp.content,
+ "url": str(resp.url),
+ "status": resp.status_code,
+ }
  else:
  result = resp.text

  if output_format and output_path:
  await self._save_output(result, output_path, output_format)
  if isinstance(result, dict):
- result['saved_to'] = output_path
+ result["saved_to"] = output_path
  else:
- result = {'content': result, 'saved_to': output_path}
+ result = {"content": result, "saved_to": output_path}
  return result
  except httpx.RequestError as e:
  raise HttpError(f"Request failed: {str(e)}")

- async def get_urllib(self, url: str, method: HttpMethod = HttpMethod.GET, data: Optional[Dict[str, Any]] = None, content_type: ContentType = ContentType.TEXT, headers: Optional[Dict[str, str]] = None, output_format: Optional[OutputFormat] = None, output_path: Optional[str] = None) -> Any:
+ async def get_urllib(
+ self,
+ url: str,
+ method: HttpMethod = HttpMethod.GET,
+ data: Optional[Dict[str, Any]] = None,
+ content_type: ContentType = ContentType.TEXT,
+ headers: Optional[Dict[str, str]] = None,
+ output_format: Optional[OutputFormat] = None,
+ output_path: Optional[str] = None,
+ ) -> Any:
  """
  Execute HTTP request using urllib.

@@ -305,10 +382,10 @@ class ScraperTool(BaseTool):
  try:
  import urllib.parse
  import urllib.error
-
+
  headers = headers or {}
- if 'User-Agent' not in headers:
- headers['User-Agent'] = self.settings.user_agent
+ if "User-Agent" not in headers:
+ headers["User-Agent"] = self.config.user_agent
  data_bytes = None
  if data:
  data_bytes = urllib.parse.urlencode(data).encode()
@@ -316,42 +393,122 @@ class ScraperTool(BaseTool):
  str(url),
  data=data_bytes,
  headers=headers,
- method=method.value.upper()
+ method=method.value.upper(),
  )
  with urllib_request.urlopen(req) as resp:
- content_length = resp.getheader('Content-Length')
- if content_length and int(content_length) > self.settings.max_content_length:
+ content_length = resp.getheader("Content-Length")
+ if content_length and int(content_length) > self.config.max_content_length:
  raise HttpError(f"Response content too large: {content_length} bytes")
  content = resp.read()
- charset = resp.headers.get_content_charset() or 'utf-8'
+ charset = resp.headers.get_content_charset() or "utf-8"
  if content_type == ContentType.JSON:
- result = json.loads(content.decode(charset, errors='ignore'))
+ result = json.loads(content.decode(charset, errors="ignore"))
  elif content_type == ContentType.HTML:
- result = {'html': content.decode(charset, errors='ignore'), 'url': resp.url, 'status': resp.status}
+ result = {
+ "html": content.decode(charset, errors="ignore"),
+ "url": resp.url,
+ "status": resp.status,
+ }
  elif content_type == ContentType.BINARY:
- result = {'content': content, 'url': resp.url, 'status': resp.status}
+ result = {
+ "content": content,
+ "url": resp.url,
+ "status": resp.status,
+ }
  else:
- result = content.decode(charset, errors='ignore')
+ result = content.decode(charset, errors="ignore")
  if output_format and output_path:
  await self._save_output(result, output_path, output_format)
  if isinstance(result, dict):
- result['saved_to'] = output_path
+ result["saved_to"] = output_path
  else:
- result = {'content': result, 'saved_to': output_path}
+ result = {"content": result, "saved_to": output_path}
  return result
  except urllib.error.URLError as e:
  raise HttpError(f"Request failed: {str(e)}")

  # Legacy method names for backward compatibility
- async def get_requests(self, url: str, method: HttpMethod = HttpMethod.GET, params: Optional[Dict[str, str]] = None, data: Optional[Dict[str, Any]] = None, json_data: Optional[Dict[str, Any]] = None, cookies: Optional[Dict[str, str]] = None, auth: Optional[Tuple[str, str]] = None, verify_ssl: Optional[bool] = None, allow_redirects: bool = True, content_type: ContentType = ContentType.TEXT, headers: Optional[Dict[str, str]] = None, output_format: Optional[OutputFormat] = None, output_path: Optional[str] = None) -> Any:
+ async def get_requests(
+ self,
+ url: str,
+ method: HttpMethod = HttpMethod.GET,
+ params: Optional[Dict[str, str]] = None,
+ data: Optional[Dict[str, Any]] = None,
+ json_data: Optional[Dict[str, Any]] = None,
+ cookies: Optional[Dict[str, str]] = None,
+ auth: Optional[Tuple[str, str]] = None,
+ verify_ssl: Optional[bool] = None,
+ allow_redirects: bool = True,
+ content_type: ContentType = ContentType.TEXT,
+ headers: Optional[Dict[str, str]] = None,
+ output_format: Optional[OutputFormat] = None,
+ output_path: Optional[str] = None,
+ ) -> Any:
  """Legacy method - now uses httpx in sync mode."""
- return await self.get_httpx(url, method, params, data, json_data, cookies, auth, verify_ssl, allow_redirects, content_type, headers, output_format, output_path, async_mode=False)
-
- async def get_aiohttp(self, url: str, method: HttpMethod = HttpMethod.GET, params: Optional[Dict[str, str]] = None, data: Optional[Dict[str, Any]] = None, json_data: Optional[Dict[str, Any]] = None, cookies: Optional[Dict[str, str]] = None, auth: Optional[Tuple[str, str]] = None, verify_ssl: Optional[bool] = None, allow_redirects: bool = True, content_type: ContentType = ContentType.TEXT, headers: Optional[Dict[str, str]] = None, output_format: Optional[OutputFormat] = None, output_path: Optional[str] = None) -> Any:
+ return await self.get_httpx(
+ url,
+ method,
+ params,
+ data,
+ json_data,
+ cookies,
+ auth,
+ verify_ssl,
+ allow_redirects,
+ content_type,
+ headers,
+ output_format,
+ output_path,
+ async_mode=False,
+ )
+
+ async def get_aiohttp(
+ self,
+ url: str,
+ method: HttpMethod = HttpMethod.GET,
+ params: Optional[Dict[str, str]] = None,
+ data: Optional[Dict[str, Any]] = None,
+ json_data: Optional[Dict[str, Any]] = None,
+ cookies: Optional[Dict[str, str]] = None,
+ auth: Optional[Tuple[str, str]] = None,
+ verify_ssl: Optional[bool] = None,
+ allow_redirects: bool = True,
+ content_type: ContentType = ContentType.TEXT,
+ headers: Optional[Dict[str, str]] = None,
+ output_format: Optional[OutputFormat] = None,
+ output_path: Optional[str] = None,
+ ) -> Any:
  """Legacy method - now uses httpx in async mode."""
- return await self.get_httpx(url, method, params, data, json_data, cookies, auth, verify_ssl, allow_redirects, content_type, headers, output_format, output_path, async_mode=True)
-
- async def render(self, url: str, engine: RenderEngine = RenderEngine.PLAYWRIGHT, wait_time: int = 5, wait_selector: Optional[str] = None, scroll_to_bottom: bool = False, screenshot: bool = False, screenshot_path: Optional[str] = None, headers: Optional[Dict[str, str]] = None, output_format: Optional[OutputFormat] = None, output_path: Optional[str] = None) -> Dict[str, Any]:
+ return await self.get_httpx(
+ url,
+ method,
+ params,
+ data,
+ json_data,
+ cookies,
+ auth,
+ verify_ssl,
+ allow_redirects,
+ content_type,
+ headers,
+ output_format,
+ output_path,
+ async_mode=True,
+ )
+
+ async def render(
+ self,
+ url: str,
+ engine: RenderEngine = RenderEngine.PLAYWRIGHT,
+ wait_time: int = 5,
+ wait_selector: Optional[str] = None,
+ scroll_to_bottom: bool = False,
+ screenshot: bool = False,
+ screenshot_path: Optional[str] = None,
+ headers: Optional[Dict[str, str]] = None,
+ output_format: Optional[OutputFormat] = None,
+ output_path: Optional[str] = None,
+ ) -> Dict[str, Any]:
  """
  Render a web page using a headless browser (Playwright).
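
A short sketch of the reformatted render() path above; it assumes Playwright and its Chromium build are installed, and the URL and selector are placeholders:

    import asyncio
    from aiecs.tools.task_tools.scraper_tool import ScraperTool, RenderEngine

    async def snapshot():
        scraper = ScraperTool()
        result = await scraper.render(
            "https://example.com",
            engine=RenderEngine.PLAYWRIGHT,
            wait_selector="#content",   # wait for this selector instead of network idle
            scroll_to_bottom=True,      # trigger lazy-loaded content before capturing
            screenshot=True,            # saved under Config.output_dir when no path is given
        )
        print(result["title"], result["screenshot"])

    asyncio.run(snapshot())
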

@@ -375,56 +532,85 @@ class ScraperTool(BaseTool):
  """
  try:
  if engine == RenderEngine.PLAYWRIGHT:
- if not self.settings.playwright_available:
+ if not self.config.playwright_available:
  raise RenderingError("Playwright is not available. Install with 'pip install playwright'")
- result = await self._render_with_playwright(url, wait_time, wait_selector, scroll_to_bottom, screenshot, screenshot_path)
+ result = await self._render_with_playwright(
+ url,
+ wait_time,
+ wait_selector,
+ scroll_to_bottom,
+ screenshot,
+ screenshot_path,
+ )
  else:
  raise RenderingError(f"Unsupported rendering engine: {engine}. Only PLAYWRIGHT is supported.")
  if output_format and output_path:
  await self._save_output(result, output_path, output_format)
- result['saved_to'] = output_path
+ result["saved_to"] = output_path
  return result
  except Exception as e:
  raise RenderingError(f"Failed to render page: {str(e)}")

- async def _render_with_playwright(self, url: str, wait_time: int, wait_selector: Optional[str], scroll_to_bottom: bool, screenshot: bool, screenshot_path: Optional[str]) -> Dict[str, Any]:
+ async def _render_with_playwright(
+ self,
+ url: str,
+ wait_time: int,
+ wait_selector: Optional[str],
+ scroll_to_bottom: bool,
+ screenshot: bool,
+ screenshot_path: Optional[str],
+ ) -> Dict[str, Any]:
  """Render a web page using Playwright with async API."""
  from playwright.async_api import async_playwright
+
  async with async_playwright() as p:
  browser = await p.chromium.launch()
  page = await browser.new_page(
- user_agent=self.settings.user_agent,
- viewport={'width': 1280, 'height': 800}
+ user_agent=self.config.user_agent,
+ viewport={"width": 1280, "height": 800},
  )
  try:
  await page.goto(url)
  if wait_selector:
  await page.wait_for_selector(wait_selector)
  else:
- await page.wait_for_load_state('networkidle')
+ await page.wait_for_load_state("networkidle")
  if scroll_to_bottom:
- await page.evaluate('window.scrollTo(0, document.body.scrollHeight)')
+ await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
  await page.wait_for_timeout(1000)
  screenshot_result = None
  if screenshot:
- screenshot_path = screenshot_path or os.path.join(self.settings.output_dir, f"screenshot_{int(time.time())}.png")
- os.makedirs(os.path.dirname(os.path.abspath(screenshot_path)), exist_ok=True)
+ screenshot_path = screenshot_path or os.path.join(
+ self.config.output_dir,
+ f"screenshot_{int(time.time())}.png",
+ )
+ os.makedirs(
+ os.path.dirname(os.path.abspath(screenshot_path)),
+ exist_ok=True,
+ )
  await page.screenshot(path=screenshot_path)
  screenshot_result = screenshot_path
  html = await page.content()
  title = await page.title()
  result = {
- 'html': html,
- 'title': title,
- 'url': page.url,
- 'screenshot': screenshot_result
+ "html": html,
+ "title": title,
+ "url": page.url,
+ "screenshot": screenshot_result,
  }
  return result
  finally:
  await browser.close()

-
- def crawl_scrapy(self, project_path: str, spider_name: str, output_path: str, spider_args: Optional[Dict[str, str]] = None, headers: Optional[Dict[str, str]] = None, output_format: Optional[OutputFormat] = None) -> Dict[str, Any]:
+ def crawl_scrapy(
+ self,
+ project_path: str,
+ spider_name: str,
+ output_path: str,
+ spider_args: Optional[Dict[str, str]] = None,
+ headers: Optional[Dict[str, str]] = None,
+ output_format: Optional[OutputFormat] = None,
+ ) -> Dict[str, Any]:
  """
  Execute a Scrapy spider in an existing project and output results to a file.
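
crawl_scrapy shells out to the Scrapy CLI configured by Config.scrapy_command; a sketch of an invocation, assuming an existing Scrapy project and a hypothetical spider name and paths:

    from aiecs.tools.task_tools.scraper_tool import ScraperTool

    scraper = ScraperTool()
    result = scraper.crawl_scrapy(
        project_path="/path/to/scrapy_project",  # directory containing scrapy.cfg
        spider_name="quotes",                    # hypothetical spider
        output_path="/tmp/quotes.json",          # forwarded to scrapy as -o
        spider_args={"category": "books"},       # forwarded as -a category=books
    )
    print(result["file_size"], result["execution_time"])
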

@@ -447,21 +633,25 @@ class ScraperTool(BaseTool):
  start_time = time.time()
  os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
  cmd = [
- self.settings.scrapy_command,
- 'crawl', spider_name,
- '-o', output_path,
- '-s', f'USER_AGENT={self.settings.user_agent}',
- '-s', 'LOG_LEVEL=INFO'
+ self.config.scrapy_command,
+ "crawl",
+ spider_name,
+ "-o",
+ output_path,
+ "-s",
+ f"USER_AGENT={self.config.user_agent}",
+ "-s",
+ "LOG_LEVEL=INFO",
  ]
  if spider_args:
  for k, v in spider_args.items():
- cmd += ['-a', f"{k}={v}"]
+ cmd += ["-a", f"{k}={v}"]
  process = subprocess.run(
  cmd,
  cwd=project_path,
  stdout=subprocess.PIPE,
  stderr=subprocess.PIPE,
- text=True
+ text=True,
  )
  if process.returncode != 0:
  error_msg = process.stderr.strip()
@@ -470,19 +660,26 @@ class ScraperTool(BaseTool):
  raise ExternalToolError(f"Scrapy crawl did not create output file: {output_path}")
  file_size = os.path.getsize(output_path)
  result = {
- 'output_path': output_path,
- 'execution_time': time.time() - start_time,
- 'file_size': file_size,
- 'stdout': process.stdout.strip(),
- 'stderr': process.stderr.strip()
+ "output_path": output_path,
+ "execution_time": time.time() - start_time,
+ "file_size": file_size,
+ "stdout": process.stdout.strip(),
+ "stderr": process.stderr.strip(),
  }
  return result
  except subprocess.TimeoutExpired:
- raise TimeoutError(f"Scrapy crawl timed out")
+ raise TimeoutError("Scrapy crawl timed out")
  except Exception as e:
  raise ExternalToolError(f"Error running Scrapy: {str(e)}")

- def parse_html(self, html: str, selector: str, selector_type: str = "css", extract_attr: Optional[str] = None, extract_text: bool = True) -> Dict[str, Any]:
+ def parse_html(
+ self,
+ html: str,
+ selector: str,
+ selector_type: str = "css",
+ extract_attr: Optional[str] = None,
+ extract_text: bool = True,
+ ) -> Dict[str, Any]:
  """
  Parse HTML content using BeautifulSoup.
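
And a sketch of parse_html with both selector types; the HTML snippet is made up, and the XPath branch requires lxml:

    from aiecs.tools.task_tools.scraper_tool import ScraperTool

    scraper = ScraperTool()
    html = '<ul><li><a href="/a">First</a></li><li><a href="/b">Second</a></li></ul>'

    # CSS selector, extracting text (the default)
    texts = scraper.parse_html(html, selector="li a", selector_type="css")
    # -> {"selector": "li a", "selector_type": "css", "count": 2, "results": ["First", "Second"]}

    # XPath selector, extracting an attribute instead of text
    links = scraper.parse_html(html, selector="//a", selector_type="xpath", extract_attr="href")
    print(texts["results"], links["results"])
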

@@ -500,36 +697,37 @@ class ScraperTool(BaseTool):
  ParsingError: If parsing fails.
  """
  try:
- soup = BeautifulSoup(html, 'html.parser')
- if selector_type == 'css':
+ soup = BeautifulSoup(html, "html.parser")
+ if selector_type == "css":
  elements = soup.select(selector)
  else:
  from lxml.html import fromstring
  from lxml.etree import XPath
+
  root = fromstring(html)
  xpath = XPath(selector)
  elements = xpath(root)
  results = []
  for element in elements:
  if extract_attr:
- value = element.get(extract_attr) if hasattr(element, 'get') else element.get(extract_attr)
+ value = element.get(extract_attr) if hasattr(element, "get") else element.get(extract_attr)
  if value is not None:
  results.append(value)
  elif extract_text:
- if hasattr(element, 'text_content') and callable(getattr(element, 'text_content')):
+ if hasattr(element, "text_content") and callable(getattr(element, "text_content")):
  # lxml element
- text = element.text_content()
+ text = element.text_content() # type: ignore[misc]
  else:
  # BeautifulSoup element
- text = element.get_text()
-
- if text and text.strip():
- results.append(text.strip())
+ text = element.get_text() # type: ignore[misc]
+
+ if text and text.strip(): # type: ignore[misc]
+ results.append(text.strip()) # type: ignore[misc]
  return {
- 'selector': selector,
- 'selector_type': selector_type,
- 'count': len(results),
- 'results': results
+ "selector": selector,
+ "selector_type": selector_type,
+ "count": len(results),
+ "results": results,
  }
  except Exception as e:
  raise ParsingError(f"Error parsing HTML: {str(e)}")
@@ -542,7 +740,3 @@ class ScraperTool(BaseTool):
  head = get_httpx
  options = get_httpx
  patch = get_httpx
-
- # Legacy method aliases
- get_requests = get_httpx
- get_aiohttp = get_httpx
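
The class-level get_requests / get_aiohttp aliases removed in this final hunk are superseded by the real async methods defined earlier in the diff, while the verb aliases that remain (head, options, patch, presumably alongside siblings outside this hunk) still point at get_httpx. An alias alone does not change the HTTP verb; a sketch:

    from aiecs.tools.task_tools.scraper_tool import HttpMethod

    # head/options/patch are plain aliases of get_httpx, so the HTTP verb is still
    # chosen via the method argument (illustrative; "scraper" is a ScraperTool instance)
    resp = await scraper.head("https://example.com", method=HttpMethod.HEAD)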