aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiecs might be problematic. Click here for more details.
- aiecs/__init__.py +13 -16
- aiecs/__main__.py +7 -7
- aiecs/aiecs_client.py +269 -75
- aiecs/application/executors/operation_executor.py +79 -54
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
- aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
- aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
- aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +98 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
- aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
- aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
- aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
- aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
- aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
- aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
- aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
- aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
- aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
- aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
- aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
- aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
- aiecs/application/knowledge_graph/search/reranker.py +293 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +471 -0
- aiecs/config/__init__.py +20 -5
- aiecs/config/config.py +762 -31
- aiecs/config/graph_config.py +131 -0
- aiecs/config/tool_config.py +399 -0
- aiecs/core/__init__.py +29 -13
- aiecs/core/interface/__init__.py +2 -2
- aiecs/core/interface/execution_interface.py +22 -22
- aiecs/core/interface/storage_interface.py +37 -88
- aiecs/core/registry/__init__.py +31 -0
- aiecs/core/registry/service_registry.py +92 -0
- aiecs/domain/__init__.py +270 -1
- aiecs/domain/agent/__init__.py +191 -0
- aiecs/domain/agent/base_agent.py +3870 -0
- aiecs/domain/agent/exceptions.py +99 -0
- aiecs/domain/agent/graph_aware_mixin.py +569 -0
- aiecs/domain/agent/hybrid_agent.py +1435 -0
- aiecs/domain/agent/integration/__init__.py +29 -0
- aiecs/domain/agent/integration/context_compressor.py +216 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
- aiecs/domain/agent/integration/protocols.py +281 -0
- aiecs/domain/agent/integration/retry_policy.py +218 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
- aiecs/domain/agent/lifecycle.py +291 -0
- aiecs/domain/agent/llm_agent.py +692 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +1124 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +163 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
- aiecs/domain/agent/models.py +884 -0
- aiecs/domain/agent/observability.py +479 -0
- aiecs/domain/agent/persistence.py +449 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +159 -0
- aiecs/domain/agent/prompts/formatters.py +187 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +253 -0
- aiecs/domain/agent/tool_agent.py +444 -0
- aiecs/domain/agent/tools/__init__.py +15 -0
- aiecs/domain/agent/tools/schema_generator.py +364 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +469 -0
- aiecs/domain/community/analytics.py +432 -0
- aiecs/domain/community/collaborative_workflow.py +648 -0
- aiecs/domain/community/communication_hub.py +634 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +796 -0
- aiecs/domain/community/community_manager.py +803 -0
- aiecs/domain/community/decision_engine.py +849 -0
- aiecs/domain/community/exceptions.py +231 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +234 -0
- aiecs/domain/community/resource_manager.py +461 -0
- aiecs/domain/community/shared_context_manager.py +589 -0
- aiecs/domain/context/__init__.py +40 -10
- aiecs/domain/context/context_engine.py +1910 -0
- aiecs/domain/context/conversation_models.py +87 -53
- aiecs/domain/context/graph_memory.py +582 -0
- aiecs/domain/execution/model.py +12 -4
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +148 -0
- aiecs/domain/knowledge_graph/models/evidence.py +178 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
- aiecs/domain/knowledge_graph/models/path.py +171 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
- aiecs/domain/knowledge_graph/models/query.py +261 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
- aiecs/domain/knowledge_graph/models/relation.py +202 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
- aiecs/domain/task/dsl_processor.py +172 -56
- aiecs/domain/task/model.py +20 -8
- aiecs/domain/task/task_context.py +27 -24
- aiecs/infrastructure/__init__.py +0 -2
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +837 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
- aiecs/infrastructure/graph_storage/cache.py +424 -0
- aiecs/infrastructure/graph_storage/distributed.py +223 -0
- aiecs/infrastructure/graph_storage/error_handling.py +380 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
- aiecs/infrastructure/graph_storage/metrics.py +344 -0
- aiecs/infrastructure/graph_storage/migration.py +400 -0
- aiecs/infrastructure/graph_storage/pagination.py +483 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
- aiecs/infrastructure/graph_storage/postgres.py +1563 -0
- aiecs/infrastructure/graph_storage/property_storage.py +353 -0
- aiecs/infrastructure/graph_storage/protocols.py +76 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
- aiecs/infrastructure/graph_storage/streaming.py +487 -0
- aiecs/infrastructure/graph_storage/tenant.py +412 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
- aiecs/infrastructure/messaging/websocket_manager.py +51 -35
- aiecs/infrastructure/monitoring/__init__.py +22 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
- aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
- aiecs/infrastructure/monitoring/structured_logger.py +3 -7
- aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
- aiecs/infrastructure/persistence/__init__.py +14 -1
- aiecs/infrastructure/persistence/context_engine_client.py +184 -0
- aiecs/infrastructure/persistence/database_manager.py +67 -43
- aiecs/infrastructure/persistence/file_storage.py +180 -103
- aiecs/infrastructure/persistence/redis_client.py +74 -21
- aiecs/llm/__init__.py +73 -25
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
- aiecs/llm/client_factory.py +224 -36
- aiecs/llm/client_resolver.py +155 -0
- aiecs/llm/clients/__init__.py +38 -0
- aiecs/llm/clients/base_client.py +324 -0
- aiecs/llm/clients/google_function_calling_mixin.py +457 -0
- aiecs/llm/clients/googleai_client.py +241 -0
- aiecs/llm/clients/openai_client.py +158 -0
- aiecs/llm/clients/openai_compatible_mixin.py +367 -0
- aiecs/llm/clients/vertex_client.py +897 -0
- aiecs/llm/clients/xai_client.py +201 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +272 -0
- aiecs/llm/config/config_validator.py +206 -0
- aiecs/llm/config/model_config.py +143 -0
- aiecs/llm/protocols.py +149 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +89 -0
- aiecs/main.py +140 -121
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/module_checker.py +499 -0
- aiecs/scripts/aid/version_manager.py +235 -0
- aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
- aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
- aiecs/scripts/dependance_check/__init__.py +15 -0
- aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
- aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
- aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
- aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
- aiecs/scripts/knowledge_graph/__init__.py +3 -0
- aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
- aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
- aiecs/scripts/tools_develop/README.md +671 -0
- aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
- aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
- aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
- aiecs/scripts/tools_develop/schema_coverage.py +511 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
- aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
- aiecs/scripts/tools_develop/verify_tools.py +352 -0
- aiecs/tasks/__init__.py +0 -1
- aiecs/tasks/worker.py +115 -47
- aiecs/tools/__init__.py +194 -72
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +330 -0
- aiecs/tools/apisource/providers/__init__.py +112 -0
- aiecs/tools/apisource/providers/base.py +671 -0
- aiecs/tools/apisource/providers/census.py +397 -0
- aiecs/tools/apisource/providers/fred.py +535 -0
- aiecs/tools/apisource/providers/newsapi.py +409 -0
- aiecs/tools/apisource/providers/worldbank.py +352 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +363 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
- aiecs/tools/apisource/tool.py +832 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +334 -0
- aiecs/tools/base_tool.py +415 -21
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +607 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
- aiecs/tools/docs/content_insertion_tool.py +1320 -0
- aiecs/tools/docs/document_creator_tool.py +1323 -0
- aiecs/tools/docs/document_layout_tool.py +1160 -0
- aiecs/tools/docs/document_parser_tool.py +1011 -0
- aiecs/tools/docs/document_writer_tool.py +1829 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
- aiecs/tools/langchain_adapter.py +300 -138
- aiecs/tools/schema_generator.py +455 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +581 -0
- aiecs/tools/search_tool/cache.py +264 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +224 -0
- aiecs/tools/search_tool/core.py +778 -0
- aiecs/tools/search_tool/deduplicator.py +119 -0
- aiecs/tools/search_tool/error_handler.py +242 -0
- aiecs/tools/search_tool/metrics.py +343 -0
- aiecs/tools/search_tool/rate_limiter.py +172 -0
- aiecs/tools/search_tool/schemas.py +275 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
- aiecs/tools/statistics/data_loader_tool.py +555 -0
- aiecs/tools/statistics/data_profiler_tool.py +638 -0
- aiecs/tools/statistics/data_transformer_tool.py +580 -0
- aiecs/tools/statistics/data_visualizer_tool.py +498 -0
- aiecs/tools/statistics/model_trainer_tool.py +507 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
- aiecs/tools/task_tools/__init__.py +49 -36
- aiecs/tools/task_tools/chart_tool.py +200 -184
- aiecs/tools/task_tools/classfire_tool.py +268 -267
- aiecs/tools/task_tools/image_tool.py +175 -131
- aiecs/tools/task_tools/office_tool.py +226 -146
- aiecs/tools/task_tools/pandas_tool.py +477 -121
- aiecs/tools/task_tools/report_tool.py +390 -142
- aiecs/tools/task_tools/research_tool.py +149 -79
- aiecs/tools/task_tools/scraper_tool.py +339 -145
- aiecs/tools/task_tools/stats_tool.py +448 -209
- aiecs/tools/temp_file_manager.py +26 -24
- aiecs/tools/tool_executor/__init__.py +18 -16
- aiecs/tools/tool_executor/tool_executor.py +364 -52
- aiecs/utils/LLM_output_structor.py +74 -48
- aiecs/utils/__init__.py +14 -3
- aiecs/utils/base_callback.py +0 -3
- aiecs/utils/cache_provider.py +696 -0
- aiecs/utils/execution_utils.py +50 -31
- aiecs/utils/prompt_loader.py +1 -0
- aiecs/utils/token_usage_repository.py +37 -11
- aiecs/ws/socket_server.py +14 -4
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
- aiecs-1.7.6.dist-info/RECORD +337 -0
- aiecs-1.7.6.dist-info/entry_points.txt +13 -0
- aiecs/config/registry.py +0 -19
- aiecs/domain/context/content_engine.py +0 -982
- aiecs/llm/base_client.py +0 -99
- aiecs/llm/openai_client.py +0 -125
- aiecs/llm/vertex_client.py +0 -186
- aiecs/llm/xai_client.py +0 -184
- aiecs/scripts/dependency_checker.py +0 -857
- aiecs/scripts/quick_dependency_check.py +0 -269
- aiecs/tools/task_tools/search_api.py +0 -7
- aiecs-1.0.1.dist-info/RECORD +0 -90
- aiecs-1.0.1.dist-info/entry_points.txt +0 -7
- /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
- /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
- /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
- /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
|
@@ -3,13 +3,16 @@
|
|
|
3
3
|
Automated script to download required NLP data for AIECS ClassifierTool.
|
|
4
4
|
|
|
5
5
|
This script downloads:
|
|
6
|
-
1. NLTK stopwords data package for keyword extraction
|
|
6
|
+
1. NLTK stopwords data package for keyword extraction (to environment-specific location)
|
|
7
7
|
2. spaCy English model (en_core_web_sm) for text processing
|
|
8
8
|
3. spaCy Chinese model (zh_core_web_sm) for Chinese text processing
|
|
9
|
+
|
|
10
|
+
All NLP data is downloaded to the current Poetry/virtual environment to ensure
|
|
11
|
+
environment isolation. NLTK data is stored in <env_path>/nltk_data/ directory.
|
|
9
12
|
"""
|
|
10
13
|
|
|
11
|
-
import os
|
|
12
14
|
import sys
|
|
15
|
+
import os
|
|
13
16
|
import subprocess
|
|
14
17
|
import logging
|
|
15
18
|
from pathlib import Path
|
|
@@ -20,11 +23,11 @@ def setup_logging():
|
|
|
20
23
|
"""Setup logging configuration."""
|
|
21
24
|
logging.basicConfig(
|
|
22
25
|
level=logging.INFO,
|
|
23
|
-
format=
|
|
26
|
+
format="%(asctime)s - %(levelname)s - %(message)s",
|
|
24
27
|
handlers=[
|
|
25
28
|
logging.StreamHandler(sys.stdout),
|
|
26
|
-
logging.FileHandler(
|
|
27
|
-
]
|
|
29
|
+
logging.FileHandler("nlp_data_download.log"),
|
|
30
|
+
],
|
|
28
31
|
)
|
|
29
32
|
return logging.getLogger(__name__)
|
|
30
33
|
|
|
@@ -32,22 +35,17 @@ def setup_logging():
|
|
|
32
35
|
def run_command(cmd: List[str], logger: logging.Logger) -> Tuple[bool, str]:
|
|
33
36
|
"""
|
|
34
37
|
Run a shell command and return success status and output.
|
|
35
|
-
|
|
38
|
+
|
|
36
39
|
Args:
|
|
37
40
|
cmd: List of command arguments
|
|
38
41
|
logger: Logger instance
|
|
39
|
-
|
|
42
|
+
|
|
40
43
|
Returns:
|
|
41
44
|
Tuple of (success, output)
|
|
42
45
|
"""
|
|
43
46
|
try:
|
|
44
47
|
logger.info(f"Running command: {' '.join(cmd)}")
|
|
45
|
-
result = subprocess.run(
|
|
46
|
-
cmd,
|
|
47
|
-
capture_output=True,
|
|
48
|
-
text=True,
|
|
49
|
-
check=True
|
|
50
|
-
)
|
|
48
|
+
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
|
|
51
49
|
logger.info(f"Command succeeded: {result.stdout}")
|
|
52
50
|
return True, result.stdout
|
|
53
51
|
except subprocess.CalledProcessError as e:
|
|
@@ -60,14 +58,54 @@ def run_command(cmd: List[str], logger: logging.Logger) -> Tuple[bool, str]:
|
|
|
60
58
|
return False, error_msg
|
|
61
59
|
|
|
62
60
|
|
|
61
|
+
def get_environment_path(logger: logging.Logger) -> Optional[Path]:
|
|
62
|
+
"""
|
|
63
|
+
Get the path to the current Python environment (virtual environment or Poetry environment).
|
|
64
|
+
|
|
65
|
+
Args:
|
|
66
|
+
logger: Logger instance
|
|
67
|
+
|
|
68
|
+
Returns:
|
|
69
|
+
Path to the environment if found, None otherwise
|
|
70
|
+
"""
|
|
71
|
+
# Check VIRTUAL_ENV environment variable first (common for venv/virtualenv)
|
|
72
|
+
venv_path = os.environ.get("VIRTUAL_ENV")
|
|
73
|
+
if venv_path:
|
|
74
|
+
env_path = Path(venv_path)
|
|
75
|
+
if env_path.exists():
|
|
76
|
+
logger.info(f"Found virtual environment via VIRTUAL_ENV: {env_path}")
|
|
77
|
+
return env_path
|
|
78
|
+
|
|
79
|
+
# Check sys.prefix - this points to the virtual environment if we're in one
|
|
80
|
+
# In a virtual environment, sys.prefix != sys.base_prefix
|
|
81
|
+
if sys.prefix != sys.base_prefix:
|
|
82
|
+
env_path = Path(sys.prefix)
|
|
83
|
+
if env_path.exists():
|
|
84
|
+
logger.info(f"Found virtual environment via sys.prefix: {env_path}")
|
|
85
|
+
return env_path
|
|
86
|
+
|
|
87
|
+
# Check if we're in a Poetry environment by checking sys.executable path
|
|
88
|
+
# Poetry environments are typically in ~/.cache/pypoetry/virtualenvs/ or similar
|
|
89
|
+
exec_path = Path(sys.executable)
|
|
90
|
+
if "pypoetry" in str(exec_path) or exec_path.parts[-3:-1] == ("bin", "python"):
|
|
91
|
+
# Try to find the environment root (go up from bin/python)
|
|
92
|
+
potential_env = exec_path.parent.parent
|
|
93
|
+
if potential_env.exists() and (potential_env / "pyvenv.cfg").exists():
|
|
94
|
+
logger.info(f"Found Poetry/virtual environment: {potential_env}")
|
|
95
|
+
return potential_env
|
|
96
|
+
|
|
97
|
+
logger.warning("No virtual environment detected. NLTK data will be downloaded to user directory.")
|
|
98
|
+
return None
|
|
99
|
+
|
|
100
|
+
|
|
63
101
|
def check_python_package(package_name: str, logger: logging.Logger) -> bool:
|
|
64
102
|
"""
|
|
65
103
|
Check if a Python package is installed.
|
|
66
|
-
|
|
104
|
+
|
|
67
105
|
Args:
|
|
68
106
|
package_name: Name of the package to check
|
|
69
107
|
logger: Logger instance
|
|
70
|
-
|
|
108
|
+
|
|
71
109
|
Returns:
|
|
72
110
|
True if package is installed, False otherwise
|
|
73
111
|
"""
|
|
@@ -82,31 +120,53 @@ def check_python_package(package_name: str, logger: logging.Logger) -> bool:
|
|
|
82
120
|
|
|
83
121
|
def download_nltk_data(logger: logging.Logger) -> bool:
|
|
84
122
|
"""
|
|
85
|
-
Download required NLTK data packages.
|
|
86
|
-
|
|
123
|
+
Download required NLTK data packages to the current environment.
|
|
124
|
+
|
|
87
125
|
Args:
|
|
88
126
|
logger: Logger instance
|
|
89
|
-
|
|
127
|
+
|
|
90
128
|
Returns:
|
|
91
129
|
True if successful, False otherwise
|
|
92
130
|
"""
|
|
93
131
|
logger.info("Starting NLTK data download...")
|
|
94
|
-
|
|
95
|
-
if not check_python_package(
|
|
132
|
+
|
|
133
|
+
if not check_python_package("nltk", logger):
|
|
96
134
|
logger.error("NLTK is not installed. Please install it first with: pip install nltk")
|
|
97
135
|
return False
|
|
98
|
-
|
|
136
|
+
|
|
137
|
+
# Get the environment path to store NLTK data environment-specifically
|
|
138
|
+
env_path = get_environment_path(logger)
|
|
139
|
+
nltk_data_path: Optional[Path] = None
|
|
140
|
+
original_nltk_data = os.environ.get("NLTK_DATA")
|
|
141
|
+
|
|
142
|
+
# Set NLTK_DATA environment variable BEFORE importing nltk
|
|
143
|
+
# This ensures nltk.data.path is initialized with the correct path
|
|
144
|
+
if env_path:
|
|
145
|
+
# Create nltk_data directory in the environment
|
|
146
|
+
nltk_data_path = env_path / "nltk_data"
|
|
147
|
+
nltk_data_path.mkdir(parents=True, exist_ok=True)
|
|
148
|
+
# Set NLTK_DATA environment variable BEFORE importing nltk
|
|
149
|
+
os.environ["NLTK_DATA"] = str(nltk_data_path)
|
|
150
|
+
logger.info(f"Using environment-specific NLTK data directory: {nltk_data_path}")
|
|
151
|
+
else:
|
|
152
|
+
logger.info("No virtual environment detected. Using default NLTK data location (~/nltk_data)")
|
|
153
|
+
|
|
99
154
|
try:
|
|
100
|
-
import nltk
|
|
101
|
-
|
|
155
|
+
import nltk # type: ignore[import-untyped]
|
|
156
|
+
|
|
157
|
+
# Ensure nltk.data.path includes our environment-specific path
|
|
158
|
+
if nltk_data_path and str(nltk_data_path) not in nltk.data.path:
|
|
159
|
+
nltk.data.path.insert(0, str(nltk_data_path))
|
|
160
|
+
|
|
102
161
|
# Download required NLTK data
|
|
103
162
|
packages_to_download = [
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
163
|
+
"stopwords",
|
|
164
|
+
"punkt",
|
|
165
|
+
"punkt_tab", # Added for RAKE-NLTK compatibility
|
|
166
|
+
"wordnet",
|
|
167
|
+
"averaged_perceptron_tagger",
|
|
108
168
|
]
|
|
109
|
-
|
|
169
|
+
|
|
110
170
|
for package in packages_to_download:
|
|
111
171
|
try:
|
|
112
172
|
logger.info(f"Downloading NLTK package: {package}")
|
|
@@ -114,36 +174,50 @@ def download_nltk_data(logger: logging.Logger) -> bool:
|
|
|
114
174
|
logger.info(f"Successfully downloaded NLTK package: {package}")
|
|
115
175
|
except Exception as e:
|
|
116
176
|
logger.error(f"Failed to download NLTK package {package}: {e}")
|
|
177
|
+
# Restore original NLTK_DATA if we changed it
|
|
178
|
+
if original_nltk_data is not None:
|
|
179
|
+
os.environ["NLTK_DATA"] = original_nltk_data
|
|
180
|
+
elif nltk_data_path:
|
|
181
|
+
os.environ.pop("NLTK_DATA", None)
|
|
117
182
|
return False
|
|
118
|
-
|
|
183
|
+
|
|
119
184
|
logger.info("All NLTK data packages downloaded successfully")
|
|
185
|
+
if nltk_data_path:
|
|
186
|
+
logger.info(f"NLTK data is stored in environment-specific location: {nltk_data_path}")
|
|
187
|
+
logger.info("Note: Set NLTK_DATA environment variable to this path if needed in other scripts")
|
|
120
188
|
return True
|
|
121
|
-
|
|
189
|
+
|
|
122
190
|
except Exception as e:
|
|
123
191
|
logger.error(f"Error downloading NLTK data: {e}")
|
|
192
|
+
# Restore original NLTK_DATA if we changed it
|
|
193
|
+
if original_nltk_data is not None:
|
|
194
|
+
os.environ["NLTK_DATA"] = original_nltk_data
|
|
195
|
+
elif nltk_data_path:
|
|
196
|
+
os.environ.pop("NLTK_DATA", None)
|
|
124
197
|
return False
|
|
125
198
|
|
|
126
199
|
|
|
127
200
|
def download_spacy_model(model_name: str, logger: logging.Logger) -> bool:
|
|
128
201
|
"""
|
|
129
202
|
Download a spaCy model.
|
|
130
|
-
|
|
203
|
+
|
|
131
204
|
Args:
|
|
132
205
|
model_name: Name of the spaCy model to download
|
|
133
206
|
logger: Logger instance
|
|
134
|
-
|
|
207
|
+
|
|
135
208
|
Returns:
|
|
136
209
|
True if successful, False otherwise
|
|
137
210
|
"""
|
|
138
211
|
logger.info(f"Starting spaCy model download: {model_name}")
|
|
139
|
-
|
|
140
|
-
if not check_python_package(
|
|
212
|
+
|
|
213
|
+
if not check_python_package("spacy", logger):
|
|
141
214
|
logger.error("spaCy is not installed. Please install it first with: pip install spacy")
|
|
142
215
|
return False
|
|
143
|
-
|
|
216
|
+
|
|
144
217
|
# Check if model is already installed
|
|
145
218
|
try:
|
|
146
219
|
import spacy
|
|
220
|
+
|
|
147
221
|
spacy.load(model_name)
|
|
148
222
|
logger.info(f"spaCy model {model_name} is already installed")
|
|
149
223
|
return True
|
|
@@ -153,17 +227,18 @@ def download_spacy_model(model_name: str, logger: logging.Logger) -> bool:
|
|
|
153
227
|
except Exception as e:
|
|
154
228
|
logger.error(f"Error checking spaCy model {model_name}: {e}")
|
|
155
229
|
return False
|
|
156
|
-
|
|
230
|
+
|
|
157
231
|
# Download the model
|
|
158
232
|
cmd = [sys.executable, "-m", "spacy", "download", model_name]
|
|
159
233
|
success, output = run_command(cmd, logger)
|
|
160
|
-
|
|
234
|
+
|
|
161
235
|
if success:
|
|
162
236
|
logger.info(f"Successfully downloaded spaCy model: {model_name}")
|
|
163
|
-
|
|
237
|
+
|
|
164
238
|
# Verify the model can be loaded
|
|
165
239
|
try:
|
|
166
240
|
import spacy
|
|
241
|
+
|
|
167
242
|
spacy.load(model_name)
|
|
168
243
|
logger.info(f"Verified spaCy model {model_name} can be loaded")
|
|
169
244
|
return True
|
|
@@ -173,7 +248,7 @@ def download_spacy_model(model_name: str, logger: logging.Logger) -> bool:
|
|
|
173
248
|
else:
|
|
174
249
|
logger.error(f"Failed to download spaCy model {model_name}: {output}")
|
|
175
250
|
return False
|
|
176
|
-
|
|
251
|
+
|
|
177
252
|
|
|
178
253
|
def download_spacy_pkuseg_model(logger: logging.Logger) -> bool:
|
|
179
254
|
"""
|
|
@@ -187,12 +262,12 @@ def download_spacy_pkuseg_model(logger: logging.Logger) -> bool:
|
|
|
187
262
|
"""
|
|
188
263
|
logger.info("Starting spaCy PKUSeg model installation...")
|
|
189
264
|
|
|
190
|
-
if not check_python_package(
|
|
265
|
+
if not check_python_package("spacy", logger):
|
|
191
266
|
logger.error("spaCy is not installed. Please install it first with: pip install spacy")
|
|
192
267
|
return False
|
|
193
268
|
|
|
194
269
|
# Check if spacy_pkuseg is already installed
|
|
195
|
-
if check_python_package(
|
|
270
|
+
if check_python_package("spacy_pkuseg", logger):
|
|
196
271
|
logger.info("spacy_pkuseg is already installed")
|
|
197
272
|
return True
|
|
198
273
|
|
|
@@ -205,7 +280,8 @@ def download_spacy_pkuseg_model(logger: logging.Logger) -> bool:
|
|
|
205
280
|
|
|
206
281
|
# Verify the package can be imported
|
|
207
282
|
try:
|
|
208
|
-
import spacy_pkuseg
|
|
283
|
+
import spacy_pkuseg # type: ignore[import-untyped]
|
|
284
|
+
|
|
209
285
|
logger.info("Verified spacy_pkuseg can be imported")
|
|
210
286
|
|
|
211
287
|
# Test basic functionality
|
|
@@ -224,25 +300,26 @@ def download_spacy_pkuseg_model(logger: logging.Logger) -> bool:
|
|
|
224
300
|
def download_rake_nltk_data(logger: logging.Logger) -> bool:
|
|
225
301
|
"""
|
|
226
302
|
Ensure RAKE-NLTK has required data.
|
|
227
|
-
|
|
303
|
+
|
|
228
304
|
Args:
|
|
229
305
|
logger: Logger instance
|
|
230
|
-
|
|
306
|
+
|
|
231
307
|
Returns:
|
|
232
308
|
True if successful, False otherwise
|
|
233
309
|
"""
|
|
234
310
|
logger.info("Checking RAKE-NLTK data...")
|
|
235
|
-
|
|
236
|
-
if not check_python_package(
|
|
311
|
+
|
|
312
|
+
if not check_python_package("rake_nltk", logger):
|
|
237
313
|
logger.warning("RAKE-NLTK is not installed. This is optional for English keyword extraction.")
|
|
238
314
|
return True # Not critical, return True
|
|
239
|
-
|
|
315
|
+
|
|
240
316
|
try:
|
|
241
|
-
from rake_nltk import Rake
|
|
317
|
+
from rake_nltk import Rake # type: ignore[import-untyped]
|
|
318
|
+
|
|
242
319
|
# Test RAKE functionality
|
|
243
320
|
rake = Rake()
|
|
244
321
|
rake.extract_keywords_from_text("This is a test sentence for RAKE.")
|
|
245
|
-
|
|
322
|
+
rake.get_ranked_phrases()
|
|
246
323
|
logger.info("RAKE-NLTK is working correctly")
|
|
247
324
|
return True
|
|
248
325
|
except Exception as e:
|
|
@@ -253,41 +330,52 @@ def download_rake_nltk_data(logger: logging.Logger) -> bool:
|
|
|
253
330
|
def verify_installation(logger: logging.Logger) -> bool:
|
|
254
331
|
"""
|
|
255
332
|
Verify all NLP components are properly installed.
|
|
256
|
-
|
|
333
|
+
|
|
257
334
|
Args:
|
|
258
335
|
logger: Logger instance
|
|
259
|
-
|
|
336
|
+
|
|
260
337
|
Returns:
|
|
261
338
|
True if all components work, False otherwise
|
|
262
339
|
"""
|
|
263
340
|
logger.info("Verifying NLP data installation...")
|
|
264
|
-
|
|
341
|
+
|
|
265
342
|
success = True
|
|
266
|
-
|
|
343
|
+
|
|
267
344
|
# Test NLTK
|
|
268
345
|
try:
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
346
|
+
# Ensure we check the environment-specific NLTK data location if it exists
|
|
347
|
+
env_path = get_environment_path(logger)
|
|
348
|
+
if env_path:
|
|
349
|
+
nltk_data_path = env_path / "nltk_data"
|
|
350
|
+
if nltk_data_path.exists():
|
|
351
|
+
os.environ["NLTK_DATA"] = str(nltk_data_path)
|
|
352
|
+
|
|
353
|
+
from nltk.corpus import stopwords # type: ignore[import-untyped]
|
|
354
|
+
|
|
355
|
+
english_stopwords = stopwords.words("english")
|
|
272
356
|
logger.info(f"NLTK verification successful. Loaded {len(english_stopwords)} English stopwords")
|
|
357
|
+
if env_path and (env_path / "nltk_data").exists():
|
|
358
|
+
logger.info(f"NLTK data is located in environment: {env_path / 'nltk_data'}")
|
|
273
359
|
except Exception as e:
|
|
274
360
|
logger.error(f"NLTK verification failed: {e}")
|
|
275
361
|
success = False
|
|
276
|
-
|
|
362
|
+
|
|
277
363
|
# Test spaCy English model
|
|
278
364
|
try:
|
|
279
365
|
import spacy
|
|
280
|
-
|
|
366
|
+
|
|
367
|
+
nlp_en = spacy.load("en_core_web_sm")
|
|
281
368
|
doc = nlp_en("This is a test sentence.")
|
|
282
369
|
logger.info(f"spaCy English model verification successful. Processed {len(doc)} tokens")
|
|
283
370
|
except Exception as e:
|
|
284
371
|
logger.error(f"spaCy English model verification failed: {e}")
|
|
285
372
|
success = False
|
|
286
|
-
|
|
373
|
+
|
|
287
374
|
# Test spaCy Chinese model (optional)
|
|
288
375
|
try:
|
|
289
376
|
import spacy
|
|
290
|
-
|
|
377
|
+
|
|
378
|
+
nlp_zh = spacy.load("zh_core_web_sm")
|
|
291
379
|
doc = nlp_zh("这是一个测试句子。")
|
|
292
380
|
logger.info(f"spaCy Chinese model verification successful. Processed {len(doc)} tokens")
|
|
293
381
|
except Exception as e:
|
|
@@ -296,6 +384,7 @@ def verify_installation(logger: logging.Logger) -> bool:
|
|
|
296
384
|
# Test spaCy PKUSeg model (optional)
|
|
297
385
|
try:
|
|
298
386
|
import spacy_pkuseg
|
|
387
|
+
|
|
299
388
|
seg = spacy_pkuseg.pkuseg()
|
|
300
389
|
result = list(seg.cut("这是一个测试句子"))
|
|
301
390
|
logger.info(f"spaCy PKUSeg model verification successful. Segmented: {result}")
|
|
@@ -305,23 +394,23 @@ def verify_installation(logger: logging.Logger) -> bool:
|
|
|
305
394
|
return success
|
|
306
395
|
|
|
307
396
|
|
|
308
|
-
def
|
|
309
|
-
"""
|
|
397
|
+
def download_all_nlp_data():
|
|
398
|
+
"""Download all required NLP data."""
|
|
310
399
|
logger = setup_logging()
|
|
311
400
|
logger.info("Starting AIECS NLP data download process...")
|
|
312
|
-
|
|
401
|
+
|
|
313
402
|
success = True
|
|
314
|
-
|
|
403
|
+
|
|
315
404
|
# Download NLTK data
|
|
316
405
|
if not download_nltk_data(logger):
|
|
317
406
|
success = False
|
|
318
|
-
|
|
407
|
+
|
|
319
408
|
# Download spaCy English model
|
|
320
|
-
if not download_spacy_model(
|
|
409
|
+
if not download_spacy_model("en_core_web_sm", logger):
|
|
321
410
|
success = False
|
|
322
|
-
|
|
411
|
+
|
|
323
412
|
# Download spaCy Chinese model (optional)
|
|
324
|
-
if not download_spacy_model(
|
|
413
|
+
if not download_spacy_model("zh_core_web_sm", logger):
|
|
325
414
|
logger.warning("Chinese model download failed, but this is optional")
|
|
326
415
|
# Don't mark as failure for Chinese model
|
|
327
416
|
|
|
@@ -329,10 +418,10 @@ def main():
|
|
|
329
418
|
if not download_spacy_pkuseg_model(logger):
|
|
330
419
|
logger.warning("spaCy PKUSeg model download failed, but this is optional")
|
|
331
420
|
# Don't mark as failure for PKUSeg model
|
|
332
|
-
|
|
421
|
+
|
|
333
422
|
# Check RAKE-NLTK (optional)
|
|
334
423
|
download_rake_nltk_data(logger)
|
|
335
|
-
|
|
424
|
+
|
|
336
425
|
# Verify installation
|
|
337
426
|
if success and verify_installation(logger):
|
|
338
427
|
logger.info("✅ All NLP data downloaded and verified successfully!")
|
|
@@ -344,5 +433,48 @@ def main():
|
|
|
344
433
|
return 1
|
|
345
434
|
|
|
346
435
|
|
|
436
|
+
def main(argv=None):
    """Main entry point with argument parsing.

    Args:
        argv: Optional sequence of command-line arguments (without the
            program name). When ``None`` (the default) argparse falls back
            to ``sys.argv[1:]``, so existing callers and the console-script
            entry point behave exactly as before. Passing an explicit list
            lets the command be invoked programmatically without mutating
            ``sys.argv``.

    Returns:
        Process exit code: ``0`` when only the help text was shown because
        no action was requested, otherwise whatever
        ``download_all_nlp_data()`` returns.
    """
    # Imported locally so the module keeps working even if the file header
    # does not import argparse.
    import argparse

    parser = argparse.ArgumentParser(
        description="Download NLP data for AIECS tools",
        # Raw formatter keeps the line breaks of the epilog below intact.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Show this help message
aiecs-download-nlp-data --help

# Download all NLP data
aiecs-download-nlp-data --download
aiecs-download-nlp-data -d

NLP Data Includes:
- NLTK packages: stopwords, punkt, wordnet, averaged_perceptron_tagger
- spaCy models: en_core_web_sm (English), zh_core_web_sm (Chinese, optional)
- spaCy PKUSeg model (Chinese segmentation, optional)
- RAKE-NLTK data (keyword extraction, optional)
""",
    )

    parser.add_argument(
        "-d",
        "--download",
        action="store_true",
        help="Download all NLP data packages",
    )

    # argv=None makes argparse read sys.argv[1:], preserving the old behavior.
    args = parser.parse_args(argv)

    # If no action was requested, show help instead of silently doing nothing.
    if not args.download:
        parser.print_help()
        print("\n⚠️ No action specified. Use --download or -d to download NLP data.")
        return 0

    # Execute download
    return download_all_nlp_data()
|
|
477
|
+
|
|
478
|
+
|
|
347
479
|
if __name__ == "__main__":
|
|
348
|
-
sys.exit(main())
|
|
480
|
+
sys.exit(main())
|
aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py}
RENAMED
|
@@ -6,19 +6,20 @@ This script patches the weasel schemas.py file to add allow_reuse=True to duplic
|
|
|
6
6
|
|
|
7
7
|
import os
|
|
8
8
|
import sys
|
|
9
|
-
import subprocess
|
|
10
9
|
import shutil
|
|
11
10
|
from datetime import datetime
|
|
12
11
|
import re
|
|
13
12
|
|
|
13
|
+
|
|
14
14
|
def get_weasel_path():
|
|
15
15
|
"""Get the weasel package path in the current Python environment."""
|
|
16
16
|
try:
|
|
17
|
-
import weasel
|
|
17
|
+
import weasel # type: ignore[import-untyped]
|
|
18
18
|
import inspect
|
|
19
|
+
|
|
19
20
|
weasel_file = inspect.getfile(weasel)
|
|
20
21
|
weasel_dir = os.path.dirname(weasel_file)
|
|
21
|
-
return os.path.join(weasel_dir,
|
|
22
|
+
return os.path.join(weasel_dir, "schemas.py")
|
|
22
23
|
except ImportError:
|
|
23
24
|
print("❌ Error: weasel package not found")
|
|
24
25
|
print("Please install aiecs with all dependencies")
|
|
@@ -27,6 +28,7 @@ def get_weasel_path():
|
|
|
27
28
|
print(f"❌ Error finding weasel package: {e}")
|
|
28
29
|
return None
|
|
29
30
|
|
|
31
|
+
|
|
30
32
|
def backup_file(file_path):
|
|
31
33
|
"""Create a backup of the file."""
|
|
32
34
|
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
@@ -34,17 +36,18 @@ def backup_file(file_path):
|
|
|
34
36
|
shutil.copy2(file_path, backup_path)
|
|
35
37
|
return backup_path
|
|
36
38
|
|
|
39
|
+
|
|
37
40
|
def fix_weasel_schemas(schemas_file_path):
|
|
38
41
|
"""Fix the weasel schemas.py file by adding allow_reuse=True to validators."""
|
|
39
42
|
|
|
40
43
|
print(f"📁 Processing file: {schemas_file_path}")
|
|
41
44
|
|
|
42
45
|
# Read the original file
|
|
43
|
-
with open(schemas_file_path,
|
|
46
|
+
with open(schemas_file_path, "r", encoding="utf-8") as f:
|
|
44
47
|
content = f.read()
|
|
45
48
|
|
|
46
49
|
# Check if already patched
|
|
47
|
-
if
|
|
50
|
+
if "allow_reuse=True" in content:
|
|
48
51
|
print("✅ File already patched with allow_reuse=True")
|
|
49
52
|
return True
|
|
50
53
|
|
|
@@ -53,19 +56,21 @@ def fix_weasel_schemas(schemas_file_path):
|
|
|
53
56
|
print(f"💾 Created backup at: {backup_path}")
|
|
54
57
|
|
|
55
58
|
# Show current problematic area
|
|
56
|
-
lines = content.split(
|
|
59
|
+
lines = content.split("\n")
|
|
57
60
|
print("\n📖 Current content around line 89:")
|
|
58
61
|
for i, line in enumerate(lines[84:94], 85):
|
|
59
62
|
print(f"{i:3d} | {line}")
|
|
60
63
|
|
|
61
|
-
# Pattern to match both @validator and @root_validator decorators without
|
|
62
|
-
|
|
64
|
+
# Pattern to match both @validator and @root_validator decorators without
|
|
65
|
+
# allow_reuse
|
|
66
|
+
validator_pattern = r"(@(?:root_)?validator\([^)]*)\)(?!\s*,\s*allow_reuse=True)"
|
|
63
67
|
|
|
64
|
-
# Replace @validator(...) or @root_validator(...) with allow_reuse=True if
|
|
68
|
+
# Replace @validator(...) or @root_validator(...) with allow_reuse=True if
|
|
69
|
+
# not already present
|
|
65
70
|
def replace_validator(match):
|
|
66
71
|
validator_call = match.group(1)
|
|
67
72
|
# Check if allow_reuse is already in the parameters
|
|
68
|
-
if
|
|
73
|
+
if "allow_reuse" in validator_call:
|
|
69
74
|
return match.group(0) # Return unchanged
|
|
70
75
|
else:
|
|
71
76
|
return f"{validator_call}, allow_reuse=True)"
|
|
@@ -74,23 +79,24 @@ def fix_weasel_schemas(schemas_file_path):
|
|
|
74
79
|
fixed_content = re.sub(validator_pattern, replace_validator, content)
|
|
75
80
|
|
|
76
81
|
# Write the fixed content back
|
|
77
|
-
with open(schemas_file_path,
|
|
82
|
+
with open(schemas_file_path, "w", encoding="utf-8") as f:
|
|
78
83
|
f.write(fixed_content)
|
|
79
84
|
|
|
80
85
|
# Show the fixed content
|
|
81
|
-
fixed_lines = fixed_content.split(
|
|
86
|
+
fixed_lines = fixed_content.split("\n")
|
|
82
87
|
print("\n📖 Patched content around line 89:")
|
|
83
88
|
for i, line in enumerate(fixed_lines[84:94], 85):
|
|
84
89
|
print(f"{i:3d} | {line}")
|
|
85
90
|
|
|
86
91
|
# Verify the fix
|
|
87
|
-
if
|
|
92
|
+
if "allow_reuse=True" in fixed_content:
|
|
88
93
|
print("✅ Verification successful: allow_reuse=True found in file")
|
|
89
94
|
return True
|
|
90
95
|
else:
|
|
91
96
|
print("⚠️ Warning: allow_reuse=True not found after patching")
|
|
92
97
|
return False
|
|
93
98
|
|
|
99
|
+
|
|
94
100
|
def main():
|
|
95
101
|
"""Main function to execute the patch."""
|
|
96
102
|
print("🔧 Starting weasel library patch for duplicate validator function...")
|
|
@@ -112,10 +118,11 @@ def main():
|
|
|
112
118
|
if success:
|
|
113
119
|
print("\n🎉 Weasel library patch completed successfully!")
|
|
114
120
|
print("\nYou can now run your tests again.")
|
|
115
|
-
print(
|
|
121
|
+
print("\nIf you need to revert the changes, restore from the backup file.")
|
|
116
122
|
else:
|
|
117
123
|
print("\n❌ Patch may not have been applied correctly. Please check manually.")
|
|
118
124
|
sys.exit(1)
|
|
119
125
|
|
|
126
|
+
|
|
120
127
|
if __name__ == "__main__":
|
|
121
128
|
main()
|
aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh}
RENAMED
|
@@ -149,7 +149,7 @@ test_fix() {
|
|
|
149
149
|
import sys
|
|
150
150
|
sys.path.insert(0, '.')
|
|
151
151
|
try:
|
|
152
|
-
from
|
|
152
|
+
from aiecs.tools.task_tools.research_tool import *
|
|
153
153
|
print('✅ Import successful - fix appears to work!')
|
|
154
154
|
except Exception as e:
|
|
155
155
|
print(f'❌ Import still fails: {e}')
|