aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiecs might be problematic. Click here for more details.
- aiecs/__init__.py +13 -16
- aiecs/__main__.py +7 -7
- aiecs/aiecs_client.py +269 -75
- aiecs/application/executors/operation_executor.py +79 -54
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
- aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
- aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
- aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +98 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
- aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
- aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
- aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
- aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
- aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
- aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
- aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
- aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
- aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
- aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
- aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
- aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
- aiecs/application/knowledge_graph/search/reranker.py +293 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +471 -0
- aiecs/config/__init__.py +20 -5
- aiecs/config/config.py +762 -31
- aiecs/config/graph_config.py +131 -0
- aiecs/config/tool_config.py +399 -0
- aiecs/core/__init__.py +29 -13
- aiecs/core/interface/__init__.py +2 -2
- aiecs/core/interface/execution_interface.py +22 -22
- aiecs/core/interface/storage_interface.py +37 -88
- aiecs/core/registry/__init__.py +31 -0
- aiecs/core/registry/service_registry.py +92 -0
- aiecs/domain/__init__.py +270 -1
- aiecs/domain/agent/__init__.py +191 -0
- aiecs/domain/agent/base_agent.py +3870 -0
- aiecs/domain/agent/exceptions.py +99 -0
- aiecs/domain/agent/graph_aware_mixin.py +569 -0
- aiecs/domain/agent/hybrid_agent.py +1435 -0
- aiecs/domain/agent/integration/__init__.py +29 -0
- aiecs/domain/agent/integration/context_compressor.py +216 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
- aiecs/domain/agent/integration/protocols.py +281 -0
- aiecs/domain/agent/integration/retry_policy.py +218 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
- aiecs/domain/agent/lifecycle.py +291 -0
- aiecs/domain/agent/llm_agent.py +692 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +1124 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +163 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
- aiecs/domain/agent/models.py +884 -0
- aiecs/domain/agent/observability.py +479 -0
- aiecs/domain/agent/persistence.py +449 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +159 -0
- aiecs/domain/agent/prompts/formatters.py +187 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +253 -0
- aiecs/domain/agent/tool_agent.py +444 -0
- aiecs/domain/agent/tools/__init__.py +15 -0
- aiecs/domain/agent/tools/schema_generator.py +364 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +469 -0
- aiecs/domain/community/analytics.py +432 -0
- aiecs/domain/community/collaborative_workflow.py +648 -0
- aiecs/domain/community/communication_hub.py +634 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +796 -0
- aiecs/domain/community/community_manager.py +803 -0
- aiecs/domain/community/decision_engine.py +849 -0
- aiecs/domain/community/exceptions.py +231 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +234 -0
- aiecs/domain/community/resource_manager.py +461 -0
- aiecs/domain/community/shared_context_manager.py +589 -0
- aiecs/domain/context/__init__.py +40 -10
- aiecs/domain/context/context_engine.py +1910 -0
- aiecs/domain/context/conversation_models.py +87 -53
- aiecs/domain/context/graph_memory.py +582 -0
- aiecs/domain/execution/model.py +12 -4
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +148 -0
- aiecs/domain/knowledge_graph/models/evidence.py +178 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
- aiecs/domain/knowledge_graph/models/path.py +171 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
- aiecs/domain/knowledge_graph/models/query.py +261 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
- aiecs/domain/knowledge_graph/models/relation.py +202 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
- aiecs/domain/task/dsl_processor.py +172 -56
- aiecs/domain/task/model.py +20 -8
- aiecs/domain/task/task_context.py +27 -24
- aiecs/infrastructure/__init__.py +0 -2
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +837 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
- aiecs/infrastructure/graph_storage/cache.py +424 -0
- aiecs/infrastructure/graph_storage/distributed.py +223 -0
- aiecs/infrastructure/graph_storage/error_handling.py +380 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
- aiecs/infrastructure/graph_storage/metrics.py +344 -0
- aiecs/infrastructure/graph_storage/migration.py +400 -0
- aiecs/infrastructure/graph_storage/pagination.py +483 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
- aiecs/infrastructure/graph_storage/postgres.py +1563 -0
- aiecs/infrastructure/graph_storage/property_storage.py +353 -0
- aiecs/infrastructure/graph_storage/protocols.py +76 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
- aiecs/infrastructure/graph_storage/streaming.py +487 -0
- aiecs/infrastructure/graph_storage/tenant.py +412 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
- aiecs/infrastructure/messaging/websocket_manager.py +51 -35
- aiecs/infrastructure/monitoring/__init__.py +22 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
- aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
- aiecs/infrastructure/monitoring/structured_logger.py +3 -7
- aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
- aiecs/infrastructure/persistence/__init__.py +14 -1
- aiecs/infrastructure/persistence/context_engine_client.py +184 -0
- aiecs/infrastructure/persistence/database_manager.py +67 -43
- aiecs/infrastructure/persistence/file_storage.py +180 -103
- aiecs/infrastructure/persistence/redis_client.py +74 -21
- aiecs/llm/__init__.py +73 -25
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
- aiecs/llm/client_factory.py +224 -36
- aiecs/llm/client_resolver.py +155 -0
- aiecs/llm/clients/__init__.py +38 -0
- aiecs/llm/clients/base_client.py +324 -0
- aiecs/llm/clients/google_function_calling_mixin.py +457 -0
- aiecs/llm/clients/googleai_client.py +241 -0
- aiecs/llm/clients/openai_client.py +158 -0
- aiecs/llm/clients/openai_compatible_mixin.py +367 -0
- aiecs/llm/clients/vertex_client.py +897 -0
- aiecs/llm/clients/xai_client.py +201 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +272 -0
- aiecs/llm/config/config_validator.py +206 -0
- aiecs/llm/config/model_config.py +143 -0
- aiecs/llm/protocols.py +149 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +89 -0
- aiecs/main.py +140 -121
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/module_checker.py +499 -0
- aiecs/scripts/aid/version_manager.py +235 -0
- aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
- aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
- aiecs/scripts/dependance_check/__init__.py +15 -0
- aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
- aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
- aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
- aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
- aiecs/scripts/knowledge_graph/__init__.py +3 -0
- aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
- aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
- aiecs/scripts/tools_develop/README.md +671 -0
- aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
- aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
- aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
- aiecs/scripts/tools_develop/schema_coverage.py +511 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
- aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
- aiecs/scripts/tools_develop/verify_tools.py +352 -0
- aiecs/tasks/__init__.py +0 -1
- aiecs/tasks/worker.py +115 -47
- aiecs/tools/__init__.py +194 -72
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +330 -0
- aiecs/tools/apisource/providers/__init__.py +112 -0
- aiecs/tools/apisource/providers/base.py +671 -0
- aiecs/tools/apisource/providers/census.py +397 -0
- aiecs/tools/apisource/providers/fred.py +535 -0
- aiecs/tools/apisource/providers/newsapi.py +409 -0
- aiecs/tools/apisource/providers/worldbank.py +352 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +363 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
- aiecs/tools/apisource/tool.py +832 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +334 -0
- aiecs/tools/base_tool.py +415 -21
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +607 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
- aiecs/tools/docs/content_insertion_tool.py +1320 -0
- aiecs/tools/docs/document_creator_tool.py +1323 -0
- aiecs/tools/docs/document_layout_tool.py +1160 -0
- aiecs/tools/docs/document_parser_tool.py +1011 -0
- aiecs/tools/docs/document_writer_tool.py +1829 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
- aiecs/tools/langchain_adapter.py +300 -138
- aiecs/tools/schema_generator.py +455 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +581 -0
- aiecs/tools/search_tool/cache.py +264 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +224 -0
- aiecs/tools/search_tool/core.py +778 -0
- aiecs/tools/search_tool/deduplicator.py +119 -0
- aiecs/tools/search_tool/error_handler.py +242 -0
- aiecs/tools/search_tool/metrics.py +343 -0
- aiecs/tools/search_tool/rate_limiter.py +172 -0
- aiecs/tools/search_tool/schemas.py +275 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
- aiecs/tools/statistics/data_loader_tool.py +555 -0
- aiecs/tools/statistics/data_profiler_tool.py +638 -0
- aiecs/tools/statistics/data_transformer_tool.py +580 -0
- aiecs/tools/statistics/data_visualizer_tool.py +498 -0
- aiecs/tools/statistics/model_trainer_tool.py +507 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
- aiecs/tools/task_tools/__init__.py +49 -36
- aiecs/tools/task_tools/chart_tool.py +200 -184
- aiecs/tools/task_tools/classfire_tool.py +268 -267
- aiecs/tools/task_tools/image_tool.py +175 -131
- aiecs/tools/task_tools/office_tool.py +226 -146
- aiecs/tools/task_tools/pandas_tool.py +477 -121
- aiecs/tools/task_tools/report_tool.py +390 -142
- aiecs/tools/task_tools/research_tool.py +149 -79
- aiecs/tools/task_tools/scraper_tool.py +339 -145
- aiecs/tools/task_tools/stats_tool.py +448 -209
- aiecs/tools/temp_file_manager.py +26 -24
- aiecs/tools/tool_executor/__init__.py +18 -16
- aiecs/tools/tool_executor/tool_executor.py +364 -52
- aiecs/utils/LLM_output_structor.py +74 -48
- aiecs/utils/__init__.py +14 -3
- aiecs/utils/base_callback.py +0 -3
- aiecs/utils/cache_provider.py +696 -0
- aiecs/utils/execution_utils.py +50 -31
- aiecs/utils/prompt_loader.py +1 -0
- aiecs/utils/token_usage_repository.py +37 -11
- aiecs/ws/socket_server.py +14 -4
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
- aiecs-1.7.6.dist-info/RECORD +337 -0
- aiecs-1.7.6.dist-info/entry_points.txt +13 -0
- aiecs/config/registry.py +0 -19
- aiecs/domain/context/content_engine.py +0 -982
- aiecs/llm/base_client.py +0 -99
- aiecs/llm/openai_client.py +0 -125
- aiecs/llm/vertex_client.py +0 -186
- aiecs/llm/xai_client.py +0 -184
- aiecs/scripts/dependency_checker.py +0 -857
- aiecs/scripts/quick_dependency_check.py +0 -269
- aiecs/tools/task_tools/search_api.py +0 -7
- aiecs-1.0.1.dist-info/RECORD +0 -90
- aiecs-1.0.1.dist-info/entry_points.txt +0 -7
- /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
- /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
- /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
- /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Abbreviation Expander
|
|
3
|
+
|
|
4
|
+
Handles acronym and abbreviation expansion for entity matching.
|
|
5
|
+
Supports configurable dictionaries and bidirectional matching.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import logging
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Dict, List, Optional, Set, Tuple
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
|
|
14
|
+
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class AbbreviationMatch:
    """
    Outcome of a successful abbreviation / full-form lookup.

    Bundles the abbreviation, the full forms registered for it, and an
    optional category label.
    """

    # Abbreviation side of the mapping (e.g., "MIT")
    abbreviation: str
    # Full forms associated with the abbreviation
    full_forms: List[str]
    # Optional grouping label, e.g., "organization", "geographic", "technical"
    category: Optional[str] = None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class AbbreviationExpander:
    """
    Expand and match abbreviations/acronyms.

    Supports:
    - Configurable abbreviation dictionaries (JSON format)
    - Common patterns (organization, geographic, technical)
    - Bidirectional matching (abbreviation ↔ full form)
    - Domain-specific dictionary loading

    All lookups are case-insensitive: abbreviations and full forms are
    indexed by their lowercased text.

    Example:
        ```python
        expander = AbbreviationExpander()

        # Load common abbreviations
        expander.load_common_abbreviations()

        # Add custom abbreviation
        expander.add_abbreviation("MIT", ["Massachusetts Institute of Technology"])

        # Bidirectional lookup
        match = expander.lookup("MIT")
        assert "Massachusetts Institute of Technology" in match.full_forms

        match = expander.lookup("Massachusetts Institute of Technology")
        assert match.abbreviation == "MIT"
        ```
    """

    # Built-in dictionaries, keyed by category, in the order they are loaded.
    # Kept at class level so they are built once rather than on every call to
    # load_common_abbreviations(); add_abbreviation() copies each list, so
    # these shared literals are never mutated through an instance.
    _COMMON_ABBREVIATIONS: Dict[str, Dict[str, List[str]]] = {
        "organization": {
            "MIT": ["Massachusetts Institute of Technology"],
            "NASA": ["National Aeronautics and Space Administration"],
            "IBM": ["International Business Machines"],
            "AT&T": ["American Telephone and Telegraph", "AT and T"],
            "FBI": ["Federal Bureau of Investigation"],
            "CIA": ["Central Intelligence Agency"],
            "WHO": ["World Health Organization"],
            "UN": ["United Nations"],
            "EU": ["European Union"],
            "NATO": ["North Atlantic Treaty Organization"],
            "OPEC": ["Organization of the Petroleum Exporting Countries"],
            "IMF": ["International Monetary Fund"],
            "WTO": ["World Trade Organization"],
            "UNICEF": ["United Nations Children's Fund"],
            "UNESCO": ["United Nations Educational, Scientific and Cultural Organization"],
        },
        "geographic": {
            "NYC": ["New York City", "New York"],
            "LA": ["Los Angeles"],
            "SF": ["San Francisco"],
            "DC": ["District of Columbia", "Washington DC", "Washington D.C."],
            "UK": ["United Kingdom", "Great Britain"],
            "USA": ["United States of America", "United States", "US"],
            "UAE": ["United Arab Emirates"],
        },
        "technical": {
            "API": ["Application Programming Interface"],
            "CPU": ["Central Processing Unit"],
            "GPU": ["Graphics Processing Unit"],
            "RAM": ["Random Access Memory"],
            "ROM": ["Read Only Memory"],
            "SSD": ["Solid State Drive"],
            "HDD": ["Hard Disk Drive"],
            "URL": ["Uniform Resource Locator"],
            "HTML": ["HyperText Markup Language"],
            "CSS": ["Cascading Style Sheets"],
            "JSON": ["JavaScript Object Notation"],
            "XML": ["Extensible Markup Language"],
            "SQL": ["Structured Query Language"],
            "AI": ["Artificial Intelligence"],
            "ML": ["Machine Learning"],
            "NLP": ["Natural Language Processing"],
            "LLM": ["Large Language Model"],
        },
    }

    def __init__(self):
        """Initialize AbbreviationExpander"""
        # abbreviation (lowercase) -> full forms
        self._abbrev_to_full: Dict[str, List[str]] = {}
        # full form (lowercase) -> abbreviation (as originally registered)
        self._full_to_abbrev: Dict[str, str] = {}
        # abbreviation (lowercase) -> category
        self._categories: Dict[str, str] = {}

    def add_abbreviation(
        self,
        abbreviation: str,
        full_forms: List[str],
        category: Optional[str] = None,
    ) -> None:
        """
        Add (or replace) an abbreviation mapping.

        Re-registering an abbreviation replaces its full forms; reverse
        entries left over from the previous registration are removed so that
        old full forms no longer resolve to this abbreviation. A previously
        stored category is kept when ``category`` is not given.

        If two abbreviations share a full form, the reverse lookup resolves
        to whichever was registered last.

        Args:
            abbreviation: The abbreviation (e.g., "MIT")
            full_forms: List of full forms (e.g., ["Massachusetts Institute of Technology"]).
                A single string is accepted and treated as a one-element list.
            category: Optional category (e.g., "organization")
        """
        abbrev_key = abbreviation.lower()

        # A bare string would otherwise be iterated character by character,
        # filling the reverse index with single letters.
        if isinstance(full_forms, str):
            full_forms = [full_forms]
        # Copy so later mutation of the caller's list cannot corrupt the index.
        forms = list(full_forms)

        # Drop reverse entries from an earlier registration of this
        # abbreviation, but only those that still point at it (another
        # abbreviation may have claimed the same full form since).
        for old_form in self._abbrev_to_full.get(abbrev_key, ()):
            old_key = old_form.lower()
            owner = self._full_to_abbrev.get(old_key)
            if owner is not None and owner.lower() == abbrev_key:
                del self._full_to_abbrev[old_key]

        self._abbrev_to_full[abbrev_key] = forms

        if category:
            self._categories[abbrev_key] = category

        # Build reverse index for bidirectional lookup
        for full_form in forms:
            self._full_to_abbrev[full_form.lower()] = abbreviation

    # The return annotation is quoted so it is not evaluated when the class
    # body is executed.
    def lookup(self, text: str) -> Optional["AbbreviationMatch"]:
        """
        Look up an abbreviation or full form (case-insensitive).

        Supports bidirectional matching:
        - "MIT" → full forms
        - "Massachusetts Institute of Technology" → abbreviation

        The abbreviation index is consulted first. When ``text`` is itself an
        abbreviation, the returned ``abbreviation`` is ``text`` exactly as
        passed in; when ``text`` is a full form, it is the abbreviation as it
        was registered.

        Args:
            text: The text to look up

        Returns:
            AbbreviationMatch if found, None otherwise. ``full_forms`` is a
            copy, so mutating it does not affect the expander.
        """
        text_lower = text.lower()

        # Try abbreviation -> full form
        if text_lower in self._abbrev_to_full:
            return AbbreviationMatch(
                abbreviation=text,
                full_forms=list(self._abbrev_to_full[text_lower]),
                category=self._categories.get(text_lower),
            )

        # Try full form -> abbreviation
        if text_lower in self._full_to_abbrev:
            abbrev = self._full_to_abbrev[text_lower]
            abbrev_lower = abbrev.lower()
            return AbbreviationMatch(
                abbreviation=abbrev,
                # Fall back to the queried text if the forward entry is missing.
                full_forms=list(self._abbrev_to_full.get(abbrev_lower, [text])),
                category=self._categories.get(abbrev_lower),
            )

        return None

    def get_all_forms(self, text: str) -> Set[str]:
        """
        Get all equivalent forms of a text (abbreviation + full forms).

        Each form is included both as stored and lowercased.

        Args:
            text: The text to expand

        Returns:
            Set of all equivalent forms (including original)
        """
        forms = {text, text.lower()}

        match = self.lookup(text)
        if match:
            for form in (match.abbreviation, *match.full_forms):
                forms.add(form)
                forms.add(form.lower())

        return forms

    def matches(self, text1: str, text2: str) -> bool:
        """
        Check if two texts match via abbreviation expansion.

        Two texts match when their expanded form sets (see
        ``get_all_forms``) share at least one element.

        Args:
            text1: First text
            text2: Second text

        Returns:
            True if texts match (same abbreviation or same full form)
        """
        forms1 = self.get_all_forms(text1)
        forms2 = self.get_all_forms(text2)
        return bool(forms1 & forms2)

    def load_from_dict(
        self,
        data: Dict[str, List[str]],
        category: Optional[str] = None,
    ) -> int:
        """
        Load abbreviations from a dictionary.

        Args:
            data: Dictionary of abbreviation -> full forms
            category: Optional category for all entries

        Returns:
            Number of abbreviations loaded (one per entry in ``data``)
        """
        count = 0
        for abbrev, full_forms in data.items():
            self.add_abbreviation(abbrev, full_forms, category)
            count += 1
        return count

    def load_from_json(self, filepath: str, category: Optional[str] = None) -> int:
        """
        Load abbreviations from a JSON file.

        JSON format:
        ```json
        {
            "MIT": ["Massachusetts Institute of Technology", "MIT"],
            "NYC": ["New York City", "New York", "NYC"]
        }
        ```

        A missing file, or a file whose top-level JSON value is not an
        object, is logged as a warning and loads nothing. Malformed JSON
        still raises ``json.JSONDecodeError``.

        Args:
            filepath: Path to JSON file
            category: Optional category for all entries

        Returns:
            Number of abbreviations loaded
        """
        path = Path(filepath)
        if not path.exists():
            logger.warning(f"Abbreviation file not found: {filepath}")
            return 0

        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)

        # A top-level list/string/number would otherwise fail later with an
        # opaque AttributeError on ``data.items()``.
        if not isinstance(data, dict):
            logger.warning(
                f"Abbreviation file must contain a JSON object, "
                f"got {type(data).__name__}: {filepath}"
            )
            return 0

        return self.load_from_dict(data, category)

    def load_common_abbreviations(self) -> int:
        """
        Load common abbreviation patterns.

        Categories:
        - Organization abbreviations (MIT, NASA, etc.)
        - Geographic abbreviations (NYC, LA, etc.)
        - Technical abbreviations (API, CPU, etc.)

        Returns:
            Number of abbreviations loaded
        """
        total = sum(
            self.load_from_dict(entries, category)
            for category, entries in self._COMMON_ABBREVIATIONS.items()
        )

        logger.info(f"Loaded {total} common abbreviations")
        return total

    def get_abbreviations_by_category(self, category: str) -> List[str]:
        """
        Get all abbreviations in a category.

        Args:
            category: Category name (e.g., "organization")

        Returns:
            List of abbreviations in that category, in their lowercased
            index form (e.g., "mit")
        """
        return [
            abbrev for abbrev, cat in self._categories.items()
            if cat == category
        ]

    def size(self) -> int:
        """Get number of abbreviations in the expander"""
        return len(self._abbrev_to_full)

    def clear(self) -> None:
        """Clear all abbreviations"""
        self._abbrev_to_full.clear()
        self._full_to_abbrev.clear()
        self._categories.clear()

    def to_dict(self) -> Dict[str, List[str]]:
        """
        Export abbreviations as dictionary.

        Returns:
            Dictionary of abbreviation (lowercased) -> full forms. The lists
            are copies, so mutating the result does not affect the expander.
        """
        return {abbrev: list(forms) for abbrev, forms in self._abbrev_to_full.items()}

    def save_to_json(self, filepath: str) -> None:
        """
        Save abbreviations to a JSON file.

        Parent directories are created if needed. Keys are written in their
        lowercased index form; categories are not saved.

        Args:
            filepath: Path to output JSON file
        """
        path = Path(filepath)
        path.parent.mkdir(parents=True, exist_ok=True)

        with open(path, "w", encoding="utf-8") as f:
            json.dump(self._abbrev_to_full, f, indent=2)
|
|
327
|
+
|