aiecs 1.0.1__py3-none-any.whl → 1.7.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiecs might be problematic. Click here for more details.
- aiecs/__init__.py +13 -16
- aiecs/__main__.py +7 -7
- aiecs/aiecs_client.py +269 -75
- aiecs/application/executors/operation_executor.py +79 -54
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
- aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
- aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
- aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +98 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
- aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
- aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
- aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
- aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
- aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
- aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
- aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
- aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
- aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
- aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
- aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
- aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
- aiecs/application/knowledge_graph/search/reranker.py +293 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +471 -0
- aiecs/config/__init__.py +20 -5
- aiecs/config/config.py +762 -31
- aiecs/config/graph_config.py +131 -0
- aiecs/config/tool_config.py +435 -0
- aiecs/core/__init__.py +29 -13
- aiecs/core/interface/__init__.py +2 -2
- aiecs/core/interface/execution_interface.py +22 -22
- aiecs/core/interface/storage_interface.py +37 -88
- aiecs/core/registry/__init__.py +31 -0
- aiecs/core/registry/service_registry.py +92 -0
- aiecs/domain/__init__.py +270 -1
- aiecs/domain/agent/__init__.py +191 -0
- aiecs/domain/agent/base_agent.py +3949 -0
- aiecs/domain/agent/exceptions.py +99 -0
- aiecs/domain/agent/graph_aware_mixin.py +569 -0
- aiecs/domain/agent/hybrid_agent.py +1731 -0
- aiecs/domain/agent/integration/__init__.py +29 -0
- aiecs/domain/agent/integration/context_compressor.py +216 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
- aiecs/domain/agent/integration/protocols.py +281 -0
- aiecs/domain/agent/integration/retry_policy.py +218 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
- aiecs/domain/agent/lifecycle.py +291 -0
- aiecs/domain/agent/llm_agent.py +692 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +1124 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +163 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
- aiecs/domain/agent/models.py +894 -0
- aiecs/domain/agent/observability.py +479 -0
- aiecs/domain/agent/persistence.py +449 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +159 -0
- aiecs/domain/agent/prompts/formatters.py +187 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +253 -0
- aiecs/domain/agent/tool_agent.py +444 -0
- aiecs/domain/agent/tools/__init__.py +15 -0
- aiecs/domain/agent/tools/schema_generator.py +377 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +469 -0
- aiecs/domain/community/analytics.py +432 -0
- aiecs/domain/community/collaborative_workflow.py +648 -0
- aiecs/domain/community/communication_hub.py +634 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +796 -0
- aiecs/domain/community/community_manager.py +803 -0
- aiecs/domain/community/decision_engine.py +849 -0
- aiecs/domain/community/exceptions.py +231 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +234 -0
- aiecs/domain/community/resource_manager.py +461 -0
- aiecs/domain/community/shared_context_manager.py +589 -0
- aiecs/domain/context/__init__.py +40 -10
- aiecs/domain/context/context_engine.py +1910 -0
- aiecs/domain/context/conversation_models.py +87 -53
- aiecs/domain/context/graph_memory.py +582 -0
- aiecs/domain/execution/model.py +12 -4
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +148 -0
- aiecs/domain/knowledge_graph/models/evidence.py +178 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
- aiecs/domain/knowledge_graph/models/path.py +171 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
- aiecs/domain/knowledge_graph/models/query.py +261 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
- aiecs/domain/knowledge_graph/models/relation.py +202 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
- aiecs/domain/task/dsl_processor.py +172 -56
- aiecs/domain/task/model.py +20 -8
- aiecs/domain/task/task_context.py +27 -24
- aiecs/infrastructure/__init__.py +0 -2
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +837 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
- aiecs/infrastructure/graph_storage/cache.py +424 -0
- aiecs/infrastructure/graph_storage/distributed.py +223 -0
- aiecs/infrastructure/graph_storage/error_handling.py +380 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
- aiecs/infrastructure/graph_storage/metrics.py +344 -0
- aiecs/infrastructure/graph_storage/migration.py +400 -0
- aiecs/infrastructure/graph_storage/pagination.py +483 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
- aiecs/infrastructure/graph_storage/postgres.py +1563 -0
- aiecs/infrastructure/graph_storage/property_storage.py +353 -0
- aiecs/infrastructure/graph_storage/protocols.py +76 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
- aiecs/infrastructure/graph_storage/streaming.py +487 -0
- aiecs/infrastructure/graph_storage/tenant.py +412 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
- aiecs/infrastructure/messaging/websocket_manager.py +51 -35
- aiecs/infrastructure/monitoring/__init__.py +22 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
- aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
- aiecs/infrastructure/monitoring/structured_logger.py +3 -7
- aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
- aiecs/infrastructure/persistence/__init__.py +14 -1
- aiecs/infrastructure/persistence/context_engine_client.py +184 -0
- aiecs/infrastructure/persistence/database_manager.py +67 -43
- aiecs/infrastructure/persistence/file_storage.py +180 -103
- aiecs/infrastructure/persistence/redis_client.py +74 -21
- aiecs/llm/__init__.py +73 -25
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
- aiecs/llm/client_factory.py +230 -37
- aiecs/llm/client_resolver.py +155 -0
- aiecs/llm/clients/__init__.py +38 -0
- aiecs/llm/clients/base_client.py +328 -0
- aiecs/llm/clients/google_function_calling_mixin.py +415 -0
- aiecs/llm/clients/googleai_client.py +314 -0
- aiecs/llm/clients/openai_client.py +158 -0
- aiecs/llm/clients/openai_compatible_mixin.py +367 -0
- aiecs/llm/clients/vertex_client.py +1186 -0
- aiecs/llm/clients/xai_client.py +201 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +272 -0
- aiecs/llm/config/config_validator.py +206 -0
- aiecs/llm/config/model_config.py +143 -0
- aiecs/llm/protocols.py +149 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +89 -0
- aiecs/main.py +140 -121
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/module_checker.py +499 -0
- aiecs/scripts/aid/version_manager.py +235 -0
- aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
- aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
- aiecs/scripts/dependance_check/__init__.py +15 -0
- aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
- aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
- aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
- aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
- aiecs/scripts/knowledge_graph/__init__.py +3 -0
- aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
- aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
- aiecs/scripts/tools_develop/README.md +671 -0
- aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
- aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
- aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
- aiecs/scripts/tools_develop/schema_coverage.py +511 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
- aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
- aiecs/scripts/tools_develop/verify_tools.py +352 -0
- aiecs/tasks/__init__.py +0 -1
- aiecs/tasks/worker.py +115 -47
- aiecs/tools/__init__.py +194 -72
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +330 -0
- aiecs/tools/apisource/providers/__init__.py +112 -0
- aiecs/tools/apisource/providers/base.py +671 -0
- aiecs/tools/apisource/providers/census.py +397 -0
- aiecs/tools/apisource/providers/fred.py +535 -0
- aiecs/tools/apisource/providers/newsapi.py +409 -0
- aiecs/tools/apisource/providers/worldbank.py +352 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +363 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
- aiecs/tools/apisource/tool.py +832 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +334 -0
- aiecs/tools/base_tool.py +415 -21
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +607 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
- aiecs/tools/docs/content_insertion_tool.py +1320 -0
- aiecs/tools/docs/document_creator_tool.py +1464 -0
- aiecs/tools/docs/document_layout_tool.py +1160 -0
- aiecs/tools/docs/document_parser_tool.py +1016 -0
- aiecs/tools/docs/document_writer_tool.py +2008 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
- aiecs/tools/langchain_adapter.py +300 -138
- aiecs/tools/schema_generator.py +455 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +581 -0
- aiecs/tools/search_tool/cache.py +264 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +224 -0
- aiecs/tools/search_tool/core.py +778 -0
- aiecs/tools/search_tool/deduplicator.py +119 -0
- aiecs/tools/search_tool/error_handler.py +242 -0
- aiecs/tools/search_tool/metrics.py +343 -0
- aiecs/tools/search_tool/rate_limiter.py +172 -0
- aiecs/tools/search_tool/schemas.py +275 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
- aiecs/tools/statistics/data_loader_tool.py +555 -0
- aiecs/tools/statistics/data_profiler_tool.py +638 -0
- aiecs/tools/statistics/data_transformer_tool.py +580 -0
- aiecs/tools/statistics/data_visualizer_tool.py +498 -0
- aiecs/tools/statistics/model_trainer_tool.py +507 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
- aiecs/tools/task_tools/__init__.py +49 -36
- aiecs/tools/task_tools/chart_tool.py +200 -184
- aiecs/tools/task_tools/classfire_tool.py +268 -267
- aiecs/tools/task_tools/image_tool.py +220 -141
- aiecs/tools/task_tools/office_tool.py +226 -146
- aiecs/tools/task_tools/pandas_tool.py +477 -121
- aiecs/tools/task_tools/report_tool.py +390 -142
- aiecs/tools/task_tools/research_tool.py +149 -79
- aiecs/tools/task_tools/scraper_tool.py +339 -145
- aiecs/tools/task_tools/stats_tool.py +448 -209
- aiecs/tools/temp_file_manager.py +26 -24
- aiecs/tools/tool_executor/__init__.py +18 -16
- aiecs/tools/tool_executor/tool_executor.py +364 -52
- aiecs/utils/LLM_output_structor.py +74 -48
- aiecs/utils/__init__.py +14 -3
- aiecs/utils/base_callback.py +0 -3
- aiecs/utils/cache_provider.py +696 -0
- aiecs/utils/execution_utils.py +50 -31
- aiecs/utils/prompt_loader.py +1 -0
- aiecs/utils/token_usage_repository.py +37 -11
- aiecs/ws/socket_server.py +14 -4
- {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/METADATA +52 -15
- aiecs-1.7.17.dist-info/RECORD +337 -0
- aiecs-1.7.17.dist-info/entry_points.txt +13 -0
- aiecs/config/registry.py +0 -19
- aiecs/domain/context/content_engine.py +0 -982
- aiecs/llm/base_client.py +0 -99
- aiecs/llm/openai_client.py +0 -125
- aiecs/llm/vertex_client.py +0 -186
- aiecs/llm/xai_client.py +0 -184
- aiecs/scripts/dependency_checker.py +0 -857
- aiecs/scripts/quick_dependency_check.py +0 -269
- aiecs/tools/task_tools/search_api.py +0 -7
- aiecs-1.0.1.dist-info/RECORD +0 -90
- aiecs-1.0.1.dist-info/entry_points.txt +0 -7
- /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
- /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
- /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
- /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/WHEEL +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Script to run threshold sweep experiments for knowledge fusion matching.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
poetry run python -m aiecs.scripts.knowledge_graph.run_threshold_experiments
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import asyncio
|
|
9
|
+
import json
|
|
10
|
+
import logging
|
|
11
|
+
import sys
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import List
|
|
14
|
+
|
|
15
|
+
# Add project root to path
|
|
16
|
+
project_root = Path(__file__).parent.parent.parent.parent
|
|
17
|
+
sys.path.insert(0, str(project_root))
|
|
18
|
+
|
|
19
|
+
from aiecs.application.knowledge_graph.fusion.ab_testing import (
|
|
20
|
+
ABTestingFramework,
|
|
21
|
+
ExperimentResult,
|
|
22
|
+
)
|
|
23
|
+
from aiecs.application.knowledge_graph.fusion.evaluation_dataset import (
|
|
24
|
+
create_default_evaluation_dataset,
|
|
25
|
+
)
|
|
26
|
+
from aiecs.application.knowledge_graph.fusion.matching_config import (
|
|
27
|
+
FusionMatchingConfig,
|
|
28
|
+
)
|
|
29
|
+
from aiecs.application.knowledge_graph.fusion.similarity_pipeline import (
|
|
30
|
+
SimilarityPipeline,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
logging.basicConfig(
|
|
34
|
+
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
|
35
|
+
)
|
|
36
|
+
logger = logging.getLogger(__name__)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
async def run_threshold_sweeps(
    framework: ABTestingFramework,
    output_dir: Path,
) -> None:
    """
    Sweep every matching threshold, then evaluate and validate the defaults.

    For each threshold name listed below, the framework's sweep results are
    written to ``sweep_<threshold_name>.json``. Afterwards a default
    ``FusionMatchingConfig`` is evaluated, all collected results are compared
    (``comparison.json``), the default result is validated against minimum
    recall/precision (``validation.json``), and a summary is printed to stdout.

    Args:
        framework: ABTestingFramework instance
        output_dir: Directory to save results (created if it does not exist)
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    # Candidate values per threshold; each key is also read back from the
    # result's config via getattr() when the sweep file is written.
    threshold_sweeps = {
        "alias_match_score": [0.90, 0.92, 0.94, 0.96, 0.98, 1.0],
        "abbreviation_match_score": [0.85, 0.88, 0.90, 0.92, 0.95, 0.98],
        "normalization_match_score": [0.80, 0.85, 0.88, 0.90, 0.92, 0.95],
        "semantic_threshold": [0.70, 0.75, 0.80, 0.85, 0.90, 0.95],
        "string_similarity_threshold": [0.70, 0.75, 0.80, 0.85, 0.90],
    }

    all_results: List[ExperimentResult] = []

    for threshold_name, threshold_range in threshold_sweeps.items():
        logger.info(f"Running sweep for {threshold_name}...")
        results = await framework.threshold_sweep(
            threshold_name=threshold_name,
            threshold_range=threshold_range,
        )
        all_results.extend(results)

        # Flatten each result into a JSON-friendly record for this sweep.
        sweep_data = []
        for r in results:
            sweep_data.append(
                {
                    "config_name": r.config_name,
                    "threshold_value": getattr(r.config, threshold_name),
                    "metrics": r.metrics.to_dict(),
                    "stage_breakdown": r.stage_breakdown,
                }
            )

        sweep_file = output_dir / f"sweep_{threshold_name}.json"
        with sweep_file.open("w") as f:
            json.dump(sweep_data, f, indent=2)
        logger.info(f"Saved sweep results to {sweep_file}")

    # The default configuration is evaluated last and included in the comparison.
    logger.info("Evaluating default configuration...")
    default_result = await framework.evaluate_config("default", FusionMatchingConfig())
    all_results.append(default_result)

    comparison_file = output_dir / "comparison.json"
    comparison = framework.compare_results(all_results)
    with comparison_file.open("w") as f:
        json.dump(comparison, f, indent=2)
    logger.info(f"Saved comparison results to {comparison_file}")

    # Check the default thresholds against the minimum recall/precision targets.
    is_valid, validation = framework.validate_thresholds(
        default_result, min_recall=0.90, min_precision=0.75
    )
    validation_file = output_dir / "validation.json"
    with validation_file.open("w") as f:
        json.dump(validation, f, indent=2)
    logger.info(f"Validation result: {'PASS' if is_valid else 'FAIL'}")
    logger.info(f"Saved validation results to {validation_file}")

    # Human-readable summary on stdout.
    banner = "=" * 80
    print("\n" + banner)
    print("EXPERIMENT SUMMARY")
    print(banner)
    print(f"\nDefault Configuration Metrics:")
    print(f"  Recall: {default_result.metrics.recall:.3f}")
    print(f"  Precision: {default_result.metrics.precision:.3f}")
    print(f"  F1 Score: {default_result.metrics.f1_score:.3f}")
    print(f"  Accuracy: {default_result.metrics.accuracy:.3f}")
    print(f"\nValidation: {'PASS' if is_valid else 'FAIL'}")
    if not is_valid:
        # Only on failure: show which of the two requirements was missed.
        print(f"  Recall requirement: {validation['recall']:.3f} >= {validation['min_recall']:.3f} ({'✓' if validation['recall_met'] else '✗'})")
        print(f"  Precision requirement: {validation['precision']:.3f} >= {validation['min_precision']:.3f} ({'✓' if validation['precision_met'] else '✗'})")
    print("\n" + banner)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
async def run_domain_specific_experiments(
    framework: ABTestingFramework,
    output_dir: Path,
) -> None:
    """
    Run experiments for domain-specific datasets.

    For each of the hard-coded domains, a domain-scoped ``ABTestingFramework``
    is built from the given framework's pipeline and the dataset returned by
    ``get_by_domain``; a default ``FusionMatchingConfig`` is evaluated on it and
    the metrics are collected. All domains are written together to
    ``domain_results.json``.

    Args:
        framework: ABTestingFramework instance
        output_dir: Directory to save results (created if it does not exist)
    """
    # This function can be the first writer (e.g. with --domain-only), so it
    # must not rely on another step having created the output directory.
    output_dir.mkdir(parents=True, exist_ok=True)

    domains = ["academic", "corporate", "medical"]
    domain_results: dict = {}

    for domain in domains:
        logger.info(f"Running experiments for {domain} domain...")
        # NOTE(review): reads the framework's private _dataset/_pipeline
        # attributes; a public accessor would be safer if one exists.
        domain_dataset = framework._dataset.get_by_domain(domain)

        # Create domain-specific framework
        domain_framework = ABTestingFramework(
            pipeline=framework._pipeline,
            dataset=domain_dataset,
        )

        # Evaluate default config on domain dataset
        default_config = FusionMatchingConfig()
        result = await domain_framework.evaluate_config(
            f"default_{domain}", default_config
        )

        domain_results[domain] = {
            "dataset_size": len(domain_dataset),
            "metrics": result.metrics.to_dict(),
            "stage_breakdown": result.stage_breakdown,
        }

    # Save domain-specific results
    domain_file = output_dir / "domain_results.json"
    with open(domain_file, "w") as f:
        json.dump(domain_results, f, indent=2)
    logger.info(f"Saved domain-specific results to {domain_file}")
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
async def main():
    """Parse CLI options, build the evaluation framework, and run the experiments."""
    import argparse

    cli = argparse.ArgumentParser(
        description="Run threshold sweep experiments for knowledge fusion"
    )
    cli.add_argument(
        "--output-dir",
        type=str,
        default="experiment_results",
        help="Output directory for results",
    )
    cli.add_argument(
        "--domain-only",
        action="store_true",
        help="Only run domain-specific experiments",
    )
    options = cli.parse_args()
    results_dir = Path(options.output_dir)

    # Build the evaluation dataset first so its size is logged up front.
    logger.info("Creating evaluation dataset...")
    dataset = create_default_evaluation_dataset()
    logger.info(f"Dataset contains {len(dataset)} pairs")

    # The pipeline is constructed with no arguments (no matchers passed in).
    framework = ABTestingFramework(
        pipeline=SimilarityPipeline(),
        dataset=dataset,
    )

    # Threshold sweeps are skipped with --domain-only; the domain experiments
    # always run.
    if not options.domain_only:
        await run_threshold_sweeps(framework, results_dir)
    await run_domain_specific_experiments(framework, results_dir)

    logger.info("Experiments completed!")
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
if __name__ == "__main__":
|
|
212
|
+
asyncio.run(main())
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
# Multi-Tenancy Migration Scripts
|
|
2
|
+
|
|
3
|
+
This directory contains SQL migration scripts for adding multi-tenancy support to existing Knowledge Graph deployments.
|
|
4
|
+
|
|
5
|
+
## Script Overview
|
|
6
|
+
|
|
7
|
+
| Script | Description | Downtime | Rollback |
|
|
8
|
+
|--------|-------------|----------|----------|
|
|
9
|
+
| `001_add_tenant_id_column.sql` | Add tenant_id columns | No | `rollback_001_remove_tenant_id.sql` |
|
|
10
|
+
| `002_backfill_tenant_id.sql` | Backfill existing data | No | `rollback_002_reset_tenant_id.sql` |
|
|
11
|
+
| `003_create_tenant_indexes.sql` | Create performance indexes | No | `rollback_003_drop_tenant_indexes.sql` |
|
|
12
|
+
| `004_enable_rls_policies.sql` | Enable RLS (optional) | No | `rollback_004_disable_rls.sql` |
|
|
13
|
+
|
|
14
|
+
## Execution Order
|
|
15
|
+
|
|
16
|
+
Run scripts in numerical order:
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
# PostgreSQL
|
|
20
|
+
psql -U postgres -d knowledge_graph -f 001_add_tenant_id_column.sql
|
|
21
|
+
psql -U postgres -d knowledge_graph -f 002_backfill_tenant_id.sql
|
|
22
|
+
psql -U postgres -d knowledge_graph -f 003_create_tenant_indexes.sql
|
|
23
|
+
psql -U postgres -d knowledge_graph -f 004_enable_rls_policies.sql # Optional
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Pre-Migration Checklist
|
|
27
|
+
|
|
28
|
+
- [ ] Backup database: `pg_dump knowledge_graph > backup.sql`
|
|
29
|
+
- [ ] Test on staging environment first
|
|
30
|
+
- [ ] Review and adjust tenant_id in backfill script (`legacy_default` vs `default`)
|
|
31
|
+
- [ ] Verify disk space for indexes
|
|
32
|
+
- [ ] Schedule migration during low-traffic period (for large databases)
|
|
33
|
+
|
|
34
|
+
## Safety Features
|
|
35
|
+
|
|
36
|
+
- All scripts use `IF NOT EXISTS` / `IF EXISTS` for idempotence
|
|
37
|
+
- Script 003 uses `CREATE INDEX CONCURRENTLY` (builds indexes without blocking writes to the tables)
|
|
38
|
+
- Scripts include verification queries
|
|
39
|
+
- Each script has a corresponding rollback script
|
|
40
|
+
|
|
41
|
+
## Rollback
|
|
42
|
+
|
|
43
|
+
To roll back, run the rollback scripts in reverse order:
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
psql -U postgres -d knowledge_graph -f rollback_004_disable_rls.sql
|
|
47
|
+
psql -U postgres -d knowledge_graph -f rollback_003_drop_tenant_indexes.sql
|
|
48
|
+
psql -U postgres -d knowledge_graph -f rollback_002_reset_tenant_id.sql
|
|
49
|
+
psql -U postgres -d knowledge_graph -f rollback_001_remove_tenant_id.sql
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Or restore from backup:
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
psql -U postgres -d knowledge_graph -f backup.sql
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Monitoring Progress
|
|
59
|
+
|
|
60
|
+
### Check Index Creation Progress
|
|
61
|
+
|
|
62
|
+
```sql
|
|
63
|
+
SELECT
|
|
64
|
+
now()::time(0),
|
|
65
|
+
query,
|
|
66
|
+
state,
|
|
67
|
+
wait_event_type,
|
|
68
|
+
wait_event
|
|
69
|
+
FROM pg_stat_activity
|
|
70
|
+
WHERE query LIKE '%CREATE INDEX%';
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### Check Table Sizes
|
|
74
|
+
|
|
75
|
+
```sql
|
|
76
|
+
SELECT
|
|
77
|
+
tablename,
|
|
78
|
+
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS size
|
|
79
|
+
FROM pg_tables
|
|
80
|
+
WHERE tablename IN ('graph_entities', 'graph_relations');
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### Verify Migration Status
|
|
84
|
+
|
|
85
|
+
```sql
|
|
86
|
+
-- Check if tenant_id columns exist
|
|
87
|
+
SELECT column_name, data_type, column_default
|
|
88
|
+
FROM information_schema.columns
|
|
89
|
+
WHERE table_name IN ('graph_entities', 'graph_relations')
|
|
90
|
+
AND column_name = 'tenant_id';
|
|
91
|
+
|
|
92
|
+
-- Check tenant distribution
|
|
93
|
+
SELECT tenant_id, COUNT(*)
|
|
94
|
+
FROM graph_entities
|
|
95
|
+
GROUP BY tenant_id;
|
|
96
|
+
|
|
97
|
+
-- Check if indexes exist
|
|
98
|
+
SELECT indexname FROM pg_indexes
|
|
99
|
+
WHERE indexname LIKE '%tenant%';
|
|
100
|
+
|
|
101
|
+
-- Check if RLS is enabled
|
|
102
|
+
SELECT tablename, rowsecurity
|
|
103
|
+
FROM pg_tables
|
|
104
|
+
WHERE tablename IN ('graph_entities', 'graph_relations');
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Troubleshooting
|
|
108
|
+
|
|
109
|
+
### Script Fails with "column already exists"
|
|
110
|
+
|
|
111
|
+
The scripts are idempotent - this message means the migration was already run. Verify with:
|
|
112
|
+
|
|
113
|
+
```sql
|
|
114
|
+
\d graph_entities
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### Index creation is slow
|
|
118
|
+
|
|
119
|
+
Index creation time depends on table size. For very large tables (>10M rows), consider:
|
|
120
|
+
|
|
121
|
+
- Running during a maintenance window
|
|
122
|
+
- Increasing `maintenance_work_mem`:
|
|
123
|
+
```sql
|
|
124
|
+
SET maintenance_work_mem = '2GB';
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
### RLS causes performance issues
|
|
128
|
+
|
|
129
|
+
If RLS adds unacceptable overhead, disable it and rely on application-level filtering:
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
psql -U postgres -d knowledge_graph -f rollback_004_disable_rls.sql
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
Or switch to SEPARATE_SCHEMA mode for better performance.
|
|
136
|
+
|
|
137
|
+
## Support
|
|
138
|
+
|
|
139
|
+
For detailed documentation:
|
|
140
|
+
- [Migration Guide](../../../../docs/user/knowledge_graph/deployment/MULTI_TENANCY_MIGRATION.md)
|
|
141
|
+
- [Setup Guide](../../../../docs/user/knowledge_graph/deployment/MULTI_TENANCY_GUIDE.md)
|
|
142
|
+
- [Troubleshooting](../../../../docs/user/knowledge_graph/deployment/MULTI_TENANCY_TROUBLESHOOTING.md)
|