aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiecs might be problematic. Click here for more details.
- aiecs/__init__.py +13 -16
- aiecs/__main__.py +7 -7
- aiecs/aiecs_client.py +269 -75
- aiecs/application/executors/operation_executor.py +79 -54
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
- aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
- aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
- aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +98 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
- aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
- aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
- aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
- aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
- aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
- aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
- aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
- aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
- aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
- aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
- aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
- aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
- aiecs/application/knowledge_graph/search/reranker.py +293 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +471 -0
- aiecs/config/__init__.py +20 -5
- aiecs/config/config.py +762 -31
- aiecs/config/graph_config.py +131 -0
- aiecs/config/tool_config.py +399 -0
- aiecs/core/__init__.py +29 -13
- aiecs/core/interface/__init__.py +2 -2
- aiecs/core/interface/execution_interface.py +22 -22
- aiecs/core/interface/storage_interface.py +37 -88
- aiecs/core/registry/__init__.py +31 -0
- aiecs/core/registry/service_registry.py +92 -0
- aiecs/domain/__init__.py +270 -1
- aiecs/domain/agent/__init__.py +191 -0
- aiecs/domain/agent/base_agent.py +3870 -0
- aiecs/domain/agent/exceptions.py +99 -0
- aiecs/domain/agent/graph_aware_mixin.py +569 -0
- aiecs/domain/agent/hybrid_agent.py +1435 -0
- aiecs/domain/agent/integration/__init__.py +29 -0
- aiecs/domain/agent/integration/context_compressor.py +216 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
- aiecs/domain/agent/integration/protocols.py +281 -0
- aiecs/domain/agent/integration/retry_policy.py +218 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
- aiecs/domain/agent/lifecycle.py +291 -0
- aiecs/domain/agent/llm_agent.py +692 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +1124 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +163 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
- aiecs/domain/agent/models.py +884 -0
- aiecs/domain/agent/observability.py +479 -0
- aiecs/domain/agent/persistence.py +449 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +159 -0
- aiecs/domain/agent/prompts/formatters.py +187 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +253 -0
- aiecs/domain/agent/tool_agent.py +444 -0
- aiecs/domain/agent/tools/__init__.py +15 -0
- aiecs/domain/agent/tools/schema_generator.py +364 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +469 -0
- aiecs/domain/community/analytics.py +432 -0
- aiecs/domain/community/collaborative_workflow.py +648 -0
- aiecs/domain/community/communication_hub.py +634 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +796 -0
- aiecs/domain/community/community_manager.py +803 -0
- aiecs/domain/community/decision_engine.py +849 -0
- aiecs/domain/community/exceptions.py +231 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +234 -0
- aiecs/domain/community/resource_manager.py +461 -0
- aiecs/domain/community/shared_context_manager.py +589 -0
- aiecs/domain/context/__init__.py +40 -10
- aiecs/domain/context/context_engine.py +1910 -0
- aiecs/domain/context/conversation_models.py +87 -53
- aiecs/domain/context/graph_memory.py +582 -0
- aiecs/domain/execution/model.py +12 -4
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +148 -0
- aiecs/domain/knowledge_graph/models/evidence.py +178 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
- aiecs/domain/knowledge_graph/models/path.py +171 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
- aiecs/domain/knowledge_graph/models/query.py +261 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
- aiecs/domain/knowledge_graph/models/relation.py +202 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
- aiecs/domain/task/dsl_processor.py +172 -56
- aiecs/domain/task/model.py +20 -8
- aiecs/domain/task/task_context.py +27 -24
- aiecs/infrastructure/__init__.py +0 -2
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +837 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
- aiecs/infrastructure/graph_storage/cache.py +424 -0
- aiecs/infrastructure/graph_storage/distributed.py +223 -0
- aiecs/infrastructure/graph_storage/error_handling.py +380 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
- aiecs/infrastructure/graph_storage/metrics.py +344 -0
- aiecs/infrastructure/graph_storage/migration.py +400 -0
- aiecs/infrastructure/graph_storage/pagination.py +483 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
- aiecs/infrastructure/graph_storage/postgres.py +1563 -0
- aiecs/infrastructure/graph_storage/property_storage.py +353 -0
- aiecs/infrastructure/graph_storage/protocols.py +76 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
- aiecs/infrastructure/graph_storage/streaming.py +487 -0
- aiecs/infrastructure/graph_storage/tenant.py +412 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
- aiecs/infrastructure/messaging/websocket_manager.py +51 -35
- aiecs/infrastructure/monitoring/__init__.py +22 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
- aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
- aiecs/infrastructure/monitoring/structured_logger.py +3 -7
- aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
- aiecs/infrastructure/persistence/__init__.py +14 -1
- aiecs/infrastructure/persistence/context_engine_client.py +184 -0
- aiecs/infrastructure/persistence/database_manager.py +67 -43
- aiecs/infrastructure/persistence/file_storage.py +180 -103
- aiecs/infrastructure/persistence/redis_client.py +74 -21
- aiecs/llm/__init__.py +73 -25
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
- aiecs/llm/client_factory.py +224 -36
- aiecs/llm/client_resolver.py +155 -0
- aiecs/llm/clients/__init__.py +38 -0
- aiecs/llm/clients/base_client.py +324 -0
- aiecs/llm/clients/google_function_calling_mixin.py +457 -0
- aiecs/llm/clients/googleai_client.py +241 -0
- aiecs/llm/clients/openai_client.py +158 -0
- aiecs/llm/clients/openai_compatible_mixin.py +367 -0
- aiecs/llm/clients/vertex_client.py +897 -0
- aiecs/llm/clients/xai_client.py +201 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +272 -0
- aiecs/llm/config/config_validator.py +206 -0
- aiecs/llm/config/model_config.py +143 -0
- aiecs/llm/protocols.py +149 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +89 -0
- aiecs/main.py +140 -121
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/module_checker.py +499 -0
- aiecs/scripts/aid/version_manager.py +235 -0
- aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
- aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
- aiecs/scripts/dependance_check/__init__.py +15 -0
- aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
- aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
- aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
- aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
- aiecs/scripts/knowledge_graph/__init__.py +3 -0
- aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
- aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
- aiecs/scripts/tools_develop/README.md +671 -0
- aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
- aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
- aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
- aiecs/scripts/tools_develop/schema_coverage.py +511 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
- aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
- aiecs/scripts/tools_develop/verify_tools.py +352 -0
- aiecs/tasks/__init__.py +0 -1
- aiecs/tasks/worker.py +115 -47
- aiecs/tools/__init__.py +194 -72
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +330 -0
- aiecs/tools/apisource/providers/__init__.py +112 -0
- aiecs/tools/apisource/providers/base.py +671 -0
- aiecs/tools/apisource/providers/census.py +397 -0
- aiecs/tools/apisource/providers/fred.py +535 -0
- aiecs/tools/apisource/providers/newsapi.py +409 -0
- aiecs/tools/apisource/providers/worldbank.py +352 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +363 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
- aiecs/tools/apisource/tool.py +832 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +334 -0
- aiecs/tools/base_tool.py +415 -21
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +607 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
- aiecs/tools/docs/content_insertion_tool.py +1320 -0
- aiecs/tools/docs/document_creator_tool.py +1323 -0
- aiecs/tools/docs/document_layout_tool.py +1160 -0
- aiecs/tools/docs/document_parser_tool.py +1011 -0
- aiecs/tools/docs/document_writer_tool.py +1829 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
- aiecs/tools/langchain_adapter.py +300 -138
- aiecs/tools/schema_generator.py +455 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +581 -0
- aiecs/tools/search_tool/cache.py +264 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +224 -0
- aiecs/tools/search_tool/core.py +778 -0
- aiecs/tools/search_tool/deduplicator.py +119 -0
- aiecs/tools/search_tool/error_handler.py +242 -0
- aiecs/tools/search_tool/metrics.py +343 -0
- aiecs/tools/search_tool/rate_limiter.py +172 -0
- aiecs/tools/search_tool/schemas.py +275 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
- aiecs/tools/statistics/data_loader_tool.py +555 -0
- aiecs/tools/statistics/data_profiler_tool.py +638 -0
- aiecs/tools/statistics/data_transformer_tool.py +580 -0
- aiecs/tools/statistics/data_visualizer_tool.py +498 -0
- aiecs/tools/statistics/model_trainer_tool.py +507 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
- aiecs/tools/task_tools/__init__.py +49 -36
- aiecs/tools/task_tools/chart_tool.py +200 -184
- aiecs/tools/task_tools/classfire_tool.py +268 -267
- aiecs/tools/task_tools/image_tool.py +175 -131
- aiecs/tools/task_tools/office_tool.py +226 -146
- aiecs/tools/task_tools/pandas_tool.py +477 -121
- aiecs/tools/task_tools/report_tool.py +390 -142
- aiecs/tools/task_tools/research_tool.py +149 -79
- aiecs/tools/task_tools/scraper_tool.py +339 -145
- aiecs/tools/task_tools/stats_tool.py +448 -209
- aiecs/tools/temp_file_manager.py +26 -24
- aiecs/tools/tool_executor/__init__.py +18 -16
- aiecs/tools/tool_executor/tool_executor.py +364 -52
- aiecs/utils/LLM_output_structor.py +74 -48
- aiecs/utils/__init__.py +14 -3
- aiecs/utils/base_callback.py +0 -3
- aiecs/utils/cache_provider.py +696 -0
- aiecs/utils/execution_utils.py +50 -31
- aiecs/utils/prompt_loader.py +1 -0
- aiecs/utils/token_usage_repository.py +37 -11
- aiecs/ws/socket_server.py +14 -4
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
- aiecs-1.7.6.dist-info/RECORD +337 -0
- aiecs-1.7.6.dist-info/entry_points.txt +13 -0
- aiecs/config/registry.py +0 -19
- aiecs/domain/context/content_engine.py +0 -982
- aiecs/llm/base_client.py +0 -99
- aiecs/llm/openai_client.py +0 -125
- aiecs/llm/vertex_client.py +0 -186
- aiecs/llm/xai_client.py +0 -184
- aiecs/scripts/dependency_checker.py +0 -857
- aiecs/scripts/quick_dependency_check.py +0 -269
- aiecs/tools/task_tools/search_api.py +0 -7
- aiecs-1.0.1.dist-info/RECORD +0 -90
- aiecs-1.0.1.dist-info/entry_points.txt +0 -7
- /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
- /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
- /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
- /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,392 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Text Similarity Utilities
|
|
3
|
+
|
|
4
|
+
Provides various text similarity and matching functions for knowledge graph operations.
|
|
5
|
+
Includes BM25, Jaccard, cosine similarity, Levenshtein distance, and fuzzy matching.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
import math
|
|
10
|
+
from typing import List, Optional, Tuple, Callable, Any
|
|
11
|
+
from collections import Counter
|
|
12
|
+
from difflib import SequenceMatcher
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class BM25Scorer:
    """
    Okapi BM25 relevance scorer.

    Ranks documents in a fixed corpus against ad-hoc queries using the
    BM25 weighting scheme, a TF-IDF refinement that adds term-frequency
    saturation and document-length normalization.

    Example::

        scorer = BM25Scorer(corpus=[
            "The quick brown fox jumps over the lazy dog",
            "A quick brown dog jumps over a lazy fox",
            "The lazy dog sleeps all day"
        ])

        scores = scorer.score("quick brown fox")
        # Returns scores for each document in corpus
    """

    def __init__(
        self,
        corpus: List[str],
        k1: float = 1.5,
        b: float = 0.75,
        tokenizer: Optional[Callable[[str], List[str]]] = None,
    ):
        """
        Build the scorer's index over the given corpus.

        Args:
            corpus: Documents to index and score against
            k1: Term-frequency saturation parameter (default: 1.5)
            b: Document-length normalization parameter (default: 0.75)
            tokenizer: Optional tokenizer; defaults to lowercase word extraction
        """
        self.k1 = k1
        self.b = b
        self.tokenizer = tokenizer or self._default_tokenizer

        # Pre-tokenize the corpus and record per-document statistics.
        self.documents = [self.tokenizer(text) for text in corpus]
        self.doc_count = len(self.documents)
        self.doc_lengths = [len(tokens) for tokens in self.documents]
        self.avg_doc_length = (sum(self.doc_lengths) / self.doc_count) if self.doc_count else 0

        # Per-document term frequencies plus corpus-wide document frequencies.
        self.term_freqs = []
        self.doc_freqs: Counter[str] = Counter()
        for tokens in self.documents:
            self.term_freqs.append(Counter(tokens))
            # Each distinct term counts once per document.
            self.doc_freqs.update(set(tokens))

        # Smoothed IDF; the "+ 1.0" keeps weights non-negative even for
        # terms occurring in most documents.
        self.idf = {
            term: math.log((self.doc_count - df + 0.5) / (df + 0.5) + 1.0)
            for term, df in self.doc_freqs.items()
        }

    def _default_tokenizer(self, text: str) -> List[str]:
        """Lowercase the text and extract runs of word characters."""
        return re.findall(r"\w+", text.lower())

    def score(self, query: str) -> List[float]:
        """
        Compute the BM25 score of every corpus document for the query.

        Args:
            query: Query string

        Returns:
            One BM25 score per document, in corpus order
        """
        query_terms = self.tokenizer(query)
        results: List[float] = []

        for tf_map, length in zip(self.term_freqs, self.doc_lengths):
            total = 0.0
            for term in query_terms:
                if term not in tf_map:
                    continue
                tf = tf_map[term]
                weight = self.idf.get(term, 0.0)
                # Classic BM25: saturating TF, scaled by IDF and length norm.
                # Only reached for non-empty documents, so avg_doc_length > 0.
                saturation = tf + self.k1 * (1 - self.b + self.b * (length / self.avg_doc_length))
                total += weight * tf * (self.k1 + 1) / saturation
            results.append(total)

        return results

    def get_top_n(self, query: str, n: int = 10) -> List[Tuple[int, float]]:
        """
        Return the n best-scoring documents for the query.

        Args:
            query: Query string
            n: Maximum number of results to return

        Returns:
            (document_index, score) tuples sorted by score descending
        """
        ranked = sorted(enumerate(self.score(query)), key=lambda pair: pair[1], reverse=True)
        return ranked[:n]
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def jaccard_similarity(set1: set, set2: set) -> float:
    """
    Compute the Jaccard index of two sets.

    Defined as |intersection| / |union|. Two empty sets are treated as
    identical and score 1.0.

    Args:
        set1: First set
        set2: Second set

    Returns:
        Similarity score in [0.0, 1.0]
    """
    union = set1 | set2
    if not union:
        # Both sets are empty: perfect match by convention.
        return 1.0
    return len(set1 & set2) / len(union)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def jaccard_similarity_text(text1: str, text2: str, tokenizer: Optional[Callable[[str], Any]] = None) -> float:
    """
    Compute Jaccard similarity of two strings over their token sets.

    Args:
        text1: First text string
        text2: Second text string
        tokenizer: Optional tokenizer; its output is coerced to a set.
            Defaults to lowercase word extraction.

    Returns:
        Similarity score in [0.0, 1.0]
    """
    if tokenizer is None:

        def to_token_set(text: str) -> set:
            return set(re.findall(r"\w+", text.lower()))

    else:
        # Coerce whatever the caller's tokenizer yields into a set.
        def to_token_set(text: str, _tokenize=tokenizer) -> set:
            return set(_tokenize(text))

    return jaccard_similarity(to_token_set(text1), to_token_set(text2))
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def cosine_similarity_text(text1: str, text2: str, tokenizer: Optional[Callable[[str], List[str]]] = None) -> float:
    """
    Compute cosine similarity between two strings.

    Each text is represented as a raw term-frequency vector; the result is
    the cosine of the angle between the two vectors.

    Args:
        text1: First text string
        text2: Second text string
        tokenizer: Optional tokenizer (default: lowercase word extraction)

    Returns:
        Similarity score in [0.0, 1.0]
    """
    if tokenizer is None:

        def tokenize(text: str) -> List[str]:
            return re.findall(r"\w+", text.lower())

    else:
        tokenize = tokenizer

    counts1 = Counter(tokenize(text1))
    counts2 = Counter(tokenize(text2))

    # No tokens at all: identical only when both inputs are empty strings.
    if not counts1 and not counts2:
        return 1.0 if not text1 and not text2 else 0.0

    shared = set(counts1) & set(counts2)
    dot = sum(counts1[term] * counts2[term] for term in shared)
    norm1 = math.sqrt(sum(c * c for c in counts1.values()))
    norm2 = math.sqrt(sum(c * c for c in counts2.values()))

    if norm1 == 0 or norm2 == 0:
        return 0.0

    # Clamp to guard against floating-point drift just outside [0, 1].
    return min(1.0, max(0.0, dot / (norm1 * norm2)))
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def levenshtein_distance(s1: str, s2: str) -> int:
    """
    Compute the Levenshtein (edit) distance between two strings.

    The distance is the minimum number of single-character insertions,
    deletions, or substitutions needed to turn one string into the other.

    Args:
        s1: First string
        s2: Second string

    Returns:
        Edit distance (0 means the strings are identical)
    """
    # Keep the DP row as short as possible: iterate over the longer string
    # and size the row to the shorter one.
    if len(s1) < len(s2):
        s1, s2 = s2, s1

    if not s2:
        return len(s1)

    previous = list(range(len(s2) + 1))
    for row_idx, left_char in enumerate(s1, start=1):
        current = [row_idx]
        for col_idx, top_char in enumerate(s2, start=1):
            cost = 0 if left_char == top_char else 1
            current.append(min(
                previous[col_idx] + 1,        # insertion
                current[-1] + 1,              # deletion
                previous[col_idx - 1] + cost  # substitution (free on match)
            ))
        previous = current

    return previous[-1]
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def normalized_levenshtein_similarity(s1: str, s2: str) -> float:
    """
    Compute Levenshtein similarity normalized to [0.0, 1.0].

    Args:
        s1: First string
        s2: Second string

    Returns:
        1.0 for identical strings, 0.0 for maximally different ones
    """
    longest = max(len(s1), len(s2))
    if longest == 0:
        # Two empty strings are identical by definition.
        return 1.0
    return 1.0 - (levenshtein_distance(s1, s2) / longest)
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def fuzzy_match(
    query: str,
    candidates: List[str],
    threshold: float = 0.6,
    method: str = "jaccard",
) -> List[Tuple[str, float]]:
    """
    Find candidates that are similar enough to the query.

    Args:
        query: Query string to match
        candidates: List of candidate strings
        threshold: Minimum similarity to keep a candidate (0.0 to 1.0)
        method: One of "jaccard", "cosine", "levenshtein", or "ratio"

    Returns:
        (candidate, similarity_score) tuples at or above the threshold,
        sorted by score descending

    Raises:
        ValueError: If method is unknown and at least one candidate is
            scored (an empty candidate list never raises).
    """
    scorers = {
        "jaccard": lambda text: jaccard_similarity_text(query, text),
        "cosine": lambda text: cosine_similarity_text(query, text),
        "levenshtein": lambda text: normalized_levenshtein_similarity(query, text),
        # difflib's built-in fuzzy ratio, compared case-insensitively.
        "ratio": lambda text: SequenceMatcher(None, query.lower(), text.lower()).ratio(),
    }

    matches: List[Tuple[str, float]] = []
    for candidate in candidates:
        # Validate the method lazily, per candidate, so that an empty
        # candidate list never raises even for an unknown method.
        scorer = scorers.get(method)
        if scorer is None:
            raise ValueError(f"Unknown method: {method}. Use 'jaccard', 'cosine', 'levenshtein', or 'ratio'")
        similarity = scorer(candidate)
        if similarity >= threshold:
            matches.append((candidate, similarity))

    matches.sort(key=lambda item: item[1], reverse=True)
    return matches
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
class TextSimilarity:
    """
    Unified entry point for the module's text-similarity helpers.

    Bundles Jaccard, cosine, Levenshtein, fuzzy matching, and BM25 behind
    one object so callers can configure a tokenizer once and reuse it.

    Example::

        similarity = TextSimilarity()

        # Jaccard similarity
        score = similarity.jaccard("hello world", "world hello")

        # Cosine similarity
        score = similarity.cosine("machine learning", "deep learning")

        # Levenshtein distance
        distance = similarity.levenshtein("kitten", "sitting")

        # Fuzzy matching
        matches = similarity.fuzzy_match(
            "python",
            ["python3", "pyton", "java", "pythn"],
            threshold=0.7
        )
    """

    def __init__(self, tokenizer: Optional[Callable[[str], List[str]]] = None):
        """
        Initialize the facade.

        Args:
            tokenizer: Optional tokenizer shared by the token-based methods
        """
        # Forwarded to jaccard/cosine/bm25; None selects each helper's default.
        self.tokenizer = tokenizer

    def jaccard(self, text1: str, text2: str) -> float:
        """Jaccard similarity of the two texts' token sets."""
        return jaccard_similarity_text(text1, text2, self.tokenizer)

    def cosine(self, text1: str, text2: str) -> float:
        """Cosine similarity of the two texts' term-frequency vectors."""
        return cosine_similarity_text(text1, text2, self.tokenizer)

    def levenshtein(self, text1: str, text2: str) -> int:
        """Raw edit distance between the two texts."""
        return levenshtein_distance(text1, text2)

    def levenshtein_similarity(self, text1: str, text2: str) -> float:
        """Edit-distance similarity normalized to [0.0, 1.0]."""
        return normalized_levenshtein_similarity(text1, text2)

    def fuzzy_match(
        self,
        query: str,
        candidates: List[str],
        threshold: float = 0.6,
        method: str = "jaccard",
    ) -> List[Tuple[str, float]]:
        """Candidates scoring at or above the threshold for the query."""
        return fuzzy_match(query, candidates, threshold, method)

    def bm25(self, corpus: List[str], k1: float = 1.5, b: float = 0.75) -> BM25Scorer:
        """Build a BM25 scorer over the corpus using this instance's tokenizer."""
        return BM25Scorer(corpus, k1=k1, b=b, tokenizer=self.tokenizer)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Knowledge Graph Traversal Application Layer
|
|
3
|
+
|
|
4
|
+
Advanced traversal algorithms and path ranking utilities.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from aiecs.application.knowledge_graph.traversal.path_scorer import PathScorer
|
|
8
|
+
from aiecs.application.knowledge_graph.traversal.enhanced_traversal import (
|
|
9
|
+
EnhancedTraversal,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"PathScorer",
|
|
14
|
+
"EnhancedTraversal",
|
|
15
|
+
]
|
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Enhanced Graph Traversal
|
|
3
|
+
|
|
4
|
+
Provides advanced traversal capabilities with PathPattern support,
|
|
5
|
+
cycle detection, and sophisticated path filtering.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import List, Optional
|
|
9
|
+
from collections import deque
|
|
10
|
+
from aiecs.domain.knowledge_graph.models.relation import Relation
|
|
11
|
+
from aiecs.domain.knowledge_graph.models.path import Path
|
|
12
|
+
from aiecs.domain.knowledge_graph.models.path_pattern import (
|
|
13
|
+
PathPattern,
|
|
14
|
+
TraversalDirection,
|
|
15
|
+
)
|
|
16
|
+
from aiecs.infrastructure.graph_storage.base import GraphStore
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class EnhancedTraversal:
    """
    Enhanced Graph Traversal Service

    Provides advanced traversal capabilities beyond basic BFS:
    - PathPattern-based traversal
    - Cycle detection and handling
    - Depth-limited traversal with constraints
    - Path filtering by pattern

    Example:
        ```python
        traversal = EnhancedTraversal(graph_store)

        # Define pattern
        pattern = PathPattern(
            relation_types=["WORKS_FOR", "LOCATED_IN"],
            max_depth=2,
            allow_cycles=False
        )

        # Traverse with pattern
        paths = await traversal.traverse_with_pattern(
            start_entity_id="person_1",
            pattern=pattern,
            max_results=10
        )
        ```
    """

    def __init__(self, graph_store: GraphStore):
        """
        Initialize enhanced traversal service

        Args:
            graph_store: Graph storage backend to use
        """
        self.graph_store = graph_store

    @staticmethod
    def _direction_str(direction) -> str:
        """Normalize a traversal direction (enum member or plain string) to its
        string value.

        ``PathPattern.direction`` may already be a string when the model uses
        ``use_enum_values=True``; this helper accepts both forms.
        """
        return direction if isinstance(direction, str) else direction.value

    async def traverse_with_pattern(
        self,
        start_entity_id: str,
        pattern: PathPattern,
        max_results: int = 100,
    ) -> List[Path]:
        """
        Traverse graph following a path pattern (breadth-first).

        Args:
            start_entity_id: Starting entity ID
            pattern: Path pattern to follow
            max_results: Maximum number of paths to return

        Returns:
            List of paths matching the pattern; empty when the start entity
            does not exist or is rejected by the pattern.
        """
        start_entity = await self.graph_store.get_entity(start_entity_id)
        if start_entity is None:
            return []

        # The pattern may veto the starting node itself.
        if not pattern.is_entity_allowed(start_entity.id, start_entity.entity_type):
            return []

        paths: List[Path] = []

        # BFS frontier. Each state carries its own path and (when cycles are
        # forbidden) its own visited-set so independent branches do not
        # interfere with one another.
        queue: deque = deque()
        queue.append(
            {
                "entity": start_entity,
                "path_entities": [start_entity],
                "path_edges": [],
                "depth": 0,
                "visited": ({start_entity.id} if not pattern.allow_cycles else set()),
            }
        )

        # The pattern's direction is constant for the whole traversal, so
        # normalize it once instead of per iteration.
        direction_str = self._direction_str(pattern.direction)

        while queue and len(paths) < max_results:
            current = queue.popleft()
            current_entity = current["entity"]
            current_depth = current["depth"]
            path_entities = current["path_entities"]
            path_edges = current["path_edges"]
            visited_nodes = current["visited"]

            # Emit the current path if its length satisfies the pattern
            # (this may include the zero-edge path at the start node,
            # depending on the pattern's minimum length).
            if pattern.is_valid_path_length(len(path_edges)):
                paths.append(Path(nodes=path_entities, edges=path_edges))

            # Stop expanding this branch once the depth limit is reached.
            if not pattern.should_continue_traversal(current_depth):
                continue

            neighbors = await self.graph_store.get_neighbors(
                current_entity.id,
                relation_type=None,  # relation filtering is applied via the pattern below
                direction=direction_str,
            )

            for neighbor in neighbors:
                # Check if entity is allowed by the pattern.
                if not pattern.is_entity_allowed(neighbor.id, neighbor.entity_type):
                    continue

                # Cycle check: skip nodes already on this path.
                if not pattern.allow_cycles and neighbor.id in visited_nodes:
                    continue

                # Resolve the concrete relation connecting the two nodes.
                relation = await self._find_relation(current_entity.id, neighbor.id, pattern.direction)
                if relation is None:
                    continue

                # Check if relation is allowed at this depth.
                if not pattern.is_relation_allowed(relation.relation_type, current_depth):
                    continue

                # For incoming traversal the stored relation points the
                # "wrong" way for path construction (paths expect
                # edges[i].source_id == nodes[i].id), so synthesize a
                # reversed copy.
                if direction_str == "incoming":
                    path_relation = Relation(
                        id=f"{relation.id}_reversed",
                        relation_type=relation.relation_type,
                        source_id=current_entity.id,
                        target_id=neighbor.id,
                        weight=relation.weight,
                    )
                else:
                    path_relation = relation

                # Extend the per-branch visited-set only when cycles are
                # forbidden; otherwise keep sharing the (unused) empty set.
                new_visited = (visited_nodes | {neighbor.id}) if not pattern.allow_cycles else visited_nodes

                queue.append(
                    {
                        "entity": neighbor,
                        "path_entities": path_entities + [neighbor],
                        "path_edges": path_edges + [path_relation],
                        "depth": current_depth + 1,
                        "visited": new_visited,
                    }
                )

        return paths

    async def _find_relation(self, source_id: str, target_id: str, direction: TraversalDirection) -> Optional[Relation]:
        """
        Find the relation between two entities

        Args:
            source_id: Source entity ID
            target_id: Target entity ID
            direction: Traversal direction (can be enum or string)

        Returns:
            Relation if found, None otherwise. NOTE: for stores other than
            InMemoryGraphStore this currently returns a synthetic
            "CONNECTED_TO" placeholder relation instead of querying the
            backend.
        """
        # Imported locally (once per call, not per neighbor) to avoid a
        # circular dependency at module import time.
        from aiecs.infrastructure.graph_storage.in_memory import (
            InMemoryGraphStore,
        )

        # Handle both enum and string directions.
        direction_enum = TraversalDirection(direction) if isinstance(direction, str) else direction
        is_in_memory = isinstance(self.graph_store, InMemoryGraphStore)

        if direction_enum == TraversalDirection.OUTGOING or direction_enum == TraversalDirection.BOTH:
            # Look for outgoing relations from source.
            neighbors = await self.graph_store.get_neighbors(source_id, relation_type=None, direction="outgoing")
            for neighbor in neighbors:
                if neighbor.id != target_id:
                    continue
                if is_in_memory:
                    # The in-memory store exposes its relations directly.
                    for rel in self.graph_store.relations.values():
                        if rel.source_id == source_id and rel.target_id == target_id:
                            return rel
                else:
                    # Placeholder for SQLite or other stores - a real
                    # implementation would query the DB for the relation.
                    return Relation(
                        id=f"rel_{source_id}_{target_id}",
                        relation_type="CONNECTED_TO",
                        source_id=source_id,
                        target_id=target_id,
                    )

        if direction_enum == TraversalDirection.INCOMING or direction_enum == TraversalDirection.BOTH:
            # Look for incoming relations to source (i.e., outgoing from target).
            neighbors = await self.graph_store.get_neighbors(target_id, relation_type=None, direction="outgoing")
            for neighbor in neighbors:
                if neighbor.id != source_id:
                    continue
                if is_in_memory:
                    for rel in self.graph_store.relations.values():
                        if rel.source_id == target_id and rel.target_id == source_id:
                            return rel
                else:
                    # Same placeholder as above, with source/target swapped.
                    return Relation(
                        id=f"rel_{target_id}_{source_id}",
                        relation_type="CONNECTED_TO",
                        source_id=target_id,
                        target_id=source_id,
                    )

        return None

    def detect_cycles(self, path: Path) -> bool:
        """
        Detect if a path contains cycles (repeated nodes)

        Args:
            path: Path to check

        Returns:
            True if any entity appears more than once in the path
        """
        entity_ids = path.get_entity_ids()
        return len(entity_ids) != len(set(entity_ids))

    def filter_paths_without_cycles(self, paths: List[Path]) -> List[Path]:
        """
        Filter out paths that contain cycles

        Args:
            paths: List of paths to filter

        Returns:
            List of paths without cycles
        """
        return [path for path in paths if not self.detect_cycles(path)]

    async def find_all_paths_between(
        self,
        source_id: str,
        target_id: str,
        pattern: Optional[PathPattern] = None,
        max_paths: int = 10,
    ) -> List[Path]:
        """
        Find all paths between two entities matching a pattern

        Args:
            source_id: Source entity ID
            target_id: Target entity ID
            pattern: Optional path pattern to follow (defaults to
                max_depth=5 with cycles disallowed)
            max_paths: Maximum number of paths to return

        Returns:
            List of paths from source to target
        """
        if pattern is None:
            pattern = PathPattern(max_depth=5, allow_cycles=False)

        # Over-fetch from the source, then keep only paths ending at target.
        all_paths = await self.traverse_with_pattern(
            start_entity_id=source_id,
            pattern=pattern,
            max_results=max_paths * 10,  # Get more paths for filtering
        )

        target_paths = [path for path in all_paths if path.end_entity.id == target_id]

        return target_paths[:max_paths]
|