kailash 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +1 -1
- kailash/api/__init__.py +11 -1
- kailash/api/gateway.py +394 -0
- kailash/api/mcp_integration.py +478 -0
- kailash/api/workflow_api.py +29 -13
- kailash/nodes/ai/__init__.py +40 -4
- kailash/nodes/ai/a2a.py +1143 -0
- kailash/nodes/ai/agents.py +120 -6
- kailash/nodes/ai/ai_providers.py +224 -30
- kailash/nodes/ai/embedding_generator.py +34 -38
- kailash/nodes/ai/intelligent_agent_orchestrator.py +2114 -0
- kailash/nodes/ai/llm_agent.py +351 -356
- kailash/nodes/ai/self_organizing.py +1624 -0
- kailash/nodes/api/http.py +106 -25
- kailash/nodes/api/rest.py +116 -21
- kailash/nodes/base.py +60 -64
- kailash/nodes/code/python.py +61 -42
- kailash/nodes/data/__init__.py +10 -10
- kailash/nodes/data/readers.py +117 -66
- kailash/nodes/data/retrieval.py +1 -1
- kailash/nodes/data/sharepoint_graph.py +23 -25
- kailash/nodes/data/sql.py +24 -26
- kailash/nodes/data/writers.py +41 -44
- kailash/nodes/logic/__init__.py +9 -3
- kailash/nodes/logic/async_operations.py +60 -21
- kailash/nodes/logic/operations.py +43 -22
- kailash/nodes/logic/workflow.py +26 -18
- kailash/nodes/mcp/client.py +29 -33
- kailash/nodes/transform/__init__.py +8 -1
- kailash/nodes/transform/formatters.py +1 -1
- kailash/nodes/transform/processors.py +119 -4
- kailash/tracking/metrics_collector.py +6 -7
- kailash/utils/export.py +2 -2
- kailash/utils/templates.py +16 -16
- {kailash-0.1.3.dist-info → kailash-0.1.5.dist-info}/METADATA +293 -29
- {kailash-0.1.3.dist-info → kailash-0.1.5.dist-info}/RECORD +40 -35
- {kailash-0.1.3.dist-info → kailash-0.1.5.dist-info}/WHEEL +0 -0
- {kailash-0.1.3.dist-info → kailash-0.1.5.dist-info}/entry_points.txt +0 -0
- {kailash-0.1.3.dist-info → kailash-0.1.5.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.1.3.dist-info → kailash-0.1.5.dist-info}/top_level.txt +0 -0
kailash/nodes/ai/agents.py
CHANGED
@@ -7,7 +7,64 @@ from kailash.nodes.base import Node, NodeParameter, register_node
 
 @register_node()
 class ChatAgent(Node):
-    """
+    """
+    Chat-based AI agent node for conversational interactions.
+
+    This node provides a conversational AI interface that maintains context across
+    multiple message exchanges. It supports various LLM configurations and can be
+    customized with system prompts to create specialized conversational agents for
+    different domains and use cases.
+
+    Design Philosophy:
+        The ChatAgent embodies the principle of contextual conversation, maintaining
+        the full dialogue history to provide coherent and relevant responses. It
+        abstracts away the complexities of LLM APIs while providing a consistent
+        interface for chat-based interactions across different providers.
+
+    Upstream Dependencies:
+        - User interfaces or APIs providing conversation messages
+        - Context injection systems adding relevant information
+        - Authentication systems for user-specific interactions
+        - Workflow orchestrators managing conversation flow
+
+    Downstream Consumers:
+        - Response formatting nodes processing agent outputs
+        - Logging systems recording conversations
+        - Analytics nodes analyzing interaction patterns
+        - UI components displaying chat responses
+
+    Configuration:
+        The agent can be configured with different models, temperature settings,
+        and token limits. System prompts allow specialization for specific domains
+        or behaviors without code changes.
+
+    Implementation Details:
+        - Maintains conversation history with role-based messages
+        - Prepends system prompt to establish agent behavior
+        - Currently uses mock responses for testing (production would use LLM APIs)
+        - Supports streaming responses (when integrated with real LLMs)
+        - Implements token counting for cost management
+        - Thread-safe for concurrent conversations
+
+    Error Handling:
+        - Validates message format and required fields
+        - Handles empty or malformed conversations gracefully
+        - Returns appropriate responses for API failures
+        - Implements retry logic for transient errors
+
+    Side Effects:
+        - May log conversations for debugging (configurable)
+        - Updates internal conversation state
+        - May trigger external LLM API calls (in production)
+
+    Examples:
+        >>> # Test parameter structure without constructor validation
+        >>> agent = ChatAgent.__new__(ChatAgent)
+        >>> params = agent.get_parameters()
+        >>> assert "messages" in params
+        >>> assert "model" in params
+        >>> assert "temperature" in params
+    """
 
     def get_parameters(self) -> Dict[str, NodeParameter]:
         return {
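
A minimal usage sketch for the documented interface. The parameter names (messages, model, temperature) come from the docstring above; the run() call shape is an assumption, modeled on the EmbeddingGeneratorNode examples later in this diff, and the result keys are not confirmed here:

    # Hypothetical usage sketch; run() signature assumed, not confirmed in this diff.
    from kailash.nodes.ai.agents import ChatAgent

    agent = ChatAgent()
    result = agent.run(
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hello!"},
        ],
        model="gpt-4",
        temperature=0.7,
    )
    print(result)  # 0.1.5 returns mock responses; production wires in real LLM APIs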
@@ -97,7 +154,64 @@ class ChatAgent(Node):
 
 @register_node()
 class RetrievalAgent(Node):
-    """
+    """
+    Retrieval-augmented generation (RAG) agent for knowledge-based responses.
+
+    This node implements a RAG pipeline that retrieves relevant documents based on
+    a query and optionally generates answers using the retrieved context. It combines
+    information retrieval techniques with language generation to provide accurate,
+    grounded responses based on provided documents.
+
+    Design Philosophy:
+        The RetrievalAgent addresses the hallucination problem in LLMs by grounding
+        responses in retrieved documents. It implements a two-stage process: first
+        finding relevant information, then synthesizing an answer based only on that
+        information. This ensures factual accuracy and traceability.
+
+    Upstream Dependencies:
+        - Document ingestion nodes providing indexed content
+        - Query processing nodes enhancing user queries
+        - Embedding generation nodes (in production implementations)
+        - Vector databases or search indices
+
+    Downstream Consumers:
+        - Response formatting nodes presenting answers
+        - Citation generation nodes adding references
+        - Quality assessment nodes evaluating retrieval accuracy
+        - UI components displaying results with sources
+
+    Configuration:
+        The agent can be configured with retrieval parameters like top_k results
+        and similarity thresholds. Answer generation can be toggled based on use
+        case requirements.
+
+    Implementation Details:
+        - Currently uses keyword-based similarity (production would use embeddings)
+        - Supports various document formats (dict with content, or strings)
+        - Implements relevance scoring and ranking
+        - Filters results by similarity threshold
+        - Generates contextual answers from retrieved documents
+        - Maintains retrieval provenance for transparency
+
+    Error Handling:
+        - Handles empty document sets gracefully
+        - Validates query format and parameters
+        - Returns empty results for no matches
+        - Provides meaningful responses even with limited retrieval
+
+    Side Effects:
+        - No persistent side effects
+        - May trigger embedding generation (in production)
+        - May access external vector databases
+
+    Examples:
+        >>> # Test parameter structure without constructor validation
+        >>> agent = RetrievalAgent.__new__(RetrievalAgent)
+        >>> params = agent.get_parameters()
+        >>> assert "query" in params
+        >>> assert "documents" in params
+        >>> assert "top_k" in params
+    """
 
     def get_parameters(self) -> Dict[str, NodeParameter]:
         return {
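
As above, a hedged sketch of a retrieval call, using only the parameter names asserted in the doctest (query, documents, top_k); the run() shape is assumed, and the mixed document formats follow the Implementation Details note:

    # Hypothetical usage sketch; "dict with content, or strings" per the docstring.
    from kailash.nodes.ai.agents import RetrievalAgent

    rag = RetrievalAgent()
    result = rag.run(
        query="How are nodes registered?",
        documents=[
            {"content": "Nodes are registered with the @register_node() decorator."},
            "Plain-string documents are also supported.",
        ],
        top_k=1,
    )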
@@ -333,7 +447,7 @@ class PlanningAgent(Node):
 
         # Data processing workflow
         potential_steps = [
             {
-                "tool": "
+                "tool": "CSVReaderNode",
                 "description": "Read input data",
                 "parameters": {"file_path": "input.csv"},
             },
@@ -348,7 +462,7 @@ class PlanningAgent(Node):
 
                 "parameters": {"group_by": "category", "operation": "sum"},
             },
             {
-                "tool": "
+                "tool": "CSVWriterNode",
                 "description": "Write results",
                 "parameters": {"file_path": "output.csv"},
             },
@@ -357,7 +471,7 @@ class PlanningAgent(Node):
 
         # Text analysis workflow
         potential_steps = [
             {
-                "tool": "
+                "tool": "TextReaderNode",
                 "description": "Read text data",
                 "parameters": {"file_path": "text.txt"},
             },
@@ -372,7 +486,7 @@ class PlanningAgent(Node):
 
                 "parameters": {"max_length": 200},
             },
             {
-                "tool": "
+                "tool": "JSONWriterNode",
                 "description": "Save analysis results",
                 "parameters": {"file_path": "analysis.json"},
             },
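
For orientation, the four hunks above only swap tool identifiers for the "Node"-suffixed names introduced in 0.1.5; they sit inside step lists of this shape (fields taken from the context lines, the variable assembly is illustrative):

    # Reconstructed from the hunk context above; only the renamed tool
    # identifiers (e.g. CSVReaderNode, CSVWriterNode) are new in 0.1.5.
    potential_steps = [
        {
            "tool": "CSVReaderNode",
            "description": "Read input data",
            "parameters": {"file_path": "input.csv"},
        },
        {
            "tool": "CSVWriterNode",
            "description": "Write results",
            "parameters": {"file_path": "output.csv"},
        },
    ]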
kailash/nodes/ai/ai_providers.py
CHANGED
@@ -17,12 +17,69 @@ class BaseAIProvider(ABC):
 
     This abstract class defines the common interface and shared functionality
     for providers that may support LLM operations, embedding operations, or both.
+    It establishes a unified pattern for provider initialization, capability
+    detection, and error handling across different AI services.
 
     Design Philosophy:
-
-
-
-
+        The BaseAIProvider follows the principle of "capability-based architecture"
+        where providers declare their capabilities explicitly. This allows for
+        flexible provider implementations that may support chat, embeddings, or
+        both, while maintaining a consistent interface. The design promotes:
+        - Single source of truth for provider availability
+        - Shared client management and initialization
+        - Common error handling patterns
+        - Flexible support for providers with different capabilities
+
+    Upstream Dependencies:
+        - Configuration systems providing API keys and credentials
+        - Environment variable loaders for secure credential management
+        - Package managers ensuring required dependencies
+        - Network infrastructure for API access
+
+    Downstream Consumers:
+        - LLMAgentNode: Uses chat capabilities for conversational AI
+        - EmbeddingGeneratorNode: Uses embedding capabilities for vector generation
+        - Provider selection logic choosing appropriate implementations
+        - Error handling systems catching provider-specific exceptions
+
+    Configuration:
+        Each provider implementation handles its own configuration needs,
+        typically through environment variables or explicit parameters.
+        Common patterns include API keys, endpoints, and model selections.
+
+    Implementation Details:
+        - Lazy initialization of clients to avoid unnecessary connections
+        - Cached availability checks to reduce repeated validation
+        - Capability dictionary for runtime feature detection
+        - Abstract methods enforce implementation of core functionality
+        - Thread-safe design for concurrent usage
+
+    Error Handling:
+        - Provider availability checked before operations
+        - Graceful degradation when providers are unavailable
+        - Standardized error responses across different providers
+        - Detailed error messages for debugging
+
+    Side Effects:
+        - May establish network connections to AI services
+        - May consume API quotas during availability checks
+        - Caches client instances for performance
+
+    Examples:
+        >>> # Provider implementation
+        >>> class MyProvider(BaseAIProvider):
+        ...     def __init__(self):
+        ...         super().__init__()
+        ...         self._capabilities = {"chat": True, "embeddings": False}
+        ...
+        ...     def is_available(self) -> bool:
+        ...         # Check API key, dependencies, etc.
+        ...         return True
+        >>>
+        >>> provider = MyProvider()
+        >>> assert provider.supports_chat() == True
+        >>> assert provider.supports_embeddings() == False
+        >>> assert provider.is_available() == True
     """
 
     def __init__(self):
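
One way to realize the "lazy initialization" and "cached availability" notes above, as a standalone sketch; kailash's actual base-class internals are not shown in this diff:

    # Illustrative only; mirrors the capability/caching ideas described above.
    from abc import ABC, abstractmethod

    class ProviderSketch(ABC):
        def __init__(self):
            self._client = None        # created lazily on first use
            self._available = None     # availability probed once, then cached
            self._capabilities = {"chat": False, "embeddings": False}

        @abstractmethod
        def _check_available(self) -> bool:
            """Probe API keys, dependencies, network, etc."""

        def is_available(self) -> bool:
            if self._available is None:
                self._available = self._check_available()
            return self._available

        def supports_chat(self) -> bool:
            return self._capabilities["chat"]

        def supports_embeddings(self) -> bool:
            return self._capabilities["embeddings"]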
@@ -70,8 +127,78 @@ class LLMProvider(BaseAIProvider):
 
     """
     Abstract base class for providers that support LLM chat operations.
 
-
-
+    This class extends BaseAIProvider to define the interface for language model
+    providers. It ensures consistent chat operation interfaces across different
+    LLM services while allowing provider-specific optimizations and features.
+
+    Design Philosophy:
+        LLMProvider standardizes the chat interface while preserving flexibility
+        for provider-specific features. It follows the OpenAI message format as
+        the de facto standard, enabling easy switching between providers. The
+        design supports both simple completions and advanced features like
+        streaming, function calling, and custom parameters.
+
+    Upstream Dependencies:
+        - BaseAIProvider: Inherits core provider functionality
+        - Message formatting systems preparing chat inputs
+        - Token counting utilities for cost management
+        - Rate limiting systems managing API quotas
+
+    Downstream Consumers:
+        - LLMAgentNode: Primary consumer for chat operations
+        - ChatAgent: Uses for conversational interactions
+        - A2AAgentNode: Leverages for agent communication
+        - Response processing nodes handling outputs
+
+    Configuration:
+        Provider-specific parameters are passed through kwargs, allowing:
+        - Model selection (model parameter)
+        - Temperature and sampling parameters
+        - Token limits and stop sequences
+        - Provider-specific features (tools, functions, etc.)
+
+    Implementation Details:
+        - Standardized message format: List[Dict[str, str]]
+        - Messages contain 'role' and 'content' fields minimum
+        - Supports system, user, and assistant roles
+        - Response format standardized across providers
+        - Streaming support through callbacks (implementation-specific)
+
+    Error Handling:
+        - Invalid message format validation
+        - API error standardization
+        - Rate limit handling with retry guidance
+        - Token limit exceeded handling
+        - Network error recovery strategies
+
+    Side Effects:
+        - API calls consume tokens/credits
+        - May log conversations for debugging
+        - Updates internal usage metrics
+        - May trigger rate limiting
+
+    Examples:
+        >>> class MyLLMProvider(LLMProvider):
+        ...     def is_available(self) -> bool:
+        ...         return True  # Check actual availability
+        ...
+        ...     def chat(self, messages, **kwargs):
+        ...         # Simulate LLM response
+        ...         return {
+        ...             "success": True,
+        ...             "content": "Response to: " + messages[-1]["content"],
+        ...             "model": kwargs.get("model", "default"),
+        ...             "usage": {"prompt_tokens": 10, "completion_tokens": 5}
+        ...         }
+        >>>
+        >>> provider = MyLLMProvider()
+        >>> messages = [
+        ...     {"role": "system", "content": "You are helpful."},
+        ...     {"role": "user", "content": "Hello!"}
+        ... ]
+        >>> response = provider.chat(messages, model="gpt-4")
+        >>> assert response["success"] == True
+        >>> assert "content" in response
     """
 
     def __init__(self):
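
The message-format rules above (role/content fields, system/user/assistant roles) imply a validation step like the following sketch; this helper is hypothetical, not part of the package:

    from typing import Dict, List

    VALID_ROLES = {"system", "user", "assistant"}

    def validate_messages(messages: List[Dict[str, str]]) -> None:
        """Raise ValueError on the malformed inputs described above."""
        for i, msg in enumerate(messages):
            if "role" not in msg or "content" not in msg:
                raise ValueError(f"message {i}: 'role' and 'content' are required")
            if msg["role"] not in VALID_ROLES:
                raise ValueError(f"message {i}: unsupported role {msg['role']!r}")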
@@ -97,8 +224,79 @@ class EmbeddingProvider(BaseAIProvider):
 
     """
     Abstract base class for providers that support embedding generation.
 
-
-
+    This class extends BaseAIProvider to define the interface for embedding
+    providers. It standardizes how text is converted to vector representations
+    across different embedding services while supporting provider-specific
+    optimizations and model configurations.
+
+    Design Philosophy:
+        EmbeddingProvider abstracts the complexity of different embedding models
+        and services behind a simple, consistent interface. It handles batching,
+        dimension management, and normalization while allowing providers to
+        optimize for their specific architectures. The design supports both
+        sentence and document embeddings with appropriate chunking strategies.
+
+    Upstream Dependencies:
+        - BaseAIProvider: Inherits core provider functionality
+        - Text preprocessing nodes preparing embedding inputs
+        - Chunking strategies for long documents
+        - Tokenization utilities for size management
+
+    Downstream Consumers:
+        - EmbeddingGeneratorNode: Primary consumer for vector generation
+        - Vector databases storing embeddings
+        - Similarity search implementations
+        - Clustering and classification systems
+
+    Configuration:
+        Provider-specific parameters include:
+        - Model selection for different embedding sizes/qualities
+        - Batch size limits for efficiency
+        - Normalization preferences
+        - Dimension specifications
+
+    Implementation Details:
+        - Batch processing for efficiency
+        - Automatic text truncation/chunking for model limits
+        - Vector normalization options
+        - Dimension validation and consistency
+        - Cache-friendly operations for repeated texts
+
+    Error Handling:
+        - Empty text handling
+        - Text length validation
+        - Batch size limit enforcement
+        - Model availability checking
+        - Dimension mismatch detection
+
+    Side Effects:
+        - API calls consume embedding quotas
+        - May cache embeddings for efficiency
+        - Updates usage metrics
+        - May trigger rate limiting
+
+    Examples:
+        >>> class MyEmbeddingProvider(EmbeddingProvider):
+        ...     def is_available(self) -> bool:
+        ...         return True
+        ...
+        ...     def embed(self, texts, **kwargs):
+        ...         # Simulate embedding generation
+        ...         return [[0.1, 0.2, 0.3] for _ in texts]
+        ...
+        ...     def get_model_info(self):
+        ...         return {
+        ...             "name": "my-embedding-model",
+        ...             "dimensions": 3,
+        ...             "max_tokens": 512
+        ...         }
+        >>>
+        >>> provider = MyEmbeddingProvider()
+        >>> embeddings = provider.embed(["Hello", "World"])
+        >>> assert len(embeddings) == 2
+        >>> assert len(embeddings[0]) == 3
+        >>> info = provider.get_model_info()
+        >>> assert info["dimensions"] == 3
     """
 
     def __init__(self):
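
The batching and normalization behaviors named above reduce to helpers like these; illustrative sketches of the concepts, not the package's provider code:

    import math
    from typing import Iterator, List

    def batched(texts: List[str], batch_size: int) -> Iterator[List[str]]:
        """Split inputs to respect provider batch-size limits."""
        for i in range(0, len(texts), batch_size):
            yield texts[i : i + batch_size]

    def l2_normalize(vector: List[float]) -> List[float]:
        """Optional unit-length normalization, e.g. for cosine similarity."""
        norm = math.sqrt(sum(x * x for x in vector)) or 1.0
        return [x / norm for x in vector]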
@@ -1159,18 +1357,17 @@ def get_provider(
 
         ValueError: If the provider name is not recognized or doesn't support the requested type.
 
     Examples:
-
-
-
-
-
-
-
-
-
-        Get chat-only provider
-
-        chat_provider = get_provider("anthropic", "chat")
+        >>> # Get any provider
+        >>> provider = get_provider("openai")
+        >>> if provider.supports_chat():
+        ...     # Use for chat
+        ...     pass
+        >>> if provider.supports_embeddings():
+        ...     # Use for embeddings
+        ...     pass
+
+        >>> # Get chat-only provider
+        >>> chat_provider = get_provider("anthropic", "chat")
         response = chat_provider.chat(messages, model="claude-3-sonnet")
 
         Get embedding-only provider:
@@ -1223,18 +1420,15 @@ def get_available_providers(
 
         Dict mapping provider names to their availability and capabilities.
 
     Examples:
+        >>> # Get all providers
+        >>> all_providers = get_available_providers()
+        >>> for name, info in all_providers.items():
+        ...     print(f"{name}: Available={info['available']}, Chat={info['chat']}, Embeddings={info['embeddings']}")
 
-        Get
-
-        all_providers = get_available_providers()
-        for name, info in all_providers.items():
-            print(f"{name}: Available={info['available']}, Chat={info['chat']}, Embeddings={info['embeddings']}")
-
-        Get only chat providers:
-
-        chat_providers = get_available_providers("chat")
+        >>> # Get only chat providers
+        >>> chat_providers = get_available_providers("chat")
 
-        Get only embedding providers
+        >>> # Get only embedding providers
 
         embed_providers = get_available_providers("embeddings")
     """
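
Combining the two helpers above, assuming the call signatures shown in their doctests (the provider and model names are placeholders):

    from kailash.nodes.ai.ai_providers import get_available_providers, get_provider

    chat_ready = [
        name
        for name, info in get_available_providers("chat").items()
        if info["available"]
    ]
    if chat_ready:
        provider = get_provider(chat_ready[0], "chat")
        response = provider.chat(
            [{"role": "user", "content": "Hello!"}],
            model="claude-3-sonnet",  # model must match the selected provider
        )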
kailash/nodes/ai/embedding_generator.py
CHANGED
@@ -7,7 +7,7 @@ from kailash.nodes.base import Node, NodeParameter, register_node
 
 
 @register_node()
-class EmbeddingGenerator(Node):
+class EmbeddingGeneratorNode(Node):
     """
     Vector embedding generator for RAG systems and semantic similarity operations.
 
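
Callers upgrading from 0.1.3 only need the new class name; a migration sketch, with the import path inferred from this file's location:

    # kailash 0.1.3:
    # from kailash.nodes.ai.embedding_generator import EmbeddingGenerator
    # kailash 0.1.5:
    from kailash.nodes.ai.embedding_generator import EmbeddingGeneratorNode

    embedder = EmbeddingGeneratorNode()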
@@ -61,46 +61,42 @@ class EmbeddingGenerator(Node):
 
         - Updates usage statistics and cost tracking
 
     Examples:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        )
-
-
-
-
-
-
-            embedding_1=[0.1, 0.2, 0.3, ...],
-            embedding_2=[0.15, 0.25, 0.35, ...],
-            similarity_metric="cosine"
-        )
+        >>> # Single text embedding
+        >>> embedder = EmbeddingGeneratorNode()
+        >>> result = embedder.run(
+        ...     provider="openai",
+        ...     model="text-embedding-3-large",
+        ...     input_text="This is a sample document to embed",
+        ...     operation="embed_text"
+        ... )
+
+        >>> # Batch document embedding
+        >>> batch_embedder = EmbeddingGeneratorNode()
+        >>> result = batch_embedder.run(
+        ...     provider="huggingface",
+        ...     model="sentence-transformers/all-MiniLM-L6-v2",
+        ...     input_texts=[
+        ...         "First document content...",
+        ...         "Second document content...",
+        ...         "Third document content..."
+        ...     ],
+        ...     operation="embed_batch",
+        ...     batch_size=32,
+        ...     cache_enabled=True
+        ... )
+
+        >>> # Similarity calculation
+        >>> similarity = EmbeddingGeneratorNode()
+        >>> result = similarity.run(
+        ...     operation="calculate_similarity",
+        ...     embedding_1=[0.1, 0.2, 0.3],  # ... removed for doctest
+        ...     embedding_2=[0.15, 0.25, 0.35],  # ... removed for doctest
+        ...     similarity_metric="cosine"
+        ... )
 
         Cached embedding with MCP integration:
 
-        mcp_embedder = EmbeddingGenerator()
+        mcp_embedder = EmbeddingGeneratorNode()
         result = mcp_embedder.run(
             provider="azure",
             model="text-embedding-3-small",
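
For reference, the cosine metric over the example vectors above evaluates as follows; whether the node returns exactly this scalar shape is not shown in the diff:

    import math

    def cosine(a: list, b: list) -> float:
        """Cosine similarity: dot product over the product of vector norms."""
        dot = sum(x * y for x, y in zip(a, b))
        return dot / (math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(x * x for x in b)))

    print(cosine([0.1, 0.2, 0.3], [0.15, 0.25, 0.35]))  # ~0.9974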