kailash 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +1 -1
- kailash/nodes/__init__.py +2 -1
- kailash/nodes/ai/__init__.py +26 -0
- kailash/nodes/ai/ai_providers.py +1272 -0
- kailash/nodes/ai/embedding_generator.py +853 -0
- kailash/nodes/ai/llm_agent.py +1166 -0
- kailash/nodes/api/auth.py +3 -3
- kailash/nodes/api/graphql.py +2 -2
- kailash/nodes/api/http.py +391 -44
- kailash/nodes/api/rate_limiting.py +2 -2
- kailash/nodes/api/rest.py +464 -56
- kailash/nodes/base.py +71 -12
- kailash/nodes/code/python.py +2 -1
- kailash/nodes/data/__init__.py +7 -0
- kailash/nodes/data/readers.py +28 -26
- kailash/nodes/data/retrieval.py +178 -0
- kailash/nodes/data/sharepoint_graph.py +7 -7
- kailash/nodes/data/sources.py +65 -0
- kailash/nodes/data/sql.py +4 -2
- kailash/nodes/data/writers.py +6 -3
- kailash/nodes/logic/operations.py +2 -1
- kailash/nodes/mcp/__init__.py +11 -0
- kailash/nodes/mcp/client.py +558 -0
- kailash/nodes/mcp/resource.py +682 -0
- kailash/nodes/mcp/server.py +571 -0
- kailash/nodes/transform/__init__.py +16 -1
- kailash/nodes/transform/chunkers.py +78 -0
- kailash/nodes/transform/formatters.py +96 -0
- kailash/runtime/docker.py +6 -6
- kailash/sdk_exceptions.py +24 -10
- kailash/tracking/metrics_collector.py +2 -1
- kailash/utils/templates.py +6 -6
- {kailash-0.1.0.dist-info → kailash-0.1.2.dist-info}/METADATA +349 -49
- {kailash-0.1.0.dist-info → kailash-0.1.2.dist-info}/RECORD +38 -27
- {kailash-0.1.0.dist-info → kailash-0.1.2.dist-info}/WHEEL +0 -0
- {kailash-0.1.0.dist-info → kailash-0.1.2.dist-info}/entry_points.txt +0 -0
- {kailash-0.1.0.dist-info → kailash-0.1.2.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.1.0.dist-info → kailash-0.1.2.dist-info}/top_level.txt +0 -0
kailash/nodes/data/retrieval.py
ADDED
```diff
@@ -0,0 +1,178 @@
+"""Document retrieval nodes for finding relevant content using various similarity methods."""
+
+from typing import Any, Dict, List
+
+from kailash.nodes.base import Node, NodeParameter, register_node
+
+
+@register_node()
+class RelevanceScorerNode(Node):
+    """Scores chunk relevance using various similarity methods including embeddings similarity."""
+
+    def get_parameters(self) -> Dict[str, NodeParameter]:
+        return {
+            "chunks": NodeParameter(
+                name="chunks",
+                type=list,
+                required=False,
+                description="List of chunks to score",
+            ),
+            "query_embedding": NodeParameter(
+                name="query_embedding",
+                type=list,
+                required=False,
+                description="Query embedding for similarity comparison",
+            ),
+            "chunk_embeddings": NodeParameter(
+                name="chunk_embeddings",
+                type=list,
+                required=False,
+                description="Embeddings for each chunk",
+            ),
+            "similarity_method": NodeParameter(
+                name="similarity_method",
+                type=str,
+                required=False,
+                default="cosine",
+                description="Similarity method: cosine, bm25, tfidf, jaccard (future)",
+            ),
+            "top_k": NodeParameter(
+                name="top_k",
+                type=int,
+                required=False,
+                default=3,
+                description="Number of top chunks to return",
+            ),
+        }
+
+    def run(self, **kwargs) -> Dict[str, Any]:
+        chunks = kwargs.get("chunks", [])
+        query_embeddings = kwargs.get("query_embedding", [])
+        chunk_embeddings = kwargs.get("chunk_embeddings", [])
+        similarity_method = kwargs.get("similarity_method", "cosine")
+        top_k = kwargs.get("top_k", 3)
+
+        print(
+            f"Debug: chunks={len(chunks)}, query_embeddings={len(query_embeddings)}, chunk_embeddings={len(chunk_embeddings)}"
+        )
+
+        # Handle case when no embeddings are available
+        if not query_embeddings or not chunk_embeddings:
+            print("Debug: No embeddings available, using fallback text matching")
+            # Simple text-based fallback scoring
+            query_text = "machine learning types"  # Extract keywords from query
+            scored_chunks = []
+            for chunk in chunks:
+                content = chunk.get("content", "").lower()
+                score = sum(1 for word in query_text.split() if word in content) / len(
+                    query_text.split()
+                )
+                scored_chunk = {**chunk, "relevance_score": score}
+                scored_chunks.append(scored_chunk)
+        else:
+            # Use the specified similarity method
+            if similarity_method == "cosine":
+                scored_chunks = self._cosine_similarity_scoring(
+                    chunks, query_embeddings, chunk_embeddings
+                )
+            elif similarity_method == "bm25":
+                # Future implementation
+                scored_chunks = self._bm25_scoring(
+                    chunks, query_embeddings, chunk_embeddings
+                )
+            elif similarity_method == "tfidf":
+                # Future implementation
+                scored_chunks = self._tfidf_scoring(
+                    chunks, query_embeddings, chunk_embeddings
+                )
+            else:
+                # Default to cosine
+                scored_chunks = self._cosine_similarity_scoring(
+                    chunks, query_embeddings, chunk_embeddings
+                )
+
+        # Sort by relevance and take top_k
+        scored_chunks.sort(key=lambda x: x["relevance_score"], reverse=True)
+        top_chunks = scored_chunks[:top_k]
+
+        return {"relevant_chunks": top_chunks}
+
+    def _cosine_similarity_scoring(
+        self, chunks: List[Dict], query_embeddings: List, chunk_embeddings: List
+    ) -> List[Dict]:
+        """Score chunks using cosine similarity."""
+        # Extract actual embedding vectors from the embedding objects
+        # EmbeddingGenerator returns embeddings in format: {"embedding": [...], "text": "...", "dimensions": X}
+
+        # Handle query embedding - should be the first (and only) embedding in the list
+        query_embedding_obj = query_embeddings[0] if query_embeddings else {}
+        if isinstance(query_embedding_obj, dict) and "embedding" in query_embedding_obj:
+            query_embedding = query_embedding_obj["embedding"]
+        elif isinstance(query_embedding_obj, list):
+            query_embedding = query_embedding_obj
+        else:
+            query_embedding = []
+
+        print(
+            f"Debug: Query embedding extracted, type: {type(query_embedding)}, length: {len(query_embedding) if isinstance(query_embedding, list) else 'N/A'}"
+        )
+
+        # Simple cosine similarity calculation
+        def cosine_similarity(a, b):
+            # Ensure embeddings are numeric lists
+            if not isinstance(a, list) or not isinstance(b, list):
+                print(f"Debug: Non-list embeddings detected, a={type(a)}, b={type(b)}")
+                return 0.5  # Default similarity
+
+            if len(a) == 0 or len(b) == 0:
+                print(
+                    f"Debug: Empty embeddings detected, len(a)={len(a)}, len(b)={len(b)}"
+                )
+                return 0.5
+
+            try:
+                dot_product = sum(x * y for x, y in zip(a, b))
+                norm_a = sum(x * x for x in a) ** 0.5
+                norm_b = sum(x * x for x in b) ** 0.5
+                return dot_product / (norm_a * norm_b) if norm_a * norm_b > 0 else 0
+            except (TypeError, ValueError) as e:
+                print(f"Debug: Cosine similarity error: {e}")
+                return 0.5
+
+        # Score each chunk
+        scored_chunks = []
+        for i, chunk in enumerate(chunks):
+            if i < len(chunk_embeddings):
+                # Extract embedding vector from chunk embedding object
+                chunk_embedding_obj = chunk_embeddings[i]
+                if (
+                    isinstance(chunk_embedding_obj, dict)
+                    and "embedding" in chunk_embedding_obj
+                ):
+                    chunk_embedding = chunk_embedding_obj["embedding"]
+                elif isinstance(chunk_embedding_obj, list):
+                    chunk_embedding = chunk_embedding_obj
+                else:
+                    chunk_embedding = []
+
+                similarity = cosine_similarity(query_embedding, chunk_embedding)
+                scored_chunk = {**chunk, "relevance_score": similarity}
+                scored_chunks.append(scored_chunk)
+
+        return scored_chunks
+
+    def _bm25_scoring(
+        self, chunks: List[Dict], query_embeddings: List, chunk_embeddings: List
+    ) -> List[Dict]:
+        """Score chunks using BM25 algorithm (future implementation)."""
+        # TODO: Implement BM25 scoring
+        # For now, return chunks with default scores
+        return [{**chunk, "relevance_score": 0.5} for chunk in chunks]
+
+    def _tfidf_scoring(
+        self, chunks: List[Dict], query_embeddings: List, chunk_embeddings: List
+    ) -> List[Dict]:
+        """Score chunks using TF-IDF similarity (future implementation)."""
+        # TODO: Implement TF-IDF scoring
+        # For now, return chunks with default scores
+        return [{**chunk, "relevance_score": 0.5} for chunk in chunks]
```
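For reviewers who want to try the new scorer, here is a minimal sketch (not part of the package) that drives RelevanceScorerNode directly through the run() method added above. The tiny embedding vectors are made up for illustration and mirror the {"embedding": [...]} objects the node unwraps; whether the node also supports the workflow-level execute() path is not shown here.

```python
# Hedged sketch only: exercises RelevanceScorerNode from the hunk above with
# hand-made vectors instead of real EmbeddingGenerator output.
from kailash.nodes.data.retrieval import RelevanceScorerNode

chunks = [
    {"id": "c1", "content": "Supervised learning uses labeled data to train models."},
    {"id": "c2", "content": "CNNs are popular for image processing tasks."},
]

# The scorer unwraps {"embedding": [...]} dicts (or accepts bare lists).
query_embedding = [{"embedding": [1.0, 0.0, 0.0]}]
chunk_embeddings = [
    {"embedding": [0.9, 0.1, 0.0]},  # nearly parallel to the query vector
    {"embedding": [0.0, 1.0, 0.0]},  # orthogonal to the query vector
]

scorer = RelevanceScorerNode()
result = scorer.run(
    chunks=chunks,
    query_embedding=query_embedding,
    chunk_embeddings=chunk_embeddings,
    similarity_method="cosine",
    top_k=1,
)
# Expect chunk "c1" to rank first, since its vector has the higher cosine similarity.
print(result["relevant_chunks"])
```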
kailash/nodes/data/sharepoint_graph.py
CHANGED
```diff
@@ -27,7 +27,7 @@ from typing import Any, Dict, List, Optional
 
 import requests
 
-from kailash.nodes.base import Node, NodeMetadata, NodeParameter
+from kailash.nodes.base import Node, NodeMetadata, NodeParameter, register_node
 from kailash.sdk_exceptions import (
     NodeConfigurationError,
     NodeExecutionError,
@@ -35,6 +35,7 @@ from kailash.sdk_exceptions import (
 )
 
 
+@register_node()
 class SharePointGraphReader(Node):
     """Node for reading files from SharePoint using Microsoft Graph API.
 
@@ -55,8 +56,8 @@ class SharePointGraphReader(Node):
     3. Search for files by name
     4. Navigate folder structures
 
-    Example
-
+    Example::
+
         reader = SharePointGraphReader()
         result = reader.execute(
             tenant_id="your-tenant-id",
@@ -67,7 +68,6 @@ class SharePointGraphReader(Node):
             library_name="Documents",
             folder_path="Reports/2024"
         )
-        ```
     """
 
     def get_metadata(self) -> NodeMetadata:
@@ -464,14 +464,15 @@ class SharePointGraphReader(Node):
             return self._search_files(site_id, library_name, query, headers)
 
 
+@register_node()
 class SharePointGraphWriter(Node):
     """Node for uploading files to SharePoint using Microsoft Graph API.
 
     This node handles file uploads to SharePoint document libraries,
     supporting folder structures and metadata.
 
-    Example
-
+    Example::
+
         writer = SharePointGraphWriter()
         result = writer.execute(
             tenant_id="your-tenant-id",
@@ -483,7 +484,6 @@ class SharePointGraphWriter(Node):
             folder_path="Reports/2024",
             sharepoint_name="Q4_Report_2024.pdf"
         )
-        ```
     """
 
     def get_metadata(self) -> NodeMetadata:
```
kailash/nodes/data/sources.py
ADDED
```diff
@@ -0,0 +1,65 @@
+"""Data source nodes for providing input data to workflows."""
+
+from typing import Any, Dict
+
+from kailash.nodes.base import Node, NodeParameter, register_node
+
+
+@register_node()
+class DocumentSourceNode(Node):
+    """Provides sample documents for hierarchical RAG processing."""
+
+    def get_parameters(self) -> Dict[str, NodeParameter]:
+        return {
+            "sample_documents": NodeParameter(
+                name="sample_documents",
+                type=bool,
+                required=False,
+                default=True,
+                description="Use built-in sample documents",
+            )
+        }
+
+    def run(self, **kwargs) -> Dict[str, Any]:
+        # Sample documents for demonstration
+        documents = [
+            {
+                "id": "doc1",
+                "title": "Machine Learning Basics",
+                "content": """Machine learning is a subset of artificial intelligence that enables computers to learn and make decisions from data without being explicitly programmed. There are three main types of machine learning: supervised learning, unsupervised learning, and reinforcement learning. Supervised learning uses labeled data to train models that can make predictions on new data. Common algorithms include linear regression, decision trees, and neural networks. The process involves splitting data into training and testing sets to evaluate model performance.""",
+            },
+            {
+                "id": "doc2",
+                "title": "Deep Learning Overview",
+                "content": """Deep learning is a specialized area of machine learning that uses neural networks with multiple layers to model and understand complex patterns in data. These networks, called deep neural networks, can automatically learn hierarchical representations of data. Popular architectures include convolutional neural networks (CNNs) for image processing, recurrent neural networks (RNNs) for sequential data, and transformers for natural language processing. Deep learning has achieved breakthrough results in computer vision, speech recognition, and language understanding.""",
+            },
+            {
+                "id": "doc3",
+                "title": "Natural Language Processing",
+                "content": """Natural Language Processing (NLP) is a field that combines computational linguistics with machine learning to help computers understand, interpret, and generate human language. Key NLP tasks include tokenization, part-of-speech tagging, named entity recognition, sentiment analysis, and machine translation. Modern NLP relies heavily on transformer architectures like BERT and GPT, which use attention mechanisms to understand context and relationships between words. Applications include chatbots, search engines, and language translation services.""",
+            },
+        ]
+
+        print(f"Debug DocumentSource: providing {len(documents)} documents")
+        return {"documents": documents}
+
+
+@register_node()
+class QuerySourceNode(Node):
+    """Provides sample queries for RAG processing."""
+
+    def get_parameters(self) -> Dict[str, NodeParameter]:
+        return {
+            "query": NodeParameter(
+                name="query",
+                type=str,
+                required=False,
+                default="What are the main types of machine learning?",
+                description="Query to process",
+            )
+        }
+
+    def run(self, **kwargs) -> Dict[str, Any]:
+        query = kwargs.get("query", "What are the main types of machine learning?")
+        print(f"Debug QuerySource: providing query='{query}'")
+        return {"query": query}
```
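As a quick, hedged sketch of what these two source nodes emit, they can be instantiated with no configuration and invoked via run(), following the same direct-call pattern as the scorer sketch earlier; the output keys match the return statements in the hunk above.

```python
# Hedged sketch only: inspect the default payloads of the new source nodes.
from kailash.nodes.data.sources import DocumentSourceNode, QuerySourceNode

documents = DocumentSourceNode().run()["documents"]  # three built-in sample documents
query = QuerySourceNode().run()["query"]             # default machine-learning question

print(query)
for doc in documents:
    # Each document dict carries "id", "title", and "content" fields.
    print(doc["id"], "-", doc["title"])
```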
kailash/nodes/data/sql.py
CHANGED
```diff
@@ -63,7 +63,8 @@ class SQLDatabaseNode(Node):
         - TimeoutError: Query execution timeout
         - PermissionError: Access denied
 
-    Example
+    Example::
+
         # Query customer data
         sql_node = SQLDatabaseNode(
             connection_string='postgresql://user:pass@host/db',
@@ -258,7 +259,8 @@ class SQLQueryBuilderNode(Node):
     3. Multi-table joins
     4. Aggregation queries
 
-    Example
+    Example::
+
         builder = SQLQueryBuilderNode(
             table='customers',
             select=['name', 'email'],
```
kailash/nodes/data/writers.py
CHANGED
```diff
@@ -81,7 +81,8 @@ class CSVWriter(Node):
         - TypeError: Invalid data structure
         - UnicodeEncodeError: Encoding issues
 
-    Example
+    Example::
+
         # Write customer data
         writer = CSVWriter(
             file_path='output.csv',
@@ -261,7 +262,8 @@ class JSONWriter(Node):
         - OSError: Path or disk issues
         - JSONEncodeError: Encoding problems
 
-    Example
+    Example::
+
         # Write API response
         writer = JSONWriter(
             file_path='response.json',
@@ -412,7 +414,8 @@ class TextWriter(Node):
         - UnicodeEncodeError: Encoding mismatch
         - MemoryError: Text too large
 
-    Example
+    Example::
+
         # Append to log file
         writer = TextWriter(
             file_path='app.log',
```
kailash/nodes/logic/operations.py
CHANGED
```diff
@@ -25,7 +25,8 @@ class Switch(Node):
     The outputs of Switch nodes are typically connected to different processing
     nodes, and those branches can be rejoined later using a Merge node.
 
-    Example usage
+    Example usage::
+
         # Simple boolean condition
         switch_node = Switch(condition_field="status", operator="==", value="success")
         workflow.add_node("router", switch_node)
```