kailash 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +1 -1
- kailash/nodes/__init__.py +2 -1
- kailash/nodes/ai/__init__.py +26 -0
- kailash/nodes/ai/ai_providers.py +1272 -0
- kailash/nodes/ai/embedding_generator.py +853 -0
- kailash/nodes/ai/llm_agent.py +1166 -0
- kailash/nodes/api/auth.py +3 -3
- kailash/nodes/api/graphql.py +2 -2
- kailash/nodes/api/http.py +391 -44
- kailash/nodes/api/rate_limiting.py +2 -2
- kailash/nodes/api/rest.py +464 -56
- kailash/nodes/base.py +71 -12
- kailash/nodes/code/python.py +2 -1
- kailash/nodes/data/__init__.py +7 -0
- kailash/nodes/data/readers.py +28 -26
- kailash/nodes/data/retrieval.py +178 -0
- kailash/nodes/data/sharepoint_graph.py +7 -7
- kailash/nodes/data/sources.py +65 -0
- kailash/nodes/data/sql.py +4 -2
- kailash/nodes/data/writers.py +6 -3
- kailash/nodes/logic/operations.py +2 -1
- kailash/nodes/mcp/__init__.py +11 -0
- kailash/nodes/mcp/client.py +558 -0
- kailash/nodes/mcp/resource.py +682 -0
- kailash/nodes/mcp/server.py +571 -0
- kailash/nodes/transform/__init__.py +16 -1
- kailash/nodes/transform/chunkers.py +78 -0
- kailash/nodes/transform/formatters.py +96 -0
- kailash/runtime/docker.py +6 -6
- kailash/sdk_exceptions.py +24 -10
- kailash/tracking/metrics_collector.py +2 -1
- kailash/utils/templates.py +6 -6
- {kailash-0.1.0.dist-info → kailash-0.1.2.dist-info}/METADATA +349 -49
- {kailash-0.1.0.dist-info → kailash-0.1.2.dist-info}/RECORD +38 -27
- {kailash-0.1.0.dist-info → kailash-0.1.2.dist-info}/WHEEL +0 -0
- {kailash-0.1.0.dist-info → kailash-0.1.2.dist-info}/entry_points.txt +0 -0
- {kailash-0.1.0.dist-info → kailash-0.1.2.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.1.0.dist-info → kailash-0.1.2.dist-info}/top_level.txt +0 -0
kailash/nodes/data/retrieval.py
ADDED
```diff
@@ -0,0 +1,178 @@
+"""Document retrieval nodes for finding relevant content using various similarity methods."""
+
+from typing import Any, Dict, List
+
+from kailash.nodes.base import Node, NodeParameter, register_node
+
+
+@register_node()
+class RelevanceScorerNode(Node):
+    """Scores chunk relevance using various similarity methods including embeddings similarity."""
+
+    def get_parameters(self) -> Dict[str, NodeParameter]:
+        return {
+            "chunks": NodeParameter(
+                name="chunks",
+                type=list,
+                required=False,
+                description="List of chunks to score",
+            ),
+            "query_embedding": NodeParameter(
+                name="query_embedding",
+                type=list,
+                required=False,
+                description="Query embedding for similarity comparison",
+            ),
+            "chunk_embeddings": NodeParameter(
+                name="chunk_embeddings",
+                type=list,
+                required=False,
+                description="Embeddings for each chunk",
+            ),
+            "similarity_method": NodeParameter(
+                name="similarity_method",
+                type=str,
+                required=False,
+                default="cosine",
+                description="Similarity method: cosine, bm25, tfidf, jaccard (future)",
+            ),
+            "top_k": NodeParameter(
+                name="top_k",
+                type=int,
+                required=False,
+                default=3,
+                description="Number of top chunks to return",
+            ),
+        }
+
+    def run(self, **kwargs) -> Dict[str, Any]:
+        chunks = kwargs.get("chunks", [])
+        query_embeddings = kwargs.get("query_embedding", [])
+        chunk_embeddings = kwargs.get("chunk_embeddings", [])
+        similarity_method = kwargs.get("similarity_method", "cosine")
+        top_k = kwargs.get("top_k", 3)
+
+        print(
+            f"Debug: chunks={len(chunks)}, query_embeddings={len(query_embeddings)}, chunk_embeddings={len(chunk_embeddings)}"
+        )
+
+        # Handle case when no embeddings are available
+        if not query_embeddings or not chunk_embeddings:
+            print("Debug: No embeddings available, using fallback text matching")
+            # Simple text-based fallback scoring
+            query_text = "machine learning types"  # Extract keywords from query
+            scored_chunks = []
+            for chunk in chunks:
+                content = chunk.get("content", "").lower()
+                score = sum(1 for word in query_text.split() if word in content) / len(
+                    query_text.split()
+                )
+                scored_chunk = {**chunk, "relevance_score": score}
+                scored_chunks.append(scored_chunk)
+        else:
+            # Use the specified similarity method
+            if similarity_method == "cosine":
+                scored_chunks = self._cosine_similarity_scoring(
+                    chunks, query_embeddings, chunk_embeddings
+                )
+            elif similarity_method == "bm25":
+                # Future implementation
+                scored_chunks = self._bm25_scoring(
+                    chunks, query_embeddings, chunk_embeddings
+                )
+            elif similarity_method == "tfidf":
+                # Future implementation
+                scored_chunks = self._tfidf_scoring(
+                    chunks, query_embeddings, chunk_embeddings
+                )
+            else:
+                # Default to cosine
+                scored_chunks = self._cosine_similarity_scoring(
+                    chunks, query_embeddings, chunk_embeddings
+                )
+
+        # Sort by relevance and take top_k
+        scored_chunks.sort(key=lambda x: x["relevance_score"], reverse=True)
+        top_chunks = scored_chunks[:top_k]
+
+        return {"relevant_chunks": top_chunks}
+
+    def _cosine_similarity_scoring(
+        self, chunks: List[Dict], query_embeddings: List, chunk_embeddings: List
+    ) -> List[Dict]:
+        """Score chunks using cosine similarity."""
+        # Extract actual embedding vectors from the embedding objects
+        # EmbeddingGenerator returns embeddings in format: {"embedding": [...], "text": "...", "dimensions": X}
+
+        # Handle query embedding - should be the first (and only) embedding in the list
+        query_embedding_obj = query_embeddings[0] if query_embeddings else {}
+        if isinstance(query_embedding_obj, dict) and "embedding" in query_embedding_obj:
+            query_embedding = query_embedding_obj["embedding"]
+        elif isinstance(query_embedding_obj, list):
+            query_embedding = query_embedding_obj
+        else:
+            query_embedding = []
+
+        print(
+            f"Debug: Query embedding extracted, type: {type(query_embedding)}, length: {len(query_embedding) if isinstance(query_embedding, list) else 'N/A'}"
+        )
+
+        # Simple cosine similarity calculation
+        def cosine_similarity(a, b):
+            # Ensure embeddings are numeric lists
+            if not isinstance(a, list) or not isinstance(b, list):
+                print(f"Debug: Non-list embeddings detected, a={type(a)}, b={type(b)}")
+                return 0.5  # Default similarity
+
+            if len(a) == 0 or len(b) == 0:
+                print(
+                    f"Debug: Empty embeddings detected, len(a)={len(a)}, len(b)={len(b)}"
+                )
+                return 0.5
+
+            try:
+                dot_product = sum(x * y for x, y in zip(a, b))
+                norm_a = sum(x * x for x in a) ** 0.5
+                norm_b = sum(x * x for x in b) ** 0.5
+                return dot_product / (norm_a * norm_b) if norm_a * norm_b > 0 else 0
+            except (TypeError, ValueError) as e:
+                print(f"Debug: Cosine similarity error: {e}")
+                return 0.5
+
+        # Score each chunk
+        scored_chunks = []
+        for i, chunk in enumerate(chunks):
+            if i < len(chunk_embeddings):
+                # Extract embedding vector from chunk embedding object
+                chunk_embedding_obj = chunk_embeddings[i]
+                if (
+                    isinstance(chunk_embedding_obj, dict)
+                    and "embedding" in chunk_embedding_obj
+                ):
+                    chunk_embedding = chunk_embedding_obj["embedding"]
+                elif isinstance(chunk_embedding_obj, list):
+                    chunk_embedding = chunk_embedding_obj
+                else:
+                    chunk_embedding = []
+
+                similarity = cosine_similarity(query_embedding, chunk_embedding)
+                scored_chunk = {**chunk, "relevance_score": similarity}
+                scored_chunks.append(scored_chunk)
+
+        return scored_chunks
+
+    def _bm25_scoring(
+        self, chunks: List[Dict], query_embeddings: List, chunk_embeddings: List
+    ) -> List[Dict]:
+        """Score chunks using BM25 algorithm (future implementation)."""
+        # TODO: Implement BM25 scoring
+        # For now, return chunks with default scores
+        return [{**chunk, "relevance_score": 0.5} for chunk in chunks]
+
+    def _tfidf_scoring(
+        self, chunks: List[Dict], query_embeddings: List, chunk_embeddings: List
+    ) -> List[Dict]:
+        """Score chunks using TF-IDF similarity (future implementation)."""
+        # TODO: Implement TF-IDF scoring
+        # For now, return chunks with default scores
+        return [{**chunk, "relevance_score": 0.5} for chunk in chunks]
```
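For reviewers who want to try the new scorer, here is a minimal sketch (not part of the package) that drives RelevanceScorerNode directly through the run() method added above. The tiny embedding vectors are made up for illustration and mirror the {"embedding": [...]} objects the node unwraps; whether the node also supports the workflow-level execute() path is not shown here.

```python
# Hedged sketch only: exercises RelevanceScorerNode from the hunk above with
# hand-made vectors instead of real EmbeddingGenerator output.
from kailash.nodes.data.retrieval import RelevanceScorerNode

chunks = [
    {"id": "c1", "content": "Supervised learning uses labeled data to train models."},
    {"id": "c2", "content": "CNNs are popular for image processing tasks."},
]

# The scorer unwraps {"embedding": [...]} dicts (or accepts bare lists).
query_embedding = [{"embedding": [1.0, 0.0, 0.0]}]
chunk_embeddings = [
    {"embedding": [0.9, 0.1, 0.0]},  # nearly parallel to the query vector
    {"embedding": [0.0, 1.0, 0.0]},  # orthogonal to the query vector
]

scorer = RelevanceScorerNode()
result = scorer.run(
    chunks=chunks,
    query_embedding=query_embedding,
    chunk_embeddings=chunk_embeddings,
    similarity_method="cosine",
    top_k=1,
)
# Expect chunk "c1" to rank first, since its vector has the higher cosine similarity.
print(result["relevant_chunks"])
```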
kailash/nodes/data/sharepoint_graph.py
CHANGED
```diff
@@ -27,7 +27,7 @@ from typing import Any, Dict, List, Optional
 
 import requests
 
-from kailash.nodes.base import Node, NodeMetadata, NodeParameter
+from kailash.nodes.base import Node, NodeMetadata, NodeParameter, register_node
 from kailash.sdk_exceptions import (
     NodeConfigurationError,
     NodeExecutionError,
@@ -35,6 +35,7 @@ from kailash.sdk_exceptions import (
 )
 
 
+@register_node()
 class SharePointGraphReader(Node):
     """Node for reading files from SharePoint using Microsoft Graph API.
 
@@ -55,8 +56,8 @@ class SharePointGraphReader(Node):
     3. Search for files by name
     4. Navigate folder structures
 
-    Example
-
+    Example::
+
         reader = SharePointGraphReader()
         result = reader.execute(
             tenant_id="your-tenant-id",
@@ -67,7 +68,6 @@ class SharePointGraphReader(Node):
             library_name="Documents",
             folder_path="Reports/2024"
         )
-        ```
     """
 
     def get_metadata(self) -> NodeMetadata:
@@ -464,14 +464,15 @@ class SharePointGraphReader(Node):
             return self._search_files(site_id, library_name, query, headers)
 
 
+@register_node()
 class SharePointGraphWriter(Node):
     """Node for uploading files to SharePoint using Microsoft Graph API.
 
     This node handles file uploads to SharePoint document libraries,
     supporting folder structures and metadata.
 
-    Example
-
+    Example::
+
         writer = SharePointGraphWriter()
         result = writer.execute(
             tenant_id="your-tenant-id",
@@ -483,7 +484,6 @@ class SharePointGraphWriter(Node):
             folder_path="Reports/2024",
             sharepoint_name="Q4_Report_2024.pdf"
         )
-        ```
     """
 
     def get_metadata(self) -> NodeMetadata:
```
kailash/nodes/data/sources.py
ADDED
```diff
@@ -0,0 +1,65 @@
+"""Data source nodes for providing input data to workflows."""
+
+from typing import Any, Dict
+
+from kailash.nodes.base import Node, NodeParameter, register_node
+
+
+@register_node()
+class DocumentSourceNode(Node):
+    """Provides sample documents for hierarchical RAG processing."""
+
+    def get_parameters(self) -> Dict[str, NodeParameter]:
+        return {
+            "sample_documents": NodeParameter(
+                name="sample_documents",
+                type=bool,
+                required=False,
+                default=True,
+                description="Use built-in sample documents",
+            )
+        }
+
+    def run(self, **kwargs) -> Dict[str, Any]:
+        # Sample documents for demonstration
+        documents = [
+            {
+                "id": "doc1",
+                "title": "Machine Learning Basics",
+                "content": """Machine learning is a subset of artificial intelligence that enables computers to learn and make decisions from data without being explicitly programmed. There are three main types of machine learning: supervised learning, unsupervised learning, and reinforcement learning. Supervised learning uses labeled data to train models that can make predictions on new data. Common algorithms include linear regression, decision trees, and neural networks. The process involves splitting data into training and testing sets to evaluate model performance.""",
+            },
+            {
+                "id": "doc2",
+                "title": "Deep Learning Overview",
+                "content": """Deep learning is a specialized area of machine learning that uses neural networks with multiple layers to model and understand complex patterns in data. These networks, called deep neural networks, can automatically learn hierarchical representations of data. Popular architectures include convolutional neural networks (CNNs) for image processing, recurrent neural networks (RNNs) for sequential data, and transformers for natural language processing. Deep learning has achieved breakthrough results in computer vision, speech recognition, and language understanding.""",
+            },
+            {
+                "id": "doc3",
+                "title": "Natural Language Processing",
+                "content": """Natural Language Processing (NLP) is a field that combines computational linguistics with machine learning to help computers understand, interpret, and generate human language. Key NLP tasks include tokenization, part-of-speech tagging, named entity recognition, sentiment analysis, and machine translation. Modern NLP relies heavily on transformer architectures like BERT and GPT, which use attention mechanisms to understand context and relationships between words. Applications include chatbots, search engines, and language translation services.""",
+            },
+        ]
+
+        print(f"Debug DocumentSource: providing {len(documents)} documents")
+        return {"documents": documents}
+
+
+@register_node()
+class QuerySourceNode(Node):
+    """Provides sample queries for RAG processing."""
+
+    def get_parameters(self) -> Dict[str, NodeParameter]:
+        return {
+            "query": NodeParameter(
+                name="query",
+                type=str,
+                required=False,
+                default="What are the main types of machine learning?",
+                description="Query to process",
+            )
+        }
+
+    def run(self, **kwargs) -> Dict[str, Any]:
+        query = kwargs.get("query", "What are the main types of machine learning?")
+        print(f"Debug QuerySource: providing query='{query}'")
+        return {"query": query}
```
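As a quick, hedged sketch of what these two source nodes emit, they can be instantiated with no configuration and invoked via run(), following the same direct-call pattern as the scorer sketch earlier; the output keys match the return statements in the hunk above.

```python
# Hedged sketch only: inspect the default payloads of the new source nodes.
from kailash.nodes.data.sources import DocumentSourceNode, QuerySourceNode

documents = DocumentSourceNode().run()["documents"]  # three built-in sample documents
query = QuerySourceNode().run()["query"]             # default machine-learning question

print(query)
for doc in documents:
    # Each document dict carries "id", "title", and "content" fields.
    print(doc["id"], "-", doc["title"])
```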
kailash/nodes/data/sql.py
CHANGED
```diff
@@ -63,7 +63,8 @@ class SQLDatabaseNode(Node):
         - TimeoutError: Query execution timeout
         - PermissionError: Access denied
 
-    Example
+    Example::
+
         # Query customer data
         sql_node = SQLDatabaseNode(
             connection_string='postgresql://user:pass@host/db',
@@ -258,7 +259,8 @@ class SQLQueryBuilderNode(Node):
     3. Multi-table joins
     4. Aggregation queries
 
-    Example
+    Example::
+
         builder = SQLQueryBuilderNode(
             table='customers',
             select=['name', 'email'],
```
kailash/nodes/data/writers.py
CHANGED
```diff
@@ -81,7 +81,8 @@ class CSVWriter(Node):
         - TypeError: Invalid data structure
         - UnicodeEncodeError: Encoding issues
 
-    Example
+    Example::
+
         # Write customer data
         writer = CSVWriter(
             file_path='output.csv',
@@ -261,7 +262,8 @@ class JSONWriter(Node):
         - OSError: Path or disk issues
         - JSONEncodeError: Encoding problems
 
-    Example
+    Example::
+
         # Write API response
         writer = JSONWriter(
             file_path='response.json',
@@ -412,7 +414,8 @@ class TextWriter(Node):
         - UnicodeEncodeError: Encoding mismatch
         - MemoryError: Text too large
 
-    Example
+    Example::
+
         # Append to log file
         writer = TextWriter(
             file_path='app.log',
```
kailash/nodes/logic/operations.py
CHANGED
```diff
@@ -25,7 +25,8 @@ class Switch(Node):
     The outputs of Switch nodes are typically connected to different processing
     nodes, and those branches can be rejoined later using a Merge node.
 
-    Example usage
+    Example usage::
+
         # Simple boolean condition
         switch_node = Switch(condition_field="status", operator="==", value="success")
         workflow.add_node("router", switch_node)
```