kailash 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/api/custom_nodes_secure.py +2 -2
- kailash/api/studio_secure.py +1 -1
- kailash/mcp/client_new.py +1 -1
- kailash/mcp/server_new.py +6 -6
- kailash/nodes/ai/a2a.py +1 -1
- kailash/nodes/api/__init__.py +21 -0
- kailash/nodes/code/python.py +6 -0
- kailash/nodes/data/__init__.py +4 -2
- kailash/nodes/data/directory.py +278 -0
- kailash/nodes/data/sql.py +699 -256
- kailash/nodes/transform/processors.py +31 -0
- kailash/runtime/local.py +13 -0
- kailash/workflow/convergence.py +1 -1
- kailash/workflow/cycle_analyzer.py +346 -225
- kailash/workflow/cycle_builder.py +75 -69
- kailash/workflow/cycle_config.py +62 -46
- kailash/workflow/cycle_debugger.py +284 -184
- kailash/workflow/cycle_exceptions.py +111 -97
- kailash/workflow/cycle_profiler.py +272 -202
- kailash/workflow/graph.py +15 -0
- kailash/workflow/migration.py +238 -197
- kailash/workflow/templates.py +124 -105
- kailash/workflow/validation.py +356 -298
- kailash-0.2.2.dist-info/METADATA +121 -0
- {kailash-0.2.0.dist-info → kailash-0.2.2.dist-info}/RECORD +29 -28
- kailash-0.2.0.dist-info/METADATA +0 -1614
- {kailash-0.2.0.dist-info → kailash-0.2.2.dist-info}/WHEEL +0 -0
- {kailash-0.2.0.dist-info → kailash-0.2.2.dist-info}/entry_points.txt +0 -0
- {kailash-0.2.0.dist-info → kailash-0.2.2.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.2.0.dist-info → kailash-0.2.2.dist-info}/top_level.txt +0 -0
kailash/api/custom_nodes_secure.py
CHANGED
@@ -350,7 +350,7 @@ async def _execute_api_node(
 ) -> Dict[str, Any]:
     """Execute an API-based custom node"""
 
-    from kailash.nodes.api.http import HTTPClientNode
+    from kailash.nodes.api.http import HTTPRequestNode
     from kailash.security import TenantContext
 
     # Execute in tenant context
@@ -359,7 +359,7 @@ async def _execute_api_node(
     api_config = node.implementation.get("api", {})
 
     # Create HTTP client node
-    http_node = HTTPClientNode(
+    http_node = HTTPRequestNode(
        url=api_config.get("url", ""),
        method=api_config.get("method", "GET"),
        headers=api_config.get("headers", {}),
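Both hunks are part of the HTTPClientNode-to-HTTPRequestNode rename. A minimal sketch of the updated call pattern, assuming only the api_config mapping read from node.implementation above (the concrete config values here are hypothetical):

    from kailash.nodes.api.http import HTTPRequestNode

    # Hypothetical config of the shape read from node.implementation.get("api", {})
    api_config = {
        "url": "https://api.example.com/v1/status",
        "method": "GET",
        "headers": {"Accept": "application/json"},
    }

    # Same keyword arguments as in the hunk above
    http_node = HTTPRequestNode(
        url=api_config.get("url", ""),
        method=api_config.get("method", "GET"),
        headers=api_config.get("headers", {}),
    )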
kailash/api/studio_secure.py
CHANGED
kailash/mcp/client_new.py
CHANGED
@@ -292,7 +292,7 @@ class MCPClient:
 
 # Convenience functions for LLM agents
 async def discover_and_prepare_tools(
-    mcp_servers: List[Union[str, Dict[str, Any]]]
+    mcp_servers: List[Union[str, Dict[str, Any]]],
 ) -> List[Dict[str, Any]]:
     """
     Discover tools from multiple MCP servers and prepare them for LLM use.
kailash/mcp/server_new.py
CHANGED
@@ -14,7 +14,7 @@ from typing import Callable, List, Optional
 try:
     from mcp.server import Server
     from mcp.server.models import InitializationOptions
-    from mcp.types import
+    from mcp.types import Resource, TextContent, Tool
 
     MCP_AVAILABLE = True
 except ImportError:
@@ -202,15 +202,15 @@ class MCPServer:
         # Determine type
         param_type = "string"  # Default
         if param.annotation != inspect.Parameter.empty:
-            if param.annotation
+            if param.annotation is int:
                 param_type = "integer"
-            elif param.annotation
+            elif param.annotation is float:
                 param_type = "number"
-            elif param.annotation
+            elif param.annotation is bool:
                 param_type = "boolean"
-            elif param.annotation
+            elif param.annotation is dict:
                 param_type = "object"
-            elif param.annotation
+            elif param.annotation is list:
                 param_type = "array"
 
         properties[param_name] = {
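The second hunk switches the annotation checks to `is` identity comparisons, which match the builtin type objects themselves (plain int, list, etc.) but not parameterized generics such as list[str]. A self-contained sketch of the same annotation-to-JSON-Schema type mapping:

    import inspect

    def schema_type(param: inspect.Parameter) -> str:
        """Map a plain builtin annotation to a JSON Schema type name."""
        param_type = "string"  # Default
        if param.annotation != inspect.Parameter.empty:
            if param.annotation is int:
                param_type = "integer"
            elif param.annotation is float:
                param_type = "number"
            elif param.annotation is bool:
                param_type = "boolean"
            elif param.annotation is dict:
                param_type = "object"
            elif param.annotation is list:
                param_type = "array"
        return param_type

    def example_tool(count: int, ratio: float, tags: list, name):
        """Example signature; `name` carries no annotation."""

    params = inspect.signature(example_tool).parameters.values()
    print([schema_type(p) for p in params])
    # ['integer', 'number', 'array', 'string']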
kailash/nodes/ai/a2a.py
CHANGED
@@ -832,7 +832,7 @@ Focus on actionable intelligence rather than just listing what each agent said."
             summary = result.get("response", {}).get("content", "")
             if summary:
                 return f"Shared Context Summary:\n{summary}"
-        except:
+        except Exception:
             pass
 
         # Fallback to simple summary
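Narrowing the bare `except:` to `except Exception:` keeps the silent fallback for ordinary errors while letting KeyboardInterrupt and SystemExit, which derive from BaseException rather than Exception, propagate:

    # A bare `except:` catches BaseException and would swallow these too;
    # `except Exception:` does not.
    print(issubclass(KeyboardInterrupt, Exception))      # False
    print(issubclass(SystemExit, Exception))             # False
    print(issubclass(KeyboardInterrupt, BaseException))  # True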
kailash/nodes/api/__init__.py
CHANGED
@@ -18,6 +18,8 @@ Design philosophy:
 - Enable both synchronous and asynchronous operation
 """
 
+import warnings
+
 from .auth import APIKeyNode, BasicAuthNode, OAuth2Node
 from .graphql import AsyncGraphQLClientNode, GraphQLClientNode
 from .http import AsyncHTTPRequestNode, HTTPRequestNode
@@ -32,6 +34,23 @@ from .rate_limiting import (
 )
 from .rest import AsyncRESTClientNode, RESTClientNode
 
+# Backwards compatibility aliases
+HTTPClientNode = HTTPRequestNode  # Deprecated: Use HTTPRequestNode instead
+
+
+def __getattr__(name):
+    """Provide deprecation warnings for backwards compatibility."""
+    if name == "HTTPClientNode":
+        warnings.warn(
+            "HTTPClientNode is deprecated and will be removed in v0.3.0. "
+            "Use HTTPRequestNode instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return HTTPRequestNode
+    raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
+
+
 __all__ = [
     "HTTPRequestNode",
     "AsyncHTTPRequestNode",
@@ -49,4 +68,6 @@ __all__ = [
     "RateLimitedAPINode",
     "AsyncRateLimitedAPINode",
     "create_rate_limiter",
+    # Backwards compatibility
+    "HTTPClientNode",  # Deprecated alias
 ]
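The shim combines a module-level alias with a PEP 562 module-level `__getattr__`. Note that `__getattr__` only runs when ordinary attribute lookup on the module fails, so the warning path is reached only when no direct HTTPClientNode binding satisfies the lookup first. A minimal standalone sketch of the pattern, using a hypothetical module:

    # compat.py -- hypothetical module illustrating the PEP 562 pattern
    import warnings


    class HTTPRequestNode:
        """Stand-in for the real node class."""


    def __getattr__(name):
        # Invoked only when `name` is not found in the module's namespace.
        if name == "HTTPClientNode":
            warnings.warn(
                "HTTPClientNode is deprecated; use HTTPRequestNode instead.",
                DeprecationWarning,
                stacklevel=2,
            )
            return HTTPRequestNode
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

With that module on the path, `from compat import HTTPClientNode` emits the DeprecationWarning and returns HTTPRequestNode.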
kailash/nodes/code/python.py
CHANGED
@@ -93,6 +93,12 @@ ALLOWED_MODULES = {
     "matplotlib",
     "seaborn",
     "plotly",
+    # File processing modules
+    "csv",  # For CSV file processing
+    "mimetypes",  # For MIME type detection
+    "pathlib",  # For modern path operations
+    "glob",  # For file pattern matching
+    "xml",  # For XML processing
 }
 
 
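ALLOWED_MODULES appears to be the import allowlist for user-supplied code run by this node. A short, hypothetical snippet of the kind of file-processing code these additions enable (the ./data path is illustrative):

    import csv
    import glob
    import mimetypes
    from pathlib import Path

    # Count rows in every CSV under ./data, reporting each file's MIME type.
    for name in sorted(glob.glob("data/*.csv")):
        path = Path(name)
        mime, _ = mimetypes.guess_type(name)
        with path.open(newline="") as f:
            rows = sum(1 for _ in csv.reader(f))
        print(f"{path.name}: {rows} rows ({mime})")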
kailash/nodes/data/__init__.py
CHANGED
@@ -80,6 +80,7 @@ Example Workflows:
     workflow.connect('process', 'publish')
 """
 
+from kailash.nodes.data.directory import DirectoryReaderNode
 from kailash.nodes.data.readers import CSVReaderNode, JSONReaderNode, TextReaderNode
 from kailash.nodes.data.retrieval import RelevanceScorerNode
 from kailash.nodes.data.sharepoint_graph import (
@@ -87,7 +88,7 @@ from kailash.nodes.data.sharepoint_graph import (
     SharePointGraphWriter,
 )
 from kailash.nodes.data.sources import DocumentSourceNode, QuerySourceNode
-from kailash.nodes.data.sql import SQLDatabaseNode, SQLQueryBuilderNode
+from kailash.nodes.data.sql import SQLDatabaseNode
 from kailash.nodes.data.streaming import (
     EventStreamNode,
     KafkaConsumerNode,
@@ -102,6 +103,8 @@ from kailash.nodes.data.vector_db import (
 from kailash.nodes.data.writers import CSVWriterNode, JSONWriterNode, TextWriterNode
 
 __all__ = [
+    # Directory
+    "DirectoryReaderNode",
     # Readers
     "CSVReaderNode",
     "JSONReaderNode",
@@ -119,7 +122,6 @@ __all__ = [
     "RelevanceScorerNode",
     # SQL
     "SQLDatabaseNode",
-    "SQLQueryBuilderNode",
     # Vector DB
     "EmbeddingNode",
     "VectorDatabaseNode",
kailash/nodes/data/directory.py
ADDED
@@ -0,0 +1,278 @@
+"""Directory processing nodes for file discovery and batch operations."""
+
+import mimetypes
+import os
+from datetime import datetime
+from typing import Any, Dict, List, Optional
+
+from kailash.nodes.base import Node, NodeParameter, register_node
+from kailash.security import validate_file_path
+
+
+@register_node()
+class DirectoryReaderNode(Node):
+    """
+    Discovers and catalogs files in a directory with metadata extraction.
+
+    This node provides comprehensive directory scanning capabilities, handling
+    file discovery, metadata extraction, and filtering. It's designed for
+    batch file processing workflows and dynamic data source discovery.
+
+    Design Philosophy:
+        The DirectoryReaderNode embodies the principle of "dynamic data discovery."
+        Instead of hardcoding file paths, workflows can dynamically discover
+        available data sources at runtime. This makes workflows more flexible
+        and adaptable to changing data environments.
+
+    Features:
+    - Recursive directory scanning
+    - File type detection and filtering
+    - Metadata extraction (size, timestamps, MIME types)
+    - Pattern-based filtering
+    - Security-validated path operations
+
+    Use Cases:
+    - Batch file processing workflows
+    - Dynamic data pipeline creation
+    - File monitoring and cataloging
+    - Multi-format document processing
+    - Data lake exploration
+
+    Output Format:
+    Returns a structured catalog of discovered files with:
+    - File paths and names
+    - File types and MIME types
+    - File sizes and timestamps
+    - Directory structure information
+    """
+
+    def get_parameters(self) -> Dict[str, NodeParameter]:
+        """Define input parameters for directory scanning."""
+        return {
+            "directory_path": NodeParameter(
+                name="directory_path",
+                type=str,
+                required=True,
+                description="Path to the directory to scan",
+            ),
+            "recursive": NodeParameter(
+                name="recursive",
+                type=bool,
+                required=False,
+                default=False,
+                description="Whether to scan subdirectories recursively",
+            ),
+            "file_patterns": NodeParameter(
+                name="file_patterns",
+                type=list,
+                required=False,
+                default=[],
+                description="List of file patterns to include (e.g., ['*.csv', '*.json'])",
+            ),
+            "exclude_patterns": NodeParameter(
+                name="exclude_patterns",
+                type=list,
+                required=False,
+                default=[],
+                description="List of file patterns to exclude",
+            ),
+            "include_hidden": NodeParameter(
+                name="include_hidden",
+                type=bool,
+                required=False,
+                default=False,
+                description="Whether to include hidden files (starting with .)",
+            ),
+        }
+
+    def run(self, **kwargs) -> Dict[str, Any]:
+        """Execute directory scanning operation.
+
+        Returns:
+            Dictionary containing:
+            - discovered_files: List of file information dictionaries
+            - files_by_type: Files grouped by type
+            - directory_stats: Summary statistics
+        """
+        directory_path = kwargs.get("directory_path")
+        recursive = kwargs.get("recursive", False)
+        file_patterns = kwargs.get("file_patterns", [])
+        exclude_patterns = kwargs.get("exclude_patterns", [])
+        include_hidden = kwargs.get("include_hidden", False)
+
+        # Validate directory path for security
+        validated_path = validate_file_path(directory_path, operation="directory scan")
+
+        if not os.path.isdir(validated_path):
+            raise FileNotFoundError(f"Directory not found: {directory_path}")
+
+        discovered_files = []
+
+        try:
+            if recursive:
+                # Recursive scan
+                for root, dirs, files in os.walk(validated_path):
+                    for filename in files:
+                        file_path = os.path.join(root, filename)
+                        file_info = self._extract_file_info(
+                            file_path,
+                            filename,
+                            include_hidden,
+                            file_patterns,
+                            exclude_patterns,
+                        )
+                        if file_info:
+                            discovered_files.append(file_info)
+            else:
+                # Single directory scan
+                for filename in os.listdir(validated_path):
+                    file_path = os.path.join(validated_path, filename)
+
+                    # Skip directories in non-recursive mode
+                    if os.path.isdir(file_path):
+                        continue
+
+                    file_info = self._extract_file_info(
+                        file_path,
+                        filename,
+                        include_hidden,
+                        file_patterns,
+                        exclude_patterns,
+                    )
+                    if file_info:
+                        discovered_files.append(file_info)
+
+        except PermissionError as e:
+            raise PermissionError(f"Permission denied accessing directory: {e}")
+        except Exception as e:
+            raise RuntimeError(f"Error scanning directory: {e}")
+
+        # Group files by type
+        files_by_type = {}
+        for file_info in discovered_files:
+            file_type = file_info["file_type"]
+            if file_type not in files_by_type:
+                files_by_type[file_type] = []
+            files_by_type[file_type].append(file_info)
+
+        # Generate directory statistics
+        directory_stats = {
+            "total_files": len(discovered_files),
+            "file_types": list(files_by_type.keys()),
+            "files_by_type_count": {
+                file_type: len(files) for file_type, files in files_by_type.items()
+            },
+            "total_size": sum(f["file_size"] for f in discovered_files),
+            "scan_time": datetime.now().isoformat(),
+            "directory_path": directory_path,
+            "recursive": recursive,
+        }
+
+        return {
+            "discovered_files": discovered_files,
+            "files_by_type": files_by_type,
+            "directory_stats": directory_stats,
+        }
+
+    def _extract_file_info(
+        self,
+        file_path: str,
+        filename: str,
+        include_hidden: bool,
+        file_patterns: List[str],
+        exclude_patterns: List[str],
+    ) -> Optional[Dict[str, Any]]:
+        """Extract metadata from a single file.
+
+        Args:
+            file_path: Full path to the file
+            filename: Name of the file
+            include_hidden: Whether to include hidden files
+            file_patterns: Patterns to include
+            exclude_patterns: Patterns to exclude
+
+        Returns:
+            File information dictionary or None if file should be excluded
+        """
+        # Skip hidden files if not included
+        if not include_hidden and filename.startswith("."):
+            return None
+
+        # Check exclude patterns
+        for pattern in exclude_patterns:
+            if self._matches_pattern(filename, pattern):
+                return None
+
+        # Check include patterns (if specified)
+        if file_patterns:
+            included = any(
+                self._matches_pattern(filename, pattern) for pattern in file_patterns
+            )
+            if not included:
+                return None
+
+        try:
+            # Get file statistics
+            file_stat = os.stat(file_path)
+            file_ext = os.path.splitext(filename)[1].lower()
+
+            # Map extensions to types
+            ext_to_type = {
+                ".csv": "csv",
+                ".json": "json",
+                ".txt": "txt",
+                ".xml": "xml",
+                ".md": "markdown",
+                ".py": "python",
+                ".js": "javascript",
+                ".html": "html",
+                ".css": "css",
+                ".pdf": "pdf",
+                ".doc": "word",
+                ".docx": "word",
+                ".xls": "excel",
+                ".xlsx": "excel",
+                ".png": "image",
+                ".jpg": "image",
+                ".jpeg": "image",
+                ".gif": "image",
+                ".svg": "image",
+            }
+
+            file_type = ext_to_type.get(file_ext, "unknown")
+
+            # Get MIME type
+            mime_type, _ = mimetypes.guess_type(file_path)
+            if not mime_type:
+                mime_type = "application/octet-stream"
+
+            return {
+                "file_path": file_path,
+                "file_name": filename,
+                "file_type": file_type,
+                "file_extension": file_ext,
+                "file_size": file_stat.st_size,
+                "mime_type": mime_type,
+                "created_time": datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
+                "modified_time": datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
+                "discovered_at": datetime.now().isoformat(),
+            }
+
+        except (OSError, PermissionError) as e:
+            # Log error but continue with other files
+            self.logger.warning(f"Could not process file {file_path}: {e}")
+            return None
+
+    def _matches_pattern(self, filename: str, pattern: str) -> bool:
+        """Check if filename matches a glob-style pattern.
+
+        Args:
+            filename: Name of the file to check
+            pattern: Glob pattern (e.g., '*.csv', 'data*', 'file?.txt')
+
+        Returns:
+            True if filename matches pattern
+        """
+        import fnmatch
+
+        return fnmatch.fnmatch(filename, pattern)
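Putting the new node together with its export from kailash.nodes.data, a usage sketch based on the parameters and return keys defined above (the directory path is illustrative, and no-argument Node construction is assumed, which this diff does not show):

    from kailash.nodes.data import DirectoryReaderNode

    node = DirectoryReaderNode()
    result = node.run(
        directory_path="./data",            # illustrative
        recursive=True,
        file_patterns=["*.csv", "*.json"],
        exclude_patterns=["*.tmp"],
    )

    stats = result["directory_stats"]
    print(stats["total_files"], stats["files_by_type_count"])
    for info in result["files_by_type"].get("csv", []):
        print(info["file_path"], info["file_size"], info["mime_type"])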