kailash 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -350,7 +350,7 @@ async def _execute_api_node(
 ) -> Dict[str, Any]:
     """Execute an API-based custom node"""
 
-    from kailash.nodes.api.http import HTTPClientNode
+    from kailash.nodes.api.http import HTTPRequestNode
     from kailash.security import TenantContext
 
     # Execute in tenant context
@@ -359,7 +359,7 @@ async def _execute_api_node(
         api_config = node.implementation.get("api", {})
 
         # Create HTTP client node
-        http_node = HTTPClientNode(
+        http_node = HTTPRequestNode(
             url=api_config.get("url", ""),
             method=api_config.get("method", "GET"),
             headers=api_config.get("headers", {}),
@@ -369,7 +369,7 @@ class WorkflowStudioAPI:
                         ),
                     }
                 )
-            except:
+            except Exception:
                outputs.append({"name": "output", "type": "any"})
        else:
            # Default output for all nodes
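Note on the except change above (the same fix appears in kailash/nodes/ai/a2a.py below): a bare except also catches BaseException subclasses such as KeyboardInterrupt and SystemExit, so an interrupt during output inference would have been silently converted into the fallback output. A minimal, self-contained illustration:

    # Why "except Exception:" is safer than a bare "except:":
    # KeyboardInterrupt derives from BaseException, not Exception.
    try:
        raise KeyboardInterrupt
    except Exception:
        print("not reached: KeyboardInterrupt is not an Exception")
    except BaseException:
        print("reached: a bare except would have swallowed this too")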
kailash/mcp/client_new.py CHANGED
@@ -292,7 +292,7 @@ class MCPClient:
 
 # Convenience functions for LLM agents
 async def discover_and_prepare_tools(
-    mcp_servers: List[Union[str, Dict[str, Any]]]
+    mcp_servers: List[Union[str, Dict[str, Any]]],
 ) -> List[Dict[str, Any]]:
     """
     Discover tools from multiple MCP servers and prepare them for LLM use.
kailash/mcp/server_new.py CHANGED
@@ -14,7 +14,7 @@ from typing import Callable, List, Optional
 try:
     from mcp.server import Server
     from mcp.server.models import InitializationOptions
-    from mcp.types import EmbeddedResource, ImageContent, Resource, TextContent, Tool
+    from mcp.types import Resource, TextContent, Tool
 
     MCP_AVAILABLE = True
 except ImportError:
@@ -202,15 +202,15 @@ class MCPServer:
             # Determine type
             param_type = "string"  # Default
             if param.annotation != inspect.Parameter.empty:
-                if param.annotation == int:
+                if param.annotation is int:
                     param_type = "integer"
-                elif param.annotation == float:
+                elif param.annotation is float:
                     param_type = "number"
-                elif param.annotation == bool:
+                elif param.annotation is bool:
                     param_type = "boolean"
-                elif param.annotation == dict:
+                elif param.annotation is dict:
                     param_type = "object"
-                elif param.annotation == list:
+                elif param.annotation is list:
                     param_type = "array"
 
             properties[param_name] = {
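Replacing == with is for these annotation checks is more than style: is tests object identity, which is exactly what "the annotation is the built-in int type" means, and it cannot be spoofed by an annotation object whose __eq__ answers True to everything (a failure mode seen with mocks and some proxy objects). A short sketch of the difference:

    # Identity vs. equality for type checks: a permissive __eq__ can lie.
    class AlwaysEqual:
        def __eq__(self, other):
            return True

    annotation = AlwaysEqual()
    print(annotation == int)  # True  -> == would misclassify this parameter
    print(annotation is int)  # False -> identity check is not fooled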
kailash/nodes/ai/a2a.py CHANGED
@@ -832,7 +832,7 @@ Focus on actionable intelligence rather than just listing what each agent said."
                 summary = result.get("response", {}).get("content", "")
                 if summary:
                     return f"Shared Context Summary:\n{summary}"
-        except:
+        except Exception:
             pass
 
         # Fallback to simple summary
kailash/nodes/api/__init__.py CHANGED
@@ -18,6 +18,8 @@ Design philosophy:
 - Enable both synchronous and asynchronous operation
 """
 
+import warnings
+
 from .auth import APIKeyNode, BasicAuthNode, OAuth2Node
 from .graphql import AsyncGraphQLClientNode, GraphQLClientNode
 from .http import AsyncHTTPRequestNode, HTTPRequestNode
@@ -32,6 +34,23 @@ from .rate_limiting import (
 )
 from .rest import AsyncRESTClientNode, RESTClientNode
 
+# Backwards compatibility aliases
+HTTPClientNode = HTTPRequestNode  # Deprecated: Use HTTPRequestNode instead
+
+
+def __getattr__(name):
+    """Provide deprecation warnings for backwards compatibility."""
+    if name == "HTTPClientNode":
+        warnings.warn(
+            "HTTPClientNode is deprecated and will be removed in v0.3.0. "
+            "Use HTTPRequestNode instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return HTTPRequestNode
+    raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
+
+
 __all__ = [
     "HTTPRequestNode",
     "AsyncHTTPRequestNode",
@@ -49,4 +68,6 @@ __all__ = [
     "RateLimitedAPINode",
     "AsyncRateLimitedAPINode",
     "create_rate_limiter",
+    # Backwards compatibility
+    "HTTPClientNode",  # Deprecated alias
 ]
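One caveat worth knowing about this shim: a module-level __getattr__ (PEP 562, Python 3.7+) is consulted only when normal attribute lookup fails, and HTTPClientNode is also bound directly at module scope a few lines up, so ordinary imports resolve the alias without ever reaching the warning path. A hedged sketch of how the hook behaves once the direct binding is removed:

    # Hedged sketch: with the module-level alias assignment removed,
    # "from kailash.nodes.api import HTTPClientNode" falls through to
    # __getattr__ and emits the DeprecationWarning.
    import warnings

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        from kailash.nodes.api import HTTPClientNode
        assert any(issubclass(w.category, DeprecationWarning) for w in caught)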
@@ -93,6 +93,12 @@ ALLOWED_MODULES = {
     "matplotlib",
     "seaborn",
     "plotly",
+    # File processing modules
+    "csv",  # For CSV file processing
+    "mimetypes",  # For MIME type detection
+    "pathlib",  # For modern path operations
+    "glob",  # For file pattern matching
+    "xml",  # For XML processing
 }
 
 
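These five additions extend the sandbox allow-list so generated code can do CSV, path, glob, MIME, and XML work. A hedged sketch of how such an allow-list is commonly enforced; the actual enforcement hook in kailash may differ:

    # Hypothetical enforcement sketch (safe_import is illustrative, not a
    # kailash API): gate __import__ on the top-level package name.
    ALLOWED_MODULES = {"csv", "mimetypes", "pathlib", "glob", "xml"}

    def safe_import(name, *args, **kwargs):
        root = name.split(".")[0]  # "xml.etree.ElementTree" is gated by "xml"
        if root not in ALLOWED_MODULES:
            raise ImportError(f"module '{name}' is not in the allow-list")
        return __import__(name, *args, **kwargs)

    # Installed by executing user code with a restricted builtins mapping:
    # exec(user_code, {"__builtins__": {"__import__": safe_import, ...}})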
kailash/nodes/data/__init__.py CHANGED
@@ -80,6 +80,7 @@ Example Workflows:
     workflow.connect('process', 'publish')
 """
 
+from kailash.nodes.data.directory import DirectoryReaderNode
 from kailash.nodes.data.readers import CSVReaderNode, JSONReaderNode, TextReaderNode
 from kailash.nodes.data.retrieval import RelevanceScorerNode
 from kailash.nodes.data.sharepoint_graph import (
@@ -87,7 +88,7 @@ from kailash.nodes.data.sharepoint_graph import (
     SharePointGraphWriter,
 )
 from kailash.nodes.data.sources import DocumentSourceNode, QuerySourceNode
-from kailash.nodes.data.sql import SQLDatabaseNode, SQLQueryBuilderNode
+from kailash.nodes.data.sql import SQLDatabaseNode
 from kailash.nodes.data.streaming import (
     EventStreamNode,
     KafkaConsumerNode,
@@ -102,6 +103,8 @@ from kailash.nodes.data.vector_db import (
 from kailash.nodes.data.writers import CSVWriterNode, JSONWriterNode, TextWriterNode
 
 __all__ = [
+    # Directory
+    "DirectoryReaderNode",
     # Readers
     "CSVReaderNode",
     "JSONReaderNode",
@@ -119,7 +122,6 @@ __all__ = [
     "RelevanceScorerNode",
     # SQL
     "SQLDatabaseNode",
-    "SQLQueryBuilderNode",
     # Vector DB
     "EmbeddingNode",
     "VectorDatabaseNode",
kailash/nodes/data/directory.py ADDED
@@ -0,0 +1,278 @@
+"""Directory processing nodes for file discovery and batch operations."""
+
+import mimetypes
+import os
+from datetime import datetime
+from typing import Any, Dict, List, Optional
+
+from kailash.nodes.base import Node, NodeParameter, register_node
+from kailash.security import validate_file_path
+
+
+@register_node()
+class DirectoryReaderNode(Node):
+    """
+    Discovers and catalogs files in a directory with metadata extraction.
+
+    This node provides comprehensive directory scanning capabilities, handling
+    file discovery, metadata extraction, and filtering. It's designed for
+    batch file processing workflows and dynamic data source discovery.
+
+    Design Philosophy:
+        The DirectoryReaderNode embodies the principle of "dynamic data discovery."
+        Instead of hardcoding file paths, workflows can dynamically discover
+        available data sources at runtime. This makes workflows more flexible
+        and adaptable to changing data environments.
+
+    Features:
+        - Recursive directory scanning
+        - File type detection and filtering
+        - Metadata extraction (size, timestamps, MIME types)
+        - Pattern-based filtering
+        - Security-validated path operations
+
+    Use Cases:
+        - Batch file processing workflows
+        - Dynamic data pipeline creation
+        - File monitoring and cataloging
+        - Multi-format document processing
+        - Data lake exploration
+
+    Output Format:
+        Returns a structured catalog of discovered files with:
+        - File paths and names
+        - File types and MIME types
+        - File sizes and timestamps
+        - Directory structure information
+    """
+
+    def get_parameters(self) -> Dict[str, NodeParameter]:
+        """Define input parameters for directory scanning."""
+        return {
+            "directory_path": NodeParameter(
+                name="directory_path",
+                type=str,
+                required=True,
+                description="Path to the directory to scan",
+            ),
+            "recursive": NodeParameter(
+                name="recursive",
+                type=bool,
+                required=False,
+                default=False,
+                description="Whether to scan subdirectories recursively",
+            ),
+            "file_patterns": NodeParameter(
+                name="file_patterns",
+                type=list,
+                required=False,
+                default=[],
+                description="List of file patterns to include (e.g., ['*.csv', '*.json'])",
+            ),
+            "exclude_patterns": NodeParameter(
+                name="exclude_patterns",
+                type=list,
+                required=False,
+                default=[],
+                description="List of file patterns to exclude",
+            ),
+            "include_hidden": NodeParameter(
+                name="include_hidden",
+                type=bool,
+                required=False,
+                default=False,
+                description="Whether to include hidden files (starting with .)",
+            ),
+        }
+
+    def run(self, **kwargs) -> Dict[str, Any]:
+        """Execute directory scanning operation.
+
+        Returns:
+            Dictionary containing:
+            - discovered_files: List of file information dictionaries
+            - files_by_type: Files grouped by type
+            - directory_stats: Summary statistics
+        """
+        directory_path = kwargs.get("directory_path")
+        recursive = kwargs.get("recursive", False)
+        file_patterns = kwargs.get("file_patterns", [])
+        exclude_patterns = kwargs.get("exclude_patterns", [])
+        include_hidden = kwargs.get("include_hidden", False)
+
+        # Validate directory path for security
+        validated_path = validate_file_path(directory_path, operation="directory scan")
+
+        if not os.path.isdir(validated_path):
+            raise FileNotFoundError(f"Directory not found: {directory_path}")
+
+        discovered_files = []
+
+        try:
+            if recursive:
+                # Recursive scan
+                for root, dirs, files in os.walk(validated_path):
+                    for filename in files:
+                        file_path = os.path.join(root, filename)
+                        file_info = self._extract_file_info(
+                            file_path,
+                            filename,
+                            include_hidden,
+                            file_patterns,
+                            exclude_patterns,
+                        )
+                        if file_info:
+                            discovered_files.append(file_info)
+            else:
+                # Single directory scan
+                for filename in os.listdir(validated_path):
+                    file_path = os.path.join(validated_path, filename)
+
+                    # Skip directories in non-recursive mode
+                    if os.path.isdir(file_path):
+                        continue
+
+                    file_info = self._extract_file_info(
+                        file_path,
+                        filename,
+                        include_hidden,
+                        file_patterns,
+                        exclude_patterns,
+                    )
+                    if file_info:
+                        discovered_files.append(file_info)
+
+        except PermissionError as e:
+            raise PermissionError(f"Permission denied accessing directory: {e}")
+        except Exception as e:
+            raise RuntimeError(f"Error scanning directory: {e}")
+
+        # Group files by type
+        files_by_type = {}
+        for file_info in discovered_files:
+            file_type = file_info["file_type"]
+            if file_type not in files_by_type:
+                files_by_type[file_type] = []
+            files_by_type[file_type].append(file_info)
+
+        # Generate directory statistics
+        directory_stats = {
+            "total_files": len(discovered_files),
+            "file_types": list(files_by_type.keys()),
+            "files_by_type_count": {
+                file_type: len(files) for file_type, files in files_by_type.items()
+            },
+            "total_size": sum(f["file_size"] for f in discovered_files),
+            "scan_time": datetime.now().isoformat(),
+            "directory_path": directory_path,
+            "recursive": recursive,
+        }
+
+        return {
+            "discovered_files": discovered_files,
+            "files_by_type": files_by_type,
+            "directory_stats": directory_stats,
+        }
+
+    def _extract_file_info(
+        self,
+        file_path: str,
+        filename: str,
+        include_hidden: bool,
+        file_patterns: List[str],
+        exclude_patterns: List[str],
+    ) -> Optional[Dict[str, Any]]:
+        """Extract metadata from a single file.
+
+        Args:
+            file_path: Full path to the file
+            filename: Name of the file
+            include_hidden: Whether to include hidden files
+            file_patterns: Patterns to include
+            exclude_patterns: Patterns to exclude
+
+        Returns:
+            File information dictionary or None if file should be excluded
+        """
+        # Skip hidden files if not included
+        if not include_hidden and filename.startswith("."):
+            return None
+
+        # Check exclude patterns
+        for pattern in exclude_patterns:
+            if self._matches_pattern(filename, pattern):
+                return None
+
+        # Check include patterns (if specified)
+        if file_patterns:
+            included = any(
+                self._matches_pattern(filename, pattern) for pattern in file_patterns
+            )
+            if not included:
+                return None
+
+        try:
+            # Get file statistics
+            file_stat = os.stat(file_path)
+            file_ext = os.path.splitext(filename)[1].lower()
+
+            # Map extensions to types
+            ext_to_type = {
+                ".csv": "csv",
+                ".json": "json",
+                ".txt": "txt",
+                ".xml": "xml",
+                ".md": "markdown",
+                ".py": "python",
+                ".js": "javascript",
+                ".html": "html",
+                ".css": "css",
+                ".pdf": "pdf",
+                ".doc": "word",
+                ".docx": "word",
+                ".xls": "excel",
+                ".xlsx": "excel",
+                ".png": "image",
+                ".jpg": "image",
+                ".jpeg": "image",
+                ".gif": "image",
+                ".svg": "image",
+            }
+
+            file_type = ext_to_type.get(file_ext, "unknown")
+
+            # Get MIME type
+            mime_type, _ = mimetypes.guess_type(file_path)
+            if not mime_type:
+                mime_type = "application/octet-stream"
+
+            return {
+                "file_path": file_path,
+                "file_name": filename,
+                "file_type": file_type,
+                "file_extension": file_ext,
+                "file_size": file_stat.st_size,
+                "mime_type": mime_type,
+                "created_time": datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
+                "modified_time": datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
+                "discovered_at": datetime.now().isoformat(),
+            }
+
+        except (OSError, PermissionError) as e:
+            # Log error but continue with other files
+            self.logger.warning(f"Could not process file {file_path}: {e}")
+            return None
+
+    def _matches_pattern(self, filename: str, pattern: str) -> bool:
+        """Check if filename matches a glob-style pattern.
+
+        Args:
+            filename: Name of the file to check
+            pattern: Glob pattern (e.g., '*.csv', 'data*', 'file?.txt')
+
+        Returns:
+            True if filename matches pattern
+        """
+        import fnmatch
+
+        return fnmatch.fnmatch(filename, pattern)
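For orientation, a usage sketch of the new node based only on the get_parameters() and run() definitions above; how nodes are constructed and wired into workflows in kailash may differ from this direct call:

    # Hedged usage sketch: direct run() call with the parameters defined above.
    from kailash.nodes.data import DirectoryReaderNode

    reader = DirectoryReaderNode()
    result = reader.run(
        directory_path="./data",
        recursive=True,
        file_patterns=["*.csv", "*.json"],
        exclude_patterns=["*.bak"],
    )
    stats = result["directory_stats"]
    print(f"found {stats['total_files']} files: {stats['files_by_type_count']}")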