kailash 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/nodes/__init__.py +2 -1
- kailash/nodes/ai/__init__.py +26 -0
- kailash/nodes/ai/ai_providers.py +1272 -0
- kailash/nodes/ai/embedding_generator.py +853 -0
- kailash/nodes/ai/llm_agent.py +1166 -0
- kailash/nodes/api/auth.py +3 -3
- kailash/nodes/api/graphql.py +2 -2
- kailash/nodes/api/http.py +391 -44
- kailash/nodes/api/rate_limiting.py +2 -2
- kailash/nodes/api/rest.py +464 -56
- kailash/nodes/base.py +71 -12
- kailash/nodes/code/python.py +2 -1
- kailash/nodes/data/__init__.py +7 -0
- kailash/nodes/data/readers.py +28 -26
- kailash/nodes/data/retrieval.py +178 -0
- kailash/nodes/data/sharepoint_graph.py +7 -7
- kailash/nodes/data/sources.py +65 -0
- kailash/nodes/data/sql.py +4 -2
- kailash/nodes/data/writers.py +6 -3
- kailash/nodes/logic/operations.py +2 -1
- kailash/nodes/mcp/__init__.py +11 -0
- kailash/nodes/mcp/client.py +558 -0
- kailash/nodes/mcp/resource.py +682 -0
- kailash/nodes/mcp/server.py +571 -0
- kailash/nodes/transform/__init__.py +16 -1
- kailash/nodes/transform/chunkers.py +78 -0
- kailash/nodes/transform/formatters.py +96 -0
- kailash/runtime/docker.py +6 -6
- kailash/sdk_exceptions.py +24 -10
- kailash/tracking/metrics_collector.py +2 -1
- kailash/utils/templates.py +6 -6
- {kailash-0.1.1.dist-info → kailash-0.1.2.dist-info}/METADATA +344 -46
- {kailash-0.1.1.dist-info → kailash-0.1.2.dist-info}/RECORD +37 -26
- {kailash-0.1.1.dist-info → kailash-0.1.2.dist-info}/WHEEL +0 -0
- {kailash-0.1.1.dist-info → kailash-0.1.2.dist-info}/entry_points.txt +0 -0
- {kailash-0.1.1.dist-info → kailash-0.1.2.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.1.1.dist-info → kailash-0.1.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,571 @@
|
|
1
|
+
"""MCP Server node for hosting Model Context Protocol resources and tools."""
|
2
|
+
|
3
|
+
import json
|
4
|
+
from typing import Any, Dict, List
|
5
|
+
|
6
|
+
from kailash.nodes.base import Node, NodeParameter, register_node
|
7
|
+
|
8
|
+
|
9
|
+
@register_node()
|
10
|
+
class MCPServer(Node):
|
11
|
+
"""
|
12
|
+
Server node for hosting Model Context Protocol (MCP) resources and tools.
|
13
|
+
|
14
|
+
Design Purpose and Philosophy:
|
15
|
+
The MCPServer node allows workflows to expose their data and functionality as
|
16
|
+
standardized MCP resources and tools. This enables other AI applications and
|
17
|
+
agents to discover and interact with workflow capabilities through the MCP protocol.
|
18
|
+
|
19
|
+
Upstream Dependencies:
|
20
|
+
- Resource data to expose (files, databases, APIs)
|
21
|
+
- Tool implementations to register with the server
|
22
|
+
- Prompt templates to make available to clients
|
23
|
+
- Server configuration and authentication settings
|
24
|
+
|
25
|
+
Downstream Consumers:
|
26
|
+
- MCP clients that connect to discover resources
|
27
|
+
- AI applications that need workflow context
|
28
|
+
- Other Kailash workflows acting as MCP clients
|
29
|
+
- External tools and services supporting MCP
|
30
|
+
|
31
|
+
Usage Patterns:
|
32
|
+
1. Start MCP server with specified resources and tools
|
33
|
+
2. Register dynamic resources that update in real-time
|
34
|
+
3. Expose workflow capabilities as callable tools
|
35
|
+
4. Provide prompt templates for standardized interactions
|
36
|
+
5. Handle client connections and protocol compliance
|
37
|
+
|
38
|
+
Implementation Details:
|
39
|
+
- Targets the FastMCP framework; the current implementation is a mock that only generates FastMCP server code
|
40
|
+
- Supports stdio, SSE, and HTTP transports automatically
|
41
|
+
- Implements proper resource discovery and metadata
|
42
|
+
- Provides authentication and access control mechanisms
|
43
|
+
- Handles concurrent client connections efficiently
|
44
|
+
|
45
|
+
Error Handling:
|
46
|
+
- ServerStartupError: When server fails to initialize
|
47
|
+
- ResourceRegistrationError: When resources cannot be registered
|
48
|
+
- ToolExecutionError: When tool calls fail during execution
|
49
|
+
- ClientConnectionError: When client connections are rejected
|
50
|
+
- ProtocolViolationError: When clients violate MCP protocol
|
51
|
+
|
52
|
+
Side Effects:
|
53
|
+
- Intended to start a network server process listening on specified ports (the current mock implementation does not start one)
|
54
|
+
- Registers resources and tools in the MCP protocol registry
|
55
|
+
- May modify external systems when tools are executed
|
56
|
+
- Logs server events and client interactions
|
57
|
+
|
58
|
+
Examples:
|
59
|
+
```python
|
60
|
+
# Start a basic MCP server with resources
|
61
|
+
server = MCPServer()
|
62
|
+
result = server.run(
|
63
|
+
server_config={
|
64
|
+
"name": "workflow-server",
|
65
|
+
"transport": "stdio"
|
66
|
+
},
|
67
|
+
resources=[
|
68
|
+
{
|
69
|
+
"uri": "workflow://current/status",
|
70
|
+
"name": "Workflow Status",
|
71
|
+
"content": "Running workflow with 5 active nodes"
|
72
|
+
}
|
73
|
+
],
|
74
|
+
tools=[
|
75
|
+
{
|
76
|
+
"name": "execute_node",
|
77
|
+
"description": "Execute a specific workflow node",
|
78
|
+
"parameters": {
|
79
|
+
"node_id": {"type": "string", "required": True}
|
80
|
+
}
|
81
|
+
}
|
82
|
+
]
|
83
|
+
)
|
84
|
+
|
85
|
+
# Register dynamic resources
|
86
|
+
server_with_dynamic = MCPServer()
|
87
|
+
result = server_with_dynamic.run(
|
88
|
+
server_config={
|
89
|
+
"name": "data-server",
|
90
|
+
"transport": "http",
|
91
|
+
"port": 8080
|
92
|
+
},
|
93
|
+
resource_providers={
|
94
|
+
"database://tables/*": "list_database_tables",
|
95
|
+
"file://workspace/*": "list_workspace_files"
|
96
|
+
}
|
97
|
+
)
|
98
|
+
```
|
99
|
+
"""
|
100
|
+
|
101
|
+
def get_parameters(self) -> Dict[str, NodeParameter]:
    """Describe the inputs accepted by the MCP server node.

    All eight inputs are optional and each carries a default value.

    Returns:
        Mapping of parameter name to its ``NodeParameter`` definition, in
        declaration order.
    """
    # One row per input: (name, type, default, description). The literal is
    # evaluated on every call, so each call gets fresh default containers.
    specs = (
        (
            "server_config",
            dict,
            {},
            "MCP server configuration (name, transport, port, etc.)",
        ),
        (
            "resources",
            list,
            [],
            "Static resources to expose (list of resource objects)",
        ),
        (
            "tools",
            list,
            [],
            "Tools to register with the server (list of tool definitions)",
        ),
        (
            "prompts",
            list,
            [],
            "Prompt templates to make available (list of prompt objects)",
        ),
        (
            "resource_providers",
            dict,
            {},
            "Dynamic resource providers (URI pattern -> provider function)",
        ),
        (
            "authentication",
            dict,
            {},
            "Authentication configuration (type, credentials, etc.)",
        ),
        (
            "auto_start",
            bool,
            True,
            "Whether to automatically start the server",
        ),
        (
            "max_connections",
            int,
            10,
            "Maximum number of concurrent client connections",
        ),
    )
    return {
        key: NodeParameter(
            name=key,
            type=kind,
            required=False,
            default=fallback,
            description=text,
        )
        for key, kind, fallback, text in specs
    }
|
160
|
+
|
161
|
+
def run(self, **kwargs) -> Dict[str, Any]:
    """Configure the (currently mocked) MCP server and report the outcome.

    Args:
        **kwargs: Node inputs, all optional:
            server_config (dict): Server settings; ``name``, ``transport``,
                ``port`` and ``host`` are read here.
            resources (list): Static resource definitions.
            tools (list): Tool definitions.
            prompts (list): Prompt definitions.
            resource_providers (dict): URI pattern -> provider name.
            authentication (dict): Authentication settings.
            auto_start (bool): Defaults to ``True``.
            max_connections (int): Defaults to ``10``.

    Returns:
        The dictionary produced by ``_mock_mcp_server`` when the MCP SDK
        cannot be imported, otherwise the one produced by
        ``_mock_fastmcp_server``. If anything raises along the way, a
        dictionary with ``success=False``, ``error``, ``error_type`` and the
        ``server_config`` that was used.
    """
    # A missing key or an explicit None is treated like the declared default,
    # so a bare ``run()`` cannot fail with KeyError before the error handler
    # below is even armed.
    server_config = kwargs.get("server_config") or {}
    resources = kwargs.get("resources") or []
    tools = kwargs.get("tools") or []
    prompts = kwargs.get("prompts") or []
    resource_providers = kwargs.get("resource_providers") or {}
    authentication = kwargs.get("authentication") or {}
    auto_start = kwargs.get("auto_start", True)
    max_connections = kwargs.get("max_connections", 10)

    try:
        # Probe for the MCP SDK. The imported names are not used; the imports
        # only decide which mock path is taken.
        try:
            from mcp.server import Server  # noqa: F401
            from mcp.server.fastmcp import FastMCP  # noqa: F401
            from mcp.types import Prompt, Resource, Tool  # noqa: F401

            mcp_available = True
        except ImportError:
            mcp_available = False

        if not mcp_available:
            # Provide mock functionality when MCP SDK is not available
            return self._mock_mcp_server(
                server_config,
                resources,
                tools,
                prompts,
                resource_providers,
                authentication,
                auto_start,
                max_connections,
            )

        # Extract server configuration
        server_name = server_config.get("name", "kailash-server")
        transport_type = server_config.get("transport", "stdio")
        port = server_config.get("port", 8080)
        host = server_config.get("host", "localhost")

        # For now, provide mock implementation as we need proper MCP server setup
        return self._mock_fastmcp_server(
            server_name,
            transport_type,
            host,
            port,
            resources,
            tools,
            prompts,
            resource_providers,
            authentication,
            auto_start,
            max_connections,
        )

    except Exception as e:
        # Failures are reported in the result payload rather than raised.
        return {
            "success": False,
            "error": str(e),
            "error_type": type(e).__name__,
            "server_config": server_config,
        }
|
223
|
+
|
224
|
+
def _mock_mcp_server(
|
225
|
+
self,
|
226
|
+
server_config: dict,
|
227
|
+
resources: List[dict],
|
228
|
+
tools: List[dict],
|
229
|
+
prompts: List[dict],
|
230
|
+
resource_providers: dict,
|
231
|
+
authentication: dict,
|
232
|
+
auto_start: bool,
|
233
|
+
max_connections: int,
|
234
|
+
) -> Dict[str, Any]:
|
235
|
+
"""Mock MCP server when SDK is not available."""
|
236
|
+
server_name = server_config.get("name", "mock-server")
|
237
|
+
transport = server_config.get("transport", "stdio")
|
238
|
+
|
239
|
+
# Validate resources
|
240
|
+
validated_resources = []
|
241
|
+
for resource in resources:
|
242
|
+
if not isinstance(resource, dict):
|
243
|
+
continue
|
244
|
+
|
245
|
+
uri = resource.get("uri")
|
246
|
+
name = resource.get("name", uri)
|
247
|
+
description = resource.get("description", f"Resource: {name}")
|
248
|
+
|
249
|
+
if uri:
|
250
|
+
validated_resources.append(
|
251
|
+
{
|
252
|
+
"uri": uri,
|
253
|
+
"name": name,
|
254
|
+
"description": description,
|
255
|
+
"mimeType": resource.get("mimeType", "text/plain"),
|
256
|
+
"content": resource.get("content"),
|
257
|
+
}
|
258
|
+
)
|
259
|
+
|
260
|
+
# Validate tools
|
261
|
+
validated_tools = []
|
262
|
+
for tool in tools:
|
263
|
+
if not isinstance(tool, dict):
|
264
|
+
continue
|
265
|
+
|
266
|
+
name = tool.get("name")
|
267
|
+
description = tool.get("description", f"Tool: {name}")
|
268
|
+
|
269
|
+
if name:
|
270
|
+
validated_tools.append(
|
271
|
+
{
|
272
|
+
"name": name,
|
273
|
+
"description": description,
|
274
|
+
"inputSchema": tool.get("parameters", {}),
|
275
|
+
"handler": tool.get("handler", f"mock_handler_{name}"),
|
276
|
+
}
|
277
|
+
)
|
278
|
+
|
279
|
+
# Validate prompts
|
280
|
+
validated_prompts = []
|
281
|
+
for prompt in prompts:
|
282
|
+
if not isinstance(prompt, dict):
|
283
|
+
continue
|
284
|
+
|
285
|
+
name = prompt.get("name")
|
286
|
+
description = prompt.get("description", f"Prompt: {name}")
|
287
|
+
|
288
|
+
if name:
|
289
|
+
validated_prompts.append(
|
290
|
+
{
|
291
|
+
"name": name,
|
292
|
+
"description": description,
|
293
|
+
"arguments": prompt.get("arguments", []),
|
294
|
+
"template": prompt.get("template", f"Mock template for {name}"),
|
295
|
+
}
|
296
|
+
)
|
297
|
+
|
298
|
+
# Mock server status
|
299
|
+
server_status = {
|
300
|
+
"name": server_name,
|
301
|
+
"transport": transport,
|
302
|
+
"status": "running" if auto_start else "configured",
|
303
|
+
"pid": 12345, # Mock process ID
|
304
|
+
"started_at": "2025-06-01T12:00:00Z",
|
305
|
+
"uptime": "0:00:05",
|
306
|
+
"connections": {"active": 0, "total": 0, "max": max_connections},
|
307
|
+
"capabilities": {
|
308
|
+
"resources": True,
|
309
|
+
"tools": True,
|
310
|
+
"prompts": True,
|
311
|
+
"logging": True,
|
312
|
+
},
|
313
|
+
}
|
314
|
+
|
315
|
+
if transport == "http":
|
316
|
+
host = server_config.get("host", "localhost")
|
317
|
+
port = server_config.get("port", 8080)
|
318
|
+
server_status.update(
|
319
|
+
{
|
320
|
+
"host": host,
|
321
|
+
"port": port,
|
322
|
+
"url": f"http://{host}:{port}",
|
323
|
+
"endpoints": {
|
324
|
+
"sse": f"http://{host}:{port}/sse",
|
325
|
+
"resources": f"http://{host}:{port}/resources",
|
326
|
+
"tools": f"http://{host}:{port}/tools",
|
327
|
+
"prompts": f"http://{host}:{port}/prompts",
|
328
|
+
},
|
329
|
+
}
|
330
|
+
)
|
331
|
+
|
332
|
+
return {
|
333
|
+
"success": True,
|
334
|
+
"server": server_status,
|
335
|
+
"resources": {
|
336
|
+
"registered": validated_resources,
|
337
|
+
"count": len(validated_resources),
|
338
|
+
"providers": (
|
339
|
+
list(resource_providers.keys()) if resource_providers else []
|
340
|
+
),
|
341
|
+
},
|
342
|
+
"tools": {"registered": validated_tools, "count": len(validated_tools)},
|
343
|
+
"prompts": {
|
344
|
+
"registered": validated_prompts,
|
345
|
+
"count": len(validated_prompts),
|
346
|
+
},
|
347
|
+
"authentication": {
|
348
|
+
"enabled": bool(authentication),
|
349
|
+
"type": authentication.get("type", "none"),
|
350
|
+
},
|
351
|
+
"mock": True,
|
352
|
+
"message": f"Mock MCP server '{server_name}' configured successfully",
|
353
|
+
}
|
354
|
+
|
355
|
+
def _mock_fastmcp_server(
|
356
|
+
self,
|
357
|
+
server_name: str,
|
358
|
+
transport_type: str,
|
359
|
+
host: str,
|
360
|
+
port: int,
|
361
|
+
resources: List[dict],
|
362
|
+
tools: List[dict],
|
363
|
+
prompts: List[dict],
|
364
|
+
resource_providers: dict,
|
365
|
+
authentication: dict,
|
366
|
+
auto_start: bool,
|
367
|
+
max_connections: int,
|
368
|
+
) -> Dict[str, Any]:
|
369
|
+
"""Mock FastMCP server implementation."""
|
370
|
+
|
371
|
+
# Create mock FastMCP server configuration
|
372
|
+
server_code = f"""
|
373
|
+
# Mock FastMCP server code for {server_name}
|
374
|
+
from mcp.server.fastmcp import FastMCP
|
375
|
+
|
376
|
+
# Create server instance
|
377
|
+
mcp = FastMCP("{server_name}")
|
378
|
+
|
379
|
+
# Register resources
|
380
|
+
{self._generate_resource_code(resources)}
|
381
|
+
|
382
|
+
# Register tools
|
383
|
+
{self._generate_tool_code(tools)}
|
384
|
+
|
385
|
+
# Register prompts
|
386
|
+
{self._generate_prompt_code(prompts)}
|
387
|
+
|
388
|
+
# Dynamic resource providers
|
389
|
+
{self._generate_provider_code(resource_providers)}
|
390
|
+
|
391
|
+
if __name__ == "__main__":
|
392
|
+
mcp.run()
|
393
|
+
"""
|
394
|
+
|
395
|
+
# Mock server startup
|
396
|
+
startup_info = {
|
397
|
+
"server_name": server_name,
|
398
|
+
"transport": transport_type,
|
399
|
+
"generated_code": server_code,
|
400
|
+
"status": "ready" if auto_start else "configured",
|
401
|
+
"resources_count": len(resources),
|
402
|
+
"tools_count": len(tools),
|
403
|
+
"prompts_count": len(prompts),
|
404
|
+
"providers_count": len(resource_providers),
|
405
|
+
}
|
406
|
+
|
407
|
+
if transport_type == "http":
|
408
|
+
startup_info.update(
|
409
|
+
{
|
410
|
+
"host": host,
|
411
|
+
"port": port,
|
412
|
+
"url": f"http://{host}:{port}",
|
413
|
+
"sse_endpoint": f"http://{host}:{port}/sse",
|
414
|
+
}
|
415
|
+
)
|
416
|
+
|
417
|
+
return {
|
418
|
+
"success": True,
|
419
|
+
"server": startup_info,
|
420
|
+
"code": server_code,
|
421
|
+
"mock": True,
|
422
|
+
"next_steps": [
|
423
|
+
"Save the generated code to a Python file",
|
424
|
+
"Install MCP dependencies: pip install 'mcp[cli]'",
|
425
|
+
"Run the server: python server_file.py",
|
426
|
+
"Connect clients using the specified transport",
|
427
|
+
],
|
428
|
+
}
|
429
|
+
|
430
|
+
def _generate_resource_code(self, resources: List[dict]) -> str:
|
431
|
+
"""Generate Python code for resource registration."""
|
432
|
+
if not resources:
|
433
|
+
return "# No static resources defined"
|
434
|
+
|
435
|
+
code_lines = []
|
436
|
+
for resource in resources:
|
437
|
+
uri = resource.get("uri", "")
|
438
|
+
content = resource.get("content", "")
|
439
|
+
name = resource.get("name", uri)
|
440
|
+
|
441
|
+
# Escape strings for Python code
|
442
|
+
content_escaped = json.dumps(content) if content else '""'
|
443
|
+
|
444
|
+
code_lines.append(f'@mcp.resource("{uri}")')
|
445
|
+
code_lines.append(f"def get_{self._sanitize_name(uri)}():")
|
446
|
+
code_lines.append(f' """Resource: {name}"""')
|
447
|
+
code_lines.append(f" return {content_escaped}")
|
448
|
+
code_lines.append("")
|
449
|
+
|
450
|
+
return "\n".join(code_lines)
|
451
|
+
|
452
|
+
def _generate_tool_code(self, tools: List[dict]) -> str:
|
453
|
+
"""Generate Python code for tool registration."""
|
454
|
+
if not tools:
|
455
|
+
return "# No tools defined"
|
456
|
+
|
457
|
+
code_lines = []
|
458
|
+
for tool in tools:
|
459
|
+
name = tool.get("name", "")
|
460
|
+
description = tool.get("description", "")
|
461
|
+
parameters = tool.get("parameters", {})
|
462
|
+
|
463
|
+
# Generate function parameters from schema
|
464
|
+
param_list = []
|
465
|
+
|
466
|
+
# Handle OpenAPI schema format
|
467
|
+
if isinstance(parameters, dict) and "properties" in parameters:
|
468
|
+
properties = parameters.get("properties", {})
|
469
|
+
required = parameters.get("required", [])
|
470
|
+
|
471
|
+
for param_name, param_info in properties.items():
|
472
|
+
param_type = (
|
473
|
+
param_info.get("type", "str")
|
474
|
+
if isinstance(param_info, dict)
|
475
|
+
else "str"
|
476
|
+
)
|
477
|
+
if param_name in required:
|
478
|
+
param_list.append(f"{param_name}: {param_type}")
|
479
|
+
else:
|
480
|
+
param_list.append(f"{param_name}: {param_type} = None")
|
481
|
+
# Handle simple parameter format
|
482
|
+
elif isinstance(parameters, dict):
|
483
|
+
for param_name, param_info in parameters.items():
|
484
|
+
if isinstance(param_info, dict):
|
485
|
+
param_type = param_info.get("type", "str")
|
486
|
+
if param_info.get("required", False):
|
487
|
+
param_list.append(f"{param_name}: {param_type}")
|
488
|
+
else:
|
489
|
+
param_list.append(f"{param_name}: {param_type} = None")
|
490
|
+
else:
|
491
|
+
param_list.append(f"{param_name}: str = None")
|
492
|
+
|
493
|
+
param_str = ", ".join(param_list) if param_list else ""
|
494
|
+
|
495
|
+
code_lines.append("@mcp.tool()")
|
496
|
+
code_lines.append(f"def {name}({param_str}):")
|
497
|
+
code_lines.append(f' """{description}"""')
|
498
|
+
code_lines.append(" # Mock tool implementation")
|
499
|
+
code_lines.append(
|
500
|
+
f' return {{"tool": "{name}", "status": "executed", "parameters": locals()}}'
|
501
|
+
)
|
502
|
+
code_lines.append("")
|
503
|
+
|
504
|
+
return "\n".join(code_lines)
|
505
|
+
|
506
|
+
def _generate_prompt_code(self, prompts: List[dict]) -> str:
|
507
|
+
"""Generate Python code for prompt registration."""
|
508
|
+
if not prompts:
|
509
|
+
return "# No prompts defined"
|
510
|
+
|
511
|
+
code_lines = []
|
512
|
+
for prompt in prompts:
|
513
|
+
name = prompt.get("name", "")
|
514
|
+
template = prompt.get("template", "")
|
515
|
+
arguments = prompt.get("arguments", [])
|
516
|
+
|
517
|
+
# Generate function parameters from arguments
|
518
|
+
param_list = []
|
519
|
+
for arg in arguments:
|
520
|
+
if isinstance(arg, dict):
|
521
|
+
arg_name = arg.get("name", "")
|
522
|
+
if arg.get("required", False):
|
523
|
+
param_list.append(f"{arg_name}: str")
|
524
|
+
else:
|
525
|
+
param_list.append(f"{arg_name}: str = ''")
|
526
|
+
|
527
|
+
param_str = ", ".join(param_list) if param_list else ""
|
528
|
+
|
529
|
+
code_lines.append(f'@mcp.prompt("{name}")')
|
530
|
+
code_lines.append(f"def {name}_prompt({param_str}):")
|
531
|
+
code_lines.append(f' """Prompt: {name}"""')
|
532
|
+
if template:
|
533
|
+
template_escaped = json.dumps(template)
|
534
|
+
code_lines.append(f" template = {template_escaped}")
|
535
|
+
code_lines.append(" return template.format(**locals())")
|
536
|
+
else:
|
537
|
+
code_lines.append(
|
538
|
+
f' return f"Mock prompt: {name} with args: {{locals()}}"'
|
539
|
+
)
|
540
|
+
code_lines.append("")
|
541
|
+
|
542
|
+
return "\n".join(code_lines)
|
543
|
+
|
544
|
+
def _generate_provider_code(self, providers: dict) -> str:
|
545
|
+
"""Generate Python code for dynamic resource providers."""
|
546
|
+
if not providers:
|
547
|
+
return "# No dynamic resource providers defined"
|
548
|
+
|
549
|
+
code_lines = []
|
550
|
+
for pattern, provider_func in providers.items():
|
551
|
+
sanitized_pattern = self._sanitize_name(pattern)
|
552
|
+
|
553
|
+
code_lines.append(f'@mcp.resource("{pattern}")')
|
554
|
+
code_lines.append(f"def dynamic_{sanitized_pattern}(**kwargs):")
|
555
|
+
code_lines.append(f' """Dynamic resource provider for {pattern}"""')
|
556
|
+
code_lines.append(" # Mock dynamic resource implementation")
|
557
|
+
code_lines.append(' return f"Dynamic content for {kwargs}"')
|
558
|
+
code_lines.append("")
|
559
|
+
|
560
|
+
return "\n".join(code_lines)
|
561
|
+
|
562
|
+
def _sanitize_name(self, name: str) -> str:
|
563
|
+
"""Sanitize a name for use as Python identifier."""
|
564
|
+
import re
|
565
|
+
|
566
|
+
# Replace non-alphanumeric characters with underscores
|
567
|
+
sanitized = re.sub(r"[^a-zA-Z0-9_]", "_", name)
|
568
|
+
# Ensure it starts with a letter or underscore
|
569
|
+
if sanitized and sanitized[0].isdigit():
|
570
|
+
sanitized = f"r_{sanitized}"
|
571
|
+
return sanitized or "unnamed"
|
@@ -1,5 +1,20 @@
|
|
1
1
|
"""Transform processing nodes for the Kailash SDK."""
|
2
2
|
|
3
|
+
from kailash.nodes.transform.chunkers import HierarchicalChunkerNode
|
4
|
+
from kailash.nodes.transform.formatters import (
|
5
|
+
ChunkTextExtractorNode,
|
6
|
+
ContextFormatterNode,
|
7
|
+
QueryTextWrapperNode,
|
8
|
+
)
|
3
9
|
from kailash.nodes.transform.processors import DataTransformer, Filter, Map, Sort
|
4
10
|
|
5
|
-
__all__ = [
|
11
|
+
__all__ = [
|
12
|
+
"Filter",
|
13
|
+
"Map",
|
14
|
+
"Sort",
|
15
|
+
"DataTransformer",
|
16
|
+
"HierarchicalChunkerNode",
|
17
|
+
"ChunkTextExtractorNode",
|
18
|
+
"QueryTextWrapperNode",
|
19
|
+
"ContextFormatterNode",
|
20
|
+
]
|
@@ -0,0 +1,78 @@
|
|
1
|
+
"""Document chunking nodes for splitting text into manageable pieces."""
|
2
|
+
|
3
|
+
from typing import Any, Dict
|
4
|
+
|
5
|
+
from kailash.nodes.base import Node, NodeParameter, register_node
|
6
|
+
|
7
|
+
|
8
|
+
@register_node()
|
9
|
+
class HierarchicalChunkerNode(Node):
|
10
|
+
"""Splits documents into hierarchical chunks for better retrieval."""
|
11
|
+
|
12
|
+
def get_parameters(self) -> Dict[str, NodeParameter]:
    """Describe the inputs accepted by the chunker node.

    Returns:
        Mapping with three optional parameters: ``documents`` (no default),
        ``chunk_size`` (default 200) and ``overlap`` (default 50).
    """
    params: Dict[str, NodeParameter] = {}

    params["documents"] = NodeParameter(
        name="documents",
        type=list,
        required=False,
        description="List of documents to chunk",
    )

    # Both numeric settings share the same shape; only the name, default and
    # description differ.
    numeric_settings = (
        ("chunk_size", 200, "Target size for text chunks"),
        ("overlap", 50, "Overlap between chunks"),
    )
    for key, fallback, text in numeric_settings:
        params[key] = NodeParameter(
            name=key,
            type=int,
            required=False,
            default=fallback,
            description=text,
        )

    return params
|
35
|
+
|
36
|
+
def run(self, **kwargs) -> Dict[str, Any]:
    """Split each document into sentence-aligned chunks.

    The content is split on ``". "`` and sentences are accumulated into a
    chunk until adding the next one would reach ``chunk_size`` characters.

    Args:
        **kwargs: Node inputs:
            documents (list): Dicts with ``content``, ``id`` and ``title``
                keys. Missing or ``None`` is treated as no documents.
            chunk_size (int): Target chunk length in characters; defaults to
                200.
            overlap (int): Accepted but not used by the chunking logic.

    Returns:
        ``{"chunks": [...]}`` where each chunk is a dict with ``chunk_id``,
        ``document_id``, ``document_title``, ``chunk_index``, ``content`` and
        ``hierarchy_level`` (always ``"paragraph"``).

    Raises:
        KeyError: If a document lacks ``content``, ``id`` or ``title``.
    """
    import logging

    documents = kwargs.get("documents") or []
    chunk_size = kwargs.get("chunk_size", 200)
    if chunk_size is None:
        chunk_size = 200

    # Diagnostics go through logging rather than stdout so library users can
    # control them.
    logging.getLogger(__name__).debug(
        "Chunker received %d documents", len(documents)
    )

    all_chunks = []

    for doc in documents:
        content = doc["content"]
        doc_id = doc["id"]
        title = doc["title"]

        # Simple sentence-aware chunking
        sentences = content.split(". ")
        last_index = len(sentences) - 1
        chunks = []
        current_chunk = ""

        for index, sentence in enumerate(sentences):
            # split() consumed the ". " separators: restore one after every
            # sentence except the last, which keeps its own ending. Appending
            # it unconditionally would double the final period and turn an
            # empty document into a "." chunk.
            piece = sentence if index == last_index else sentence + ". "

            if len(current_chunk) + len(sentence) < chunk_size:
                current_chunk += piece
            else:
                if current_chunk.strip():
                    chunks.append(current_chunk.strip())
                current_chunk = piece

        if current_chunk.strip():
            chunks.append(current_chunk.strip())

        # Create hierarchical chunk structure
        for i, chunk in enumerate(chunks):
            all_chunks.append(
                {
                    "chunk_id": f"{doc_id}_chunk_{i}",
                    "document_id": doc_id,
                    "document_title": title,
                    "chunk_index": i,
                    "content": chunk,
                    "hierarchy_level": "paragraph",
                }
            )

    return {"chunks": all_chunks}
|