kailash 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/api/__init__.py +7 -0
- kailash/api/workflow_api.py +383 -0
- kailash/nodes/__init__.py +2 -1
- kailash/nodes/ai/__init__.py +26 -0
- kailash/nodes/ai/ai_providers.py +1272 -0
- kailash/nodes/ai/embedding_generator.py +853 -0
- kailash/nodes/ai/llm_agent.py +1166 -0
- kailash/nodes/api/auth.py +3 -3
- kailash/nodes/api/graphql.py +2 -2
- kailash/nodes/api/http.py +391 -48
- kailash/nodes/api/rate_limiting.py +2 -2
- kailash/nodes/api/rest.py +465 -57
- kailash/nodes/base.py +71 -12
- kailash/nodes/code/python.py +2 -1
- kailash/nodes/data/__init__.py +7 -0
- kailash/nodes/data/readers.py +28 -26
- kailash/nodes/data/retrieval.py +178 -0
- kailash/nodes/data/sharepoint_graph.py +7 -7
- kailash/nodes/data/sources.py +65 -0
- kailash/nodes/data/sql.py +7 -5
- kailash/nodes/data/vector_db.py +2 -2
- kailash/nodes/data/writers.py +6 -3
- kailash/nodes/logic/__init__.py +2 -1
- kailash/nodes/logic/operations.py +2 -1
- kailash/nodes/logic/workflow.py +439 -0
- kailash/nodes/mcp/__init__.py +11 -0
- kailash/nodes/mcp/client.py +558 -0
- kailash/nodes/mcp/resource.py +682 -0
- kailash/nodes/mcp/server.py +577 -0
- kailash/nodes/transform/__init__.py +16 -1
- kailash/nodes/transform/chunkers.py +78 -0
- kailash/nodes/transform/formatters.py +96 -0
- kailash/nodes/transform/processors.py +5 -3
- kailash/runtime/docker.py +8 -6
- kailash/sdk_exceptions.py +24 -10
- kailash/tracking/metrics_collector.py +2 -1
- kailash/tracking/models.py +0 -20
- kailash/tracking/storage/database.py +4 -4
- kailash/tracking/storage/filesystem.py +0 -1
- kailash/utils/templates.py +6 -6
- kailash/visualization/performance.py +7 -7
- kailash/visualization/reports.py +1 -1
- kailash/workflow/graph.py +4 -4
- kailash/workflow/mock_registry.py +1 -1
- {kailash-0.1.1.dist-info → kailash-0.1.3.dist-info}/METADATA +441 -47
- kailash-0.1.3.dist-info/RECORD +83 -0
- kailash-0.1.1.dist-info/RECORD +0 -69
- {kailash-0.1.1.dist-info → kailash-0.1.3.dist-info}/WHEEL +0 -0
- {kailash-0.1.1.dist-info → kailash-0.1.3.dist-info}/entry_points.txt +0 -0
- {kailash-0.1.1.dist-info → kailash-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.1.1.dist-info → kailash-0.1.3.dist-info}/top_level.txt +0 -0
kailash/nodes/transform/formatters.py
ADDED
@@ -0,0 +1,96 @@
+"""Text formatting nodes for transforming and preparing text data."""
+
+from typing import Any, Dict
+
+from kailash.nodes.base import Node, NodeParameter, register_node
+
+
+@register_node()
+class ChunkTextExtractorNode(Node):
+    """Extracts text content from chunks for embedding generation."""
+
+    def get_parameters(self) -> Dict[str, NodeParameter]:
+        return {
+            "chunks": NodeParameter(
+                name="chunks",
+                type=list,
+                required=False,
+                description="List of chunks to extract text from",
+            )
+        }
+
+    def run(self, **kwargs) -> Dict[str, Any]:
+        chunks = kwargs.get("chunks", [])
+        # Extract just the content text from chunks
+        texts = [chunk["content"] for chunk in chunks]
+        return {"input_texts": texts}
+
+
+@register_node()
+class QueryTextWrapperNode(Node):
+    """Wraps query string in list for embedding generation."""
+
+    def get_parameters(self) -> Dict[str, NodeParameter]:
+        return {
+            "query": NodeParameter(
+                name="query",
+                type=str,
+                required=False,
+                description="Query string to wrap",
+            )
+        }
+
+    def run(self, **kwargs) -> Dict[str, Any]:
+        query = kwargs.get("query", "")
+        print(f"Debug QueryTextWrapper: received query='{query}'")
+        # Use input_texts for batch embedding (single item list)
+        result = {"input_texts": [query]}
+        print(f"Debug QueryTextWrapper: returning {result}")
+        return result
+
+
+@register_node()
+class ContextFormatterNode(Node):
+    """Formats relevant chunks into context for LLM."""
+
+    def get_parameters(self) -> Dict[str, NodeParameter]:
+        return {
+            "relevant_chunks": NodeParameter(
+                name="relevant_chunks",
+                type=list,
+                required=False,
+                description="List of relevant chunks with scores",
+            ),
+            "query": NodeParameter(
+                name="query",
+                type=str,
+                required=False,
+                description="Original query string",
+            ),
+        }
+
+    def run(self, **kwargs) -> Dict[str, Any]:
+        relevant_chunks = kwargs.get("relevant_chunks", [])
+        query = kwargs.get("query", "")
+        # Format context from relevant chunks
+        context_parts = []
+        for chunk in relevant_chunks:
+            context_parts.append(
+                f"From '{chunk['document_title']}' (Score: {chunk['relevance_score']:.3f}):\n"
+                f"{chunk['content']}\n"
+            )
+
+        context = "\n".join(context_parts)
+
+        # Create prompt for LLM
+        prompt = f"""Based on the following context, please answer the question: "{query}"
+
+Context:
+{context}
+
+Please provide a comprehensive answer based on the information provided above."""
+
+        # Create messages list for LLMAgent
+        messages = [{"role": "user", "content": prompt}]
+
+        return {"formatted_prompt": prompt, "messages": messages, "context": context}
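All three nodes above share the same contract: get_parameters() declares the accepted inputs and run(**kwargs) returns a plain dict. As rough orientation, ContextFormatterNode could be exercised on its own as follows (a minimal sketch, assuming Node subclasses take no constructor arguments; the chunk values are invented):

# Hypothetical standalone use of ContextFormatterNode from this new module.
from kailash.nodes.transform.formatters import ContextFormatterNode

chunks = [
    {
        "document_title": "Install Guide",    # field read by run() above
        "relevance_score": 0.91,              # formatted to three decimals
        "content": "Run pip install kailash to get started.",
    }
]

formatter = ContextFormatterNode()  # assumption: no constructor arguments needed
output = formatter.run(relevant_chunks=chunks, query="How do I install the SDK?")

print(output["messages"][0]["role"])       # -> user
print(output["context"].splitlines()[0])   # -> From 'Install Guide' (Score: 0.910):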
kailash/nodes/transform/processors.py
CHANGED
@@ -261,7 +261,7 @@ class DataTransformer(Node):
                     local_vars["result"] = result
 
                     # Execute the code block
-                    exec(transform_str, safe_globals, local_vars)
+                    exec(transform_str, safe_globals, local_vars)  # noqa: S102
 
                     # Extract the result from local context
                     result = local_vars.get("result", result)
@@ -271,7 +271,7 @@ class DataTransformer(Node):
                 # For lambda functions like: "lambda x: x * 2"
                 if transform_str.strip().startswith("lambda"):
                     # First, compile the lambda function
-                    lambda_func = eval(transform_str, safe_globals)
+                    lambda_func = eval(transform_str, safe_globals)  # noqa: S307
 
                     # Apply the lambda function based on input data
                     if isinstance(result, list):
@@ -324,7 +324,9 @@ class DataTransformer(Node):
                     else:
                         local_vars = input_data.copy()
                         local_vars["result"] = result
-                        result = eval(transform_str, safe_globals, local_vars)
+                        result = eval(
+                            transform_str, safe_globals, local_vars
+                        )  # noqa: S307
 
         except Exception as e:
             tb = traceback.format_exc()
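The # noqa markers added above only silence the S102/S307 lint warnings for exec and eval; the transformation mechanics are unchanged. For orientation, the lambda branch works roughly like this (an illustrative sketch with hypothetical values, not the node's full implementation):

# Sketch of the "lambda string" transformation pattern used by DataTransformer.
transform_str = "lambda x: x * 2"      # user-supplied transformation string
safe_globals = {"__builtins__": {}}    # assumed restricted globals; the real set is not shown in this diff

lambda_func = eval(transform_str, safe_globals)  # noqa: S307

result = [1, 2, 3]
if isinstance(result, list):
    result = [lambda_func(item) for item in result]

print(result)  # [2, 4, 6]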
kailash/runtime/docker.py
CHANGED
@@ -174,7 +174,7 @@ def main():
     logger.info(f"Loaded configuration for {node_data['class']} node")
 
     # Load runtime inputs if available
-    input_path = Path("/data/input/inputs.json")
+    input_path = Path("/examples/data/input/inputs.json")
     runtime_inputs = {}
     if input_path.exists():
         logger.info(f"Loading inputs from {input_path}")
@@ -206,7 +206,7 @@ def main():
     except Exception as e:
         logger.error(f"Node execution failed: {e}")
         # Save error information
-        with open("/data/output/error.json", 'w') as f:
+        with open("/examples/data/output/error.json", 'w') as f:
             json.dump({
                 "error": str(e),
                 "type": e.__class__.__name__
@@ -216,7 +216,7 @@ def main():
     # Save results
     logger.info("Saving execution results")
     try:
-        result_path = Path("/data/output/result.json")
+        result_path = Path("/examples/data/output/result.json")
         with open(result_path, 'w') as f:
             # Handle non-serializable objects with basic conversion
             try:
@@ -266,7 +266,7 @@ COPY node.json /app/node.json
 COPY entrypoint.py /app/entrypoint.py
 
 # Create data directories
-RUN mkdir -p /data/input /data/output
+RUN mkdir -p /examples/data/input /examples/data/output
 
 # Set entrypoint
 ENTRYPOINT ["/app/entrypoint.py"]
@@ -391,9 +391,9 @@ ENTRYPOINT ["/app/entrypoint.py"]
         cmd.extend(
             [
                 "-v",
-                f"{self.input_dir.absolute()}:/data/input",
+                f"{self.input_dir.absolute()}:/examples/data/input",
                 "-v",
-                f"{self.output_dir.absolute()}:/data/output",
+                f"{self.output_dir.absolute()}:/examples/data/output",
             ]
         )
 
@@ -404,6 +404,8 @@ ENTRYPOINT ["/app/entrypoint.py"]
             result = subprocess.run(
                 cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
             )
+            # Result could be used for logging output if needed
+            _ = result
 
             logger.info(f"Container for node {self.node_id} ran successfully")
             return True
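With the mount paths updated, the generated docker run command now binds the host input and output directories to /examples/data inside the container. Roughly, the assembled command list looks like this (a sketch; the image name and host paths are placeholders, not values taken from the diff):

# Hypothetical command list after the cmd.extend(...) call shown above.
cmd = [
    "docker", "run", "--rm",
    "-v", "/host/run/input:/examples/data/input",    # self.input_dir.absolute()
    "-v", "/host/run/output:/examples/data/output",  # self.output_dir.absolute()
    "kailash-node:latest",                           # placeholder image name
]
# Executed as in the hunk above:
# subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)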
kailash/sdk_exceptions.py
CHANGED
@@ -278,16 +278,7 @@ class TemplateError(KailashException):
 
 
 # Code execution exceptions
-
-"""Raised when code execution violates safety rules.
-
-This typically occurs when:
-- Potentially dangerous operations are attempted
-- Resource limits are exceeded
-- Security policies are violated
-"""
-
-pass
+# (SafetyViolationError already defined above - removing duplicate)
 
 
 class CodeExecutionError(NodeException):
@@ -302,6 +293,29 @@ class CodeExecutionError(NodeException):
     pass
 
 
+# Resource exceptions
+class KailashNotFoundException(KailashException):
+    """Raised when a requested resource cannot be found.
+
+    This typically occurs when:
+    - A template ID doesn't exist in the registry
+    - A node type is not registered
+    - A file or resource is missing
+    """
+
+    pass
+
+
+# Workflow-specific exceptions
+class KailashWorkflowException(WorkflowException):
+    """Raised for general workflow-related errors.
+
+    This is an alias for WorkflowException for consistency.
+    """
+
+    pass
+
+
 # Legacy exception name compatibility for tests and backwards compatibility
 KailashRuntimeError = RuntimeExecutionError
 KailashValidationError = NodeValidationError
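The two new classes extend the public error surface alongside the existing legacy aliases. A short, hedged example of catching them (load_template here is a hypothetical helper, not part of the SDK):

from kailash.sdk_exceptions import KailashNotFoundException, KailashWorkflowException

def load_template(template_id: str):
    # Hypothetical helper used only to illustrate the new exception types
    raise KailashNotFoundException(f"Template '{template_id}' is not registered")

try:
    load_template("missing-template")
except KailashNotFoundException as exc:
    print(f"Not found: {exc}")        # missing templates, nodes, or files
except KailashWorkflowException as exc:
    print(f"Workflow error: {exc}")   # general workflow failures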
kailash/tracking/models.py
CHANGED
@@ -203,26 +203,6 @@ class TaskRun(BaseModel):
 
         # Check other validation rules as needed
 
-    def to_dict(self) -> Dict[str, Any]:
-        """Convert to dictionary representation."""
-        data = self.model_dump()
-
-        # Convert datetime objects to strings
-        if data.get("started_at"):
-            data["started_at"] = data["started_at"].isoformat()
-        if data.get("ended_at"):
-            data["ended_at"] = data["ended_at"].isoformat()
-        if data.get("completed_at"):
-            data["completed_at"] = data["completed_at"].isoformat()
-        if data.get("created_at"):
-            data["created_at"] = data["created_at"].isoformat()
-
-        # Convert metrics to dict if present
-        if self.metrics:
-            data["metrics"] = self.metrics.to_dict()
-
-        return data
-
     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "TaskRun":
         """Create from dictionary representation."""
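The removed to_dict() hand-rolled datetime-to-string conversion on top of model_dump(). In Pydantic v2, which this module already uses (model_dump, model_validate), model_dump(mode="json") produces JSON-safe values directly, which is one plausible replacement for the deleted helper; a minimal sketch with a stand-in model, not the real TaskRun definition:

from datetime import datetime
from typing import Optional

from pydantic import BaseModel

class MiniTask(BaseModel):
    # Stand-in fields; only meant to show the serialization behaviour
    started_at: Optional[datetime] = None
    ended_at: Optional[datetime] = None

task = MiniTask(started_at=datetime(2024, 1, 1))
print(task.model_dump(mode="json"))
# {'started_at': '2024-01-01T00:00:00', 'ended_at': None}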
kailash/tracking/storage/database.py
CHANGED
@@ -320,7 +320,7 @@ class DatabaseStorage(StorageBackend):
                 # Try to sanitize it
                 try:
                     data["input_data"] = {"value": data["input_data"]}
-                except:
+                except Exception:
                     data["input_data"] = None
         if data.get("output_data"):
             try:
@@ -331,7 +331,7 @@ class DatabaseStorage(StorageBackend):
                 # Try to sanitize it
                 try:
                     data["output_data"] = {"value": data["output_data"]}
-                except:
+                except Exception:
                     data["output_data"] = None
 
         task = TaskRun.model_validate(data)
@@ -405,7 +405,7 @@ class DatabaseStorage(StorageBackend):
                 # Try to sanitize it by wrapping in quotes if needed
                 try:
                     data["input_data"] = {"value": data["input_data"]}
-                except:
+                except Exception:
                     data["input_data"] = None
             if data.get("output_data"):
                 try:
@@ -419,7 +419,7 @@ class DatabaseStorage(StorageBackend):
                 # Try to sanitize it
                 try:
                     data["output_data"] = {"value": data["output_data"]}
-                except:
+                except Exception:
                     data["output_data"] = None
 
             tasks.append(TaskRun.model_validate(data))
kailash/utils/templates.py
CHANGED
@@ -327,7 +327,7 @@ A Kailash workflow project.
 
 - `workflows/`: Workflow definitions
 - `nodes/`: Custom node implementations
-- `data/`: Input data files
+- `examples/data/`: Input data files
 - `outputs/`: Output files
 
 ## Usage
@@ -363,7 +363,7 @@ workflow = Workflow(
 )
 
 # Add nodes
-workflow.add_node("reader", CSVReader(), file_path="data/input.csv")
+workflow.add_node("reader", CSVReader(), file_path="examples/examples/data/input.csv")
 workflow.add_node("filter", Filter(), field="value", operator=">", value=100)
 workflow.add_node("sort", Sort(), field="value", reverse=True)
 workflow.add_node("aggregate", Aggregator(), group_by="category", operation="sum")
@@ -482,8 +482,8 @@ workflow = Workflow(
 )
 
 # Data ingestion
-workflow.add_node("csv_reader", CSVReader(), file_path="data/sales_data.csv")
-workflow.add_node("json_reader", JSONReader(), file_path="data/product_data.json")
+workflow.add_node("csv_reader", CSVReader(), file_path="examples/examples/data/sales_data.csv")
+workflow.add_node("json_reader", JSONReader(), file_path="examples/examples/data/product_data.json")
 
 # Transform data
 workflow.add_node("filter_sales", Filter(), field="amount", operator=">", value=1000)
@@ -553,7 +553,7 @@ workflow = Workflow(
 )
 
 # Data ingestion
-workflow.add_node("read_data", CSVReader(), file_path="data/text_data.csv")
+workflow.add_node("read_data", CSVReader(), file_path="examples/examples/data/text_data.csv")
 
 # Preprocessing
 workflow.add_node("extract_text", Map(), field="content")
@@ -616,7 +616,7 @@ workflow = Workflow(
 )
 
 # Read configuration
-workflow.add_node("read_config", JSONReader(), file_path="data/api_config.json")
+workflow.add_node("read_config", JSONReader(), file_path="examples/examples/data/api_config.json")
 
 # Process with AI agent
 workflow.add_node("chat_agent", ChatAgent(),
kailash/visualization/performance.py
CHANGED
@@ -136,7 +136,7 @@ class PerformanceVisualizer:
 
         # Calculate timeline bounds
         min_time = min(t.started_at for t in tasks_with_times)
-
+        max(t.ended_at for t in tasks_with_times)
 
         # Create timeline bars
         y_positions = []
@@ -266,8 +266,8 @@ class PerformanceVisualizer:
         )
 
         # Memory usage chart
-
-
+        ax2.bar(x, memory_usage, color="lightgreen", edgecolor="black")
+        ax2.bar(
             x,
             memory_delta,
             bottom=memory_usage,
@@ -482,7 +482,7 @@ class PerformanceVisualizer:
         width = 0.35
 
         # I/O bytes chart
-
+        ax1.bar(
             x - width / 2,
             io_read_bytes,
             width,
@@ -490,7 +490,7 @@ class PerformanceVisualizer:
             color="lightblue",
             edgecolor="black",
         )
-
+        ax1.bar(
             x + width / 2,
             io_write_bytes,
             width,
@@ -507,7 +507,7 @@ class PerformanceVisualizer:
         ax1.grid(True, axis="y", alpha=0.3)
 
         # I/O operations count chart
-
+        ax2.bar(
             x - width / 2,
             io_read_count,
             width,
@@ -515,7 +515,7 @@ class PerformanceVisualizer:
             color="lightblue",
             edgecolor="black",
         )
-
+        ax2.bar(
             x + width / 2,
             io_write_count,
             width,
kailash/visualization/reports.py
CHANGED
@@ -639,7 +639,7 @@ class WorkflowPerformanceReporter:
 
         summary = analysis["summary"]
         bottlenecks = analysis["bottlenecks"]
-
+        analysis["resource_analysis"]
        error_analysis = analysis["error_analysis"]
 
         # Efficiency insights
kailash/workflow/graph.py
CHANGED
@@ -10,11 +10,11 @@ import networkx as nx
 import yaml
 from pydantic import BaseModel, Field, ValidationError
 
-from kailash.nodes import Node
+from kailash.nodes.base import Node
 
 try:
     # For normal runtime, use the actual registry
-    from kailash.nodes import NodeRegistry
+    from kailash.nodes.base import NodeRegistry
 except ImportError:
     # For tests, use the mock registry
     from kailash.workflow.mock_registry import MockRegistry as NodeRegistry
@@ -815,8 +815,8 @@ class Workflow:
 
         # Try to find another key with a BaseModel
         for key, value in last_node_results.items():
-            if isinstance(value, BaseModel) and
-                state_model
+            if isinstance(value, BaseModel) and isinstance(
+                value, type(state_model)
             ):
                 return value, results
 
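The repaired condition keeps a value only when it is a BaseModel and an instance of state_model's own class (so subclasses also pass, per isinstance semantics). A small self-contained illustration with invented models:

from pydantic import BaseModel

class PipelineState(BaseModel):
    counter: int = 0

class OtherModel(BaseModel):
    name: str = ""

state_model = PipelineState()

for value in (PipelineState(counter=3), OtherModel(name="x"), 42):
    if isinstance(value, BaseModel) and isinstance(value, type(state_model)):
        print("matched:", value)  # only PipelineState(counter=3) matches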