kailash 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. kailash/api/__init__.py +7 -0
  2. kailash/api/workflow_api.py +383 -0
  3. kailash/nodes/__init__.py +2 -1
  4. kailash/nodes/ai/__init__.py +26 -0
  5. kailash/nodes/ai/ai_providers.py +1272 -0
  6. kailash/nodes/ai/embedding_generator.py +853 -0
  7. kailash/nodes/ai/llm_agent.py +1166 -0
  8. kailash/nodes/api/auth.py +3 -3
  9. kailash/nodes/api/graphql.py +2 -2
  10. kailash/nodes/api/http.py +391 -48
  11. kailash/nodes/api/rate_limiting.py +2 -2
  12. kailash/nodes/api/rest.py +465 -57
  13. kailash/nodes/base.py +71 -12
  14. kailash/nodes/code/python.py +2 -1
  15. kailash/nodes/data/__init__.py +7 -0
  16. kailash/nodes/data/readers.py +28 -26
  17. kailash/nodes/data/retrieval.py +178 -0
  18. kailash/nodes/data/sharepoint_graph.py +7 -7
  19. kailash/nodes/data/sources.py +65 -0
  20. kailash/nodes/data/sql.py +7 -5
  21. kailash/nodes/data/vector_db.py +2 -2
  22. kailash/nodes/data/writers.py +6 -3
  23. kailash/nodes/logic/__init__.py +2 -1
  24. kailash/nodes/logic/operations.py +2 -1
  25. kailash/nodes/logic/workflow.py +439 -0
  26. kailash/nodes/mcp/__init__.py +11 -0
  27. kailash/nodes/mcp/client.py +558 -0
  28. kailash/nodes/mcp/resource.py +682 -0
  29. kailash/nodes/mcp/server.py +577 -0
  30. kailash/nodes/transform/__init__.py +16 -1
  31. kailash/nodes/transform/chunkers.py +78 -0
  32. kailash/nodes/transform/formatters.py +96 -0
  33. kailash/nodes/transform/processors.py +5 -3
  34. kailash/runtime/docker.py +8 -6
  35. kailash/sdk_exceptions.py +24 -10
  36. kailash/tracking/metrics_collector.py +2 -1
  37. kailash/tracking/models.py +0 -20
  38. kailash/tracking/storage/database.py +4 -4
  39. kailash/tracking/storage/filesystem.py +0 -1
  40. kailash/utils/templates.py +6 -6
  41. kailash/visualization/performance.py +7 -7
  42. kailash/visualization/reports.py +1 -1
  43. kailash/workflow/graph.py +4 -4
  44. kailash/workflow/mock_registry.py +1 -1
  45. {kailash-0.1.1.dist-info → kailash-0.1.3.dist-info}/METADATA +441 -47
  46. kailash-0.1.3.dist-info/RECORD +83 -0
  47. kailash-0.1.1.dist-info/RECORD +0 -69
  48. {kailash-0.1.1.dist-info → kailash-0.1.3.dist-info}/WHEEL +0 -0
  49. {kailash-0.1.1.dist-info → kailash-0.1.3.dist-info}/entry_points.txt +0 -0
  50. {kailash-0.1.1.dist-info → kailash-0.1.3.dist-info}/licenses/LICENSE +0 -0
  51. {kailash-0.1.1.dist-info → kailash-0.1.3.dist-info}/top_level.txt +0 -0
kailash/nodes/transform/formatters.py ADDED
@@ -0,0 +1,96 @@
+ """Text formatting nodes for transforming and preparing text data."""
+
+ from typing import Any, Dict
+
+ from kailash.nodes.base import Node, NodeParameter, register_node
+
+
+ @register_node()
+ class ChunkTextExtractorNode(Node):
+     """Extracts text content from chunks for embedding generation."""
+
+     def get_parameters(self) -> Dict[str, NodeParameter]:
+         return {
+             "chunks": NodeParameter(
+                 name="chunks",
+                 type=list,
+                 required=False,
+                 description="List of chunks to extract text from",
+             )
+         }
+
+     def run(self, **kwargs) -> Dict[str, Any]:
+         chunks = kwargs.get("chunks", [])
+         # Extract just the content text from chunks
+         texts = [chunk["content"] for chunk in chunks]
+         return {"input_texts": texts}
+
+
+ @register_node()
+ class QueryTextWrapperNode(Node):
+     """Wraps query string in list for embedding generation."""
+
+     def get_parameters(self) -> Dict[str, NodeParameter]:
+         return {
+             "query": NodeParameter(
+                 name="query",
+                 type=str,
+                 required=False,
+                 description="Query string to wrap",
+             )
+         }
+
+     def run(self, **kwargs) -> Dict[str, Any]:
+         query = kwargs.get("query", "")
+         print(f"Debug QueryTextWrapper: received query='{query}'")
+         # Use input_texts for batch embedding (single item list)
+         result = {"input_texts": [query]}
+         print(f"Debug QueryTextWrapper: returning {result}")
+         return result
+
+
+ @register_node()
+ class ContextFormatterNode(Node):
+     """Formats relevant chunks into context for LLM."""
+
+     def get_parameters(self) -> Dict[str, NodeParameter]:
+         return {
+             "relevant_chunks": NodeParameter(
+                 name="relevant_chunks",
+                 type=list,
+                 required=False,
+                 description="List of relevant chunks with scores",
+             ),
+             "query": NodeParameter(
+                 name="query",
+                 type=str,
+                 required=False,
+                 description="Original query string",
+             ),
+         }
+
+     def run(self, **kwargs) -> Dict[str, Any]:
+         relevant_chunks = kwargs.get("relevant_chunks", [])
+         query = kwargs.get("query", "")
+         # Format context from relevant chunks
+         context_parts = []
+         for chunk in relevant_chunks:
+             context_parts.append(
+                 f"From '{chunk['document_title']}' (Score: {chunk['relevance_score']:.3f}):\n"
+                 f"{chunk['content']}\n"
+             )
+
+         context = "\n".join(context_parts)
+
+         # Create prompt for LLM
+         prompt = f"""Based on the following context, please answer the question: "{query}"
+
+ Context:
+ {context}
+
+ Please provide a comprehensive answer based on the information provided above."""
+
+         # Create messages list for LLMAgent
+         messages = [{"role": "user", "content": prompt}]
+
+         return {"formatted_prompt": prompt, "messages": messages, "context": context}
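Taken together, these three nodes cover the glue steps of a retrieval-augmented prompt flow. A minimal sketch of the data contract they implement, assuming the module path from the file list above (kailash/nodes/transform/formatters.py) and a no-argument node constructor; the chunk records and query are made-up sample data:

from kailash.nodes.transform.formatters import (
    ChunkTextExtractorNode,
    ContextFormatterNode,
    QueryTextWrapperNode,
)

# Hypothetical chunk records shaped the way ContextFormatterNode expects them.
chunks = [
    {
        "content": "Kailash workflows are graphs of nodes.",
        "document_title": "intro.md",
        "relevance_score": 0.91,
    }
]

# 1. Pull plain text out of chunk dicts so it can be embedded in batch.
texts = ChunkTextExtractorNode().run(chunks=chunks)
print(texts)  # {'input_texts': ['Kailash workflows are graphs of nodes.']}

# 2. Wrap a query string in a single-item list for the same embedding path.
wrapped = QueryTextWrapperNode().run(query="What is a workflow?")
print(wrapped)  # {'input_texts': ['What is a workflow?']}

# 3. Turn scored chunks plus the query into an LLM-ready prompt and messages list.
formatted = ContextFormatterNode().run(relevant_chunks=chunks, query="What is a workflow?")
print(formatted["messages"][0]["role"])  # user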
kailash/nodes/transform/processors.py CHANGED
@@ -261,7 +261,7 @@ class DataTransformer(Node):
  local_vars["result"] = result
 
  # Execute the code block
- exec(transform_str, safe_globals, local_vars)
+ exec(transform_str, safe_globals, local_vars) # noqa: S102
 
  # Extract the result from local context
  result = local_vars.get("result", result)
@@ -271,7 +271,7 @@ class DataTransformer(Node):
  # For lambda functions like: "lambda x: x * 2"
  if transform_str.strip().startswith("lambda"):
  # First, compile the lambda function
- lambda_func = eval(transform_str, safe_globals)
+ lambda_func = eval(transform_str, safe_globals) # noqa: S307
 
  # Apply the lambda function based on input data
  if isinstance(result, list):
@@ -324,7 +324,9 @@ class DataTransformer(Node):
  else:
  local_vars = input_data.copy()
  local_vars["result"] = result
- result = eval(transform_str, safe_globals, local_vars)
+ result = eval(
+     transform_str, safe_globals, local_vars
+ ) # noqa: S307
 
  except Exception as e:
  tb = traceback.format_exc()
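The noqa suppressions above acknowledge that DataTransformer deliberately runs exec/eval on user-supplied transform strings. A standalone sketch of the lambda path being suppressed (illustrative values only; the real safe_globals contents are not shown in this diff):

transform_str = "lambda x: x * 2"      # user-supplied transform string
safe_globals = {"__builtins__": {}}    # assumed restricted globals, for illustration

result = [1, 2, 3]
if transform_str.strip().startswith("lambda"):
    # Compile the lambda, then apply it element-wise to list inputs.
    lambda_func = eval(transform_str, safe_globals)  # noqa: S307
    if isinstance(result, list):
        result = [lambda_func(item) for item in result]

print(result)  # [2, 4, 6]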
kailash/runtime/docker.py CHANGED
@@ -174,7 +174,7 @@ def main():
  logger.info(f"Loaded configuration for {node_data['class']} node")
 
  # Load runtime inputs if available
- input_path = Path("/data/input/inputs.json")
+ input_path = Path("/examples/data/input/inputs.json")
  runtime_inputs = {}
  if input_path.exists():
  logger.info(f"Loading inputs from {input_path}")
@@ -206,7 +206,7 @@ def main():
  except Exception as e:
  logger.error(f"Node execution failed: {e}")
  # Save error information
- with open("/data/output/error.json", 'w') as f:
+ with open("/examples/data/output/error.json", 'w') as f:
  json.dump({
  "error": str(e),
  "type": e.__class__.__name__
@@ -216,7 +216,7 @@ def main():
  # Save results
  logger.info("Saving execution results")
  try:
- result_path = Path("/data/output/result.json")
+ result_path = Path("/examples/data/output/result.json")
  with open(result_path, 'w') as f:
  # Handle non-serializable objects with basic conversion
  try:
@@ -266,7 +266,7 @@ COPY node.json /app/node.json
  COPY entrypoint.py /app/entrypoint.py
 
  # Create data directories
- RUN mkdir -p /data/input /data/output
+ RUN mkdir -p /examples/data/input /examples/data/output
 
  # Set entrypoint
  ENTRYPOINT ["/app/entrypoint.py"]
@@ -391,9 +391,9 @@ ENTRYPOINT ["/app/entrypoint.py"]
  cmd.extend(
  [
  "-v",
- f"{self.input_dir.absolute()}:/data/input",
+ f"{self.input_dir.absolute()}:/examples/data/input",
  "-v",
- f"{self.output_dir.absolute()}:/data/output",
+ f"{self.output_dir.absolute()}:/examples/data/output",
  ]
  )
 
@@ -404,6 +404,8 @@ ENTRYPOINT ["/app/entrypoint.py"]
  result = subprocess.run(
  cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
  )
+ # Result could be used for logging output if needed
+ _ = result
 
  logger.info(f"Container for node {self.node_id} ran successfully")
  return True
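The path changes above move the container-side data directories from /data/... to /examples/data/.... In effect, the runner now emits volume flags like the following sketch (host directories are placeholders):

from pathlib import Path

input_dir = Path("/host/example/input")    # placeholder host directory
output_dir = Path("/host/example/output")  # placeholder host directory

# Mirror of the cmd.extend(...) call shown in the hunk above.
cmd = ["docker", "run"]
cmd.extend(
    [
        "-v",
        f"{input_dir.absolute()}:/examples/data/input",
        "-v",
        f"{output_dir.absolute()}:/examples/data/output",
    ]
)
print(" ".join(cmd))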
kailash/sdk_exceptions.py CHANGED
@@ -278,16 +278,7 @@ class TemplateError(KailashException):
 
 
  # Code execution exceptions
- class SafetyViolationError(NodeException):
- """Raised when code execution violates safety rules.
-
- This typically occurs when:
- - Potentially dangerous operations are attempted
- - Resource limits are exceeded
- - Security policies are violated
- """
-
- pass
+ # (SafetyViolationError already defined above - removing duplicate)
 
 
  class CodeExecutionError(NodeException):
@@ -302,6 +293,29 @@ class CodeExecutionError(NodeException):
  pass
 
 
+ # Resource exceptions
+ class KailashNotFoundException(KailashException):
+ """Raised when a requested resource cannot be found.
+
+ This typically occurs when:
+ - A template ID doesn't exist in the registry
+ - A node type is not registered
+ - A file or resource is missing
+ """
+
+ pass
+
+
+ # Workflow-specific exceptions
+ class KailashWorkflowException(WorkflowException):
+ """Raised for general workflow-related errors.
+
+ This is an alias for WorkflowException for consistency.
+ """
+
+ pass
+
+
  # Legacy exception name compatibility for tests and backwards compatibility
  KailashRuntimeError = RuntimeExecutionError
  KailashValidationError = NodeValidationError
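The newly added KailashNotFoundException (and the KailashWorkflowException alias) give callers a framework-specific error to catch. A hedged sketch of typical usage; get_template and its registry argument are hypothetical, and the standard Exception message constructor is assumed:

from kailash.sdk_exceptions import KailashNotFoundException


def get_template(registry: dict, template_id: str) -> dict:
    # Hypothetical helper: raise the SDK's not-found error for unknown template IDs.
    try:
        return registry[template_id]
    except KeyError:
        raise KailashNotFoundException(f"Template '{template_id}' is not registered")


try:
    get_template({}, "missing-template")
except KailashNotFoundException as exc:
    print(f"Lookup failed: {exc}")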
kailash/tracking/metrics_collector.py CHANGED
@@ -88,7 +88,8 @@ class MetricsCollector:
  metrics during node execution, with support for both process-level and
  system-level monitoring.
 
- Usage:
+ Usage::
+
  collector = MetricsCollector()
  with collector.collect() as metrics:
  # Execute node code here
kailash/tracking/models.py CHANGED
@@ -203,26 +203,6 @@ class TaskRun(BaseModel):
 
  # Check other validation rules as needed
 
- def to_dict(self) -> Dict[str, Any]:
- """Convert to dictionary representation."""
- data = self.model_dump()
-
- # Convert datetime objects to strings
- if data.get("started_at"):
- data["started_at"] = data["started_at"].isoformat()
- if data.get("ended_at"):
- data["ended_at"] = data["ended_at"].isoformat()
- if data.get("completed_at"):
- data["completed_at"] = data["completed_at"].isoformat()
- if data.get("created_at"):
- data["created_at"] = data["created_at"].isoformat()
-
- # Convert metrics to dict if present
- if self.metrics:
- data["metrics"] = self.metrics.to_dict()
-
- return data
-
  @classmethod
  def from_dict(cls, data: Dict[str, Any]) -> "TaskRun":
  """Create from dictionary representation."""
kailash/tracking/storage/database.py CHANGED
@@ -320,7 +320,7 @@ class DatabaseStorage(StorageBackend):
  # Try to sanitize it
  try:
  data["input_data"] = {"value": data["input_data"]}
- except:
+ except Exception:
  data["input_data"] = None
  if data.get("output_data"):
  try:
@@ -331,7 +331,7 @@ class DatabaseStorage(StorageBackend):
  # Try to sanitize it
  try:
  data["output_data"] = {"value": data["output_data"]}
- except:
+ except Exception:
  data["output_data"] = None
 
  task = TaskRun.model_validate(data)
@@ -405,7 +405,7 @@ class DatabaseStorage(StorageBackend):
  # Try to sanitize it by wrapping in quotes if needed
  try:
  data["input_data"] = {"value": data["input_data"]}
- except:
+ except Exception:
  data["input_data"] = None
  if data.get("output_data"):
  try:
@@ -419,7 +419,7 @@ class DatabaseStorage(StorageBackend):
  # Try to sanitize it
  try:
  data["output_data"] = {"value": data["output_data"]}
- except:
+ except Exception:
  data["output_data"] = None
 
  tasks.append(TaskRun.model_validate(data))
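Both hunks above touch TaskRun (de)serialization: to_dict() was removed from models.py, and database.py keeps round-tripping records through model_validate. A sketch of the Pydantic v2 built-in that covers what the deleted to_dict() did by hand (the stand-in model and its fields are hypothetical, not the real TaskRun):

from datetime import datetime, timezone
from typing import Optional

from pydantic import BaseModel


class ExampleTask(BaseModel):
    # Stand-in for TaskRun; the real model lives in kailash.tracking.models.
    node_id: str
    started_at: Optional[datetime] = None


task = ExampleTask(node_id="reader", started_at=datetime.now(timezone.utc))
# mode="json" serializes datetimes to ISO-8601 strings, as the old to_dict() did.
print(task.model_dump(mode="json"))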
kailash/tracking/storage/filesystem.py CHANGED
@@ -270,7 +270,6 @@ class FileSystemStorage(StorageBackend):
  run = WorkflowRun.model_validate(run_data)
 
  # Generate new run ID to avoid conflicts
- original_run_id = run.run_id
  run.run_id = str(uuid4())
 
  # Save run
kailash/utils/templates.py CHANGED
@@ -327,7 +327,7 @@ A Kailash workflow project.
 
  - `workflows/`: Workflow definitions
  - `nodes/`: Custom node implementations
- - `data/`: Input data files
+ - `examples/data/`: Input data files
  - `outputs/`: Output files
 
  ## Usage
@@ -363,7 +363,7 @@ workflow = Workflow(
  )
 
  # Add nodes
- workflow.add_node("reader", CSVReader(), file_path="data/input.csv")
+ workflow.add_node("reader", CSVReader(), file_path="examples/examples/data/input.csv")
  workflow.add_node("filter", Filter(), field="value", operator=">", value=100)
  workflow.add_node("sort", Sort(), field="value", reverse=True)
  workflow.add_node("aggregate", Aggregator(), group_by="category", operation="sum")
@@ -482,8 +482,8 @@ workflow = Workflow(
  )
 
  # Data ingestion
- workflow.add_node("csv_reader", CSVReader(), file_path="data/sales_data.csv")
- workflow.add_node("json_reader", JSONReader(), file_path="data/product_data.json")
+ workflow.add_node("csv_reader", CSVReader(), file_path="examples/examples/data/sales_data.csv")
+ workflow.add_node("json_reader", JSONReader(), file_path="examples/examples/data/product_data.json")
 
  # Transform data
  workflow.add_node("filter_sales", Filter(), field="amount", operator=">", value=1000)
@@ -553,7 +553,7 @@ workflow = Workflow(
  )
 
  # Data ingestion
- workflow.add_node("read_data", CSVReader(), file_path="data/text_data.csv")
+ workflow.add_node("read_data", CSVReader(), file_path="examples/examples/data/text_data.csv")
 
  # Preprocessing
  workflow.add_node("extract_text", Map(), field="content")
@@ -616,7 +616,7 @@ workflow = Workflow(
  )
 
  # Read configuration
- workflow.add_node("read_config", JSONReader(), file_path="data/api_config.json")
+ workflow.add_node("read_config", JSONReader(), file_path="examples/examples/data/api_config.json")
 
  # Process with AI agent
  workflow.add_node("chat_agent", ChatAgent(),
kailash/visualization/performance.py CHANGED
@@ -136,7 +136,7 @@ class PerformanceVisualizer:
 
  # Calculate timeline bounds
  min_time = min(t.started_at for t in tasks_with_times)
- max_time = max(t.ended_at for t in tasks_with_times)
+ max(t.ended_at for t in tasks_with_times)
 
  # Create timeline bars
  y_positions = []
@@ -266,8 +266,8 @@ class PerformanceVisualizer:
  )
 
  # Memory usage chart
- bars2 = ax2.bar(x, memory_usage, color="lightgreen", edgecolor="black")
- bars2_delta = ax2.bar(
+ ax2.bar(x, memory_usage, color="lightgreen", edgecolor="black")
+ ax2.bar(
  x,
  memory_delta,
  bottom=memory_usage,
@@ -482,7 +482,7 @@ class PerformanceVisualizer:
  width = 0.35
 
  # I/O bytes chart
- bars1 = ax1.bar(
+ ax1.bar(
  x - width / 2,
  io_read_bytes,
  width,
@@ -490,7 +490,7 @@ class PerformanceVisualizer:
  color="lightblue",
  edgecolor="black",
  )
- bars2 = ax1.bar(
+ ax1.bar(
  x + width / 2,
  io_write_bytes,
  width,
@@ -507,7 +507,7 @@ class PerformanceVisualizer:
  ax1.grid(True, axis="y", alpha=0.3)
 
  # I/O operations count chart
- bars3 = ax2.bar(
+ ax2.bar(
  x - width / 2,
  io_read_count,
  width,
@@ -515,7 +515,7 @@ class PerformanceVisualizer:
  color="lightblue",
  edgecolor="black",
  )
- bars4 = ax2.bar(
+ ax2.bar(
  x + width / 2,
  io_write_count,
  width,

kailash/visualization/reports.py CHANGED
@@ -639,7 +639,7 @@ class WorkflowPerformanceReporter:
 
  summary = analysis["summary"]
  bottlenecks = analysis["bottlenecks"]
- resource_analysis = analysis["resource_analysis"]
+ analysis["resource_analysis"]
  error_analysis = analysis["error_analysis"]
 
  # Efficiency insights
kailash/workflow/graph.py CHANGED
@@ -10,11 +10,11 @@ import networkx as nx
  import yaml
  from pydantic import BaseModel, Field, ValidationError
 
- from kailash.nodes import Node
+ from kailash.nodes.base import Node
 
  try:
  # For normal runtime, use the actual registry
- from kailash.nodes import NodeRegistry
+ from kailash.nodes.base import NodeRegistry
  except ImportError:
  # For tests, use the mock registry
  from kailash.workflow.mock_registry import MockRegistry as NodeRegistry
@@ -815,8 +815,8 @@ class Workflow:
 
  # Try to find another key with a BaseModel
  for key, value in last_node_results.items():
- if isinstance(value, BaseModel) and type(value) == type(
-     state_model
+ if isinstance(value, BaseModel) and isinstance(
+     value, type(state_model)
  ):
  return value, results
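The Workflow change above replaces an exact type(...) == type(...) comparison with isinstance(value, type(state_model)), which additionally accepts subclasses of the state model's class. A standalone illustration with toy models (not Kailash code):

from pydantic import BaseModel


class State(BaseModel):
    count: int = 0


class ExtendedState(State):
    label: str = ""


state_model = State()
value = ExtendedState()

# Old check: an exact type match rejects subclasses of the expected state type.
print(type(value) == type(state_model))      # False
# New check: isinstance accepts the subclass as a compatible state object.
print(isinstance(value, type(state_model)))  # True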
 
kailash/workflow/mock_registry.py CHANGED
@@ -43,7 +43,7 @@ NODE_TYPES = [
  for node_type in NODE_TYPES:
  try:
  NodeRegistry._registry[node_type] = MockNode
- except:
+ except Exception:
  pass
 