kailash 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +31 -0
- kailash/__main__.py +11 -0
- kailash/cli/__init__.py +5 -0
- kailash/cli/commands.py +563 -0
- kailash/manifest.py +778 -0
- kailash/nodes/__init__.py +23 -0
- kailash/nodes/ai/__init__.py +26 -0
- kailash/nodes/ai/agents.py +417 -0
- kailash/nodes/ai/models.py +488 -0
- kailash/nodes/api/__init__.py +52 -0
- kailash/nodes/api/auth.py +567 -0
- kailash/nodes/api/graphql.py +480 -0
- kailash/nodes/api/http.py +598 -0
- kailash/nodes/api/rate_limiting.py +572 -0
- kailash/nodes/api/rest.py +665 -0
- kailash/nodes/base.py +1032 -0
- kailash/nodes/base_async.py +128 -0
- kailash/nodes/code/__init__.py +32 -0
- kailash/nodes/code/python.py +1021 -0
- kailash/nodes/data/__init__.py +125 -0
- kailash/nodes/data/readers.py +496 -0
- kailash/nodes/data/sharepoint_graph.py +623 -0
- kailash/nodes/data/sql.py +380 -0
- kailash/nodes/data/streaming.py +1168 -0
- kailash/nodes/data/vector_db.py +964 -0
- kailash/nodes/data/writers.py +529 -0
- kailash/nodes/logic/__init__.py +6 -0
- kailash/nodes/logic/async_operations.py +702 -0
- kailash/nodes/logic/operations.py +551 -0
- kailash/nodes/transform/__init__.py +5 -0
- kailash/nodes/transform/processors.py +379 -0
- kailash/runtime/__init__.py +6 -0
- kailash/runtime/async_local.py +356 -0
- kailash/runtime/docker.py +697 -0
- kailash/runtime/local.py +434 -0
- kailash/runtime/parallel.py +557 -0
- kailash/runtime/runner.py +110 -0
- kailash/runtime/testing.py +347 -0
- kailash/sdk_exceptions.py +307 -0
- kailash/tracking/__init__.py +7 -0
- kailash/tracking/manager.py +885 -0
- kailash/tracking/metrics_collector.py +342 -0
- kailash/tracking/models.py +535 -0
- kailash/tracking/storage/__init__.py +0 -0
- kailash/tracking/storage/base.py +113 -0
- kailash/tracking/storage/database.py +619 -0
- kailash/tracking/storage/filesystem.py +543 -0
- kailash/utils/__init__.py +0 -0
- kailash/utils/export.py +924 -0
- kailash/utils/templates.py +680 -0
- kailash/visualization/__init__.py +62 -0
- kailash/visualization/api.py +732 -0
- kailash/visualization/dashboard.py +951 -0
- kailash/visualization/performance.py +808 -0
- kailash/visualization/reports.py +1471 -0
- kailash/workflow/__init__.py +15 -0
- kailash/workflow/builder.py +245 -0
- kailash/workflow/graph.py +827 -0
- kailash/workflow/mermaid_visualizer.py +628 -0
- kailash/workflow/mock_registry.py +63 -0
- kailash/workflow/runner.py +302 -0
- kailash/workflow/state.py +238 -0
- kailash/workflow/visualization.py +588 -0
- kailash-0.1.0.dist-info/METADATA +710 -0
- kailash-0.1.0.dist-info/RECORD +69 -0
- kailash-0.1.0.dist-info/WHEEL +5 -0
- kailash-0.1.0.dist-info/entry_points.txt +2 -0
- kailash-0.1.0.dist-info/licenses/LICENSE +21 -0
- kailash-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,380 @@
|
|
1
|
+
"""SQL database node for the Kailash SDK.
|
2
|
+
|
3
|
+
This module provides nodes for interacting with relational databases using SQL.
|
4
|
+
It supports various database systems through a unified interface and handles
|
5
|
+
connection management, query execution, and result processing.
|
6
|
+
|
7
|
+
Design Philosophy:
|
8
|
+
1. Database-agnostic interface with adapter pattern
|
9
|
+
2. Connection pooling for performance
|
10
|
+
3. Safe parameterized queries
|
11
|
+
4. Flexible result formats
|
12
|
+
5. Transaction support
|
13
|
+
"""
|
14
|
+
|
15
|
+
from typing import Any, Dict
|
16
|
+
|
17
|
+
from kailash.nodes.base import Node, NodeParameter, register_node
|
18
|
+
|
19
|
+
|
20
|
+
@register_node()
class SQLDatabaseNode(Node):
    """Executes SQL queries against relational databases.

    NOTE: ``run()`` is currently a placeholder. It does not open a database
    connection; it returns canned rows for row-returning statements and a
    fixed affected-row count for everything else. The sections below marked
    "intended" describe the target design, not the present behaviour.

    Intended design features:
        1. Database adapter pattern for multiple RDBMS support
        2. Connection pooling for efficient resource usage
        3. Parameterized queries to prevent SQL injection
        4. Flexible result formats (dict, list, raw)
        5. Transaction support with commit/rollback
        6. Query timeout handling

    Data Flow:
        - Input: SQL query, parameters, connection config
        - Processing: Execute query, format results
        - Output: Query results in specified format

    Common Usage Patterns:
        1. Data extraction for analytics
        2. ETL pipeline source/sink
        3. Database migrations
        4. Report generation
        5. Data validation queries

    Upstream Sources:
        - User-defined queries
        - Query builder nodes
        - Template processors
        - Previous query results

    Downstream Consumers:
        - Transform nodes: Process query results
        - Writer nodes: Export to files
        - Aggregator nodes: Summarize data
        - Visualization nodes: Create charts

    Error Handling:
        The placeholder ``run()`` raises ``KeyError`` when a required keyword
        argument (``connection_string`` or ``query``) is missing. Connection,
        query, timeout and permission errors are part of the intended design
        and are not raised yet.

    Example:
        # Query customer data
        sql_node = SQLDatabaseNode(
            connection_string='postgresql://user:pass@host/db',
            query='SELECT * FROM customers WHERE active = ?',
            parameters=[True],
            result_format='dict'
        )
        result = sql_node.execute()
        # With the placeholder implementation, result['data'] is:
        # [
        #     {'id': 1, 'name': 'Sample1', 'value': 100},
        #     {'id': 2, 'name': 'Sample2', 'value': 200}
        # ]
    """

    def get_parameters(self) -> Dict[str, NodeParameter]:
        """Define input parameters for SQL execution.

        Parameter Design:
            1. connection_string: Database connection details
            2. query: SQL query to execute
            3. parameters: Query parameters for safety
            4. result_format: Output structure preference
            5. timeout: Query execution limit
            6. transaction_mode: Transaction handling

        Security considerations:
            - Always use parameterized queries
            - Connection strings should use environment variables
            - Validate query permissions

        Returns:
            Dictionary mapping each parameter name to its ``NodeParameter``
            definition. A fresh dictionary (with fresh default objects) is
            built on every call.
        """
        return {
            "connection_string": NodeParameter(
                name="connection_string",
                type=str,
                required=True,
                description="Database connection string (e.g., 'postgresql://user:pass@host/db')",
            ),
            "query": NodeParameter(
                name="query",
                type=str,
                required=True,
                description="SQL query to execute (use ? for parameters)",
            ),
            "parameters": NodeParameter(
                name="parameters",
                type=list,
                required=False,
                default=[],
                description="Query parameters for parameterized queries",
            ),
            "result_format": NodeParameter(
                name="result_format",
                type=str,
                required=False,
                default="dict",
                description="Result format: 'dict', 'list', or 'raw'",
            ),
            "timeout": NodeParameter(
                name="timeout",
                type=int,
                required=False,
                default=30,
                description="Query timeout in seconds",
            ),
            "transaction_mode": NodeParameter(
                name="transaction_mode",
                type=str,
                required=False,
                default="auto",
                description="Transaction mode: 'auto', 'manual', or 'none'",
            ),
        }

    @staticmethod
    def _redact_credentials(connection_string: str) -> str:
        """Return ``connection_string`` with any URL password replaced by ``***``.

        Only the ``scheme://user:password@host/...`` form is handled. Strings
        without a ``://`` separator, without an ``@`` in the authority part,
        or without a ``:`` in the user-info part are returned unchanged.
        Passwords carried elsewhere (e.g. in a query-string option) are not
        detected.

        Args:
            connection_string: Connection string as supplied by the caller.

        Returns:
            A copy that is safe to write to logs.
        """
        scheme, separator, remainder = connection_string.partition("://")
        if not separator:
            return connection_string
        # The authority component ends at the first "/" after the scheme.
        authority, slash, tail = remainder.partition("/")
        # rpartition so an "@" inside the password does not truncate the host.
        userinfo, at_sign, host = authority.rpartition("@")
        if not at_sign or ":" not in userinfo:
            return connection_string
        user = userinfo.split(":", 1)[0]
        return f"{scheme}://{user}:***@{host}{slash}{tail}"

    def run(self, **kwargs) -> Dict[str, Any]:
        """Simulate execution of a SQL query and format the result.

        This is a placeholder: no database is contacted. A real
        implementation would use an appropriate driver (psycopg2, pymysql,
        sqlite3, ...), pool connections, bind ``parameters``, enforce
        ``timeout`` and honour ``transaction_mode``.

        Result Formatting:
            - dict: list of dictionaries keyed by column name
            - list: list of lists, values ordered as in ``columns``
            - any other value (including 'raw'): same data as 'dict'

        Args:
            **kwargs: Validated parameters including:
                - connection_string: Database URL (required)
                - query: SQL statement (required)
                - result_format: Output format, defaults to 'dict'
                - parameters, timeout, transaction_mode: accepted but not
                  used by the placeholder implementation

        Returns:
            Dictionary containing:
                - data: Simulated rows in the requested format
                - row_count: Number of rows returned, or 1 for statements
                  that do not return rows
                - columns: List of column names (empty when no rows)
                - execution_time: Fixed simulated duration (0.125)

        Raises:
            KeyError: If ``connection_string`` or ``query`` is missing.
        """
        connection_string = kwargs["connection_string"]
        query = kwargs["query"]
        result_format = kwargs.get("result_format", "dict")

        # Never log the raw connection string: it may embed a password.
        safe_target = self._redact_credentials(str(connection_string))
        self.logger.info(f"Executing SQL query on {safe_target}")

        # Classify by the leading keyword rather than a substring search, so
        # that e.g. "UPDATE t SET selected = 1" or "INSERT ... SELECT" is not
        # mistaken for a row-returning statement. Leading whitespace and
        # opening parentheses are skipped; WITH covers common CTE queries.
        leading = query.lstrip(" \t\r\n(").upper()
        if leading.startswith(("SELECT", "WITH")):
            # Simulate SELECT query results
            data = [
                {"id": 1, "name": "Sample1", "value": 100},
                {"id": 2, "name": "Sample2", "value": 200},
            ]
            columns = ["id", "name", "value"]
            row_count = len(data)
        else:
            # Simulate INSERT/UPDATE/DELETE
            data = []
            columns = []
            row_count = 1  # Affected rows

        if result_format == "list":
            formatted_data = [[row[col] for col in columns] for row in data]
        else:
            # 'dict' and 'raw' (and any unrecognised value) share one shape
            # in the placeholder implementation.
            formatted_data = data

        return {
            "data": formatted_data,
            "row_count": row_count,
            "columns": columns,
            "execution_time": 0.125,  # Simulated execution time
        }
|
239
|
+
|
240
|
+
|
241
|
+
@register_node()
class SQLQueryBuilderNode(Node):
    """Builds SELECT queries dynamically from components.

    The node assembles a ``SELECT ... FROM ...`` statement with optional
    JOIN, WHERE, ORDER BY, LIMIT and OFFSET clauses. WHERE values are
    returned separately as positional parameters for ``?`` placeholders.

    Security note:
        Only WHERE *values* are parameterized. The table name, selected
        columns, JOIN specifications, ORDER BY columns and WHERE column
        names are inserted into the SQL text verbatim, so they must come
        from trusted input.

    Common Usage Patterns:
        1. Dynamic report queries
        2. Conditional filtering
        3. Multi-table joins
        4. Aggregation queries

    Example:
        builder = SQLQueryBuilderNode(
            table='customers',
            select=['name', 'email'],
            where={'active': True, 'country': 'USA'},
            order_by=['name'],
            limit=100
        )
        result = builder.execute()
        # result['query'] = 'SELECT name, email FROM customers WHERE active = ? AND country = ? ORDER BY name LIMIT 100'
        # result['parameters'] = [True, 'USA']
    """

    def get_parameters(self) -> Dict[str, NodeParameter]:
        """Define input parameters for query building.

        Returns:
            Dictionary mapping each parameter name to its ``NodeParameter``
            definition. A fresh dictionary (with fresh default objects) is
            built on every call.
        """
        return {
            "table": NodeParameter(
                name="table", type=str, required=True, description="Target table name"
            ),
            "select": NodeParameter(
                name="select",
                type=list,
                required=False,
                default=["*"],
                description="Columns to select",
            ),
            "where": NodeParameter(
                name="where",
                type=dict,
                required=False,
                default={},
                description="WHERE clause conditions",
            ),
            "join": NodeParameter(
                name="join",
                type=list,
                required=False,
                default=[],
                description="JOIN clauses",
            ),
            "order_by": NodeParameter(
                name="order_by",
                type=list,
                required=False,
                default=[],
                description="ORDER BY columns",
            ),
            "limit": NodeParameter(
                name="limit",
                type=int,
                required=False,
                default=None,
                description="Result limit",
            ),
            "offset": NodeParameter(
                name="offset",
                type=int,
                required=False,
                default=None,
                description="Result offset",
            ),
        }

    def run(self, **kwargs) -> Dict[str, Any]:
        """Build a parameterized SELECT query from the provided components.

        Args:
            **kwargs: Query components:
                - table: Table name (required)
                - select: Column expressions; empty or None means ``*``
                - where: Mapping of column -> value, combined with AND.
                  A value of None yields ``column IS NULL``; any other
                  value yields ``column = ?`` plus a bound parameter.
                - join: JOIN specifications, each emitted as ``JOIN <spec>``
                - order_by: ORDER BY column expressions
                - limit: Row limit, coerced with ``int()``
                - offset: Row offset, coerced with ``int()``

        Returns:
            Dictionary containing:
                - query: Built SQL query with ``?`` placeholders
                - parameters: List of values, in placeholder order

        Raises:
            KeyError: If ``table`` is missing.
            ValueError: If ``limit`` or ``offset`` is not convertible to int.
            TypeError: If ``limit`` or ``offset`` has a type ``int()`` rejects.
        """
        table = kwargs["table"]
        # "or" fallbacks make an explicit None (or an empty select list)
        # behave like an omitted argument instead of producing invalid SQL
        # such as "SELECT  FROM t" or crashing on iteration.
        select = kwargs.get("select") or ["*"]
        where = kwargs.get("where") or {}
        join = kwargs.get("join") or []
        order_by = kwargs.get("order_by") or []
        limit = kwargs.get("limit")
        offset = kwargs.get("offset")

        query_parts = [f"SELECT {', '.join(select)}", f"FROM {table}"]
        parameters = []

        # JOIN clauses are emitted in the order given.
        query_parts.extend(f"JOIN {join_spec}" for join_spec in join)

        if where:
            conditions = []
            for column, value in where.items():
                if value is None:
                    # "column = NULL" is never true in SQL; NULL needs IS NULL.
                    conditions.append(f"{column} IS NULL")
                else:
                    conditions.append(f"{column} = ?")
                    parameters.append(value)
            query_parts.append(f"WHERE {' AND '.join(conditions)}")

        if order_by:
            query_parts.append(f"ORDER BY {', '.join(order_by)}")

        # int() guarantees only a number reaches the SQL text even when the
        # caller bypassed parameter validation and passed a string.
        if limit is not None:
            query_parts.append(f"LIMIT {int(limit)}")
        if offset is not None:
            query_parts.append(f"OFFSET {int(offset)}")

        return {"query": " ".join(query_parts), "parameters": parameters}
|