kailash 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- kailash/access_control.py +40 -39
- kailash/api/auth.py +26 -32
- kailash/api/custom_nodes.py +29 -29
- kailash/api/custom_nodes_secure.py +35 -35
- kailash/api/database.py +17 -17
- kailash/api/gateway.py +19 -19
- kailash/api/mcp_integration.py +24 -23
- kailash/api/studio.py +45 -45
- kailash/api/workflow_api.py +8 -8
- kailash/cli/commands.py +5 -8
- kailash/manifest.py +42 -42
- kailash/mcp/__init__.py +1 -1
- kailash/mcp/ai_registry_server.py +20 -20
- kailash/mcp/client.py +9 -11
- kailash/mcp/client_new.py +10 -10
- kailash/mcp/server.py +1 -2
- kailash/mcp/server_enhanced.py +449 -0
- kailash/mcp/servers/ai_registry.py +6 -6
- kailash/mcp/utils/__init__.py +31 -0
- kailash/mcp/utils/cache.py +267 -0
- kailash/mcp/utils/config.py +263 -0
- kailash/mcp/utils/formatters.py +293 -0
- kailash/mcp/utils/metrics.py +418 -0
- kailash/nodes/ai/agents.py +9 -9
- kailash/nodes/ai/ai_providers.py +33 -34
- kailash/nodes/ai/embedding_generator.py +31 -32
- kailash/nodes/ai/intelligent_agent_orchestrator.py +62 -66
- kailash/nodes/ai/iterative_llm_agent.py +48 -48
- kailash/nodes/ai/llm_agent.py +32 -33
- kailash/nodes/ai/models.py +13 -13
- kailash/nodes/ai/self_organizing.py +44 -44
- kailash/nodes/api/auth.py +11 -11
- kailash/nodes/api/graphql.py +13 -13
- kailash/nodes/api/http.py +19 -19
- kailash/nodes/api/monitoring.py +20 -20
- kailash/nodes/api/rate_limiting.py +9 -13
- kailash/nodes/api/rest.py +29 -29
- kailash/nodes/api/security.py +44 -47
- kailash/nodes/base.py +21 -23
- kailash/nodes/base_async.py +7 -7
- kailash/nodes/base_cycle_aware.py +12 -12
- kailash/nodes/base_with_acl.py +5 -5
- kailash/nodes/code/python.py +66 -57
- kailash/nodes/data/directory.py +6 -6
- kailash/nodes/data/event_generation.py +10 -10
- kailash/nodes/data/file_discovery.py +28 -31
- kailash/nodes/data/readers.py +8 -8
- kailash/nodes/data/retrieval.py +10 -10
- kailash/nodes/data/sharepoint_graph.py +17 -17
- kailash/nodes/data/sources.py +5 -5
- kailash/nodes/data/sql.py +13 -13
- kailash/nodes/data/streaming.py +25 -25
- kailash/nodes/data/vector_db.py +22 -22
- kailash/nodes/data/writers.py +7 -7
- kailash/nodes/logic/async_operations.py +17 -17
- kailash/nodes/logic/convergence.py +11 -11
- kailash/nodes/logic/loop.py +4 -4
- kailash/nodes/logic/operations.py +11 -11
- kailash/nodes/logic/workflow.py +8 -9
- kailash/nodes/mixins/mcp.py +17 -17
- kailash/nodes/mixins.py +8 -10
- kailash/nodes/transform/chunkers.py +3 -3
- kailash/nodes/transform/formatters.py +7 -7
- kailash/nodes/transform/processors.py +10 -10
- kailash/runtime/access_controlled.py +18 -18
- kailash/runtime/async_local.py +17 -19
- kailash/runtime/docker.py +20 -22
- kailash/runtime/local.py +16 -16
- kailash/runtime/parallel.py +23 -23
- kailash/runtime/parallel_cyclic.py +27 -27
- kailash/runtime/runner.py +6 -6
- kailash/runtime/testing.py +20 -20
- kailash/sdk_exceptions.py +0 -58
- kailash/security.py +14 -26
- kailash/tracking/manager.py +38 -38
- kailash/tracking/metrics_collector.py +15 -14
- kailash/tracking/models.py +53 -53
- kailash/tracking/storage/base.py +7 -17
- kailash/tracking/storage/database.py +22 -23
- kailash/tracking/storage/filesystem.py +38 -40
- kailash/utils/export.py +21 -21
- kailash/utils/templates.py +2 -3
- kailash/visualization/api.py +30 -34
- kailash/visualization/dashboard.py +17 -17
- kailash/visualization/performance.py +16 -16
- kailash/visualization/reports.py +25 -27
- kailash/workflow/builder.py +8 -8
- kailash/workflow/convergence.py +13 -12
- kailash/workflow/cycle_analyzer.py +30 -32
- kailash/workflow/cycle_builder.py +12 -12
- kailash/workflow/cycle_config.py +16 -15
- kailash/workflow/cycle_debugger.py +40 -40
- kailash/workflow/cycle_exceptions.py +29 -29
- kailash/workflow/cycle_profiler.py +21 -21
- kailash/workflow/cycle_state.py +20 -22
- kailash/workflow/cyclic_runner.py +44 -44
- kailash/workflow/graph.py +40 -40
- kailash/workflow/mermaid_visualizer.py +9 -11
- kailash/workflow/migration.py +22 -22
- kailash/workflow/mock_registry.py +6 -6
- kailash/workflow/runner.py +9 -9
- kailash/workflow/safety.py +12 -13
- kailash/workflow/state.py +8 -11
- kailash/workflow/templates.py +19 -19
- kailash/workflow/validation.py +14 -14
- kailash/workflow/visualization.py +22 -22
- {kailash-0.3.0.dist-info → kailash-0.3.2.dist-info}/METADATA +53 -5
- kailash-0.3.2.dist-info/RECORD +136 -0
- kailash-0.3.0.dist-info/RECORD +0 -130
- {kailash-0.3.0.dist-info → kailash-0.3.2.dist-info}/WHEEL +0 -0
- {kailash-0.3.0.dist-info → kailash-0.3.2.dist-info}/entry_points.txt +0 -0
- {kailash-0.3.0.dist-info → kailash-0.3.2.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.3.0.dist-info → kailash-0.3.2.dist-info}/top_level.txt +0 -0
kailash/nodes/data/directory.py
CHANGED
@@ -3,7 +3,7 @@
 import mimetypes
 import os
 from datetime import datetime
-from typing import Any
+from typing import Any

 from kailash.nodes.base import Node, NodeParameter, register_node
 from kailash.security import validate_file_path
@@ -46,7 +46,7 @@ class DirectoryReaderNode(Node):
         - Directory structure information
     """

-    def get_parameters(self) ->
+    def get_parameters(self) -> dict[str, NodeParameter]:
         """Define input parameters for directory scanning."""
         return {
             "directory_path": NodeParameter(
@@ -85,7 +85,7 @@ class DirectoryReaderNode(Node):
             ),
         }

-    def run(self, **kwargs) ->
+    def run(self, **kwargs) -> dict[str, Any]:
         """Execute directory scanning operation.

         Returns:
@@ -179,9 +179,9 @@ class DirectoryReaderNode(Node):
         file_path: str,
         filename: str,
         include_hidden: bool,
-        file_patterns:
-        exclude_patterns:
-    ) ->
+        file_patterns: list[str],
+        exclude_patterns: list[str],
+    ) -> dict[str, Any] | None:
         """Extract metadata from a single file.

         Args:
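Note: the dominant change in this release, visible in the hunks above and repeated throughout the files below, is the move to the annotation style of PEP 585/604, with typing.Dict/List/Tuple/Optional replaced by the built-in generics dict/list/tuple and the X | None union syntax. A minimal before/after sketch of the pattern (illustrative only, not code from the package):

# Older style: typing-module aliases
from typing import Any, Dict, List, Optional

def get_parameters_old() -> Dict[str, Any]: ...
def scan_old(patterns: List[str], max_size: Optional[float]) -> Optional[Dict[str, Any]]: ...

# Style adopted in 0.3.2: built-in generics (Python 3.9+) and | unions (Python 3.10+)
def get_parameters_new() -> dict[str, Any]: ...
def scan_new(patterns: list[str], max_size: float | None) -> dict[str, Any] | None: ...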
kailash/nodes/data/event_generation.py
CHANGED
@@ -2,8 +2,8 @@

 import random
 import uuid
-from datetime import
-from typing import Any
+from datetime import UTC, datetime
+from typing import Any

 from kailash.nodes.base import Node, NodeParameter, register_node

@@ -84,7 +84,7 @@ class EventGeneratorNode(Node):
     >>> assert result['metadata']['total_events'] == 5
     """

-    def get_parameters(self) ->
+    def get_parameters(self) -> dict[str, NodeParameter]:
         return {
             "event_types": NodeParameter(
                 name="event_types",
@@ -135,7 +135,7 @@ class EventGeneratorNode(Node):
             ),
         }

-    def run(self, **kwargs) ->
+    def run(self, **kwargs) -> dict[str, Any]:
         event_types = kwargs["event_types"]
         event_count = kwargs.get("event_count", 10)
         aggregate_prefix = kwargs.get("aggregate_prefix", "AGG")
@@ -149,7 +149,7 @@ class EventGeneratorNode(Node):

         # Generate events
         events = []
-        now = datetime.now(
+        now = datetime.now(UTC)

         # Create a set of aggregate IDs for realistic event grouping
         num_aggregates = max(1, event_count // 3)  # Roughly 3 events per aggregate
@@ -165,7 +165,7 @@ class EventGeneratorNode(Node):
             # Generate timestamp within range
             hours_offset = random.uniform(-time_range_hours, 0)
             event_timestamp = now.timestamp() + hours_offset * 3600
-            event_time = datetime.fromtimestamp(event_timestamp, tz=
+            event_time = datetime.fromtimestamp(event_timestamp, tz=UTC)

             # Generate event data
             event_data = self._generate_event_data(
@@ -213,8 +213,8 @@ class EventGeneratorNode(Node):
         }

     def _generate_event_data(
-        self, event_type: str, aggregate_id: str, template:
-    ) ->
+        self, event_type: str, aggregate_id: str, template: dict[str, Any]
+    ) -> dict[str, Any]:
         """Generate event-specific data based on type and template."""

         # Default data generators by event type
@@ -239,7 +239,7 @@ class EventGeneratorNode(Node):
                 "tracking_number": f"TRACK-{random.randint(100000, 999999)}",
                 "carrier": random.choice(["UPS", "FedEx", "DHL", "USPS"]),
                 "status": "shipped",
-                "estimated_delivery": datetime.now(
+                "estimated_delivery": datetime.now(UTC)
                 .replace(day=datetime.now().day + random.randint(1, 7))
                 .isoformat()
                 + "Z",
@@ -293,5 +293,5 @@ class EventGeneratorNode(Node):
         return {
             "event_data": f"Generated data for {event_type}",
             "aggregate_id": aggregate_id,
-            "timestamp": datetime.now(
+            "timestamp": datetime.now(UTC).isoformat() + "Z",
         }
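The EventGeneratorNode hunks also switch naive timestamps to timezone-aware ones via the UTC constant, an alias for timezone.utc added to the datetime module in Python 3.11. A short sketch of the pattern, separate from the package code:

from datetime import UTC, datetime

now = datetime.now(UTC)                                       # aware datetime in UTC
earlier = datetime.fromtimestamp(now.timestamp() - 3600, tz=UTC)

print(now.tzinfo)            # UTC
print(earlier.isoformat())   # e.g. 2024-01-01T11:00:00+00:00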
kailash/nodes/data/file_discovery.py
CHANGED
@@ -4,9 +4,9 @@ import hashlib
 import mimetypes
 import os
 import time
-from datetime import
+from datetime import UTC, datetime
 from pathlib import Path
-from typing import Any
+from typing import Any

 from kailash.nodes.base import Node, NodeParameter, register_node

@@ -89,7 +89,7 @@ class FileDiscoveryNode(Node):
     >>> large_files = result['discovered_files']
     """

-    def get_parameters(self) ->
+    def get_parameters(self) -> dict[str, NodeParameter]:
         return {
             "search_paths": NodeParameter(
                 name="search_paths",
@@ -165,7 +165,7 @@ class FileDiscoveryNode(Node):
             ),
         }

-    def run(self, **kwargs) ->
+    def run(self, **kwargs) -> dict[str, Any]:
         search_paths = kwargs["search_paths"]
         file_patterns = kwargs.get("file_patterns", ["*"])
         exclude_patterns = kwargs.get("exclude_patterns", [])
@@ -218,7 +218,7 @@ class FileDiscoveryNode(Node):
                         "type": "discovery_error",
                         "path": search_path,
                         "error": str(e),
-                        "timestamp": datetime.now(
+                        "timestamp": datetime.now(UTC).isoformat() + "Z",
                     }
                 )

@@ -237,23 +237,23 @@ class FileDiscoveryNode(Node):
                 [f for f in discovered_files if f.get("type") != "discovery_error"]
             ),
             "execution_time": execution_time,
-            "timestamp": datetime.now(
+            "timestamp": datetime.now(UTC).isoformat() + "Z",
         }

     def _discover_files_in_path(
         self,
         search_path: str,
-        file_patterns:
-        exclude_patterns:
+        file_patterns: list[str],
+        exclude_patterns: list[str],
         max_depth: int,
         include_metadata: bool,
         include_checksums: bool,
-        min_size_mb:
-        max_size_mb:
-        older_than_days:
-        newer_than_days:
+        min_size_mb: float | None,
+        max_size_mb: float | None,
+        older_than_days: int | None,
+        newer_than_days: int | None,
         follow_symlinks: bool,
-    ) ->
+    ) -> tuple[list[dict[str, Any]], dict[str, int]]:
         """Discover files in a specific path."""

         discovered_files = []
@@ -326,8 +326,7 @@ class FileDiscoveryNode(Node):
                         "path": file_path,
                         "name": file_name,
                         "error": str(e),
-                        "timestamp": datetime.now(
-                        + "Z",
+                        "timestamp": datetime.now(UTC).isoformat() + "Z",
                     }
                 )

@@ -338,7 +337,7 @@ class FileDiscoveryNode(Node):
         return discovered_files, stats

     def _matches_patterns(
-        self, file_name: str, include_patterns:
+        self, file_name: str, include_patterns: list[str], exclude_patterns: list[str]
     ) -> bool:
         """Check if filename matches include patterns and doesn't match exclude patterns."""
         import fnmatch
@@ -360,7 +359,7 @@ class FileDiscoveryNode(Node):

     def _analyze_file(
         self, file_path: str, include_metadata: bool, include_checksums: bool
-    ) ->
+    ) -> dict[str, Any]:
         """Analyze a single file and return its information."""

         file_path_obj = Path(file_path)
@@ -383,15 +382,15 @@ class FileDiscoveryNode(Node):
             "modified_timestamp": stat_info.st_mtime,
             "accessed_timestamp": stat_info.st_atime,
             "created_date": datetime.fromtimestamp(
-                stat_info.st_ctime,
+                stat_info.st_ctime, UTC
             ).isoformat()
             + "Z",
             "modified_date": datetime.fromtimestamp(
-                stat_info.st_mtime,
+                stat_info.st_mtime, UTC
             ).isoformat()
             + "Z",
             "accessed_date": datetime.fromtimestamp(
-                stat_info.st_atime,
+                stat_info.st_atime, UTC
             ).isoformat()
             + "Z",
         }
@@ -445,9 +444,7 @@ class FileDiscoveryNode(Node):
         # Content analysis for text files
         if mime_type and mime_type.startswith("text/"):
             try:
-                with open(
-                    file_path, "r", encoding="utf-8", errors="ignore"
-                ) as f:
+                with open(file_path, encoding="utf-8", errors="ignore") as f:
                     content_sample = f.read(1024)  # Read first 1KB
                     file_info.update(
                         {
@@ -475,10 +472,10 @@ class FileDiscoveryNode(Node):
             }
         )

-        file_info["timestamp"] = datetime.now(
+        file_info["timestamp"] = datetime.now(UTC).isoformat() + "Z"
         return file_info

-    def _calculate_checksums(self, file_path: str) ->
+    def _calculate_checksums(self, file_path: str) -> dict[str, str]:
         """Calculate MD5 and SHA256 checksums for a file."""
         checksums = {}

@@ -509,9 +506,9 @@ class FileDiscoveryNode(Node):

     def _matches_date_criteria(
         self,
-        file_info:
-        older_than_days:
-        newer_than_days:
+        file_info: dict[str, Any],
+        older_than_days: int | None,
+        newer_than_days: int | None,
     ) -> bool:
         """Check if file matches date criteria."""

@@ -532,10 +529,10 @@ class FileDiscoveryNode(Node):

     def _generate_discovery_summary(
         self,
-        discovered_files:
-        discovery_stats:
+        discovered_files: list[dict],
+        discovery_stats: dict[str, int],
         execution_time: float,
-    ) ->
+    ) -> dict[str, Any]:
         """Generate summary of file discovery results."""

         # Count files by type/extension
kailash/nodes/data/readers.py
CHANGED
@@ -30,7 +30,7 @@ Downstream Consumers:

 import csv
 import json
-from typing import Any
+from typing import Any

 from kailash.nodes.base import Node, NodeParameter, register_node
 from kailash.security import safe_open, validate_file_path
@@ -146,7 +146,7 @@ class CSVReaderNode(Node):
     ... )
     """

-    def get_parameters(self) ->
+    def get_parameters(self) -> dict[str, NodeParameter]:
         """Define input parameters for CSV reading.

         This method specifies the configuration options for reading CSV files,
@@ -197,7 +197,7 @@ class CSVReaderNode(Node):
             ),
         }

-    def run(self, **kwargs) ->
+    def run(self, **kwargs) -> dict[str, Any]:
         """Execute CSV reading operation.

         This method performs the actual file reading, handling both headerless
@@ -272,7 +272,7 @@ class CSVReaderNode(Node):
                 index_pos = header_row.index(index_column) if index_column else None

                 for row in reader:
-                    row_dict = dict(zip(header_row, row))
+                    row_dict = dict(zip(header_row, row, strict=False))
                     data.append(row_dict)

                     # If index column specified, add to indexed dictionary
@@ -345,7 +345,7 @@ class JSONReaderNode(Node):
         # }
     """

-    def get_parameters(self) ->
+    def get_parameters(self) -> dict[str, NodeParameter]:
         """Define input parameters for JSON reading.

         Simple parameter definition reflecting JSON's self-describing nature.
@@ -368,7 +368,7 @@ class JSONReaderNode(Node):
             )
         }

-    def run(self, **kwargs) ->
+    def run(self, **kwargs) -> dict[str, Any]:
         """Execute JSON reading operation.

         Reads and parses JSON file, preserving the original structure
@@ -472,7 +472,7 @@ class TextReaderNode(Node):
     >>> # result['text'] = "2024-01-01 INFO: Application started\\n..."
     """

-    def get_parameters(self) ->
+    def get_parameters(self) -> dict[str, NodeParameter]:
         """Define input parameters for text reading.

         Provides essential parameters for text file reading with
@@ -507,7 +507,7 @@ class TextReaderNode(Node):
             ),
         }

-    def run(self, **kwargs) ->
+    def run(self, **kwargs) -> dict[str, Any]:
         """Execute text reading operation.

         Reads entire text file into memory as a single string,
kailash/nodes/data/retrieval.py
CHANGED
@@ -1,6 +1,6 @@
 """Document retrieval nodes for finding relevant content using various similarity methods."""

-from typing import Any
+from typing import Any

 from kailash.nodes.base import Node, NodeParameter, register_node

@@ -9,7 +9,7 @@ from kailash.nodes.base import Node, NodeParameter, register_node
 class RelevanceScorerNode(Node):
     """Scores chunk relevance using various similarity methods including embeddings similarity."""

-    def get_parameters(self) ->
+    def get_parameters(self) -> dict[str, NodeParameter]:
         return {
             "chunks": NodeParameter(
                 name="chunks",
@@ -45,7 +45,7 @@ class RelevanceScorerNode(Node):
             ),
         }

-    def run(self, **kwargs) ->
+    def run(self, **kwargs) -> dict[str, Any]:
         chunks = kwargs.get("chunks", [])
         query_embeddings = kwargs.get("query_embedding", [])
         chunk_embeddings = kwargs.get("chunk_embeddings", [])
@@ -98,8 +98,8 @@ class RelevanceScorerNode(Node):
         return {"relevant_chunks": top_chunks}

     def _cosine_similarity_scoring(
-        self, chunks:
-    ) ->
+        self, chunks: list[dict], query_embeddings: list, chunk_embeddings: list
+    ) -> list[dict]:
         """Score chunks using cosine similarity."""
         # Extract actual embedding vectors from the embedding objects
         # EmbeddingGeneratorNode returns embeddings in format: {"embedding": [...], "text": "...", "dimensions": X}
@@ -131,7 +131,7 @@ class RelevanceScorerNode(Node):
             return 0.5

         try:
-            dot_product = sum(x * y for x, y in zip(a, b))
+            dot_product = sum(x * y for x, y in zip(a, b, strict=False))
             norm_a = sum(x * x for x in a) ** 0.5
             norm_b = sum(x * x for x in b) ** 0.5
             return dot_product / (norm_a * norm_b) if norm_a * norm_b > 0 else 0
@@ -162,16 +162,16 @@ class RelevanceScorerNode(Node):
         return scored_chunks

     def _bm25_scoring(
-        self, chunks:
-    ) ->
+        self, chunks: list[dict], query_embeddings: list, chunk_embeddings: list
+    ) -> list[dict]:
         """Score chunks using BM25 algorithm (future implementation)."""
         # TODO: Implement BM25 scoring
         # For now, return chunks with default scores
         return [{**chunk, "relevance_score": 0.5} for chunk in chunks]

     def _tfidf_scoring(
-        self, chunks:
-    ) ->
+        self, chunks: list[dict], query_embeddings: list, chunk_embeddings: list
+    ) -> list[dict]:
         """Score chunks using TF-IDF similarity (future implementation)."""
         # TODO: Implement TF-IDF scoring
         # For now, return chunks with default scores
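The scoring helper shown above computes cosine similarity in pure Python: cos(a, b) = a·b / (|a|·|b|). A standalone version of the same formula for reference (illustrative, mirroring the hunk rather than copied from the package):

def cosine_similarity(a: list[float], b: list[float]) -> float:
    dot = sum(x * y for x, y in zip(a, b, strict=False))   # pairwise products
    norm_a = sum(x * x for x in a) ** 0.5
    norm_b = sum(x * x for x in b) ** 0.5
    return dot / (norm_a * norm_b) if norm_a * norm_b > 0 else 0.0

print(cosine_similarity([1.0, 0.0], [1.0, 0.0]))   # 1.0 (identical direction)
print(cosine_similarity([1.0, 0.0], [0.0, 1.0]))   # 0.0 (orthogonal)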
kailash/nodes/data/sharepoint_graph.py
CHANGED
@@ -23,7 +23,7 @@ Downstream consumers:

 import os
 from pathlib import Path
-from typing import Any
+from typing import Any

 import requests

@@ -79,7 +79,7 @@ class SharePointGraphReader(Node):
             author="Kailash SDK",
         )

-    def get_parameters(self) ->
+    def get_parameters(self) -> dict[str, NodeParameter]:
         """Define input parameters for SharePoint Graph operations."""
         return {
             "tenant_id": NodeParameter(
@@ -149,7 +149,7 @@ class SharePointGraphReader(Node):

     def _authenticate(
         self, tenant_id: str, client_id: str, client_secret: str
-    ) ->
+    ) -> dict[str, Any]:
         """Authenticate with Microsoft Graph API using MSAL.

         Returns dict with token and headers for stateless operation.
@@ -184,7 +184,7 @@ class SharePointGraphReader(Node):
             },
         }

-    def _get_site_data(self, site_url: str, headers:
+    def _get_site_data(self, site_url: str, headers: dict[str, str]) -> dict[str, Any]:
         """Get SharePoint site data from Graph API."""
         # Convert SharePoint URL to Graph API site ID format
         site_id = site_url.replace("https://", "").replace(
@@ -201,8 +201,8 @@ class SharePointGraphReader(Node):
         )

     def _list_libraries(
-        self, site_id: str, headers:
-    ) ->
+        self, site_id: str, headers: dict[str, str]
+    ) -> list[dict[str, Any]]:
         """List all document libraries in the site."""
         drives_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives"
         response = requests.get(drives_url, headers=headers)
@@ -215,8 +215,8 @@ class SharePointGraphReader(Node):
         )

     def _get_drive_id(
-        self, site_id: str, library_name: str, headers:
-    ) ->
+        self, site_id: str, library_name: str, headers: dict[str, str]
+    ) -> str | None:
         """Get the drive ID for a specific library."""
         libraries = self._list_libraries(site_id, headers)
         for lib in libraries:
@@ -225,8 +225,8 @@ class SharePointGraphReader(Node):
         return None

     def _list_files(
-        self, site_id: str, library_name: str, folder_path: str, headers:
-    ) ->
+        self, site_id: str, library_name: str, folder_path: str, headers: dict[str, str]
+    ) -> dict[str, Any]:
         """List files in a specific library and folder."""
         drive_id = self._get_drive_id(site_id, library_name, headers)
         if not drive_id:
@@ -289,8 +289,8 @@ class SharePointGraphReader(Node):
         file_name: str,
         folder_path: str,
         local_path: str,
-        headers:
-    ) ->
+        headers: dict[str, str],
+    ) -> dict[str, Any]:
         """Download a file from SharePoint."""
         drive_id = self._get_drive_id(site_id, library_name, headers)
         if not drive_id:
@@ -344,8 +344,8 @@ class SharePointGraphReader(Node):
         )

     def _search_files(
-        self, site_id: str, library_name: str, query: str, headers:
-    ) ->
+        self, site_id: str, library_name: str, query: str, headers: dict[str, str]
+    ) -> dict[str, Any]:
         """Search for files in a library."""
         drive_id = self._get_drive_id(site_id, library_name, headers)
         if not drive_id:
@@ -383,7 +383,7 @@ class SharePointGraphReader(Node):
                 f"Search failed: {response.status_code} - {response.text}"
             )

-    def run(self, **kwargs) ->
+    def run(self, **kwargs) -> dict[str, Any]:
         """Execute SharePoint Graph operation.

         This method is stateless and returns JSON-serializable results
@@ -494,7 +494,7 @@ class SharePointGraphWriter(Node):
             author="Kailash SDK",
         )

-    def get_parameters(self) ->
+    def get_parameters(self) -> dict[str, NodeParameter]:
         """Define input parameters for SharePoint upload operations."""
         return {
             "tenant_id": NodeParameter(
@@ -549,7 +549,7 @@ class SharePointGraphWriter(Node):
             ),
         }

-    def run(self, **kwargs) ->
+    def run(self, **kwargs) -> dict[str, Any]:
         """Execute SharePoint upload operation."""
         # Validate required parameters
         tenant_id = kwargs.get("tenant_id")
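_authenticate's docstring refers to the MSAL client-credentials flow, but its body lies outside this diff. A minimal sketch of that flow with the msal library, using the standard Graph authority and .default scope (an assumption for illustration, not the package's code):

import msal
import requests

def acquire_graph_headers(tenant_id: str, client_id: str, client_secret: str) -> dict[str, str]:
    app = msal.ConfidentialClientApplication(
        client_id,
        authority=f"https://login.microsoftonline.com/{tenant_id}",
        client_credential=client_secret,
    )
    result = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])
    if "access_token" not in result:
        raise RuntimeError(result.get("error_description", "Authentication failed"))
    return {"Authorization": f"Bearer {result['access_token']}"}

# e.g. listing a site's document libraries, as _list_libraries does:
# headers = acquire_graph_headers(tenant_id, client_id, client_secret)
# requests.get(f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives", headers=headers)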
kailash/nodes/data/sources.py
CHANGED
@@ -1,6 +1,6 @@
 """Data source nodes for providing input data to workflows."""

-from typing import Any
+from typing import Any

 from kailash.nodes.base import Node, NodeParameter, register_node

@@ -9,7 +9,7 @@ from kailash.nodes.base import Node, NodeParameter, register_node
 class DocumentSourceNode(Node):
     """Provides sample documents for hierarchical RAG processing."""

-    def get_parameters(self) ->
+    def get_parameters(self) -> dict[str, NodeParameter]:
         return {
             "sample_documents": NodeParameter(
                 name="sample_documents",
@@ -20,7 +20,7 @@ class DocumentSourceNode(Node):
             )
         }

-    def run(self, **kwargs) ->
+    def run(self, **kwargs) -> dict[str, Any]:
         # Sample documents for demonstration
         documents = [
             {
@@ -48,7 +48,7 @@ class DocumentSourceNode(Node):
 class QuerySourceNode(Node):
     """Provides sample queries for RAG processing."""

-    def get_parameters(self) ->
+    def get_parameters(self) -> dict[str, NodeParameter]:
         return {
             "query": NodeParameter(
                 name="query",
@@ -59,7 +59,7 @@ class QuerySourceNode(Node):
             )
         }

-    def run(self, **kwargs) ->
+    def run(self, **kwargs) -> dict[str, Any]:
         query = kwargs.get("query", "What are the main types of machine learning?")
         print(f"Debug QuerySource: providing query='{query}'")
         return {"query": query}
kailash/nodes/data/sql.py
CHANGED
@@ -16,7 +16,7 @@ import os
 import threading
 import time
 from datetime import datetime
-from typing import Any,
+from typing import Any, Optional

 import yaml
 from sqlalchemy import create_engine, text
@@ -38,7 +38,7 @@ class SQLDatabaseNode(Node):
         self.config_path = project_config_path
         self.config = self._load_project_config()

-    def _load_project_config(self) ->
+    def _load_project_config(self) -> dict[str, Any]:
         """Load project configuration from YAML file."""
         if not os.path.exists(self.config_path):
             raise NodeExecutionError(
@@ -46,7 +46,7 @@ class SQLDatabaseNode(Node):
             )

         try:
-            with open(self.config_path
+            with open(self.config_path) as f:
                 config = yaml.safe_load(f)
                 return config or {}
         except yaml.YAMLError as e:
@@ -56,7 +56,7 @@ class SQLDatabaseNode(Node):

     def get_database_config(
         self, connection_name: str
-    ) ->
+    ) -> tuple[str, dict[str, Any]]:
         """Get database configuration by connection name.

         Args:
@@ -185,8 +185,8 @@ class SQLDatabaseNode(Node):
     """

     # Class-level shared resources for connection pooling
-    _shared_pools:
-    _pool_metrics:
+    _shared_pools: dict[tuple[str, frozenset], Any] = {}
+    _pool_metrics: dict[tuple[str, frozenset], dict[str, Any]] = {}
     _pool_lock = threading.Lock()
     _config_manager: Optional["SQLDatabaseNode._DatabaseConfigManager"] = None

@@ -252,7 +252,7 @@ class SQLDatabaseNode(Node):
         # Call parent constructor
         super().__init__(**kwargs)

-    def get_parameters(self) ->
+    def get_parameters(self) -> dict[str, NodeParameter]:
         """Define input parameters for SQL execution.

         Configuration parameters (provided to constructor):
@@ -334,7 +334,7 @@ class SQLDatabaseNode(Node):

         return self._shared_pools[cache_key]

-    def run(self, **kwargs) ->
+    def run(self, **kwargs) -> dict[str, Any]:
         """Execute SQL query using shared connection pool.

         Args:
@@ -458,7 +458,7 @@ class SQLDatabaseNode(Node):
         }

     @classmethod
-    def get_pool_status(cls) ->
+    def get_pool_status(cls) -> dict[str, Any]:
         """Get status of all shared connection pools."""
         with cls._pool_lock:
             status = {}
@@ -736,7 +736,7 @@ class SQLDatabaseNode(Node):

         return sanitized

-    def _convert_to_named_parameters(self, query: str, parameters:
+    def _convert_to_named_parameters(self, query: str, parameters: list) -> tuple:
         """Convert positional parameters to named parameters for SQLAlchemy 2.0.

         Args:
@@ -790,8 +790,8 @@ class SQLDatabaseNode(Node):
         return modified_query, param_dict

     def _format_results(
-        self, rows:
-    ) ->
+        self, rows: list, columns: list[str], result_format: str
+    ) -> list[Any]:
         """Format query results according to specified format.

         Args:
@@ -820,4 +820,4 @@ class SQLDatabaseNode(Node):
         self.logger.warning(
             f"Unknown result_format '{result_format}', defaulting to 'dict'"
         )
-        return [dict(zip(columns, row)) for row in rows]
+        return [dict(zip(columns, row, strict=False)) for row in rows]
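_convert_to_named_parameters (annotated above, body not shown) converts positional placeholders into the named bind parameters that SQLAlchemy 2.0 text() statements expect. A naive sketch of the idea, not the package's implementation (it ignores '?' characters inside string literals):

from sqlalchemy import create_engine, text

def to_named_parameters(query: str, parameters: list) -> tuple[str, dict]:
    params = {}
    for i, value in enumerate(parameters):
        name = f"p{i}"
        query = query.replace("?", f":{name}", 1)   # replace one placeholder at a time
        params[name] = value
    return query, params

engine = create_engine("sqlite:///:memory:")
stmt, params = to_named_parameters("SELECT ? + ?", [40, 2])
with engine.connect() as conn:
    print(conn.execute(text(stmt), params).scalar())   # 42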