kailash 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +33 -1
- kailash/access_control/__init__.py +129 -0
- kailash/access_control/managers.py +461 -0
- kailash/access_control/rule_evaluators.py +467 -0
- kailash/access_control_abac.py +825 -0
- kailash/config/__init__.py +27 -0
- kailash/config/database_config.py +359 -0
- kailash/database/__init__.py +28 -0
- kailash/database/execution_pipeline.py +499 -0
- kailash/middleware/__init__.py +306 -0
- kailash/middleware/auth/__init__.py +33 -0
- kailash/middleware/auth/access_control.py +436 -0
- kailash/middleware/auth/auth_manager.py +422 -0
- kailash/middleware/auth/jwt_auth.py +477 -0
- kailash/middleware/auth/kailash_jwt_auth.py +616 -0
- kailash/middleware/communication/__init__.py +37 -0
- kailash/middleware/communication/ai_chat.py +989 -0
- kailash/middleware/communication/api_gateway.py +802 -0
- kailash/middleware/communication/events.py +470 -0
- kailash/middleware/communication/realtime.py +710 -0
- kailash/middleware/core/__init__.py +21 -0
- kailash/middleware/core/agent_ui.py +890 -0
- kailash/middleware/core/schema.py +643 -0
- kailash/middleware/core/workflows.py +396 -0
- kailash/middleware/database/__init__.py +63 -0
- kailash/middleware/database/base.py +113 -0
- kailash/middleware/database/base_models.py +525 -0
- kailash/middleware/database/enums.py +106 -0
- kailash/middleware/database/migrations.py +12 -0
- kailash/{api/database.py → middleware/database/models.py} +183 -291
- kailash/middleware/database/repositories.py +685 -0
- kailash/middleware/database/session_manager.py +19 -0
- kailash/middleware/mcp/__init__.py +38 -0
- kailash/middleware/mcp/client_integration.py +585 -0
- kailash/middleware/mcp/enhanced_server.py +576 -0
- kailash/nodes/__init__.py +25 -3
- kailash/nodes/admin/__init__.py +35 -0
- kailash/nodes/admin/audit_log.py +794 -0
- kailash/nodes/admin/permission_check.py +864 -0
- kailash/nodes/admin/role_management.py +823 -0
- kailash/nodes/admin/security_event.py +1519 -0
- kailash/nodes/admin/user_management.py +944 -0
- kailash/nodes/ai/a2a.py +24 -7
- kailash/nodes/ai/ai_providers.py +1 -0
- kailash/nodes/ai/embedding_generator.py +11 -11
- kailash/nodes/ai/intelligent_agent_orchestrator.py +99 -11
- kailash/nodes/ai/llm_agent.py +407 -2
- kailash/nodes/ai/self_organizing.py +85 -10
- kailash/nodes/api/auth.py +287 -6
- kailash/nodes/api/rest.py +151 -0
- kailash/nodes/auth/__init__.py +17 -0
- kailash/nodes/auth/directory_integration.py +1228 -0
- kailash/nodes/auth/enterprise_auth_provider.py +1328 -0
- kailash/nodes/auth/mfa.py +2338 -0
- kailash/nodes/auth/risk_assessment.py +872 -0
- kailash/nodes/auth/session_management.py +1093 -0
- kailash/nodes/auth/sso.py +1040 -0
- kailash/nodes/base.py +344 -13
- kailash/nodes/base_cycle_aware.py +4 -2
- kailash/nodes/base_with_acl.py +1 -1
- kailash/nodes/code/python.py +293 -12
- kailash/nodes/compliance/__init__.py +9 -0
- kailash/nodes/compliance/data_retention.py +1888 -0
- kailash/nodes/compliance/gdpr.py +2004 -0
- kailash/nodes/data/__init__.py +22 -2
- kailash/nodes/data/async_connection.py +469 -0
- kailash/nodes/data/async_sql.py +757 -0
- kailash/nodes/data/async_vector.py +598 -0
- kailash/nodes/data/readers.py +767 -0
- kailash/nodes/data/retrieval.py +360 -1
- kailash/nodes/data/sharepoint_graph.py +397 -21
- kailash/nodes/data/sql.py +94 -5
- kailash/nodes/data/streaming.py +68 -8
- kailash/nodes/data/vector_db.py +54 -4
- kailash/nodes/enterprise/__init__.py +13 -0
- kailash/nodes/enterprise/batch_processor.py +741 -0
- kailash/nodes/enterprise/data_lineage.py +497 -0
- kailash/nodes/logic/convergence.py +31 -9
- kailash/nodes/logic/operations.py +14 -3
- kailash/nodes/mixins/__init__.py +8 -0
- kailash/nodes/mixins/event_emitter.py +201 -0
- kailash/nodes/mixins/mcp.py +9 -4
- kailash/nodes/mixins/security.py +165 -0
- kailash/nodes/monitoring/__init__.py +7 -0
- kailash/nodes/monitoring/performance_benchmark.py +2497 -0
- kailash/nodes/rag/__init__.py +284 -0
- kailash/nodes/rag/advanced.py +1615 -0
- kailash/nodes/rag/agentic.py +773 -0
- kailash/nodes/rag/conversational.py +999 -0
- kailash/nodes/rag/evaluation.py +875 -0
- kailash/nodes/rag/federated.py +1188 -0
- kailash/nodes/rag/graph.py +721 -0
- kailash/nodes/rag/multimodal.py +671 -0
- kailash/nodes/rag/optimized.py +933 -0
- kailash/nodes/rag/privacy.py +1059 -0
- kailash/nodes/rag/query_processing.py +1335 -0
- kailash/nodes/rag/realtime.py +764 -0
- kailash/nodes/rag/registry.py +547 -0
- kailash/nodes/rag/router.py +837 -0
- kailash/nodes/rag/similarity.py +1854 -0
- kailash/nodes/rag/strategies.py +566 -0
- kailash/nodes/rag/workflows.py +575 -0
- kailash/nodes/security/__init__.py +19 -0
- kailash/nodes/security/abac_evaluator.py +1411 -0
- kailash/nodes/security/audit_log.py +91 -0
- kailash/nodes/security/behavior_analysis.py +1893 -0
- kailash/nodes/security/credential_manager.py +401 -0
- kailash/nodes/security/rotating_credentials.py +760 -0
- kailash/nodes/security/security_event.py +132 -0
- kailash/nodes/security/threat_detection.py +1103 -0
- kailash/nodes/testing/__init__.py +9 -0
- kailash/nodes/testing/credential_testing.py +499 -0
- kailash/nodes/transform/__init__.py +10 -2
- kailash/nodes/transform/chunkers.py +592 -1
- kailash/nodes/transform/processors.py +484 -14
- kailash/nodes/validation.py +321 -0
- kailash/runtime/access_controlled.py +1 -1
- kailash/runtime/async_local.py +41 -7
- kailash/runtime/docker.py +1 -1
- kailash/runtime/local.py +474 -55
- kailash/runtime/parallel.py +1 -1
- kailash/runtime/parallel_cyclic.py +1 -1
- kailash/runtime/testing.py +210 -2
- kailash/utils/migrations/__init__.py +25 -0
- kailash/utils/migrations/generator.py +433 -0
- kailash/utils/migrations/models.py +231 -0
- kailash/utils/migrations/runner.py +489 -0
- kailash/utils/secure_logging.py +342 -0
- kailash/workflow/__init__.py +16 -0
- kailash/workflow/cyclic_runner.py +3 -4
- kailash/workflow/graph.py +70 -2
- kailash/workflow/resilience.py +249 -0
- kailash/workflow/templates.py +726 -0
- {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/METADATA +253 -20
- kailash-0.4.0.dist-info/RECORD +223 -0
- kailash/api/__init__.py +0 -17
- kailash/api/__main__.py +0 -6
- kailash/api/studio_secure.py +0 -893
- kailash/mcp/__main__.py +0 -13
- kailash/mcp/server_new.py +0 -336
- kailash/mcp/servers/__init__.py +0 -12
- kailash-0.3.1.dist-info/RECORD +0 -136
- {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/WHEEL +0 -0
- {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/entry_points.txt +0 -0
- {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/top_level.txt +0 -0
kailash/nodes/enterprise/data_lineage.py
ADDED
@@ -0,0 +1,497 @@
+"""Data lineage tracking node for audit trails and compliance reporting.
+
+This module provides comprehensive data lineage tracking capabilities that record
+data transformations, track data flow through workflows, and generate compliance
+reports for regulatory requirements.
+
+Key Features:
+- Automatic data transformation tracking
+- Data source and destination recording
+- Compliance report generation
+- Data flow visualization
+- Audit trail maintenance
+- Data quality metrics tracking
+"""
+
+import json
+import uuid
+from datetime import datetime, timedelta
+from typing import Any, Dict, List, Optional
+
+from kailash.nodes.base import Node, NodeMetadata, NodeParameter, register_node
+from kailash.sdk_exceptions import NodeConfigurationError, NodeExecutionError
+
+
+@register_node()
+class DataLineageNode(Node):
+    """Node for tracking data lineage and generating audit trails.
+
+    This node automatically tracks data transformations, maintains audit trails,
+    and generates compliance reports for regulatory requirements. It provides
+    comprehensive data lineage tracking for enterprise workflows.
+
+    Key capabilities:
+    1. Data transformation tracking
+    2. Source and destination recording
+    3. Compliance report generation
+    4. Data quality metrics
+    5. Audit trail maintenance
+    6. Data flow visualization
+
+    Example:
+        >>> lineage = DataLineageNode()
+        >>> result = lineage.execute(
+        ...     operation="track_transformation",
+        ...     data_source="customer_db",
+        ...     transformation_type="anonymization",
+        ...     output_destination="analytics_db",
+        ...     compliance_tags=["GDPR", "CCPA"],
+        ...     data_classifications=["PII", "financial"]
+        ... )
+    """
+
+    def get_metadata(self) -> NodeMetadata:
+        """Get node metadata for discovery and orchestration."""
+        return NodeMetadata(
+            name="Data Lineage Node",
+            description="Track data lineage and generate audit trails for compliance",
+            tags={"enterprise", "compliance", "audit", "lineage", "governance"},
+            version="1.0.0",
+            author="Kailash SDK",
+        )
+
+    def get_parameters(self) -> Dict[str, NodeParameter]:
+        """Define input parameters for data lineage operations."""
+        return {
+            "operation": NodeParameter(
+                name="operation",
+                type=str,
+                required=False,
+                default="track_transformation",
+                description="Operation: track_transformation, generate_report, query_lineage, compliance_check",
+            ),
+            "data_source": NodeParameter(
+                name="data_source",
+                type=str,
+                required=False,
+                description="Source of the data being processed",
+            ),
+            "output_destination": NodeParameter(
+                name="output_destination",
+                type=str,
+                required=False,
+                description="Destination where processed data is stored",
+            ),
+            "transformation_type": NodeParameter(
+                name="transformation_type",
+                type=str,
+                required=False,
+                description="Type of transformation applied (anonymization, aggregation, filtering, etc.)",
+            ),
+            "transformation_details": NodeParameter(
+                name="transformation_details",
+                type=dict,
+                required=False,
+                description="Detailed transformation metadata",
+            ),
+            "compliance_tags": NodeParameter(
+                name="compliance_tags",
+                type=list,
+                required=False,
+                default=[],
+                description="Compliance framework tags (GDPR, CCPA, SOX, etc.)",
+            ),
+            "data_classifications": NodeParameter(
+                name="data_classifications",
+                type=list,
+                required=False,
+                default=[],
+                description="Data classification tags (PII, PHI, financial, etc.)",
+            ),
+            "user_id": NodeParameter(
+                name="user_id",
+                type=str,
+                required=False,
+                description="User ID performing the operation",
+            ),
+            "workflow_id": NodeParameter(
+                name="workflow_id",
+                type=str,
+                required=False,
+                description="Workflow ID for this operation",
+            ),
+            "start_date": NodeParameter(
+                name="start_date",
+                type=str,
+                required=False,
+                description="Start date for lineage queries (ISO format)",
+            ),
+            "end_date": NodeParameter(
+                name="end_date",
+                type=str,
+                required=False,
+                description="End date for lineage queries (ISO format)",
+            ),
+            "report_format": NodeParameter(
+                name="report_format",
+                type=str,
+                required=False,
+                default="json",
+                description="Report format: json, csv, html",
+            ),
+            "storage_backend": NodeParameter(
+                name="storage_backend",
+                type=str,
+                required=False,
+                default="memory",
+                description="Storage backend: memory, file, database",
+            ),
+            "storage_config": NodeParameter(
+                name="storage_config",
+                type=dict,
+                required=False,
+                default={},
+                description="Storage backend configuration",
+            ),
+        }
+
+    def __init__(self, **kwargs):
+        """Initialize the DataLineageNode."""
+        super().__init__(**kwargs)
+        self._lineage_storage = {}
+        self._compliance_rules = {
+            "GDPR": {
+                "required_classifications": ["PII"],
+                "retention_days": 2555,  # 7 years
+                "anonymization_required": True,
+            },
+            "CCPA": {
+                "required_classifications": ["PII"],
+                "retention_days": 1095,  # 3 years
+                "deletion_rights": True,
+            },
+            "SOX": {
+                "required_classifications": ["financial"],
+                "retention_days": 2555,  # 7 years
+                "audit_trail_required": True,
+            },
+            "HIPAA": {
+                "required_classifications": ["PHI"],
+                "retention_days": 2190,  # 6 years
+                "encryption_required": True,
+            },
+        }
+
+    def _generate_lineage_id(self) -> str:
+        """Generate unique lineage tracking ID."""
+        return f"lineage_{uuid.uuid4().hex[:12]}"
+
+    def _track_transformation(
+        self,
+        data_source: str,
+        output_destination: str,
+        transformation_type: str,
+        transformation_details: Optional[Dict] = None,
+        compliance_tags: Optional[List[str]] = None,
+        data_classifications: Optional[List[str]] = None,
+        user_id: Optional[str] = None,
+        workflow_id: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """Track a data transformation operation."""
+        lineage_id = self._generate_lineage_id()
+        timestamp = datetime.now().isoformat()
+
+        # Create lineage record
+        lineage_record = {
+            "lineage_id": lineage_id,
+            "timestamp": timestamp,
+            "data_source": data_source,
+            "output_destination": output_destination,
+            "transformation_type": transformation_type,
+            "transformation_details": transformation_details or {},
+            "compliance_tags": compliance_tags or [],
+            "data_classifications": data_classifications or [],
+            "user_id": user_id,
+            "workflow_id": workflow_id,
+            "data_flow": {
+                "input": {
+                    "source": data_source,
+                    "timestamp": timestamp,
+                    "classifications": data_classifications or [],
+                },
+                "processing": {
+                    "transformation": transformation_type,
+                    "details": transformation_details or {},
+                    "user": user_id,
+                    "workflow": workflow_id,
+                },
+                "output": {
+                    "destination": output_destination,
+                    "timestamp": timestamp,
+                    "compliance_tags": compliance_tags or [],
+                },
+            },
+        }
+
+        # Perform compliance checks
+        compliance_results = self._check_compliance(lineage_record)
+        lineage_record["compliance_check"] = compliance_results
+
+        # Store lineage record
+        self._lineage_storage[lineage_id] = lineage_record
+
+        return {
+            "lineage_id": lineage_id,
+            "status": "tracked",
+            "compliance_status": compliance_results["overall_status"],
+            "compliance_warnings": compliance_results["warnings"],
+            "audit_trail_created": True,
+            "record": lineage_record,
+        }
+
+    def _check_compliance(self, lineage_record: Dict[str, Any]) -> Dict[str, Any]:
+        """Check compliance requirements for a lineage record."""
+        compliance_tags = lineage_record.get("compliance_tags", [])
+        data_classifications = lineage_record.get("data_classifications", [])
+
+        compliance_results = {
+            "overall_status": "compliant",
+            "warnings": [],
+            "requirements_met": [],
+            "requirements_failed": [],
+        }
+
+        for tag in compliance_tags:
+            if tag in self._compliance_rules:
+                rule = self._compliance_rules[tag]
+
+                # Check required classifications
+                required_classifications = rule.get("required_classifications", [])
+                if required_classifications:
+                    missing_classifications = set(required_classifications) - set(
+                        data_classifications
+                    )
+                    if missing_classifications:
+                        compliance_results["requirements_failed"].append(
+                            f"{tag}: Missing required classifications: {list(missing_classifications)}"
+                        )
+                        compliance_results["overall_status"] = "non_compliant"
+                    else:
+                        compliance_results["requirements_met"].append(
+                            f"{tag}: Required classifications present"
+                        )
+
+                # Check transformation requirements
+                transformation_type = lineage_record.get("transformation_type", "")
+                if (
+                    rule.get("anonymization_required")
+                    and "anonymization" not in transformation_type.lower()
+                ):
+                    compliance_results["warnings"].append(
+                        f"{tag}: Anonymization may be required for this data"
+                    )
+
+                if rule.get("encryption_required"):
+                    compliance_results["warnings"].append(
+                        f"{tag}: Ensure data encryption is applied"
+                    )
+
+                if rule.get("audit_trail_required"):
+                    compliance_results["requirements_met"].append(
+                        f"{tag}: Audit trail automatically maintained"
+                    )
+
+        return compliance_results
+
+    def _generate_report(
+        self,
+        start_date: Optional[str] = None,
+        end_date: Optional[str] = None,
+        report_format: str = "json",
+        compliance_tags: Optional[List[str]] = None,
+        data_classifications: Optional[List[str]] = None,
+    ) -> Dict[str, Any]:
+        """Generate a compliance and lineage report."""
+        # Parse date filters
+        start_dt = (
+            datetime.fromisoformat(start_date)
+            if start_date
+            else datetime.now() - timedelta(days=30)
+        )
+        end_dt = datetime.fromisoformat(end_date) if end_date else datetime.now()
+
+        # Filter lineage records
+        filtered_records = []
+        for record in self._lineage_storage.values():
+            record_time = datetime.fromisoformat(record["timestamp"])
+            if start_dt <= record_time <= end_dt:
+                # Apply tag and classification filters
+                if compliance_tags:
+                    if not any(
+                        tag in record.get("compliance_tags", [])
+                        for tag in compliance_tags
+                    ):
+                        continue
+
+                if data_classifications:
+                    if not any(
+                        cls in record.get("data_classifications", [])
+                        for cls in data_classifications
+                    ):
+                        continue
+
+                filtered_records.append(record)
+
+        # Generate summary statistics
+        summary = {
+            "total_operations": len(filtered_records),
+            "date_range": {
+                "start": start_dt.isoformat(),
+                "end": end_dt.isoformat(),
+            },
+            "compliance_summary": {},
+            "transformation_types": {},
+            "data_sources": {},
+            "destinations": {},
+            "compliance_violations": 0,
+        }
+
+        # Analyze records
+        for record in filtered_records:
+            # Count transformation types
+            transform_type = record.get("transformation_type", "unknown")
+            summary["transformation_types"][transform_type] = (
+                summary["transformation_types"].get(transform_type, 0) + 1
+            )
+
+            # Count data sources
+            source = record.get("data_source", "unknown")
+            summary["data_sources"][source] = summary["data_sources"].get(source, 0) + 1
+
+            # Count destinations
+            dest = record.get("output_destination", "unknown")
+            summary["destinations"][dest] = summary["destinations"].get(dest, 0) + 1
+
+            # Analyze compliance
+            compliance_check = record.get("compliance_check", {})
+            if compliance_check.get("overall_status") == "non_compliant":
+                summary["compliance_violations"] += 1
+
+            for tag in record.get("compliance_tags", []):
+                if tag not in summary["compliance_summary"]:
+                    summary["compliance_summary"][tag] = {
+                        "total_operations": 0,
+                        "compliant": 0,
+                        "non_compliant": 0,
+                    }
+                summary["compliance_summary"][tag]["total_operations"] += 1
+                if compliance_check.get("overall_status") == "compliant":
+                    summary["compliance_summary"][tag]["compliant"] += 1
+                else:
+                    summary["compliance_summary"][tag]["non_compliant"] += 1
+
+        report = {
+            "report_id": f"report_{uuid.uuid4().hex[:12]}",
+            "generated_at": datetime.now().isoformat(),
+            "report_format": report_format,
+            "summary": summary,
+            "detailed_records": (
+                filtered_records if report_format == "json" else len(filtered_records)
+            ),
+        }
+
+        return report
+
+    def _query_lineage(
+        self,
+        data_source: Optional[str] = None,
+        output_destination: Optional[str] = None,
+        workflow_id: Optional[str] = None,
+        user_id: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """Query lineage records based on criteria."""
+        matching_records = []
+
+        for record in self._lineage_storage.values():
+            matches = True
+
+            if data_source and record.get("data_source") != data_source:
+                matches = False
+            if (
+                output_destination
+                and record.get("output_destination") != output_destination
+            ):
+                matches = False
+            if workflow_id and record.get("workflow_id") != workflow_id:
+                matches = False
+            if user_id and record.get("user_id") != user_id:
+                matches = False
+
+            if matches:
+                matching_records.append(record)
+
+        return {
+            "query_results": matching_records,
+            "total_matches": len(matching_records),
+            "query_timestamp": datetime.now().isoformat(),
+        }
+
+    def run(self, **kwargs) -> Dict[str, Any]:
+        """Execute data lineage operation."""
+        operation = kwargs.get("operation", "track_transformation")
+
+        if operation == "track_transformation":
+            data_source = kwargs.get("data_source")
+            output_destination = kwargs.get("output_destination")
+
+            if not data_source or not output_destination:
+                raise NodeConfigurationError(
+                    "data_source and output_destination are required for track_transformation"
+                )
+
+            return self._track_transformation(
+                data_source=data_source,
+                output_destination=output_destination,
+                transformation_type=kwargs.get("transformation_type", "unknown"),
+                transformation_details=kwargs.get("transformation_details"),
+                compliance_tags=kwargs.get("compliance_tags"),
+                data_classifications=kwargs.get("data_classifications"),
+                user_id=kwargs.get("user_id"),
+                workflow_id=kwargs.get("workflow_id"),
+            )
+
+        elif operation == "generate_report":
+            return self._generate_report(
+                start_date=kwargs.get("start_date"),
+                end_date=kwargs.get("end_date"),
+                report_format=kwargs.get("report_format", "json"),
+                compliance_tags=kwargs.get("compliance_tags"),
+                data_classifications=kwargs.get("data_classifications"),
+            )
+
+        elif operation == "query_lineage":
+            return self._query_lineage(
+                data_source=kwargs.get("data_source"),
+                output_destination=kwargs.get("output_destination"),
+                workflow_id=kwargs.get("workflow_id"),
+                user_id=kwargs.get("user_id"),
+            )
+
+        elif operation == "compliance_check":
+            # Perform standalone compliance check
+            mock_record = {
+                "compliance_tags": kwargs.get("compliance_tags", []),
+                "data_classifications": kwargs.get("data_classifications", []),
+                "transformation_type": kwargs.get("transformation_type", ""),
+            }
+            return {
+                "compliance_check": self._check_compliance(mock_record),
+                "timestamp": datetime.now().isoformat(),
+            }
+
+        else:
+            raise NodeConfigurationError(f"Invalid operation: {operation}")
+
+    async def async_run(self, **kwargs) -> Dict[str, Any]:
+        """Async execution method for enterprise integration."""
+        return self.run(**kwargs)
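
For orientation, a minimal usage sketch of the new node, extrapolated from the docstring example and the return payloads of _track_transformation() and _generate_report(); the import path follows the file listing above, and execute() is the public wrapper the docstring itself uses. Note that with the default storage_backend of "memory", lineage records live in the instance's _lineage_storage dict and vanish with it.

    from kailash.nodes.enterprise.data_lineage import DataLineageNode

    lineage = DataLineageNode()

    # Record an anonymization step; GDPR's required "PII" classification is
    # present, so the record should come back compliant with no warnings.
    tracked = lineage.execute(
        operation="track_transformation",
        data_source="customer_db",
        transformation_type="anonymization",
        output_destination="analytics_db",
        compliance_tags=["GDPR"],
        data_classifications=["PII"],
    )
    print(tracked["lineage_id"], tracked["compliance_status"])

    # With no dates supplied, the report defaults to the last 30 days.
    report = lineage.execute(operation="generate_report", report_format="json")
    print(report["summary"]["total_operations"])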
kailash/nodes/logic/convergence.py
CHANGED
@@ -216,8 +216,11 @@ class ConvergenceCheckerNode(CycleAwareNode):
             ),
         }
 
-    def run(self,
+    def run(self, **kwargs) -> dict[str, Any]:
         """Execute convergence checking logic."""
+        # Get context
+        context = kwargs.get("context", {})
+
         # Get parameters
         value = kwargs["value"]
         threshold = kwargs.get("threshold", 0.8)
@@ -243,12 +246,24 @@ class ConvergenceCheckerNode(CycleAwareNode):
         no_improvement_count = prev_state.get("no_improvement_count", 0)
         convergence_start_iteration = prev_state.get("convergence_start_iteration")
 
-        #
-
-
-
+        # Detect if we're dealing with boolean values (common in cycle convergence)
+        is_boolean_convergence = isinstance(value, bool) or (
+            len(value_history) >= 2 and all(v in [0.0, 1.0] for v in value_history[-2:])
+        )
+
+        # Update best value and improvement tracking (skip for boolean convergence)
+        if not is_boolean_convergence:
+            if value > best_value:
+                best_value = value
+                no_improvement_count = 0
+            else:
+                no_improvement_count += 1
         else:
-
+            # For boolean convergence, don't track "improvement" - just track changes
+            if value != prev_state.get("last_value", value):
+                no_improvement_count = 0
+            else:
+                no_improvement_count += 1
 
         # Initialize convergence state
         converged = False
@@ -265,7 +280,10 @@ class ConvergenceCheckerNode(CycleAwareNode):
         if early_stop_iterations and iteration >= early_stop_iterations:
             converged = True
             reason = f"Early stop: reached {early_stop_iterations} iterations"
-        elif
+        elif (
+            patience and no_improvement_count >= patience and not is_boolean_convergence
+        ):
+            # Only apply patience mechanism for non-boolean convergence
             converged = True
             reason = f"Early stop: no improvement for {patience} iterations"
         else:
@@ -324,6 +342,7 @@ class ConvergenceCheckerNode(CycleAwareNode):
             "best_value": best_value,
             "no_improvement_count": no_improvement_count,
             "convergence_start_iteration": convergence_start_iteration,
+            "last_value": value,  # Track last value for boolean convergence
         }
 
         # Include pass-through data if provided
@@ -537,8 +556,11 @@ class MultiCriteriaConvergenceNode(CycleAwareNode):
             ),
         }
 
-    def run(self,
+    def run(self, **kwargs) -> dict[str, Any]:
         """Execute multi-criteria convergence checking."""
+        # Get context
+        context = kwargs.get("context", {})
+
         metrics = kwargs.get("metrics", {})
 
         # On first iteration, store criteria in state
@@ -597,7 +619,7 @@ class MultiCriteriaConvergenceNode(CycleAwareNode):
            }
 
            # Run individual convergence check
-           result = checker.run(mock_context, **checker_params)
+           result = checker.run(context=mock_context, **checker_params)
 
            results[metric_name] = {
                "converged": result["converged"],
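
The caller-visible change here is the run() signature: the cycle context is no longer a positional argument but travels through **kwargs, as the updated internal call above (checker.run(context=mock_context, ...)) shows. A hedged before/after sketch; the threshold comparison itself happens in code between the hunks shown, so the printed outcome is illustrative only:

    checker = ConvergenceCheckerNode()

    # kailash 0.3.x passed the context positionally:
    #   result = checker.run(some_context, value=0.95, threshold=0.9)
    # From 0.4.0 the context is a keyword argument:
    result = checker.run(context={}, value=0.95, threshold=0.9)
    print(result["converged"])  # the result dict exposes a "converged" flag

    # Boolean values are now detected explicitly, and the patience-based
    # early stop is skipped for them, so a True/False signal is not misread
    # as "no improvement" on a numeric metric.
    result = checker.run(context={}, value=True)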
kailash/nodes/logic/operations.py
CHANGED
@@ -98,6 +98,14 @@ class SwitchNode(Node):
                 type=Any,
                 required=False,  # For testing flexibility - required at execution time
                 description="Input data to route",
+                auto_map_primary=True,  # Auto-map the main workflow input
+                auto_map_from=[
+                    "data",
+                    "input",
+                    "value",
+                    "items",
+                ],  # Common alternatives
+                workflow_alias="data",  # Preferred name in workflow connections
             ),
             "condition_field": NodeParameter(
                 name="condition_field",
@@ -333,10 +341,13 @@ class SwitchNode(Node):
         # Default case always gets the input data
         result[default_field] = input_data
 
-        #
-
+        # Initialize ALL case outputs to None first (for workflow compatibility)
+        for case in cases:
+            case_str = f"{case_prefix}{self._sanitize_case_name(case)}"
+            result[case_str] = None
 
-        #
+        # Find which case matches and populate it
+        matched_case = None
         for case in cases:
             if self._evaluate_condition(check_value, operator, case):
                 # Convert case value to a valid output field name
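
The practical effect of the second hunk: every declared case key now exists in the result, so a downstream node wired to a branch that did not fire sees an explicit None rather than a missing key. A sketch under assumptions — the "case_" prefix, the default equality operator, and execute() as the public entry point are not visible in this diff:

    switch = SwitchNode()
    result = switch.execute(
        input_data={"status": "error", "payload": 42},
        condition_field="status",
        cases=["success", "error"],
    )

    # Assuming the default "case_" prefix: both outputs are present, and only
    # the matched branch carries the routed data.
    assert result["case_success"] is None
    assert result["case_error"] is not None

The first hunk complements this on the input side: through auto_map_from, the same payload can arrive under "data", "input", "value", or "items" in workflow connections.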
kailash/nodes/mixins/__init__.py
CHANGED
@@ -4,8 +4,16 @@ This module provides mixins that can be combined with node classes
 to add additional functionality without inheritance complexity.
 """
 
+from .event_emitter import EventAwareNode, EventEmitterMixin, enable_events_for_node
 from .mcp import MCPCapabilityMixin
+from .security import LoggingMixin, PerformanceMixin, SecurityMixin
 
 __all__ = [
     "MCPCapabilityMixin",
+    "EventEmitterMixin",
+    "EventAwareNode",
+    "enable_events_for_node",
+    "SecurityMixin",
+    "PerformanceMixin",
+    "LoggingMixin",
 ]
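
As the module docstring says, these mixins are meant to be combined with node classes rather than threaded through a deeper inheritance hierarchy. A hypothetical composition (the mixins' method surfaces live in security.py and event_emitter.py, which this diff lists but does not show inline):

    from kailash.nodes.base import Node
    from kailash.nodes.mixins import LoggingMixin, PerformanceMixin, SecurityMixin

    # Hypothetical node layering security checks, performance timing, and
    # structured logging onto a plain Node via multiple inheritance.
    class HardenedNode(SecurityMixin, PerformanceMixin, LoggingMixin, Node):
        pass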