kailash 0.3.2__py3-none-any.whl → 0.4.1__py3-none-any.whl
- kailash/__init__.py +33 -1
- kailash/access_control/__init__.py +129 -0
- kailash/access_control/managers.py +461 -0
- kailash/access_control/rule_evaluators.py +467 -0
- kailash/access_control_abac.py +825 -0
- kailash/config/__init__.py +27 -0
- kailash/config/database_config.py +359 -0
- kailash/database/__init__.py +28 -0
- kailash/database/execution_pipeline.py +499 -0
- kailash/middleware/__init__.py +306 -0
- kailash/middleware/auth/__init__.py +33 -0
- kailash/middleware/auth/access_control.py +436 -0
- kailash/middleware/auth/auth_manager.py +422 -0
- kailash/middleware/auth/jwt_auth.py +477 -0
- kailash/middleware/auth/kailash_jwt_auth.py +616 -0
- kailash/middleware/communication/__init__.py +37 -0
- kailash/middleware/communication/ai_chat.py +989 -0
- kailash/middleware/communication/api_gateway.py +802 -0
- kailash/middleware/communication/events.py +470 -0
- kailash/middleware/communication/realtime.py +710 -0
- kailash/middleware/core/__init__.py +21 -0
- kailash/middleware/core/agent_ui.py +890 -0
- kailash/middleware/core/schema.py +643 -0
- kailash/middleware/core/workflows.py +396 -0
- kailash/middleware/database/__init__.py +63 -0
- kailash/middleware/database/base.py +113 -0
- kailash/middleware/database/base_models.py +525 -0
- kailash/middleware/database/enums.py +106 -0
- kailash/middleware/database/migrations.py +12 -0
- kailash/{api/database.py → middleware/database/models.py} +183 -291
- kailash/middleware/database/repositories.py +685 -0
- kailash/middleware/database/session_manager.py +19 -0
- kailash/middleware/mcp/__init__.py +38 -0
- kailash/middleware/mcp/client_integration.py +585 -0
- kailash/middleware/mcp/enhanced_server.py +576 -0
- kailash/nodes/__init__.py +27 -3
- kailash/nodes/admin/__init__.py +42 -0
- kailash/nodes/admin/audit_log.py +794 -0
- kailash/nodes/admin/permission_check.py +864 -0
- kailash/nodes/admin/role_management.py +823 -0
- kailash/nodes/admin/security_event.py +1523 -0
- kailash/nodes/admin/user_management.py +944 -0
- kailash/nodes/ai/a2a.py +24 -7
- kailash/nodes/ai/ai_providers.py +248 -40
- kailash/nodes/ai/embedding_generator.py +11 -11
- kailash/nodes/ai/intelligent_agent_orchestrator.py +99 -11
- kailash/nodes/ai/llm_agent.py +436 -5
- kailash/nodes/ai/self_organizing.py +85 -10
- kailash/nodes/ai/vision_utils.py +148 -0
- kailash/nodes/alerts/__init__.py +26 -0
- kailash/nodes/alerts/base.py +234 -0
- kailash/nodes/alerts/discord.py +499 -0
- kailash/nodes/api/auth.py +287 -6
- kailash/nodes/api/rest.py +151 -0
- kailash/nodes/auth/__init__.py +17 -0
- kailash/nodes/auth/directory_integration.py +1228 -0
- kailash/nodes/auth/enterprise_auth_provider.py +1328 -0
- kailash/nodes/auth/mfa.py +2338 -0
- kailash/nodes/auth/risk_assessment.py +872 -0
- kailash/nodes/auth/session_management.py +1093 -0
- kailash/nodes/auth/sso.py +1040 -0
- kailash/nodes/base.py +344 -13
- kailash/nodes/base_cycle_aware.py +4 -2
- kailash/nodes/base_with_acl.py +1 -1
- kailash/nodes/code/python.py +283 -10
- kailash/nodes/compliance/__init__.py +9 -0
- kailash/nodes/compliance/data_retention.py +1888 -0
- kailash/nodes/compliance/gdpr.py +2004 -0
- kailash/nodes/data/__init__.py +22 -2
- kailash/nodes/data/async_connection.py +469 -0
- kailash/nodes/data/async_sql.py +757 -0
- kailash/nodes/data/async_vector.py +598 -0
- kailash/nodes/data/readers.py +767 -0
- kailash/nodes/data/retrieval.py +360 -1
- kailash/nodes/data/sharepoint_graph.py +397 -21
- kailash/nodes/data/sql.py +94 -5
- kailash/nodes/data/streaming.py +68 -8
- kailash/nodes/data/vector_db.py +54 -4
- kailash/nodes/enterprise/__init__.py +13 -0
- kailash/nodes/enterprise/batch_processor.py +741 -0
- kailash/nodes/enterprise/data_lineage.py +497 -0
- kailash/nodes/logic/convergence.py +31 -9
- kailash/nodes/logic/operations.py +14 -3
- kailash/nodes/mixins/__init__.py +8 -0
- kailash/nodes/mixins/event_emitter.py +201 -0
- kailash/nodes/mixins/mcp.py +9 -4
- kailash/nodes/mixins/security.py +165 -0
- kailash/nodes/monitoring/__init__.py +7 -0
- kailash/nodes/monitoring/performance_benchmark.py +2497 -0
- kailash/nodes/rag/__init__.py +284 -0
- kailash/nodes/rag/advanced.py +1615 -0
- kailash/nodes/rag/agentic.py +773 -0
- kailash/nodes/rag/conversational.py +999 -0
- kailash/nodes/rag/evaluation.py +875 -0
- kailash/nodes/rag/federated.py +1188 -0
- kailash/nodes/rag/graph.py +721 -0
- kailash/nodes/rag/multimodal.py +671 -0
- kailash/nodes/rag/optimized.py +933 -0
- kailash/nodes/rag/privacy.py +1059 -0
- kailash/nodes/rag/query_processing.py +1335 -0
- kailash/nodes/rag/realtime.py +764 -0
- kailash/nodes/rag/registry.py +547 -0
- kailash/nodes/rag/router.py +837 -0
- kailash/nodes/rag/similarity.py +1854 -0
- kailash/nodes/rag/strategies.py +566 -0
- kailash/nodes/rag/workflows.py +575 -0
- kailash/nodes/security/__init__.py +19 -0
- kailash/nodes/security/abac_evaluator.py +1411 -0
- kailash/nodes/security/audit_log.py +103 -0
- kailash/nodes/security/behavior_analysis.py +1893 -0
- kailash/nodes/security/credential_manager.py +401 -0
- kailash/nodes/security/rotating_credentials.py +760 -0
- kailash/nodes/security/security_event.py +133 -0
- kailash/nodes/security/threat_detection.py +1103 -0
- kailash/nodes/testing/__init__.py +9 -0
- kailash/nodes/testing/credential_testing.py +499 -0
- kailash/nodes/transform/__init__.py +10 -2
- kailash/nodes/transform/chunkers.py +592 -1
- kailash/nodes/transform/processors.py +484 -14
- kailash/nodes/validation.py +321 -0
- kailash/runtime/access_controlled.py +1 -1
- kailash/runtime/async_local.py +41 -7
- kailash/runtime/docker.py +1 -1
- kailash/runtime/local.py +474 -55
- kailash/runtime/parallel.py +1 -1
- kailash/runtime/parallel_cyclic.py +1 -1
- kailash/runtime/testing.py +210 -2
- kailash/security.py +1 -1
- kailash/utils/migrations/__init__.py +25 -0
- kailash/utils/migrations/generator.py +433 -0
- kailash/utils/migrations/models.py +231 -0
- kailash/utils/migrations/runner.py +489 -0
- kailash/utils/secure_logging.py +342 -0
- kailash/workflow/__init__.py +16 -0
- kailash/workflow/cyclic_runner.py +3 -4
- kailash/workflow/graph.py +70 -2
- kailash/workflow/resilience.py +249 -0
- kailash/workflow/templates.py +726 -0
- {kailash-0.3.2.dist-info → kailash-0.4.1.dist-info}/METADATA +256 -20
- kailash-0.4.1.dist-info/RECORD +227 -0
- kailash/api/__init__.py +0 -17
- kailash/api/__main__.py +0 -6
- kailash/api/studio_secure.py +0 -893
- kailash/mcp/__main__.py +0 -13
- kailash/mcp/server_new.py +0 -336
- kailash/mcp/servers/__init__.py +0 -12
- kailash-0.3.2.dist-info/RECORD +0 -136
- {kailash-0.3.2.dist-info → kailash-0.4.1.dist-info}/WHEEL +0 -0
- {kailash-0.3.2.dist-info → kailash-0.4.1.dist-info}/entry_points.txt +0 -0
- {kailash-0.3.2.dist-info → kailash-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.3.2.dist-info → kailash-0.4.1.dist-info}/top_level.txt +0 -0
kailash/workflow/templates.py
CHANGED
@@ -664,6 +664,671 @@ items_processed += actual_batch_size
     return cycle_id


+class BusinessWorkflowTemplates:
+    """Pre-built templates for common business workflow patterns."""
+
+    @staticmethod
+    def investment_data_pipeline(
+        workflow: Workflow,
+        data_source: str = "market_data",
+        processor: str = "portfolio_analyzer",
+        validator: str = "risk_assessor",
+        output: str = "investment_report",
+    ) -> str:
+        """
+        Create a complete investment data processing pipeline.
+
+        Args:
+            workflow: Target workflow
+            data_source: Node that fetches market/portfolio data
+            processor: Node that analyzes investment data
+            validator: Node that validates risk metrics
+            output: Node that generates investment reports
+
+        Returns:
+            str: Pipeline identifier
+        """
+        # Add data fetching node if not exists
+        if data_source not in workflow.nodes:
+            from kailash.nodes.data import HTTPRequestNode
+
+            workflow.add_node(
+                data_source,
+                HTTPRequestNode(
+                    name=data_source,
+                    url="https://api.example.com/market-data",
+                    method="GET",
+                ),
+            )
+
+        # Add portfolio analysis node if not exists
+        if processor not in workflow.nodes:
+            from kailash.nodes.code import PythonCodeNode
+
+            analysis_code = """
+import pandas as pd
+import numpy as np
+from datetime import datetime, timedelta
+
+# Process investment data
+data = market_data if 'market_data' in locals() else {}
+portfolio_value = data.get('portfolio_value', 1000000)
+positions = data.get('positions', [])
+
+# Calculate key metrics
+total_return = sum(pos.get('return_pct', 0) * pos.get('weight', 0) for pos in positions)
+volatility = np.std([pos.get('return_pct', 0) for pos in positions])
+sharpe_ratio = total_return / volatility if volatility > 0 else 0
+
+# Risk assessment
+risk_level = 'LOW' if volatility < 0.1 else 'MEDIUM' if volatility < 0.2 else 'HIGH'
+
+result = {
+    'portfolio_value': portfolio_value,
+    'total_return': total_return,
+    'volatility': volatility,
+    'sharpe_ratio': sharpe_ratio,
+    'risk_level': risk_level,
+    'positions_count': len(positions),
+    'analysis_date': datetime.now().isoformat()
+}
+"""
+            workflow.add_node(
+                processor, PythonCodeNode(name=processor, code=analysis_code)
+            )
+
+        # Add risk validation node if not exists
+        if validator not in workflow.nodes:
+            from kailash.nodes.code import PythonCodeNode
+
+            validation_code = """
+# Risk validation and compliance checks
+analysis = result if 'result' in locals() else {}
+
+# Risk limits and compliance
+max_volatility = 0.25
+max_single_position = 0.10
+min_diversification = 5
+
+# Validate metrics
+volatility_ok = analysis.get('volatility', 0) <= max_volatility
+diversification_ok = analysis.get('positions_count', 0) >= min_diversification
+risk_acceptable = analysis.get('risk_level') in ['LOW', 'MEDIUM']
+
+# Generate warnings
+warnings = []
+if not volatility_ok:
+    warnings.append(f"Portfolio volatility {analysis.get('volatility', 0):.2%} exceeds limit {max_volatility:.2%}")
+if not diversification_ok:
+    warnings.append(f"Insufficient diversification: {analysis.get('positions_count', 0)} positions (min {min_diversification})")
+if not risk_acceptable:
+    warnings.append(f"Risk level {analysis.get('risk_level')} may be too high")
+
+validation_result = {
+    'validated': len(warnings) == 0,
+    'warnings': warnings,
+    'compliance_score': (int(volatility_ok) + int(diversification_ok) + int(risk_acceptable)) / 3,
+    'validation_date': analysis.get('analysis_date'),
+    'risk_metrics': analysis
+}
+"""
+            workflow.add_node(
+                validator, PythonCodeNode(name=validator, code=validation_code)
+            )
+
+        # Add report generation node if not exists
+        if output not in workflow.nodes:
+            from kailash.examples.utils.data_paths import get_output_data_path
+            from kailash.nodes.data import JSONWriterNode
+
+            workflow.add_node(
+                output,
+                JSONWriterNode(
+                    name=output,
+                    file_path=get_output_data_path("investment_report.json"),
+                ),
+            )
+
+        # Connect the pipeline
+        workflow.connect(data_source, processor)
+        workflow.connect(processor, validator, {"result": "result"})
+        workflow.connect(validator, output, {"validation_result": "data"})
+
+        return "investment_pipeline"
+
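A minimal usage sketch for this template follows; the Workflow import path and constructor arguments shown here are illustrative assumptions and are not taken from this diff.

    from kailash.workflow import Workflow  # assumed import path
    from kailash.workflow.templates import BusinessWorkflowTemplates

    # Hypothetical workflow instance; adjust the constructor to the actual Workflow API.
    wf = Workflow("investment-demo", name="Investment Demo")

    # Adds any of the default nodes (market_data, portfolio_analyzer,
    # risk_assessor, investment_report) that are missing, wires them,
    # and returns the identifier "investment_pipeline".
    pipeline_id = BusinessWorkflowTemplates.investment_data_pipeline(wf)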
+    @staticmethod
+    def document_ai_workflow(
+        workflow: Workflow,
+        document_reader: str = "pdf_reader",
+        text_processor: str = "ai_analyzer",
+        extractor: str = "data_extractor",
+        output: str = "structured_data",
+    ) -> str:
+        """
+        Create a document AI processing workflow.
+
+        Args:
+            workflow: Target workflow
+            document_reader: Node that reads documents
+            text_processor: Node that processes text with AI
+            extractor: Node that extracts structured data
+            output: Node that saves extracted data
+
+        Returns:
+            str: Workflow identifier
+        """
+        # Add document reader if not exists
+        if document_reader not in workflow.nodes:
+            from kailash.examples.utils.data_paths import get_input_data_path
+            from kailash.nodes.data import DirectoryReaderNode
+
+            workflow.add_node(
+                document_reader,
+                DirectoryReaderNode(
+                    name=document_reader,
+                    directory_path=get_input_data_path("documents"),
+                    file_types=[".pdf", ".docx", ".txt"],
+                ),
+            )
+
+        # Add AI text processor if not exists
+        if text_processor not in workflow.nodes:
+            from kailash.nodes.ai import LLMAgentNode
+
+            workflow.add_node(
+                text_processor,
+                LLMAgentNode(
+                    name=text_processor,
+                    model="llama3.2",
+                    prompt_template="""
+Analyze the following document and extract key information:
+
+Document: {document_content}
+
+Please extract:
+1. Document type (contract, invoice, report, etc.)
+2. Key dates mentioned
+3. Important entities (people, companies, amounts)
+4. Main topics or subjects
+5. Any action items or deadlines
+
+Provide the response in JSON format with these fields:
+- document_type
+- dates
+- entities
+- topics
+- action_items
+""",
+                    base_url="http://localhost:11434",
+                ),
+            )
+
+        # Add data extractor if not exists
+        if extractor not in workflow.nodes:
+            from kailash.nodes.code import PythonCodeNode
+
+            extraction_code = """
+import json
+import re
+from datetime import datetime
+
+# Process AI analysis result
+ai_response = response if 'response' in locals() else ""
+document_info = files if 'files' in locals() else []
+
+# Try to parse JSON from AI response
+try:
+    # Extract JSON from response (handle cases where AI adds extra text)
+    json_match = re.search(r'\\{.*\\}', ai_response, re.DOTALL)
+    if json_match:
+        extracted_data = json.loads(json_match.group())
+    else:
+        # Fallback if no JSON found
+        extracted_data = {"raw_response": ai_response}
+except:
+    extracted_data = {"raw_response": ai_response}
+
+# Add metadata
+extracted_data.update({
+    'extraction_date': datetime.now().isoformat(),
+    'document_count': len(document_info) if isinstance(document_info, list) else 1,
+    'processing_status': 'completed'
+})
+
+# Structure the final result
+result = {
+    'extracted_data': extracted_data,
+    'source_documents': document_info,
+    'processing_metadata': {
+        'extraction_method': 'ai_analysis',
+        'model_used': 'llama3.2',
+        'processing_date': datetime.now().isoformat()
+    }
+}
+"""
+            workflow.add_node(
+                extractor, PythonCodeNode(name=extractor, code=extraction_code)
+            )
+
+        # Add output writer if not exists
+        if output not in workflow.nodes:
+            from kailash.examples.utils.data_paths import get_output_data_path
+            from kailash.nodes.data import JSONWriterNode
+
+            workflow.add_node(
+                output,
+                JSONWriterNode(
+                    name=output,
+                    file_path=get_output_data_path("extracted_document_data.json"),
+                ),
+            )
+
+        # Connect the workflow
+        workflow.connect(document_reader, text_processor, {"files": "document_content"})
+        workflow.connect(
+            text_processor, extractor, {"response": "response", "files": "files"}
+        )
+        workflow.connect(extractor, output, {"result": "data"})
+
+        return "document_ai_pipeline"
+
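A minimal usage sketch for the document AI template; as configured above it expects an Ollama-served llama3.2 model at http://localhost:11434, and the Workflow construction here is an illustrative assumption.

    from kailash.workflow import Workflow  # assumed import path
    from kailash.workflow.templates import BusinessWorkflowTemplates

    wf = Workflow("doc-ai-demo", name="Document AI Demo")  # assumed constructor

    # Node names are overridable; any name already present in wf.nodes is kept
    # as-is, so a custom reader or model node can be registered beforehand.
    BusinessWorkflowTemplates.document_ai_workflow(
        wf,
        document_reader="contract_reader",
        text_processor="contract_analyzer",
    )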
+    @staticmethod
+    def api_integration_pattern(
+        workflow: Workflow,
+        auth_node: str = "api_auth",
+        data_fetcher: str = "api_client",
+        transformer: str = "data_transformer",
+        validator: str = "response_validator",
+        output: str = "api_output",
+    ) -> str:
+        """
+        Create a robust API integration pattern with auth, retry, and validation.
+
+        Args:
+            workflow: Target workflow
+            auth_node: Node that handles API authentication
+            data_fetcher: Node that fetches data from API
+            transformer: Node that transforms API responses
+            validator: Node that validates responses
+            output: Node that outputs processed data
+
+        Returns:
+            str: Integration identifier
+        """
+        # Add OAuth2 authentication if not exists
+        if auth_node not in workflow.nodes:
+            from kailash.nodes.api import OAuth2Node
+
+            workflow.add_node(
+                auth_node,
+                OAuth2Node(
+                    name=auth_node,
+                    client_id="${API_CLIENT_ID}",
+                    client_secret="${API_CLIENT_SECRET}",
+                    token_url="https://api.example.com/oauth/token",
+                    scope="read write",
+                ),
+            )
+
+        # Add API client with retry logic if not exists
+        if data_fetcher not in workflow.nodes:
+            from kailash.nodes.api import HTTPRequestNode
+
+            workflow.add_node(
+                data_fetcher,
+                HTTPRequestNode(
+                    name=data_fetcher,
+                    url="https://api.example.com/data",
+                    method="GET",
+                    timeout=30,
+                    retry_count=3,
+                ),
+            )
+
+        # Add data transformer if not exists
+        if transformer not in workflow.nodes:
+            from kailash.nodes.code import PythonCodeNode
+
+            transform_code = """
+import json
+from datetime import datetime
+
+# Transform API response data
+response_data = response if 'response' in locals() else {}
+token_info = token if 'token' in locals() else {}
+
+# Handle different response formats
+if isinstance(response_data, str):
+    try:
+        response_data = json.loads(response_data)
+    except:
+        response_data = {"raw_response": response_data}
+
+# Transform data structure
+transformed_data = {
+    'api_data': response_data,
+    'request_metadata': {
+        'timestamp': datetime.now().isoformat(),
+        'authenticated': bool(token_info.get('access_token')),
+        'token_expires': token_info.get('expires_at'),
+        'data_source': 'external_api'
+    },
+    'data_quality': {
+        'record_count': len(response_data) if isinstance(response_data, list) else 1,
+        'has_errors': 'error' in str(response_data).lower(),
+        'response_size_kb': len(str(response_data)) / 1024
+    }
+}
+
+result = transformed_data
+"""
+            workflow.add_node(
+                transformer, PythonCodeNode(name=transformer, code=transform_code)
+            )
+
+        # Add response validator if not exists
+        if validator not in workflow.nodes:
+            from kailash.nodes.code import PythonCodeNode
+
+            validation_code = """
+# Validate API response and transformed data
+data = result if 'result' in locals() else {}
+
+# Validation checks
+api_data = data.get('api_data', {})
+metadata = data.get('request_metadata', {})
+quality = data.get('data_quality', {})
+
+validation_results = {
+    'data_present': bool(api_data),
+    'authenticated_request': metadata.get('authenticated', False),
+    'no_errors': not quality.get('has_errors', True),
+    'reasonable_size': quality.get('response_size_kb', 0) > 0,
+    'recent_data': True  # Could add timestamp validation
+}
+
+# Overall validation
+all_valid = all(validation_results.values())
+validation_score = sum(validation_results.values()) / len(validation_results)
+
+validated_result = {
+    'validation_passed': all_valid,
+    'validation_score': validation_score,
+    'validation_details': validation_results,
+    'validated_data': data if all_valid else None,
+    'validation_timestamp': metadata.get('timestamp')
+}
+"""
+            workflow.add_node(
+                validator, PythonCodeNode(name=validator, code=validation_code)
+            )
+
+        # Add output node if not exists
+        if output not in workflow.nodes:
+            from kailash.examples.utils.data_paths import get_output_data_path
+            from kailash.nodes.data import JSONWriterNode
+
+            workflow.add_node(
+                output,
+                JSONWriterNode(
+                    name=output,
+                    file_path=get_output_data_path("api_integration_result.json"),
+                ),
+            )
+
+        # Connect the integration pattern
+        workflow.connect(auth_node, data_fetcher, {"token": "auth_header"})
+        workflow.connect(
+            data_fetcher, transformer, {"response": "response", "token": "token"}
+        )
+        workflow.connect(transformer, validator, {"result": "result"})
+        workflow.connect(validator, output, {"validated_result": "data"})
+
+        return "api_integration"
+
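Because the template above only adds nodes that are missing, and its default OAuth2Node carries placeholder credentials ("${API_CLIENT_ID}" / "${API_CLIENT_SECRET}"), a caller would typically register its own auth node first. A hedged sketch; the Workflow construction, credential values, and token URL are illustrative assumptions.

    from kailash.nodes.api import OAuth2Node
    from kailash.workflow import Workflow  # assumed import path
    from kailash.workflow.templates import BusinessWorkflowTemplates

    wf = Workflow("api-sync", name="API Sync")  # assumed constructor

    # Pre-register the auth node so the template's placeholder credentials
    # are never used; the remaining nodes are filled in by the template.
    wf.add_node(
        "api_auth",
        OAuth2Node(
            name="api_auth",
            client_id="my-client-id",          # illustrative value
            client_secret="my-client-secret",  # illustrative value
            token_url="https://auth.example.com/oauth/token",
            scope="read write",
        ),
    )
    BusinessWorkflowTemplates.api_integration_pattern(wf)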
+    @staticmethod
+    def data_processing_pipeline(
+        workflow: Workflow,
+        data_reader: str = "data_reader",
+        cleaner: str = "data_cleaner",
+        enricher: str = "data_enricher",
+        aggregator: str = "data_aggregator",
+        writer: str = "data_writer",
+    ) -> str:
+        """
+        Create a comprehensive data processing pipeline.
+
+        Args:
+            workflow: Target workflow
+            data_reader: Node that reads raw data
+            cleaner: Node that cleans and validates data
+            enricher: Node that enriches data with additional information
+            aggregator: Node that aggregates and summarizes data
+            writer: Node that writes processed data
+
+        Returns:
+            str: Pipeline identifier
+        """
+        # Add data reader if not exists
+        if data_reader not in workflow.nodes:
+            from kailash.examples.utils.data_paths import get_input_data_path
+            from kailash.nodes.data import CSVReaderNode
+
+            workflow.add_node(
+                data_reader,
+                CSVReaderNode(
+                    name=data_reader, file_path=get_input_data_path("raw_data.csv")
+                ),
+            )
+
+        # Add data cleaner if not exists
+        if cleaner not in workflow.nodes:
+            from kailash.nodes.code import PythonCodeNode
+
+            cleaning_code = """
+import pandas as pd
+import numpy as np
+from datetime import datetime
+
+# Clean and validate data
+data = data if 'data' in locals() else []
+
+# Convert to DataFrame for easier processing
+if isinstance(data, list) and data:
+    df = pd.DataFrame(data)
+elif isinstance(data, dict):
+    df = pd.DataFrame([data])
+else:
+    df = pd.DataFrame()
+
+# Data cleaning operations
+if not df.empty:
+    # Remove duplicates
+    original_count = len(df)
+    df = df.drop_duplicates()
+    duplicates_removed = original_count - len(df)
+
+    # Handle missing values
+    numeric_columns = df.select_dtypes(include=[np.number]).columns
+    df[numeric_columns] = df[numeric_columns].fillna(df[numeric_columns].mean())
+
+    # Remove outliers (3 standard deviations)
+    for col in numeric_columns:
+        mean = df[col].mean()
+        std = df[col].std()
+        df = df[abs(df[col] - mean) <= 3 * std]
+
+    # Standardize text fields
+    text_columns = df.select_dtypes(include=['object']).columns
+    for col in text_columns:
+        df[col] = df[col].astype(str).str.strip().str.title()
+
+    cleaned_data = df.to_dict('records')
+else:
+    cleaned_data = []
+    duplicates_removed = 0
+
+result = {
+    'cleaned_data': cleaned_data,
+    'cleaning_stats': {
+        'original_records': len(data) if isinstance(data, list) else 1,
+        'cleaned_records': len(cleaned_data),
+        'duplicates_removed': duplicates_removed,
+        'cleaning_date': datetime.now().isoformat()
+    }
+}
+"""
+            workflow.add_node(cleaner, PythonCodeNode(name=cleaner, code=cleaning_code))
+
+        # Add data enricher if not exists
+        if enricher not in workflow.nodes:
+            from kailash.nodes.code import PythonCodeNode
+
+            enrichment_code = """
+import pandas as pd
+from datetime import datetime
+
+# Enrich data with additional calculated fields
+clean_result = result if 'result' in locals() else {}
+cleaned_data = clean_result.get('cleaned_data', [])
+
+if cleaned_data:
+    df = pd.DataFrame(cleaned_data)
+
+    # Add calculated fields
+    if 'amount' in df.columns:
+        df['amount_category'] = pd.cut(df['amount'],
+                                       bins=[0, 100, 1000, 10000, float('inf')],
+                                       labels=['Small', 'Medium', 'Large', 'Enterprise'])
+
+    if 'date' in df.columns:
+        df['date'] = pd.to_datetime(df['date'], errors='coerce')
+        df['year'] = df['date'].dt.year
+        df['month'] = df['date'].dt.month
+        df['quarter'] = df['date'].dt.quarter
+
+    # Add data quality scores
+    df['completeness_score'] = (df.count(axis=1) / len(df.columns))
+    df['data_quality'] = pd.cut(df['completeness_score'],
+                                bins=[0, 0.5, 0.8, 1.0],
+                                labels=['Poor', 'Fair', 'Good'])
+
+    enriched_data = df.to_dict('records')
+else:
+    enriched_data = []
+
+result = {
+    'enriched_data': enriched_data,
+    'enrichment_stats': {
+        'records_enriched': len(enriched_data),
+        'fields_added': ['amount_category', 'year', 'month', 'quarter', 'completeness_score', 'data_quality'],
+        'enrichment_date': datetime.now().isoformat()
+    },
+    'original_stats': clean_result.get('cleaning_stats', {})
+}
+"""
+            workflow.add_node(
+                enricher, PythonCodeNode(name=enricher, code=enrichment_code)
+            )
+
+        # Add aggregator if not exists
+        if aggregator not in workflow.nodes:
+            from kailash.nodes.code import PythonCodeNode
+
+            aggregation_code = """
+import pandas as pd
+from datetime import datetime
+
+# Aggregate and summarize enriched data
+enrich_result = result if 'result' in locals() else {}
+enriched_data = enrich_result.get('enriched_data', [])
+
+if enriched_data:
+    df = pd.DataFrame(enriched_data)
+
+    # Calculate summary statistics
+    summary_stats = {}
+
+    # Numeric summaries
+    numeric_cols = df.select_dtypes(include=['number']).columns
+    for col in numeric_cols:
+        summary_stats[col] = {
+            'mean': df[col].mean(),
+            'median': df[col].median(),
+            'std': df[col].std(),
+            'min': df[col].min(),
+            'max': df[col].max(),
+            'count': df[col].count()
+        }
+
+    # Categorical summaries
+    categorical_cols = df.select_dtypes(include=['object', 'category']).columns
+    category_summaries = {}
+    for col in categorical_cols:
+        if col not in ['data_quality', 'amount_category']:  # Skip our generated categories
+            category_summaries[col] = df[col].value_counts().to_dict()
+
+    # Data quality summary
+    quality_summary = {
+        'total_records': len(df),
+        'complete_records': (df['completeness_score'] == 1.0).sum(),
+        'quality_distribution': df['data_quality'].value_counts().to_dict() if 'data_quality' in df.columns else {},
+        'average_completeness': df['completeness_score'].mean() if 'completeness_score' in df.columns else 1.0
+    }
+
+    aggregated_result = {
+        'summary_statistics': summary_stats,
+        'category_summaries': category_summaries,
+        'quality_summary': quality_summary,
+        'aggregation_date': datetime.now().isoformat()
+    }
+else:
+    aggregated_result = {
+        'summary_statistics': {},
+        'category_summaries': {},
+        'quality_summary': {'total_records': 0},
+        'aggregation_date': datetime.now().isoformat()
+    }
+
+result = {
+    'aggregated_results': aggregated_result,
+    'processed_data': enriched_data,
+    'processing_pipeline': {
+        'original_stats': enrich_result.get('original_stats', {}),
+        'enrichment_stats': enrich_result.get('enrichment_stats', {}),
+        'aggregation_stats': {
+            'fields_summarized': len(aggregated_result['summary_statistics']),
+            'categories_analyzed': len(aggregated_result['category_summaries'])
+        }
+    }
+}
+"""
+            workflow.add_node(
+                aggregator, PythonCodeNode(name=aggregator, code=aggregation_code)
+            )
+
+        # Add data writer if not exists
+        if writer not in workflow.nodes:
+            from kailash.examples.utils.data_paths import get_output_data_path
+            from kailash.nodes.data import JSONWriterNode
+
+            workflow.add_node(
+                writer,
+                JSONWriterNode(
+                    name=writer,
+                    file_path=get_output_data_path("processed_data_results.json"),
+                ),
+            )
+
+        # Connect the pipeline
+        workflow.connect(data_reader, cleaner, {"data": "data"})
+        workflow.connect(cleaner, enricher, {"result": "result"})
+        workflow.connect(enricher, aggregator, {"result": "result"})
+        workflow.connect(aggregator, writer, {"result": "data"})
+
+        return "data_processing_pipeline"
+
+
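The same fill-in-the-missing-nodes behavior applies here, so pointing the pipeline at a specific CSV is a matter of registering the reader before calling the template. A sketch; the Workflow construction and file path are illustrative assumptions.

    from kailash.nodes.data import CSVReaderNode
    from kailash.workflow import Workflow  # assumed import path
    from kailash.workflow.templates import BusinessWorkflowTemplates

    wf = Workflow("etl-demo", name="ETL Demo")  # assumed constructor

    # Use a local CSV instead of the default examples path; the cleaner,
    # enricher, aggregator, and writer nodes are added by the template.
    wf.add_node("data_reader", CSVReaderNode(name="data_reader", file_path="sales.csv"))
    BusinessWorkflowTemplates.data_processing_pipeline(wf)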
 # Convenience methods to add to Workflow class
 def add_optimization_cycle(
     self,
@@ -754,6 +1419,61 @@ def add_batch_processing_cycle(
     )


+# Business workflow convenience methods
+def add_investment_pipeline(
+    self,
+    data_source: str = "market_data",
+    processor: str = "portfolio_analyzer",
+    validator: str = "risk_assessor",
+    output: str = "investment_report",
+) -> str:
+    """Add an investment data processing pipeline to this workflow."""
+    return BusinessWorkflowTemplates.investment_data_pipeline(
+        self, data_source, processor, validator, output
+    )
+
+
+def add_document_ai_workflow(
+    self,
+    document_reader: str = "pdf_reader",
+    text_processor: str = "ai_analyzer",
+    extractor: str = "data_extractor",
+    output: str = "structured_data",
+) -> str:
+    """Add a document AI processing workflow to this workflow."""
+    return BusinessWorkflowTemplates.document_ai_workflow(
+        self, document_reader, text_processor, extractor, output
+    )
+
+
+def add_api_integration_pattern(
+    self,
+    auth_node: str = "api_auth",
+    data_fetcher: str = "api_client",
+    transformer: str = "data_transformer",
+    validator: str = "response_validator",
+    output: str = "api_output",
+) -> str:
+    """Add an API integration pattern to this workflow."""
+    return BusinessWorkflowTemplates.api_integration_pattern(
+        self, auth_node, data_fetcher, transformer, validator, output
+    )
+
+
+def add_data_processing_pipeline(
+    self,
+    data_reader: str = "data_reader",
+    cleaner: str = "data_cleaner",
+    enricher: str = "data_enricher",
+    aggregator: str = "data_aggregator",
+    writer: str = "data_writer",
+) -> str:
+    """Add a data processing pipeline to this workflow."""
+    return BusinessWorkflowTemplates.data_processing_pipeline(
+        self, data_reader, cleaner, enricher, aggregator, writer
+    )
+
+
 # Add convenience methods to Workflow class
 Workflow.add_optimization_cycle = add_optimization_cycle
 Workflow.add_retry_cycle = add_retry_cycle
@@ -761,3 +1481,9 @@ Workflow.add_data_quality_cycle = add_data_quality_cycle
 Workflow.add_learning_cycle = add_learning_cycle
 Workflow.add_convergence_cycle = add_convergence_cycle
 Workflow.add_batch_processing_cycle = add_batch_processing_cycle
+
+# Add business workflow methods to Workflow class
+Workflow.add_investment_pipeline = add_investment_pipeline
+Workflow.add_document_ai_workflow = add_document_ai_workflow
+Workflow.add_api_integration_pattern = add_api_integration_pattern
+Workflow.add_data_processing_pipeline = add_data_processing_pipeline
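Because the assignments above run at import time, importing kailash.workflow.templates exposes the business templates as instance methods on Workflow. A sketch; the Workflow import path and constructor are illustrative assumptions.

    import kailash.workflow.templates  # noqa: F401 - registers the convenience methods
    from kailash.workflow import Workflow  # assumed import path

    wf = Workflow("report-demo", name="Report Demo")  # assumed constructor
    wf.add_investment_pipeline()         # returns "investment_pipeline"
    wf.add_data_processing_pipeline()    # returns "data_processing_pipeline"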