kailash 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. kailash/__init__.py +33 -1
  2. kailash/access_control/__init__.py +129 -0
  3. kailash/access_control/managers.py +461 -0
  4. kailash/access_control/rule_evaluators.py +467 -0
  5. kailash/access_control_abac.py +825 -0
  6. kailash/config/__init__.py +27 -0
  7. kailash/config/database_config.py +359 -0
  8. kailash/database/__init__.py +28 -0
  9. kailash/database/execution_pipeline.py +499 -0
  10. kailash/middleware/__init__.py +306 -0
  11. kailash/middleware/auth/__init__.py +33 -0
  12. kailash/middleware/auth/access_control.py +436 -0
  13. kailash/middleware/auth/auth_manager.py +422 -0
  14. kailash/middleware/auth/jwt_auth.py +477 -0
  15. kailash/middleware/auth/kailash_jwt_auth.py +616 -0
  16. kailash/middleware/communication/__init__.py +37 -0
  17. kailash/middleware/communication/ai_chat.py +989 -0
  18. kailash/middleware/communication/api_gateway.py +802 -0
  19. kailash/middleware/communication/events.py +470 -0
  20. kailash/middleware/communication/realtime.py +710 -0
  21. kailash/middleware/core/__init__.py +21 -0
  22. kailash/middleware/core/agent_ui.py +890 -0
  23. kailash/middleware/core/schema.py +643 -0
  24. kailash/middleware/core/workflows.py +396 -0
  25. kailash/middleware/database/__init__.py +63 -0
  26. kailash/middleware/database/base.py +113 -0
  27. kailash/middleware/database/base_models.py +525 -0
  28. kailash/middleware/database/enums.py +106 -0
  29. kailash/middleware/database/migrations.py +12 -0
  30. kailash/{api/database.py → middleware/database/models.py} +183 -291
  31. kailash/middleware/database/repositories.py +685 -0
  32. kailash/middleware/database/session_manager.py +19 -0
  33. kailash/middleware/mcp/__init__.py +38 -0
  34. kailash/middleware/mcp/client_integration.py +585 -0
  35. kailash/middleware/mcp/enhanced_server.py +576 -0
  36. kailash/nodes/__init__.py +25 -3
  37. kailash/nodes/admin/__init__.py +35 -0
  38. kailash/nodes/admin/audit_log.py +794 -0
  39. kailash/nodes/admin/permission_check.py +864 -0
  40. kailash/nodes/admin/role_management.py +823 -0
  41. kailash/nodes/admin/security_event.py +1519 -0
  42. kailash/nodes/admin/user_management.py +944 -0
  43. kailash/nodes/ai/a2a.py +24 -7
  44. kailash/nodes/ai/ai_providers.py +1 -0
  45. kailash/nodes/ai/embedding_generator.py +11 -11
  46. kailash/nodes/ai/intelligent_agent_orchestrator.py +99 -11
  47. kailash/nodes/ai/llm_agent.py +407 -2
  48. kailash/nodes/ai/self_organizing.py +85 -10
  49. kailash/nodes/api/auth.py +287 -6
  50. kailash/nodes/api/rest.py +151 -0
  51. kailash/nodes/auth/__init__.py +17 -0
  52. kailash/nodes/auth/directory_integration.py +1228 -0
  53. kailash/nodes/auth/enterprise_auth_provider.py +1328 -0
  54. kailash/nodes/auth/mfa.py +2338 -0
  55. kailash/nodes/auth/risk_assessment.py +872 -0
  56. kailash/nodes/auth/session_management.py +1093 -0
  57. kailash/nodes/auth/sso.py +1040 -0
  58. kailash/nodes/base.py +344 -13
  59. kailash/nodes/base_cycle_aware.py +4 -2
  60. kailash/nodes/base_with_acl.py +1 -1
  61. kailash/nodes/code/python.py +293 -12
  62. kailash/nodes/compliance/__init__.py +9 -0
  63. kailash/nodes/compliance/data_retention.py +1888 -0
  64. kailash/nodes/compliance/gdpr.py +2004 -0
  65. kailash/nodes/data/__init__.py +22 -2
  66. kailash/nodes/data/async_connection.py +469 -0
  67. kailash/nodes/data/async_sql.py +757 -0
  68. kailash/nodes/data/async_vector.py +598 -0
  69. kailash/nodes/data/readers.py +767 -0
  70. kailash/nodes/data/retrieval.py +360 -1
  71. kailash/nodes/data/sharepoint_graph.py +397 -21
  72. kailash/nodes/data/sql.py +94 -5
  73. kailash/nodes/data/streaming.py +68 -8
  74. kailash/nodes/data/vector_db.py +54 -4
  75. kailash/nodes/enterprise/__init__.py +13 -0
  76. kailash/nodes/enterprise/batch_processor.py +741 -0
  77. kailash/nodes/enterprise/data_lineage.py +497 -0
  78. kailash/nodes/logic/convergence.py +31 -9
  79. kailash/nodes/logic/operations.py +14 -3
  80. kailash/nodes/mixins/__init__.py +8 -0
  81. kailash/nodes/mixins/event_emitter.py +201 -0
  82. kailash/nodes/mixins/mcp.py +9 -4
  83. kailash/nodes/mixins/security.py +165 -0
  84. kailash/nodes/monitoring/__init__.py +7 -0
  85. kailash/nodes/monitoring/performance_benchmark.py +2497 -0
  86. kailash/nodes/rag/__init__.py +284 -0
  87. kailash/nodes/rag/advanced.py +1615 -0
  88. kailash/nodes/rag/agentic.py +773 -0
  89. kailash/nodes/rag/conversational.py +999 -0
  90. kailash/nodes/rag/evaluation.py +875 -0
  91. kailash/nodes/rag/federated.py +1188 -0
  92. kailash/nodes/rag/graph.py +721 -0
  93. kailash/nodes/rag/multimodal.py +671 -0
  94. kailash/nodes/rag/optimized.py +933 -0
  95. kailash/nodes/rag/privacy.py +1059 -0
  96. kailash/nodes/rag/query_processing.py +1335 -0
  97. kailash/nodes/rag/realtime.py +764 -0
  98. kailash/nodes/rag/registry.py +547 -0
  99. kailash/nodes/rag/router.py +837 -0
  100. kailash/nodes/rag/similarity.py +1854 -0
  101. kailash/nodes/rag/strategies.py +566 -0
  102. kailash/nodes/rag/workflows.py +575 -0
  103. kailash/nodes/security/__init__.py +19 -0
  104. kailash/nodes/security/abac_evaluator.py +1411 -0
  105. kailash/nodes/security/audit_log.py +91 -0
  106. kailash/nodes/security/behavior_analysis.py +1893 -0
  107. kailash/nodes/security/credential_manager.py +401 -0
  108. kailash/nodes/security/rotating_credentials.py +760 -0
  109. kailash/nodes/security/security_event.py +132 -0
  110. kailash/nodes/security/threat_detection.py +1103 -0
  111. kailash/nodes/testing/__init__.py +9 -0
  112. kailash/nodes/testing/credential_testing.py +499 -0
  113. kailash/nodes/transform/__init__.py +10 -2
  114. kailash/nodes/transform/chunkers.py +592 -1
  115. kailash/nodes/transform/processors.py +484 -14
  116. kailash/nodes/validation.py +321 -0
  117. kailash/runtime/access_controlled.py +1 -1
  118. kailash/runtime/async_local.py +41 -7
  119. kailash/runtime/docker.py +1 -1
  120. kailash/runtime/local.py +474 -55
  121. kailash/runtime/parallel.py +1 -1
  122. kailash/runtime/parallel_cyclic.py +1 -1
  123. kailash/runtime/testing.py +210 -2
  124. kailash/utils/migrations/__init__.py +25 -0
  125. kailash/utils/migrations/generator.py +433 -0
  126. kailash/utils/migrations/models.py +231 -0
  127. kailash/utils/migrations/runner.py +489 -0
  128. kailash/utils/secure_logging.py +342 -0
  129. kailash/workflow/__init__.py +16 -0
  130. kailash/workflow/cyclic_runner.py +3 -4
  131. kailash/workflow/graph.py +70 -2
  132. kailash/workflow/resilience.py +249 -0
  133. kailash/workflow/templates.py +726 -0
  134. {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/METADATA +253 -20
  135. kailash-0.4.0.dist-info/RECORD +223 -0
  136. kailash/api/__init__.py +0 -17
  137. kailash/api/__main__.py +0 -6
  138. kailash/api/studio_secure.py +0 -893
  139. kailash/mcp/__main__.py +0 -13
  140. kailash/mcp/server_new.py +0 -336
  141. kailash/mcp/servers/__init__.py +0 -12
  142. kailash-0.3.1.dist-info/RECORD +0 -136
  143. {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/WHEEL +0 -0
  144. {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/entry_points.txt +0 -0
  145. {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/licenses/LICENSE +0 -0
  146. {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/top_level.txt +0 -0
@@ -664,6 +664,671 @@ items_processed += actual_batch_size
     return cycle_id
 
 
+class BusinessWorkflowTemplates:
+    """Pre-built templates for common business workflow patterns."""
+
+    @staticmethod
+    def investment_data_pipeline(
+        workflow: Workflow,
+        data_source: str = "market_data",
+        processor: str = "portfolio_analyzer",
+        validator: str = "risk_assessor",
+        output: str = "investment_report",
+    ) -> str:
+        """
+        Create a complete investment data processing pipeline.
+
+        Args:
+            workflow: Target workflow
+            data_source: Node that fetches market/portfolio data
+            processor: Node that analyzes investment data
+            validator: Node that validates risk metrics
+            output: Node that generates investment reports
+
+        Returns:
+            str: Pipeline identifier
+        """
+        # Add data fetching node if not exists
+        if data_source not in workflow.nodes:
+            from kailash.nodes.data import HTTPRequestNode
+
+            workflow.add_node(
+                data_source,
+                HTTPRequestNode(
+                    name=data_source,
+                    url="https://api.example.com/market-data",
+                    method="GET",
+                ),
+            )
+
+        # Add portfolio analysis node if not exists
+        if processor not in workflow.nodes:
+            from kailash.nodes.code import PythonCodeNode
+
+            analysis_code = """
+import pandas as pd
+import numpy as np
+from datetime import datetime, timedelta
+
+# Process investment data
+data = market_data if 'market_data' in locals() else {}
+portfolio_value = data.get('portfolio_value', 1000000)
+positions = data.get('positions', [])
+
+# Calculate key metrics
+total_return = sum(pos.get('return_pct', 0) * pos.get('weight', 0) for pos in positions)
+volatility = np.std([pos.get('return_pct', 0) for pos in positions])
+sharpe_ratio = total_return / volatility if volatility > 0 else 0
+
+# Risk assessment
+risk_level = 'LOW' if volatility < 0.1 else 'MEDIUM' if volatility < 0.2 else 'HIGH'
+
+result = {
+    'portfolio_value': portfolio_value,
+    'total_return': total_return,
+    'volatility': volatility,
+    'sharpe_ratio': sharpe_ratio,
+    'risk_level': risk_level,
+    'positions_count': len(positions),
+    'analysis_date': datetime.now().isoformat()
+}
+"""
+            workflow.add_node(
+                processor, PythonCodeNode(name=processor, code=analysis_code)
+            )
+
+        # Add risk validation node if not exists
+        if validator not in workflow.nodes:
+            from kailash.nodes.code import PythonCodeNode
+
+            validation_code = """
+# Risk validation and compliance checks
+analysis = result if 'result' in locals() else {}
+
+# Risk limits and compliance
+max_volatility = 0.25
+max_single_position = 0.10
+min_diversification = 5
+
+# Validate metrics
+volatility_ok = analysis.get('volatility', 0) <= max_volatility
+diversification_ok = analysis.get('positions_count', 0) >= min_diversification
+risk_acceptable = analysis.get('risk_level') in ['LOW', 'MEDIUM']
+
+# Generate warnings
+warnings = []
+if not volatility_ok:
+    warnings.append(f"Portfolio volatility {analysis.get('volatility', 0):.2%} exceeds limit {max_volatility:.2%}")
+if not diversification_ok:
+    warnings.append(f"Insufficient diversification: {analysis.get('positions_count', 0)} positions (min {min_diversification})")
+if not risk_acceptable:
+    warnings.append(f"Risk level {analysis.get('risk_level')} may be too high")
+
+validation_result = {
+    'validated': len(warnings) == 0,
+    'warnings': warnings,
+    'compliance_score': (int(volatility_ok) + int(diversification_ok) + int(risk_acceptable)) / 3,
+    'validation_date': analysis.get('analysis_date'),
+    'risk_metrics': analysis
+}
+"""
+            workflow.add_node(
+                validator, PythonCodeNode(name=validator, code=validation_code)
+            )
+
+        # Add report generation node if not exists
+        if output not in workflow.nodes:
+            from kailash.examples.utils.data_paths import get_output_data_path
+            from kailash.nodes.data import JSONWriterNode
+
+            workflow.add_node(
+                output,
+                JSONWriterNode(
+                    name=output,
+                    file_path=get_output_data_path("investment_report.json"),
+                ),
+            )
+
+        # Connect the pipeline
+        workflow.connect(data_source, processor)
+        workflow.connect(processor, validator, {"result": "result"})
+        workflow.connect(validator, output, {"validation_result": "data"})
+
+        return "investment_pipeline"
+
+    @staticmethod
+    def document_ai_workflow(
+        workflow: Workflow,
+        document_reader: str = "pdf_reader",
+        text_processor: str = "ai_analyzer",
+        extractor: str = "data_extractor",
+        output: str = "structured_data",
+    ) -> str:
+        """
+        Create a document AI processing workflow.
+
+        Args:
+            workflow: Target workflow
+            document_reader: Node that reads documents
+            text_processor: Node that processes text with AI
+            extractor: Node that extracts structured data
+            output: Node that saves extracted data
+
+        Returns:
+            str: Workflow identifier
+        """
+        # Add document reader if not exists
+        if document_reader not in workflow.nodes:
+            from kailash.examples.utils.data_paths import get_input_data_path
+            from kailash.nodes.data import DirectoryReaderNode
+
+            workflow.add_node(
+                document_reader,
+                DirectoryReaderNode(
+                    name=document_reader,
+                    directory_path=get_input_data_path("documents"),
+                    file_types=[".pdf", ".docx", ".txt"],
+                ),
+            )
+
+        # Add AI text processor if not exists
+        if text_processor not in workflow.nodes:
+            from kailash.nodes.ai import LLMAgentNode
+
+            workflow.add_node(
+                text_processor,
+                LLMAgentNode(
+                    name=text_processor,
+                    model="llama3.2",
+                    prompt_template="""
+Analyze the following document and extract key information:
+
+Document: {document_content}
+
+Please extract:
+1. Document type (contract, invoice, report, etc.)
+2. Key dates mentioned
+3. Important entities (people, companies, amounts)
+4. Main topics or subjects
+5. Any action items or deadlines
+
+Provide the response in JSON format with these fields:
+- document_type
+- dates
+- entities
+- topics
+- action_items
+""",
+                    base_url="http://localhost:11434",
+                ),
+            )
+
+        # Add data extractor if not exists
+        if extractor not in workflow.nodes:
+            from kailash.nodes.code import PythonCodeNode
+
+            extraction_code = """
+import json
+import re
+from datetime import datetime
+
+# Process AI analysis result
+ai_response = response if 'response' in locals() else ""
+document_info = files if 'files' in locals() else []
+
+# Try to parse JSON from AI response
+try:
+    # Extract JSON from response (handle cases where AI adds extra text)
+    json_match = re.search(r'\\{.*\\}', ai_response, re.DOTALL)
+    if json_match:
+        extracted_data = json.loads(json_match.group())
+    else:
+        # Fallback if no JSON found
+        extracted_data = {"raw_response": ai_response}
+except:
+    extracted_data = {"raw_response": ai_response}
+
+# Add metadata
+extracted_data.update({
+    'extraction_date': datetime.now().isoformat(),
+    'document_count': len(document_info) if isinstance(document_info, list) else 1,
+    'processing_status': 'completed'
+})
+
+# Structure the final result
+result = {
+    'extracted_data': extracted_data,
+    'source_documents': document_info,
+    'processing_metadata': {
+        'extraction_method': 'ai_analysis',
+        'model_used': 'llama3.2',
+        'processing_date': datetime.now().isoformat()
+    }
+}
+"""
+            workflow.add_node(
+                extractor, PythonCodeNode(name=extractor, code=extraction_code)
+            )
+
+        # Add output writer if not exists
+        if output not in workflow.nodes:
+            from kailash.examples.utils.data_paths import get_output_data_path
+            from kailash.nodes.data import JSONWriterNode
+
+            workflow.add_node(
+                output,
+                JSONWriterNode(
+                    name=output,
+                    file_path=get_output_data_path("extracted_document_data.json"),
+                ),
+            )
+
+        # Connect the workflow
+        workflow.connect(document_reader, text_processor, {"files": "document_content"})
+        workflow.connect(
+            text_processor, extractor, {"response": "response", "files": "files"}
+        )
+        workflow.connect(extractor, output, {"result": "data"})
+
+        return "document_ai_pipeline"
+
+    @staticmethod
+    def api_integration_pattern(
+        workflow: Workflow,
+        auth_node: str = "api_auth",
+        data_fetcher: str = "api_client",
+        transformer: str = "data_transformer",
+        validator: str = "response_validator",
+        output: str = "api_output",
+    ) -> str:
+        """
+        Create a robust API integration pattern with auth, retry, and validation.
+
+        Args:
+            workflow: Target workflow
+            auth_node: Node that handles API authentication
+            data_fetcher: Node that fetches data from API
+            transformer: Node that transforms API responses
+            validator: Node that validates responses
+            output: Node that outputs processed data
+
+        Returns:
+            str: Integration identifier
+        """
+        # Add OAuth2 authentication if not exists
+        if auth_node not in workflow.nodes:
+            from kailash.nodes.api import OAuth2Node
+
+            workflow.add_node(
+                auth_node,
+                OAuth2Node(
+                    name=auth_node,
+                    client_id="${API_CLIENT_ID}",
+                    client_secret="${API_CLIENT_SECRET}",
+                    token_url="https://api.example.com/oauth/token",
+                    scope="read write",
+                ),
+            )
+
+        # Add API client with retry logic if not exists
+        if data_fetcher not in workflow.nodes:
+            from kailash.nodes.api import HTTPRequestNode
+
+            workflow.add_node(
+                data_fetcher,
+                HTTPRequestNode(
+                    name=data_fetcher,
+                    url="https://api.example.com/data",
+                    method="GET",
+                    timeout=30,
+                    retry_count=3,
+                ),
+            )
+
+        # Add data transformer if not exists
+        if transformer not in workflow.nodes:
+            from kailash.nodes.code import PythonCodeNode
+
+            transform_code = """
+import json
+from datetime import datetime
+
+# Transform API response data
+response_data = response if 'response' in locals() else {}
+token_info = token if 'token' in locals() else {}
+
+# Handle different response formats
+if isinstance(response_data, str):
+    try:
+        response_data = json.loads(response_data)
+    except:
+        response_data = {"raw_response": response_data}
+
+# Transform data structure
+transformed_data = {
+    'api_data': response_data,
+    'request_metadata': {
+        'timestamp': datetime.now().isoformat(),
+        'authenticated': bool(token_info.get('access_token')),
+        'token_expires': token_info.get('expires_at'),
+        'data_source': 'external_api'
+    },
+    'data_quality': {
+        'record_count': len(response_data) if isinstance(response_data, list) else 1,
+        'has_errors': 'error' in str(response_data).lower(),
+        'response_size_kb': len(str(response_data)) / 1024
+    }
+}
+
+result = transformed_data
+"""
+            workflow.add_node(
+                transformer, PythonCodeNode(name=transformer, code=transform_code)
+            )
+
+        # Add response validator if not exists
+        if validator not in workflow.nodes:
+            from kailash.nodes.code import PythonCodeNode
+
+            validation_code = """
+# Validate API response and transformed data
+data = result if 'result' in locals() else {}
+
+# Validation checks
+api_data = data.get('api_data', {})
+metadata = data.get('request_metadata', {})
+quality = data.get('data_quality', {})
+
+validation_results = {
+    'data_present': bool(api_data),
+    'authenticated_request': metadata.get('authenticated', False),
+    'no_errors': not quality.get('has_errors', True),
+    'reasonable_size': quality.get('response_size_kb', 0) > 0,
+    'recent_data': True  # Could add timestamp validation
+}
+
+# Overall validation
+all_valid = all(validation_results.values())
+validation_score = sum(validation_results.values()) / len(validation_results)
+
+validated_result = {
+    'validation_passed': all_valid,
+    'validation_score': validation_score,
+    'validation_details': validation_results,
+    'validated_data': data if all_valid else None,
+    'validation_timestamp': metadata.get('timestamp')
+}
+"""
+            workflow.add_node(
+                validator, PythonCodeNode(name=validator, code=validation_code)
+            )
+
+        # Add output node if not exists
+        if output not in workflow.nodes:
+            from kailash.examples.utils.data_paths import get_output_data_path
+            from kailash.nodes.data import JSONWriterNode
+
+            workflow.add_node(
+                output,
+                JSONWriterNode(
+                    name=output,
+                    file_path=get_output_data_path("api_integration_result.json"),
+                ),
+            )
+
+        # Connect the integration pattern
+        workflow.connect(auth_node, data_fetcher, {"token": "auth_header"})
+        workflow.connect(
+            data_fetcher, transformer, {"response": "response", "token": "token"}
+        )
+        workflow.connect(transformer, validator, {"result": "result"})
+        workflow.connect(validator, output, {"validated_result": "data"})
+
+        return "api_integration"
+
+    @staticmethod
+    def data_processing_pipeline(
+        workflow: Workflow,
+        data_reader: str = "data_reader",
+        cleaner: str = "data_cleaner",
+        enricher: str = "data_enricher",
+        aggregator: str = "data_aggregator",
+        writer: str = "data_writer",
+    ) -> str:
+        """
+        Create a comprehensive data processing pipeline.
+
+        Args:
+            workflow: Target workflow
+            data_reader: Node that reads raw data
+            cleaner: Node that cleans and validates data
+            enricher: Node that enriches data with additional information
+            aggregator: Node that aggregates and summarizes data
+            writer: Node that writes processed data
+
+        Returns:
+            str: Pipeline identifier
+        """
+        # Add data reader if not exists
+        if data_reader not in workflow.nodes:
+            from kailash.examples.utils.data_paths import get_input_data_path
+            from kailash.nodes.data import CSVReaderNode
+
+            workflow.add_node(
+                data_reader,
+                CSVReaderNode(
+                    name=data_reader, file_path=get_input_data_path("raw_data.csv")
+                ),
+            )
+
+        # Add data cleaner if not exists
+        if cleaner not in workflow.nodes:
+            from kailash.nodes.code import PythonCodeNode
+
+            cleaning_code = """
+import pandas as pd
+import numpy as np
+from datetime import datetime
+
+# Clean and validate data
+data = data if 'data' in locals() else []
+
+# Convert to DataFrame for easier processing
+if isinstance(data, list) and data:
+    df = pd.DataFrame(data)
+elif isinstance(data, dict):
+    df = pd.DataFrame([data])
+else:
+    df = pd.DataFrame()
+
+# Data cleaning operations
+if not df.empty:
+    # Remove duplicates
+    original_count = len(df)
+    df = df.drop_duplicates()
+    duplicates_removed = original_count - len(df)
+
+    # Handle missing values
+    numeric_columns = df.select_dtypes(include=[np.number]).columns
+    df[numeric_columns] = df[numeric_columns].fillna(df[numeric_columns].mean())
+
+    # Remove outliers (3 standard deviations)
+    for col in numeric_columns:
+        mean = df[col].mean()
+        std = df[col].std()
+        df = df[abs(df[col] - mean) <= 3 * std]
+
+    # Standardize text fields
+    text_columns = df.select_dtypes(include=['object']).columns
+    for col in text_columns:
+        df[col] = df[col].astype(str).str.strip().str.title()
+
+    cleaned_data = df.to_dict('records')
+else:
+    cleaned_data = []
+    duplicates_removed = 0
+
+result = {
+    'cleaned_data': cleaned_data,
+    'cleaning_stats': {
+        'original_records': len(data) if isinstance(data, list) else 1,
+        'cleaned_records': len(cleaned_data),
+        'duplicates_removed': duplicates_removed,
+        'cleaning_date': datetime.now().isoformat()
+    }
+}
+"""
+            workflow.add_node(cleaner, PythonCodeNode(name=cleaner, code=cleaning_code))
+
+        # Add data enricher if not exists
+        if enricher not in workflow.nodes:
+            from kailash.nodes.code import PythonCodeNode
+
+            enrichment_code = """
+import pandas as pd
+from datetime import datetime
+
+# Enrich data with additional calculated fields
+clean_result = result if 'result' in locals() else {}
+cleaned_data = clean_result.get('cleaned_data', [])
+
+if cleaned_data:
+    df = pd.DataFrame(cleaned_data)
+
+    # Add calculated fields
+    if 'amount' in df.columns:
+        df['amount_category'] = pd.cut(df['amount'],
+            bins=[0, 100, 1000, 10000, float('inf')],
+            labels=['Small', 'Medium', 'Large', 'Enterprise'])
+
+    if 'date' in df.columns:
+        df['date'] = pd.to_datetime(df['date'], errors='coerce')
+        df['year'] = df['date'].dt.year
+        df['month'] = df['date'].dt.month
+        df['quarter'] = df['date'].dt.quarter
+
+    # Add data quality scores
+    df['completeness_score'] = (df.count(axis=1) / len(df.columns))
+    df['data_quality'] = pd.cut(df['completeness_score'],
+        bins=[0, 0.5, 0.8, 1.0],
+        labels=['Poor', 'Fair', 'Good'])
+
+    enriched_data = df.to_dict('records')
+else:
+    enriched_data = []
+
+result = {
+    'enriched_data': enriched_data,
+    'enrichment_stats': {
+        'records_enriched': len(enriched_data),
+        'fields_added': ['amount_category', 'year', 'month', 'quarter', 'completeness_score', 'data_quality'],
+        'enrichment_date': datetime.now().isoformat()
+    },
+    'original_stats': clean_result.get('cleaning_stats', {})
+}
+"""
+            workflow.add_node(
+                enricher, PythonCodeNode(name=enricher, code=enrichment_code)
+            )
+
+        # Add aggregator if not exists
+        if aggregator not in workflow.nodes:
+            from kailash.nodes.code import PythonCodeNode
+
+            aggregation_code = """
+import pandas as pd
+from datetime import datetime
+
+# Aggregate and summarize enriched data
+enrich_result = result if 'result' in locals() else {}
+enriched_data = enrich_result.get('enriched_data', [])
+
+if enriched_data:
+    df = pd.DataFrame(enriched_data)
+
+    # Calculate summary statistics
+    summary_stats = {}
+
+    # Numeric summaries
+    numeric_cols = df.select_dtypes(include=['number']).columns
+    for col in numeric_cols:
+        summary_stats[col] = {
+            'mean': df[col].mean(),
+            'median': df[col].median(),
+            'std': df[col].std(),
+            'min': df[col].min(),
+            'max': df[col].max(),
+            'count': df[col].count()
+        }
+
+    # Categorical summaries
+    categorical_cols = df.select_dtypes(include=['object', 'category']).columns
+    category_summaries = {}
+    for col in categorical_cols:
+        if col not in ['data_quality', 'amount_category']:  # Skip our generated categories
+            category_summaries[col] = df[col].value_counts().to_dict()
+
+    # Data quality summary
+    quality_summary = {
+        'total_records': len(df),
+        'complete_records': (df['completeness_score'] == 1.0).sum(),
+        'quality_distribution': df['data_quality'].value_counts().to_dict() if 'data_quality' in df.columns else {},
+        'average_completeness': df['completeness_score'].mean() if 'completeness_score' in df.columns else 1.0
+    }
+
+    aggregated_result = {
+        'summary_statistics': summary_stats,
+        'category_summaries': category_summaries,
+        'quality_summary': quality_summary,
+        'aggregation_date': datetime.now().isoformat()
+    }
+else:
+    aggregated_result = {
+        'summary_statistics': {},
+        'category_summaries': {},
+        'quality_summary': {'total_records': 0},
+        'aggregation_date': datetime.now().isoformat()
+    }
+
+result = {
+    'aggregated_results': aggregated_result,
+    'processed_data': enriched_data,
+    'processing_pipeline': {
+        'original_stats': enrich_result.get('original_stats', {}),
+        'enrichment_stats': enrich_result.get('enrichment_stats', {}),
+        'aggregation_stats': {
+            'fields_summarized': len(aggregated_result['summary_statistics']),
+            'categories_analyzed': len(aggregated_result['category_summaries'])
+        }
+    }
+}
+"""
+            workflow.add_node(
+                aggregator, PythonCodeNode(name=aggregator, code=aggregation_code)
+            )
+
+        # Add data writer if not exists
+        if writer not in workflow.nodes:
+            from kailash.examples.utils.data_paths import get_output_data_path
+            from kailash.nodes.data import JSONWriterNode
+
+            workflow.add_node(
+                writer,
+                JSONWriterNode(
+                    name=writer,
+                    file_path=get_output_data_path("processed_data_results.json"),
+                ),
+            )
+
+        # Connect the pipeline
+        workflow.connect(data_reader, cleaner, {"data": "data"})
+        workflow.connect(cleaner, enricher, {"result": "result"})
+        workflow.connect(enricher, aggregator, {"result": "result"})
+        workflow.connect(aggregator, writer, {"result": "data"})
+
+        return "data_processing_pipeline"
+
+
 # Convenience methods to add to Workflow class
 def add_optimization_cycle(
     self,
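
The hunk above adds the new BusinessWorkflowTemplates class to kailash/workflow/templates.py (file 133 in the list). A minimal usage sketch follows; it is not part of the diff, and it assumes the Workflow constructor arguments and the LocalRuntime.execute API from the package's documentation, along with the default node names defined by the template.

# Sketch only (not from the diff): drive one of the new templates directly.
# Assumes the documented Workflow(workflow_id, name=...) constructor and
# LocalRuntime().execute(workflow) -> (results, run_id) API.
from kailash.workflow import Workflow
from kailash.workflow.templates import BusinessWorkflowTemplates
from kailash.runtime.local import LocalRuntime

workflow = Workflow(workflow_id="demo", name="investment_demo")

# Populates market_data -> portfolio_analyzer -> risk_assessor -> investment_report
# with default nodes, wires them together, and returns "investment_pipeline".
pipeline_id = BusinessWorkflowTemplates.investment_data_pipeline(workflow)

results, run_id = LocalRuntime().execute(workflow)
print(pipeline_id, list(results))
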
@@ -754,6 +1419,61 @@ def add_batch_processing_cycle
     )
 
 
+# Business workflow convenience methods
+def add_investment_pipeline(
+    self,
+    data_source: str = "market_data",
+    processor: str = "portfolio_analyzer",
+    validator: str = "risk_assessor",
+    output: str = "investment_report",
+) -> str:
+    """Add an investment data processing pipeline to this workflow."""
+    return BusinessWorkflowTemplates.investment_data_pipeline(
+        self, data_source, processor, validator, output
+    )
+
+
+def add_document_ai_workflow(
+    self,
+    document_reader: str = "pdf_reader",
+    text_processor: str = "ai_analyzer",
+    extractor: str = "data_extractor",
+    output: str = "structured_data",
+) -> str:
+    """Add a document AI processing workflow to this workflow."""
+    return BusinessWorkflowTemplates.document_ai_workflow(
+        self, document_reader, text_processor, extractor, output
+    )
+
+
+def add_api_integration_pattern(
+    self,
+    auth_node: str = "api_auth",
+    data_fetcher: str = "api_client",
+    transformer: str = "data_transformer",
+    validator: str = "response_validator",
+    output: str = "api_output",
+) -> str:
+    """Add an API integration pattern to this workflow."""
+    return BusinessWorkflowTemplates.api_integration_pattern(
+        self, auth_node, data_fetcher, transformer, validator, output
+    )
+
+
+def add_data_processing_pipeline(
+    self,
+    data_reader: str = "data_reader",
+    cleaner: str = "data_cleaner",
+    enricher: str = "data_enricher",
+    aggregator: str = "data_aggregator",
+    writer: str = "data_writer",
+) -> str:
+    """Add a data processing pipeline to this workflow."""
+    return BusinessWorkflowTemplates.data_processing_pipeline(
+        self, data_reader, cleaner, enricher, aggregator, writer
+    )
+
+
 # Add convenience methods to Workflow class
 Workflow.add_optimization_cycle = add_optimization_cycle
 Workflow.add_retry_cycle = add_retry_cycle
@@ -761,3 +1481,9 @@ Workflow.add_data_quality_cycle = add_data_quality_cycle
 Workflow.add_learning_cycle = add_learning_cycle
 Workflow.add_convergence_cycle = add_convergence_cycle
 Workflow.add_batch_processing_cycle = add_batch_processing_cycle
+
+# Add business workflow methods to Workflow class
+Workflow.add_investment_pipeline = add_investment_pipeline
+Workflow.add_document_ai_workflow = add_document_ai_workflow
+Workflow.add_api_integration_pattern = add_api_integration_pattern
+Workflow.add_data_processing_pipeline = add_data_processing_pipeline
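
The last two hunks bind the templates onto Workflow instances as convenience methods. A hedged sketch of the resulting instance-level API is below; the Workflow constructor arguments are assumed as in the earlier sketch, the node names are the defaults from the diff, and the explicit import of kailash.workflow.templates is only needed if the package does not already import that module on its own.

# Sketch only: the module-level assignments above attach the helpers to Workflow.
from kailash.workflow import Workflow
import kailash.workflow.templates  # noqa: F401  (runs the Workflow.* assignments)

workflow = Workflow(workflow_id="etl", name="quarterly_etl")

# Builds data_reader -> data_cleaner -> data_enricher -> data_aggregator -> data_writer
# with default nodes and returns the identifier string from the template.
template_id = workflow.add_data_processing_pipeline()
assert template_id == "data_processing_pipeline"
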