sdg-hub 0.7.2__tar.gz → 0.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sdg_hub-0.8.0/.github/actions/free-disk-space/action.yml +26 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/.github/workflows/actionlint.dockerfile +1 -1
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/.github/workflows/integration-test.yml +1 -1
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/CLAUDE.md +34 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/PKG-INFO +2 -2
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/docs/blocks/llm-blocks.md +2 -2
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/docs/flows/overview.md +3 -3
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/examples/text_analysis/structured_insights_demo.ipynb +3 -3
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/pyproject.toml +1 -1
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/_version.py +3 -3
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/__init__.py +13 -1
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/blocks/__init__.py +11 -2
- sdg_hub-0.8.0/src/sdg_hub/core/blocks/agent/__init__.py +6 -0
- sdg_hub-0.8.0/src/sdg_hub/core/blocks/agent/agent_block.py +397 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/blocks/base.py +4 -1
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/blocks/filtering/column_value_filter.py +2 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/blocks/llm/__init__.py +3 -2
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/blocks/llm/llm_chat_block.py +2 -0
- sdg_hub-0.7.2/src/sdg_hub/core/blocks/llm/llm_parser_block.py → sdg_hub-0.8.0/src/sdg_hub/core/blocks/llm/llm_response_extractor_block.py +32 -9
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/blocks/llm/prompt_builder_block.py +2 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/blocks/llm/text_parser_block.py +2 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/blocks/transform/duplicate_columns.py +2 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/blocks/transform/index_based_mapper.py +2 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/blocks/transform/json_structure_block.py +2 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/blocks/transform/melt_columns.py +2 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/blocks/transform/rename_columns.py +12 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/blocks/transform/text_concat.py +2 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/blocks/transform/uniform_col_val_setter.py +2 -0
- sdg_hub-0.8.0/src/sdg_hub/core/connectors/__init__.py +46 -0
- sdg_hub-0.8.0/src/sdg_hub/core/connectors/agent/__init__.py +10 -0
- sdg_hub-0.8.0/src/sdg_hub/core/connectors/agent/base.py +233 -0
- sdg_hub-0.8.0/src/sdg_hub/core/connectors/agent/langflow.py +151 -0
- sdg_hub-0.8.0/src/sdg_hub/core/connectors/base.py +99 -0
- sdg_hub-0.8.0/src/sdg_hub/core/connectors/exceptions.py +41 -0
- sdg_hub-0.8.0/src/sdg_hub/core/connectors/http/__init__.py +6 -0
- sdg_hub-0.8.0/src/sdg_hub/core/connectors/http/client.py +150 -0
- sdg_hub-0.8.0/src/sdg_hub/core/connectors/registry.py +112 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/flow/base.py +7 -31
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/utils/flow_metrics.py +3 -3
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/evaluation/rag/flow.yaml +6 -6
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml +4 -4
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/flow.yaml +3 -3
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml +4 -4
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml +2 -2
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +7 -7
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml +7 -7
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/text_analysis/structured_insights/flow.yaml +4 -4
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub.egg-info/PKG-INFO +2 -2
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub.egg-info/SOURCES.txt +24 -2
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub.egg-info/requires.txt +1 -1
- sdg_hub-0.8.0/tests/blocks/agent/__init__.py +2 -0
- sdg_hub-0.8.0/tests/blocks/agent/test_agent_block.py +502 -0
- sdg_hub-0.7.2/tests/blocks/llm/test_llm_parser_block.py → sdg_hub-0.8.0/tests/blocks/llm/test_llm_response_extractor_block.py +55 -52
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/blocks/llm/test_promptbuilderblock.py +1 -1
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/blocks/test_base_block.py +4 -3
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/blocks/transform/test_melt_columns.py +3 -3
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/blocks/transform/test_rename_columns.py +9 -0
- sdg_hub-0.8.0/tests/connectors/__init__.py +2 -0
- sdg_hub-0.8.0/tests/connectors/agent/__init__.py +2 -0
- sdg_hub-0.8.0/tests/connectors/agent/test_base.py +86 -0
- sdg_hub-0.8.0/tests/connectors/agent/test_langflow.py +87 -0
- sdg_hub-0.8.0/tests/connectors/http/__init__.py +2 -0
- sdg_hub-0.8.0/tests/connectors/http/test_client.py +80 -0
- sdg_hub-0.8.0/tests/connectors/test_base.py +53 -0
- sdg_hub-0.8.0/tests/connectors/test_exceptions.py +29 -0
- sdg_hub-0.8.0/tests/connectors/test_registry.py +60 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/flow/test_base.py +28 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/utils/test_flow_metrics.py +11 -11
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tox.ini +2 -2
- sdg_hub-0.7.2/.github/actions/free-disk-space/action.yml +0 -19
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/.github/actionlint.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/.github/dependabot.yml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/.github/mergify.yml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/.github/workflows/actionlint.yml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/.github/workflows/docs.yml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/.github/workflows/lint.yml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/.github/workflows/matchers/actionlint.json +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/.github/workflows/matchers/pylint.json +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/.github/workflows/packer.yml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/.github/workflows/pypi.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/.github/workflows/test.yml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/.gitignore +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/.isort.cfg +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/.markdownlint-cli2.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/.pre-commit-config.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/.pylintrc +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/CONTRIBUTING.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/LICENSE +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/Makefile +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/README.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/docs/.nojekyll +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/docs/README.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/docs/_coverpage.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/docs/_navbar.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/docs/_sidebar.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/docs/api-reference.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/docs/assets/logo.png +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/docs/assets/sdg-hub-cover.png +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/docs/blocks/custom-blocks.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/docs/blocks/filtering-blocks.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/docs/blocks/overview.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/docs/blocks/transform-blocks.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/docs/concepts.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/docs/development.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/docs/flows/available-flows.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/docs/flows/custom-flows.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/docs/flows/discovery.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/docs/index.html +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/docs/installation.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/docs/quick-start.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/.env.example +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/README.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/document_pre_processing.ipynb +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/imgs/quality_benchmark_accuracy.png +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/knowledge_generation.ipynb +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/knowledge_mixing.ipynb +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/knowledge_mixing_utils.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/raft_builder.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/examples/knowledge_tuning/instructlab/.gitignore +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/examples/knowledge_tuning/instructlab/README.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/examples/knowledge_tuning/instructlab/assets/imgs/instructlab-banner.png +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/examples/knowledge_tuning/instructlab/docling_v2_config.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/examples/knowledge_tuning/instructlab/docparser.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/examples/knowledge_tuning/instructlab/docparser_v2.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.json +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.pdf +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/qna.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/examples/knowledge_tuning/instructlab/document_pre_processing.ipynb +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/examples/knowledge_tuning/instructlab/knowledge_generation_and_mixing.ipynb +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/examples/knowledge_tuning/instructlab/knowledge_generation_ja.ipynb +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/examples/knowledge_tuning/instructlab/logger_config.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/examples/knowledge_tuning/knowledge_utils.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/examples/rag_evaluation/ibm-annual-report-2024.pdf +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/examples/rag_evaluation/rag_evaluation_dataset_generation.ipynb +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/examples/text_analysis/README.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/examples/text_analysis/extract_stock_tickers.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/scripts/packer/centos.pkr.hcl +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/scripts/packer/setup-centos.sh +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/scripts/ruff.sh +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/scripts/snyk_notebook_scan.sh +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/setup.cfg +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/blocks/filtering/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/blocks/llm/error_handler.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/blocks/registry.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/blocks/transform/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/flow/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/flow/checkpointer.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/flow/metadata.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/flow/registry.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/flow/validation.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/utils/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/utils/datautils.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/utils/error_handling.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/utils/flow_id_words.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/utils/flow_identifier.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/utils/logger_config.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/utils/path_resolution.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/utils/time_estimator.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/core/utils/yaml_utils.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/evaluation/rag/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/evaluation/rag/answer_generation.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/evaluation/rag/conceptual_qa_generation.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/evaluation/rag/context_extraction.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/evaluation/rag/groundedness_critic.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/evaluation/rag/question_evolution.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/evaluation/rag/topic_generation.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/detailed_summary.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/extractive_summary.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_answers.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_multiple_qa.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_question_list.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/key_facts_summary.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/README.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/atomic_facts.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/detailed_summary.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_faithfulness.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_question.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_relevancy.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/extractive_summary.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/generate_questions_responses.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/README.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/atomic_facts_ja.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/detailed_summary_ja.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/extractive_summary_ja.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/generate_questions_responses_ja.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/text_analysis/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/text_analysis/structured_insights/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/text_analysis/structured_insights/analyze_sentiment.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/text_analysis/structured_insights/extract_entities.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/text_analysis/structured_insights/extract_keywords.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/flows/text_analysis/structured_insights/summarize.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub/py.typed +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub.egg-info/dependency_links.txt +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/src/sdg_hub.egg-info/top_level.txt +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/blocks/filtering/test_columnvaluefilter.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/blocks/llm/test_llm_chat_block.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/blocks/llm/test_textparserblock.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/blocks/test_registry.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/blocks/testdata/test_config.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/blocks/testdata/test_prompt_format_config.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/blocks/testdata/test_prompt_format_no_system.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/blocks/testdata/test_prompt_format_strict.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/blocks/testdata/test_prompt_invalid_final_role.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/blocks/testdata/test_prompt_no_user_messages.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/blocks/transform/test_index_based_mapper.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/blocks/transform/test_json_structure_block.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/blocks/transform/test_text_concat.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/blocks/transform/test_uniform_col_val_setter.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/flow/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/flow/conftest.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/flow/test_checkpointer.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/flow/test_dataset_requirements.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/flow/test_integration.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/flow/test_metadata.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/flow/test_registry.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/flow/test_time_estimation.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/flow/test_validation.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/integration/README.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/integration/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/integration/knowledge_tuning/enhanced_summary_knowledge_tuning/README.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/integration/knowledge_tuning/enhanced_summary_knowledge_tuning/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/integration/knowledge_tuning/enhanced_summary_knowledge_tuning/conftest.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/integration/knowledge_tuning/enhanced_summary_knowledge_tuning/test_data/test_seed_data.jsonl +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/integration/knowledge_tuning/enhanced_summary_knowledge_tuning/test_functional.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/utils/test_datautils.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/utils/test_error_handling.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.8.0}/tests/utils/test_path_resolution.py +0 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
name: 'Free Disk Space'
|
|
2
|
+
description: 'Frees disk space on the runner for Python ML workloads'
|
|
3
|
+
runs:
|
|
4
|
+
using: "composite"
|
|
5
|
+
steps:
|
|
6
|
+
- run: |
|
|
7
|
+
echo "Disk space before cleanup:"
|
|
8
|
+
df -h /
|
|
9
|
+
|
|
10
|
+
# Remove large pre-installed SDKs not needed for Python projects
|
|
11
|
+
sudo rm -rf \
|
|
12
|
+
/usr/share/dotnet \
|
|
13
|
+
/usr/local/lib/android \
|
|
14
|
+
/opt/ghc \
|
|
15
|
+
/usr/local/share/powershell \
|
|
16
|
+
/usr/share/swift \
|
|
17
|
+
/usr/local/.ghcup \
|
|
18
|
+
/usr/lib/jvm \
|
|
19
|
+
/opt/hostedtoolcache/CodeQL || true
|
|
20
|
+
|
|
21
|
+
# Remove docker images
|
|
22
|
+
docker image ls -aq | xargs -r sudo docker rmi >/dev/null 2>&1 || true
|
|
23
|
+
|
|
24
|
+
echo "Disk space after cleanup:"
|
|
25
|
+
df -h /
|
|
26
|
+
shell: bash
|
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
# Since dependabot cannot update workflows using docker,
|
|
2
2
|
# we use this indirection since dependabot can update this file.
|
|
3
|
-
FROM rhysd/actionlint:1.7.
|
|
3
|
+
FROM rhysd/actionlint:1.7.10@sha256:ef8299f97635c4c30e2298f48f30763ab782a4ad2c95b744649439a039421e36
|
|
@@ -112,7 +112,7 @@ jobs:
|
|
|
112
112
|
|
|
113
113
|
|
|
114
114
|
- name: Cache huggingface datasets
|
|
115
|
-
uses: actions/cache@
|
|
115
|
+
uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3
|
|
116
116
|
with:
|
|
117
117
|
path: ~/.cache/huggingface
|
|
118
118
|
# Invalidate cache when any example notebook changes (may affect dataset downloads)
|
|
@@ -86,6 +86,7 @@ The framework is built around a modular block system with **composability at its
|
|
|
86
86
|
- `transform/`: Data transformation blocks (column operations, text manipulation)
|
|
87
87
|
- `filtering/`: Data filtering blocks with quality thresholds
|
|
88
88
|
- `evaluation/`: Quality evaluation blocks (faithfulness, relevancy assessment)
|
|
89
|
+
- `agent/`: Agent framework integration blocks (Langflow, etc.)
|
|
89
90
|
|
|
90
91
|
**Key Benefits**: Type-safe composition, automatic validation, rich logging, and high-performance async processing.
|
|
91
92
|
|
|
@@ -129,6 +130,39 @@ flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/
|
|
|
129
130
|
└── generate_questions_responses.yaml
|
|
130
131
|
```
|
|
131
132
|
|
|
133
|
+
### Connector System
|
|
134
|
+
Connectors handle communication with external agent frameworks:
|
|
135
|
+
|
|
136
|
+
- **BaseConnector** (`src/sdg_hub/core/connectors/base.py`): Abstract base for all connectors
|
|
137
|
+
- **ConnectorRegistry** (`src/sdg_hub/core/connectors/registry.py`): Auto-discovery for connectors
|
|
138
|
+
- **BaseAgentConnector** (`src/sdg_hub/core/connectors/agent/base.py`): Base class for agent framework connectors
|
|
139
|
+
|
|
140
|
+
**Supported Agent Frameworks:**
|
|
141
|
+
- **Langflow** (`src/sdg_hub/core/connectors/agent/langflow.py`): Visual LLM app builder
|
|
142
|
+
|
|
143
|
+
**Using AgentBlock:**
|
|
144
|
+
```python
|
|
145
|
+
from sdg_hub.core.blocks.agent import AgentBlock
|
|
146
|
+
|
|
147
|
+
block = AgentBlock(
|
|
148
|
+
block_name="my_agent",
|
|
149
|
+
agent_framework="langflow", # Connector name from registry
|
|
150
|
+
agent_url="http://localhost:7860/api/v1/run/my-flow",
|
|
151
|
+
agent_api_key="your-api-key", # Optional
|
|
152
|
+
input_cols=["question"],
|
|
153
|
+
output_cols=["response"],
|
|
154
|
+
extract_response=True, # Extract text from response (Langflow-specific)
|
|
155
|
+
)
|
|
156
|
+
|
|
157
|
+
result = block.generate(dataset)
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
**Adding New Connectors:**
|
|
161
|
+
1. Create a new file in `src/sdg_hub/core/connectors/agent/`
|
|
162
|
+
2. Inherit from `BaseAgentConnector`
|
|
163
|
+
3. Implement `build_request()` and `parse_response()` methods
|
|
164
|
+
4. Register with `@ConnectorRegistry.register("name")`
|
|
165
|
+
|
|
132
166
|
## Key Patterns
|
|
133
167
|
|
|
134
168
|
### Block Development
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sdg_hub
|
|
3
|
-
Version: 0.7.2
|
|
3
|
+
Version: 0.8.0
|
|
4
4
|
Summary: Synthetic Data Generation
|
|
5
5
|
Author-email: Red Hat AI Innovation <abhandwa@redhat.com>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -26,7 +26,7 @@ Requires-Dist: click<9.0.0,>=8.1.7
|
|
|
26
26
|
Requires-Dist: datasets>=4.0.0
|
|
27
27
|
Requires-Dist: httpx<1.0.0,>=0.25.0
|
|
28
28
|
Requires-Dist: jinja2
|
|
29
|
-
Requires-Dist: litellm<1.75.0,>=1.73.0
|
|
29
|
+
Requires-Dist: litellm<2.0.0,>=1.73.0
|
|
30
30
|
Requires-Dist: rich
|
|
31
31
|
Requires-Dist: pandas
|
|
32
32
|
Requires-Dist: pydantic<3.0.0,>=2.0.0
|
|
@@ -603,7 +603,7 @@ print(result["judgment"]) # ['YES']
|
|
|
603
603
|
TextParserBlock is commonly used after LLMChatBlock to structure responses:
|
|
604
604
|
|
|
605
605
|
```python
|
|
606
|
-
from sdg_hub.core.blocks import LLMChatBlock, LLMParserBlock, TextParserBlock
|
|
606
|
+
from sdg_hub.core.blocks import LLMChatBlock, LLMResponseExtractorBlock, TextParserBlock
|
|
607
607
|
|
|
608
608
|
# Step 1: Generate LLM response
|
|
609
609
|
chat_block = LLMChatBlock(
|
|
@@ -615,7 +615,7 @@ chat_block = LLMChatBlock(
|
|
|
615
615
|
|
|
616
616
|
# Step 2: Extract content from response object
|
|
617
617
|
# Use field_prefix="" to get cleaner column names
|
|
618
|
-
llm_parser = LLMParserBlock(
|
|
618
|
+
llm_parser = LLMResponseExtractorBlock(
|
|
619
619
|
block_name="extract_eval",
|
|
620
620
|
input_cols=["eval_response"],
|
|
621
621
|
extract_content=True,
|
|
@@ -316,7 +316,7 @@ blocks:
|
|
|
316
316
|
output_cols: ["eval_response"]
|
|
317
317
|
async_mode: true
|
|
318
318
|
|
|
319
|
-
- block_type: "LLMParserBlock"
|
|
319
|
+
- block_type: "LLMResponseExtractorBlock"
|
|
320
320
|
block_config:
|
|
321
321
|
block_name: "extract_eval_content"
|
|
322
322
|
input_cols: ["eval_response"]
|
|
@@ -537,7 +537,7 @@ result = flow.generate(
|
|
|
537
537
|
| | `top_p` | Nucleus sampling threshold | `0.0` - `1.0` |
|
|
538
538
|
| | `frequency_penalty` | Penalize token repetition | `-2.0` - `2.0` |
|
|
539
539
|
| | `presence_penalty` | Penalize new topics | `-2.0` - `2.0` |
|
|
540
|
-
| **
|
|
540
|
+
| **LLMResponseExtractorBlock** | `extract_content` | Extract main content field | `True`, `False` |
|
|
541
541
|
| | `extract_reasoning_content` | Extract reasoning/thinking | `True`, `False` |
|
|
542
542
|
| | `extract_tool_calls` | Extract tool call data | `True`, `False` |
|
|
543
543
|
| | `field_prefix` | Prefix for output fields | `"llm_"`, `"parsed_"` |
|
|
@@ -752,7 +752,7 @@ result = flow.generate(dataset)
|
|
|
752
752
|
│ │ generate_question │ LLMChatBlock │ 45.30s │ 100 → 100 │ +1 │ ✓││
|
|
753
753
|
│ │ generate_answer │ LLMChatBlock │ 78.45s │ 100 → 100 │ +1 │ ✓││
|
|
754
754
|
│ │ eval_faithfulness... │ LLMChatBlock │ 52.20s │ 100 → 100 │ +1 │ ✓││
|
|
755
|
-
│ │ extract_eval_con... │
|
|
755
|
+
│ │ extract_eval_con... │ LLMResponseExtractorBlock │ 0.15s │ 100 → 100 │ +2 │ ✓││
|
|
756
756
|
│ │ parse_evaluation │ TextParserBlock │ 0.22s │ 100 → 100 │ +2 │ ✓││
|
|
757
757
|
│ │ filter_faithful │ ColumnValueF... │ 0.08s │ 100 → 87 │ — │ ✓││
|
|
758
758
|
│ ├──────────────────────┼─────────────────┼──────────┼──────────────┼─────────┼──┤│
|
|
@@ -332,7 +332,7 @@
|
|
|
332
332
|
" LLMChatBlock,\n",
|
|
333
333
|
" PromptBuilderBlock,\n",
|
|
334
334
|
" TextParserBlock,\n",
|
|
335
|
-
"
|
|
335
|
+
" LLMResponseExtractorBlock,\n",
|
|
336
336
|
")\n",
|
|
337
337
|
"from sdg_hub.core.blocks.transform import JSONStructureBlock\n",
|
|
338
338
|
"\n",
|
|
@@ -355,7 +355,7 @@
|
|
|
355
355
|
" temperature=0.1, # Low temperature for more consistent extraction\n",
|
|
356
356
|
")\n",
|
|
357
357
|
"\n",
|
|
358
|
-
"
|
|
358
|
+
"ticker_llm_response_extractor_block = LLMResponseExtractorBlock(\n",
|
|
359
359
|
" block_name=\"extract_stock_tickers\",\n",
|
|
360
360
|
" input_cols=[\"raw_stock_tickers\"],\n",
|
|
361
361
|
" extract_content=True,\n",
|
|
@@ -406,7 +406,7 @@
|
|
|
406
406
|
"ticker_blocks = [\n",
|
|
407
407
|
" ticker_prompt_block,\n",
|
|
408
408
|
" ticker_llm_block,\n",
|
|
409
|
-
"
|
|
409
|
+
" ticker_llm_response_extractor_block,\n",
|
|
410
410
|
" ticker_parser_block,\n",
|
|
411
411
|
" enhanced_json_block,\n",
|
|
412
412
|
"]\n",
|
|
@@ -33,7 +33,7 @@ dependencies = [
|
|
|
33
33
|
"datasets>=4.0.0",
|
|
34
34
|
"httpx>=0.25.0,<1.0.0",
|
|
35
35
|
"jinja2",
|
|
36
|
-
"litellm>=1.73.0,<1.75.0",
|
|
36
|
+
"litellm>=1.73.0,<2.0.0", # raising cap since tests run without errors related to 'backoff' cap back to <1.75.0 if errors surface
|
|
37
37
|
"rich",
|
|
38
38
|
"pandas",
|
|
39
39
|
"pydantic>=2.0.0,<3.0.0", # cap before v3; adjust the lower bound to the minimum v2.x you’ve tested
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.7.2'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 7, 2)
|
|
31
|
+
__version__ = version = '0.8.0'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 8, 0)
|
|
33
33
|
|
|
34
|
-
__commit_id__ = commit_id = '
|
|
34
|
+
__commit_id__ = commit_id = 'g280e2588b'
|
|
@@ -2,14 +2,26 @@
|
|
|
2
2
|
"""Core SDG Hub components."""
|
|
3
3
|
|
|
4
4
|
# Local
|
|
5
|
-
from .blocks import BaseBlock, BlockRegistry
|
|
5
|
+
from .blocks import AgentBlock, BaseBlock, BlockRegistry
|
|
6
|
+
from .connectors import (
|
|
7
|
+
BaseConnector,
|
|
8
|
+
ConnectorConfig,
|
|
9
|
+
ConnectorError,
|
|
10
|
+
ConnectorRegistry,
|
|
11
|
+
)
|
|
6
12
|
from .flow import Flow, FlowMetadata, FlowRegistry, FlowValidator
|
|
7
13
|
from .utils import GenerateError, resolve_path
|
|
8
14
|
|
|
9
15
|
__all__ = [
|
|
10
16
|
# Block components
|
|
17
|
+
"AgentBlock",
|
|
11
18
|
"BaseBlock",
|
|
12
19
|
"BlockRegistry",
|
|
20
|
+
# Connector components
|
|
21
|
+
"BaseConnector",
|
|
22
|
+
"ConnectorConfig",
|
|
23
|
+
"ConnectorError",
|
|
24
|
+
"ConnectorRegistry",
|
|
13
25
|
# Flow components
|
|
14
26
|
"Flow",
|
|
15
27
|
"FlowRegistry",
|
|
@@ -4,9 +4,16 @@ This package provides various block implementations for data generation, process
|
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
6
|
# Local
|
|
7
|
+
from .agent import AgentBlock
|
|
7
8
|
from .base import BaseBlock
|
|
8
9
|
from .filtering import ColumnValueFilterBlock
|
|
9
|
-
from .llm import
|
|
10
|
+
from .llm import (
|
|
11
|
+
LLMChatBlock,
|
|
12
|
+
LLMParserBlock,
|
|
13
|
+
LLMResponseExtractorBlock,
|
|
14
|
+
PromptBuilderBlock,
|
|
15
|
+
TextParserBlock,
|
|
16
|
+
)
|
|
10
17
|
from .registry import BlockRegistry
|
|
11
18
|
from .transform import (
|
|
12
19
|
DuplicateColumnsBlock,
|
|
@@ -18,6 +25,7 @@ from .transform import (
|
|
|
18
25
|
)
|
|
19
26
|
|
|
20
27
|
__all__ = [
|
|
28
|
+
"AgentBlock",
|
|
21
29
|
"BaseBlock",
|
|
22
30
|
"BlockRegistry",
|
|
23
31
|
"ColumnValueFilterBlock",
|
|
@@ -28,7 +36,8 @@ __all__ = [
|
|
|
28
36
|
"TextConcatBlock",
|
|
29
37
|
"UniformColumnValueSetter",
|
|
30
38
|
"LLMChatBlock",
|
|
31
|
-
"LLMParserBlock",
|
|
39
|
+
"LLMParserBlock", # Deprecated alias for LLMResponseExtractorBlock
|
|
40
|
+
"LLMResponseExtractorBlock",
|
|
32
41
|
"TextParserBlock",
|
|
33
42
|
"PromptBuilderBlock",
|
|
34
43
|
]
|
|
@@ -0,0 +1,397 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
"""Agent block for integrating external agent frameworks."""
|
|
3
|
+
|
|
4
|
+
from typing import Any, Optional
|
|
5
|
+
import asyncio
|
|
6
|
+
import uuid
|
|
7
|
+
|
|
8
|
+
from pydantic import Field, PrivateAttr
|
|
9
|
+
from tqdm import tqdm
|
|
10
|
+
import pandas as pd
|
|
11
|
+
|
|
12
|
+
from ...connectors.agent.base import BaseAgentConnector
|
|
13
|
+
from ...connectors.base import ConnectorConfig
|
|
14
|
+
from ...connectors.exceptions import ConnectorError
|
|
15
|
+
from ...connectors.registry import ConnectorRegistry
|
|
16
|
+
from ...utils.logger_config import setup_logger
|
|
17
|
+
from ..base import BaseBlock
|
|
18
|
+
from ..registry import BlockRegistry
|
|
19
|
+
|
|
20
|
+
logger = setup_logger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@BlockRegistry.register(
    "AgentBlock",
    category="agent",
    description="Execute agent frameworks (Langflow, etc.) on DataFrame rows",
)
class AgentBlock(BaseBlock):
    """Block for executing external agent frameworks on DataFrame rows.

    This block integrates with various agent frameworks through the connector
    system. Each row in the DataFrame is processed by sending messages to the
    agent and storing the response.

    The block supports both sync and async execution modes for optimal
    performance with large datasets.

    Parameters
    ----------
    agent_framework : str
        Name of the connector to use (e.g., 'langflow').
    agent_url : str
        API endpoint URL for the agent.
    agent_api_key : str, optional
        API key for authentication.
    timeout : float
        Request timeout in seconds. Default 120.0.
    max_retries : int
        Maximum retry attempts. Default 3.
    session_id_col : str, optional
        Column containing session IDs. If not provided (or if a row's value
        is null), a fresh UUID is generated for the row.
    async_mode : bool
        Whether to use async execution. Default False.
    max_concurrency : int
        Maximum concurrent requests in async mode. Default 10.
    extract_response : bool
        Forwarded to the connector config as ``extract_text``. Default False.

    Example YAML Configuration
    --------------------------
    ```yaml
    - block_type: AgentBlock
      block_config:
        block_name: my_agent
        agent_framework: langflow
        agent_url: http://localhost:7860/api/v1/run/my-flow
        agent_api_key: ${LANGFLOW_API_KEY}
        input_cols:
          messages: messages_col
        output_cols:
          - agent_response
    ```

    Example
    -------
    >>> block = AgentBlock(
    ...     block_name="qa_agent",
    ...     agent_framework="langflow",
    ...     agent_url="http://localhost:7860/api/v1/run/qa-flow",
    ...     input_cols={"messages": "question"},
    ...     output_cols=["response"],
    ... )
    >>> result_df = block(df)
    """

    # Required configuration
    agent_framework: str = Field(
        ...,
        description="Connector name (e.g., 'langflow')",
    )
    agent_url: str = Field(
        ...,
        description="Agent API endpoint URL",
    )

    # Optional configuration
    agent_api_key: Optional[str] = Field(
        None,
        description="API key for authentication",
    )
    timeout: float = Field(
        120.0,
        description="Request timeout in seconds",
        gt=0,
    )
    max_retries: int = Field(
        3,
        description="Maximum retry attempts",
        ge=0,
    )
    session_id_col: Optional[str] = Field(
        None,
        description="Column containing session IDs",
    )
    async_mode: bool = Field(
        False,
        description="Use async execution for better throughput",
    )
    max_concurrency: int = Field(
        10,
        description="Maximum concurrent requests in async mode",
        gt=0,
    )
    extract_response: bool = Field(
        False,
        description="Extract just the text content from agent response",
    )

    # Private attributes
    # Cached connector plus the config tuple it was built from, so a changed
    # configuration can be detected and the connector rebuilt.
    _connector: Optional[BaseAgentConnector] = PrivateAttr(default=None)
    _connector_config_key: Optional[tuple] = PrivateAttr(default=None)

    def _get_connector(self) -> BaseAgentConnector:
        """Get or create the connector instance.

        Invalidates the cached connector if the config has changed (e.g., due
        to runtime overrides).

        Returns
        -------
        BaseAgentConnector
            The configured connector instance.
        """
        config_key = (
            self.agent_framework,
            self.agent_url,
            self.agent_api_key,
            self.timeout,
            self.max_retries,
            self.extract_response,
        )
        if self._connector is None or self._connector_config_key != config_key:
            connector_class = ConnectorRegistry.get(self.agent_framework)
            config = ConnectorConfig(
                url=self.agent_url,
                api_key=self.agent_api_key,
                timeout=self.timeout,
                max_retries=self.max_retries,
                extract_text=self.extract_response,
            )
            self._connector = connector_class(config=config)
            self._connector_config_key = config_key
        return self._connector

    def _get_messages_col(self) -> str:
        """Get the input column name for messages.

        Resolution order: the value under the ``"messages"`` key of a dict,
        otherwise the first key of a non-empty dict, otherwise the first entry
        of a non-empty list, otherwise a non-empty string taken as the column
        name itself.

        Returns
        -------
        str
            Column name containing messages.

        Raises
        ------
        ConnectorError
            If ``input_cols`` is empty or of an unsupported type.
        """
        cols = self.input_cols
        if isinstance(cols, dict) and cols:
            if "messages" in cols:
                return cols["messages"]
            return next(iter(cols))
        if isinstance(cols, list) and cols:
            return cols[0]
        if isinstance(cols, str) and cols:
            # The base field type also permits a bare column name.
            return cols
        raise ConnectorError("input_cols must specify the messages column")

    def _get_output_col(self) -> str:
        """Get the output column name for responses.

        Returns
        -------
        str
            Column name for storing responses: the first key of a non-empty
            dict, the first entry of a non-empty list, a non-empty string, or
            ``"agent_response"`` when nothing usable is configured.
        """
        cols = self.output_cols
        if isinstance(cols, dict) and cols:
            return next(iter(cols))
        if isinstance(cols, list) and cols:
            return cols[0]
        if isinstance(cols, str) and cols:
            return cols
        # Empty or unset configuration falls back to the default name
        # (an empty dict previously raised IndexError here).
        return "agent_response"

    def _build_messages(self, content: Any) -> list[dict[str, Any]]:
        """Build message list from row content.

        Parameters
        ----------
        content : Any
            Content from the DataFrame cell.

        Returns
        -------
        list[dict]
            List of messages in standard format.
        """
        if isinstance(content, list):
            # Already a message list
            return content
        if isinstance(content, dict):
            # Single message dict
            return [content]
        # Plain text - wrap as user message
        return [{"role": "user", "content": str(content)}]

    def _get_session_id(self, row: pd.Series, idx: int) -> str:
        """Get session ID for a row.

        Parameters
        ----------
        row : pd.Series
            DataFrame row.
        idx : int
            Row index (currently unused; kept for interface stability).

        Returns
        -------
        str
            The stringified value of ``session_id_col`` when that column is
            configured, present and non-null for this row; otherwise a newly
            generated UUID4 string.
        """
        if self.session_id_col and self.session_id_col in row:
            value = row[self.session_id_col]
            # A null cell must not become the literal session "None"/"nan",
            # which would make unrelated rows share one conversation.
            is_missing = pd.api.types.is_scalar(value) and pd.isna(value)
            if not is_missing:
                return str(value)
        return str(uuid.uuid4())

    def _process_row_sync(
        self,
        row: pd.Series,
        idx: int,
        connector: BaseAgentConnector,
        messages_col: str,
    ) -> dict[str, Any]:
        """Process a single row synchronously.

        Parameters
        ----------
        row : pd.Series
            DataFrame row.
        idx : int
            Row index.
        connector : BaseAgentConnector
            Connector instance.
        messages_col : str
            Column containing messages.

        Returns
        -------
        dict
            Response from the agent.
        """
        messages = self._build_messages(row[messages_col])
        session_id = self._get_session_id(row, idx)
        return connector.send(messages, session_id)

    async def _process_row_async(
        self,
        row: pd.Series,
        idx: int,
        connector: BaseAgentConnector,
        messages_col: str,
        semaphore: asyncio.Semaphore,
    ) -> tuple[int, dict[str, Any]]:
        """Process a single row asynchronously.

        Parameters
        ----------
        row : pd.Series
            DataFrame row.
        idx : int
            Row index.
        connector : BaseAgentConnector
            Connector instance.
        messages_col : str
            Column containing messages.
        semaphore : asyncio.Semaphore
            Semaphore for concurrency control.

        Returns
        -------
        tuple[int, dict]
            Row index and response.
        """
        async with semaphore:
            messages = self._build_messages(row[messages_col])
            session_id = self._get_session_id(row, idx)
            response = await connector.asend(messages, session_id)
            return idx, response

    async def _process_batch_async(
        self,
        df: pd.DataFrame,
        connector: BaseAgentConnector,
        messages_col: str,
    ) -> dict[int, dict[str, Any]]:
        """Process all rows asynchronously.

        Parameters
        ----------
        df : pd.DataFrame
            Input DataFrame.
        connector : BaseAgentConnector
            Connector instance.
        messages_col : str
            Column containing messages.

        Returns
        -------
        dict[int, dict]
            Mapping from row index to response. Rows sharing an index label
            overwrite each other, so callers should pass a uniquely indexed
            frame.
        """
        semaphore = asyncio.Semaphore(self.max_concurrency)
        tasks = [
            self._process_row_async(row, idx, connector, messages_col, semaphore)
            for idx, row in df.iterrows()
        ]

        results = {}
        # as_completed yields in completion order; the returned index restores
        # the association with the originating row.
        for coro in tqdm(
            asyncio.as_completed(tasks),
            total=len(tasks),
            desc=f"{self.block_name} (async)",
        ):
            idx, response = await coro
            results[idx] = response

        return results

    def generate(self, samples: pd.DataFrame, **kwargs: Any) -> pd.DataFrame:
        """Process DataFrame rows through the agent.

        Parameters
        ----------
        samples : pd.DataFrame
            Input DataFrame with messages column.
        **kwargs : Any
            Runtime overrides.

        Returns
        -------
        pd.DataFrame
            Copy of ``samples`` with agent responses added in the output
            column; the original index is preserved.
        """
        df = samples.copy()
        connector = self._get_connector()
        messages_col = self._get_messages_col()
        output_col = self._get_output_col()

        if self.async_mode:
            # Run the batch on a positionally indexed view so results are keyed
            # by row position. Keying by index label would make rows with
            # duplicate labels silently receive each other's responses.
            positional = df.reset_index(drop=True)
            batch = self._process_batch_async(positional, connector, messages_col)

            # Detect a running loop separately from running the batch, so a
            # RuntimeError raised by the batch itself (e.g. NotImplementedError
            # from a connector) is not mistaken for "no running loop".
            try:
                asyncio.get_running_loop()
            except RuntimeError:
                loop_running = False
            else:
                loop_running = True

            if loop_running:
                # Already in async context - asyncio.run cannot be nested, so
                # drive the batch on its own loop in a worker thread.
                import concurrent.futures

                with concurrent.futures.ThreadPoolExecutor(
                    max_workers=1
                ) as executor:
                    results = executor.submit(asyncio.run, batch).result()
            else:
                # No event loop - create one
                results = asyncio.run(batch)

            # Apply results in row order
            df[output_col] = [results[pos] for pos in range(len(df))]
        else:
            # Sync execution with progress bar
            responses = [
                self._process_row_sync(row, idx, connector, messages_col)
                for idx, row in tqdm(
                    df.iterrows(),
                    total=len(df),
                    desc=self.block_name,
                )
            ]
            df[output_col] = responses

        logger.info(f"Processed {len(df)} rows with {self.agent_framework} agent")
        return df
|
|
@@ -49,6 +49,9 @@ class BaseBlock(BaseModel, ABC):
|
|
|
49
49
|
block_name: str = Field(
|
|
50
50
|
..., description="Unique identifier for this block instance"
|
|
51
51
|
)
|
|
52
|
+
block_type: Optional[str] = Field(
|
|
53
|
+
None, description="Block type (e.g., 'llm', 'transform', 'parser', 'filtering')"
|
|
54
|
+
)
|
|
52
55
|
input_cols: Union[str, list[str], dict[str, Any], None] = Field(
|
|
53
56
|
None, description="Input columns: str, list, or dict"
|
|
54
57
|
)
|
|
@@ -366,5 +369,5 @@ class BaseBlock(BaseModel, ABC):
|
|
|
366
369
|
Dict[str, Any]
|
|
367
370
|
"""
|
|
368
371
|
config = self.get_config()
|
|
369
|
-
config["
|
|
372
|
+
config["block_class"] = self.__class__.__name__
|
|
370
373
|
return config
|