sdg-hub 0.7.2__tar.gz → 0.7.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.github/workflows/actionlint.dockerfile +1 -1
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/PKG-INFO +2 -2
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/blocks/llm-blocks.md +2 -2
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/flows/overview.md +3 -3
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/text_analysis/structured_insights_demo.ipynb +3 -3
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/pyproject.toml +1 -1
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/_version.py +3 -3
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/__init__.py +9 -2
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/base.py +4 -1
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/filtering/column_value_filter.py +2 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/llm/__init__.py +3 -2
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/llm/llm_chat_block.py +2 -0
- sdg_hub-0.7.2/src/sdg_hub/core/blocks/llm/llm_parser_block.py → sdg_hub-0.7.3/src/sdg_hub/core/blocks/llm/llm_response_extractor_block.py +32 -9
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/llm/prompt_builder_block.py +2 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/llm/text_parser_block.py +2 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/transform/duplicate_columns.py +2 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/transform/index_based_mapper.py +2 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/transform/json_structure_block.py +2 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/transform/melt_columns.py +2 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/transform/rename_columns.py +2 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/transform/text_concat.py +2 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/transform/uniform_col_val_setter.py +2 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/flow/base.py +7 -31
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/utils/flow_metrics.py +3 -3
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/evaluation/rag/flow.yaml +6 -6
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml +4 -4
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/flow.yaml +3 -3
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml +4 -4
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml +2 -2
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +7 -7
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml +7 -7
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/text_analysis/structured_insights/flow.yaml +4 -4
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub.egg-info/PKG-INFO +2 -2
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub.egg-info/SOURCES.txt +2 -2
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub.egg-info/requires.txt +1 -1
- sdg_hub-0.7.2/tests/blocks/llm/test_llm_parser_block.py → sdg_hub-0.7.3/tests/blocks/llm/test_llm_response_extractor_block.py +55 -52
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/llm/test_promptbuilderblock.py +1 -1
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/test_base_block.py +4 -3
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/flow/test_base.py +28 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/utils/test_flow_metrics.py +11 -11
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.github/actionlint.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.github/actions/free-disk-space/action.yml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.github/dependabot.yml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.github/mergify.yml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.github/workflows/actionlint.yml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.github/workflows/docs.yml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.github/workflows/integration-test.yml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.github/workflows/lint.yml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.github/workflows/matchers/actionlint.json +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.github/workflows/matchers/pylint.json +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.github/workflows/packer.yml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.github/workflows/pypi.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.github/workflows/test.yml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.gitignore +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.isort.cfg +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.markdownlint-cli2.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.pre-commit-config.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.pylintrc +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/CLAUDE.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/CONTRIBUTING.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/LICENSE +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/Makefile +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/README.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/.nojekyll +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/README.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/_coverpage.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/_navbar.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/_sidebar.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/api-reference.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/assets/logo.png +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/assets/sdg-hub-cover.png +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/blocks/custom-blocks.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/blocks/filtering-blocks.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/blocks/overview.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/blocks/transform-blocks.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/concepts.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/development.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/flows/available-flows.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/flows/custom-flows.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/flows/discovery.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/index.html +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/installation.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/quick-start.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/.env.example +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/README.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/document_pre_processing.ipynb +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/imgs/quality_benchmark_accuracy.png +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/knowledge_generation.ipynb +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/knowledge_mixing.ipynb +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/knowledge_mixing_utils.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/raft_builder.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/instructlab/.gitignore +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/instructlab/README.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/instructlab/assets/imgs/instructlab-banner.png +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/instructlab/docling_v2_config.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/instructlab/docparser.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/instructlab/docparser_v2.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.json +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.pdf +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/qna.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/instructlab/document_pre_processing.ipynb +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/instructlab/knowledge_generation_and_mixing.ipynb +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/instructlab/knowledge_generation_ja.ipynb +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/instructlab/logger_config.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/knowledge_utils.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/rag_evaluation/ibm-annual-report-2024.pdf +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/rag_evaluation/rag_evaluation_dataset_generation.ipynb +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/text_analysis/README.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/text_analysis/extract_stock_tickers.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/scripts/packer/centos.pkr.hcl +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/scripts/packer/setup-centos.sh +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/scripts/ruff.sh +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/scripts/snyk_notebook_scan.sh +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/setup.cfg +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/filtering/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/llm/error_handler.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/registry.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/transform/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/flow/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/flow/checkpointer.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/flow/metadata.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/flow/registry.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/flow/validation.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/utils/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/utils/datautils.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/utils/error_handling.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/utils/flow_id_words.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/utils/flow_identifier.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/utils/logger_config.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/utils/path_resolution.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/utils/time_estimator.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/utils/yaml_utils.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/evaluation/rag/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/evaluation/rag/answer_generation.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/evaluation/rag/conceptual_qa_generation.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/evaluation/rag/context_extraction.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/evaluation/rag/groundedness_critic.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/evaluation/rag/question_evolution.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/evaluation/rag/topic_generation.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/detailed_summary.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/extractive_summary.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_answers.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_multiple_qa.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_question_list.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/key_facts_summary.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/README.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/atomic_facts.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/detailed_summary.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_faithfulness.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_question.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_relevancy.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/extractive_summary.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/generate_questions_responses.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/README.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/atomic_facts_ja.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/detailed_summary_ja.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/extractive_summary_ja.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/generate_questions_responses_ja.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/text_analysis/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/text_analysis/structured_insights/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/text_analysis/structured_insights/analyze_sentiment.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/text_analysis/structured_insights/extract_entities.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/text_analysis/structured_insights/extract_keywords.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/text_analysis/structured_insights/summarize.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/py.typed +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub.egg-info/dependency_links.txt +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub.egg-info/top_level.txt +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/filtering/test_columnvaluefilter.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/llm/test_llm_chat_block.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/llm/test_textparserblock.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/test_registry.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/testdata/test_config.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/testdata/test_prompt_format_config.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/testdata/test_prompt_format_no_system.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/testdata/test_prompt_format_strict.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/testdata/test_prompt_invalid_final_role.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/testdata/test_prompt_no_user_messages.yaml +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/transform/test_index_based_mapper.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/transform/test_json_structure_block.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/transform/test_melt_columns.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/transform/test_rename_columns.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/transform/test_text_concat.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/transform/test_uniform_col_val_setter.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/flow/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/flow/conftest.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/flow/test_checkpointer.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/flow/test_dataset_requirements.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/flow/test_integration.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/flow/test_metadata.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/flow/test_registry.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/flow/test_time_estimation.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/flow/test_validation.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/integration/README.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/integration/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/integration/knowledge_tuning/enhanced_summary_knowledge_tuning/README.md +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/integration/knowledge_tuning/enhanced_summary_knowledge_tuning/__init__.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/integration/knowledge_tuning/enhanced_summary_knowledge_tuning/conftest.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/integration/knowledge_tuning/enhanced_summary_knowledge_tuning/test_data/test_seed_data.jsonl +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/integration/knowledge_tuning/enhanced_summary_knowledge_tuning/test_functional.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/utils/test_datautils.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/utils/test_error_handling.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/utils/test_path_resolution.py +0 -0
- {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tox.ini +0 -0
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
# Since dependabot cannot update workflows using docker,
|
|
2
2
|
# we use this indirection since dependabot can update this file.
|
|
3
|
-
FROM rhysd/actionlint:1.7.
|
|
3
|
+
FROM rhysd/actionlint:1.7.10@sha256:ef8299f97635c4c30e2298f48f30763ab782a4ad2c95b744649439a039421e36
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sdg_hub
|
|
3
|
-
Version: 0.7.
|
|
3
|
+
Version: 0.7.3
|
|
4
4
|
Summary: Synthetic Data Generation
|
|
5
5
|
Author-email: Red Hat AI Innovation <abhandwa@redhat.com>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -26,7 +26,7 @@ Requires-Dist: click<9.0.0,>=8.1.7
|
|
|
26
26
|
Requires-Dist: datasets>=4.0.0
|
|
27
27
|
Requires-Dist: httpx<1.0.0,>=0.25.0
|
|
28
28
|
Requires-Dist: jinja2
|
|
29
|
-
Requires-Dist: litellm<
|
|
29
|
+
Requires-Dist: litellm<2.0.0,>=1.73.0
|
|
30
30
|
Requires-Dist: rich
|
|
31
31
|
Requires-Dist: pandas
|
|
32
32
|
Requires-Dist: pydantic<3.0.0,>=2.0.0
|
|
@@ -603,7 +603,7 @@ print(result["judgment"]) # ['YES']
|
|
|
603
603
|
TextParserBlock is commonly used after LLMChatBlock to structure responses:
|
|
604
604
|
|
|
605
605
|
```python
|
|
606
|
-
from sdg_hub.core.blocks import LLMChatBlock,
|
|
606
|
+
from sdg_hub.core.blocks import LLMChatBlock, LLMResponseExtractorBlock, TextParserBlock
|
|
607
607
|
|
|
608
608
|
# Step 1: Generate LLM response
|
|
609
609
|
chat_block = LLMChatBlock(
|
|
@@ -615,7 +615,7 @@ chat_block = LLMChatBlock(
|
|
|
615
615
|
|
|
616
616
|
# Step 2: Extract content from response object
|
|
617
617
|
# Use field_prefix="" to get cleaner column names
|
|
618
|
-
llm_parser =
|
|
618
|
+
llm_parser = LLMResponseExtractorBlock(
|
|
619
619
|
block_name="extract_eval",
|
|
620
620
|
input_cols=["eval_response"],
|
|
621
621
|
extract_content=True,
|
|
@@ -316,7 +316,7 @@ blocks:
|
|
|
316
316
|
output_cols: ["eval_response"]
|
|
317
317
|
async_mode: true
|
|
318
318
|
|
|
319
|
-
- block_type: "
|
|
319
|
+
- block_type: "LLMResponseExtractorBlock"
|
|
320
320
|
block_config:
|
|
321
321
|
block_name: "extract_eval_content"
|
|
322
322
|
input_cols: ["eval_response"]
|
|
@@ -537,7 +537,7 @@ result = flow.generate(
|
|
|
537
537
|
| | `top_p` | Nucleus sampling threshold | `0.0` - `1.0` |
|
|
538
538
|
| | `frequency_penalty` | Penalize token repetition | `-2.0` - `2.0` |
|
|
539
539
|
| | `presence_penalty` | Penalize new topics | `-2.0` - `2.0` |
|
|
540
|
-
| **
|
|
540
|
+
| **LLMResponseExtractorBlock** | `extract_content` | Extract main content field | `True`, `False` |
|
|
541
541
|
| | `extract_reasoning_content` | Extract reasoning/thinking | `True`, `False` |
|
|
542
542
|
| | `extract_tool_calls` | Extract tool call data | `True`, `False` |
|
|
543
543
|
| | `field_prefix` | Prefix for output fields | `"llm_"`, `"parsed_"` |
|
|
@@ -752,7 +752,7 @@ result = flow.generate(dataset)
|
|
|
752
752
|
│ │ generate_question │ LLMChatBlock │ 45.30s │ 100 → 100 │ +1 │ ✓││
|
|
753
753
|
│ │ generate_answer │ LLMChatBlock │ 78.45s │ 100 → 100 │ +1 │ ✓││
|
|
754
754
|
│ │ eval_faithfulness... │ LLMChatBlock │ 52.20s │ 100 → 100 │ +1 │ ✓││
|
|
755
|
-
│ │ extract_eval_con... │
|
|
755
|
+
│ │ extract_eval_con... │ LLMResponseExtractorBlock │ 0.15s │ 100 → 100 │ +2 │ ✓││
|
|
756
756
|
│ │ parse_evaluation │ TextParserBlock │ 0.22s │ 100 → 100 │ +2 │ ✓││
|
|
757
757
|
│ │ filter_faithful │ ColumnValueF... │ 0.08s │ 100 → 87 │ — │ ✓││
|
|
758
758
|
│ ├──────────────────────┼─────────────────┼──────────┼──────────────┼─────────┼──┤│
|
|
@@ -332,7 +332,7 @@
|
|
|
332
332
|
" LLMChatBlock,\n",
|
|
333
333
|
" PromptBuilderBlock,\n",
|
|
334
334
|
" TextParserBlock,\n",
|
|
335
|
-
"
|
|
335
|
+
" LLMResponseExtractorBlock,\n",
|
|
336
336
|
")\n",
|
|
337
337
|
"from sdg_hub.core.blocks.transform import JSONStructureBlock\n",
|
|
338
338
|
"\n",
|
|
@@ -355,7 +355,7 @@
|
|
|
355
355
|
" temperature=0.1, # Low temperature for more consistent extraction\n",
|
|
356
356
|
")\n",
|
|
357
357
|
"\n",
|
|
358
|
-
"
|
|
358
|
+
"ticker_llm_response_extractor_block = LLMResponseExtractorBlock(\n",
|
|
359
359
|
" block_name=\"extract_stock_tickers\",\n",
|
|
360
360
|
" input_cols=[\"raw_stock_tickers\"],\n",
|
|
361
361
|
" extract_content=True,\n",
|
|
@@ -406,7 +406,7 @@
|
|
|
406
406
|
"ticker_blocks = [\n",
|
|
407
407
|
" ticker_prompt_block,\n",
|
|
408
408
|
" ticker_llm_block,\n",
|
|
409
|
-
"
|
|
409
|
+
" ticker_llm_response_extractor_block,\n",
|
|
410
410
|
" ticker_parser_block,\n",
|
|
411
411
|
" enhanced_json_block,\n",
|
|
412
412
|
"]\n",
|
|
@@ -33,7 +33,7 @@ dependencies = [
|
|
|
33
33
|
"datasets>=4.0.0",
|
|
34
34
|
"httpx>=0.25.0,<1.0.0",
|
|
35
35
|
"jinja2",
|
|
36
|
-
"litellm>=1.73.0,<1.75.0
|
|
36
|
+
"litellm>=1.73.0,<2.0.0", # raising cap since tests run without errors related to 'backoff' cap back to <1.75.0 if errors surface
|
|
37
37
|
"rich",
|
|
38
38
|
"pandas",
|
|
39
39
|
"pydantic>=2.0.0,<3.0.0", # cap before v3; adjust the lower bound to the minimum v2.x you’ve tested
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.7.
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 7,
|
|
31
|
+
__version__ = version = '0.7.3'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 7, 3)
|
|
33
33
|
|
|
34
|
-
__commit_id__ = commit_id = '
|
|
34
|
+
__commit_id__ = commit_id = 'g97824a47f'
|
|
@@ -6,7 +6,13 @@ This package provides various block implementations for data generation, process
|
|
|
6
6
|
# Local
|
|
7
7
|
from .base import BaseBlock
|
|
8
8
|
from .filtering import ColumnValueFilterBlock
|
|
9
|
-
from .llm import
|
|
9
|
+
from .llm import (
|
|
10
|
+
LLMChatBlock,
|
|
11
|
+
LLMParserBlock,
|
|
12
|
+
LLMResponseExtractorBlock,
|
|
13
|
+
PromptBuilderBlock,
|
|
14
|
+
TextParserBlock,
|
|
15
|
+
)
|
|
10
16
|
from .registry import BlockRegistry
|
|
11
17
|
from .transform import (
|
|
12
18
|
DuplicateColumnsBlock,
|
|
@@ -28,7 +34,8 @@ __all__ = [
|
|
|
28
34
|
"TextConcatBlock",
|
|
29
35
|
"UniformColumnValueSetter",
|
|
30
36
|
"LLMChatBlock",
|
|
31
|
-
"LLMParserBlock",
|
|
37
|
+
"LLMParserBlock", # Deprecated alias for LLMResponseExtractorBlock
|
|
38
|
+
"LLMResponseExtractorBlock",
|
|
32
39
|
"TextParserBlock",
|
|
33
40
|
"PromptBuilderBlock",
|
|
34
41
|
]
|
|
@@ -49,6 +49,9 @@ class BaseBlock(BaseModel, ABC):
|
|
|
49
49
|
block_name: str = Field(
|
|
50
50
|
..., description="Unique identifier for this block instance"
|
|
51
51
|
)
|
|
52
|
+
block_type: Optional[str] = Field(
|
|
53
|
+
None, description="Block type (e.g., 'llm', 'transform', 'parser', 'filtering')"
|
|
54
|
+
)
|
|
52
55
|
input_cols: Union[str, list[str], dict[str, Any], None] = Field(
|
|
53
56
|
None, description="Input columns: str, list, or dict"
|
|
54
57
|
)
|
|
@@ -366,5 +369,5 @@ class BaseBlock(BaseModel, ABC):
|
|
|
366
369
|
Dict[str, Any]
|
|
367
370
|
"""
|
|
368
371
|
config = self.get_config()
|
|
369
|
-
config["
|
|
372
|
+
config["block_class"] = self.__class__.__name__
|
|
370
373
|
return config
|
|
@@ -46,6 +46,8 @@ DTYPE_MAP = {
|
|
|
46
46
|
"Filters datasets based on column values using various comparison operations",
|
|
47
47
|
)
|
|
48
48
|
class ColumnValueFilterBlock(BaseBlock):
|
|
49
|
+
block_type: str = "filtering"
|
|
50
|
+
|
|
49
51
|
"""A block for filtering datasets based on column values.
|
|
50
52
|
|
|
51
53
|
This block allows filtering of datasets using various operations (e.g., equals, contains)
|
|
@@ -9,7 +9,7 @@ local models (vLLM, Ollama), and more.
|
|
|
9
9
|
# Local
|
|
10
10
|
from .error_handler import ErrorCategory, LLMErrorHandler
|
|
11
11
|
from .llm_chat_block import LLMChatBlock
|
|
12
|
-
from .
|
|
12
|
+
from .llm_response_extractor_block import LLMParserBlock, LLMResponseExtractorBlock
|
|
13
13
|
from .prompt_builder_block import PromptBuilderBlock
|
|
14
14
|
from .text_parser_block import TextParserBlock
|
|
15
15
|
|
|
@@ -17,7 +17,8 @@ __all__ = [
|
|
|
17
17
|
"LLMErrorHandler",
|
|
18
18
|
"ErrorCategory",
|
|
19
19
|
"LLMChatBlock",
|
|
20
|
-
"LLMParserBlock",
|
|
20
|
+
"LLMParserBlock", # Deprecated alias for LLMResponseExtractorBlock
|
|
21
|
+
"LLMResponseExtractorBlock",
|
|
21
22
|
"PromptBuilderBlock",
|
|
22
23
|
"TextParserBlock",
|
|
23
24
|
]
|
|
@@ -32,6 +32,8 @@ logger = setup_logger(__name__)
|
|
|
32
32
|
class LLMChatBlock(BaseBlock):
|
|
33
33
|
model_config = ConfigDict(extra="allow")
|
|
34
34
|
|
|
35
|
+
block_type: str = "llm"
|
|
36
|
+
|
|
35
37
|
"""Unified LLM chat block supporting all providers via LiteLLM.
|
|
36
38
|
|
|
37
39
|
This block provides a minimal wrapper around LiteLLM's completion API,
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
-
"""LLM
|
|
2
|
+
"""LLM response extractor block for extracting fields from LLM response objects.
|
|
3
3
|
|
|
4
|
-
This module provides the
|
|
4
|
+
This module provides the LLMResponseExtractorBlock for extracting specific fields
|
|
5
5
|
(content, reasoning_content, tool_calls) from chat completion response objects.
|
|
6
6
|
"""
|
|
7
7
|
|
|
@@ -22,13 +22,15 @@ logger = setup_logger(__name__)
|
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
@BlockRegistry.register(
|
|
25
|
-
"
|
|
25
|
+
"LLMResponseExtractorBlock",
|
|
26
26
|
"llm",
|
|
27
27
|
"Extracts specified fields from LLM response objects",
|
|
28
28
|
)
|
|
29
|
-
class
|
|
29
|
+
class LLMResponseExtractorBlock(BaseBlock):
|
|
30
30
|
_flow_requires_jsonl_tmp: bool = True
|
|
31
31
|
|
|
32
|
+
block_type: str = "llm_util"
|
|
33
|
+
|
|
32
34
|
"""Block for extracting fields from LLM response objects.
|
|
33
35
|
|
|
34
36
|
This block extracts specified fields from chat completion response objects.
|
|
@@ -88,7 +90,7 @@ class LLMParserBlock(BaseBlock):
|
|
|
88
90
|
]
|
|
89
91
|
):
|
|
90
92
|
raise ValueError(
|
|
91
|
-
"
|
|
93
|
+
"LLMResponseExtractorBlock requires at least one extraction field to be enabled: "
|
|
92
94
|
"extract_content, extract_reasoning_content, or extract_tool_calls"
|
|
93
95
|
)
|
|
94
96
|
|
|
@@ -106,7 +108,7 @@ class LLMParserBlock(BaseBlock):
|
|
|
106
108
|
return self
|
|
107
109
|
|
|
108
110
|
def _validate_custom(self, dataset: pd.DataFrame) -> None:
|
|
109
|
-
"""Validate
|
|
111
|
+
"""Validate LLMResponseExtractorBlock specific requirements.
|
|
110
112
|
|
|
111
113
|
Parameters
|
|
112
114
|
----------
|
|
@@ -116,14 +118,16 @@ class LLMParserBlock(BaseBlock):
|
|
|
116
118
|
Raises
|
|
117
119
|
------
|
|
118
120
|
ValueError
|
|
119
|
-
If
|
|
121
|
+
If LLMResponseExtractorBlock requirements are not met.
|
|
120
122
|
"""
|
|
121
123
|
# Validate that we have exactly one input column
|
|
122
124
|
if len(self.input_cols) == 0:
|
|
123
|
-
raise ValueError(
|
|
125
|
+
raise ValueError(
|
|
126
|
+
"LLMResponseExtractorBlock expects at least one input column"
|
|
127
|
+
)
|
|
124
128
|
if len(self.input_cols) > 1:
|
|
125
129
|
logger.warning(
|
|
126
|
-
f"
|
|
130
|
+
f"LLMResponseExtractorBlock expects exactly one input column, but got {len(self.input_cols)}. "
|
|
127
131
|
f"Using the first column: {self.input_cols[0]}"
|
|
128
132
|
)
|
|
129
133
|
|
|
@@ -324,3 +328,22 @@ class LLMParserBlock(BaseBlock):
|
|
|
324
328
|
new_data.extend(self._generate(sample))
|
|
325
329
|
|
|
326
330
|
return pd.DataFrame(new_data)
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
# Backwards compatibility alias (deprecated)
|
|
334
|
+
# Register deprecated alias in BlockRegistry so old YAML flows still work
|
|
335
|
+
@BlockRegistry.register(
|
|
336
|
+
"LLMParserBlock",
|
|
337
|
+
"llm",
|
|
338
|
+
"Deprecated: Use LLMResponseExtractorBlock instead",
|
|
339
|
+
deprecated=True,
|
|
340
|
+
replacement="LLMResponseExtractorBlock",
|
|
341
|
+
)
|
|
342
|
+
class LLMParserBlock(LLMResponseExtractorBlock):
|
|
343
|
+
"""Deprecated alias for LLMResponseExtractorBlock.
|
|
344
|
+
|
|
345
|
+
This class exists for backwards compatibility with existing code and YAML flows.
|
|
346
|
+
Use LLMResponseExtractorBlock instead.
|
|
347
|
+
"""
|
|
348
|
+
|
|
349
|
+
pass
|
|
@@ -222,6 +222,8 @@ class PromptRenderer:
|
|
|
222
222
|
"Formats prompts into structured chat messages or plain text using Jinja templates",
|
|
223
223
|
)
|
|
224
224
|
class PromptBuilderBlock(BaseBlock):
|
|
225
|
+
block_type: str = "llm_util"
|
|
226
|
+
|
|
225
227
|
"""Block for formatting prompts into structured chat messages or plain text.
|
|
226
228
|
|
|
227
229
|
This block takes input from dataset columns, applies Jinja templates from a YAML config
|
|
@@ -30,6 +30,8 @@ logger = setup_logger(__name__)
|
|
|
30
30
|
class TextParserBlock(BaseBlock):
|
|
31
31
|
_flow_requires_jsonl_tmp: bool = True
|
|
32
32
|
|
|
33
|
+
block_type: str = "parser"
|
|
34
|
+
|
|
33
35
|
"""Block for parsing and post-processing text content.
|
|
34
36
|
|
|
35
37
|
This block handles text parsing using start/end tags, custom regex patterns,
|
|
@@ -27,6 +27,8 @@ logger = setup_logger(__name__)
|
|
|
27
27
|
"Duplicates existing columns with new names according to a mapping specification",
|
|
28
28
|
)
|
|
29
29
|
class DuplicateColumnsBlock(BaseBlock):
|
|
30
|
+
block_type: str = "transform"
|
|
31
|
+
|
|
30
32
|
"""Block for duplicating existing columns with new names.
|
|
31
33
|
|
|
32
34
|
This block creates copies of existing columns with new names according to a mapping specification.
|
|
@@ -28,6 +28,8 @@ logger = setup_logger(__name__)
|
|
|
28
28
|
"Maps values from source columns to output columns based on choice columns using shared mapping",
|
|
29
29
|
)
|
|
30
30
|
class IndexBasedMapperBlock(BaseBlock):
|
|
31
|
+
block_type: str = "transform"
|
|
32
|
+
|
|
31
33
|
"""Block for mapping values from source columns to output columns based on choice columns.
|
|
32
34
|
|
|
33
35
|
This block uses a shared mapping dictionary to select values from source columns and
|
|
@@ -28,6 +28,8 @@ logger = setup_logger(__name__)
|
|
|
28
28
|
"Combines multiple columns into a single column containing a structured JSON object",
|
|
29
29
|
)
|
|
30
30
|
class JSONStructureBlock(BaseBlock):
|
|
31
|
+
block_type: str = "transform"
|
|
32
|
+
|
|
31
33
|
"""Block for combining multiple columns into a structured JSON object.
|
|
32
34
|
|
|
33
35
|
This block takes values from multiple input columns and combines them into a single
|
|
@@ -28,6 +28,8 @@ logger = setup_logger(__name__)
|
|
|
28
28
|
"Transforms wide dataset format into long format by melting columns into rows",
|
|
29
29
|
)
|
|
30
30
|
class MeltColumnsBlock(BaseBlock):
|
|
31
|
+
block_type: str = "transform"
|
|
32
|
+
|
|
31
33
|
"""Block for flattening multiple columns into a long format.
|
|
32
34
|
|
|
33
35
|
This block transforms a wide dataset format into a long format by melting
|
|
@@ -27,6 +27,8 @@ logger = setup_logger(__name__)
|
|
|
27
27
|
"Renames columns in a dataset according to a mapping specification",
|
|
28
28
|
)
|
|
29
29
|
class RenameColumnsBlock(BaseBlock):
|
|
30
|
+
block_type: str = "transform"
|
|
31
|
+
|
|
30
32
|
"""Block for renaming columns in a dataset.
|
|
31
33
|
|
|
32
34
|
This block renames columns in a dataset according to a mapping specification.
|
|
@@ -27,6 +27,8 @@ logger = setup_logger(__name__)
|
|
|
27
27
|
"Combines multiple columns into a single column using a specified separator",
|
|
28
28
|
)
|
|
29
29
|
class TextConcatBlock(BaseBlock):
|
|
30
|
+
block_type: str = "transform"
|
|
31
|
+
|
|
30
32
|
"""Block for combining multiple columns into a single column.
|
|
31
33
|
|
|
32
34
|
This block concatenates values from multiple columns into a single output column,
|
|
@@ -28,6 +28,8 @@ logger = setup_logger(__name__)
|
|
|
28
28
|
"Replaces all values in a column with a single summary statistic (e.g., mode, mean, median)",
|
|
29
29
|
)
|
|
30
30
|
class UniformColumnValueSetter(BaseBlock):
|
|
31
|
+
block_type: str = "transform"
|
|
32
|
+
|
|
31
33
|
"""Block that replaces all values in a column with a single aggregate value.
|
|
32
34
|
|
|
33
35
|
Supported strategies include: mode, min, max, mean, median.
|
|
@@ -679,7 +679,7 @@ class Flow(BaseModel):
|
|
|
679
679
|
self._block_metrics.append(
|
|
680
680
|
{
|
|
681
681
|
"block_name": block.block_name,
|
|
682
|
-
"
|
|
682
|
+
"block_class": block.__class__.__name__,
|
|
683
683
|
"execution_time": execution_time,
|
|
684
684
|
"input_rows": input_rows,
|
|
685
685
|
"output_rows": output_rows,
|
|
@@ -701,7 +701,7 @@ class Flow(BaseModel):
|
|
|
701
701
|
self._block_metrics.append(
|
|
702
702
|
{
|
|
703
703
|
"block_name": block.block_name,
|
|
704
|
-
"
|
|
704
|
+
"block_class": block.__class__.__name__,
|
|
705
705
|
"execution_time": execution_time,
|
|
706
706
|
"input_rows": input_rows,
|
|
707
707
|
"output_rows": 0,
|
|
@@ -882,38 +882,14 @@ class Flow(BaseModel):
|
|
|
882
882
|
)
|
|
883
883
|
|
|
884
884
|
def _detect_llm_blocks(self) -> list[str]:
|
|
885
|
-
"""Detect
|
|
886
|
-
|
|
887
|
-
LLM blocks are identified by having model, api_base, or api_key attributes,
|
|
888
|
-
regardless of their values (they may be None until set_model_config() is called).
|
|
885
|
+
"""Detect blocks with block_type='llm'.
|
|
889
886
|
|
|
890
887
|
Returns
|
|
891
888
|
-------
|
|
892
889
|
List[str]
|
|
893
|
-
List of block names that
|
|
890
|
+
List of block names that are LLM blocks.
|
|
894
891
|
"""
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
for block in self.blocks:
|
|
898
|
-
block_type = block.__class__.__name__
|
|
899
|
-
block_name = block.block_name
|
|
900
|
-
|
|
901
|
-
# Check by attribute existence (not value) - LLM blocks have these attributes even if None
|
|
902
|
-
has_model_attr = hasattr(block, "model")
|
|
903
|
-
has_api_base_attr = hasattr(block, "api_base")
|
|
904
|
-
has_api_key_attr = hasattr(block, "api_key")
|
|
905
|
-
|
|
906
|
-
# A block is considered an LLM block if it has any LLM-related attributes
|
|
907
|
-
is_llm_block = has_model_attr or has_api_base_attr or has_api_key_attr
|
|
908
|
-
|
|
909
|
-
if is_llm_block:
|
|
910
|
-
llm_blocks.append(block_name)
|
|
911
|
-
logger.debug(
|
|
912
|
-
f"Detected LLM block '{block_name}' ({block_type}): "
|
|
913
|
-
f"has_model_attr={has_model_attr}, has_api_base_attr={has_api_base_attr}, has_api_key_attr={has_api_key_attr}"
|
|
914
|
-
)
|
|
915
|
-
|
|
916
|
-
return llm_blocks
|
|
892
|
+
return [block.block_name for block in self.blocks if block.block_type == "llm"]
|
|
917
893
|
|
|
918
894
|
def is_model_config_required(self) -> bool:
|
|
919
895
|
"""Check if model configuration is required for this flow.
|
|
@@ -1152,7 +1128,7 @@ class Flow(BaseModel):
|
|
|
1152
1128
|
# Record block execution info
|
|
1153
1129
|
block_info = {
|
|
1154
1130
|
"block_name": block.block_name,
|
|
1155
|
-
"
|
|
1131
|
+
"block_class": block.__class__.__name__,
|
|
1156
1132
|
"execution_time_seconds": block_execution_time,
|
|
1157
1133
|
"input_rows": input_rows,
|
|
1158
1134
|
"output_rows": len(current_dataset),
|
|
@@ -1341,7 +1317,7 @@ class Flow(BaseModel):
|
|
|
1341
1317
|
"metadata": self.metadata.model_dump(),
|
|
1342
1318
|
"blocks": [
|
|
1343
1319
|
{
|
|
1344
|
-
"
|
|
1320
|
+
"block_class": block.__class__.__name__,
|
|
1345
1321
|
"block_name": block.block_name,
|
|
1346
1322
|
"input_cols": getattr(block, "input_cols", None),
|
|
1347
1323
|
"output_cols": getattr(block, "output_cols", None),
|
|
@@ -31,12 +31,12 @@ def aggregate_block_metrics(entries: list[dict[str, Any]]) -> list[dict[str, Any
|
|
|
31
31
|
"""
|
|
32
32
|
agg: dict[tuple[str, str], dict[str, Any]] = {}
|
|
33
33
|
for m in entries:
|
|
34
|
-
key = (m.get("block_name"), m.get("
|
|
34
|
+
key = (m.get("block_name"), m.get("block_class"))
|
|
35
35
|
a = agg.setdefault(
|
|
36
36
|
key,
|
|
37
37
|
{
|
|
38
38
|
"block_name": key[0],
|
|
39
|
-
"
|
|
39
|
+
"block_class": key[1],
|
|
40
40
|
"execution_time": 0.0,
|
|
41
41
|
"input_rows": 0,
|
|
42
42
|
"output_rows": 0,
|
|
@@ -138,7 +138,7 @@ def display_metrics_summary(
|
|
|
138
138
|
|
|
139
139
|
table.add_row(
|
|
140
140
|
metrics["block_name"],
|
|
141
|
-
metrics["
|
|
141
|
+
metrics["block_class"],
|
|
142
142
|
duration,
|
|
143
143
|
row_change,
|
|
144
144
|
col_change,
|
|
@@ -41,7 +41,7 @@ blocks:
|
|
|
41
41
|
max_tokens: 2048
|
|
42
42
|
temperature: 0.7
|
|
43
43
|
|
|
44
|
-
- block_type:
|
|
44
|
+
- block_type: LLMResponseExtractorBlock
|
|
45
45
|
block_config:
|
|
46
46
|
block_name: parse_topic
|
|
47
47
|
input_cols: topic_response
|
|
@@ -73,7 +73,7 @@ blocks:
|
|
|
73
73
|
max_tokens: 2048
|
|
74
74
|
temperature: 0.7
|
|
75
75
|
|
|
76
|
-
- block_type:
|
|
76
|
+
- block_type: LLMResponseExtractorBlock
|
|
77
77
|
block_config:
|
|
78
78
|
block_name: parse_question
|
|
79
79
|
input_cols: question_response
|
|
@@ -97,7 +97,7 @@ blocks:
|
|
|
97
97
|
max_tokens: 4096
|
|
98
98
|
temperature: 0.7
|
|
99
99
|
|
|
100
|
-
- block_type:
|
|
100
|
+
- block_type: LLMResponseExtractorBlock
|
|
101
101
|
block_config:
|
|
102
102
|
block_name: parse_evolved_question
|
|
103
103
|
input_cols: evolution_response
|
|
@@ -123,7 +123,7 @@ blocks:
|
|
|
123
123
|
max_tokens: 4096
|
|
124
124
|
temperature: 0.2
|
|
125
125
|
|
|
126
|
-
- block_type:
|
|
126
|
+
- block_type: LLMResponseExtractorBlock
|
|
127
127
|
block_config:
|
|
128
128
|
block_name: parse_answer
|
|
129
129
|
input_cols: answer_response
|
|
@@ -150,7 +150,7 @@ blocks:
|
|
|
150
150
|
max_tokens: 512
|
|
151
151
|
temperature: 0.0
|
|
152
152
|
|
|
153
|
-
- block_type:
|
|
153
|
+
- block_type: LLMResponseExtractorBlock
|
|
154
154
|
block_config:
|
|
155
155
|
block_name: parse_critic_score
|
|
156
156
|
input_cols: critic_response
|
|
@@ -185,7 +185,7 @@ blocks:
|
|
|
185
185
|
max_tokens: 4096
|
|
186
186
|
temperature: 0.0
|
|
187
187
|
|
|
188
|
-
- block_type:
|
|
188
|
+
- block_type: LLMResponseExtractorBlock
|
|
189
189
|
block_config:
|
|
190
190
|
block_name: parse_extracted_context
|
|
191
191
|
input_cols: extraction_response
|
|
@@ -60,7 +60,7 @@ blocks:
|
|
|
60
60
|
temperature: 0.7
|
|
61
61
|
n: 50
|
|
62
62
|
async_mode: true
|
|
63
|
-
- block_type:
|
|
63
|
+
- block_type: LLMResponseExtractorBlock
|
|
64
64
|
block_config:
|
|
65
65
|
block_name: extract_detailed_summary
|
|
66
66
|
input_cols: raw_summary
|
|
@@ -108,7 +108,7 @@ blocks:
|
|
|
108
108
|
temperature: 0.7
|
|
109
109
|
n: 1
|
|
110
110
|
async_mode: true
|
|
111
|
-
- block_type:
|
|
111
|
+
- block_type: LLMResponseExtractorBlock
|
|
112
112
|
block_config:
|
|
113
113
|
block_name: extract_questions
|
|
114
114
|
input_cols: question_list
|
|
@@ -142,7 +142,7 @@ blocks:
|
|
|
142
142
|
temperature: 0.7
|
|
143
143
|
n: 1
|
|
144
144
|
async_mode: true
|
|
145
|
-
- block_type:
|
|
145
|
+
- block_type: LLMResponseExtractorBlock
|
|
146
146
|
block_config:
|
|
147
147
|
block_name: extract_answers
|
|
148
148
|
input_cols: response_dict
|
|
@@ -174,7 +174,7 @@ blocks:
|
|
|
174
174
|
output_cols: eval_faithful_response_dict
|
|
175
175
|
n: 1
|
|
176
176
|
async_mode: true
|
|
177
|
-
- block_type:
|
|
177
|
+
- block_type: LLMResponseExtractorBlock
|
|
178
178
|
block_config:
|
|
179
179
|
block_name: extract_eval_faithful
|
|
180
180
|
input_cols: eval_faithful_response_dict
|
|
@@ -64,7 +64,7 @@ blocks:
|
|
|
64
64
|
temperature: 1.0
|
|
65
65
|
n: 1
|
|
66
66
|
async_mode: true
|
|
67
|
-
- block_type:
|
|
67
|
+
- block_type: LLMResponseExtractorBlock
|
|
68
68
|
block_config:
|
|
69
69
|
block_name: extract_questions
|
|
70
70
|
input_cols: question_list
|
|
@@ -98,7 +98,7 @@ blocks:
|
|
|
98
98
|
temperature: 1.0
|
|
99
99
|
n: 1
|
|
100
100
|
async_mode: true
|
|
101
|
-
- block_type:
|
|
101
|
+
- block_type: LLMResponseExtractorBlock
|
|
102
102
|
block_config:
|
|
103
103
|
block_name: extract_answer
|
|
104
104
|
input_cols: response_dict
|
|
@@ -130,7 +130,7 @@ blocks:
|
|
|
130
130
|
output_cols: eval_faithful_response_dict
|
|
131
131
|
n: 1
|
|
132
132
|
async_mode: true
|
|
133
|
-
- block_type:
|
|
133
|
+
- block_type: LLMResponseExtractorBlock
|
|
134
134
|
block_config:
|
|
135
135
|
block_name: extract_eval_faithful
|
|
136
136
|
input_cols: eval_faithful_response_dict
|
|
@@ -62,7 +62,7 @@ blocks:
|
|
|
62
62
|
temperature: 0.7
|
|
63
63
|
n: 50
|
|
64
64
|
async_mode: true
|
|
65
|
-
- block_type:
|
|
65
|
+
- block_type: LLMResponseExtractorBlock
|
|
66
66
|
block_config:
|
|
67
67
|
block_name: extract_extractive_summary
|
|
68
68
|
input_cols: raw_summary
|
|
@@ -110,7 +110,7 @@ blocks:
|
|
|
110
110
|
temperature: 0.7
|
|
111
111
|
n: 1
|
|
112
112
|
async_mode: true
|
|
113
|
-
- block_type:
|
|
113
|
+
- block_type: LLMResponseExtractorBlock
|
|
114
114
|
block_config:
|
|
115
115
|
block_name: extract_questions
|
|
116
116
|
input_cols: question_list
|
|
@@ -144,7 +144,7 @@ blocks:
|
|
|
144
144
|
temperature: 0.7
|
|
145
145
|
n: 1
|
|
146
146
|
async_mode: true
|
|
147
|
-
- block_type:
|
|
147
|
+
- block_type: LLMResponseExtractorBlock
|
|
148
148
|
block_config:
|
|
149
149
|
block_name: extract_answers
|
|
150
150
|
input_cols: response_dict
|
|
@@ -176,7 +176,7 @@ blocks:
|
|
|
176
176
|
output_cols: eval_faithful_response_dict
|
|
177
177
|
n: 1
|
|
178
178
|
async_mode: true
|
|
179
|
-
- block_type:
|
|
179
|
+
- block_type: LLMResponseExtractorBlock
|
|
180
180
|
block_config:
|
|
181
181
|
block_name: extract_eval_faithful
|
|
182
182
|
input_cols: eval_faithful_response_dict
|
|
@@ -49,7 +49,7 @@ blocks:
|
|
|
49
49
|
temperature: 0.7
|
|
50
50
|
n: 1
|
|
51
51
|
async_mode: true
|
|
52
|
-
- block_type:
|
|
52
|
+
- block_type: LLMResponseExtractorBlock
|
|
53
53
|
block_config:
|
|
54
54
|
block_name: extract_atomic_facts
|
|
55
55
|
input_cols: raw_summary
|
|
@@ -98,7 +98,7 @@ blocks:
|
|
|
98
98
|
temperature: 0.7
|
|
99
99
|
n: 1
|
|
100
100
|
async_mode: true
|
|
101
|
-
- block_type:
|
|
101
|
+
- block_type: LLMResponseExtractorBlock
|
|
102
102
|
block_config:
|
|
103
103
|
block_name: extract_key_fact_qa
|
|
104
104
|
input_cols: raw_key_fact_qa
|