sdg-hub 0.3.1__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sdg_hub-0.4.0/.github/workflows/packer.yml +15 -0
- {sdg_hub-0.3.1/src/sdg_hub.egg-info → sdg_hub-0.4.0}/PKG-INFO +1 -1
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/docs/README.md +0 -1
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/docs/_sidebar.md +0 -1
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/docs/blocks/filtering-blocks.md +0 -1
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/docs/blocks/llm-blocks.md +0 -1
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/docs/blocks/overview.md +0 -6
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/docs/blocks/transform-blocks.md +0 -1
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/docs/concepts.md +1 -1
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/docs/development.md +0 -7
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/docs/flows/overview.md +32 -4
- sdg_hub-0.4.0/examples/annotation/annotation_classification.ipynb +486 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/examples/annotation/news_classification_flow.yaml +38 -8
- sdg_hub-0.4.0/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/knowledge_generation.ipynb +425 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/knowledge_mixing_utils.py +5 -0
- sdg_hub-0.4.0/examples/text_analysis/structured_insights_demo.ipynb +520 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/_version.py +3 -3
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/__init__.py +2 -4
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/base.py +61 -6
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/filtering/column_value_filter.py +3 -2
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/llm/__init__.py +2 -4
- sdg_hub-0.4.0/src/sdg_hub/core/blocks/llm/llm_chat_block.py +586 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +216 -98
- sdg_hub-0.4.0/src/sdg_hub/core/blocks/llm/llm_parser_block.py +320 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/llm/text_parser_block.py +53 -152
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/flow/base.py +7 -4
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml +51 -11
- sdg_hub-0.4.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/flow.yaml +159 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml +51 -11
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml +14 -2
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +146 -26
- sdg_hub-0.4.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/__init__.py +0 -0
- sdg_hub-0.4.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/atomic_facts_ja.yaml +41 -0
- sdg_hub-0.4.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/detailed_summary_ja.yaml +14 -0
- sdg_hub-0.4.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/extractive_summary_ja.yaml +14 -0
- sdg_hub-0.4.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml +304 -0
- sdg_hub-0.4.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/generate_questions_responses_ja.yaml +55 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/flows/text_analysis/structured_insights/flow.yaml +28 -4
- sdg_hub-0.4.0/src/sdg_hub/py.typed +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0/src/sdg_hub.egg-info}/PKG-INFO +1 -1
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub.egg-info/SOURCES.txt +12 -14
- sdg_hub-0.4.0/tests/__init__.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/blocks/filtering/test_columnvaluefilter.py +2 -2
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/blocks/llm/test_llm_chat_block.py +91 -183
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/blocks/llm/test_llm_chat_with_parsing_retry_block.py +29 -34
- sdg_hub-0.4.0/tests/blocks/llm/test_llm_parser_block.py +671 -0
- sdg_hub-0.4.0/tests/blocks/llm/test_textparserblock.py +962 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/blocks/test_base_block.py +198 -2
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/flow/test_base.py +36 -7
- sdg_hub-0.3.1/docs/blocks/evaluation-blocks.md +0 -22
- sdg_hub-0.3.1/examples/annotation/annotation_classification.ipynb +0 -840
- sdg_hub-0.3.1/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/knowledge_generation.ipynb +0 -588
- sdg_hub-0.3.1/examples/text_analysis/structured_insights_demo.ipynb +0 -4479
- sdg_hub-0.3.1/src/sdg_hub/core/blocks/evaluation/__init__.py +0 -9
- sdg_hub-0.3.1/src/sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +0 -323
- sdg_hub-0.3.1/src/sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +0 -323
- sdg_hub-0.3.1/src/sdg_hub/core/blocks/evaluation/verify_question_block.py +0 -329
- sdg_hub-0.3.1/src/sdg_hub/core/blocks/llm/client_manager.py +0 -472
- sdg_hub-0.3.1/src/sdg_hub/core/blocks/llm/config.py +0 -337
- sdg_hub-0.3.1/src/sdg_hub/core/blocks/llm/llm_chat_block.py +0 -600
- sdg_hub-0.3.1/tests/blocks/evaluation/__init__.py +0 -2
- sdg_hub-0.3.1/tests/blocks/evaluation/test_evaluate_faithfulness_block.py +0 -271
- sdg_hub-0.3.1/tests/blocks/evaluation/test_evaluate_relevancy_block.py +0 -189
- sdg_hub-0.3.1/tests/blocks/evaluation/test_verify_question_block.py +0 -331
- sdg_hub-0.3.1/tests/blocks/llm/test_textparserblock.py +0 -1849
- sdg_hub-0.3.1/tests/blocks/testdata/test_evaluate_faithfulness.yaml +0 -17
- sdg_hub-0.3.1/tests/blocks/testdata/test_evaluate_relevancy.yaml +0 -24
- sdg_hub-0.3.1/tests/blocks/testdata/test_verify_question.yaml +0 -27
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/.github/actionlint.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/.github/actions/free-disk-space/action.yml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/.github/dependabot.yml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/.github/mergify.yml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/.github/workflows/actionlint.dockerfile +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/.github/workflows/actionlint.yml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/.github/workflows/docs.yml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/.github/workflows/e2e.yml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/.github/workflows/lint.yml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/.github/workflows/matchers/actionlint.json +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/.github/workflows/matchers/pylint.json +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/.github/workflows/pypi.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/.github/workflows/test.yml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/.gitignore +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/.isort.cfg +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/.markdownlint-cli2.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/.pre-commit-config.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/.pylintrc +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/CLAUDE.md +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/CONTRIBUTING.md +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/LICENSE +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/Makefile +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/README.md +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/docs/.nojekyll +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/docs/_coverpage.md +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/docs/_navbar.md +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/docs/api-reference.md +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/docs/blocks/custom-blocks.md +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/docs/flows/discovery.md +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/docs/index.html +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/docs/installation.md +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/docs/quick-start.md +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/examples/annotation/news_classification_assessment_prompt.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/examples/annotation/news_classification_prompt.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/examples/annotation/revise_news_classification_prompt.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/.env.example +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/README.md +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/knowledge_mixing.ipynb +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/examples/knowledge_tuning/instructlab/.gitignore +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/examples/knowledge_tuning/instructlab/README.md +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/examples/knowledge_tuning/instructlab/assets/imgs/instructlab-banner.png +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/examples/knowledge_tuning/instructlab/docling_v2_config.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/examples/knowledge_tuning/instructlab/docparser.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/examples/knowledge_tuning/instructlab/docparser_v2.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.json +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.md +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.pdf +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/qna.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/examples/knowledge_tuning/instructlab/document_pre_processing.ipynb +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/examples/knowledge_tuning/instructlab/knowledge_generation_and_mixing.ipynb +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/examples/knowledge_tuning/instructlab/logger_config.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/examples/knowledge_tuning/knowledge_utils.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/examples/text_analysis/README.md +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/examples/text_analysis/extract_stock_tickers.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/pyproject.toml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/scripts/ruff.sh +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/setup.cfg +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/__init__.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/__init__.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/deprecated_blocks/__init__.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/deprecated_blocks/combine_columns.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/deprecated_blocks/duplicate_columns.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/deprecated_blocks/filter_by_value.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/deprecated_blocks/flatten_columns.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/deprecated_blocks/llmblock.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/deprecated_blocks/rename_columns.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/deprecated_blocks/sample_populator.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/deprecated_blocks/selector.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/deprecated_blocks/set_to_majority_value.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/filtering/__init__.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/llm/error_handler.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/llm/prompt_builder_block.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/registry.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/transform/__init__.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/transform/duplicate_columns.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/transform/index_based_mapper.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/transform/json_structure_block.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/transform/melt_columns.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/transform/rename_columns.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/transform/text_concat.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/transform/uniform_col_val_setter.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/flow/__init__.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/flow/checkpointer.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/flow/metadata.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/flow/migration.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/flow/registry.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/flow/validation.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/utils/__init__.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/utils/datautils.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/utils/error_handling.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/utils/flow_id_words.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/utils/flow_identifier.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/utils/flow_metrics.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/utils/logger_config.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/utils/path_resolution.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/core/utils/yaml_utils.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/__init__.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/__init__.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/detailed_summary.yaml +0 -0
- {sdg_hub-0.3.1/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary → sdg_hub-0.4.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa}/__init__.py +0 -0
- {sdg_hub-0.3.1/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts → sdg_hub-0.4.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary}/__init__.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/extractive_summary.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_answers.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_multiple_qa.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_question_list.yaml +0 -0
- {sdg_hub-0.3.1/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab → sdg_hub-0.4.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts}/__init__.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/key_facts_summary.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/README.md +0 -0
- {sdg_hub-0.3.1/tests → sdg_hub-0.4.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab}/__init__.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/atomic_facts.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/detailed_summary.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_faithfulness.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_question.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_relevancy.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/extractive_summary.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/generate_questions_responses.yaml +0 -0
- /sdg_hub-0.3.1/src/sdg_hub/py.typed → /sdg_hub-0.4.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/README.md +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/flows/text_analysis/__init__.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/flows/text_analysis/structured_insights/__init__.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/flows/text_analysis/structured_insights/analyze_sentiment.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/flows/text_analysis/structured_insights/extract_entities.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/flows/text_analysis/structured_insights/extract_keywords.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub/flows/text_analysis/structured_insights/summarize.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub.egg-info/dependency_links.txt +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub.egg-info/requires.txt +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/src/sdg_hub.egg-info/top_level.txt +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/blocks/deprecated/test_llmblock.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/blocks/llm/test_promptbuilderblock.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/blocks/test_registry.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/blocks/testdata/test_config.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/blocks/testdata/test_prompt_format_config.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/blocks/testdata/test_prompt_format_no_system.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/blocks/testdata/test_prompt_format_strict.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/blocks/testdata/test_prompt_invalid_final_role.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/blocks/testdata/test_prompt_no_user_messages.yaml +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/blocks/transform/test_index_based_mapper.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/blocks/transform/test_json_structure_block.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/blocks/transform/test_melt_columns.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/blocks/transform/test_text_concat.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/blocks/transform/test_uniform_col_val_setter.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/blocks/utilblocks/test_combinecolumns.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/blocks/utilblocks/test_duplicatecolumnsblock.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/blocks/utilblocks/test_flattenblock.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/blocks/utilblocks/test_renameblock.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/blocks/utilblocks/test_samplepopulatorblock.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/blocks/utilblocks/test_selectorblock.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/blocks/utilblocks/test_settomajority.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/flow/__init__.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/flow/conftest.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/flow/test_checkpointer.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/flow/test_dataset_requirements.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/flow/test_integration.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/flow/test_metadata.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/flow/test_migration.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/flow/test_registry.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/flow/test_validation.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/utils/test_datautils.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/utils/test_error_handling.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tests/utils/test_path_resolution.py +0 -0
- {sdg_hub-0.3.1 → sdg_hub-0.4.0}/tox.ini +0 -0
@@ -0,0 +1,15 @@
|
|
1
|
+
name: Build AMI with Packer
|
2
|
+
|
3
|
+
on:
|
4
|
+
workflow_dispatch:
|
5
|
+
|
6
|
+
jobs:
|
7
|
+
build-ami:
|
8
|
+
runs-on: ubuntu-latest
|
9
|
+
permissions:
|
10
|
+
id-token: write # This is required for OIDC
|
11
|
+
contents: read
|
12
|
+
|
13
|
+
steps:
|
14
|
+
- name: Checkout repository
|
15
|
+
uses: actions/checkout@v4
|
@@ -49,7 +49,6 @@ Learn about the modular block architecture that powers SDG Hub:
|
|
49
49
|
- **[LLM Blocks](blocks/llm-blocks.md)** - Chat, prompt building, and text parsing
|
50
50
|
- **[Transform Blocks](blocks/transform-blocks.md)** - Data transformation and manipulation
|
51
51
|
- **[Filtering Blocks](blocks/filtering-blocks.md)** - Quality filtering and data validation
|
52
|
-
- **[Evaluation Blocks](blocks/evaluation-blocks.md)** - Faithfulness and relevancy assessment
|
53
52
|
- **[Custom Blocks](blocks/custom-blocks.md)** - Building your own processing blocks
|
54
53
|
|
55
54
|
### Flow System
|
@@ -9,7 +9,6 @@
|
|
9
9
|
* [LLM Blocks](blocks/llm-blocks.md)
|
10
10
|
* [Transform Blocks](blocks/transform-blocks.md)
|
11
11
|
* [Filtering Blocks](blocks/filtering-blocks.md)
|
12
|
-
* [Evaluation Blocks](blocks/evaluation-blocks.md)
|
13
12
|
* [Custom Blocks](blocks/custom-blocks.md)
|
14
13
|
|
15
14
|
* **Flow System**
|
@@ -10,7 +10,6 @@ Filters dataset rows based on column values using flexible comparison operators
|
|
10
10
|
|
11
11
|
## 🚀 Next Steps
|
12
12
|
|
13
|
-
- **[Evaluation Blocks](evaluation-blocks.md)** - Quality assessment and scoring
|
14
13
|
- **[LLM Blocks](llm-blocks.md)** - AI-powered text generation
|
15
14
|
- **[Transform Blocks](transform-blocks.md)** - Data manipulation and reshaping
|
16
15
|
- **[Flow Integration](../flows/overview.md)** - Combine filtering into complete pipelines
|
@@ -239,5 +239,4 @@ Extracts structured data from LLM responses using patterns, schemas, or custom p
|
|
239
239
|
|
240
240
|
- **[Transform Blocks](transform-blocks.md)** - Data manipulation and reshaping
|
241
241
|
- **[Filtering Blocks](filtering-blocks.md)** - Quality control and validation
|
242
|
-
- **[Evaluation Blocks](evaluation-blocks.md)** - Quality assessment and scoring
|
243
242
|
- **[Flow Integration](../flows/overview.md)** - Combine LLM blocks into complete pipelines
|
@@ -65,11 +65,6 @@ Data manipulation and transformation:
|
|
65
65
|
Quality control and data validation:
|
66
66
|
- **ColumnValueFilterBlock** - Filter rows based on column values
|
67
67
|
|
68
|
-
### 📊 Evaluation Blocks (`evaluation/`)
|
69
|
-
Quality assessment and scoring:
|
70
|
-
- **EvaluateFaithfulnessBlock** - Assess factual accuracy
|
71
|
-
- **EvaluateRelevancyBlock** - Measure relevance scores
|
72
|
-
- **VerifyQuestionBlock** - Validate question quality
|
73
68
|
|
74
69
|
## 🔧 Block Lifecycle
|
75
70
|
|
@@ -149,5 +144,4 @@ Ready to dive deeper? Explore specific block categories:
|
|
149
144
|
- **[LLM Blocks](llm-blocks.md)** - AI-powered language model operations
|
150
145
|
- **[Transform Blocks](transform-blocks.md)** - Data manipulation and reshaping
|
151
146
|
- **[Filtering Blocks](filtering-blocks.md)** - Quality control and validation
|
152
|
-
- **[Evaluation Blocks](evaluation-blocks.md)** - Quality assessment and scoring
|
153
147
|
- **[Custom Blocks](custom-blocks.md)** - Build your own processing blocks
|
@@ -26,6 +26,5 @@ Sets uniform values across specified columns, useful for adding metadata or defa
|
|
26
26
|
## 🚀 Next Steps
|
27
27
|
|
28
28
|
- **[Filtering Blocks](filtering-blocks.md)** - Quality control and data validation
|
29
|
-
- **[Evaluation Blocks](evaluation-blocks.md)** - Quality assessment and scoring
|
30
29
|
- **[LLM Blocks](llm-blocks.md)** - AI-powered text generation
|
31
30
|
- **[Flow Integration](../flows/overview.md)** - Combine transform blocks into complete pipelines
|
@@ -152,7 +152,7 @@ Every block validates data at runtime:
|
|
152
152
|
- Validate your pipeline before scaling up
|
153
153
|
|
154
154
|
### 2. Layer Validation
|
155
|
-
- Use
|
155
|
+
- Use basic block composition (PromptBuilder → LLMChat → Parser → Filter) to assess quality
|
156
156
|
- Implement filtering to maintain data standards
|
157
157
|
|
158
158
|
### 3. Monitor Performance
|
@@ -206,13 +206,6 @@ class TestMyNewBlock:
|
|
206
206
|
- Comprehensive operator support
|
207
207
|
- Good performance on large datasets
|
208
208
|
|
209
|
-
#### Evaluation Blocks (`src/sdg_hub/core/blocks/evaluation/`)
|
210
|
-
- **Purpose**: Quality assessment and scoring
|
211
|
-
- **Examples**: Faithfulness evaluation, relevancy scoring
|
212
|
-
- **Requirements**:
|
213
|
-
- Consistent scoring methodology
|
214
|
-
- Support for different evaluation criteria
|
215
|
-
- Clear documentation of scoring rubrics
|
216
209
|
|
217
210
|
## 🌊 Contributing Flows
|
218
211
|
|
@@ -169,13 +169,41 @@ blocks:
|
|
169
169
|
max_tokens: 300
|
170
170
|
async_mode: true
|
171
171
|
|
172
|
-
# Quality evaluation
|
173
|
-
- block_type: "
|
172
|
+
# Quality evaluation using basic blocks
|
173
|
+
- block_type: "PromptBuilderBlock"
|
174
174
|
block_config:
|
175
|
-
block_name: "
|
175
|
+
block_name: "faithfulness_prompt"
|
176
176
|
input_cols: ["document", "answer"]
|
177
|
-
output_cols: ["
|
177
|
+
output_cols: ["eval_prompt"]
|
178
|
+
prompt_template: "Evaluate if this answer is faithful to the document..."
|
179
|
+
|
180
|
+
- block_type: "LLMChatBlock"
|
181
|
+
block_config:
|
182
|
+
block_name: "eval_faithfulness_llm"
|
183
|
+
input_cols: ["eval_prompt"]
|
184
|
+
output_cols: ["eval_response"]
|
178
185
|
async_mode: true
|
186
|
+
|
187
|
+
- block_type: "LLMParserBlock"
|
188
|
+
block_config:
|
189
|
+
block_name: "extract_eval_content"
|
190
|
+
input_cols: ["eval_response"]
|
191
|
+
extract_content: true
|
192
|
+
|
193
|
+
- block_type: "TextParserBlock"
|
194
|
+
block_config:
|
195
|
+
block_name: "parse_evaluation"
|
196
|
+
input_cols: ["extract_eval_content_content"]
|
197
|
+
output_cols: ["explanation", "judgment"]
|
198
|
+
start_tags: ["[Start of Explanation]", "[Start of Answer]"]
|
199
|
+
end_tags: ["[End of Explanation]", "[End of Answer]"]
|
200
|
+
|
201
|
+
- block_type: "ColumnValueFilterBlock"
|
202
|
+
block_config:
|
203
|
+
block_name: "filter_faithful"
|
204
|
+
input_cols: ["judgment"]
|
205
|
+
filter_value: "YES"
|
206
|
+
operation: "eq"
|
179
207
|
|
180
208
|
# Quality filtering
|
181
209
|
- block_type: "ColumnValueFilterBlock"
|
@@ -0,0 +1,486 @@
|
|
1
|
+
{
|
2
|
+
"cells": [
|
3
|
+
{
|
4
|
+
"cell_type": "code",
|
5
|
+
"execution_count": null,
|
6
|
+
"metadata": {},
|
7
|
+
"outputs": [],
|
8
|
+
"source": [
|
9
|
+
"%load_ext autoreload\n",
|
10
|
+
"%autoreload 2"
|
11
|
+
]
|
12
|
+
},
|
13
|
+
{
|
14
|
+
"cell_type": "code",
|
15
|
+
"execution_count": null,
|
16
|
+
"metadata": {},
|
17
|
+
"outputs": [],
|
18
|
+
"source": [
|
19
|
+
"# Third Party\n",
|
20
|
+
"from datasets import load_dataset\n",
|
21
|
+
"from openai import OpenAI\n",
|
22
|
+
"from rich import print\n",
|
23
|
+
"from rich.panel import Panel\n",
|
24
|
+
"from sklearn.metrics import classification_report\n",
|
25
|
+
"\n",
|
26
|
+
"# First Party\n",
|
27
|
+
"from sdg_hub import Flow, FlowMetadata, BlockRegistry\n",
|
28
|
+
"\n",
|
29
|
+
"import nest_asyncio\n",
|
30
|
+
"nest_asyncio.apply()"
|
31
|
+
]
|
32
|
+
},
|
33
|
+
{
|
34
|
+
"cell_type": "markdown",
|
35
|
+
"metadata": {},
|
36
|
+
"source": [
|
37
|
+
"# Classifying news articles\n",
|
38
|
+
"\n",
|
39
|
+
"\n",
|
40
|
+
"In this tutorial, you’ll learn how to create your own custom data generation flow using SDG Hub. This notebook walks you through all the essential pieces needed to build a flow for any use case with the fundamental components of `sdg_hub`: `Blocks` and `Flows`.\n",
|
41
|
+
"\n",
|
42
|
+
"As an example use-case, we will pick news classification. Classification is a fundamental task in machine learning, where the goal is to assign predefined categories to input data. To address the classic machine learning use-case of news or text classification, we will use sdg_hub and leverage a language model to **classify news articles** with topic labels — specifically using the [AG News dataset](https://huggingface.co/datasets/fancyzhx/ag_news) from Hugging Face.\n",
|
43
|
+
"\n",
|
44
|
+
"We’ll go step by step through a progressively improving flow. Each stage builds on the previous one, giving you a practical sense of how you can evolve your flow from using simple heuristics to highly customized and reliable data generation, using different inference paradigms such as self-assessment.\n",
|
45
|
+
"\n",
|
46
|
+
"### 🔍 Understand the Task\n",
|
47
|
+
"Before we write any prompts or code, we’ll take time to understand what we want the model to do. For this exercise, the task is **text classification** — assigning one of 4 possible categories (e.g., \"World\", \"Sports\", \"Sci/Tech\", \"Business\") to a given news article.\n",
|
48
|
+
"\n",
|
49
|
+
"### 🛠️ Build a Basic Annotation Flow and learn the `sdg_hub` way\n",
|
50
|
+
"We’ll start by creating a minimal flow that simply prompts the model to generate topic labels on the unlabeled data. This will use default prompts, simply populating the prompt with the text and asking the model to generate one of the 4 possible labels, with no examples.\n",
|
51
|
+
"\n",
|
52
|
+
"### 🎯 Improve with Assessment and Iteration\n",
|
53
|
+
"Next, we’ll refine the flow by adding an assessment step. Iteration and self-verification on a task often lead to better performance.\n",
|
54
|
+
"\n",
|
55
|
+
"Let’s get started by loading a sample of the dataset"
|
56
|
+
]
|
57
|
+
},
|
58
|
+
{
|
59
|
+
"cell_type": "code",
|
60
|
+
"execution_count": null,
|
61
|
+
"metadata": {},
|
62
|
+
"outputs": [],
|
63
|
+
"source": [
|
64
|
+
"dataset = load_dataset(\"fancyzhx/ag_news\")\n",
|
65
|
+
"\n",
|
66
|
+
"train_data = dataset[\"train\"].shuffle(seed=42).select(range(500))\n",
|
67
|
+
"test_data = dataset[\"test\"].shuffle(seed=42).select(range(100))\n",
|
68
|
+
"\n",
|
69
|
+
"# map the labels to the category names\n",
|
70
|
+
"label_map = train_data.features['label'].names\n",
|
71
|
+
"\n",
|
72
|
+
"train_data = train_data.map(lambda x: {\"category\": label_map[x[\"label\"]]})\n",
|
73
|
+
"test_data = test_data.map(lambda x: {\"category\": label_map[x[\"label\"]]})"
|
74
|
+
]
|
75
|
+
},
|
76
|
+
{
|
77
|
+
"cell_type": "code",
|
78
|
+
"execution_count": null,
|
79
|
+
"metadata": {},
|
80
|
+
"outputs": [],
|
81
|
+
"source": [
|
82
|
+
"# Group examples by category\n",
|
83
|
+
"examples_by_category = {}\n",
|
84
|
+
"for item in train_data:\n",
|
85
|
+
" category = item['category']\n",
|
86
|
+
" if category not in examples_by_category:\n",
|
87
|
+
" examples_by_category[category] = []\n",
|
88
|
+
" examples_by_category[category].append(item['text'])\n",
|
89
|
+
"\n",
|
90
|
+
"# Print one example from each category in a panel\n",
|
91
|
+
"for category, examples in examples_by_category.items():\n",
|
92
|
+
" print(Panel(examples[0], title=f\"Category: {category}\", expand=False))\n"
|
93
|
+
]
|
94
|
+
},
|
95
|
+
{
|
96
|
+
"cell_type": "markdown",
|
97
|
+
"metadata": {},
|
98
|
+
"source": [
|
99
|
+
"## Simple Data Annotation Pipeline\n",
|
100
|
+
"\n",
|
101
|
+
"In this section, we’ll create our **first working flow** to perform classification using a language model. The goal is to understand the building blocks of `sdg_hub` and how we can employ them to get a language model to classify a given text.\n",
|
102
|
+
"\n",
|
103
|
+
"### Recap: How `sdg_hub` Works\n",
|
104
|
+
"\n",
|
105
|
+
"```mermaid\n",
|
106
|
+
"flowchart LR\n",
|
107
|
+
" A[Flow] --> B[Blocks] --> C[Prompts]\n",
|
108
|
+
" C --> D[Generated Data]\n",
|
109
|
+
"```"
|
110
|
+
]
|
111
|
+
},
|
112
|
+
{
|
113
|
+
"cell_type": "markdown",
|
114
|
+
"metadata": {},
|
115
|
+
"source": [
|
116
|
+
"# Building a Simple Classification Flow\n",
|
117
|
+
"\n",
|
118
|
+
"### Discover Blocks for us to use\n",
|
119
|
+
"\n"
|
120
|
+
]
|
121
|
+
},
|
122
|
+
{
|
123
|
+
"cell_type": "code",
|
124
|
+
"execution_count": null,
|
125
|
+
"metadata": {},
|
126
|
+
"outputs": [],
|
127
|
+
"source": [
|
128
|
+
"BlockRegistry.discover_blocks()"
|
129
|
+
]
|
130
|
+
},
|
131
|
+
{
|
132
|
+
"cell_type": "markdown",
|
133
|
+
"metadata": {},
|
134
|
+
"source": [
|
135
|
+
"It seems all the functionality we are interested in, such as building a prompt, chatting with an LLM, and parsing its output, is under the `llm` category in sdg_hub. Let's start there."
|
136
|
+
]
|
137
|
+
},
|
138
|
+
{
|
139
|
+
"cell_type": "code",
|
140
|
+
"execution_count": null,
|
141
|
+
"metadata": {},
|
142
|
+
"outputs": [],
|
143
|
+
"source": [
|
144
|
+
"from sdg_hub.core.blocks.llm import PromptBuilderBlock, LLMChatBlock, TextParserBlock, LLMParserBlock"
|
145
|
+
]
|
146
|
+
},
|
147
|
+
{
|
148
|
+
"cell_type": "markdown",
|
149
|
+
"metadata": {},
|
150
|
+
"source": [
|
151
|
+
"### Creating the required blocks\n",
|
152
|
+
"\n",
|
153
|
+
"To get started, we'll construct the simplest possible flow for text classification using SDG Hub. We will focus on 3 main blocks that will often appear as a triplet while using `sdg_hub`\n",
|
154
|
+
"\n",
|
155
|
+
"1. **Prompt Builder Block**: Converts each input text into a prompt formatted for the LLM. The important input argument to keep in mind for `PromptBuilderBlock` is the `prompt_config_path`, which is where the prompt template is saved. Any prompt engineering we want to do is done in that prompt template.\n",
|
156
|
+
"2. **LLM Chat Block**: Sends the prompt to the language model and receives its response (the predicted label).\n",
|
157
|
+
"3. **Text Parser Block**: Extracts the final label from the LLM's output.\n",
|
158
|
+
"\n",
|
159
|
+
"This setup results in a single LLM interaction per sample, forming a minimal classification pipeline.\n",
|
160
|
+
"\n",
|
161
|
+
"We are going to be using the simple prompt that can be found in `news_classification_prompt.yaml`."
|
162
|
+
]
|
163
|
+
},
|
164
|
+
{
|
165
|
+
"cell_type": "code",
|
166
|
+
"execution_count": null,
|
167
|
+
"metadata": {},
|
168
|
+
"outputs": [],
|
169
|
+
"source": [
|
170
|
+
"promptbuilderblock_1 = PromptBuilderBlock(block_name='annotation_prompt_builder', input_cols=['text'], output_cols=['annotation_prompt'], prompt_config_path=\"news_classification_prompt.yaml\", format_as_messages=True)\n",
|
171
|
+
"llmchatblock_1 = LLMChatBlock(block_name='annotation_llm_chat_block', input_cols=['annotation_prompt'], output_cols=['raw_output'], temperature=0.0, max_tokens=5, extra_body={'guided_choice': ['World', 'Sports', 'Business', 'Sci/Tech']}, async_mode=True)\n",
|
172
|
+
"llmparserblock_1 = LLMParserBlock(block_name='annotation_llm_parser_block', input_cols=['raw_output'], extract_content=True, expand_lists=True)\n",
|
173
|
+
"textparserblock_1 = TextParserBlock(block_name='annotation_text_parser_block', input_cols=['annotation_llm_parser_block_content'], output_cols=['output'], start_tags=[''], end_tags=[''])"
|
174
|
+
]
|
175
|
+
},
|
176
|
+
{
|
177
|
+
"cell_type": "markdown",
|
178
|
+
"metadata": {},
|
179
|
+
"source": [
|
180
|
+
"### Designing the `Flow`\n",
|
181
|
+
"\n",
|
182
|
+
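"The `Flow` class is at the heart of SDG Hub. Simply put, a `Flow` is a chain of `Blocks` that get executed sequentially. Here, we will simply chain our PromptBuilderBlock -> LLMChatBlock -> LLMParserBlock -> TextParserBlock, in that order (the diagram below omits the `LLMParserBlock`, which sits between the chat block and the text parser):\n",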
|
183
|
+
"\n",
|
184
|
+
"```mermaid\n",
|
185
|
+
"flowchart LR\n",
|
186
|
+
" subgraph Flow\n",
|
187
|
+
" direction LR\n",
|
188
|
+
" A[PromptBuilderBlock] --> B[LLMChatBlock] --> C[TextParserBlock]\n",
|
189
|
+
" end\n",
|
190
|
+
"```\n",
|
191
|
+
"\n"
|
192
|
+
]
|
193
|
+
},
|
194
|
+
{
|
195
|
+
"cell_type": "code",
|
196
|
+
"execution_count": null,
|
197
|
+
"metadata": {},
|
198
|
+
"outputs": [],
|
199
|
+
"source": [
|
200
|
+
"flow = Flow(blocks=[promptbuilderblock_1, llmchatblock_1, llmparserblock_1, textparserblock_1], metadata=FlowMetadata(name=\"annotation_flow\", description=\"A flow for news article classification\", author=\"sdg_hub\"))"
|
201
|
+
]
|
202
|
+
},
|
203
|
+
{
|
204
|
+
"cell_type": "markdown",
|
205
|
+
"metadata": {},
|
206
|
+
"source": [
|
207
|
+
"### Set the model configs for the `Flow`\n",
|
208
|
+
"\n",
|
209
|
+
"In SDG Hub, model details such as the API base URL, the API Key (if any) and the model name are set at a Flow level using the `set_model_config` method as shown. The `model` parameter accepts a string in the format of \"`provider`/`model_name`\". Here our `provider` is 'hosted_vllm' as we are using a locally hosted model through vllm, and the model name is \"meta-llama/Llama-3.3-70B-Instruct\"\n",
|
210
|
+
"\n",
|
211
|
+
"We must set the `api_base` parameter and point it to where the model endpoint can be found, in this case, `http://localhost:8000/v1`"
|
212
|
+
]
|
213
|
+
},
|
214
|
+
{
|
215
|
+
"cell_type": "code",
|
216
|
+
"execution_count": null,
|
217
|
+
"metadata": {},
|
218
|
+
"outputs": [],
|
219
|
+
"source": [
|
220
|
+
"# flow.set_model_config(model=\"hosted_vllm/meta-llama/Llama-3.3-70B-Instruct\", api_base=\"http://localhost:8000/v1\", api_key=\"\")\n",
|
221
|
+
"\n",
|
222
|
+
"flow.set_model_config(model=\"hosted_vllm/qwen3-8b\", api_base=\"http://localhost:8101/v1\", api_key=\"empty\")\n"
|
223
|
+
]
|
224
|
+
},
|
225
|
+
{
|
226
|
+
"cell_type": "markdown",
|
227
|
+
"metadata": {},
|
228
|
+
"source": [
|
229
|
+
"### Time to generate!\n",
|
230
|
+
"\n",
|
231
|
+
"In sdg_hub, the way to generate data is very simple. We simply use the `generate` method from `Flow`. In its simplest form, all the `generate` method needs is the input dataset to operate on. Additionally, we can pass runtime parameters for each block if we wish to override any of the block-specific model configs."
|
232
|
+
]
|
233
|
+
},
|
234
|
+
{
|
235
|
+
"cell_type": "code",
|
236
|
+
"execution_count": null,
|
237
|
+
"metadata": {},
|
238
|
+
"outputs": [],
|
239
|
+
"source": [
|
240
|
+
"generated_data = flow.generate(test_data)"
|
241
|
+
]
|
242
|
+
},
|
243
|
+
{
|
244
|
+
"cell_type": "markdown",
|
245
|
+
"metadata": {},
|
246
|
+
"source": [
|
247
|
+
"### Evaluation\n",
|
248
|
+
"\n",
|
249
|
+
"Now that we’ve generated synthetic labels using our simple classification flow, it’s time to evaluate how well the model performed. The goal of this section is to compare the predicted labels against the **true labels** from the dataset using standard classification metrics (precision, recall, F1-score, and classification accuracy).\n",
|
250
|
+
"\n",
|
251
|
+
"We’ll use `sklearn.metrics.classification_report`, which provides precision, recall, F1-score, and support for each class.\n"
|
252
|
+
]
|
253
|
+
},
|
254
|
+
{
|
255
|
+
"cell_type": "code",
|
256
|
+
"execution_count": null,
|
257
|
+
"metadata": {},
|
258
|
+
"outputs": [],
|
259
|
+
"source": [
|
260
|
+
"print(classification_report(generated_data[\"category\"], generated_data[\"output\"]))"
|
261
|
+
]
|
262
|
+
},
|
263
|
+
{
|
264
|
+
"cell_type": "markdown",
|
265
|
+
"metadata": {},
|
266
|
+
"source": [
|
267
|
+
"## Introducing an Assessment step\n",
|
268
|
+
"\n",
|
269
|
+
"Our initial flow used a one step approach — the model was given the task, a fixed label set, and some input text. While this baseline gives us a useful starting point, it has clear limitations:\n",
|
270
|
+
"\n",
|
271
|
+
"- The model may rely on generic heuristics or surface patterns that don’t generalize well.\n",
|
272
|
+
"- It can confuse similar categories (e.g., \"World\" vs. \"Business\") without knowing how they're typically used.\n",
|
273
|
+
"- Without guidance, the model may underperform on edge cases or ambiguous queries.\n",
|
274
|
+
"\n",
|
275
|
+
"\n",
|
276
|
+
"### What is Assessment\n",
|
277
|
+
"\n",
|
278
|
+
"With an assessment step, we will call the same LLM again, but this time we provide the LLM with its own previous categorization label and the original text. We will prompt the LLM to think about the original prediction, and give it context about challenging cases.\n",
|
279
|
+
"In this manner, we can elicit critical judgement from the model about its own prior classification decision. This type of additional context can be useful in the next iteration.\n",
|
280
|
+
"\n",
|
281
|
+
"\n",
|
282
|
+
"### What We’ll Do Next\n",
|
283
|
+
"\n",
|
284
|
+
"We’ll now enhance our flow by introducing another chain of `PromptBuilder` -> `LLMChatBlock` -> `TextParserBlock` whose purpose is to pass the (original text + prediction) to the LLM and obtain a verification or assessment of the prediction.\n",
|
285
|
+
"\n",
|
286
|
+
"\n",
|
287
|
+
"```mermaid\n",
|
288
|
+
"flowchart LR\n",
|
289
|
+
" subgraph Flow1[Initial Classification]\n",
|
290
|
+
" direction LR\n",
|
291
|
+
" A[PromptBuilderBlock] --> B[LLMChatBlock] --> C[TextParserBlock]\n",
|
292
|
+
" end\n",
|
293
|
+
" subgraph Flow2[Assessment]\n",
|
294
|
+
" direction LR\n",
|
295
|
+
" D[PromptBuilderBlock_Assessment] --> E[LLMChatBlock_Assessment] --> F[TextParserBlock_Assessment]\n",
|
296
|
+
" end\n",
|
297
|
+
" \n",
|
298
|
+
" C --> D\n",
|
299
|
+
"```\n",
|
300
|
+
"\n",
|
301
|
+
"\n",
|
302
|
+
"We will investigate if this catches any of the mis-classifications, and get an idea of how well our verification prompting works!"
|
303
|
+
]
|
304
|
+
},
|
305
|
+
{
|
306
|
+
"cell_type": "code",
|
307
|
+
"execution_count": null,
|
308
|
+
"metadata": {},
|
309
|
+
"outputs": [],
|
310
|
+
"source": [
|
311
|
+
"promptbuilderblock_assessment = PromptBuilderBlock(block_name='verifier_prompt_builder', input_cols=['text', 'output'], output_cols=['assessment_prompt'], prompt_config_path=\"news_classification_assessment_prompt.yaml\", format_as_messages=True)\n",
|
312
|
+
"llmchatblock_assessment = LLMChatBlock(block_name='verifier_llm_chat_block', input_cols=['assessment_prompt'], output_cols=['raw_assessment_output'], async_mode=True)\n",
|
313
|
+
"llmparserblock_assessment = LLMParserBlock(block_name='verifier_llm_parser_block', input_cols=['raw_assessment_output'], extract_content=True, expand_lists=True)\n",
|
314
|
+
"textparserblock_assessment = TextParserBlock(block_name='verifier_text_parser_block', input_cols=['verifier_llm_parser_block_content'], output_cols=['assessment_output'], start_tags=[''], end_tags=[''])\n",
|
315
|
+
"\n",
|
316
|
+
"flow = Flow(blocks=[promptbuilderblock_1, llmchatblock_1, llmparserblock_1, textparserblock_1, promptbuilderblock_assessment, llmchatblock_assessment, llmparserblock_assessment, textparserblock_assessment], metadata=FlowMetadata(name=\"annotation_flow\", description=\"A flow for news article classification\", author=\"sdg_hub\"))\n",
|
317
|
+
"# flow.set_model_config(model=\"hosted_vllm/meta-llama/Llama-3.3-70B-Instruct\", api_base=\"http://localhost:8000/v1\", api_key=\"\")\n",
|
318
|
+
"flow.set_model_config(model=\"hosted_vllm/qwen3-8b\", api_base=\"http://localhost:8101/v1\", api_key=\"empty\")\n",
|
319
|
+
"\n",
|
320
|
+
"\n",
|
321
|
+
"\n",
|
322
|
+
"generated_data = flow.generate(test_data)"
|
323
|
+
]
|
324
|
+
},
|
325
|
+
{
|
326
|
+
"cell_type": "code",
|
327
|
+
"execution_count": null,
|
328
|
+
"metadata": {},
|
329
|
+
"outputs": [],
|
330
|
+
"source": [
|
331
|
+
"generated_data_pd = generated_data.to_pandas()\n",
|
332
|
+
"mislabeled_samples = generated_data_pd[generated_data_pd[\"category\"] != generated_data_pd[\"output\"]]\n",
|
333
|
+
"\n",
|
334
|
+
"print(Panel(mislabeled_samples.iloc[0]['assessment_output'], title=\"Assessment\"))\n",
|
335
|
+
"print(Panel(str(mislabeled_samples.iloc[0]['category']), title=\"Ground truth label\"))"
|
336
|
+
]
|
337
|
+
},
|
338
|
+
{
|
339
|
+
"cell_type": "markdown",
|
340
|
+
"metadata": {},
|
341
|
+
"source": [
|
342
|
+
"Great! Now we can see that the assessment step is working well, especially on the misclassified samples as shown above. The above is a hard example which slipped past our original classification flow, but was caught by our assessment step's critical judgement."
|
343
|
+
]
|
344
|
+
},
|
345
|
+
{
|
346
|
+
"cell_type": "markdown",
|
347
|
+
"metadata": {},
|
348
|
+
"source": [
|
349
|
+
"### Revising the Classifications\n",
|
350
|
+
"\n",
|
351
|
+
"We will now create our final revision step, which will take the results of the initial prediction and the assessment steps and pass it onto the LLM once again for a revised attempt at classifying the same input text. The flow can be imagined like so:\n",
|
352
|
+
"\n",
|
353
|
+
"```mermaid\n",
|
354
|
+
"flowchart LR\n",
|
355
|
+
" subgraph Flow1[Initial Classification]\n",
|
356
|
+
" direction LR\n",
|
357
|
+
" A[PromptBuilderBlock] --> B[LLMChatBlock] --> C[TextParserBlock]\n",
|
358
|
+
" end\n",
|
359
|
+
" subgraph Flow2[Assessment]\n",
|
360
|
+
" direction LR\n",
|
361
|
+
" D[PromptBuilderBlock_Assessment] --> E[LLMChatBlock_Assessment] --> F[TextParserBlock_Assessment]\n",
|
362
|
+
" end\n",
|
363
|
+
" subgraph Flow3[Revised Classification]\n",
|
364
|
+
" direction LR\n",
|
365
|
+
" G[PromptBuilderBlock_Revision] --> H[LLMChatBlock_Revision] --> I[TextParserBlock_Revision]\n",
|
366
|
+
" end\n",
|
367
|
+
" \n",
|
368
|
+
" C --> D\n",
|
369
|
+
" F --> G\n",
|
370
|
+
"```"
|
371
|
+
]
|
372
|
+
},
|
373
|
+
{
|
374
|
+
"cell_type": "code",
|
375
|
+
"execution_count": null,
|
376
|
+
"metadata": {},
|
377
|
+
"outputs": [],
|
378
|
+
"source": [
|
379
|
+
"promptbuilderblock_revision = PromptBuilderBlock(block_name='revised_prompt_builder', input_cols=['text', 'output', 'assessment_output'], output_cols=['revised_prompt'], prompt_config_path=\"revise_news_classification_prompt.yaml\", format_as_messages=True)\n",
|
380
|
+
"llmchatblock_revision = LLMChatBlock(block_name='revised_llm_chat_block', input_cols=['revised_prompt'], output_cols=['raw_revised_output'], temperature=0.0, max_tokens=5, extra_body={'guided_choice': ['World', 'Sports', 'Business', 'Sci/Tech']}, async_mode=True)\n",
|
381
|
+
"llmparserblock_revision = LLMParserBlock(block_name='revised_llm_parser_block', input_cols=['raw_revised_output'], extract_content=True, expand_lists=True)\n",
|
382
|
+
"textparserblock_revision = TextParserBlock(block_name='revised_text_parser_block', input_cols=['revised_llm_parser_block_content'], output_cols=['revised_output'], start_tags=[''], end_tags=[''])\n",
|
383
|
+
"\n",
|
384
|
+
"flow = Flow(blocks=[promptbuilderblock_1, llmchatblock_1, llmparserblock_1, textparserblock_1, promptbuilderblock_assessment, llmchatblock_assessment, llmparserblock_assessment, textparserblock_assessment, promptbuilderblock_revision, llmchatblock_revision, llmparserblock_revision, textparserblock_revision], metadata=FlowMetadata(name=\"news_classification_flow\", description=\"A flow for news article classification with assessment and revision\", author=\"sdg_hub\"))\n",
|
385
|
+
"# flow.set_model_config(model=\"hosted_vllm/meta-llama/Llama-3.3-70B-Instruct\", api_base=\"http://localhost:8000/v1\", api_key=\"\")\n",
|
386
|
+
"flow.set_model_config(model=\"hosted_vllm/qwen3-8b\", api_base=\"http://localhost:8101/v1\", api_key=\"empty\")"
|
387
|
+
]
|
388
|
+
},
|
389
|
+
{
|
390
|
+
"cell_type": "code",
|
391
|
+
"execution_count": null,
|
392
|
+
"metadata": {},
|
393
|
+
"outputs": [],
|
394
|
+
"source": [
|
395
|
+
"generated_data = flow.generate(test_data)"
|
396
|
+
]
|
397
|
+
},
|
398
|
+
{
|
399
|
+
"cell_type": "code",
|
400
|
+
"execution_count": null,
|
401
|
+
"metadata": {},
|
402
|
+
"outputs": [],
|
403
|
+
"source": [
|
404
|
+
"print(classification_report(generated_data[\"category\"], generated_data[\"revised_output\"]))"
|
405
|
+
]
|
406
|
+
},
|
407
|
+
{
|
408
|
+
"cell_type": "markdown",
|
409
|
+
"metadata": {},
|
410
|
+
"source": [
|
411
|
+
"🔥 We improved the results drastically! Let us take a look at the number of mislabeled samples before and after the assessment + revision steps\n"
|
412
|
+
]
|
413
|
+
},
|
414
|
+
{
|
415
|
+
"cell_type": "code",
|
416
|
+
"execution_count": null,
|
417
|
+
"metadata": {},
|
418
|
+
"outputs": [],
|
419
|
+
"source": [
|
420
|
+
"generated_data_pd = generated_data.to_pandas()\n",
|
421
|
+
"num_mislabeled_output = (generated_data_pd[\"category\"] != generated_data_pd[\"output\"]).sum()\n",
|
422
|
+
"num_mislabeled_revised = (generated_data_pd[\"category\"] != generated_data_pd[\"revised_output\"]).sum()\n",
|
423
|
+
"print(f\"Number of mislabeled samples (original output): {num_mislabeled_output}\")\n",
|
424
|
+
"print(f\"Number of mislabeled samples (revised output): {num_mislabeled_revised}\")\n"
|
425
|
+
]
|
426
|
+
},
|
427
|
+
{
|
428
|
+
"cell_type": "markdown",
|
429
|
+
"metadata": {},
|
430
|
+
"source": [
|
431
|
+
"Great, we have now improved the classification accuracy of our system by augmenting our naive classification flow with an assessment step followed by a revision step.\n"
|
432
|
+
]
|
433
|
+
},
|
434
|
+
{
|
435
|
+
"cell_type": "markdown",
|
436
|
+
"metadata": {},
|
437
|
+
"source": [
|
438
|
+
"### Export the flow to yaml form\n"
|
439
|
+
]
|
440
|
+
},
|
441
|
+
{
|
442
|
+
"cell_type": "code",
|
443
|
+
"execution_count": null,
|
444
|
+
"metadata": {},
|
445
|
+
"outputs": [],
|
446
|
+
"source": [
|
447
|
+
"flow.to_yaml(\"news_classification_flow.yaml\")"
|
448
|
+
]
|
449
|
+
},
|
450
|
+
{
|
451
|
+
"cell_type": "markdown",
|
452
|
+
"metadata": {},
|
453
|
+
"source": [
|
454
|
+
"## ✅ Summary: What You’ve Learned\n",
|
455
|
+
"\n",
|
456
|
+
"In this tutorial, you learned how to create your own flow for a custom use-case using `sdg_hub`, using the fundamental components: `Flow` and `Block`. You also learned how to create and structure the prompts. You learned how to design an assessment or a judgement step in order to improve the performance of the overall system. You started from scratch and evolved it into a robust, high-accuracy system.\n",
|
457
|
+
"\n",
|
458
|
+
"## 🚀 What’s Next?\n",
|
459
|
+
"\n",
|
460
|
+
"* Prompt Engineer! - You can add examples for classifications directly in the classification steps and see how this improves the performance. In-context examples are extremely effective at aligning the model's outputs to the task at hand\n",
|
461
|
+
"* Try it out on your own data!"
|
462
|
+
]
|
463
|
+
}
|
464
|
+
],
|
465
|
+
"metadata": {
|
466
|
+
"kernelspec": {
|
467
|
+
"display_name": "test_nb",
|
468
|
+
"language": "python",
|
469
|
+
"name": "python3"
|
470
|
+
},
|
471
|
+
"language_info": {
|
472
|
+
"codemirror_mode": {
|
473
|
+
"name": "ipython",
|
474
|
+
"version": 3
|
475
|
+
},
|
476
|
+
"file_extension": ".py",
|
477
|
+
"mimetype": "text/x-python",
|
478
|
+
"name": "python",
|
479
|
+
"nbconvert_exporter": "python",
|
480
|
+
"pygments_lexer": "ipython3",
|
481
|
+
"version": "3.12.8"
|
482
|
+
}
|
483
|
+
},
|
484
|
+
"nbformat": 4,
|
485
|
+
"nbformat_minor": 2
|
486
|
+
}
|