sdg-hub 0.2.0__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/PKG-INFO +21 -18
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/README.md +20 -17
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/docs/flows/overview.md +19 -3
- sdg_hub-0.2.1/examples/annotation/annotation_classification.ipynb +865 -0
- sdg_hub-0.2.1/examples/annotation/news_classification_assessment_prompt.yaml +42 -0
- sdg_hub-0.2.1/examples/annotation/news_classification_prompt.yaml +11 -0
- sdg_hub-0.2.1/examples/annotation/revise_news_classification_prompt.yaml +19 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/_version.py +2 -2
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/llm/__init__.py +2 -0
- sdg_hub-0.2.1/src/sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +491 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/llm/text_parser_block.py +77 -30
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/registry.py +1 -1
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/flow/base.py +243 -14
- sdg_hub-0.2.1/src/sdg_hub/core/flow/checkpointer.py +333 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/flow/metadata.py +45 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/flow/migration.py +12 -1
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/flow/registry.py +121 -58
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/flow/validation.py +12 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/utils/__init__.py +2 -1
- sdg_hub-0.2.1/src/sdg_hub/core/utils/datautils.py +63 -0
- sdg_hub-0.2.1/src/sdg_hub/core/utils/flow_id_words.yaml +231 -0
- sdg_hub-0.2.1/src/sdg_hub/core/utils/flow_identifier.py +94 -0
- sdg_hub-0.2.1/src/sdg_hub/core/utils/yaml_utils.py +59 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +1 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub.egg-info/PKG-INFO +21 -18
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub.egg-info/SOURCES.txt +11 -0
- sdg_hub-0.2.1/tests/blocks/llm/test_llm_chat_with_parsing_retry_block.py +868 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/llm/test_textparserblock.py +241 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/test_registry.py +2 -2
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/flow/test_base.py +181 -0
- sdg_hub-0.2.1/tests/flow/test_checkpointer.py +331 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/flow/test_integration.py +22 -7
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/flow/test_metadata.py +43 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/flow/test_migration.py +90 -7
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/flow/test_registry.py +248 -28
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/utils/test_error_handling.py +1 -2
- sdg_hub-0.2.0/src/sdg_hub/core/utils/datautils.py +0 -12
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/.github/actionlint.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/.github/actions/free-disk-space/action.yml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/.github/dependabot.yml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/.github/mergify.yml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/.github/workflows/actionlint.dockerfile +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/.github/workflows/actionlint.yml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/.github/workflows/docs.yml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/.github/workflows/e2e.yml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/.github/workflows/lint.yml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/.github/workflows/matchers/actionlint.json +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/.github/workflows/matchers/pylint.json +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/.github/workflows/pypi.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/.github/workflows/test.yml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/.gitignore +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/.isort.cfg +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/.markdownlint-cli2.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/.pre-commit-config.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/.pylintrc +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/CLAUDE.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/CONTRIBUTING.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/LICENSE +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/Makefile +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/docs/.nojekyll +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/docs/README.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/docs/_coverpage.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/docs/_navbar.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/docs/_sidebar.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/docs/api-reference.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/docs/blocks/custom-blocks.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/docs/blocks/evaluation-blocks.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/docs/blocks/filtering-blocks.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/docs/blocks/llm-blocks.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/docs/blocks/overview.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/docs/blocks/transform-blocks.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/docs/concepts.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/docs/development.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/docs/flows/discovery.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/docs/index.html +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/docs/installation.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/docs/quick-start.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/examples/knowledge_tuning/instructlab/.gitignore +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/examples/knowledge_tuning/instructlab/README.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/examples/knowledge_tuning/instructlab/docling_v2_config.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/examples/knowledge_tuning/instructlab/docparser.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/examples/knowledge_tuning/instructlab/docparser_v2.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.json +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.pdf +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/qna.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/examples/knowledge_tuning/instructlab/document_pre_processing.ipynb +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/examples/knowledge_tuning/instructlab/knowledge_generation_and_mixing.ipynb +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/examples/knowledge_tuning/instructlab/logger_config.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/examples/knowledge_tuning/knowledge_utils.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/pyproject.toml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/scripts/ruff.sh +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/setup.cfg +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/__init__.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/__init__.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/__init__.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/base.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/deprecated_blocks/__init__.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/deprecated_blocks/combine_columns.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/deprecated_blocks/duplicate_columns.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/deprecated_blocks/filter_by_value.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/deprecated_blocks/flatten_columns.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/deprecated_blocks/llmblock.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/deprecated_blocks/rename_columns.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/deprecated_blocks/sample_populator.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/deprecated_blocks/selector.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/deprecated_blocks/set_to_majority_value.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/evaluation/__init__.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/evaluation/verify_question_block.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/filtering/__init__.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/filtering/column_value_filter.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/llm/client_manager.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/llm/config.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/llm/error_handler.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/llm/llm_chat_block.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/llm/prompt_builder_block.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/transform/__init__.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/transform/duplicate_columns.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/transform/index_based_mapper.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/transform/melt_columns.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/transform/rename_columns.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/transform/text_concat.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/blocks/transform/uniform_col_val_setter.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/flow/__init__.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/utils/error_handling.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/utils/logger_config.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/core/utils/path_resolution.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/README.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/__init__.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/atomic_facts.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/detailed_summary.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_faithfulness.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_question.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_relevancy.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/extractive_summary.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/generate_questions_responses.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub/py.typed +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub.egg-info/dependency_links.txt +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub.egg-info/requires.txt +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/src/sdg_hub.egg-info/top_level.txt +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/__init__.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/deprecated/test_llmblock.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/evaluation/__init__.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/evaluation/test_evaluate_faithfulness_block.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/evaluation/test_evaluate_relevancy_block.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/evaluation/test_verify_question_block.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/filtering/test_columnvaluefilter.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/llm/test_llm_chat_block.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/llm/test_promptbuilderblock.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/test_base_block.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/testdata/test_config.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/testdata/test_evaluate_faithfulness.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/testdata/test_evaluate_relevancy.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/testdata/test_prompt_format_config.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/testdata/test_prompt_format_no_system.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/testdata/test_prompt_format_strict.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/testdata/test_prompt_invalid_final_role.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/testdata/test_prompt_no_user_messages.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/testdata/test_verify_question.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/transform/test_index_based_mapper.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/transform/test_melt_columns.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/transform/test_text_concat.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/transform/test_uniform_col_val_setter.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/utilblocks/test_combinecolumns.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/utilblocks/test_duplicatecolumnsblock.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/utilblocks/test_flattenblock.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/utilblocks/test_renameblock.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/utilblocks/test_samplepopulatorblock.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/utilblocks/test_selectorblock.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/blocks/utilblocks/test_settomajority.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/flow/__init__.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/flow/conftest.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/flow/test_validation.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tests/utils/test_path_resolution.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.1}/tox.ini +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: sdg_hub
|
3
|
-
Version: 0.2.0
|
3
|
+
Version: 0.2.1
|
4
4
|
Summary: Synthetic Data Generation
|
5
5
|
Author-email: Red Hat AI Innovation <abhandwa@redhat.com>
|
6
6
|
License: Apache-2.0
|
@@ -121,7 +121,7 @@ uv pip install sdg-hub[examples]
|
|
121
121
|
|
122
122
|
## 🚀 Quick Start
|
123
123
|
|
124
|
-
###
|
124
|
+
### Core Concepts
|
125
125
|
|
126
126
|
**Blocks** are composable units that transform datasets - think of them as data processing Lego pieces. Each block performs a specific task: LLM chat, text parsing, evaluation, or transformation.
|
127
127
|
|
@@ -136,7 +136,7 @@ dataset → Block₁ → Block₂ → Block₃ → enriched_dataset
|
|
136
136
|
|
137
137
|
#### Flow Discovery
|
138
138
|
```python
|
139
|
-
from sdg_hub import FlowRegistry
|
139
|
+
from sdg_hub import FlowRegistry, Flow
|
140
140
|
|
141
141
|
# Auto-discover all available flows (no setup needed!)
|
142
142
|
FlowRegistry.discover_flows()
|
@@ -150,16 +150,20 @@ qa_flows = FlowRegistry.search_flows(tag="question-generation")
|
|
150
150
|
print(f"QA flows: {qa_flows}")
|
151
151
|
```
|
152
152
|
|
153
|
-
|
153
|
+
Each flow has a **unique, human-readable ID** automatically generated from its name. These IDs provide a convenient shorthand for referencing flows:
|
154
|
+
|
154
155
|
```python
|
155
|
-
|
156
|
-
|
156
|
+
# Every flow gets a deterministic ID
|
157
|
+
# Same flow name always generates the same ID
|
158
|
+
flow_id = "small-rock-799"
|
157
159
|
|
158
|
-
#
|
159
|
-
|
160
|
-
flow_path = FlowRegistry.get_flow_path(flow_name)
|
160
|
+
# Use ID to reference the flow
|
161
|
+
flow_path = FlowRegistry.get_flow_path(flow_id)
|
161
162
|
flow = Flow.from_yaml(flow_path)
|
163
|
+
```
|
162
164
|
|
165
|
+
#### Discovering Models and Configuring them
|
166
|
+
```python
|
163
167
|
# Discover recommended models
|
164
168
|
default_model = flow.get_default_model()
|
165
169
|
recommendations = flow.get_model_recommendations()
|
@@ -171,7 +175,9 @@ flow.set_model_config(
|
|
171
175
|
api_base="http://localhost:8000/v1",
|
172
176
|
api_key="your_key",
|
173
177
|
)
|
174
|
-
|
178
|
+
```
|
179
|
+
#### Load your dataset and run the flow
|
180
|
+
```python
|
175
181
|
# Create your dataset with required columns
|
176
182
|
dataset = Dataset.from_dict({
|
177
183
|
'document': ['Your document text here...'],
|
@@ -186,6 +192,11 @@ dataset = Dataset.from_dict({
|
|
186
192
|
'icl_response_3': ['Example answer 3']
|
187
193
|
})
|
188
194
|
|
195
|
+
# Quick Testing with Dry Run
|
196
|
+
dry_result = flow.dry_run(dataset, sample_size=1)
|
197
|
+
print(f"Dry run completed in {dry_result['execution_time_seconds']:.2f}s")
|
198
|
+
print(f"Output columns: {dry_result['final_dataset']['columns']}")
|
199
|
+
|
189
200
|
# Generate high-quality QA pairs
|
190
201
|
result = flow.generate(dataset)
|
191
202
|
|
@@ -196,14 +207,6 @@ faithfulness_scores = result['faithfulness_judgment']
|
|
196
207
|
relevancy_scores = result['relevancy_score']
|
197
208
|
```
|
198
209
|
|
199
|
-
#### Quick Testing with Dry Run
|
200
|
-
```python
|
201
|
-
# Test the flow with a small sample first
|
202
|
-
dry_result = flow.dry_run(dataset, sample_size=1)
|
203
|
-
print(f"Dry run completed in {dry_result['execution_time_seconds']:.2f}s")
|
204
|
-
print(f"Output columns: {dry_result['final_dataset']['columns']}")
|
205
|
-
```
|
206
|
-
|
207
210
|
|
208
211
|
## 📄 License
|
209
212
|
|
@@ -53,7 +53,7 @@ uv pip install sdg-hub[examples]
|
|
53
53
|
|
54
54
|
## 🚀 Quick Start
|
55
55
|
|
56
|
-
###
|
56
|
+
### Core Concepts
|
57
57
|
|
58
58
|
**Blocks** are composable units that transform datasets - think of them as data processing Lego pieces. Each block performs a specific task: LLM chat, text parsing, evaluation, or transformation.
|
59
59
|
|
@@ -68,7 +68,7 @@ dataset → Block₁ → Block₂ → Block₃ → enriched_dataset
|
|
68
68
|
|
69
69
|
#### Flow Discovery
|
70
70
|
```python
|
71
|
-
from sdg_hub import FlowRegistry
|
71
|
+
from sdg_hub import FlowRegistry, Flow
|
72
72
|
|
73
73
|
# Auto-discover all available flows (no setup needed!)
|
74
74
|
FlowRegistry.discover_flows()
|
@@ -82,16 +82,20 @@ qa_flows = FlowRegistry.search_flows(tag="question-generation")
|
|
82
82
|
print(f"QA flows: {qa_flows}")
|
83
83
|
```
|
84
84
|
|
85
|
-
|
85
|
+
Each flow has a **unique, human-readable ID** automatically generated from its name. These IDs provide a convenient shorthand for referencing flows:
|
86
|
+
|
86
87
|
```python
|
87
|
-
|
88
|
-
|
88
|
+
# Every flow gets a deterministic ID
|
89
|
+
# Same flow name always generates the same ID
|
90
|
+
flow_id = "small-rock-799"
|
89
91
|
|
90
|
-
#
|
91
|
-
|
92
|
-
flow_path = FlowRegistry.get_flow_path(flow_name)
|
92
|
+
# Use ID to reference the flow
|
93
|
+
flow_path = FlowRegistry.get_flow_path(flow_id)
|
93
94
|
flow = Flow.from_yaml(flow_path)
|
95
|
+
```
|
94
96
|
|
97
|
+
#### Discovering Models and Configuring them
|
98
|
+
```python
|
95
99
|
# Discover recommended models
|
96
100
|
default_model = flow.get_default_model()
|
97
101
|
recommendations = flow.get_model_recommendations()
|
@@ -103,7 +107,9 @@ flow.set_model_config(
|
|
103
107
|
api_base="http://localhost:8000/v1",
|
104
108
|
api_key="your_key",
|
105
109
|
)
|
106
|
-
|
110
|
+
```
|
111
|
+
#### Load your dataset and run the flow
|
112
|
+
```python
|
107
113
|
# Create your dataset with required columns
|
108
114
|
dataset = Dataset.from_dict({
|
109
115
|
'document': ['Your document text here...'],
|
@@ -118,6 +124,11 @@ dataset = Dataset.from_dict({
|
|
118
124
|
'icl_response_3': ['Example answer 3']
|
119
125
|
})
|
120
126
|
|
127
|
+
# Quick Testing with Dry Run
|
128
|
+
dry_result = flow.dry_run(dataset, sample_size=1)
|
129
|
+
print(f"Dry run completed in {dry_result['execution_time_seconds']:.2f}s")
|
130
|
+
print(f"Output columns: {dry_result['final_dataset']['columns']}")
|
131
|
+
|
121
132
|
# Generate high-quality QA pairs
|
122
133
|
result = flow.generate(dataset)
|
123
134
|
|
@@ -128,14 +139,6 @@ faithfulness_scores = result['faithfulness_judgment']
|
|
128
139
|
relevancy_scores = result['relevancy_score']
|
129
140
|
```
|
130
141
|
|
131
|
-
#### Quick Testing with Dry Run
|
132
|
-
```python
|
133
|
-
# Test the flow with a small sample first
|
134
|
-
dry_result = flow.dry_run(dataset, sample_size=1)
|
135
|
-
print(f"Dry run completed in {dry_result['execution_time_seconds']:.2f}s")
|
136
|
-
print(f"Output columns: {dry_result['final_dataset']['columns']}")
|
137
|
-
```
|
138
|
-
|
139
142
|
|
140
143
|
## 📄 License
|
141
144
|
|
@@ -269,13 +269,29 @@ print(f"Sample output: {dry_result['sample_output']}")
|
|
269
269
|
Customize flow behavior at runtime:
|
270
270
|
|
271
271
|
```python
|
272
|
-
# Override default parameters
|
272
|
+
# Override default runtime parameters
|
273
273
|
result = flow.generate(
|
274
274
|
dataset,
|
275
|
-
|
275
|
+
runtime_params={
|
276
276
|
"max_tokens": 200,
|
277
277
|
"temperature": 0.9,
|
278
|
-
|
278
|
+
}
|
279
|
+
)
|
280
|
+
```
|
281
|
+
|
282
|
+
### Block-Specific Runtime Arguments
|
283
|
+
|
284
|
+
You can enable or disable advanced features—such as "thinking mode"—for individual blocks at runtime using the `runtime_params` argument. This allows fine-grained control over block behavior without modifying the flow YAML.
|
285
|
+
|
286
|
+
For example, to disable "thinking mode" for several blocks:
|
287
|
+
|
288
|
+
```python
|
289
|
+
# Set runtime_params for specific blocks
|
290
|
+
result = flow.generate(
|
291
|
+
dataset,
|
292
|
+
runtime_params = {
|
293
|
+
# LLMChatBlock blocks
|
294
|
+
"llm_chat_block_1": {"extra_body": {"chat_template_kwargs": {"enable_thinking": False}}},
|
279
295
|
}
|
280
296
|
)
|
281
297
|
```
|