sdg-hub 0.2.0__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sdg_hub-0.2.0/src/sdg_hub.egg-info → sdg_hub-0.2.2}/PKG-INFO +59 -31
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/README.md +58 -29
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/blocks/llm-blocks.md +52 -5
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/blocks/overview.md +10 -9
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/concepts.md +7 -1
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/development.md +5 -4
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/flows/overview.md +64 -3
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/quick-start.md +7 -2
- sdg_hub-0.2.2/examples/annotation/annotation_classification.ipynb +840 -0
- sdg_hub-0.2.2/examples/annotation/news_classification_assessment_prompt.yaml +42 -0
- sdg_hub-0.2.2/examples/annotation/news_classification_flow.yaml +185 -0
- sdg_hub-0.2.2/examples/annotation/news_classification_prompt.yaml +11 -0
- sdg_hub-0.2.2/examples/annotation/revise_news_classification_prompt.yaml +19 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/examples/knowledge_tuning/instructlab/README.md +1 -1
- sdg_hub-0.2.2/examples/knowledge_tuning/instructlab/assets/imgs/instructlab-banner.png +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/pyproject.toml +0 -1
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/_version.py +16 -3
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/deprecated_blocks/selector.py +1 -1
- sdg_hub-0.2.2/src/sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +323 -0
- sdg_hub-0.2.2/src/sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +323 -0
- sdg_hub-0.2.2/src/sdg_hub/core/blocks/evaluation/verify_question_block.py +329 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/llm/__init__.py +2 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/llm/client_manager.py +61 -24
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/llm/config.py +1 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/llm/llm_chat_block.py +62 -7
- sdg_hub-0.2.2/src/sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +653 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/llm/text_parser_block.py +75 -30
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/registry.py +49 -35
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/transform/index_based_mapper.py +1 -1
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/flow/base.py +370 -20
- sdg_hub-0.2.2/src/sdg_hub/core/flow/checkpointer.py +333 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/flow/metadata.py +45 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/flow/migration.py +12 -1
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/flow/registry.py +121 -58
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/flow/validation.py +12 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/utils/__init__.py +2 -1
- sdg_hub-0.2.2/src/sdg_hub/core/utils/datautils.py +92 -0
- sdg_hub-0.2.2/src/sdg_hub/core/utils/flow_id_words.yaml +231 -0
- sdg_hub-0.2.2/src/sdg_hub/core/utils/flow_identifier.py +94 -0
- sdg_hub-0.2.2/src/sdg_hub/core/utils/yaml_utils.py +59 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +1 -7
- {sdg_hub-0.2.0 → sdg_hub-0.2.2/src/sdg_hub.egg-info}/PKG-INFO +59 -31
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub.egg-info/SOURCES.txt +15 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub.egg-info/requires.txt +0 -1
- sdg_hub-0.2.2/tests/blocks/evaluation/test_evaluate_faithfulness_block.py +271 -0
- sdg_hub-0.2.2/tests/blocks/evaluation/test_evaluate_relevancy_block.py +189 -0
- sdg_hub-0.2.2/tests/blocks/evaluation/test_verify_question_block.py +331 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/llm/test_llm_chat_block.py +2 -0
- sdg_hub-0.2.2/tests/blocks/llm/test_llm_chat_with_parsing_retry_block.py +1329 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/llm/test_textparserblock.py +241 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/test_registry.py +198 -34
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/flow/conftest.py +1 -1
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/flow/test_base.py +337 -2
- sdg_hub-0.2.2/tests/flow/test_checkpointer.py +331 -0
- sdg_hub-0.2.2/tests/flow/test_dataset_requirements.py +419 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/flow/test_integration.py +24 -9
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/flow/test_metadata.py +43 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/flow/test_migration.py +90 -7
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/flow/test_registry.py +248 -28
- sdg_hub-0.2.2/tests/utils/test_datautils.py +43 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/utils/test_error_handling.py +1 -2
- sdg_hub-0.2.0/src/sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +0 -564
- sdg_hub-0.2.0/src/sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +0 -564
- sdg_hub-0.2.0/src/sdg_hub/core/blocks/evaluation/verify_question_block.py +0 -564
- sdg_hub-0.2.0/src/sdg_hub/core/utils/datautils.py +0 -12
- sdg_hub-0.2.0/tests/blocks/evaluation/test_evaluate_faithfulness_block.py +0 -496
- sdg_hub-0.2.0/tests/blocks/evaluation/test_evaluate_relevancy_block.py +0 -493
- sdg_hub-0.2.0/tests/blocks/evaluation/test_verify_question_block.py +0 -480
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.github/actionlint.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.github/actions/free-disk-space/action.yml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.github/dependabot.yml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.github/mergify.yml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.github/workflows/actionlint.dockerfile +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.github/workflows/actionlint.yml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.github/workflows/docs.yml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.github/workflows/e2e.yml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.github/workflows/lint.yml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.github/workflows/matchers/actionlint.json +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.github/workflows/matchers/pylint.json +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.github/workflows/pypi.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.github/workflows/test.yml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.gitignore +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.isort.cfg +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.markdownlint-cli2.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.pre-commit-config.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.pylintrc +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/CLAUDE.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/CONTRIBUTING.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/LICENSE +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/Makefile +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/.nojekyll +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/README.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/_coverpage.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/_navbar.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/_sidebar.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/api-reference.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/blocks/custom-blocks.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/blocks/evaluation-blocks.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/blocks/filtering-blocks.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/blocks/transform-blocks.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/flows/discovery.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/index.html +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/installation.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/examples/knowledge_tuning/instructlab/.gitignore +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/examples/knowledge_tuning/instructlab/docling_v2_config.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/examples/knowledge_tuning/instructlab/docparser.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/examples/knowledge_tuning/instructlab/docparser_v2.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.json +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.pdf +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/qna.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/examples/knowledge_tuning/instructlab/document_pre_processing.ipynb +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/examples/knowledge_tuning/instructlab/knowledge_generation_and_mixing.ipynb +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/examples/knowledge_tuning/instructlab/logger_config.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/examples/knowledge_tuning/knowledge_utils.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/scripts/ruff.sh +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/setup.cfg +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/__init__.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/__init__.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/__init__.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/base.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/deprecated_blocks/__init__.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/deprecated_blocks/combine_columns.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/deprecated_blocks/duplicate_columns.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/deprecated_blocks/filter_by_value.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/deprecated_blocks/flatten_columns.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/deprecated_blocks/llmblock.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/deprecated_blocks/rename_columns.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/deprecated_blocks/sample_populator.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/deprecated_blocks/set_to_majority_value.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/evaluation/__init__.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/filtering/__init__.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/filtering/column_value_filter.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/llm/error_handler.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/llm/prompt_builder_block.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/transform/__init__.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/transform/duplicate_columns.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/transform/melt_columns.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/transform/rename_columns.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/transform/text_concat.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/transform/uniform_col_val_setter.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/flow/__init__.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/utils/error_handling.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/utils/logger_config.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/utils/path_resolution.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/README.md +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/__init__.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/atomic_facts.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/detailed_summary.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_faithfulness.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_question.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_relevancy.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/extractive_summary.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/generate_questions_responses.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/py.typed +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub.egg-info/dependency_links.txt +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub.egg-info/top_level.txt +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/__init__.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/deprecated/test_llmblock.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/evaluation/__init__.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/filtering/test_columnvaluefilter.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/llm/test_promptbuilderblock.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/test_base_block.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/testdata/test_config.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/testdata/test_evaluate_faithfulness.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/testdata/test_evaluate_relevancy.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/testdata/test_prompt_format_config.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/testdata/test_prompt_format_no_system.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/testdata/test_prompt_format_strict.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/testdata/test_prompt_invalid_final_role.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/testdata/test_prompt_no_user_messages.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/testdata/test_verify_question.yaml +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/transform/test_index_based_mapper.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/transform/test_melt_columns.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/transform/test_text_concat.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/transform/test_uniform_col_val_setter.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/utilblocks/test_combinecolumns.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/utilblocks/test_duplicatecolumnsblock.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/utilblocks/test_flattenblock.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/utilblocks/test_renameblock.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/utilblocks/test_samplepopulatorblock.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/utilblocks/test_selectorblock.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/utilblocks/test_settomajority.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/flow/__init__.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/flow/test_validation.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/utils/test_path_resolution.py +0 -0
- {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tox.ini +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: sdg_hub
|
3
|
-
Version: 0.2.0
|
3
|
+
Version: 0.2.2
|
4
4
|
Summary: Synthetic Data Generation
|
5
5
|
Author-email: Red Hat AI Innovation <abhandwa@redhat.com>
|
6
6
|
License: Apache-2.0
|
@@ -27,7 +27,6 @@ Requires-Dist: datasets<4.0.0,>=2.18.0
|
|
27
27
|
Requires-Dist: httpx<1.0.0,>=0.25.0
|
28
28
|
Requires-Dist: jinja2
|
29
29
|
Requires-Dist: litellm<1.75.0,>=1.73.0
|
30
|
-
Requires-Dist: openai<2.0.0,>=1.13.3
|
31
30
|
Requires-Dist: rich
|
32
31
|
Requires-Dist: pydantic<3.0.0,>=2.0.0
|
33
32
|
Requires-Dist: python-dotenv<2.0.0,>=1.0.0
|
@@ -92,6 +91,8 @@ A modular Python framework for building synthetic data generation pipelines usin
|
|
92
91
|
|
93
92
|
**📊 Rich Monitoring** - Detailed logging with progress bars and execution summaries.
|
94
93
|
|
94
|
+
**📋 Dataset Schema Discovery** - Instantly discover required data formats. Get empty datasets with correct schema for easy validation and data preparation.
|
95
|
+
|
95
96
|
**🧩 Easily Extensible** - Create custom blocks with simple inheritance. Rich logging and monitoring built-in.
|
96
97
|
|
97
98
|
|
@@ -121,7 +122,7 @@ uv pip install sdg-hub[examples]
|
|
121
122
|
|
122
123
|
## 🚀 Quick Start
|
123
124
|
|
124
|
-
###
|
125
|
+
### Core Concepts
|
125
126
|
|
126
127
|
**Blocks** are composable units that transform datasets - think of them as data processing Lego pieces. Each block performs a specific task: LLM chat, text parsing, evaluation, or transformation.
|
127
128
|
|
@@ -136,7 +137,7 @@ dataset → Block₁ → Block₂ → Block₃ → enriched_dataset
|
|
136
137
|
|
137
138
|
#### Flow Discovery
|
138
139
|
```python
|
139
|
-
from sdg_hub import FlowRegistry
|
140
|
+
from sdg_hub import FlowRegistry, Flow
|
140
141
|
|
141
142
|
# Auto-discover all available flows (no setup needed!)
|
142
143
|
FlowRegistry.discover_flows()
|
@@ -150,16 +151,20 @@ qa_flows = FlowRegistry.search_flows(tag="question-generation")
|
|
150
151
|
print(f"QA flows: {qa_flows}")
|
151
152
|
```
|
152
153
|
|
153
|
-
|
154
|
+
Each flow has a **unique, human-readable ID** automatically generated from its name. These IDs provide a convenient shorthand for referencing flows:
|
155
|
+
|
154
156
|
```python
|
155
|
-
|
156
|
-
|
157
|
+
# Every flow gets a deterministic ID
|
158
|
+
# Same flow name always generates the same ID
|
159
|
+
flow_id = "small-rock-799"
|
157
160
|
|
158
|
-
#
|
159
|
-
|
160
|
-
flow_path = FlowRegistry.get_flow_path(flow_name)
|
161
|
+
# Use ID to reference the flow
|
162
|
+
flow_path = FlowRegistry.get_flow_path(flow_id)
|
161
163
|
flow = Flow.from_yaml(flow_path)
|
164
|
+
```
|
162
165
|
|
166
|
+
#### Discovering Models and Configuring them
|
167
|
+
```python
|
163
168
|
# Discover recommended models
|
164
169
|
default_model = flow.get_default_model()
|
165
170
|
recommendations = flow.get_model_recommendations()
|
@@ -171,21 +176,52 @@ flow.set_model_config(
|
|
171
176
|
api_base="http://localhost:8000/v1",
|
172
177
|
api_key="your_key",
|
173
178
|
)
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
'
|
186
|
-
'
|
179
|
+
```
|
180
|
+
#### Discover dataset requirements and create your dataset
|
181
|
+
```python
|
182
|
+
# First, discover what data the flow needs
|
183
|
+
# Get an empty dataset with the exact schema needed
|
184
|
+
schema_dataset = flow.get_dataset_schema() # Get empty dataset with correct schema
|
185
|
+
print(f"Required columns: {schema_dataset.column_names}")
|
186
|
+
print(f"Schema: {schema_dataset.features}")
|
187
|
+
|
188
|
+
# Option 1: Add data directly to the schema dataset
|
189
|
+
dataset = schema_dataset.add_item({
|
190
|
+
'document': 'Your document text here...',
|
191
|
+
'document_outline': '1. Topic A; 2. Topic B; 3. Topic C',
|
192
|
+
'domain': 'Computer Science',
|
193
|
+
'icl_document': 'Example document for in-context learning...',
|
194
|
+
'icl_query_1': 'Example question 1?',
|
195
|
+
'icl_response_1': 'Example answer 1',
|
196
|
+
'icl_query_2': 'Example question 2?',
|
197
|
+
'icl_response_2': 'Example answer 2',
|
198
|
+
'icl_query_3': 'Example question 3?',
|
199
|
+
'icl_response_3': 'Example answer 3'
|
187
200
|
})
|
188
201
|
|
202
|
+
# Option 2: Create your own dataset and validate the schema
|
203
|
+
my_dataset = Dataset.from_dict(my_data_dict)
|
204
|
+
if my_dataset.features == schema_dataset.features:
|
205
|
+
print("✅ Schema matches - ready to generate!")
|
206
|
+
dataset = my_dataset
|
207
|
+
else:
|
208
|
+
print("❌ Schema mismatch - check your columns")
|
209
|
+
|
210
|
+
# Option 3: Get raw requirements for detailed inspection
|
211
|
+
requirements = flow.get_dataset_requirements()
|
212
|
+
if requirements:
|
213
|
+
print(f"Required: {requirements.required_columns}")
|
214
|
+
print(f"Optional: {requirements.optional_columns}")
|
215
|
+
print(f"Min samples: {requirements.min_samples}")
|
216
|
+
```
|
217
|
+
|
218
|
+
#### Dry Run and Generate
|
219
|
+
```python
|
220
|
+
# Quick Testing with Dry Run
|
221
|
+
dry_result = flow.dry_run(dataset, sample_size=1)
|
222
|
+
print(f"Dry run completed in {dry_result['execution_time_seconds']:.2f}s")
|
223
|
+
print(f"Output columns: {dry_result['final_dataset']['columns']}")
|
224
|
+
|
189
225
|
# Generate high-quality QA pairs
|
190
226
|
result = flow.generate(dataset)
|
191
227
|
|
@@ -196,14 +232,6 @@ faithfulness_scores = result['faithfulness_judgment']
|
|
196
232
|
relevancy_scores = result['relevancy_score']
|
197
233
|
```
|
198
234
|
|
199
|
-
#### Quick Testing with Dry Run
|
200
|
-
```python
|
201
|
-
# Test the flow with a small sample first
|
202
|
-
dry_result = flow.dry_run(dataset, sample_size=1)
|
203
|
-
print(f"Dry run completed in {dry_result['execution_time_seconds']:.2f}s")
|
204
|
-
print(f"Output columns: {dry_result['final_dataset']['columns']}")
|
205
|
-
```
|
206
|
-
|
207
235
|
|
208
236
|
## 📄 License
|
209
237
|
|
@@ -24,6 +24,8 @@ A modular Python framework for building synthetic data generation pipelines usin
|
|
24
24
|
|
25
25
|
**📊 Rich Monitoring** - Detailed logging with progress bars and execution summaries.
|
26
26
|
|
27
|
+
**📋 Dataset Schema Discovery** - Instantly discover required data formats. Get empty datasets with correct schema for easy validation and data preparation.
|
28
|
+
|
27
29
|
**🧩 Easily Extensible** - Create custom blocks with simple inheritance. Rich logging and monitoring built-in.
|
28
30
|
|
29
31
|
|
@@ -53,7 +55,7 @@ uv pip install sdg-hub[examples]
|
|
53
55
|
|
54
56
|
## 🚀 Quick Start
|
55
57
|
|
56
|
-
###
|
58
|
+
### Core Concepts
|
57
59
|
|
58
60
|
**Blocks** are composable units that transform datasets - think of them as data processing Lego pieces. Each block performs a specific task: LLM chat, text parsing, evaluation, or transformation.
|
59
61
|
|
@@ -68,7 +70,7 @@ dataset → Block₁ → Block₂ → Block₃ → enriched_dataset
|
|
68
70
|
|
69
71
|
#### Flow Discovery
|
70
72
|
```python
|
71
|
-
from sdg_hub import FlowRegistry
|
73
|
+
from sdg_hub import FlowRegistry, Flow
|
72
74
|
|
73
75
|
# Auto-discover all available flows (no setup needed!)
|
74
76
|
FlowRegistry.discover_flows()
|
@@ -82,16 +84,20 @@ qa_flows = FlowRegistry.search_flows(tag="question-generation")
|
|
82
84
|
print(f"QA flows: {qa_flows}")
|
83
85
|
```
|
84
86
|
|
85
|
-
|
87
|
+
Each flow has a **unique, human-readable ID** automatically generated from its name. These IDs provide a convenient shorthand for referencing flows:
|
88
|
+
|
86
89
|
```python
|
87
|
-
|
88
|
-
|
90
|
+
# Every flow gets a deterministic ID
|
91
|
+
# Same flow name always generates the same ID
|
92
|
+
flow_id = "small-rock-799"
|
89
93
|
|
90
|
-
#
|
91
|
-
|
92
|
-
flow_path = FlowRegistry.get_flow_path(flow_name)
|
94
|
+
# Use ID to reference the flow
|
95
|
+
flow_path = FlowRegistry.get_flow_path(flow_id)
|
93
96
|
flow = Flow.from_yaml(flow_path)
|
97
|
+
```
|
94
98
|
|
99
|
+
#### Discovering Models and Configuring them
|
100
|
+
```python
|
95
101
|
# Discover recommended models
|
96
102
|
default_model = flow.get_default_model()
|
97
103
|
recommendations = flow.get_model_recommendations()
|
@@ -103,21 +109,52 @@ flow.set_model_config(
|
|
103
109
|
api_base="http://localhost:8000/v1",
|
104
110
|
api_key="your_key",
|
105
111
|
)
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
'
|
118
|
-
'
|
112
|
+
```
|
113
|
+
#### Discover dataset requirements and create your dataset
|
114
|
+
```python
|
115
|
+
# First, discover what data the flow needs
|
116
|
+
# Get an empty dataset with the exact schema needed
|
117
|
+
schema_dataset = flow.get_dataset_schema() # Get empty dataset with correct schema
|
118
|
+
print(f"Required columns: {schema_dataset.column_names}")
|
119
|
+
print(f"Schema: {schema_dataset.features}")
|
120
|
+
|
121
|
+
# Option 1: Add data directly to the schema dataset
|
122
|
+
dataset = schema_dataset.add_item({
|
123
|
+
'document': 'Your document text here...',
|
124
|
+
'document_outline': '1. Topic A; 2. Topic B; 3. Topic C',
|
125
|
+
'domain': 'Computer Science',
|
126
|
+
'icl_document': 'Example document for in-context learning...',
|
127
|
+
'icl_query_1': 'Example question 1?',
|
128
|
+
'icl_response_1': 'Example answer 1',
|
129
|
+
'icl_query_2': 'Example question 2?',
|
130
|
+
'icl_response_2': 'Example answer 2',
|
131
|
+
'icl_query_3': 'Example question 3?',
|
132
|
+
'icl_response_3': 'Example answer 3'
|
119
133
|
})
|
120
134
|
|
135
|
+
# Option 2: Create your own dataset and validate the schema
|
136
|
+
my_dataset = Dataset.from_dict(my_data_dict)
|
137
|
+
if my_dataset.features == schema_dataset.features:
|
138
|
+
print("✅ Schema matches - ready to generate!")
|
139
|
+
dataset = my_dataset
|
140
|
+
else:
|
141
|
+
print("❌ Schema mismatch - check your columns")
|
142
|
+
|
143
|
+
# Option 3: Get raw requirements for detailed inspection
|
144
|
+
requirements = flow.get_dataset_requirements()
|
145
|
+
if requirements:
|
146
|
+
print(f"Required: {requirements.required_columns}")
|
147
|
+
print(f"Optional: {requirements.optional_columns}")
|
148
|
+
print(f"Min samples: {requirements.min_samples}")
|
149
|
+
```
|
150
|
+
|
151
|
+
#### Dry Run and Generate
|
152
|
+
```python
|
153
|
+
# Quick Testing with Dry Run
|
154
|
+
dry_result = flow.dry_run(dataset, sample_size=1)
|
155
|
+
print(f"Dry run completed in {dry_result['execution_time_seconds']:.2f}s")
|
156
|
+
print(f"Output columns: {dry_result['final_dataset']['columns']}")
|
157
|
+
|
121
158
|
# Generate high-quality QA pairs
|
122
159
|
result = flow.generate(dataset)
|
123
160
|
|
@@ -128,14 +165,6 @@ faithfulness_scores = result['faithfulness_judgment']
|
|
128
165
|
relevancy_scores = result['relevancy_score']
|
129
166
|
```
|
130
167
|
|
131
|
-
#### Quick Testing with Dry Run
|
132
|
-
```python
|
133
|
-
# Test the flow with a small sample first
|
134
|
-
dry_result = flow.dry_run(dataset, sample_size=1)
|
135
|
-
print(f"Dry run completed in {dry_result['execution_time_seconds']:.2f}s")
|
136
|
-
print(f"Output columns: {dry_result['final_dataset']['columns']}")
|
137
|
-
```
|
138
|
-
|
139
168
|
|
140
169
|
## 📄 License
|
141
170
|
|
@@ -34,12 +34,9 @@ The unified chat block that replaces provider-specific implementations with a si
|
|
34
34
|
### Basic Usage
|
35
35
|
|
36
36
|
```python
|
37
|
-
from sdg_hub.core.blocks import BlockRegistry
|
37
|
+
from sdg_hub.core.blocks import LLMChatBlock
|
38
38
|
from datasets import Dataset
|
39
39
|
|
40
|
-
# Get the LLM chat block
|
41
|
-
LLMChatBlock = BlockRegistry.get_block("LLMChatBlock")
|
42
|
-
|
43
40
|
# Configure for OpenAI
|
44
41
|
chat_block = LLMChatBlock(
|
45
42
|
block_name="question_answerer",
|
@@ -133,7 +130,7 @@ dataset = Dataset.from_dict({
|
|
133
130
|
})
|
134
131
|
```
|
135
132
|
|
136
|
-
#### Async Processing
|
133
|
+
#### Async Processing & Concurrency Control
|
137
134
|
```python
|
138
135
|
chat_block = LLMChatBlock(
|
139
136
|
block_name="async_chat",
|
@@ -147,6 +144,56 @@ chat_block = LLMChatBlock(
|
|
147
144
|
result = chat_block.generate(large_dataset)
|
148
145
|
```
|
149
146
|
|
147
|
+
**Flow-Level Concurrency Control:**
|
148
|
+
|
149
|
+
When using LLM blocks within flows, you can control concurrency to prevent overwhelming API servers or hitting rate limits:
|
150
|
+
|
151
|
+
```python
|
152
|
+
from sdg_hub import Flow
|
153
|
+
|
154
|
+
# Load a flow with LLM blocks
|
155
|
+
flow = Flow.from_yaml("path/to/your/flow.yaml")
|
156
|
+
flow.set_model_config(model="openai/gpt-4o", api_key="your-key")
|
157
|
+
|
158
|
+
# Control concurrency for each LLM block in the flow
|
159
|
+
result = flow.generate(
|
160
|
+
dataset,
|
161
|
+
max_concurrency=5 # Max 5 concurrent requests at any time
|
162
|
+
)
|
163
|
+
```
|
164
|
+
|
165
|
+
**Benefits of Concurrency Control:**
|
166
|
+
- **Rate Limit Management** - Prevent API throttling by limiting concurrent requests
|
167
|
+
- **Resource Control** - Manage memory and network usage for large datasets
|
168
|
+
- **Provider-Friendly** - Respect API provider recommendations for concurrent requests
|
169
|
+
- **Automatic Scaling** - No concurrency limit = maximum parallelism for fastest processing
|
170
|
+
|
171
|
+
**How It Works:**
|
172
|
+
|
173
|
+
The unified async system automatically detects whether you're processing single or multiple messages and applies concurrency control appropriately:
|
174
|
+
|
175
|
+
```python
|
176
|
+
# Single message - processed immediately
|
177
|
+
single_message = [{"role": "user", "content": "Hello"}]
|
178
|
+
|
179
|
+
# Multiple messages - concurrency controlled via semaphore
|
180
|
+
batch_messages = [
|
181
|
+
[{"role": "user", "content": "Question 1"}],
|
182
|
+
[{"role": "user", "content": "Question 2"}],
|
183
|
+
[{"role": "user", "content": "Question 3"}],
|
184
|
+
# ... up to thousands of messages
|
185
|
+
]
|
186
|
+
|
187
|
+
# Both cases use the same unified API under the hood
|
188
|
+
# Concurrency is managed transparently
|
189
|
+
```
|
190
|
+
|
191
|
+
**Performance Guidelines:**
|
192
|
+
- **Small datasets (<100 samples)**: No concurrency limit needed
|
193
|
+
- **Medium datasets (100-1000 samples)**: `max_concurrency=10-20`
|
194
|
+
- **Large datasets (1000+ samples)**: `max_concurrency=5-10` (respect API limits)
|
195
|
+
- **Production workloads**: Start conservative and tune based on error rates
|
196
|
+
|
150
197
|
### Message Format
|
151
198
|
|
152
199
|
LLMChatBlock expects messages in OpenAI chat format:
|
@@ -30,14 +30,15 @@ All blocks inherit from `BaseBlock`, which provides:
|
|
30
30
|
|
31
31
|
### Standard Configuration
|
32
32
|
```python
|
33
|
-
|
33
|
+
# Import the specific block you need
|
34
|
+
from sdg_hub.core.blocks import LLMChatBlock
|
34
35
|
|
35
36
|
# Every block has these standard fields
|
36
|
-
|
37
|
-
block = MyBlock(
|
37
|
+
block = LLMChatBlock(
|
38
38
|
block_name="my_unique_block", # Required: unique identifier
|
39
|
-
input_cols=["
|
40
|
-
output_cols=["
|
39
|
+
input_cols=["input_text"], # Column this block needs
|
40
|
+
output_cols=["response"], # Column this block creates
|
41
|
+
model="openai/gpt-4o", # Required: provider/model format
|
41
42
|
# ... block-specific configuration
|
42
43
|
)
|
43
44
|
```
|
@@ -86,13 +87,13 @@ print(f"Found {len(available_blocks)} blocks")
|
|
86
87
|
|
87
88
|
### 2. Block Instantiation
|
88
89
|
```python
|
89
|
-
#
|
90
|
-
|
90
|
+
# Import the specific block you need
|
91
|
+
from sdg_hub.core.blocks import LLMChatBlock
|
91
92
|
|
92
93
|
# Create an instance with configuration
|
93
|
-
chat_block =
|
94
|
+
chat_block = LLMChatBlock(
|
94
95
|
block_name="question_answerer",
|
95
|
-
|
96
|
+
model="openai/gpt-4o",
|
96
97
|
input_cols=["question"],
|
97
98
|
output_cols=["answer"],
|
98
99
|
prompt_template="Answer this question: {question}"
|
@@ -159,7 +159,13 @@ Every block validates data at runtime:
|
|
159
159
|
- Watch execution logs for bottlenecks
|
160
160
|
- Use async-friendly blocks for LLM operations
|
161
161
|
|
162
|
-
### 4.
|
162
|
+
### 4. Optimize for Scale
|
163
|
+
- Use the `max_concurrency` parameter to control API request rates
|
164
|
+
- Start with conservative concurrency limits (5-10) for production
|
165
|
+
- Increase concurrency carefully while monitoring error rates
|
166
|
+
- Consider provider-specific rate limits and costs
|
167
|
+
|
168
|
+
### 5. Design for Reuse
|
163
169
|
- Create modular flows that can be combined
|
164
170
|
- Use parameters for customization points
|
165
171
|
|
@@ -123,15 +123,16 @@ Create comprehensive tests following this pattern:
|
|
123
123
|
|
124
124
|
import pytest
|
125
125
|
from datasets import Dataset
|
126
|
-
from sdg_hub.core.blocks import BlockRegistry
|
127
126
|
from sdg_hub.core.utils.error_handling import MissingColumnError
|
127
|
+
# Import your custom block directly
|
128
|
+
from .my_new_block import MyNewBlock
|
128
129
|
|
129
130
|
class TestMyNewBlock:
|
130
131
|
"""Test suite for MyNewBlock."""
|
131
132
|
|
132
133
|
def test_basic_functionality(self):
|
133
134
|
"""Test basic block functionality."""
|
134
|
-
block =
|
135
|
+
block = MyNewBlock(
|
135
136
|
block_name="test_block",
|
136
137
|
input_cols=["input"],
|
137
138
|
output_cols=["output"]
|
@@ -149,7 +150,7 @@ class TestMyNewBlock:
|
|
149
150
|
def test_configuration_validation(self):
|
150
151
|
"""Test parameter validation."""
|
151
152
|
with pytest.raises(ValueError):
|
152
|
-
|
153
|
+
MyNewBlock(
|
153
154
|
block_name="bad_config",
|
154
155
|
input_cols=["input"],
|
155
156
|
output_cols=["output"],
|
@@ -158,7 +159,7 @@ class TestMyNewBlock:
|
|
158
159
|
|
159
160
|
def test_missing_columns(self):
|
160
161
|
"""Test error handling for missing columns."""
|
161
|
-
block =
|
162
|
+
block = MyNewBlock(
|
162
163
|
block_name="test_block",
|
163
164
|
input_cols=["missing_column"],
|
164
165
|
output_cols=["output"]
|
@@ -269,17 +269,78 @@ print(f"Sample output: {dry_result['sample_output']}")
|
|
269
269
|
Customize flow behavior at runtime:
|
270
270
|
|
271
271
|
```python
|
272
|
-
# Override default parameters
|
272
|
+
# Override default runtime parameters
|
273
273
|
result = flow.generate(
|
274
274
|
dataset,
|
275
|
-
|
275
|
+
runtime_params={
|
276
276
|
"max_tokens": 200,
|
277
277
|
"temperature": 0.9,
|
278
|
-
"enable_evaluation": False
|
279
278
|
}
|
280
279
|
)
|
281
280
|
```
|
282
281
|
|
282
|
+
### Block-Specific Runtime Arguments
|
283
|
+
|
284
|
+
You can enable or disable advanced features—such as "thinking mode"—for individual blocks at runtime using the `runtime_params` argument. This allows fine-grained control over block behavior without modifying the flow YAML.
|
285
|
+
|
286
|
+
For example, to disable "thinking mode" for a specific block:
|
287
|
+
|
288
|
+
```python
|
289
|
+
# Set runtime_params for specific blocks
|
290
|
+
result = flow.generate(
|
291
|
+
dataset,
|
292
|
+
runtime_params = {
|
293
|
+
# LLMChatBlock blocks
|
294
|
+
"llm_chat_block_1": {"extra_body": {"chat_template_kwargs": {"enable_thinking": False}}},
|
295
|
+
}
|
296
|
+
)
|
297
|
+
```
|
298
|
+
|
299
|
+
### Concurrency Control
|
300
|
+
|
301
|
+
For flows containing LLM blocks, you can control the maximum number of concurrent API requests to prevent overwhelming servers or hitting rate limits:
|
302
|
+
|
303
|
+
```python
|
304
|
+
# Basic concurrency control
|
305
|
+
result = flow.generate(
|
306
|
+
dataset,
|
307
|
+
max_concurrency=5 # Max 5 concurrent requests per LLM block execution
|
308
|
+
)
|
309
|
+
|
310
|
+
# Combined with other parameters
|
311
|
+
result = flow.generate(
|
312
|
+
dataset,
|
313
|
+
max_concurrency=10,
|
314
|
+
runtime_params={
|
315
|
+
"temperature": 0.7,
|
316
|
+
"max_tokens": 200
|
317
|
+
}
|
318
|
+
)
|
319
|
+
```
|
320
|
+
|
321
|
+
**When to Use Concurrency Control:**
|
322
|
+
|
323
|
+
- **Large Datasets** - Process thousands of samples without overwhelming APIs
|
324
|
+
- **Rate Limit Management** - Respect provider-specific concurrent request limits
|
325
|
+
- **Production Workloads** - Ensure stable, predictable resource usage
|
326
|
+
- **Cost Optimization** - Prevent burst API charges from uncontrolled parallelism
|
327
|
+
|
328
|
+
**Recommended Settings:**
|
329
|
+
|
330
|
+
```python
|
331
|
+
# Conservative (recommended for production)
|
332
|
+
result = flow.generate(dataset, max_concurrency=5)
|
333
|
+
|
334
|
+
# Moderate (good for development/testing)
|
335
|
+
result = flow.generate(dataset, max_concurrency=10)
|
336
|
+
|
337
|
+
# Aggressive (only for robust APIs and small datasets)
|
338
|
+
result = flow.generate(dataset, max_concurrency=20)
|
339
|
+
|
340
|
+
# No limit (maximum speed, use with caution)
|
341
|
+
result = flow.generate(dataset) # Default behavior
|
342
|
+
```
|
343
|
+
|
283
344
|
## 🚀 Next Steps
|
284
345
|
|
285
346
|
Ready to master the flow system? Explore these detailed guides:
|
@@ -107,11 +107,16 @@ print(f"🔎 QA Generation Flows: {qa_flows}")
|
|
107
107
|
eval_flows = FlowRegistry.search_flows(tag="evaluation")
|
108
108
|
print(f"📊 Evaluation Flows: {eval_flows}")
|
109
109
|
|
110
|
+
# List all blocks, grouped by category
|
111
|
+
all_blocks = BlockRegistry.list_blocks(grouped=True)
|
112
|
+
for category, blocks in all_blocks.items():
|
113
|
+
print(f"Blocks for category {category}: {blocks}")
|
114
|
+
|
110
115
|
# Find blocks by category
|
111
|
-
llm_blocks = BlockRegistry.
|
116
|
+
llm_blocks = BlockRegistry.list_blocks(category="llm")
|
112
117
|
print(f"🧠 LLM Blocks: {llm_blocks}")
|
113
118
|
|
114
|
-
transform_blocks = BlockRegistry.
|
119
|
+
transform_blocks = BlockRegistry.list_blocks(category="transform")
|
115
120
|
print(f"🔄 Transform Blocks: {transform_blocks}")
|
116
121
|
```
|
117
122
|
|