sdg-hub 0.3.0__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sdg_hub-0.3.0/src/sdg_hub.egg-info → sdg_hub-0.3.1}/PKG-INFO +1 -1
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/_version.py +3 -3
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/llm/client_manager.py +26 -1
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/utils/datautils.py +40 -22
- {sdg_hub-0.3.0 → sdg_hub-0.3.1/src/sdg_hub.egg-info}/PKG-INFO +1 -1
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/llm/test_llm_chat_block.py +156 -0
- sdg_hub-0.3.1/tests/utils/test_datautils.py +661 -0
- sdg_hub-0.3.0/tests/utils/test_datautils.py +0 -132
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.github/actionlint.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.github/actions/free-disk-space/action.yml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.github/dependabot.yml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.github/mergify.yml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.github/workflows/actionlint.dockerfile +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.github/workflows/actionlint.yml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.github/workflows/docs.yml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.github/workflows/e2e.yml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.github/workflows/lint.yml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.github/workflows/matchers/actionlint.json +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.github/workflows/matchers/pylint.json +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.github/workflows/pypi.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.github/workflows/test.yml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.gitignore +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.isort.cfg +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.markdownlint-cli2.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.pre-commit-config.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.pylintrc +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/CLAUDE.md +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/CONTRIBUTING.md +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/LICENSE +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/Makefile +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/README.md +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/.nojekyll +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/README.md +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/_coverpage.md +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/_navbar.md +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/_sidebar.md +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/api-reference.md +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/blocks/custom-blocks.md +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/blocks/evaluation-blocks.md +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/blocks/filtering-blocks.md +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/blocks/llm-blocks.md +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/blocks/overview.md +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/blocks/transform-blocks.md +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/concepts.md +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/development.md +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/flows/discovery.md +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/flows/overview.md +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/index.html +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/installation.md +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/quick-start.md +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/annotation/annotation_classification.ipynb +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/annotation/news_classification_assessment_prompt.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/annotation/news_classification_flow.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/annotation/news_classification_prompt.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/annotation/revise_news_classification_prompt.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/.env.example +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/README.md +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/knowledge_generation.ipynb +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/knowledge_mixing.ipynb +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/knowledge_mixing_utils.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/instructlab/.gitignore +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/instructlab/README.md +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/instructlab/assets/imgs/instructlab-banner.png +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/instructlab/docling_v2_config.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/instructlab/docparser.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/instructlab/docparser_v2.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.json +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.md +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.pdf +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/qna.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/instructlab/document_pre_processing.ipynb +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/instructlab/knowledge_generation_and_mixing.ipynb +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/instructlab/logger_config.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/knowledge_utils.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/text_analysis/README.md +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/text_analysis/extract_stock_tickers.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/text_analysis/structured_insights_demo.ipynb +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/pyproject.toml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/scripts/ruff.sh +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/setup.cfg +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/__init__.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/__init__.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/__init__.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/base.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/deprecated_blocks/__init__.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/deprecated_blocks/combine_columns.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/deprecated_blocks/duplicate_columns.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/deprecated_blocks/filter_by_value.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/deprecated_blocks/flatten_columns.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/deprecated_blocks/llmblock.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/deprecated_blocks/rename_columns.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/deprecated_blocks/sample_populator.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/deprecated_blocks/selector.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/deprecated_blocks/set_to_majority_value.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/evaluation/__init__.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/evaluation/verify_question_block.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/filtering/__init__.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/filtering/column_value_filter.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/llm/__init__.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/llm/config.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/llm/error_handler.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/llm/llm_chat_block.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/llm/prompt_builder_block.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/llm/text_parser_block.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/registry.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/transform/__init__.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/transform/duplicate_columns.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/transform/index_based_mapper.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/transform/json_structure_block.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/transform/melt_columns.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/transform/rename_columns.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/transform/text_concat.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/transform/uniform_col_val_setter.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/flow/__init__.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/flow/base.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/flow/checkpointer.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/flow/metadata.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/flow/migration.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/flow/registry.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/flow/validation.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/utils/__init__.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/utils/error_handling.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/utils/flow_id_words.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/utils/flow_identifier.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/utils/flow_metrics.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/utils/logger_config.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/utils/path_resolution.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/utils/yaml_utils.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/__init__.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/__init__.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/detailed_summary.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/__init__.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/extractive_summary.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_answers.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_multiple_qa.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_question_list.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/__init__.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/key_facts_summary.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/README.md +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/__init__.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/atomic_facts.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/detailed_summary.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_faithfulness.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_question.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_relevancy.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/extractive_summary.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/generate_questions_responses.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/text_analysis/__init__.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/text_analysis/structured_insights/__init__.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/text_analysis/structured_insights/analyze_sentiment.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/text_analysis/structured_insights/extract_entities.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/text_analysis/structured_insights/extract_keywords.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/text_analysis/structured_insights/flow.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/text_analysis/structured_insights/summarize.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/py.typed +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub.egg-info/SOURCES.txt +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub.egg-info/dependency_links.txt +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub.egg-info/requires.txt +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub.egg-info/top_level.txt +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/__init__.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/deprecated/test_llmblock.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/evaluation/__init__.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/evaluation/test_evaluate_faithfulness_block.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/evaluation/test_evaluate_relevancy_block.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/evaluation/test_verify_question_block.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/filtering/test_columnvaluefilter.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/llm/test_llm_chat_with_parsing_retry_block.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/llm/test_promptbuilderblock.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/llm/test_textparserblock.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/test_base_block.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/test_registry.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/testdata/test_config.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/testdata/test_evaluate_faithfulness.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/testdata/test_evaluate_relevancy.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/testdata/test_prompt_format_config.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/testdata/test_prompt_format_no_system.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/testdata/test_prompt_format_strict.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/testdata/test_prompt_invalid_final_role.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/testdata/test_prompt_no_user_messages.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/testdata/test_verify_question.yaml +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/transform/test_index_based_mapper.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/transform/test_json_structure_block.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/transform/test_melt_columns.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/transform/test_text_concat.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/transform/test_uniform_col_val_setter.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/utilblocks/test_combinecolumns.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/utilblocks/test_duplicatecolumnsblock.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/utilblocks/test_flattenblock.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/utilblocks/test_renameblock.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/utilblocks/test_samplepopulatorblock.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/utilblocks/test_selectorblock.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/utilblocks/test_settomajority.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/flow/__init__.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/flow/conftest.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/flow/test_base.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/flow/test_checkpointer.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/flow/test_dataset_requirements.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/flow/test_integration.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/flow/test_metadata.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/flow/test_migration.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/flow/test_registry.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/flow/test_validation.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/utils/test_error_handling.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/utils/test_path_resolution.py +0 -0
- {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tox.ini +0 -0
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
28
28
|
commit_id: COMMIT_ID
|
29
29
|
__commit_id__: COMMIT_ID
|
30
30
|
|
31
|
-
__version__ = version = '0.3.0'
|
32
|
-
__version_tuple__ = version_tuple = (0, 3, 0)
|
31
|
+
__version__ = version = '0.3.1'
|
32
|
+
__version_tuple__ = version_tuple = (0, 3, 1)
|
33
33
|
|
34
|
-
__commit_id__ = commit_id = '
|
34
|
+
__commit_id__ = commit_id = 'g4e0f10375'
|
@@ -214,8 +214,33 @@ class LLMClientManager:
|
|
214
214
|
messages_list = messages
|
215
215
|
|
216
216
|
if max_concurrency is not None:
|
217
|
+
if max_concurrency < 1:
|
218
|
+
raise ValueError(
|
219
|
+
"max_concurrency must be greater than 0, got {max_concurrency}"
|
220
|
+
)
|
221
|
+
# Adjust concurrency based on n parameter to avoid overwhelming API
|
222
|
+
# when n > 1 (multiple completions per request)
|
223
|
+
n_value = overrides.get("n") or self.config.n or 1
|
224
|
+
if n_value > 1:
|
225
|
+
# Warn if max_concurrency is less than n
|
226
|
+
if max_concurrency < n_value:
|
227
|
+
logger.warning(
|
228
|
+
f"max_concurrency ({max_concurrency}) is less than n ({n_value}). "
|
229
|
+
f"This may result in very low concurrency. Consider increasing max_concurrency "
|
230
|
+
f"or reducing n for better performance."
|
231
|
+
)
|
232
|
+
|
233
|
+
# Reduce concurrency when generating multiple completions per request
|
234
|
+
adjusted_concurrency = max(1, max_concurrency // n_value)
|
235
|
+
logger.debug(
|
236
|
+
f"Adjusted max_concurrency from {max_concurrency} to {adjusted_concurrency} "
|
237
|
+
f"for n={n_value} completions per request"
|
238
|
+
)
|
239
|
+
else:
|
240
|
+
adjusted_concurrency = max_concurrency
|
241
|
+
|
217
242
|
# Use semaphore for concurrency control
|
218
|
-
semaphore = asyncio.Semaphore(max_concurrency)
|
243
|
+
semaphore = asyncio.Semaphore(adjusted_concurrency)
|
219
244
|
|
220
245
|
async def _create_with_semaphore(msgs):
|
221
246
|
async with semaphore:
|
@@ -1,5 +1,6 @@
|
|
1
1
|
# Third Party
|
2
2
|
from datasets import Dataset, concatenate_datasets
|
3
|
+
import numpy as np
|
3
4
|
|
4
5
|
# Local
|
5
6
|
from .error_handling import FlowValidationError
|
@@ -39,28 +40,45 @@ def validate_no_duplicates(dataset: Dataset) -> None:
|
|
39
40
|
|
40
41
|
df = dataset.to_pandas()
|
41
42
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
43
|
+
def is_hashable(x):
|
44
|
+
try:
|
45
|
+
hash(x)
|
46
|
+
return True
|
47
|
+
except TypeError:
|
48
|
+
return False
|
49
|
+
|
50
|
+
def make_hashable(x):
|
51
|
+
if is_hashable(x):
|
52
|
+
# int, float, str, bytes, None etc. are already hashable
|
53
|
+
return x
|
54
|
+
if isinstance(x, np.ndarray):
|
55
|
+
if x.ndim == 0:
|
56
|
+
return make_hashable(x.item())
|
57
|
+
return tuple(make_hashable(i) for i in x)
|
58
|
+
if isinstance(x, dict):
|
59
|
+
# sort robustly even with heterogeneous key types
|
60
|
+
return tuple(
|
61
|
+
sorted(
|
62
|
+
((k, make_hashable(v)) for k, v in x.items()),
|
63
|
+
key=lambda kv: repr(kv[0]),
|
64
|
+
)
|
65
|
+
)
|
66
|
+
if isinstance(x, (set, frozenset)):
|
67
|
+
# order‑insensitive
|
68
|
+
return frozenset(make_hashable(i) for i in x)
|
69
|
+
if hasattr(x, "__iter__"):
|
70
|
+
# lists, tuples, custom iterables
|
71
|
+
return tuple(make_hashable(i) for i in x)
|
72
|
+
# last‑resort fallback to a stable representation
|
73
|
+
return repr(x)
|
74
|
+
|
75
|
+
# Apply to the whole dataframe to ensure every cell is hashable
|
76
|
+
if hasattr(df, "map"):
|
77
|
+
df = df.map(make_hashable)
|
78
|
+
else:
|
79
|
+
df = df.applymap(make_hashable)
|
80
|
+
|
81
|
+
duplicate_count = int(df.duplicated(keep="first").sum())
|
64
82
|
if duplicate_count > 0:
|
65
83
|
raise FlowValidationError(
|
66
84
|
f"Input dataset contains {duplicate_count} duplicate rows. "
|
@@ -488,6 +488,37 @@ class TestLLMChatBlock:
|
|
488
488
|
class TestErrorHandling:
|
489
489
|
"""Test error handling for LLMChatBlock."""
|
490
490
|
|
491
|
+
def test_max_concurrency_value_error(
|
492
|
+
self, mock_litellm_acompletion, sample_dataset
|
493
|
+
):
|
494
|
+
"""Test ValueError is raised when max_concurrency < 1."""
|
495
|
+
block = LLMChatBlock(
|
496
|
+
block_name="test_max_concurrency_error",
|
497
|
+
input_cols="messages",
|
498
|
+
output_cols="response",
|
499
|
+
model="openai/gpt-4",
|
500
|
+
api_key="test-key",
|
501
|
+
async_mode=True,
|
502
|
+
)
|
503
|
+
|
504
|
+
# Test with max_concurrency = 0
|
505
|
+
with pytest.raises(
|
506
|
+
ValueError, match="max_concurrency must be greater than 0, got"
|
507
|
+
):
|
508
|
+
block.generate(sample_dataset, _flow_max_concurrency=0)
|
509
|
+
|
510
|
+
# Test with max_concurrency = -1
|
511
|
+
with pytest.raises(
|
512
|
+
ValueError, match="max_concurrency must be greater than 0, got"
|
513
|
+
):
|
514
|
+
block.generate(sample_dataset, _flow_max_concurrency=-1)
|
515
|
+
|
516
|
+
# Test with max_concurrency = -5
|
517
|
+
with pytest.raises(
|
518
|
+
ValueError, match="max_concurrency must be greater than 0, got"
|
519
|
+
):
|
520
|
+
block.generate(sample_dataset, _flow_max_concurrency=-5)
|
521
|
+
|
491
522
|
def test_litellm_rate_limit_error(self, sample_dataset):
|
492
523
|
"""Test handling of LiteLLM rate limit errors."""
|
493
524
|
with patch(
|
@@ -660,6 +691,131 @@ class TestMultipleResponses:
|
|
660
691
|
|
661
692
|
assert mock_litellm_completion_multiple.call_count == 2 # One call per sample
|
662
693
|
|
694
|
+
def test_concurrency_adjustment_with_n_greater_than_1(
|
695
|
+
self, mock_litellm_acompletion, sample_dataset
|
696
|
+
):
|
697
|
+
"""Test concurrency is adjusted when n > 1 to avoid overwhelming API."""
|
698
|
+
with patch("sdg_hub.core.blocks.llm.client_manager.logger") as mock_logger:
|
699
|
+
block = LLMChatBlock(
|
700
|
+
block_name="test_concurrency_adjustment",
|
701
|
+
input_cols="messages",
|
702
|
+
output_cols="responses",
|
703
|
+
model="openai/gpt-4",
|
704
|
+
api_key="test-key",
|
705
|
+
n=4, # Generate 4 responses per input
|
706
|
+
async_mode=True,
|
707
|
+
)
|
708
|
+
|
709
|
+
# Test with max_concurrency = 8, should be adjusted to 2 (8 // 4)
|
710
|
+
result = block.generate(sample_dataset, _flow_max_concurrency=8)
|
711
|
+
|
712
|
+
assert "responses" in result.column_names
|
713
|
+
assert len(result["responses"]) == 2
|
714
|
+
|
715
|
+
# Verify debug log was called for concurrency adjustment
|
716
|
+
mock_logger.debug.assert_called()
|
717
|
+
debug_calls = [
|
718
|
+
call
|
719
|
+
for call in mock_logger.debug.call_args_list
|
720
|
+
if "Adjusted max_concurrency" in str(call)
|
721
|
+
]
|
722
|
+
assert len(debug_calls) > 0
|
723
|
+
assert "Adjusted max_concurrency from 8 to 2" in str(debug_calls[0])
|
724
|
+
assert "for n=4 completions per request" in str(debug_calls[0])
|
725
|
+
|
726
|
+
def test_concurrency_warning_when_max_concurrency_less_than_n(
|
727
|
+
self, mock_litellm_acompletion, sample_dataset
|
728
|
+
):
|
729
|
+
"""Test warning is logged when max_concurrency < n."""
|
730
|
+
with patch("sdg_hub.core.blocks.llm.client_manager.logger") as mock_logger:
|
731
|
+
block = LLMChatBlock(
|
732
|
+
block_name="test_concurrency_warning",
|
733
|
+
input_cols="messages",
|
734
|
+
output_cols="responses",
|
735
|
+
model="openai/gpt-4",
|
736
|
+
api_key="test-key",
|
737
|
+
n=5, # Generate 5 responses per input
|
738
|
+
async_mode=True,
|
739
|
+
)
|
740
|
+
|
741
|
+
# Test with max_concurrency = 3, which is less than n=5
|
742
|
+
result = block.generate(sample_dataset, _flow_max_concurrency=3)
|
743
|
+
|
744
|
+
assert "responses" in result.column_names
|
745
|
+
assert len(result["responses"]) == 2
|
746
|
+
|
747
|
+
# Verify warning log was called
|
748
|
+
mock_logger.warning.assert_called()
|
749
|
+
warning_calls = [
|
750
|
+
call
|
751
|
+
for call in mock_logger.warning.call_args_list
|
752
|
+
if "max_concurrency" in str(call)
|
753
|
+
]
|
754
|
+
assert len(warning_calls) > 0
|
755
|
+
assert "max_concurrency (3) is less than n (5)" in str(warning_calls[0])
|
756
|
+
assert "Consider increasing max_concurrency" in str(warning_calls[0])
|
757
|
+
|
758
|
+
def test_concurrency_not_adjusted_when_n_is_1(
|
759
|
+
self, mock_litellm_acompletion, sample_dataset
|
760
|
+
):
|
761
|
+
"""Test concurrency is not adjusted when n=1 or n=None."""
|
762
|
+
with patch("sdg_hub.core.blocks.llm.client_manager.logger") as mock_logger:
|
763
|
+
# Test with n=1
|
764
|
+
block_n1 = LLMChatBlock(
|
765
|
+
block_name="test_no_adjustment_n1",
|
766
|
+
input_cols="messages",
|
767
|
+
output_cols="response",
|
768
|
+
model="openai/gpt-4",
|
769
|
+
api_key="test-key",
|
770
|
+
n=1,
|
771
|
+
async_mode=True,
|
772
|
+
)
|
773
|
+
|
774
|
+
result = block_n1.generate(sample_dataset, _flow_max_concurrency=8)
|
775
|
+
|
776
|
+
assert "response" in result.column_names
|
777
|
+
assert len(result["response"]) == 2
|
778
|
+
|
779
|
+
# No adjustment should happen, so no debug log about adjustment
|
780
|
+
debug_calls = [
|
781
|
+
call
|
782
|
+
for call in mock_logger.debug.call_args_list
|
783
|
+
if "Adjusted max_concurrency" in str(call)
|
784
|
+
]
|
785
|
+
assert len(debug_calls) == 0
|
786
|
+
|
787
|
+
def test_concurrency_override_in_generate_call(
|
788
|
+
self, mock_litellm_acompletion, sample_dataset
|
789
|
+
):
|
790
|
+
"""Test concurrency adjustment works when n is overridden in generate call."""
|
791
|
+
with patch("sdg_hub.core.blocks.llm.client_manager.logger") as mock_logger:
|
792
|
+
block = LLMChatBlock(
|
793
|
+
block_name="test_override_adjustment",
|
794
|
+
input_cols="messages",
|
795
|
+
output_cols="responses",
|
796
|
+
model="openai/gpt-4",
|
797
|
+
api_key="test-key",
|
798
|
+
n=1, # Initially set to 1
|
799
|
+
async_mode=True,
|
800
|
+
)
|
801
|
+
|
802
|
+
# Override n to 3 at runtime with max_concurrency=9
|
803
|
+
result = block.generate(sample_dataset, n=3, _flow_max_concurrency=9)
|
804
|
+
|
805
|
+
assert "responses" in result.column_names
|
806
|
+
assert len(result["responses"]) == 2
|
807
|
+
|
808
|
+
# Verify debug log shows adjustment based on runtime n=3
|
809
|
+
mock_logger.debug.assert_called()
|
810
|
+
debug_calls = [
|
811
|
+
call
|
812
|
+
for call in mock_logger.debug.call_args_list
|
813
|
+
if "Adjusted max_concurrency" in str(call)
|
814
|
+
]
|
815
|
+
assert len(debug_calls) > 0
|
816
|
+
assert "Adjusted max_concurrency from 9 to 3" in str(debug_calls[0])
|
817
|
+
assert "for n=3 completions per request" in str(debug_calls[0])
|
818
|
+
|
663
819
|
def test_single_response_still_works(self, mock_litellm_completion, sample_dataset):
|
664
820
|
"""Test that n=1 or n=None still returns single strings."""
|
665
821
|
# Test n=1
|