sdg-hub 0.3.0__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212)
  1. {sdg_hub-0.3.0/src/sdg_hub.egg-info → sdg_hub-0.3.1}/PKG-INFO +1 -1
  2. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/_version.py +3 -3
  3. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/llm/client_manager.py +26 -1
  4. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/utils/datautils.py +40 -22
  5. {sdg_hub-0.3.0 → sdg_hub-0.3.1/src/sdg_hub.egg-info}/PKG-INFO +1 -1
  6. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/llm/test_llm_chat_block.py +156 -0
  7. sdg_hub-0.3.1/tests/utils/test_datautils.py +661 -0
  8. sdg_hub-0.3.0/tests/utils/test_datautils.py +0 -132
  9. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.github/actionlint.yaml +0 -0
  10. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.github/actions/free-disk-space/action.yml +0 -0
  11. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.github/dependabot.yml +0 -0
  12. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.github/mergify.yml +0 -0
  13. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.github/workflows/actionlint.dockerfile +0 -0
  14. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.github/workflows/actionlint.yml +0 -0
  15. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.github/workflows/docs.yml +0 -0
  16. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.github/workflows/e2e.yml +0 -0
  17. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.github/workflows/lint.yml +0 -0
  18. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.github/workflows/matchers/actionlint.json +0 -0
  19. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.github/workflows/matchers/pylint.json +0 -0
  20. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.github/workflows/pypi.yaml +0 -0
  21. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.github/workflows/test.yml +0 -0
  22. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.gitignore +0 -0
  23. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.isort.cfg +0 -0
  24. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.markdownlint-cli2.yaml +0 -0
  25. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.pre-commit-config.yaml +0 -0
  26. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/.pylintrc +0 -0
  27. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/CLAUDE.md +0 -0
  28. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/CONTRIBUTING.md +0 -0
  29. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/LICENSE +0 -0
  30. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/Makefile +0 -0
  31. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/README.md +0 -0
  32. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/.nojekyll +0 -0
  33. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/README.md +0 -0
  34. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/_coverpage.md +0 -0
  35. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/_navbar.md +0 -0
  36. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/_sidebar.md +0 -0
  37. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/api-reference.md +0 -0
  38. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/blocks/custom-blocks.md +0 -0
  39. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/blocks/evaluation-blocks.md +0 -0
  40. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/blocks/filtering-blocks.md +0 -0
  41. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/blocks/llm-blocks.md +0 -0
  42. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/blocks/overview.md +0 -0
  43. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/blocks/transform-blocks.md +0 -0
  44. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/concepts.md +0 -0
  45. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/development.md +0 -0
  46. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/flows/discovery.md +0 -0
  47. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/flows/overview.md +0 -0
  48. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/index.html +0 -0
  49. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/installation.md +0 -0
  50. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/docs/quick-start.md +0 -0
  51. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/annotation/annotation_classification.ipynb +0 -0
  52. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/annotation/news_classification_assessment_prompt.yaml +0 -0
  53. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/annotation/news_classification_flow.yaml +0 -0
  54. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/annotation/news_classification_prompt.yaml +0 -0
  55. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/annotation/revise_news_classification_prompt.yaml +0 -0
  56. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/.env.example +0 -0
  57. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/README.md +0 -0
  58. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/knowledge_generation.ipynb +0 -0
  59. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/knowledge_mixing.ipynb +0 -0
  60. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/knowledge_mixing_utils.py +0 -0
  61. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/instructlab/.gitignore +0 -0
  62. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/instructlab/README.md +0 -0
  63. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/instructlab/assets/imgs/instructlab-banner.png +0 -0
  64. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/instructlab/docling_v2_config.yaml +0 -0
  65. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/instructlab/docparser.py +0 -0
  66. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/instructlab/docparser_v2.py +0 -0
  67. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.json +0 -0
  68. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.md +0 -0
  69. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.pdf +0 -0
  70. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/qna.yaml +0 -0
  71. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/instructlab/document_pre_processing.ipynb +0 -0
  72. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/instructlab/knowledge_generation_and_mixing.ipynb +0 -0
  73. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/instructlab/logger_config.py +0 -0
  74. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/knowledge_tuning/knowledge_utils.py +0 -0
  75. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/text_analysis/README.md +0 -0
  76. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/text_analysis/extract_stock_tickers.yaml +0 -0
  77. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/examples/text_analysis/structured_insights_demo.ipynb +0 -0
  78. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/pyproject.toml +0 -0
  79. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/scripts/ruff.sh +0 -0
  80. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/setup.cfg +0 -0
  81. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/__init__.py +0 -0
  82. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/__init__.py +0 -0
  83. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/__init__.py +0 -0
  84. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/base.py +0 -0
  85. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/deprecated_blocks/__init__.py +0 -0
  86. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/deprecated_blocks/combine_columns.py +0 -0
  87. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/deprecated_blocks/duplicate_columns.py +0 -0
  88. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/deprecated_blocks/filter_by_value.py +0 -0
  89. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/deprecated_blocks/flatten_columns.py +0 -0
  90. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/deprecated_blocks/llmblock.py +0 -0
  91. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/deprecated_blocks/rename_columns.py +0 -0
  92. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/deprecated_blocks/sample_populator.py +0 -0
  93. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/deprecated_blocks/selector.py +0 -0
  94. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/deprecated_blocks/set_to_majority_value.py +0 -0
  95. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/evaluation/__init__.py +0 -0
  96. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +0 -0
  97. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +0 -0
  98. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/evaluation/verify_question_block.py +0 -0
  99. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/filtering/__init__.py +0 -0
  100. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/filtering/column_value_filter.py +0 -0
  101. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/llm/__init__.py +0 -0
  102. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/llm/config.py +0 -0
  103. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/llm/error_handler.py +0 -0
  104. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/llm/llm_chat_block.py +0 -0
  105. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +0 -0
  106. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/llm/prompt_builder_block.py +0 -0
  107. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/llm/text_parser_block.py +0 -0
  108. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/registry.py +0 -0
  109. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/transform/__init__.py +0 -0
  110. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/transform/duplicate_columns.py +0 -0
  111. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/transform/index_based_mapper.py +0 -0
  112. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/transform/json_structure_block.py +0 -0
  113. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/transform/melt_columns.py +0 -0
  114. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/transform/rename_columns.py +0 -0
  115. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/transform/text_concat.py +0 -0
  116. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/blocks/transform/uniform_col_val_setter.py +0 -0
  117. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/flow/__init__.py +0 -0
  118. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/flow/base.py +0 -0
  119. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/flow/checkpointer.py +0 -0
  120. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/flow/metadata.py +0 -0
  121. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/flow/migration.py +0 -0
  122. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/flow/registry.py +0 -0
  123. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/flow/validation.py +0 -0
  124. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/utils/__init__.py +0 -0
  125. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/utils/error_handling.py +0 -0
  126. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/utils/flow_id_words.yaml +0 -0
  127. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/utils/flow_identifier.py +0 -0
  128. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/utils/flow_metrics.py +0 -0
  129. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/utils/logger_config.py +0 -0
  130. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/utils/path_resolution.py +0 -0
  131. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/core/utils/yaml_utils.py +0 -0
  132. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/__init__.py +0 -0
  133. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/__init__.py +0 -0
  134. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/detailed_summary.yaml +0 -0
  135. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml +0 -0
  136. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/__init__.py +0 -0
  137. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/extractive_summary.yaml +0 -0
  138. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml +0 -0
  139. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_answers.yaml +0 -0
  140. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_multiple_qa.yaml +0 -0
  141. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_question_list.yaml +0 -0
  142. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/__init__.py +0 -0
  143. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml +0 -0
  144. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/key_facts_summary.yaml +0 -0
  145. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/README.md +0 -0
  146. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/__init__.py +0 -0
  147. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/atomic_facts.yaml +0 -0
  148. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/detailed_summary.yaml +0 -0
  149. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_faithfulness.yaml +0 -0
  150. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_question.yaml +0 -0
  151. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_relevancy.yaml +0 -0
  152. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/extractive_summary.yaml +0 -0
  153. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +0 -0
  154. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/generate_questions_responses.yaml +0 -0
  155. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/text_analysis/__init__.py +0 -0
  156. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/text_analysis/structured_insights/__init__.py +0 -0
  157. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/text_analysis/structured_insights/analyze_sentiment.yaml +0 -0
  158. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/text_analysis/structured_insights/extract_entities.yaml +0 -0
  159. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/text_analysis/structured_insights/extract_keywords.yaml +0 -0
  160. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/text_analysis/structured_insights/flow.yaml +0 -0
  161. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/flows/text_analysis/structured_insights/summarize.yaml +0 -0
  162. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub/py.typed +0 -0
  163. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub.egg-info/SOURCES.txt +0 -0
  164. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub.egg-info/dependency_links.txt +0 -0
  165. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub.egg-info/requires.txt +0 -0
  166. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/src/sdg_hub.egg-info/top_level.txt +0 -0
  167. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/__init__.py +0 -0
  168. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/deprecated/test_llmblock.py +0 -0
  169. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/evaluation/__init__.py +0 -0
  170. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/evaluation/test_evaluate_faithfulness_block.py +0 -0
  171. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/evaluation/test_evaluate_relevancy_block.py +0 -0
  172. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/evaluation/test_verify_question_block.py +0 -0
  173. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/filtering/test_columnvaluefilter.py +0 -0
  174. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/llm/test_llm_chat_with_parsing_retry_block.py +0 -0
  175. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/llm/test_promptbuilderblock.py +0 -0
  176. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/llm/test_textparserblock.py +0 -0
  177. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/test_base_block.py +0 -0
  178. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/test_registry.py +0 -0
  179. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/testdata/test_config.yaml +0 -0
  180. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/testdata/test_evaluate_faithfulness.yaml +0 -0
  181. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/testdata/test_evaluate_relevancy.yaml +0 -0
  182. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/testdata/test_prompt_format_config.yaml +0 -0
  183. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/testdata/test_prompt_format_no_system.yaml +0 -0
  184. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/testdata/test_prompt_format_strict.yaml +0 -0
  185. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/testdata/test_prompt_invalid_final_role.yaml +0 -0
  186. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/testdata/test_prompt_no_user_messages.yaml +0 -0
  187. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/testdata/test_verify_question.yaml +0 -0
  188. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/transform/test_index_based_mapper.py +0 -0
  189. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/transform/test_json_structure_block.py +0 -0
  190. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/transform/test_melt_columns.py +0 -0
  191. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/transform/test_text_concat.py +0 -0
  192. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/transform/test_uniform_col_val_setter.py +0 -0
  193. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/utilblocks/test_combinecolumns.py +0 -0
  194. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/utilblocks/test_duplicatecolumnsblock.py +0 -0
  195. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/utilblocks/test_flattenblock.py +0 -0
  196. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/utilblocks/test_renameblock.py +0 -0
  197. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/utilblocks/test_samplepopulatorblock.py +0 -0
  198. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/utilblocks/test_selectorblock.py +0 -0
  199. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/blocks/utilblocks/test_settomajority.py +0 -0
  200. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/flow/__init__.py +0 -0
  201. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/flow/conftest.py +0 -0
  202. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/flow/test_base.py +0 -0
  203. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/flow/test_checkpointer.py +0 -0
  204. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/flow/test_dataset_requirements.py +0 -0
  205. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/flow/test_integration.py +0 -0
  206. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/flow/test_metadata.py +0 -0
  207. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/flow/test_migration.py +0 -0
  208. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/flow/test_registry.py +0 -0
  209. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/flow/test_validation.py +0 -0
  210. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/utils/test_error_handling.py +0 -0
  211. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tests/utils/test_path_resolution.py +0 -0
  212. {sdg_hub-0.3.0 → sdg_hub-0.3.1}/tox.ini +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sdg_hub
3
- Version: 0.3.0
3
+ Version: 0.3.1
4
4
  Summary: Synthetic Data Generation
5
5
  Author-email: Red Hat AI Innovation <abhandwa@redhat.com>
6
6
  License: Apache-2.0
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.3.0'
32
- __version_tuple__ = version_tuple = (0, 3, 0)
31
+ __version__ = version = '0.3.1'
32
+ __version_tuple__ = version_tuple = (0, 3, 1)
33
33
 
34
- __commit_id__ = commit_id = 'g5b81eba8c'
34
+ __commit_id__ = commit_id = 'g4e0f10375'
@@ -214,8 +214,33 @@ class LLMClientManager:
214
214
  messages_list = messages
215
215
 
216
216
  if max_concurrency is not None:
217
+ if max_concurrency < 1:
218
+ raise ValueError(
219
+ "max_concurrency must be greater than 0, got {max_concurrency}"
220
+ )
221
+ # Adjust concurrency based on n parameter to avoid overwhelming API
222
+ # when n > 1 (multiple completions per request)
223
+ n_value = overrides.get("n") or self.config.n or 1
224
+ if n_value > 1:
225
+ # Warn if max_concurrency is less than n
226
+ if max_concurrency < n_value:
227
+ logger.warning(
228
+ f"max_concurrency ({max_concurrency}) is less than n ({n_value}). "
229
+ f"This may result in very low concurrency. Consider increasing max_concurrency "
230
+ f"or reducing n for better performance."
231
+ )
232
+
233
+ # Reduce concurrency when generating multiple completions per request
234
+ adjusted_concurrency = max(1, max_concurrency // n_value)
235
+ logger.debug(
236
+ f"Adjusted max_concurrency from {max_concurrency} to {adjusted_concurrency} "
237
+ f"for n={n_value} completions per request"
238
+ )
239
+ else:
240
+ adjusted_concurrency = max_concurrency
241
+
217
242
  # Use semaphore for concurrency control
218
- semaphore = asyncio.Semaphore(max_concurrency)
243
+ semaphore = asyncio.Semaphore(adjusted_concurrency)
219
244
 
220
245
  async def _create_with_semaphore(msgs):
221
246
  async with semaphore:
@@ -1,5 +1,6 @@
1
1
  # Third Party
2
2
  from datasets import Dataset, concatenate_datasets
3
+ import numpy as np
3
4
 
4
5
  # Local
5
6
  from .error_handling import FlowValidationError
@@ -39,28 +40,45 @@ def validate_no_duplicates(dataset: Dataset) -> None:
39
40
 
40
41
  df = dataset.to_pandas()
41
42
 
42
- # Try pandas duplicated() first - only convert types if we hit unhashable error
43
- try:
44
- duplicate_count = int(df.duplicated(keep="first").sum())
45
- except TypeError as e:
46
- if "unhashable type" in str(e):
47
- # Convert unhashable types to tuples so pandas can hash them
48
- for col in df.columns:
49
- if df[col].dtype == "object": # Only check object columns
50
- df[col] = df[col].apply(
51
- lambda x: (
52
- tuple(sorted(x.items()))
53
- if isinstance(x, dict)
54
- else tuple(x)
55
- if hasattr(x, "__iter__")
56
- and not isinstance(x, (str, bytes))
57
- else x
58
- )
59
- )
60
- duplicate_count = int(df.duplicated(keep="first").sum())
61
- else:
62
- raise # Re-raise if it's a different TypeError
63
-
43
+ def is_hashable(x):
44
+ try:
45
+ hash(x)
46
+ return True
47
+ except TypeError:
48
+ return False
49
+
50
+ def make_hashable(x):
51
+ if is_hashable(x):
52
+ # int, float, str, bytes, None etc. are already hashable
53
+ return x
54
+ if isinstance(x, np.ndarray):
55
+ if x.ndim == 0:
56
+ return make_hashable(x.item())
57
+ return tuple(make_hashable(i) for i in x)
58
+ if isinstance(x, dict):
59
+ # sort robustly even with heterogeneous key types
60
+ return tuple(
61
+ sorted(
62
+ ((k, make_hashable(v)) for k, v in x.items()),
63
+ key=lambda kv: repr(kv[0]),
64
+ )
65
+ )
66
+ if isinstance(x, (set, frozenset)):
67
+ # order‑insensitive
68
+ return frozenset(make_hashable(i) for i in x)
69
+ if hasattr(x, "__iter__"):
70
+ # lists, tuples, custom iterables
71
+ return tuple(make_hashable(i) for i in x)
72
+ # last‑resort fallback to a stable representation
73
+ return repr(x)
74
+
75
+ # Apply to the whole dataframe to ensure every cell is hashable
76
+ if hasattr(df, "map"):
77
+ df = df.map(make_hashable)
78
+ else:
79
+ df = df.applymap(make_hashable)
80
+
81
+ duplicate_count = int(df.duplicated(keep="first").sum())
64
82
  if duplicate_count > 0:
65
83
  raise FlowValidationError(
66
84
  f"Input dataset contains {duplicate_count} duplicate rows. "
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sdg_hub
3
- Version: 0.3.0
3
+ Version: 0.3.1
4
4
  Summary: Synthetic Data Generation
5
5
  Author-email: Red Hat AI Innovation <abhandwa@redhat.com>
6
6
  License: Apache-2.0
@@ -488,6 +488,37 @@ class TestLLMChatBlock:
488
488
  class TestErrorHandling:
489
489
  """Test error handling for LLMChatBlock."""
490
490
 
491
+ def test_max_concurrency_value_error(
492
+ self, mock_litellm_acompletion, sample_dataset
493
+ ):
494
+ """Test ValueError is raised when max_concurrency < 1."""
495
+ block = LLMChatBlock(
496
+ block_name="test_max_concurrency_error",
497
+ input_cols="messages",
498
+ output_cols="response",
499
+ model="openai/gpt-4",
500
+ api_key="test-key",
501
+ async_mode=True,
502
+ )
503
+
504
+ # Test with max_concurrency = 0
505
+ with pytest.raises(
506
+ ValueError, match="max_concurrency must be greater than 0, got"
507
+ ):
508
+ block.generate(sample_dataset, _flow_max_concurrency=0)
509
+
510
+ # Test with max_concurrency = -1
511
+ with pytest.raises(
512
+ ValueError, match="max_concurrency must be greater than 0, got"
513
+ ):
514
+ block.generate(sample_dataset, _flow_max_concurrency=-1)
515
+
516
+ # Test with max_concurrency = -5
517
+ with pytest.raises(
518
+ ValueError, match="max_concurrency must be greater than 0, got"
519
+ ):
520
+ block.generate(sample_dataset, _flow_max_concurrency=-5)
521
+
491
522
  def test_litellm_rate_limit_error(self, sample_dataset):
492
523
  """Test handling of LiteLLM rate limit errors."""
493
524
  with patch(
@@ -660,6 +691,131 @@ class TestMultipleResponses:
660
691
 
661
692
  assert mock_litellm_completion_multiple.call_count == 2 # One call per sample
662
693
 
694
+ def test_concurrency_adjustment_with_n_greater_than_1(
695
+ self, mock_litellm_acompletion, sample_dataset
696
+ ):
697
+ """Test concurrency is adjusted when n > 1 to avoid overwhelming API."""
698
+ with patch("sdg_hub.core.blocks.llm.client_manager.logger") as mock_logger:
699
+ block = LLMChatBlock(
700
+ block_name="test_concurrency_adjustment",
701
+ input_cols="messages",
702
+ output_cols="responses",
703
+ model="openai/gpt-4",
704
+ api_key="test-key",
705
+ n=4, # Generate 4 responses per input
706
+ async_mode=True,
707
+ )
708
+
709
+ # Test with max_concurrency = 8, should be adjusted to 2 (8 // 4)
710
+ result = block.generate(sample_dataset, _flow_max_concurrency=8)
711
+
712
+ assert "responses" in result.column_names
713
+ assert len(result["responses"]) == 2
714
+
715
+ # Verify debug log was called for concurrency adjustment
716
+ mock_logger.debug.assert_called()
717
+ debug_calls = [
718
+ call
719
+ for call in mock_logger.debug.call_args_list
720
+ if "Adjusted max_concurrency" in str(call)
721
+ ]
722
+ assert len(debug_calls) > 0
723
+ assert "Adjusted max_concurrency from 8 to 2" in str(debug_calls[0])
724
+ assert "for n=4 completions per request" in str(debug_calls[0])
725
+
726
+ def test_concurrency_warning_when_max_concurrency_less_than_n(
727
+ self, mock_litellm_acompletion, sample_dataset
728
+ ):
729
+ """Test warning is logged when max_concurrency < n."""
730
+ with patch("sdg_hub.core.blocks.llm.client_manager.logger") as mock_logger:
731
+ block = LLMChatBlock(
732
+ block_name="test_concurrency_warning",
733
+ input_cols="messages",
734
+ output_cols="responses",
735
+ model="openai/gpt-4",
736
+ api_key="test-key",
737
+ n=5, # Generate 5 responses per input
738
+ async_mode=True,
739
+ )
740
+
741
+ # Test with max_concurrency = 3, which is less than n=5
742
+ result = block.generate(sample_dataset, _flow_max_concurrency=3)
743
+
744
+ assert "responses" in result.column_names
745
+ assert len(result["responses"]) == 2
746
+
747
+ # Verify warning log was called
748
+ mock_logger.warning.assert_called()
749
+ warning_calls = [
750
+ call
751
+ for call in mock_logger.warning.call_args_list
752
+ if "max_concurrency" in str(call)
753
+ ]
754
+ assert len(warning_calls) > 0
755
+ assert "max_concurrency (3) is less than n (5)" in str(warning_calls[0])
756
+ assert "Consider increasing max_concurrency" in str(warning_calls[0])
757
+
758
+ def test_concurrency_not_adjusted_when_n_is_1(
759
+ self, mock_litellm_acompletion, sample_dataset
760
+ ):
761
+ """Test concurrency is not adjusted when n=1 or n=None."""
762
+ with patch("sdg_hub.core.blocks.llm.client_manager.logger") as mock_logger:
763
+ # Test with n=1
764
+ block_n1 = LLMChatBlock(
765
+ block_name="test_no_adjustment_n1",
766
+ input_cols="messages",
767
+ output_cols="response",
768
+ model="openai/gpt-4",
769
+ api_key="test-key",
770
+ n=1,
771
+ async_mode=True,
772
+ )
773
+
774
+ result = block_n1.generate(sample_dataset, _flow_max_concurrency=8)
775
+
776
+ assert "response" in result.column_names
777
+ assert len(result["response"]) == 2
778
+
779
+ # No adjustment should happen, so no debug log about adjustment
780
+ debug_calls = [
781
+ call
782
+ for call in mock_logger.debug.call_args_list
783
+ if "Adjusted max_concurrency" in str(call)
784
+ ]
785
+ assert len(debug_calls) == 0
786
+
787
+ def test_concurrency_override_in_generate_call(
788
+ self, mock_litellm_acompletion, sample_dataset
789
+ ):
790
+ """Test concurrency adjustment works when n is overridden in generate call."""
791
+ with patch("sdg_hub.core.blocks.llm.client_manager.logger") as mock_logger:
792
+ block = LLMChatBlock(
793
+ block_name="test_override_adjustment",
794
+ input_cols="messages",
795
+ output_cols="responses",
796
+ model="openai/gpt-4",
797
+ api_key="test-key",
798
+ n=1, # Initially set to 1
799
+ async_mode=True,
800
+ )
801
+
802
+ # Override n to 3 at runtime with max_concurrency=9
803
+ result = block.generate(sample_dataset, n=3, _flow_max_concurrency=9)
804
+
805
+ assert "responses" in result.column_names
806
+ assert len(result["responses"]) == 2
807
+
808
+ # Verify debug log shows adjustment based on runtime n=3
809
+ mock_logger.debug.assert_called()
810
+ debug_calls = [
811
+ call
812
+ for call in mock_logger.debug.call_args_list
813
+ if "Adjusted max_concurrency" in str(call)
814
+ ]
815
+ assert len(debug_calls) > 0
816
+ assert "Adjusted max_concurrency from 9 to 3" in str(debug_calls[0])
817
+ assert "for n=3 completions per request" in str(debug_calls[0])
818
+
663
819
  def test_single_response_still_works(self, mock_litellm_completion, sample_dataset):
664
820
  """Test that n=1 or n=None still returns single strings."""
665
821
  # Test n=1