sdg-hub 0.2.1__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218)
  1. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/.github/workflows/pypi.yaml +2 -2
  2. {sdg_hub-0.2.1/src/sdg_hub.egg-info → sdg_hub-0.3.0}/PKG-INFO +42 -15
  3. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/README.md +39 -13
  4. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/docs/blocks/llm-blocks.md +52 -5
  5. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/docs/blocks/overview.md +10 -9
  6. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/docs/concepts.md +7 -1
  7. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/docs/development.md +5 -4
  8. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/docs/flows/overview.md +45 -0
  9. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/docs/quick-start.md +7 -2
  10. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/examples/annotation/annotation_classification.ipynb +128 -153
  11. sdg_hub-0.3.0/examples/annotation/news_classification_flow.yaml +185 -0
  12. sdg_hub-0.3.0/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/.env.example +59 -0
  13. sdg_hub-0.3.0/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/README.md +76 -0
  14. sdg_hub-0.3.0/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/knowledge_generation.ipynb +588 -0
  15. sdg_hub-0.3.0/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/knowledge_mixing.ipynb +578 -0
  16. sdg_hub-0.3.0/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/knowledge_mixing_utils.py +289 -0
  17. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/examples/knowledge_tuning/instructlab/README.md +1 -1
  18. sdg_hub-0.3.0/examples/knowledge_tuning/instructlab/assets/imgs/instructlab-banner.png +0 -0
  19. sdg_hub-0.3.0/examples/text_analysis/README.md +145 -0
  20. sdg_hub-0.3.0/examples/text_analysis/extract_stock_tickers.yaml +25 -0
  21. sdg_hub-0.3.0/examples/text_analysis/structured_insights_demo.ipynb +4479 -0
  22. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/pyproject.toml +5 -1
  23. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/_version.py +16 -3
  24. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/deprecated_blocks/selector.py +1 -1
  25. sdg_hub-0.3.0/src/sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +323 -0
  26. sdg_hub-0.3.0/src/sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +323 -0
  27. sdg_hub-0.3.0/src/sdg_hub/core/blocks/evaluation/verify_question_block.py +329 -0
  28. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/llm/client_manager.py +92 -43
  29. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/llm/config.py +1 -0
  30. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/llm/llm_chat_block.py +74 -16
  31. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +277 -115
  32. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/llm/text_parser_block.py +88 -23
  33. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/registry.py +48 -34
  34. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/transform/__init__.py +2 -0
  35. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/transform/index_based_mapper.py +1 -1
  36. sdg_hub-0.3.0/src/sdg_hub/core/blocks/transform/json_structure_block.py +142 -0
  37. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/flow/base.py +326 -62
  38. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/utils/datautils.py +54 -0
  39. sdg_hub-0.3.0/src/sdg_hub/core/utils/flow_metrics.py +261 -0
  40. sdg_hub-0.3.0/src/sdg_hub/core/utils/logger_config.py +61 -0
  41. sdg_hub-0.3.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/detailed_summary.yaml +11 -0
  42. sdg_hub-0.3.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml +159 -0
  43. sdg_hub-0.3.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/extractive_summary.yaml +65 -0
  44. sdg_hub-0.3.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml +161 -0
  45. sdg_hub-0.3.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_answers.yaml +15 -0
  46. sdg_hub-0.3.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_multiple_qa.yaml +21 -0
  47. sdg_hub-0.3.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_question_list.yaml +44 -0
  48. sdg_hub-0.3.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml +104 -0
  49. sdg_hub-0.3.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/key_facts_summary.yaml +61 -0
  50. sdg_hub-0.3.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/README.md +0 -0
  51. sdg_hub-0.3.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/__init__.py +0 -0
  52. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +0 -7
  53. sdg_hub-0.3.0/src/sdg_hub/flows/text_analysis/__init__.py +2 -0
  54. sdg_hub-0.3.0/src/sdg_hub/flows/text_analysis/structured_insights/__init__.py +6 -0
  55. sdg_hub-0.3.0/src/sdg_hub/flows/text_analysis/structured_insights/analyze_sentiment.yaml +27 -0
  56. sdg_hub-0.3.0/src/sdg_hub/flows/text_analysis/structured_insights/extract_entities.yaml +38 -0
  57. sdg_hub-0.3.0/src/sdg_hub/flows/text_analysis/structured_insights/extract_keywords.yaml +21 -0
  58. sdg_hub-0.3.0/src/sdg_hub/flows/text_analysis/structured_insights/flow.yaml +153 -0
  59. sdg_hub-0.3.0/src/sdg_hub/flows/text_analysis/structured_insights/summarize.yaml +21 -0
  60. sdg_hub-0.3.0/src/sdg_hub/py.typed +0 -0
  61. {sdg_hub-0.2.1 → sdg_hub-0.3.0/src/sdg_hub.egg-info}/PKG-INFO +42 -15
  62. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub.egg-info/SOURCES.txt +35 -0
  63. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub.egg-info/requires.txt +2 -1
  64. sdg_hub-0.3.0/tests/__init__.py +0 -0
  65. sdg_hub-0.3.0/tests/blocks/evaluation/test_evaluate_faithfulness_block.py +271 -0
  66. sdg_hub-0.3.0/tests/blocks/evaluation/test_evaluate_relevancy_block.py +189 -0
  67. sdg_hub-0.3.0/tests/blocks/evaluation/test_verify_question_block.py +331 -0
  68. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/llm/test_llm_chat_block.py +68 -25
  69. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/llm/test_llm_chat_with_parsing_retry_block.py +539 -72
  70. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/llm/test_textparserblock.py +645 -66
  71. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/test_registry.py +196 -32
  72. sdg_hub-0.3.0/tests/blocks/transform/test_json_structure_block.py +303 -0
  73. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/flow/conftest.py +1 -1
  74. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/flow/test_base.py +251 -2
  75. sdg_hub-0.3.0/tests/flow/test_dataset_requirements.py +419 -0
  76. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/flow/test_integration.py +2 -2
  77. sdg_hub-0.3.0/tests/utils/test_datautils.py +132 -0
  78. sdg_hub-0.2.1/src/sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +0 -564
  79. sdg_hub-0.2.1/src/sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +0 -564
  80. sdg_hub-0.2.1/src/sdg_hub/core/blocks/evaluation/verify_question_block.py +0 -564
  81. sdg_hub-0.2.1/src/sdg_hub/core/utils/logger_config.py +0 -20
  82. sdg_hub-0.2.1/tests/blocks/evaluation/test_evaluate_faithfulness_block.py +0 -496
  83. sdg_hub-0.2.1/tests/blocks/evaluation/test_evaluate_relevancy_block.py +0 -493
  84. sdg_hub-0.2.1/tests/blocks/evaluation/test_verify_question_block.py +0 -480
  85. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/.github/actionlint.yaml +0 -0
  86. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/.github/actions/free-disk-space/action.yml +0 -0
  87. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/.github/dependabot.yml +0 -0
  88. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/.github/mergify.yml +0 -0
  89. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/.github/workflows/actionlint.dockerfile +0 -0
  90. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/.github/workflows/actionlint.yml +0 -0
  91. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/.github/workflows/docs.yml +0 -0
  92. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/.github/workflows/e2e.yml +0 -0
  93. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/.github/workflows/lint.yml +0 -0
  94. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/.github/workflows/matchers/actionlint.json +0 -0
  95. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/.github/workflows/matchers/pylint.json +0 -0
  96. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/.github/workflows/test.yml +0 -0
  97. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/.gitignore +0 -0
  98. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/.isort.cfg +0 -0
  99. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/.markdownlint-cli2.yaml +0 -0
  100. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/.pre-commit-config.yaml +0 -0
  101. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/.pylintrc +0 -0
  102. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/CLAUDE.md +0 -0
  103. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/CONTRIBUTING.md +0 -0
  104. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/LICENSE +0 -0
  105. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/Makefile +0 -0
  106. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/docs/.nojekyll +0 -0
  107. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/docs/README.md +0 -0
  108. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/docs/_coverpage.md +0 -0
  109. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/docs/_navbar.md +0 -0
  110. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/docs/_sidebar.md +0 -0
  111. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/docs/api-reference.md +0 -0
  112. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/docs/blocks/custom-blocks.md +0 -0
  113. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/docs/blocks/evaluation-blocks.md +0 -0
  114. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/docs/blocks/filtering-blocks.md +0 -0
  115. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/docs/blocks/transform-blocks.md +0 -0
  116. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/docs/flows/discovery.md +0 -0
  117. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/docs/index.html +0 -0
  118. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/docs/installation.md +0 -0
  119. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/examples/annotation/news_classification_assessment_prompt.yaml +0 -0
  120. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/examples/annotation/news_classification_prompt.yaml +0 -0
  121. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/examples/annotation/revise_news_classification_prompt.yaml +0 -0
  122. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/examples/knowledge_tuning/instructlab/.gitignore +0 -0
  123. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/examples/knowledge_tuning/instructlab/docling_v2_config.yaml +0 -0
  124. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/examples/knowledge_tuning/instructlab/docparser.py +0 -0
  125. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/examples/knowledge_tuning/instructlab/docparser_v2.py +0 -0
  126. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.json +0 -0
  127. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.md +0 -0
  128. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.pdf +0 -0
  129. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/qna.yaml +0 -0
  130. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/examples/knowledge_tuning/instructlab/document_pre_processing.ipynb +0 -0
  131. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/examples/knowledge_tuning/instructlab/knowledge_generation_and_mixing.ipynb +0 -0
  132. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/examples/knowledge_tuning/instructlab/logger_config.py +0 -0
  133. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/examples/knowledge_tuning/knowledge_utils.py +0 -0
  134. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/scripts/ruff.sh +0 -0
  135. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/setup.cfg +0 -0
  136. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/__init__.py +0 -0
  137. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/__init__.py +0 -0
  138. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/__init__.py +0 -0
  139. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/base.py +0 -0
  140. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/deprecated_blocks/__init__.py +0 -0
  141. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/deprecated_blocks/combine_columns.py +0 -0
  142. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/deprecated_blocks/duplicate_columns.py +0 -0
  143. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/deprecated_blocks/filter_by_value.py +0 -0
  144. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/deprecated_blocks/flatten_columns.py +0 -0
  145. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/deprecated_blocks/llmblock.py +0 -0
  146. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/deprecated_blocks/rename_columns.py +0 -0
  147. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/deprecated_blocks/sample_populator.py +0 -0
  148. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/deprecated_blocks/set_to_majority_value.py +0 -0
  149. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/evaluation/__init__.py +0 -0
  150. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/filtering/__init__.py +0 -0
  151. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/filtering/column_value_filter.py +0 -0
  152. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/llm/__init__.py +0 -0
  153. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/llm/error_handler.py +0 -0
  154. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/llm/prompt_builder_block.py +0 -0
  155. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/transform/duplicate_columns.py +0 -0
  156. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/transform/melt_columns.py +0 -0
  157. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/transform/rename_columns.py +0 -0
  158. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/transform/text_concat.py +0 -0
  159. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/blocks/transform/uniform_col_val_setter.py +0 -0
  160. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/flow/__init__.py +0 -0
  161. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/flow/checkpointer.py +0 -0
  162. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/flow/metadata.py +0 -0
  163. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/flow/migration.py +0 -0
  164. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/flow/registry.py +0 -0
  165. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/flow/validation.py +0 -0
  166. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/utils/__init__.py +0 -0
  167. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/utils/error_handling.py +0 -0
  168. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/utils/flow_id_words.yaml +0 -0
  169. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/utils/flow_identifier.py +0 -0
  170. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/utils/path_resolution.py +0 -0
  171. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/core/utils/yaml_utils.py +0 -0
  172. {sdg_hub-0.2.1/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab → sdg_hub-0.3.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa}/__init__.py +0 -0
  173. {sdg_hub-0.2.1/tests → sdg_hub-0.3.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary}/__init__.py +0 -0
  174. /sdg_hub-0.2.1/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/README.md → /sdg_hub-0.3.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/__init__.py +0 -0
  175. /sdg_hub-0.2.1/src/sdg_hub/py.typed → /sdg_hub-0.3.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/__init__.py +0 -0
  176. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/atomic_facts.yaml +0 -0
  177. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/detailed_summary.yaml +0 -0
  178. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_faithfulness.yaml +0 -0
  179. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_question.yaml +0 -0
  180. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_relevancy.yaml +0 -0
  181. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/extractive_summary.yaml +0 -0
  182. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/generate_questions_responses.yaml +0 -0
  183. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub.egg-info/dependency_links.txt +0 -0
  184. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/src/sdg_hub.egg-info/top_level.txt +0 -0
  185. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/deprecated/test_llmblock.py +0 -0
  186. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/evaluation/__init__.py +0 -0
  187. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/filtering/test_columnvaluefilter.py +0 -0
  188. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/llm/test_promptbuilderblock.py +0 -0
  189. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/test_base_block.py +0 -0
  190. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/testdata/test_config.yaml +0 -0
  191. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/testdata/test_evaluate_faithfulness.yaml +0 -0
  192. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/testdata/test_evaluate_relevancy.yaml +0 -0
  193. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/testdata/test_prompt_format_config.yaml +0 -0
  194. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/testdata/test_prompt_format_no_system.yaml +0 -0
  195. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/testdata/test_prompt_format_strict.yaml +0 -0
  196. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/testdata/test_prompt_invalid_final_role.yaml +0 -0
  197. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/testdata/test_prompt_no_user_messages.yaml +0 -0
  198. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/testdata/test_verify_question.yaml +0 -0
  199. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/transform/test_index_based_mapper.py +0 -0
  200. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/transform/test_melt_columns.py +0 -0
  201. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/transform/test_text_concat.py +0 -0
  202. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/transform/test_uniform_col_val_setter.py +0 -0
  203. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/utilblocks/test_combinecolumns.py +0 -0
  204. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/utilblocks/test_duplicatecolumnsblock.py +0 -0
  205. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/utilblocks/test_flattenblock.py +0 -0
  206. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/utilblocks/test_renameblock.py +0 -0
  207. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/utilblocks/test_samplepopulatorblock.py +0 -0
  208. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/utilblocks/test_selectorblock.py +0 -0
  209. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/blocks/utilblocks/test_settomajority.py +0 -0
  210. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/flow/__init__.py +0 -0
  211. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/flow/test_checkpointer.py +0 -0
  212. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/flow/test_metadata.py +0 -0
  213. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/flow/test_migration.py +0 -0
  214. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/flow/test_registry.py +0 -0
  215. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/flow/test_validation.py +0 -0
  216. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/utils/test_error_handling.py +0 -0
  217. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tests/utils/test_path_resolution.py +0 -0
  218. {sdg_hub-0.2.1 → sdg_hub-0.3.0}/tox.ini +0 -0
@@ -78,7 +78,7 @@ jobs:
78
78
  path: dist
79
79
 
80
80
  - name: "Upload to Test PyPI"
81
- uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc # v1.12.4
81
+ uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0
82
82
  with:
83
83
  repository-url: https://test.pypi.org/legacy/
84
84
 
@@ -130,4 +130,4 @@ jobs:
130
130
  rm ./dist/*.sigstore.json
131
131
 
132
132
  - name: "Upload to PyPI"
133
- uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc # v1.12.4
133
+ uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sdg_hub
3
- Version: 0.2.1
3
+ Version: 0.3.0
4
4
  Summary: Synthetic Data Generation
5
5
  Author-email: Red Hat AI Innovation <abhandwa@redhat.com>
6
6
  License: Apache-2.0
@@ -27,7 +27,6 @@ Requires-Dist: datasets<4.0.0,>=2.18.0
27
27
  Requires-Dist: httpx<1.0.0,>=0.25.0
28
28
  Requires-Dist: jinja2
29
29
  Requires-Dist: litellm<1.75.0,>=1.73.0
30
- Requires-Dist: openai<2.0.0,>=1.13.3
31
30
  Requires-Dist: rich
32
31
  Requires-Dist: pydantic<3.0.0,>=2.0.0
33
32
  Requires-Dist: python-dotenv<2.0.0,>=1.0.0
@@ -54,6 +53,7 @@ Requires-Dist: sentence-transformers; extra == "examples"
54
53
  Requires-Dist: instructor; extra == "examples"
55
54
  Requires-Dist: fastapi; extra == "examples"
56
55
  Requires-Dist: nest-asyncio; extra == "examples"
56
+ Requires-Dist: ipykernel; extra == "examples"
57
57
  Provides-Extra: dev
58
58
  Requires-Dist: pre-commit<4.0,>=3.0.4; extra == "dev"
59
59
  Requires-Dist: pylint<4.0,>=2.16.2; extra == "dev"
@@ -64,6 +64,7 @@ Requires-Dist: pytest-cov; extra == "dev"
64
64
  Requires-Dist: pytest-html; extra == "dev"
65
65
  Requires-Dist: tox<5,>=4.4.2; extra == "dev"
66
66
  Requires-Dist: ruff; extra == "dev"
67
+ Requires-Dist: pytest-env; extra == "dev"
67
68
  Dynamic: license-file
68
69
 
69
70
  # `sdg_hub`: Synthetic Data Generation Toolkit
@@ -92,6 +93,8 @@ A modular Python framework for building synthetic data generation pipelines usin
92
93
 
93
94
  **📊 Rich Monitoring** - Detailed logging with progress bars and execution summaries.
94
95
 
96
+ **📋 Dataset Schema Discovery** - Instantly discover the data format a flow requires. Get an empty dataset with the correct schema for easy validation and data preparation.
97
+
95
98
  **🧩 Easily Extensible** - Create custom blocks with simple inheritance. Rich logging and monitoring built-in.
96
99
 
97
100
 
@@ -176,22 +179,46 @@ flow.set_model_config(
176
179
  api_key="your_key",
177
180
  )
178
181
  ```
179
- #### Load your dataset and run the flow
182
+ #### Discover dataset requirements and create your dataset
180
183
  ```python
181
- # Create your dataset with required columns
182
- dataset = Dataset.from_dict({
183
- 'document': ['Your document text here...'],
184
- 'document_outline': ['1. Topic A; 2. Topic B; 3. Topic C'],
185
- 'domain': ['Computer Science'],
186
- 'icl_document': ['Example document for in-context learning...'],
187
- 'icl_query_1': ['Example question 1?'],
188
- 'icl_response_1': ['Example answer 1'],
189
- 'icl_query_2': ['Example question 2?'],
190
- 'icl_response_2': ['Example answer 2'],
191
- 'icl_query_3': ['Example question 3?'],
192
- 'icl_response_3': ['Example answer 3']
184
+ # First, discover what data the flow needs
185
+ # Get an empty dataset with the exact schema needed
186
+ schema_dataset = flow.get_dataset_schema() # Get empty dataset with correct schema
187
+ print(f"Required columns: {schema_dataset.column_names}")
188
+ print(f"Schema: {schema_dataset.features}")
189
+
190
+ # Option 1: Add data directly to the schema dataset
191
+ dataset = schema_dataset.add_item({
192
+ 'document': 'Your document text here...',
193
+ 'document_outline': '1. Topic A; 2. Topic B; 3. Topic C',
194
+ 'domain': 'Computer Science',
195
+ 'icl_document': 'Example document for in-context learning...',
196
+ 'icl_query_1': 'Example question 1?',
197
+ 'icl_response_1': 'Example answer 1',
198
+ 'icl_query_2': 'Example question 2?',
199
+ 'icl_response_2': 'Example answer 2',
200
+ 'icl_query_3': 'Example question 3?',
201
+ 'icl_response_3': 'Example answer 3'
193
202
  })
194
203
 
204
+ # Option 2: Create your own dataset and validate the schema
205
+ my_dataset = Dataset.from_dict(my_data_dict)
206
+ if my_dataset.features == schema_dataset.features:
207
+ print("✅ Schema matches - ready to generate!")
208
+ dataset = my_dataset
209
+ else:
210
+ print("❌ Schema mismatch - check your columns")
211
+
212
+ # Option 3: Get raw requirements for detailed inspection
213
+ requirements = flow.get_dataset_requirements()
214
+ if requirements:
215
+ print(f"Required: {requirements.required_columns}")
216
+ print(f"Optional: {requirements.optional_columns}")
217
+ print(f"Min samples: {requirements.min_samples}")
218
+ ```
219
+
220
+ #### Dry Run and Generate
221
+ ```python
195
222
  # Quick Testing with Dry Run
196
223
  dry_result = flow.dry_run(dataset, sample_size=1)
197
224
  print(f"Dry run completed in {dry_result['execution_time_seconds']:.2f}s")
@@ -24,6 +24,8 @@ A modular Python framework for building synthetic data generation pipelines usin
24
24
 
25
25
  **📊 Rich Monitoring** - Detailed logging with progress bars and execution summaries.
26
26
 
27
+ **📋 Dataset Schema Discovery** - Instantly discover the data format a flow requires. Get an empty dataset with the correct schema for easy validation and data preparation.
28
+
27
29
  **🧩 Easily Extensible** - Create custom blocks with simple inheritance. Rich logging and monitoring built-in.
28
30
 
29
31
 
@@ -108,22 +110,46 @@ flow.set_model_config(
108
110
  api_key="your_key",
109
111
  )
110
112
  ```
111
- #### Load your dataset and run the flow
113
+ #### Discover dataset requirements and create your dataset
112
114
  ```python
113
- # Create your dataset with required columns
114
- dataset = Dataset.from_dict({
115
- 'document': ['Your document text here...'],
116
- 'document_outline': ['1. Topic A; 2. Topic B; 3. Topic C'],
117
- 'domain': ['Computer Science'],
118
- 'icl_document': ['Example document for in-context learning...'],
119
- 'icl_query_1': ['Example question 1?'],
120
- 'icl_response_1': ['Example answer 1'],
121
- 'icl_query_2': ['Example question 2?'],
122
- 'icl_response_2': ['Example answer 2'],
123
- 'icl_query_3': ['Example question 3?'],
124
- 'icl_response_3': ['Example answer 3']
115
+ # First, discover what data the flow needs
116
+ # Get an empty dataset with the exact schema needed
117
+ schema_dataset = flow.get_dataset_schema() # Get empty dataset with correct schema
118
+ print(f"Required columns: {schema_dataset.column_names}")
119
+ print(f"Schema: {schema_dataset.features}")
120
+
121
+ # Option 1: Add data directly to the schema dataset
122
+ dataset = schema_dataset.add_item({
123
+ 'document': 'Your document text here...',
124
+ 'document_outline': '1. Topic A; 2. Topic B; 3. Topic C',
125
+ 'domain': 'Computer Science',
126
+ 'icl_document': 'Example document for in-context learning...',
127
+ 'icl_query_1': 'Example question 1?',
128
+ 'icl_response_1': 'Example answer 1',
129
+ 'icl_query_2': 'Example question 2?',
130
+ 'icl_response_2': 'Example answer 2',
131
+ 'icl_query_3': 'Example question 3?',
132
+ 'icl_response_3': 'Example answer 3'
125
133
  })
126
134
 
135
+ # Option 2: Create your own dataset and validate the schema
136
+ my_dataset = Dataset.from_dict(my_data_dict)
137
+ if my_dataset.features == schema_dataset.features:
138
+ print("✅ Schema matches - ready to generate!")
139
+ dataset = my_dataset
140
+ else:
141
+ print("❌ Schema mismatch - check your columns")
142
+
143
+ # Option 3: Get raw requirements for detailed inspection
144
+ requirements = flow.get_dataset_requirements()
145
+ if requirements:
146
+ print(f"Required: {requirements.required_columns}")
147
+ print(f"Optional: {requirements.optional_columns}")
148
+ print(f"Min samples: {requirements.min_samples}")
149
+ ```
150
+
151
+ #### Dry Run and Generate
152
+ ```python
127
153
  # Quick Testing with Dry Run
128
154
  dry_result = flow.dry_run(dataset, sample_size=1)
129
155
  print(f"Dry run completed in {dry_result['execution_time_seconds']:.2f}s")
@@ -34,12 +34,9 @@ The unified chat block that replaces provider-specific implementations with a si
34
34
  ### Basic Usage
35
35
 
36
36
  ```python
37
- from sdg_hub.core.blocks import BlockRegistry
37
+ from sdg_hub.core.blocks import LLMChatBlock
38
38
  from datasets import Dataset
39
39
 
40
- # Get the LLM chat block
41
- LLMChatBlock = BlockRegistry.get_block("LLMChatBlock")
42
-
43
40
  # Configure for OpenAI
44
41
  chat_block = LLMChatBlock(
45
42
  block_name="question_answerer",
@@ -133,7 +130,7 @@ dataset = Dataset.from_dict({
133
130
  })
134
131
  ```
135
132
 
136
- #### Async Processing
133
+ #### Async Processing & Concurrency Control
137
134
  ```python
138
135
  chat_block = LLMChatBlock(
139
136
  block_name="async_chat",
@@ -147,6 +144,56 @@ chat_block = LLMChatBlock(
147
144
  result = chat_block.generate(large_dataset)
148
145
  ```
149
146
 
147
+ **Flow-Level Concurrency Control:**
148
+
149
+ When using LLM blocks within flows, you can control concurrency to prevent overwhelming API servers or hitting rate limits:
150
+
151
+ ```python
152
+ from sdg_hub import Flow
153
+
154
+ # Load a flow with LLM blocks
155
+ flow = Flow.from_yaml("path/to/your/flow.yaml")
156
+ flow.set_model_config(model="openai/gpt-4o", api_key="your-key")
157
+
158
+ # Control concurrency for each LLM block in the flow
159
+ result = flow.generate(
160
+ dataset,
161
+ max_concurrency=5 # Max 5 concurrent requests at any time
162
+ )
163
+ ```
164
+
165
+ **Benefits of Concurrency Control:**
166
+ - **Rate Limit Management** - Prevent API throttling by limiting concurrent requests
167
+ - **Resource Control** - Manage memory and network usage for large datasets
168
+ - **Provider-Friendly** - Respect API provider recommendations for concurrent requests
169
+ - **Automatic Scaling** - When no concurrency limit is set, requests run with maximum parallelism for the fastest processing
170
+
171
+ **How It Works:**
172
+
173
+ The unified async system automatically detects whether you're processing single or multiple messages and applies concurrency control appropriately:
174
+
175
+ ```python
176
+ # Single message - processed immediately
177
+ single_message = [{"role": "user", "content": "Hello"}]
178
+
179
+ # Multiple messages - concurrency controlled via semaphore
180
+ batch_messages = [
181
+ [{"role": "user", "content": "Question 1"}],
182
+ [{"role": "user", "content": "Question 2"}],
183
+ [{"role": "user", "content": "Question 3"}],
184
+ # ... up to thousands of messages
185
+ ]
186
+
187
+ # Both cases use the same unified API under the hood
188
+ # Concurrency is managed transparently
189
+ ```
190
+
191
+ **Performance Guidelines:**
192
+ - **Small datasets (<100 samples)**: No concurrency limit needed
193
+ - **Medium datasets (100-1000 samples)**: `max_concurrency=10-20`
194
+ - **Large datasets (1000+ samples)**: `max_concurrency=5-10` (respect API limits)
195
+ - **Production workloads**: Start conservative and tune based on error rates
196
+
150
197
  ### Message Format
151
198
 
152
199
  LLMChatBlock expects messages in OpenAI chat format:
@@ -30,14 +30,15 @@ All blocks inherit from `BaseBlock`, which provides:
30
30
 
31
31
  ### Standard Configuration
32
32
  ```python
33
- from sdg_hub.core.blocks import BlockRegistry
33
+ # Import the specific block you need
34
+ from sdg_hub.core.blocks import LLMChatBlock
34
35
 
35
36
  # Every block has these standard fields
36
- MyBlock = BlockRegistry.get_block("SomeBlockType")
37
- block = MyBlock(
37
+ block = LLMChatBlock(
38
38
  block_name="my_unique_block", # Required: unique identifier
39
- input_cols=["column1", "column2"], # Columns this block needs
40
- output_cols=["new_column"], # Columns this block creates
39
+ input_cols=["input_text"], # Column this block needs
40
+ output_cols=["response"], # Column this block creates
41
+ model="openai/gpt-4o", # Required: provider/model format
41
42
  # ... block-specific configuration
42
43
  )
43
44
  ```
@@ -86,13 +87,13 @@ print(f"Found {len(available_blocks)} blocks")
86
87
 
87
88
  ### 2. Block Instantiation
88
89
  ```python
89
- # Get a block class by name
90
- ChatBlock = BlockRegistry.get_block("LLMChatBlock")
90
+ # Import the specific block you need
91
+ from sdg_hub.core.blocks import LLMChatBlock
91
92
 
92
93
  # Create an instance with configuration
93
- chat_block = ChatBlock(
94
+ chat_block = LLMChatBlock(
94
95
  block_name="question_answerer",
95
- llm_config={"model": "gpt-4o"},
96
+ model="openai/gpt-4o",
96
97
  input_cols=["question"],
97
98
  output_cols=["answer"],
98
99
  prompt_template="Answer this question: {question}"
@@ -159,7 +159,13 @@ Every block validates data at runtime:
159
159
  - Watch execution logs for bottlenecks
160
160
  - Use async-friendly blocks for LLM operations
161
161
 
162
- ### 4. Design for Reuse
162
+ ### 4. Optimize for Scale
163
+ - Use `max_concurrency` parameter to control API request rates
164
+ - Start with conservative concurrency limits (5-10) for production
165
+ - Increase concurrency carefully while monitoring error rates
166
+ - Consider provider-specific rate limits and costs
167
+
168
+ ### 5. Design for Reuse
163
169
  - Create modular flows that can be combined
164
170
  - Use parameters for customization points
165
171
 
@@ -123,15 +123,16 @@ Create comprehensive tests following this pattern:
123
123
 
124
124
  import pytest
125
125
  from datasets import Dataset
126
- from sdg_hub.core.blocks import BlockRegistry
127
126
  from sdg_hub.core.utils.error_handling import MissingColumnError
127
+ # Import your custom block directly
128
+ from .my_new_block import MyNewBlock
128
129
 
129
130
  class TestMyNewBlock:
130
131
  """Test suite for MyNewBlock."""
131
132
 
132
133
  def test_basic_functionality(self):
133
134
  """Test basic block functionality."""
134
- block = BlockRegistry.get_block("MyNewBlock")(
135
+ block = MyNewBlock(
135
136
  block_name="test_block",
136
137
  input_cols=["input"],
137
138
  output_cols=["output"]
@@ -149,7 +150,7 @@ class TestMyNewBlock:
149
150
  def test_configuration_validation(self):
150
151
  """Test parameter validation."""
151
152
  with pytest.raises(ValueError):
152
- BlockRegistry.get_block("MyNewBlock")(
153
+ MyNewBlock(
153
154
  block_name="bad_config",
154
155
  input_cols=["input"],
155
156
  output_cols=["output"],
@@ -158,7 +159,7 @@ class TestMyNewBlock:
158
159
 
159
160
  def test_missing_columns(self):
160
161
  """Test error handling for missing columns."""
161
- block = BlockRegistry.get_block("MyNewBlock")(
162
+ block = MyNewBlock(
162
163
  block_name="test_block",
163
164
  input_cols=["missing_column"],
164
165
  output_cols=["output"]
@@ -296,6 +296,51 @@ result = flow.generate(
296
296
  )
297
297
  ```
298
298
 
299
+ ### Concurrency Control
300
+
301
+ For flows containing LLM blocks, you can control the maximum number of concurrent API requests to avoid overwhelming servers or hitting rate limits:
302
+
303
+ ```python
304
+ # Basic concurrency control
305
+ result = flow.generate(
306
+ dataset,
307
+ max_concurrency=5 # Max 5 concurrent requests per LLM block execution
308
+ )
309
+
310
+ # Combined with other parameters
311
+ result = flow.generate(
312
+ dataset,
313
+ max_concurrency=10,
314
+ runtime_params={
315
+ "temperature": 0.7,
316
+ "max_tokens": 200
317
+ }
318
+ )
319
+ ```
320
+
321
+ **When to Use Concurrency Control:**
322
+
323
+ - **Large Datasets** - Process thousands of samples without overwhelming APIs
324
+ - **Rate Limit Management** - Respect provider-specific concurrent request limits
325
+ - **Production Workloads** - Ensure stable, predictable resource usage
326
+ - **Cost Optimization** - Prevent burst API charges from uncontrolled parallelism
327
+
328
+ **Recommended Settings:**
329
+
330
+ ```python
331
+ # Conservative (recommended for production)
332
+ result = flow.generate(dataset, max_concurrency=5)
333
+
334
+ # Moderate (good for development/testing)
335
+ result = flow.generate(dataset, max_concurrency=10)
336
+
337
+ # Aggressive (only for robust APIs and small datasets)
338
+ result = flow.generate(dataset, max_concurrency=20)
339
+
340
+ # No limit (maximum speed, use with caution)
341
+ result = flow.generate(dataset) # Default behavior
342
+ ```
343
+
299
344
  ## 🚀 Next Steps
300
345
 
301
346
  Ready to master the flow system? Explore these detailed guides:
@@ -107,11 +107,16 @@ print(f"🔎 QA Generation Flows: {qa_flows}")
107
107
  eval_flows = FlowRegistry.search_flows(tag="evaluation")
108
108
  print(f"📊 Evaluation Flows: {eval_flows}")
109
109
 
110
+ # List all blocks, grouped by category
111
+ all_blocks = BlockRegistry.list_blocks(grouped=True)
112
+ for category, blocks in all_blocks.items():
113
+ print(f"Blocks for category {category}: {blocks}")
114
+
110
115
  # Find blocks by category
111
- llm_blocks = BlockRegistry.search_blocks(category="llm")
116
+ llm_blocks = BlockRegistry.list_blocks(category="llm")
112
117
  print(f"🧠 LLM Blocks: {llm_blocks}")
113
118
 
114
- transform_blocks = BlockRegistry.search_blocks(category="transform")
119
+ transform_blocks = BlockRegistry.list_blocks(category="transform")
115
120
  print(f"🔄 Transform Blocks: {transform_blocks}")
116
121
  ```
117
122