sdg-hub 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187)
  1. {sdg_hub-0.2.0/src/sdg_hub.egg-info → sdg_hub-0.2.2}/PKG-INFO +59 -31
  2. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/README.md +58 -29
  3. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/blocks/llm-blocks.md +52 -5
  4. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/blocks/overview.md +10 -9
  5. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/concepts.md +7 -1
  6. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/development.md +5 -4
  7. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/flows/overview.md +64 -3
  8. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/quick-start.md +7 -2
  9. sdg_hub-0.2.2/examples/annotation/annotation_classification.ipynb +840 -0
  10. sdg_hub-0.2.2/examples/annotation/news_classification_assessment_prompt.yaml +42 -0
  11. sdg_hub-0.2.2/examples/annotation/news_classification_flow.yaml +185 -0
  12. sdg_hub-0.2.2/examples/annotation/news_classification_prompt.yaml +11 -0
  13. sdg_hub-0.2.2/examples/annotation/revise_news_classification_prompt.yaml +19 -0
  14. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/examples/knowledge_tuning/instructlab/README.md +1 -1
  15. sdg_hub-0.2.2/examples/knowledge_tuning/instructlab/assets/imgs/instructlab-banner.png +0 -0
  16. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/pyproject.toml +0 -1
  17. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/_version.py +16 -3
  18. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/deprecated_blocks/selector.py +1 -1
  19. sdg_hub-0.2.2/src/sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +323 -0
  20. sdg_hub-0.2.2/src/sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +323 -0
  21. sdg_hub-0.2.2/src/sdg_hub/core/blocks/evaluation/verify_question_block.py +329 -0
  22. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/llm/__init__.py +2 -0
  23. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/llm/client_manager.py +61 -24
  24. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/llm/config.py +1 -0
  25. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/llm/llm_chat_block.py +62 -7
  26. sdg_hub-0.2.2/src/sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +653 -0
  27. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/llm/text_parser_block.py +75 -30
  28. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/registry.py +49 -35
  29. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/transform/index_based_mapper.py +1 -1
  30. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/flow/base.py +370 -20
  31. sdg_hub-0.2.2/src/sdg_hub/core/flow/checkpointer.py +333 -0
  32. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/flow/metadata.py +45 -0
  33. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/flow/migration.py +12 -1
  34. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/flow/registry.py +121 -58
  35. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/flow/validation.py +12 -0
  36. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/utils/__init__.py +2 -1
  37. sdg_hub-0.2.2/src/sdg_hub/core/utils/datautils.py +92 -0
  38. sdg_hub-0.2.2/src/sdg_hub/core/utils/flow_id_words.yaml +231 -0
  39. sdg_hub-0.2.2/src/sdg_hub/core/utils/flow_identifier.py +94 -0
  40. sdg_hub-0.2.2/src/sdg_hub/core/utils/yaml_utils.py +59 -0
  41. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +1 -7
  42. {sdg_hub-0.2.0 → sdg_hub-0.2.2/src/sdg_hub.egg-info}/PKG-INFO +59 -31
  43. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub.egg-info/SOURCES.txt +15 -0
  44. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub.egg-info/requires.txt +0 -1
  45. sdg_hub-0.2.2/tests/blocks/evaluation/test_evaluate_faithfulness_block.py +271 -0
  46. sdg_hub-0.2.2/tests/blocks/evaluation/test_evaluate_relevancy_block.py +189 -0
  47. sdg_hub-0.2.2/tests/blocks/evaluation/test_verify_question_block.py +331 -0
  48. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/llm/test_llm_chat_block.py +2 -0
  49. sdg_hub-0.2.2/tests/blocks/llm/test_llm_chat_with_parsing_retry_block.py +1329 -0
  50. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/llm/test_textparserblock.py +241 -0
  51. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/test_registry.py +198 -34
  52. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/flow/conftest.py +1 -1
  53. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/flow/test_base.py +337 -2
  54. sdg_hub-0.2.2/tests/flow/test_checkpointer.py +331 -0
  55. sdg_hub-0.2.2/tests/flow/test_dataset_requirements.py +419 -0
  56. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/flow/test_integration.py +24 -9
  57. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/flow/test_metadata.py +43 -0
  58. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/flow/test_migration.py +90 -7
  59. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/flow/test_registry.py +248 -28
  60. sdg_hub-0.2.2/tests/utils/test_datautils.py +43 -0
  61. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/utils/test_error_handling.py +1 -2
  62. sdg_hub-0.2.0/src/sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +0 -564
  63. sdg_hub-0.2.0/src/sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +0 -564
  64. sdg_hub-0.2.0/src/sdg_hub/core/blocks/evaluation/verify_question_block.py +0 -564
  65. sdg_hub-0.2.0/src/sdg_hub/core/utils/datautils.py +0 -12
  66. sdg_hub-0.2.0/tests/blocks/evaluation/test_evaluate_faithfulness_block.py +0 -496
  67. sdg_hub-0.2.0/tests/blocks/evaluation/test_evaluate_relevancy_block.py +0 -493
  68. sdg_hub-0.2.0/tests/blocks/evaluation/test_verify_question_block.py +0 -480
  69. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.github/actionlint.yaml +0 -0
  70. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.github/actions/free-disk-space/action.yml +0 -0
  71. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.github/dependabot.yml +0 -0
  72. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.github/mergify.yml +0 -0
  73. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.github/workflows/actionlint.dockerfile +0 -0
  74. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.github/workflows/actionlint.yml +0 -0
  75. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.github/workflows/docs.yml +0 -0
  76. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.github/workflows/e2e.yml +0 -0
  77. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.github/workflows/lint.yml +0 -0
  78. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.github/workflows/matchers/actionlint.json +0 -0
  79. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.github/workflows/matchers/pylint.json +0 -0
  80. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.github/workflows/pypi.yaml +0 -0
  81. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.github/workflows/test.yml +0 -0
  82. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.gitignore +0 -0
  83. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.isort.cfg +0 -0
  84. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.markdownlint-cli2.yaml +0 -0
  85. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.pre-commit-config.yaml +0 -0
  86. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/.pylintrc +0 -0
  87. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/CLAUDE.md +0 -0
  88. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/CONTRIBUTING.md +0 -0
  89. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/LICENSE +0 -0
  90. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/Makefile +0 -0
  91. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/.nojekyll +0 -0
  92. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/README.md +0 -0
  93. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/_coverpage.md +0 -0
  94. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/_navbar.md +0 -0
  95. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/_sidebar.md +0 -0
  96. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/api-reference.md +0 -0
  97. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/blocks/custom-blocks.md +0 -0
  98. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/blocks/evaluation-blocks.md +0 -0
  99. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/blocks/filtering-blocks.md +0 -0
  100. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/blocks/transform-blocks.md +0 -0
  101. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/flows/discovery.md +0 -0
  102. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/index.html +0 -0
  103. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/docs/installation.md +0 -0
  104. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/examples/knowledge_tuning/instructlab/.gitignore +0 -0
  105. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/examples/knowledge_tuning/instructlab/docling_v2_config.yaml +0 -0
  106. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/examples/knowledge_tuning/instructlab/docparser.py +0 -0
  107. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/examples/knowledge_tuning/instructlab/docparser_v2.py +0 -0
  108. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.json +0 -0
  109. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.md +0 -0
  110. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.pdf +0 -0
  111. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/qna.yaml +0 -0
  112. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/examples/knowledge_tuning/instructlab/document_pre_processing.ipynb +0 -0
  113. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/examples/knowledge_tuning/instructlab/knowledge_generation_and_mixing.ipynb +0 -0
  114. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/examples/knowledge_tuning/instructlab/logger_config.py +0 -0
  115. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/examples/knowledge_tuning/knowledge_utils.py +0 -0
  116. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/scripts/ruff.sh +0 -0
  117. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/setup.cfg +0 -0
  118. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/__init__.py +0 -0
  119. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/__init__.py +0 -0
  120. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/__init__.py +0 -0
  121. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/base.py +0 -0
  122. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/deprecated_blocks/__init__.py +0 -0
  123. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/deprecated_blocks/combine_columns.py +0 -0
  124. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/deprecated_blocks/duplicate_columns.py +0 -0
  125. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/deprecated_blocks/filter_by_value.py +0 -0
  126. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/deprecated_blocks/flatten_columns.py +0 -0
  127. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/deprecated_blocks/llmblock.py +0 -0
  128. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/deprecated_blocks/rename_columns.py +0 -0
  129. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/deprecated_blocks/sample_populator.py +0 -0
  130. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/deprecated_blocks/set_to_majority_value.py +0 -0
  131. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/evaluation/__init__.py +0 -0
  132. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/filtering/__init__.py +0 -0
  133. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/filtering/column_value_filter.py +0 -0
  134. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/llm/error_handler.py +0 -0
  135. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/llm/prompt_builder_block.py +0 -0
  136. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/transform/__init__.py +0 -0
  137. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/transform/duplicate_columns.py +0 -0
  138. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/transform/melt_columns.py +0 -0
  139. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/transform/rename_columns.py +0 -0
  140. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/transform/text_concat.py +0 -0
  141. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/blocks/transform/uniform_col_val_setter.py +0 -0
  142. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/flow/__init__.py +0 -0
  143. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/utils/error_handling.py +0 -0
  144. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/utils/logger_config.py +0 -0
  145. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/core/utils/path_resolution.py +0 -0
  146. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/README.md +0 -0
  147. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/__init__.py +0 -0
  148. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/atomic_facts.yaml +0 -0
  149. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/detailed_summary.yaml +0 -0
  150. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_faithfulness.yaml +0 -0
  151. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_question.yaml +0 -0
  152. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_relevancy.yaml +0 -0
  153. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/extractive_summary.yaml +0 -0
  154. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/generate_questions_responses.yaml +0 -0
  155. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub/py.typed +0 -0
  156. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub.egg-info/dependency_links.txt +0 -0
  157. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/src/sdg_hub.egg-info/top_level.txt +0 -0
  158. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/__init__.py +0 -0
  159. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/deprecated/test_llmblock.py +0 -0
  160. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/evaluation/__init__.py +0 -0
  161. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/filtering/test_columnvaluefilter.py +0 -0
  162. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/llm/test_promptbuilderblock.py +0 -0
  163. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/test_base_block.py +0 -0
  164. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/testdata/test_config.yaml +0 -0
  165. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/testdata/test_evaluate_faithfulness.yaml +0 -0
  166. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/testdata/test_evaluate_relevancy.yaml +0 -0
  167. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/testdata/test_prompt_format_config.yaml +0 -0
  168. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/testdata/test_prompt_format_no_system.yaml +0 -0
  169. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/testdata/test_prompt_format_strict.yaml +0 -0
  170. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/testdata/test_prompt_invalid_final_role.yaml +0 -0
  171. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/testdata/test_prompt_no_user_messages.yaml +0 -0
  172. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/testdata/test_verify_question.yaml +0 -0
  173. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/transform/test_index_based_mapper.py +0 -0
  174. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/transform/test_melt_columns.py +0 -0
  175. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/transform/test_text_concat.py +0 -0
  176. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/transform/test_uniform_col_val_setter.py +0 -0
  177. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/utilblocks/test_combinecolumns.py +0 -0
  178. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/utilblocks/test_duplicatecolumnsblock.py +0 -0
  179. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/utilblocks/test_flattenblock.py +0 -0
  180. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/utilblocks/test_renameblock.py +0 -0
  181. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/utilblocks/test_samplepopulatorblock.py +0 -0
  182. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/utilblocks/test_selectorblock.py +0 -0
  183. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/blocks/utilblocks/test_settomajority.py +0 -0
  184. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/flow/__init__.py +0 -0
  185. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/flow/test_validation.py +0 -0
  186. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tests/utils/test_path_resolution.py +0 -0
  187. {sdg_hub-0.2.0 → sdg_hub-0.2.2}/tox.ini +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sdg_hub
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: Synthetic Data Generation
5
5
  Author-email: Red Hat AI Innovation <abhandwa@redhat.com>
6
6
  License: Apache-2.0
@@ -27,7 +27,6 @@ Requires-Dist: datasets<4.0.0,>=2.18.0
27
27
  Requires-Dist: httpx<1.0.0,>=0.25.0
28
28
  Requires-Dist: jinja2
29
29
  Requires-Dist: litellm<1.75.0,>=1.73.0
30
- Requires-Dist: openai<2.0.0,>=1.13.3
31
30
  Requires-Dist: rich
32
31
  Requires-Dist: pydantic<3.0.0,>=2.0.0
33
32
  Requires-Dist: python-dotenv<2.0.0,>=1.0.0
@@ -92,6 +91,8 @@ A modular Python framework for building synthetic data generation pipelines usin
92
91
 
93
92
  **📊 Rich Monitoring** - Detailed logging with progress bars and execution summaries.
94
93
 
94
+ **📋 Dataset Schema Discovery** - Instantly discover required data formats. Get empty datasets with correct schema for easy validation and data preparation.
95
+
95
96
  **🧩 Easily Extensible** - Create custom blocks with simple inheritance. Rich logging and monitoring built-in.
96
97
 
97
98
 
@@ -121,7 +122,7 @@ uv pip install sdg-hub[examples]
121
122
 
122
123
  ## 🚀 Quick Start
123
124
 
124
- ### 🧱 Core Concepts
125
+ ### Core Concepts
125
126
 
126
127
  **Blocks** are composable units that transform datasets - think of them as data processing Lego pieces. Each block performs a specific task: LLM chat, text parsing, evaluation, or transformation.
127
128
 
@@ -136,7 +137,7 @@ dataset → Block₁ → Block₂ → Block₃ → enriched_dataset
136
137
 
137
138
  #### Flow Discovery
138
139
  ```python
139
- from sdg_hub import FlowRegistry
140
+ from sdg_hub import FlowRegistry, Flow
140
141
 
141
142
  # Auto-discover all available flows (no setup needed!)
142
143
  FlowRegistry.discover_flows()
@@ -150,16 +151,20 @@ qa_flows = FlowRegistry.search_flows(tag="question-generation")
150
151
  print(f"QA flows: {qa_flows}")
151
152
  ```
152
153
 
153
- #### Using Flows
154
+ Each flow has a **unique, human-readable ID** automatically generated from its name. These IDs provide a convenient shorthand for referencing flows:
155
+
154
156
  ```python
155
- from sdg_hub import FlowRegistry, Flow
156
- from datasets import Dataset
157
+ # Every flow gets a deterministic ID
158
+ # Same flow name always generates the same ID
159
+ flow_id = "small-rock-799"
157
160
 
158
- # Load the flow by name
159
- flow_name = "Advanced Document Grounded Question-Answer Generation Flow for Knowledge Tuning"
160
- flow_path = FlowRegistry.get_flow_path(flow_name)
161
+ # Use ID to reference the flow
162
+ flow_path = FlowRegistry.get_flow_path(flow_id)
161
163
  flow = Flow.from_yaml(flow_path)
164
+ ```
162
165
 
166
+ #### Discovering and Configuring Models
167
+ ```python
163
168
  # Discover recommended models
164
169
  default_model = flow.get_default_model()
165
170
  recommendations = flow.get_model_recommendations()
@@ -171,21 +176,52 @@ flow.set_model_config(
171
176
  api_base="http://localhost:8000/v1",
172
177
  api_key="your_key",
173
178
  )
174
-
175
- # Create your dataset with required columns
176
- dataset = Dataset.from_dict({
177
- 'document': ['Your document text here...'],
178
- 'document_outline': ['1. Topic A; 2. Topic B; 3. Topic C'],
179
- 'domain': ['Computer Science'],
180
- 'icl_document': ['Example document for in-context learning...'],
181
- 'icl_query_1': ['Example question 1?'],
182
- 'icl_response_1': ['Example answer 1'],
183
- 'icl_query_2': ['Example question 2?'],
184
- 'icl_response_2': ['Example answer 2'],
185
- 'icl_query_3': ['Example question 3?'],
186
- 'icl_response_3': ['Example answer 3']
179
+ ```
180
+ #### Discover dataset requirements and create your dataset
181
+ ```python
182
+ # First, discover what data the flow needs
183
+ # Get an empty dataset with the exact schema needed
184
+ schema_dataset = flow.get_dataset_schema() # Get empty dataset with correct schema
185
+ print(f"Required columns: {schema_dataset.column_names}")
186
+ print(f"Schema: {schema_dataset.features}")
187
+
188
+ # Option 1: Add data directly to the schema dataset
189
+ dataset = schema_dataset.add_item({
190
+ 'document': 'Your document text here...',
191
+ 'document_outline': '1. Topic A; 2. Topic B; 3. Topic C',
192
+ 'domain': 'Computer Science',
193
+ 'icl_document': 'Example document for in-context learning...',
194
+ 'icl_query_1': 'Example question 1?',
195
+ 'icl_response_1': 'Example answer 1',
196
+ 'icl_query_2': 'Example question 2?',
197
+ 'icl_response_2': 'Example answer 2',
198
+ 'icl_query_3': 'Example question 3?',
199
+ 'icl_response_3': 'Example answer 3'
187
200
  })
188
201
 
202
+ # Option 2: Create your own dataset (requires `from datasets import Dataset`) and validate the schema
203
+ my_dataset = Dataset.from_dict(my_data_dict)
204
+ if my_dataset.features == schema_dataset.features:
205
+ print("✅ Schema matches - ready to generate!")
206
+ dataset = my_dataset
207
+ else:
208
+ print("❌ Schema mismatch - check your columns")
209
+
210
+ # Option 3: Get raw requirements for detailed inspection
211
+ requirements = flow.get_dataset_requirements()
212
+ if requirements:
213
+ print(f"Required: {requirements.required_columns}")
214
+ print(f"Optional: {requirements.optional_columns}")
215
+ print(f"Min samples: {requirements.min_samples}")
216
+ ```
217
+
218
+ #### Dry Run and Generate
219
+ ```python
220
+ # Quick Testing with Dry Run
221
+ dry_result = flow.dry_run(dataset, sample_size=1)
222
+ print(f"Dry run completed in {dry_result['execution_time_seconds']:.2f}s")
223
+ print(f"Output columns: {dry_result['final_dataset']['columns']}")
224
+
189
225
  # Generate high-quality QA pairs
190
226
  result = flow.generate(dataset)
191
227
 
@@ -196,14 +232,6 @@ faithfulness_scores = result['faithfulness_judgment']
196
232
  relevancy_scores = result['relevancy_score']
197
233
  ```
198
234
 
199
- #### Quick Testing with Dry Run
200
- ```python
201
- # Test the flow with a small sample first
202
- dry_result = flow.dry_run(dataset, sample_size=1)
203
- print(f"Dry run completed in {dry_result['execution_time_seconds']:.2f}s")
204
- print(f"Output columns: {dry_result['final_dataset']['columns']}")
205
- ```
206
-
207
235
 
208
236
  ## 📄 License
209
237
 
@@ -24,6 +24,8 @@ A modular Python framework for building synthetic data generation pipelines usin
24
24
 
25
25
  **📊 Rich Monitoring** - Detailed logging with progress bars and execution summaries.
26
26
 
27
+ **📋 Dataset Schema Discovery** - Instantly discover required data formats. Get empty datasets with correct schema for easy validation and data preparation.
28
+
27
29
  **🧩 Easily Extensible** - Create custom blocks with simple inheritance. Rich logging and monitoring built-in.
28
30
 
29
31
 
@@ -53,7 +55,7 @@ uv pip install sdg-hub[examples]
53
55
 
54
56
  ## 🚀 Quick Start
55
57
 
56
- ### 🧱 Core Concepts
58
+ ### Core Concepts
57
59
 
58
60
  **Blocks** are composable units that transform datasets - think of them as data processing Lego pieces. Each block performs a specific task: LLM chat, text parsing, evaluation, or transformation.
59
61
 
@@ -68,7 +70,7 @@ dataset → Block₁ → Block₂ → Block₃ → enriched_dataset
68
70
 
69
71
  #### Flow Discovery
70
72
  ```python
71
- from sdg_hub import FlowRegistry
73
+ from sdg_hub import FlowRegistry, Flow
72
74
 
73
75
  # Auto-discover all available flows (no setup needed!)
74
76
  FlowRegistry.discover_flows()
@@ -82,16 +84,20 @@ qa_flows = FlowRegistry.search_flows(tag="question-generation")
82
84
  print(f"QA flows: {qa_flows}")
83
85
  ```
84
86
 
85
- #### Using Flows
87
+ Each flow has a **unique, human-readable ID** automatically generated from its name. These IDs provide a convenient shorthand for referencing flows:
88
+
86
89
  ```python
87
- from sdg_hub import FlowRegistry, Flow
88
- from datasets import Dataset
90
+ # Every flow gets a deterministic ID
91
+ # Same flow name always generates the same ID
92
+ flow_id = "small-rock-799"
89
93
 
90
- # Load the flow by name
91
- flow_name = "Advanced Document Grounded Question-Answer Generation Flow for Knowledge Tuning"
92
- flow_path = FlowRegistry.get_flow_path(flow_name)
94
+ # Use ID to reference the flow
95
+ flow_path = FlowRegistry.get_flow_path(flow_id)
93
96
  flow = Flow.from_yaml(flow_path)
97
+ ```
94
98
 
99
+ #### Discovering and Configuring Models
100
+ ```python
95
101
  # Discover recommended models
96
102
  default_model = flow.get_default_model()
97
103
  recommendations = flow.get_model_recommendations()
@@ -103,21 +109,52 @@ flow.set_model_config(
103
109
  api_base="http://localhost:8000/v1",
104
110
  api_key="your_key",
105
111
  )
106
-
107
- # Create your dataset with required columns
108
- dataset = Dataset.from_dict({
109
- 'document': ['Your document text here...'],
110
- 'document_outline': ['1. Topic A; 2. Topic B; 3. Topic C'],
111
- 'domain': ['Computer Science'],
112
- 'icl_document': ['Example document for in-context learning...'],
113
- 'icl_query_1': ['Example question 1?'],
114
- 'icl_response_1': ['Example answer 1'],
115
- 'icl_query_2': ['Example question 2?'],
116
- 'icl_response_2': ['Example answer 2'],
117
- 'icl_query_3': ['Example question 3?'],
118
- 'icl_response_3': ['Example answer 3']
112
+ ```
113
+ #### Discover dataset requirements and create your dataset
114
+ ```python
115
+ # First, discover what data the flow needs
116
+ # Get an empty dataset with the exact schema needed
117
+ schema_dataset = flow.get_dataset_schema() # Get empty dataset with correct schema
118
+ print(f"Required columns: {schema_dataset.column_names}")
119
+ print(f"Schema: {schema_dataset.features}")
120
+
121
+ # Option 1: Add data directly to the schema dataset
122
+ dataset = schema_dataset.add_item({
123
+ 'document': 'Your document text here...',
124
+ 'document_outline': '1. Topic A; 2. Topic B; 3. Topic C',
125
+ 'domain': 'Computer Science',
126
+ 'icl_document': 'Example document for in-context learning...',
127
+ 'icl_query_1': 'Example question 1?',
128
+ 'icl_response_1': 'Example answer 1',
129
+ 'icl_query_2': 'Example question 2?',
130
+ 'icl_response_2': 'Example answer 2',
131
+ 'icl_query_3': 'Example question 3?',
132
+ 'icl_response_3': 'Example answer 3'
119
133
  })
120
134
 
135
+ # Option 2: Create your own dataset (requires `from datasets import Dataset`) and validate the schema
136
+ my_dataset = Dataset.from_dict(my_data_dict)
137
+ if my_dataset.features == schema_dataset.features:
138
+ print("✅ Schema matches - ready to generate!")
139
+ dataset = my_dataset
140
+ else:
141
+ print("❌ Schema mismatch - check your columns")
142
+
143
+ # Option 3: Get raw requirements for detailed inspection
144
+ requirements = flow.get_dataset_requirements()
145
+ if requirements:
146
+ print(f"Required: {requirements.required_columns}")
147
+ print(f"Optional: {requirements.optional_columns}")
148
+ print(f"Min samples: {requirements.min_samples}")
149
+ ```
150
+
151
+ #### Dry Run and Generate
152
+ ```python
153
+ # Quick Testing with Dry Run
154
+ dry_result = flow.dry_run(dataset, sample_size=1)
155
+ print(f"Dry run completed in {dry_result['execution_time_seconds']:.2f}s")
156
+ print(f"Output columns: {dry_result['final_dataset']['columns']}")
157
+
121
158
  # Generate high-quality QA pairs
122
159
  result = flow.generate(dataset)
123
160
 
@@ -128,14 +165,6 @@ faithfulness_scores = result['faithfulness_judgment']
128
165
  relevancy_scores = result['relevancy_score']
129
166
  ```
130
167
 
131
- #### Quick Testing with Dry Run
132
- ```python
133
- # Test the flow with a small sample first
134
- dry_result = flow.dry_run(dataset, sample_size=1)
135
- print(f"Dry run completed in {dry_result['execution_time_seconds']:.2f}s")
136
- print(f"Output columns: {dry_result['final_dataset']['columns']}")
137
- ```
138
-
139
168
 
140
169
  ## 📄 License
141
170
 
@@ -34,12 +34,9 @@ The unified chat block that replaces provider-specific implementations with a si
34
34
  ### Basic Usage
35
35
 
36
36
  ```python
37
- from sdg_hub.core.blocks import BlockRegistry
37
+ from sdg_hub.core.blocks import LLMChatBlock
38
38
  from datasets import Dataset
39
39
 
40
- # Get the LLM chat block
41
- LLMChatBlock = BlockRegistry.get_block("LLMChatBlock")
42
-
43
40
  # Configure for OpenAI
44
41
  chat_block = LLMChatBlock(
45
42
  block_name="question_answerer",
@@ -133,7 +130,7 @@ dataset = Dataset.from_dict({
133
130
  })
134
131
  ```
135
132
 
136
- #### Async Processing
133
+ #### Async Processing & Concurrency Control
137
134
  ```python
138
135
  chat_block = LLMChatBlock(
139
136
  block_name="async_chat",
@@ -147,6 +144,56 @@ chat_block = LLMChatBlock(
147
144
  result = chat_block.generate(large_dataset)
148
145
  ```
149
146
 
147
+ **Flow-Level Concurrency Control:**
148
+
149
+ When using LLM blocks within flows, you can control concurrency to prevent overwhelming API servers or hitting rate limits:
150
+
151
+ ```python
152
+ from sdg_hub import Flow
153
+
154
+ # Load a flow with LLM blocks
155
+ flow = Flow.from_yaml("path/to/your/flow.yaml")
156
+ flow.set_model_config(model="openai/gpt-4o", api_key="your-key")
157
+
158
+ # Control concurrency for each LLM block in the flow
159
+ result = flow.generate(
160
+ dataset,
161
+ max_concurrency=5 # Max 5 concurrent requests at any time
162
+ )
163
+ ```
164
+
165
+ **Benefits of Concurrency Control:**
166
+ - **Rate Limit Management** - Prevent API throttling by limiting concurrent requests
167
+ - **Resource Control** - Manage memory and network usage for large datasets
168
+ - **Provider-Friendly** - Respect API provider recommendations for concurrent requests
169
+ - **Automatic Scaling** - No concurrency limit = maximum parallelism for fastest processing
170
+
171
+ **How It Works:**
172
+
173
+ The unified async system automatically detects whether you're processing single or multiple messages and applies concurrency control appropriately:
174
+
175
+ ```python
176
+ # Single message - processed immediately
177
+ single_message = [{"role": "user", "content": "Hello"}]
178
+
179
+ # Multiple messages - concurrency controlled via semaphore
180
+ batch_messages = [
181
+ [{"role": "user", "content": "Question 1"}],
182
+ [{"role": "user", "content": "Question 2"}],
183
+ [{"role": "user", "content": "Question 3"}],
184
+ # ... up to thousands of messages
185
+ ]
186
+
187
+ # Both cases use the same unified API under the hood
188
+ # Concurrency is managed transparently
189
+ ```
190
+
191
+ **Performance Guidelines:**
192
+ - **Small datasets (<100 samples)**: No concurrency limit needed
193
+ - **Medium datasets (100-1000 samples)**: set `max_concurrency` between 10 and 20
194
+ - **Large datasets (1000+ samples)**: set `max_concurrency` between 5 and 10 (respect API limits)
195
+ - **Production workloads**: Start conservative and tune based on error rates
196
+
150
197
  ### Message Format
151
198
 
152
199
  LLMChatBlock expects messages in OpenAI chat format:
@@ -30,14 +30,15 @@ All blocks inherit from `BaseBlock`, which provides:
30
30
 
31
31
  ### Standard Configuration
32
32
  ```python
33
- from sdg_hub.core.blocks import BlockRegistry
33
+ # Import the specific block you need
34
+ from sdg_hub.core.blocks import LLMChatBlock
34
35
 
35
36
  # Every block has these standard fields
36
- MyBlock = BlockRegistry.get_block("SomeBlockType")
37
- block = MyBlock(
37
+ block = LLMChatBlock(
38
38
  block_name="my_unique_block", # Required: unique identifier
39
- input_cols=["column1", "column2"], # Columns this block needs
40
- output_cols=["new_column"], # Columns this block creates
39
+ input_cols=["input_text"], # Column this block needs
40
+ output_cols=["response"], # Column this block creates
41
+ model="openai/gpt-4o", # Required: provider/model format
41
42
  # ... block-specific configuration
42
43
  )
43
44
  ```
@@ -86,13 +87,13 @@ print(f"Found {len(available_blocks)} blocks")
86
87
 
87
88
  ### 2. Block Instantiation
88
89
  ```python
89
- # Get a block class by name
90
- ChatBlock = BlockRegistry.get_block("LLMChatBlock")
90
+ # Import the specific block you need
91
+ from sdg_hub.core.blocks import LLMChatBlock
91
92
 
92
93
  # Create an instance with configuration
93
- chat_block = ChatBlock(
94
+ chat_block = LLMChatBlock(
94
95
  block_name="question_answerer",
95
- llm_config={"model": "gpt-4o"},
96
+ model="openai/gpt-4o",
96
97
  input_cols=["question"],
97
98
  output_cols=["answer"],
98
99
  prompt_template="Answer this question: {question}"
@@ -159,7 +159,13 @@ Every block validates data at runtime:
159
159
  - Watch execution logs for bottlenecks
160
160
  - Use async-friendly blocks for LLM operations
161
161
 
162
- ### 4. Design for Reuse
162
+ ### 4. Optimize for Scale
163
+ - Use `max_concurrency` parameter to control API request rates
164
+ - Start with conservative concurrency limits (5-10) for production
165
+ - Increase concurrency carefully while monitoring error rates
166
+ - Consider provider-specific rate limits and costs
167
+
168
+ ### 5. Design for Reuse
163
169
  - Create modular flows that can be combined
164
170
  - Use parameters for customization points
165
171
 
@@ -123,15 +123,16 @@ Create comprehensive tests following this pattern:
123
123
 
124
124
  import pytest
125
125
  from datasets import Dataset
126
- from sdg_hub.core.blocks import BlockRegistry
127
126
  from sdg_hub.core.utils.error_handling import MissingColumnError
127
+ # Import your custom block directly
128
+ from .my_new_block import MyNewBlock
128
129
 
129
130
  class TestMyNewBlock:
130
131
  """Test suite for MyNewBlock."""
131
132
 
132
133
  def test_basic_functionality(self):
133
134
  """Test basic block functionality."""
134
- block = BlockRegistry.get_block("MyNewBlock")(
135
+ block = MyNewBlock(
135
136
  block_name="test_block",
136
137
  input_cols=["input"],
137
138
  output_cols=["output"]
@@ -149,7 +150,7 @@ class TestMyNewBlock:
149
150
  def test_configuration_validation(self):
150
151
  """Test parameter validation."""
151
152
  with pytest.raises(ValueError):
152
- BlockRegistry.get_block("MyNewBlock")(
153
+ MyNewBlock(
153
154
  block_name="bad_config",
154
155
  input_cols=["input"],
155
156
  output_cols=["output"],
@@ -158,7 +159,7 @@ class TestMyNewBlock:
158
159
 
159
160
  def test_missing_columns(self):
160
161
  """Test error handling for missing columns."""
161
- block = BlockRegistry.get_block("MyNewBlock")(
162
+ block = MyNewBlock(
162
163
  block_name="test_block",
163
164
  input_cols=["missing_column"],
164
165
  output_cols=["output"]
@@ -269,17 +269,78 @@ print(f"Sample output: {dry_result['sample_output']}")
269
269
  Customize flow behavior at runtime:
270
270
 
271
271
  ```python
272
- # Override default parameters
272
+ # Override default runtime parameters
273
273
  result = flow.generate(
274
274
  dataset,
275
- parameters={
275
+ runtime_params={
276
276
  "max_tokens": 200,
277
277
  "temperature": 0.9,
278
- "enable_evaluation": False
279
278
  }
280
279
  )
281
280
  ```
282
281
 
282
+ ### Block-Specific Runtime Arguments
283
+
284
+ You can enable or disable advanced features—such as "thinking mode"—for individual blocks at runtime using the `runtime_params` argument. This allows fine-grained control over block behavior without modifying the flow YAML.
285
+
286
+ For example, to disable "thinking mode" for several blocks:
287
+
288
+ ```python
289
+ # Set runtime_params for specific blocks
290
+ result = flow.generate(
291
+ dataset,
292
+ runtime_params = {
293
+ # LLMChatBlock blocks
294
+ "llm_chat_block_1": {"extra_body": {"chat_template_kwargs": {"enable_thinking": False}}},
295
+ }
296
+ )
297
+ ```
298
+
299
+ ### Concurrency Control
300
+
301
+ For flows containing LLM blocks, you can control the maximum number of concurrent API requests to prevent overwhelming servers or hitting rate limits:
302
+
303
+ ```python
304
+ # Basic concurrency control
305
+ result = flow.generate(
306
+ dataset,
307
+ max_concurrency=5 # Max 5 concurrent requests per LLM block execution
308
+ )
309
+
310
+ # Combined with other parameters
311
+ result = flow.generate(
312
+ dataset,
313
+ max_concurrency=10,
314
+ runtime_params={
315
+ "temperature": 0.7,
316
+ "max_tokens": 200
317
+ }
318
+ )
319
+ ```
320
+
321
+ **When to Use Concurrency Control:**
322
+
323
+ - **Large Datasets** - Process thousands of samples without overwhelming APIs
324
+ - **Rate Limit Management** - Respect provider-specific concurrent request limits
325
+ - **Production Workloads** - Ensure stable, predictable resource usage
326
+ - **Cost Optimization** - Prevent burst API charges from uncontrolled parallelism
327
+
328
+ **Recommended Settings:**
329
+
330
+ ```python
331
+ # Conservative (recommended for production)
332
+ result = flow.generate(dataset, max_concurrency=5)
333
+
334
+ # Moderate (good for development/testing)
335
+ result = flow.generate(dataset, max_concurrency=10)
336
+
337
+ # Aggressive (only for robust APIs and small datasets)
338
+ result = flow.generate(dataset, max_concurrency=20)
339
+
340
+ # No limit (maximum speed, use with caution)
341
+ result = flow.generate(dataset) # Default behavior
342
+ ```
343
+
283
344
  ## 🚀 Next Steps
284
345
 
285
346
  Ready to master the flow system? Explore these detailed guides:
@@ -107,11 +107,16 @@ print(f"🔎 QA Generation Flows: {qa_flows}")
107
107
  eval_flows = FlowRegistry.search_flows(tag="evaluation")
108
108
  print(f"📊 Evaluation Flows: {eval_flows}")
109
109
 
110
+ # List all blocks, grouped by category
111
+ all_blocks = BlockRegistry.list_blocks(grouped=True)
112
+ for category, blocks in all_blocks.items():
113
+ print(f"Blocks for category {category}: {blocks}")
114
+
110
115
  # Find blocks by category
111
- llm_blocks = BlockRegistry.search_blocks(category="llm")
116
+ llm_blocks = BlockRegistry.list_blocks(category="llm")
112
117
  print(f"🧠 LLM Blocks: {llm_blocks}")
113
118
 
114
- transform_blocks = BlockRegistry.search_blocks(category="transform")
119
+ transform_blocks = BlockRegistry.list_blocks(category="transform")
115
120
  print(f"🔄 Transform Blocks: {transform_blocks}")
116
121
  ```
117
122