sdg-hub 0.3.0__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (229)
  1. sdg_hub-0.4.0/.github/workflows/packer.yml +15 -0
  2. {sdg_hub-0.3.0/src/sdg_hub.egg-info → sdg_hub-0.4.0}/PKG-INFO +1 -1
  3. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/docs/README.md +0 -1
  4. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/docs/_sidebar.md +0 -1
  5. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/docs/blocks/filtering-blocks.md +0 -1
  6. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/docs/blocks/llm-blocks.md +0 -1
  7. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/docs/blocks/overview.md +0 -6
  8. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/docs/blocks/transform-blocks.md +0 -1
  9. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/docs/concepts.md +1 -1
  10. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/docs/development.md +0 -7
  11. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/docs/flows/overview.md +32 -4
  12. sdg_hub-0.4.0/examples/annotation/annotation_classification.ipynb +486 -0
  13. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/examples/annotation/news_classification_flow.yaml +38 -8
  14. sdg_hub-0.4.0/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/knowledge_generation.ipynb +425 -0
  15. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/knowledge_mixing_utils.py +5 -0
  16. sdg_hub-0.4.0/examples/text_analysis/structured_insights_demo.ipynb +520 -0
  17. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/_version.py +3 -3
  18. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/__init__.py +2 -4
  19. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/base.py +61 -6
  20. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/filtering/column_value_filter.py +3 -2
  21. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/llm/__init__.py +2 -4
  22. sdg_hub-0.4.0/src/sdg_hub/core/blocks/llm/llm_chat_block.py +586 -0
  23. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +216 -98
  24. sdg_hub-0.4.0/src/sdg_hub/core/blocks/llm/llm_parser_block.py +320 -0
  25. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/llm/text_parser_block.py +53 -152
  26. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/flow/base.py +7 -4
  27. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/utils/datautils.py +40 -22
  28. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml +51 -11
  29. sdg_hub-0.4.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/flow.yaml +159 -0
  30. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml +51 -11
  31. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml +14 -2
  32. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +146 -26
  33. sdg_hub-0.4.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/__init__.py +0 -0
  34. sdg_hub-0.4.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/atomic_facts_ja.yaml +41 -0
  35. sdg_hub-0.4.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/detailed_summary_ja.yaml +14 -0
  36. sdg_hub-0.4.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/extractive_summary_ja.yaml +14 -0
  37. sdg_hub-0.4.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml +304 -0
  38. sdg_hub-0.4.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/generate_questions_responses_ja.yaml +55 -0
  39. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/flows/text_analysis/structured_insights/flow.yaml +28 -4
  40. sdg_hub-0.4.0/src/sdg_hub/py.typed +0 -0
  41. {sdg_hub-0.3.0 → sdg_hub-0.4.0/src/sdg_hub.egg-info}/PKG-INFO +1 -1
  42. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub.egg-info/SOURCES.txt +12 -14
  43. sdg_hub-0.4.0/tests/__init__.py +0 -0
  44. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/blocks/filtering/test_columnvaluefilter.py +2 -2
  45. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/blocks/llm/test_llm_chat_block.py +237 -173
  46. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/blocks/llm/test_llm_chat_with_parsing_retry_block.py +29 -34
  47. sdg_hub-0.4.0/tests/blocks/llm/test_llm_parser_block.py +671 -0
  48. sdg_hub-0.4.0/tests/blocks/llm/test_textparserblock.py +962 -0
  49. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/blocks/test_base_block.py +198 -2
  50. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/flow/test_base.py +36 -7
  51. sdg_hub-0.4.0/tests/utils/test_datautils.py +661 -0
  52. sdg_hub-0.3.0/docs/blocks/evaluation-blocks.md +0 -22
  53. sdg_hub-0.3.0/examples/annotation/annotation_classification.ipynb +0 -840
  54. sdg_hub-0.3.0/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/knowledge_generation.ipynb +0 -588
  55. sdg_hub-0.3.0/examples/text_analysis/structured_insights_demo.ipynb +0 -4479
  56. sdg_hub-0.3.0/src/sdg_hub/core/blocks/evaluation/__init__.py +0 -9
  57. sdg_hub-0.3.0/src/sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +0 -323
  58. sdg_hub-0.3.0/src/sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +0 -323
  59. sdg_hub-0.3.0/src/sdg_hub/core/blocks/evaluation/verify_question_block.py +0 -329
  60. sdg_hub-0.3.0/src/sdg_hub/core/blocks/llm/client_manager.py +0 -447
  61. sdg_hub-0.3.0/src/sdg_hub/core/blocks/llm/config.py +0 -337
  62. sdg_hub-0.3.0/src/sdg_hub/core/blocks/llm/llm_chat_block.py +0 -600
  63. sdg_hub-0.3.0/tests/blocks/evaluation/__init__.py +0 -2
  64. sdg_hub-0.3.0/tests/blocks/evaluation/test_evaluate_faithfulness_block.py +0 -271
  65. sdg_hub-0.3.0/tests/blocks/evaluation/test_evaluate_relevancy_block.py +0 -189
  66. sdg_hub-0.3.0/tests/blocks/evaluation/test_verify_question_block.py +0 -331
  67. sdg_hub-0.3.0/tests/blocks/llm/test_textparserblock.py +0 -1849
  68. sdg_hub-0.3.0/tests/blocks/testdata/test_evaluate_faithfulness.yaml +0 -17
  69. sdg_hub-0.3.0/tests/blocks/testdata/test_evaluate_relevancy.yaml +0 -24
  70. sdg_hub-0.3.0/tests/blocks/testdata/test_verify_question.yaml +0 -27
  71. sdg_hub-0.3.0/tests/utils/test_datautils.py +0 -132
  72. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/.github/actionlint.yaml +0 -0
  73. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/.github/actions/free-disk-space/action.yml +0 -0
  74. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/.github/dependabot.yml +0 -0
  75. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/.github/mergify.yml +0 -0
  76. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/.github/workflows/actionlint.dockerfile +0 -0
  77. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/.github/workflows/actionlint.yml +0 -0
  78. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/.github/workflows/docs.yml +0 -0
  79. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/.github/workflows/e2e.yml +0 -0
  80. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/.github/workflows/lint.yml +0 -0
  81. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/.github/workflows/matchers/actionlint.json +0 -0
  82. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/.github/workflows/matchers/pylint.json +0 -0
  83. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/.github/workflows/pypi.yaml +0 -0
  84. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/.github/workflows/test.yml +0 -0
  85. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/.gitignore +0 -0
  86. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/.isort.cfg +0 -0
  87. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/.markdownlint-cli2.yaml +0 -0
  88. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/.pre-commit-config.yaml +0 -0
  89. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/.pylintrc +0 -0
  90. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/CLAUDE.md +0 -0
  91. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/CONTRIBUTING.md +0 -0
  92. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/LICENSE +0 -0
  93. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/Makefile +0 -0
  94. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/README.md +0 -0
  95. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/docs/.nojekyll +0 -0
  96. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/docs/_coverpage.md +0 -0
  97. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/docs/_navbar.md +0 -0
  98. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/docs/api-reference.md +0 -0
  99. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/docs/blocks/custom-blocks.md +0 -0
  100. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/docs/flows/discovery.md +0 -0
  101. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/docs/index.html +0 -0
  102. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/docs/installation.md +0 -0
  103. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/docs/quick-start.md +0 -0
  104. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/examples/annotation/news_classification_assessment_prompt.yaml +0 -0
  105. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/examples/annotation/news_classification_prompt.yaml +0 -0
  106. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/examples/annotation/revise_news_classification_prompt.yaml +0 -0
  107. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/.env.example +0 -0
  108. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/README.md +0 -0
  109. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/knowledge_mixing.ipynb +0 -0
  110. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/examples/knowledge_tuning/instructlab/.gitignore +0 -0
  111. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/examples/knowledge_tuning/instructlab/README.md +0 -0
  112. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/examples/knowledge_tuning/instructlab/assets/imgs/instructlab-banner.png +0 -0
  113. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/examples/knowledge_tuning/instructlab/docling_v2_config.yaml +0 -0
  114. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/examples/knowledge_tuning/instructlab/docparser.py +0 -0
  115. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/examples/knowledge_tuning/instructlab/docparser_v2.py +0 -0
  116. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.json +0 -0
  117. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.md +0 -0
  118. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.pdf +0 -0
  119. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/qna.yaml +0 -0
  120. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/examples/knowledge_tuning/instructlab/document_pre_processing.ipynb +0 -0
  121. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/examples/knowledge_tuning/instructlab/knowledge_generation_and_mixing.ipynb +0 -0
  122. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/examples/knowledge_tuning/instructlab/logger_config.py +0 -0
  123. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/examples/knowledge_tuning/knowledge_utils.py +0 -0
  124. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/examples/text_analysis/README.md +0 -0
  125. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/examples/text_analysis/extract_stock_tickers.yaml +0 -0
  126. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/pyproject.toml +0 -0
  127. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/scripts/ruff.sh +0 -0
  128. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/setup.cfg +0 -0
  129. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/__init__.py +0 -0
  130. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/__init__.py +0 -0
  131. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/deprecated_blocks/__init__.py +0 -0
  132. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/deprecated_blocks/combine_columns.py +0 -0
  133. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/deprecated_blocks/duplicate_columns.py +0 -0
  134. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/deprecated_blocks/filter_by_value.py +0 -0
  135. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/deprecated_blocks/flatten_columns.py +0 -0
  136. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/deprecated_blocks/llmblock.py +0 -0
  137. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/deprecated_blocks/rename_columns.py +0 -0
  138. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/deprecated_blocks/sample_populator.py +0 -0
  139. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/deprecated_blocks/selector.py +0 -0
  140. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/deprecated_blocks/set_to_majority_value.py +0 -0
  141. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/filtering/__init__.py +0 -0
  142. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/llm/error_handler.py +0 -0
  143. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/llm/prompt_builder_block.py +0 -0
  144. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/registry.py +0 -0
  145. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/transform/__init__.py +0 -0
  146. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/transform/duplicate_columns.py +0 -0
  147. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/transform/index_based_mapper.py +0 -0
  148. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/transform/json_structure_block.py +0 -0
  149. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/transform/melt_columns.py +0 -0
  150. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/transform/rename_columns.py +0 -0
  151. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/transform/text_concat.py +0 -0
  152. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/blocks/transform/uniform_col_val_setter.py +0 -0
  153. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/flow/__init__.py +0 -0
  154. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/flow/checkpointer.py +0 -0
  155. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/flow/metadata.py +0 -0
  156. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/flow/migration.py +0 -0
  157. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/flow/registry.py +0 -0
  158. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/flow/validation.py +0 -0
  159. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/utils/__init__.py +0 -0
  160. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/utils/error_handling.py +0 -0
  161. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/utils/flow_id_words.yaml +0 -0
  162. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/utils/flow_identifier.py +0 -0
  163. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/utils/flow_metrics.py +0 -0
  164. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/utils/logger_config.py +0 -0
  165. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/utils/path_resolution.py +0 -0
  166. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/core/utils/yaml_utils.py +0 -0
  167. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/__init__.py +0 -0
  168. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/__init__.py +0 -0
  169. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/detailed_summary.yaml +0 -0
  170. {sdg_hub-0.3.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary → sdg_hub-0.4.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa}/__init__.py +0 -0
  171. {sdg_hub-0.3.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts → sdg_hub-0.4.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary}/__init__.py +0 -0
  172. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/extractive_summary.yaml +0 -0
  173. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_answers.yaml +0 -0
  174. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_multiple_qa.yaml +0 -0
  175. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_question_list.yaml +0 -0
  176. {sdg_hub-0.3.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab → sdg_hub-0.4.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts}/__init__.py +0 -0
  177. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/key_facts_summary.yaml +0 -0
  178. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/README.md +0 -0
  179. {sdg_hub-0.3.0/tests → sdg_hub-0.4.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab}/__init__.py +0 -0
  180. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/atomic_facts.yaml +0 -0
  181. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/detailed_summary.yaml +0 -0
  182. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_faithfulness.yaml +0 -0
  183. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_question.yaml +0 -0
  184. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_relevancy.yaml +0 -0
  185. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/extractive_summary.yaml +0 -0
  186. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/generate_questions_responses.yaml +0 -0
  187. /sdg_hub-0.3.0/src/sdg_hub/py.typed → /sdg_hub-0.4.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/README.md +0 -0
  188. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/flows/text_analysis/__init__.py +0 -0
  189. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/flows/text_analysis/structured_insights/__init__.py +0 -0
  190. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/flows/text_analysis/structured_insights/analyze_sentiment.yaml +0 -0
  191. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/flows/text_analysis/structured_insights/extract_entities.yaml +0 -0
  192. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/flows/text_analysis/structured_insights/extract_keywords.yaml +0 -0
  193. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub/flows/text_analysis/structured_insights/summarize.yaml +0 -0
  194. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub.egg-info/dependency_links.txt +0 -0
  195. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub.egg-info/requires.txt +0 -0
  196. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/src/sdg_hub.egg-info/top_level.txt +0 -0
  197. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/blocks/deprecated/test_llmblock.py +0 -0
  198. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/blocks/llm/test_promptbuilderblock.py +0 -0
  199. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/blocks/test_registry.py +0 -0
  200. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/blocks/testdata/test_config.yaml +0 -0
  201. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/blocks/testdata/test_prompt_format_config.yaml +0 -0
  202. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/blocks/testdata/test_prompt_format_no_system.yaml +0 -0
  203. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/blocks/testdata/test_prompt_format_strict.yaml +0 -0
  204. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/blocks/testdata/test_prompt_invalid_final_role.yaml +0 -0
  205. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/blocks/testdata/test_prompt_no_user_messages.yaml +0 -0
  206. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/blocks/transform/test_index_based_mapper.py +0 -0
  207. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/blocks/transform/test_json_structure_block.py +0 -0
  208. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/blocks/transform/test_melt_columns.py +0 -0
  209. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/blocks/transform/test_text_concat.py +0 -0
  210. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/blocks/transform/test_uniform_col_val_setter.py +0 -0
  211. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/blocks/utilblocks/test_combinecolumns.py +0 -0
  212. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/blocks/utilblocks/test_duplicatecolumnsblock.py +0 -0
  213. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/blocks/utilblocks/test_flattenblock.py +0 -0
  214. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/blocks/utilblocks/test_renameblock.py +0 -0
  215. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/blocks/utilblocks/test_samplepopulatorblock.py +0 -0
  216. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/blocks/utilblocks/test_selectorblock.py +0 -0
  217. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/blocks/utilblocks/test_settomajority.py +0 -0
  218. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/flow/__init__.py +0 -0
  219. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/flow/conftest.py +0 -0
  220. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/flow/test_checkpointer.py +0 -0
  221. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/flow/test_dataset_requirements.py +0 -0
  222. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/flow/test_integration.py +0 -0
  223. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/flow/test_metadata.py +0 -0
  224. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/flow/test_migration.py +0 -0
  225. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/flow/test_registry.py +0 -0
  226. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/flow/test_validation.py +0 -0
  227. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/utils/test_error_handling.py +0 -0
  228. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tests/utils/test_path_resolution.py +0 -0
  229. {sdg_hub-0.3.0 → sdg_hub-0.4.0}/tox.ini +0 -0
@@ -0,0 +1,15 @@
1
+ name: Build AMI with Packer
2
+
3
+ on:
4
+ workflow_dispatch:
5
+
6
+ jobs:
7
+ build-ami:
8
+ runs-on: ubuntu-latest
9
+ permissions:
10
+ id-token: write # This is required for OIDC
11
+ contents: read
12
+
13
+ steps:
14
+ - name: Checkout repository
15
+ uses: actions/checkout@v4
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sdg_hub
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: Synthetic Data Generation
5
5
  Author-email: Red Hat AI Innovation <abhandwa@redhat.com>
6
6
  License: Apache-2.0
@@ -49,7 +49,6 @@ Learn about the modular block architecture that powers SDG Hub:
49
49
  - **[LLM Blocks](blocks/llm-blocks.md)** - Chat, prompt building, and text parsing
50
50
  - **[Transform Blocks](blocks/transform-blocks.md)** - Data transformation and manipulation
51
51
  - **[Filtering Blocks](blocks/filtering-blocks.md)** - Quality filtering and data validation
52
- - **[Evaluation Blocks](blocks/evaluation-blocks.md)** - Faithfulness and relevancy assessment
53
52
  - **[Custom Blocks](blocks/custom-blocks.md)** - Building your own processing blocks
54
53
 
55
54
  ### Flow System
@@ -9,7 +9,6 @@
9
9
  * [LLM Blocks](blocks/llm-blocks.md)
10
10
  * [Transform Blocks](blocks/transform-blocks.md)
11
11
  * [Filtering Blocks](blocks/filtering-blocks.md)
12
- * [Evaluation Blocks](blocks/evaluation-blocks.md)
13
12
  * [Custom Blocks](blocks/custom-blocks.md)
14
13
 
15
14
  * **Flow System**
@@ -10,7 +10,6 @@ Filters dataset rows based on column values using flexible comparison operators
10
10
 
11
11
  ## 🚀 Next Steps
12
12
 
13
- - **[Evaluation Blocks](evaluation-blocks.md)** - Quality assessment and scoring
14
13
  - **[LLM Blocks](llm-blocks.md)** - AI-powered text generation
15
14
  - **[Transform Blocks](transform-blocks.md)** - Data manipulation and reshaping
16
15
  - **[Flow Integration](../flows/overview.md)** - Combine filtering into complete pipelines
@@ -239,5 +239,4 @@ Extracts structured data from LLM responses using patterns, schemas, or custom p
239
239
 
240
240
  - **[Transform Blocks](transform-blocks.md)** - Data manipulation and reshaping
241
241
  - **[Filtering Blocks](filtering-blocks.md)** - Quality control and validation
242
- - **[Evaluation Blocks](evaluation-blocks.md)** - Quality assessment and scoring
243
242
  - **[Flow Integration](../flows/overview.md)** - Combine LLM blocks into complete pipelines
@@ -65,11 +65,6 @@ Data manipulation and transformation:
65
65
  Quality control and data validation:
66
66
  - **ColumnValueFilterBlock** - Filter rows based on column values
67
67
 
68
- ### 📊 Evaluation Blocks (`evaluation/`)
69
- Quality assessment and scoring:
70
- - **EvaluateFaithfulnessBlock** - Assess factual accuracy
71
- - **EvaluateRelevancyBlock** - Measure relevance scores
72
- - **VerifyQuestionBlock** - Validate question quality
73
68
 
74
69
  ## 🔧 Block Lifecycle
75
70
 
@@ -149,5 +144,4 @@ Ready to dive deeper? Explore specific block categories:
149
144
  - **[LLM Blocks](llm-blocks.md)** - AI-powered language model operations
150
145
  - **[Transform Blocks](transform-blocks.md)** - Data manipulation and reshaping
151
146
  - **[Filtering Blocks](filtering-blocks.md)** - Quality control and validation
152
- - **[Evaluation Blocks](evaluation-blocks.md)** - Quality assessment and scoring
153
147
  - **[Custom Blocks](custom-blocks.md)** - Build your own processing blocks
@@ -26,6 +26,5 @@ Sets uniform values across specified columns, useful for adding metadata or defa
26
26
  ## 🚀 Next Steps
27
27
 
28
28
  - **[Filtering Blocks](filtering-blocks.md)** - Quality control and data validation
29
- - **[Evaluation Blocks](evaluation-blocks.md)** - Quality assessment and scoring
30
29
  - **[LLM Blocks](llm-blocks.md)** - AI-powered text generation
31
30
  - **[Flow Integration](../flows/overview.md)** - Combine transform blocks into complete pipelines
@@ -152,7 +152,7 @@ Every block validates data at runtime:
152
152
  - Validate your pipeline before scaling up
153
153
 
154
154
  ### 2. Layer Validation
155
- - Use evaluation blocks to assess quality
155
+ - Use basic block composition (PromptBuilder → LLMChat → Parser → Filter) to assess quality
156
156
  - Implement filtering to maintain data standards
157
157
 
158
158
  ### 3. Monitor Performance
@@ -206,13 +206,6 @@ class TestMyNewBlock:
206
206
  - Comprehensive operator support
207
207
  - Good performance on large datasets
208
208
 
209
- #### Evaluation Blocks (`src/sdg_hub/core/blocks/evaluation/`)
210
- - **Purpose**: Quality assessment and scoring
211
- - **Examples**: Faithfulness evaluation, relevancy scoring
212
- - **Requirements**:
213
- - Consistent scoring methodology
214
- - Support for different evaluation criteria
215
- - Clear documentation of scoring rubrics
216
209
 
217
210
  ## 🌊 Contributing Flows
218
211
 
@@ -169,13 +169,41 @@ blocks:
169
169
  max_tokens: 300
170
170
  async_mode: true
171
171
 
172
- # Quality evaluation
173
- - block_type: "EvaluateFaithfulnessBlock"
172
+ # Quality evaluation using basic blocks
173
+ - block_type: "PromptBuilderBlock"
174
174
  block_config:
175
- block_name: "check_faithfulness"
175
+ block_name: "faithfulness_prompt"
176
176
  input_cols: ["document", "answer"]
177
- output_cols: ["faithfulness_score"]
177
+ output_cols: ["eval_prompt"]
178
+ prompt_template: "Evaluate if this answer is faithful to the document..."
179
+
180
+ - block_type: "LLMChatBlock"
181
+ block_config:
182
+ block_name: "eval_faithfulness_llm"
183
+ input_cols: ["eval_prompt"]
184
+ output_cols: ["eval_response"]
178
185
  async_mode: true
186
+
187
+ - block_type: "LLMParserBlock"
188
+ block_config:
189
+ block_name: "extract_eval_content"
190
+ input_cols: ["eval_response"]
191
+ extract_content: true
192
+
193
+ - block_type: "TextParserBlock"
194
+ block_config:
195
+ block_name: "parse_evaluation"
196
+ input_cols: ["extract_eval_content_content"]
197
+ output_cols: ["explanation", "judgment"]
198
+ start_tags: ["[Start of Explanation]", "[Start of Answer]"]
199
+ end_tags: ["[End of Explanation]", "[End of Answer]"]
200
+
201
+ - block_type: "ColumnValueFilterBlock"
202
+ block_config:
203
+ block_name: "filter_faithful"
204
+ input_cols: ["judgment"]
205
+ filter_value: "YES"
206
+ operation: "eq"
179
207
 
180
208
  # Quality filtering
181
209
  - block_type: "ColumnValueFilterBlock"
@@ -0,0 +1,486 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "%load_ext autoreload\n",
10
+ "%autoreload 2"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": null,
16
+ "metadata": {},
17
+ "outputs": [],
18
+ "source": [
19
+ "# Third Party\n",
20
+ "from datasets import load_dataset\n",
21
+ "from openai import OpenAI\n",
22
+ "from rich import print\n",
23
+ "from rich.panel import Panel\n",
24
+ "from sklearn.metrics import classification_report\n",
25
+ "\n",
26
+ "# First Party\n",
27
+ "from sdg_hub import Flow, FlowMetadata, BlockRegistry\n",
28
+ "\n",
29
+ "import nest_asyncio\n",
30
+ "nest_asyncio.apply()"
31
+ ]
32
+ },
33
+ {
34
+ "cell_type": "markdown",
35
+ "metadata": {},
36
+ "source": [
37
+ "# Classifying news articles\n",
38
+ "\n",
39
+ "\n",
40
+ "In this tutorial, you’ll learn how to create your own custom data generation flow using SDG Hub. This notebook will walk you through all the essential pieces to make your own flow using `sdg_hub` for any use-case using the fundamental components of sdg_hub: `Blocks` and `Flows`\n",
41
+ "\n",
42
+ "As an example use-case, we will pick news classification. Classification is a fundamental task in machine learning, where the goal is to assign predefined categories to input data. To address the classic machine learning use-case of news or text classification, we will use sdg_hub and leverage a language model to **classify news articles** with topic labels — specifically using the [AG News dataset](https://huggingface.co/datasets/fancyzhx/ag_news) from Hugging Face.\n",
43
+ "\n",
44
+ "We’ll go step by step through a progressively improving flow. Each stage builds on the previous one, giving you a practical sense of how you can evolve your flow from using simple heuristics to highly customized and reliable data generation, using different inference paradigms such as self assessment.\n",
45
+ "\n",
46
+ "### 🔍 Understand the Task\n",
47
+ "Before we write any prompts or code, we’ll take time to understand what we want the model to do. For this exercise, the task is **text classification** — assigning one of 4 possible categories (e.g., \"World\", \"Sports\", \"Sci/Tech\", \"Business\") to a given news article\n",
48
+ "\n",
49
+ "### 🛠️ Build a Basic Annotation Flow and learn the `sdg_hub` way\n",
50
+ "We’ll start by creating a minimal flow that simply prompts the model to generate topic labels on the unlabeled data. This will use default prompts, simply populating the prompt with the text and asking the model to generate one of the 4 possible labels, with no examples.\n",
51
+ "\n",
52
+ "### 🎯 Improve with Assessment and Iteration\n",
53
+ "Next, we’ll refine the flow by adding an assessment step. Iterations and self verification on a task often lead to better performance\n",
54
+ "\n",
55
+ "Let’s get started by loading a sample of the dataset"
56
+ ]
57
+ },
58
+ {
59
+ "cell_type": "code",
60
+ "execution_count": null,
61
+ "metadata": {},
62
+ "outputs": [],
63
+ "source": [
64
+ "dataset = load_dataset(\"fancyzhx/ag_news\")\n",
65
+ "\n",
66
+ "train_data = dataset[\"train\"].shuffle(seed=42).select(range(500))\n",
67
+ "test_data = dataset[\"test\"].shuffle(seed=42).select(range(100))\n",
68
+ "\n",
69
+ "# map the labels to the category names\n",
70
+ "label_map = train_data.features['label'].names\n",
71
+ "\n",
72
+ "train_data = train_data.map(lambda x: {\"category\": label_map[x[\"label\"]]})\n",
73
+ "test_data = test_data.map(lambda x: {\"category\": label_map[x[\"label\"]]})"
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "code",
78
+ "execution_count": null,
79
+ "metadata": {},
80
+ "outputs": [],
81
+ "source": [
82
+ "# Group examples by category\n",
83
+ "examples_by_category = {}\n",
84
+ "for item in train_data:\n",
85
+ " category = item['category']\n",
86
+ " if category not in examples_by_category:\n",
87
+ " examples_by_category[category] = []\n",
88
+ " examples_by_category[category].append(item['text'])\n",
89
+ "\n",
90
+ "# Print one example from each category in a panel\n",
91
+ "for category, examples in examples_by_category.items():\n",
92
+ " print(Panel(examples[0], title=f\"Category: {category}\", expand=False))\n"
93
+ ]
94
+ },
95
+ {
96
+ "cell_type": "markdown",
97
+ "metadata": {},
98
+ "source": [
99
+ "## Simple Data Annotation Pipeline\n",
100
+ "\n",
101
+ "In this section, we’ll create our **first working flow** to perform classification using a language model. The goal is to understand the building blocks of `sdg_hub` and how we can employ them to get a language model to classify a given text.\n",
102
+ "\n",
103
+ "### Recap: How `sdg_hub` Works\n",
104
+ "\n",
105
+ "```mermaid\n",
106
+ "flowchart LR\n",
107
+ " A[Flow] --> B[Blocks] --> C[Prompts]\n",
108
+ " C --> D[Generated Data]\n",
109
+ "```"
110
+ ]
111
+ },
112
+ {
113
+ "cell_type": "markdown",
114
+ "metadata": {},
115
+ "source": [
116
+ "# Building a Simple Classification Flow\n",
117
+ "\n",
118
+ "### Discover Blocks for us to use\n",
119
+ "\n"
120
+ ]
121
+ },
122
+ {
123
+ "cell_type": "code",
124
+ "execution_count": null,
125
+ "metadata": {},
126
+ "outputs": [],
127
+ "source": [
128
+ "BlockRegistry.discover_blocks()"
129
+ ]
130
+ },
131
+ {
132
+ "cell_type": "markdown",
133
+ "metadata": {},
134
+ "source": [
135
+ "It seems all the functionality we are interested in, such as building a prompt, chatting with an LLM, and parsing its output, is under the `llm` category in sdg_hub. Let's start there."
136
+ ]
137
+ },
138
+ {
139
+ "cell_type": "code",
140
+ "execution_count": null,
141
+ "metadata": {},
142
+ "outputs": [],
143
+ "source": [
144
+ "from sdg_hub.core.blocks.llm import PromptBuilderBlock, LLMChatBlock, TextParserBlock, LLMParserBlock"
145
+ ]
146
+ },
147
+ {
148
+ "cell_type": "markdown",
149
+ "metadata": {},
150
+ "source": [
151
+ "### Creating the required blocks\n",
152
+ "\n",
153
+ "To get started, we'll construct the simplest possible flow for text classification using SDG Hub. We will focus on 3 main blocks that will often appear as a triplet while using `sdg_hub`\n",
154
+ "\n",
155
+ "1. **Prompt Builder Block**: Converts each input text into a prompt formatted for the LLM. The important input argument to keep in mind for `PromptBuilderBlock` is the `prompt_config_path`, which is where the prompt template is saved. Any prompt engineering we would want to do would be done in such a prompt template.\n",
156
+ "2. **LLM Chat Block**: Sends the prompt to the language model and receives its response (the predicted label).\n",
157
+ "3. **Text Parser Block**: Extracts the final label from the LLM's output.\n",
158
+ "\n",
159
+ "This setup results in a single LLM interaction per sample, forming a minimal classification pipeline.\n",
160
+ "\n",
161
+ "We are going to use the simple prompt that can be found in `news_classification_prompt.yaml`."
162
+ ]
163
+ },
164
+ {
165
+ "cell_type": "code",
166
+ "execution_count": null,
167
+ "metadata": {},
168
+ "outputs": [],
169
+ "source": [
170
+ "promptbuilderblock_1 = PromptBuilderBlock(block_name='annotation_prompt_builder', input_cols=['text'], output_cols=['annotation_prompt'], prompt_config_path=\"news_classification_prompt.yaml\", format_as_messages=True)\n",
171
+ "llmchatblock_1 = LLMChatBlock(block_name='annotation_llm_chat_block', input_cols=['annotation_prompt'], output_cols=['raw_output'], temperature=0.0, max_tokens=5, extra_body={'guided_choice': ['World', 'Sports', 'Business', 'Sci/Tech']}, async_mode=True)\n",
172
+ "llmparserblock_1 = LLMParserBlock(block_name='annotation_llm_parser_block', input_cols=['raw_output'], extract_content=True, expand_lists=True)\n",
173
+ "textparserblock_1 = TextParserBlock(block_name='annotation_text_parser_block', input_cols=['annotation_llm_parser_block_content'], output_cols=['output'], start_tags=[''], end_tags=[''])"
174
+ ]
175
+ },
176
+ {
177
+ "cell_type": "markdown",
178
+ "metadata": {},
179
+ "source": [
180
+ "### Designing the `Flow`\n",
181
+ "\n",
182
+ "The `Flow` class is at the heart of SDG Hub. Simply put, a `Flow` is a chain of `Blocks` that get executed sequentially. Here, we will simply chain our PromptBuilder -> LLMChatBlock -> TextParser, in that order:\n",
183
+ "\n",
184
+ "```mermaid\n",
185
+ "flowchart LR\n",
186
+ " subgraph Flow\n",
187
+ " direction LR\n",
188
+ " A[PromptBuilderBlock] --> B[LLMChatBlock] --> C[TextParserBlock]\n",
189
+ " end\n",
190
+ "```\n",
191
+ "\n"
192
+ ]
193
+ },
194
+ {
195
+ "cell_type": "code",
196
+ "execution_count": null,
197
+ "metadata": {},
198
+ "outputs": [],
199
+ "source": [
200
+ "flow = Flow(blocks=[promptbuilderblock_1, llmchatblock_1, llmparserblock_1, textparserblock_1], metadata=FlowMetadata(name=\"annotation_flow\", description=\"A flow for news article classification\", author=\"sdg_hub\"))"
201
+ ]
202
+ },
203
+ {
204
+ "cell_type": "markdown",
205
+ "metadata": {},
206
+ "source": [
207
+ "### Set the model configs for the `Flow`\n",
208
+ "\n",
209
+ "In SDG Hub, model details such as the API base URL, the API Key (if any) and the model name are set at a Flow level using the `set_model_config` method as shown. The `model` parameter accepts a string in the format of \"`provider`/`model_name`\". Here our `provider` is 'hosted_vllm' as we are using a locally hosted model through vllm, and the model name is \"meta-llama/Llama-3.3-70B-Instruct\"\n",
210
+ "\n",
211
+ "We must set the `api_base` parameter and point it to where the model endpoint can be found, in this case, `http://localhost:8000/v1`"
212
+ ]
213
+ },
214
+ {
215
+ "cell_type": "code",
216
+ "execution_count": null,
217
+ "metadata": {},
218
+ "outputs": [],
219
+ "source": [
220
+ "# flow.set_model_config(model=\"hosted_vllm/meta-llama/Llama-3.3-70B-Instruct\", api_base=\"http://localhost:8000/v1\", api_key=\"\")\n",
221
+ "\n",
222
+ "flow.set_model_config(model=\"hosted_vllm/qwen3-8b\", api_base=\"http://localhost:8101/v1\", api_key=\"empty\")\n"
223
+ ]
224
+ },
225
+ {
226
+ "cell_type": "markdown",
227
+ "metadata": {},
228
+ "source": [
229
+ "### Time to generate!\n",
230
+ "\n",
231
+ "In sdg_hub, the way to generate data is very simple: we use the `generate` method from `Flow`. In its simplest form, all the `generate` method needs is the input dataset to operate on. Additionally, we can pass runtime parameters for each block as well, if we wish to override any of the block-specific model configs."
232
+ ]
233
+ },
234
+ {
235
+ "cell_type": "code",
236
+ "execution_count": null,
237
+ "metadata": {},
238
+ "outputs": [],
239
+ "source": [
240
+ "generated_data = flow.generate(test_data)"
241
+ ]
242
+ },
243
+ {
244
+ "cell_type": "markdown",
245
+ "metadata": {},
246
+ "source": [
247
+ "### Evaluation\n",
248
+ "\n",
249
+ "Now that we’ve generated synthetic labels using our simple classification flow, it’s time to evaluate how well the model performed. The goal of this section is to compare the predicted labels against the **true labels** from the dataset using standard classification metrics (precision, recall, f-1 score and classification accuracy)\n",
250
+ "\n",
251
+ "We’ll use `sklearn.metrics.classification_report`, which provides precision, recall, F1-score, and support for each class.\n"
252
+ ]
253
+ },
254
+ {
255
+ "cell_type": "code",
256
+ "execution_count": null,
257
+ "metadata": {},
258
+ "outputs": [],
259
+ "source": [
260
+ "print(classification_report(generated_data[\"category\"], generated_data[\"output\"]))"
261
+ ]
262
+ },
263
+ {
264
+ "cell_type": "markdown",
265
+ "metadata": {},
266
+ "source": [
267
+ "## Introducing an Assessment step\n",
268
+ "\n",
269
+ "Our initial flow used a one step approach — the model was given the task, a fixed label set, and some input text. While this baseline gives us a useful starting point, it has clear limitations:\n",
270
+ "\n",
271
+ "- The model may rely on generic heuristics or surface patterns that don’t generalize well.\n",
272
+ "- It can confuse similar categories (e.g., \"World\" vs. \"Business\") without knowing how they're typically used.\n",
273
+ "- Without guidance, the model may underperform on edge cases or ambiguous queries.\n",
274
+ "\n",
275
+ "\n",
276
+ "### What is Assessment\n",
277
+ "\n",
278
+ "With an assessment step, we will call the same LLM again, but this time we provide the LLM with its own previous categorization label and the original text. We will prompt the LLM to think about the original prediction, and give it context about challenging cases.\n",
279
+ "In this manner, we can elicit critical judgement from the model about its own prior classification decision. This type of additional context can be useful in the next iteration.\n",
280
+ "\n",
281
+ "\n",
282
+ "### What We’ll Do Next\n",
283
+ "\n",
284
+ "We’ll now enhance our flow by introducing another chain of `PromptBuilder` -> `LLMChatBlock` -> `TextParserBlock` whose purpose is to pass the (original text + prediction) to the LLM and obtain a verification or assessment of the prediction.\n",
285
+ "\n",
286
+ "\n",
287
+ "```mermaid\n",
288
+ "flowchart LR\n",
289
+ " subgraph Flow1[Initial Classification]\n",
290
+ " direction LR\n",
291
+ " A[PromptBuilderBlock] --> B[LLMChatBlock] --> C[TextParserBlock]\n",
292
+ " end\n",
293
+ " subgraph Flow2[Assessment]\n",
294
+ " direction LR\n",
295
+ " D[PromptBuilderBlock_Assessment] --> E[LLMChatBlock_Assessment] --> F[TextParserBlock_Assessment]\n",
296
+ " end\n",
297
+ " \n",
298
+ " C --> D\n",
299
+ "```\n",
300
+ "\n",
301
+ "\n",
302
+ "We will investigate if this catches any of the mis-classifications, and get an idea of how well our verification prompting works!"
303
+ ]
304
+ },
305
+ {
306
+ "cell_type": "code",
307
+ "execution_count": null,
308
+ "metadata": {},
309
+ "outputs": [],
310
+ "source": [
311
+ "promptbuilderblock_assessment = PromptBuilderBlock(block_name='verifier_prompt_builder', input_cols=['text', 'output'], output_cols=['assessment_prompt'], prompt_config_path=\"news_classification_assessment_prompt.yaml\", format_as_messages=True)\n",
312
+ "llmchatblock_assessment = LLMChatBlock(block_name='verifier_llm_chat_block', input_cols=['assessment_prompt'], output_cols=['raw_assessment_output'], async_mode=True)\n",
313
+ "llmparserblock_assessment = LLMParserBlock(block_name='verifier_llm_parser_block', input_cols=['raw_assessment_output'], extract_content=True, expand_lists=True)\n",
314
+ "textparserblock_assessment = TextParserBlock(block_name='verifier_text_parser_block', input_cols=['verifier_llm_parser_block_content'], output_cols=['assessment_output'], start_tags=[''], end_tags=[''])\n",
315
+ "\n",
316
+ "flow = Flow(blocks=[promptbuilderblock_1, llmchatblock_1, llmparserblock_1, textparserblock_1, promptbuilderblock_assessment, llmchatblock_assessment, llmparserblock_assessment, textparserblock_assessment], metadata=FlowMetadata(name=\"annotation_flow\", description=\"A flow for news article classification\", author=\"sdg_hub\"))\n",
317
+ "# flow.set_model_config(model=\"hosted_vllm/meta-llama/Llama-3.3-70B-Instruct\", api_base=\"http://localhost:8000/v1\", api_key=\"\")\n",
318
+ "flow.set_model_config(model=\"hosted_vllm/qwen3-8b\", api_base=\"http://localhost:8101/v1\", api_key=\"empty\")\n",
319
+ "\n",
320
+ "\n",
321
+ "\n",
322
+ "generated_data = flow.generate(test_data)"
323
+ ]
324
+ },
325
+ {
326
+ "cell_type": "code",
327
+ "execution_count": null,
328
+ "metadata": {},
329
+ "outputs": [],
330
+ "source": [
331
+ "generated_data_pd = generated_data.to_pandas()\n",
332
+ "mislabeled_samples = generated_data_pd[generated_data_pd[\"category\"] != generated_data_pd[\"output\"]]\n",
333
+ "\n",
334
+ "print(Panel(mislabeled_samples.iloc[0]['assessment_output'], title=\"Assessment\"))\n",
335
+ "print(Panel(str(mislabeled_samples.iloc[0]['category']), title=\"Ground truth label\"))"
336
+ ]
337
+ },
338
+ {
339
+ "cell_type": "markdown",
340
+ "metadata": {},
341
+ "source": [
342
+ "Great! Now we can see that the assessment step is working well, especially on the misclassified samples as shown above. The above is a hard example which has slipped past our original classification flow, but was caught by our assessment step's critical judgement."
343
+ ]
344
+ },
345
+ {
346
+ "cell_type": "markdown",
347
+ "metadata": {},
348
+ "source": [
349
+ "### Revising the Classifications\n",
350
+ "\n",
351
+ "We will now create our final revision step, which will take the results of the initial prediction and the assessment steps and pass it onto the LLM once again for a revised attempt at classifying the same input text. The flow can be imagined like so:\n",
352
+ "\n",
353
+ "```mermaid\n",
354
+ "flowchart LR\n",
355
+ " subgraph Flow1[Initial Classification]\n",
356
+ " direction LR\n",
357
+ " A[PromptBuilderBlock] --> B[LLMChatBlock] --> C[TextParserBlock]\n",
358
+ " end\n",
359
+ " subgraph Flow2[Assessment]\n",
360
+ " direction LR\n",
361
+ " D[PromptBuilderBlock_Assessment] --> E[LLMChatBlock_Assessment] --> F[TextParserBlock_Assessment]\n",
362
+ " end\n",
363
+ " subgraph Flow3[Revised Classification]\n",
364
+ " direction LR\n",
365
+ " G[PromptBuilderBlock_Revision] --> H[LLMChatBlock_Revision] --> I[TextParserBlock_Revision]\n",
366
+ " end\n",
367
+ " \n",
368
+ " C --> D\n",
369
+ " F --> G\n",
370
+ "```"
371
+ ]
372
+ },
373
+ {
374
+ "cell_type": "code",
375
+ "execution_count": null,
376
+ "metadata": {},
377
+ "outputs": [],
378
+ "source": [
379
+ "promptbuilderblock_revision = PromptBuilderBlock(block_name='revised_prompt_builder', input_cols=['text', 'output', 'assessment_output'], output_cols=['revised_prompt'], prompt_config_path=\"revise_news_classification_prompt.yaml\", format_as_messages=True)\n",
380
+ "llmchatblock_revision = LLMChatBlock(block_name='revised_llm_chat_block', input_cols=['revised_prompt'], output_cols=['raw_revised_output'], temperature=0.0, max_tokens=5, extra_body={'guided_choice': ['World', 'Sports', 'Business', 'Sci/Tech']}, async_mode=True)\n",
381
+ "llmparserblock_revision = LLMParserBlock(block_name='revised_llm_parser_block', input_cols=['raw_revised_output'], extract_content=True, expand_lists=True)\n",
382
+ "textparserblock_revision = TextParserBlock(block_name='revised_text_parser_block', input_cols=['revised_llm_parser_block_content'], output_cols=['revised_output'], start_tags=[''], end_tags=[''])\n",
383
+ "\n",
384
+ "flow = Flow(blocks=[promptbuilderblock_1, llmchatblock_1, llmparserblock_1, textparserblock_1, promptbuilderblock_assessment, llmchatblock_assessment, llmparserblock_assessment, textparserblock_assessment, promptbuilderblock_revision, llmchatblock_revision, llmparserblock_revision, textparserblock_revision], metadata=FlowMetadata(name=\"news_classification_flow\", description=\"A flow for news article classification with assessment and revision\", author=\"sdg_hub\"))\n",
385
+ "# flow.set_model_config(model=\"hosted_vllm/meta-llama/Llama-3.3-70B-Instruct\", api_base=\"http://localhost:8000/v1\", api_key=\"\")\n",
386
+ "flow.set_model_config(model=\"hosted_vllm/qwen3-8b\", api_base=\"http://localhost:8101/v1\", api_key=\"empty\")"
387
+ ]
388
+ },
389
+ {
390
+ "cell_type": "code",
391
+ "execution_count": null,
392
+ "metadata": {},
393
+ "outputs": [],
394
+ "source": [
395
+ "generated_data = flow.generate(test_data)"
396
+ ]
397
+ },
398
+ {
399
+ "cell_type": "code",
400
+ "execution_count": null,
401
+ "metadata": {},
402
+ "outputs": [],
403
+ "source": [
404
+ "print(classification_report(generated_data[\"category\"], generated_data[\"revised_output\"]))"
405
+ ]
406
+ },
407
+ {
408
+ "cell_type": "markdown",
409
+ "metadata": {},
410
+ "source": [
411
+ "🔥 We improved the results drastically! Let us take a look at the number of mislabeled samples before and after the assessment + revision steps\n"
412
+ ]
413
+ },
414
+ {
415
+ "cell_type": "code",
416
+ "execution_count": null,
417
+ "metadata": {},
418
+ "outputs": [],
419
+ "source": [
420
+ "generated_data_pd = generated_data.to_pandas()\n",
421
+ "num_mislabeled_output = (generated_data_pd[\"category\"] != generated_data_pd[\"output\"]).sum()\n",
422
+ "num_mislabeled_revised = (generated_data_pd[\"category\"] != generated_data_pd[\"revised_output\"]).sum()\n",
423
+ "print(f\"Number of mislabeled samples (original output): {num_mislabeled_output}\")\n",
424
+ "print(f\"Number of mislabeled samples (revised output): {num_mislabeled_revised}\")\n"
425
+ ]
426
+ },
427
+ {
428
+ "cell_type": "markdown",
429
+ "metadata": {},
430
+ "source": [
431
+ "Great, we have now improved the classification accuracy of our system by augmenting our naive classification flow with an assessment step followed by a revision step.\n"
432
+ ]
433
+ },
434
+ {
435
+ "cell_type": "markdown",
436
+ "metadata": {},
437
+ "source": [
438
+ "### Export the flow to yaml form\n"
439
+ ]
440
+ },
441
+ {
442
+ "cell_type": "code",
443
+ "execution_count": null,
444
+ "metadata": {},
445
+ "outputs": [],
446
+ "source": [
447
+ "flow.to_yaml(\"news_classification_flow.yaml\")"
448
+ ]
449
+ },
450
+ {
451
+ "cell_type": "markdown",
452
+ "metadata": {},
453
+ "source": [
454
+ "## ✅ Summary: What You’ve Learned\n",
455
+ "\n",
456
+ "In this tutorial, you learned how to create your own flow for a custom use-case using `sdg_hub`, using the fundamental components: `Flow` and `Block`. You also learned how to create and structure the prompts. You learned how to design an assessment or a judgement step in order to improve the performance of the overall system. You started from scratch and evolved it into a robust, high-accuracy system.\n",
457
+ "\n",
458
+ "## 🚀 What’s Next?\n",
459
+ "\n",
460
+ "* Prompt Engineer! - You can add examples for classifications directly in the classification steps and see how this improves the performance. In-context examples are extremely effective at aligning the model's outputs to the task at hand\n",
461
+ "* Try it out on your own data!"
462
+ ]
463
+ }
464
+ ],
465
+ "metadata": {
466
+ "kernelspec": {
467
+ "display_name": "test_nb",
468
+ "language": "python",
469
+ "name": "python3"
470
+ },
471
+ "language_info": {
472
+ "codemirror_mode": {
473
+ "name": "ipython",
474
+ "version": 3
475
+ },
476
+ "file_extension": ".py",
477
+ "mimetype": "text/x-python",
478
+ "name": "python",
479
+ "nbconvert_exporter": "python",
480
+ "pygments_lexer": "ipython3",
481
+ "version": "3.12.8"
482
+ }
483
+ },
484
+ "nbformat": 4,
485
+ "nbformat_minor": 2
486
+ }