sdg-hub 0.1.4__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (376) hide show
  1. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/.github/workflows/lint.yml +13 -11
  2. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/.github/workflows/pypi.yaml +2 -2
  3. sdg_hub-0.2.1/CLAUDE.md +171 -0
  4. sdg_hub-0.2.1/CONTRIBUTING.md +251 -0
  5. sdg_hub-0.2.1/PKG-INFO +221 -0
  6. sdg_hub-0.2.1/README.md +153 -0
  7. sdg_hub-0.2.1/docs/README.md +76 -0
  8. sdg_hub-0.2.1/docs/_coverpage.md +14 -0
  9. sdg_hub-0.2.1/docs/_navbar.md +3 -0
  10. sdg_hub-0.2.1/docs/_sidebar.md +23 -0
  11. sdg_hub-0.2.1/docs/api-reference.md +14 -0
  12. sdg_hub-0.2.1/docs/blocks/custom-blocks.md +136 -0
  13. sdg_hub-0.2.1/docs/blocks/evaluation-blocks.md +22 -0
  14. sdg_hub-0.2.1/docs/blocks/filtering-blocks.md +16 -0
  15. sdg_hub-0.2.1/docs/blocks/llm-blocks.md +196 -0
  16. sdg_hub-0.2.1/docs/blocks/overview.md +152 -0
  17. sdg_hub-0.2.1/docs/blocks/transform-blocks.md +31 -0
  18. sdg_hub-0.2.1/docs/concepts.md +173 -0
  19. sdg_hub-0.2.1/docs/development.md +495 -0
  20. sdg_hub-0.2.1/docs/flows/discovery.md +169 -0
  21. sdg_hub-0.2.1/docs/flows/overview.md +305 -0
  22. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/docs/index.html +25 -15
  23. sdg_hub-0.2.1/docs/installation.md +121 -0
  24. sdg_hub-0.2.1/docs/quick-start.md +169 -0
  25. sdg_hub-0.2.1/examples/annotation/annotation_classification.ipynb +865 -0
  26. sdg_hub-0.2.1/examples/annotation/news_classification_assessment_prompt.yaml +42 -0
  27. sdg_hub-0.2.1/examples/annotation/news_classification_prompt.yaml +11 -0
  28. sdg_hub-0.2.1/examples/annotation/revise_news_classification_prompt.yaml +19 -0
  29. sdg_hub-0.2.1/examples/knowledge_tuning/instructlab/.gitignore +1 -0
  30. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/examples/knowledge_tuning/instructlab/README.md +4 -2
  31. sdg_hub-0.2.1/examples/knowledge_tuning/instructlab/docling_v2_config.yaml +19 -0
  32. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/examples/knowledge_tuning/instructlab/document_pre_processing.ipynb +16 -18
  33. sdg_hub-0.2.1/examples/knowledge_tuning/instructlab/knowledge_generation_and_mixing.ipynb +233 -0
  34. sdg_hub-0.2.1/examples/knowledge_tuning/instructlab/logger_config.py +15 -0
  35. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/examples/knowledge_tuning/knowledge_utils.py +103 -43
  36. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/pyproject.toml +24 -19
  37. sdg_hub-0.2.1/scripts/ruff.sh +23 -0
  38. sdg_hub-0.2.1/src/sdg_hub/__init__.py +30 -0
  39. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/src/sdg_hub/_version.py +2 -2
  40. sdg_hub-0.2.1/src/sdg_hub/core/__init__.py +22 -0
  41. sdg_hub-0.2.1/src/sdg_hub/core/blocks/__init__.py +58 -0
  42. sdg_hub-0.2.1/src/sdg_hub/core/blocks/base.py +313 -0
  43. sdg_hub-0.2.1/src/sdg_hub/core/blocks/deprecated_blocks/__init__.py +29 -0
  44. sdg_hub-0.2.1/src/sdg_hub/core/blocks/deprecated_blocks/combine_columns.py +93 -0
  45. sdg_hub-0.2.1/src/sdg_hub/core/blocks/deprecated_blocks/duplicate_columns.py +88 -0
  46. sdg_hub-0.2.1/src/sdg_hub/core/blocks/deprecated_blocks/filter_by_value.py +103 -0
  47. sdg_hub-0.2.1/src/sdg_hub/core/blocks/deprecated_blocks/flatten_columns.py +94 -0
  48. sdg_hub-0.2.1/src/sdg_hub/core/blocks/deprecated_blocks/llmblock.py +479 -0
  49. sdg_hub-0.2.1/src/sdg_hub/core/blocks/deprecated_blocks/rename_columns.py +88 -0
  50. sdg_hub-0.2.1/src/sdg_hub/core/blocks/deprecated_blocks/sample_populator.py +58 -0
  51. sdg_hub-0.2.1/src/sdg_hub/core/blocks/deprecated_blocks/selector.py +97 -0
  52. sdg_hub-0.2.1/src/sdg_hub/core/blocks/deprecated_blocks/set_to_majority_value.py +88 -0
  53. sdg_hub-0.2.1/src/sdg_hub/core/blocks/evaluation/__init__.py +9 -0
  54. sdg_hub-0.2.1/src/sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +564 -0
  55. sdg_hub-0.2.1/src/sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +564 -0
  56. sdg_hub-0.2.1/src/sdg_hub/core/blocks/evaluation/verify_question_block.py +564 -0
  57. sdg_hub-0.2.1/src/sdg_hub/core/blocks/filtering/__init__.py +12 -0
  58. sdg_hub-0.2.1/src/sdg_hub/core/blocks/filtering/column_value_filter.py +188 -0
  59. sdg_hub-0.2.1/src/sdg_hub/core/blocks/llm/__init__.py +27 -0
  60. sdg_hub-0.2.1/src/sdg_hub/core/blocks/llm/client_manager.py +398 -0
  61. sdg_hub-0.2.1/src/sdg_hub/core/blocks/llm/config.py +336 -0
  62. sdg_hub-0.2.1/src/sdg_hub/core/blocks/llm/error_handler.py +368 -0
  63. sdg_hub-0.2.1/src/sdg_hub/core/blocks/llm/llm_chat_block.py +542 -0
  64. sdg_hub-0.2.1/src/sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +491 -0
  65. sdg_hub-0.2.1/src/sdg_hub/core/blocks/llm/prompt_builder_block.py +368 -0
  66. sdg_hub-0.2.1/src/sdg_hub/core/blocks/llm/text_parser_block.py +357 -0
  67. sdg_hub-0.2.1/src/sdg_hub/core/blocks/registry.py +331 -0
  68. sdg_hub-0.2.1/src/sdg_hub/core/blocks/transform/__init__.py +23 -0
  69. sdg_hub-0.2.1/src/sdg_hub/core/blocks/transform/duplicate_columns.py +88 -0
  70. sdg_hub-0.2.1/src/sdg_hub/core/blocks/transform/index_based_mapper.py +225 -0
  71. sdg_hub-0.2.1/src/sdg_hub/core/blocks/transform/melt_columns.py +126 -0
  72. sdg_hub-0.2.1/src/sdg_hub/core/blocks/transform/rename_columns.py +69 -0
  73. sdg_hub-0.2.1/src/sdg_hub/core/blocks/transform/text_concat.py +102 -0
  74. sdg_hub-0.2.1/src/sdg_hub/core/blocks/transform/uniform_col_val_setter.py +101 -0
  75. sdg_hub-0.2.1/src/sdg_hub/core/flow/__init__.py +20 -0
  76. sdg_hub-0.2.1/src/sdg_hub/core/flow/base.py +1209 -0
  77. sdg_hub-0.2.1/src/sdg_hub/core/flow/checkpointer.py +333 -0
  78. sdg_hub-0.2.1/src/sdg_hub/core/flow/metadata.py +389 -0
  79. sdg_hub-0.2.1/src/sdg_hub/core/flow/migration.py +198 -0
  80. sdg_hub-0.2.1/src/sdg_hub/core/flow/registry.py +393 -0
  81. sdg_hub-0.2.1/src/sdg_hub/core/flow/validation.py +277 -0
  82. {sdg_hub-0.1.4/src/sdg_hub → sdg_hub-0.2.1/src/sdg_hub/core}/utils/__init__.py +7 -4
  83. sdg_hub-0.2.1/src/sdg_hub/core/utils/datautils.py +63 -0
  84. sdg_hub-0.2.1/src/sdg_hub/core/utils/error_handling.py +208 -0
  85. sdg_hub-0.2.1/src/sdg_hub/core/utils/flow_id_words.yaml +231 -0
  86. sdg_hub-0.2.1/src/sdg_hub/core/utils/flow_identifier.py +94 -0
  87. {sdg_hub-0.1.4/src/sdg_hub → sdg_hub-0.2.1/src/sdg_hub/core}/utils/path_resolution.py +2 -2
  88. sdg_hub-0.2.1/src/sdg_hub/core/utils/yaml_utils.py +59 -0
  89. sdg_hub-0.2.1/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/atomic_facts.yaml +40 -0
  90. sdg_hub-0.2.1/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/detailed_summary.yaml +13 -0
  91. sdg_hub-0.2.1/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_faithfulness.yaml +64 -0
  92. sdg_hub-0.2.1/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_question.yaml +29 -0
  93. sdg_hub-0.2.1/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_relevancy.yaml +81 -0
  94. sdg_hub-0.2.1/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/extractive_summary.yaml +13 -0
  95. sdg_hub-0.2.1/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +192 -0
  96. sdg_hub-0.2.1/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/generate_questions_responses.yaml +54 -0
  97. sdg_hub-0.2.1/src/sdg_hub.egg-info/PKG-INFO +221 -0
  98. sdg_hub-0.2.1/src/sdg_hub.egg-info/SOURCES.txt +174 -0
  99. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/src/sdg_hub.egg-info/requires.txt +14 -5
  100. sdg_hub-0.2.1/tests/blocks/deprecated/test_llmblock.py +148 -0
  101. {sdg_hub-0.1.4/src/sdg_hub → sdg_hub-0.2.1/tests/blocks/evaluation}/__init__.py +1 -2
  102. sdg_hub-0.2.1/tests/blocks/evaluation/test_evaluate_faithfulness_block.py +496 -0
  103. sdg_hub-0.2.1/tests/blocks/evaluation/test_evaluate_relevancy_block.py +493 -0
  104. sdg_hub-0.2.1/tests/blocks/evaluation/test_verify_question_block.py +480 -0
  105. sdg_hub-0.2.1/tests/blocks/filtering/test_columnvaluefilter.py +361 -0
  106. sdg_hub-0.2.1/tests/blocks/llm/test_llm_chat_block.py +985 -0
  107. sdg_hub-0.2.1/tests/blocks/llm/test_llm_chat_with_parsing_retry_block.py +868 -0
  108. sdg_hub-0.2.1/tests/blocks/llm/test_promptbuilderblock.py +552 -0
  109. sdg_hub-0.2.1/tests/blocks/llm/test_textparserblock.py +1270 -0
  110. sdg_hub-0.2.1/tests/blocks/test_base_block.py +982 -0
  111. sdg_hub-0.2.1/tests/blocks/test_registry.py +398 -0
  112. sdg_hub-0.2.1/tests/blocks/testdata/test_evaluate_faithfulness.yaml +17 -0
  113. sdg_hub-0.2.1/tests/blocks/testdata/test_evaluate_relevancy.yaml +24 -0
  114. sdg_hub-0.2.1/tests/blocks/testdata/test_prompt_format_config.yaml +9 -0
  115. sdg_hub-0.2.1/tests/blocks/testdata/test_prompt_format_no_system.yaml +6 -0
  116. sdg_hub-0.2.1/tests/blocks/testdata/test_prompt_format_strict.yaml +6 -0
  117. sdg_hub-0.2.1/tests/blocks/testdata/test_prompt_invalid_final_role.yaml +5 -0
  118. sdg_hub-0.2.1/tests/blocks/testdata/test_prompt_no_user_messages.yaml +5 -0
  119. sdg_hub-0.2.1/tests/blocks/testdata/test_verify_question.yaml +27 -0
  120. sdg_hub-0.2.1/tests/blocks/transform/test_index_based_mapper.py +303 -0
  121. sdg_hub-0.2.1/tests/blocks/transform/test_melt_columns.py +273 -0
  122. sdg_hub-0.2.1/tests/blocks/transform/test_text_concat.py +122 -0
  123. sdg_hub-0.2.1/tests/blocks/transform/test_uniform_col_val_setter.py +397 -0
  124. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/tests/blocks/utilblocks/test_combinecolumns.py +5 -3
  125. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/tests/blocks/utilblocks/test_duplicatecolumnsblock.py +13 -13
  126. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/tests/blocks/utilblocks/test_flattenblock.py +2 -2
  127. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/tests/blocks/utilblocks/test_renameblock.py +2 -17
  128. sdg_hub-0.2.1/tests/blocks/utilblocks/test_samplepopulatorblock.py +37 -0
  129. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/tests/blocks/utilblocks/test_selectorblock.py +12 -14
  130. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/tests/blocks/utilblocks/test_settomajority.py +7 -4
  131. sdg_hub-0.2.1/tests/flow/__init__.py +2 -0
  132. sdg_hub-0.2.1/tests/flow/conftest.py +194 -0
  133. sdg_hub-0.2.1/tests/flow/test_base.py +1123 -0
  134. sdg_hub-0.2.1/tests/flow/test_checkpointer.py +331 -0
  135. sdg_hub-0.2.1/tests/flow/test_integration.py +530 -0
  136. sdg_hub-0.2.1/tests/flow/test_metadata.py +464 -0
  137. sdg_hub-0.2.1/tests/flow/test_migration.py +449 -0
  138. sdg_hub-0.2.1/tests/flow/test_registry.py +604 -0
  139. sdg_hub-0.2.1/tests/flow/test_validation.py +348 -0
  140. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/tests/utils/test_error_handling.py +3 -4
  141. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/tests/utils/test_path_resolution.py +3 -4
  142. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/tox.ini +1 -2
  143. sdg_hub-0.1.4/CLAUDE.md +0 -100
  144. sdg_hub-0.1.4/CONTRIBUTING.md +0 -30
  145. sdg_hub-0.1.4/MANIFEST.in +0 -2
  146. sdg_hub-0.1.4/PKG-INFO +0 -190
  147. sdg_hub-0.1.4/README.md +0 -131
  148. sdg_hub-0.1.4/assets/imgs/IL_skills_pipeline.png +0 -0
  149. sdg_hub-0.1.4/assets/imgs/fig-workflow.png +0 -0
  150. sdg_hub-0.1.4/assets/imgs/instructlab-banner.png +0 -0
  151. sdg_hub-0.1.4/assets/imgs/overview.png +0 -0
  152. sdg_hub-0.1.4/docs/README.md +0 -51
  153. sdg_hub-0.1.4/docs/_coverpage.md +0 -11
  154. sdg_hub-0.1.4/docs/_navbar.md +0 -5
  155. sdg_hub-0.1.4/docs/_sidebar.md +0 -27
  156. sdg_hub-0.1.4/docs/architecture.md +0 -149
  157. sdg_hub-0.1.4/docs/blocks.md +0 -537
  158. sdg_hub-0.1.4/docs/changelog.md +0 -82
  159. sdg_hub-0.1.4/docs/configuration.md +0 -201
  160. sdg_hub-0.1.4/docs/development.md +0 -367
  161. sdg_hub-0.1.4/docs/examples.md +0 -191
  162. sdg_hub-0.1.4/docs/installation.md +0 -100
  163. sdg_hub-0.1.4/docs/prompts.md +0 -370
  164. sdg_hub-0.1.4/docs/quick-start.md +0 -128
  165. sdg_hub-0.1.4/docs/web-interface.md +0 -230
  166. sdg_hub-0.1.4/examples/knowledge_tuning/README.md +0 -115
  167. sdg_hub-0.1.4/examples/knowledge_tuning/data-generation-with-llama-70b/data-generation-with-llama-70b.ipynb +0 -340
  168. sdg_hub-0.1.4/examples/knowledge_tuning/data-generation-with-llama-70b/synth_knowledge1.5_llama3.3.yaml +0 -136
  169. sdg_hub-0.1.4/examples/knowledge_tuning/instructlab/knowledge_generation_and_mixing.ipynb +0 -196
  170. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/README.md +0 -311
  171. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/assets/customized_nano_quality_results.png +0 -0
  172. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/blocks/blocks.py +0 -60
  173. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/flows/synth_knowledge1.5_nemotron_super_49b.yaml +0 -174
  174. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/flows/synth_knowledge_reasoning_nemotron_super_49b.yaml +0 -178
  175. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/flows/synth_knowledge_reasoning_nemotron_super_49b_rewrite_with_diversity.yaml +0 -118
  176. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/flows/synth_knowledge_reasoning_nemotron_super_49b_summary_diversity.yaml +0 -118
  177. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/flows/synth_knowledge_reasoning_nemotron_super_49b_summary_diversity_cot.yaml +0 -118
  178. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/generate.py +0 -194
  179. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/prompts/generate_answers.yaml +0 -51
  180. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/prompts/generate_answers_cot.yaml +0 -31
  181. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/prompts/generate_doc_rewrite_inst.yaml +0 -25
  182. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/prompts/generate_document_rewrite.yaml +0 -20
  183. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/prompts/generate_questions.yaml +0 -47
  184. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/prompts/generate_questions_responses.yaml +0 -59
  185. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/prompts/generate_summary.yaml +0 -20
  186. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/prompts/generate_summary_inst.yaml +0 -24
  187. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/reasoning_sdg.ipynb +0 -1251
  188. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/reasoning_sdg_data_mixing.ipynb +0 -471
  189. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/reasoning_sdg_financebench.ipynb +0 -1078
  190. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/utils.py +0 -121
  191. sdg_hub-0.1.4/examples/skills_tuning/instructlab/README.md +0 -118
  192. sdg_hub-0.1.4/examples/skills_tuning/instructlab/annotation_classification.ipynb +0 -543
  193. sdg_hub-0.1.4/examples/skills_tuning/instructlab/blocks/__init__.py +0 -3
  194. sdg_hub-0.1.4/examples/skills_tuning/instructlab/blocks/add_question.py +0 -91
  195. sdg_hub-0.1.4/examples/skills_tuning/instructlab/blocks/docling_parse_pdf.py +0 -91
  196. sdg_hub-0.1.4/examples/skills_tuning/instructlab/blocks/json_format.py +0 -133
  197. sdg_hub-0.1.4/examples/skills_tuning/instructlab/flows/detailed_annotation.yaml +0 -16
  198. sdg_hub-0.1.4/examples/skills_tuning/instructlab/flows/grounded_summary_extraction.yaml +0 -53
  199. sdg_hub-0.1.4/examples/skills_tuning/instructlab/flows/simple_annotation.yaml +0 -16
  200. sdg_hub-0.1.4/examples/skills_tuning/instructlab/flows/unstructured_to_structured.yaml +0 -47
  201. sdg_hub-0.1.4/examples/skills_tuning/instructlab/prompts/keywords.yaml +0 -29
  202. sdg_hub-0.1.4/examples/skills_tuning/instructlab/prompts/named_entities.yaml +0 -40
  203. sdg_hub-0.1.4/examples/skills_tuning/instructlab/prompts/sentiment.yaml +0 -28
  204. sdg_hub-0.1.4/examples/skills_tuning/instructlab/prompts/summary.yaml +0 -29
  205. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/09b5b62d328d3d0719b6825357fdfb48.pdf +0 -169
  206. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/0d631e444d1c22f0be99a69f5deaff94.pdf +0 -112
  207. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/1270f7f67f406b52a2ee86584b452bff.pdf +0 -74
  208. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/14f3d2486b21e639a953afb7ad03d90c.pdf +0 -112
  209. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/1689b94530eca82b7758c86b4cf3125f.pdf +0 -112
  210. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/171fd9df333ddd814c764843ed624121.pdf +0 -150
  211. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/1949bd0c9c4c23d495d880c4c552bfe1.pdf +0 -131
  212. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/2b626b620ef42f716c6028c74ee4187b.pdf +0 -74
  213. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/3877b1983229ec488c6349a188bccf92.pdf +0 -207
  214. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/3bc6d3e1c0a117340d288c289bf7f679.pdf +0 -93
  215. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/3e714a49937be1672aa48244ba7254ce.pdf +0 -74
  216. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/6064088db0200b32f3f3e848047c5ab6.pdf +0 -74
  217. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/73c60e60043b8775dac929320839a8c6.pdf +0 -93
  218. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/77423f08f0208d476dea73c639f6293a.pdf +0 -169
  219. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/78cf0d3e40caba622d8914916f0f9146.pdf +0 -93
  220. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/7a29e2dcd505f944b16d1e3173cb1c01.pdf +0 -93
  221. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/8c1b4f4af2af2847a240041390e31399.pdf +0 -93
  222. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/8cd753ed00aeee0ed32d03823eef3f7e.pdf +0 -93
  223. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/a24a661c2eb55542903c72391ec09f9b.pdf +0 -112
  224. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/b3d7bc295d09d9927e465213612c0192.pdf +0 -150
  225. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/b7050f62f52a3d2803beea21404f7af6.pdf +0 -112
  226. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/b9b40b0c1e92fb226067bdceacbdab5c.pdf +0 -74
  227. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/c20824ea6f927fe380f48a904cf4821b.pdf +0 -93
  228. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/c2bad61ce58687fad602549f6048004b.pdf +0 -93
  229. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/c47a92e006b54d014a79b447528c55a7.pdf +0 -112
  230. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/da879f8ea1c23aa6565cccaacac271fc.pdf +0 -169
  231. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/e52e6870e8a04339ef969543fc0f0329.pdf +0 -74
  232. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/ecd8e1f1c0fa27dfdd24b358cb65012f.pdf +0 -74
  233. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/f28832481653818f8062a497655fb09e.pdf +0 -74
  234. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/ff898f396d49760343d08575ea773b54.pdf +0 -93
  235. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts.jsonl +0 -30
  236. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/table_manipulation_qna.yaml +0 -97
  237. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/unstructured_to_structured_qna.yaml +0 -49
  238. sdg_hub-0.1.4/examples/skills_tuning/instructlab/structured_summary.ipynb +0 -555
  239. sdg_hub-0.1.4/examples/skills_tuning/instructlab/table_manipulation.ipynb +0 -1034
  240. sdg_hub-0.1.4/examples/skills_tuning/instructlab/unstructured_to_structured.ipynb +0 -591
  241. sdg_hub-0.1.4/scripts/ruff.sh +0 -54
  242. sdg_hub-0.1.4/src/sdg_hub/blocks/__init__.py +0 -42
  243. sdg_hub-0.1.4/src/sdg_hub/blocks/block.py +0 -96
  244. sdg_hub-0.1.4/src/sdg_hub/blocks/llmblock.py +0 -375
  245. sdg_hub-0.1.4/src/sdg_hub/blocks/openaichatblock.py +0 -556
  246. sdg_hub-0.1.4/src/sdg_hub/blocks/utilblocks.py +0 -597
  247. sdg_hub-0.1.4/src/sdg_hub/checkpointer.py +0 -139
  248. sdg_hub-0.1.4/src/sdg_hub/configs/annotations/cot_reflection.yaml +0 -34
  249. sdg_hub-0.1.4/src/sdg_hub/configs/annotations/detailed_annotations.yaml +0 -28
  250. sdg_hub-0.1.4/src/sdg_hub/configs/annotations/detailed_description.yaml +0 -10
  251. sdg_hub-0.1.4/src/sdg_hub/configs/annotations/detailed_description_icl.yaml +0 -32
  252. sdg_hub-0.1.4/src/sdg_hub/configs/annotations/simple_annotations.yaml +0 -9
  253. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/__init__.py +0 -0
  254. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/atomic_facts.yaml +0 -46
  255. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/auxilary_instructions.yaml +0 -35
  256. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/detailed_summary.yaml +0 -18
  257. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/evaluate_faithfulness.yaml +0 -68
  258. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/evaluate_question.yaml +0 -38
  259. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/evaluate_relevancy.yaml +0 -84
  260. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/extractive_summary.yaml +0 -18
  261. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/generate_code_questions_responses.yaml +0 -39
  262. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/generate_questions.yaml +0 -82
  263. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/generate_questions_responses.yaml +0 -56
  264. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/generate_responses.yaml +0 -86
  265. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/mcq_generation.yaml +0 -83
  266. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/router.yaml +0 -12
  267. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/simple_generate_qa.yaml +0 -34
  268. sdg_hub-0.1.4/src/sdg_hub/configs/reasoning/__init__.py +0 -0
  269. sdg_hub-0.1.4/src/sdg_hub/configs/reasoning/dynamic_cot.yaml +0 -40
  270. sdg_hub-0.1.4/src/sdg_hub/configs/skills/__init__.py +0 -0
  271. sdg_hub-0.1.4/src/sdg_hub/configs/skills/analyzer.yaml +0 -48
  272. sdg_hub-0.1.4/src/sdg_hub/configs/skills/annotation.yaml +0 -36
  273. sdg_hub-0.1.4/src/sdg_hub/configs/skills/contexts.yaml +0 -28
  274. sdg_hub-0.1.4/src/sdg_hub/configs/skills/critic.yaml +0 -60
  275. sdg_hub-0.1.4/src/sdg_hub/configs/skills/evaluate_freeform_pair.yaml +0 -111
  276. sdg_hub-0.1.4/src/sdg_hub/configs/skills/evaluate_freeform_questions.yaml +0 -78
  277. sdg_hub-0.1.4/src/sdg_hub/configs/skills/evaluate_grounded_pair.yaml +0 -119
  278. sdg_hub-0.1.4/src/sdg_hub/configs/skills/evaluate_grounded_questions.yaml +0 -51
  279. sdg_hub-0.1.4/src/sdg_hub/configs/skills/freeform_questions.yaml +0 -34
  280. sdg_hub-0.1.4/src/sdg_hub/configs/skills/freeform_responses.yaml +0 -39
  281. sdg_hub-0.1.4/src/sdg_hub/configs/skills/grounded_questions.yaml +0 -38
  282. sdg_hub-0.1.4/src/sdg_hub/configs/skills/grounded_responses.yaml +0 -59
  283. sdg_hub-0.1.4/src/sdg_hub/configs/skills/icl_examples/STEM.yaml +0 -56
  284. sdg_hub-0.1.4/src/sdg_hub/configs/skills/icl_examples/__init__.py +0 -0
  285. sdg_hub-0.1.4/src/sdg_hub/configs/skills/icl_examples/coding.yaml +0 -97
  286. sdg_hub-0.1.4/src/sdg_hub/configs/skills/icl_examples/extraction.yaml +0 -36
  287. sdg_hub-0.1.4/src/sdg_hub/configs/skills/icl_examples/humanities.yaml +0 -71
  288. sdg_hub-0.1.4/src/sdg_hub/configs/skills/icl_examples/math.yaml +0 -85
  289. sdg_hub-0.1.4/src/sdg_hub/configs/skills/icl_examples/reasoning.yaml +0 -30
  290. sdg_hub-0.1.4/src/sdg_hub/configs/skills/icl_examples/roleplay.yaml +0 -45
  291. sdg_hub-0.1.4/src/sdg_hub/configs/skills/icl_examples/writing.yaml +0 -80
  292. sdg_hub-0.1.4/src/sdg_hub/configs/skills/judge.yaml +0 -53
  293. sdg_hub-0.1.4/src/sdg_hub/configs/skills/planner.yaml +0 -67
  294. sdg_hub-0.1.4/src/sdg_hub/configs/skills/respond.yaml +0 -8
  295. sdg_hub-0.1.4/src/sdg_hub/configs/skills/revised_responder.yaml +0 -78
  296. sdg_hub-0.1.4/src/sdg_hub/configs/skills/router.yaml +0 -59
  297. sdg_hub-0.1.4/src/sdg_hub/configs/skills/simple_generate_qa_freeform.yaml +0 -27
  298. sdg_hub-0.1.4/src/sdg_hub/configs/skills/simple_generate_qa_grounded.yaml +0 -31
  299. sdg_hub-0.1.4/src/sdg_hub/flow.py +0 -477
  300. sdg_hub-0.1.4/src/sdg_hub/flow_runner.py +0 -450
  301. sdg_hub-0.1.4/src/sdg_hub/flows/generation/knowledge/mmlu_bench.yaml +0 -13
  302. sdg_hub-0.1.4/src/sdg_hub/flows/generation/knowledge/simple_knowledge.yaml +0 -12
  303. sdg_hub-0.1.4/src/sdg_hub/flows/generation/knowledge/synth_knowledge.yaml +0 -89
  304. sdg_hub-0.1.4/src/sdg_hub/flows/generation/knowledge/synth_knowledge1.5.yaml +0 -136
  305. sdg_hub-0.1.4/src/sdg_hub/flows/generation/skills/improve_responses.yaml +0 -103
  306. sdg_hub-0.1.4/src/sdg_hub/flows/generation/skills/simple_freeform_skill.yaml +0 -12
  307. sdg_hub-0.1.4/src/sdg_hub/flows/generation/skills/simple_grounded_skill.yaml +0 -12
  308. sdg_hub-0.1.4/src/sdg_hub/flows/generation/skills/synth_grounded_skills.yaml +0 -80
  309. sdg_hub-0.1.4/src/sdg_hub/flows/generation/skills/synth_skills.yaml +0 -59
  310. sdg_hub-0.1.4/src/sdg_hub/pipeline.py +0 -121
  311. sdg_hub-0.1.4/src/sdg_hub/prompts.py +0 -80
  312. sdg_hub-0.1.4/src/sdg_hub/registry.py +0 -122
  313. sdg_hub-0.1.4/src/sdg_hub/sdg.py +0 -206
  314. sdg_hub-0.1.4/src/sdg_hub/utils/config_validation.py +0 -91
  315. sdg_hub-0.1.4/src/sdg_hub/utils/datautils.py +0 -14
  316. sdg_hub-0.1.4/src/sdg_hub/utils/error_handling.py +0 -94
  317. sdg_hub-0.1.4/src/sdg_hub/utils/validation_result.py +0 -10
  318. sdg_hub-0.1.4/src/sdg_hub.egg-info/PKG-INFO +0 -190
  319. sdg_hub-0.1.4/src/sdg_hub.egg-info/SOURCES.txt +0 -253
  320. sdg_hub-0.1.4/tests/__init__.py +0 -0
  321. sdg_hub-0.1.4/tests/blocks/test_llmblock.py +0 -343
  322. sdg_hub-0.1.4/tests/blocks/test_openaichatblock.py +0 -647
  323. sdg_hub-0.1.4/tests/blocks/utilblocks/test_filterblock.py +0 -157
  324. sdg_hub-0.1.4/tests/blocks/utilblocks/test_samplepopulatorblock.py +0 -195
  325. sdg_hub-0.1.4/tests/flows/test_flow.py +0 -257
  326. sdg_hub-0.1.4/tests/flows/test_flow_column_validation.py +0 -72
  327. sdg_hub-0.1.4/tests/flows/test_flow_path.py +0 -109
  328. sdg_hub-0.1.4/tests/flows/test_flow_validation.py +0 -46
  329. sdg_hub-0.1.4/tests/flows/testdata/test_config_1.yaml +0 -7
  330. sdg_hub-0.1.4/tests/flows/testdata/test_flow_1.yaml +0 -7
  331. sdg_hub-0.1.4/tests/flows/testdata/test_flow_2.yaml +0 -10
  332. sdg_hub-0.1.4/tests/test_checkpointer.py +0 -175
  333. sdg_hub-0.1.4/tests/test_flowrunner.py +0 -899
  334. sdg_hub-0.1.4/tests/test_pipeline.py +0 -201
  335. sdg_hub-0.1.4/tests/test_sdg.py +0 -413
  336. sdg_hub-0.1.4/tests/utils/test_config_validation.py +0 -174
  337. sdg_hub-0.1.4/web_interface/README.md +0 -133
  338. sdg_hub-0.1.4/web_interface/app.py +0 -227
  339. sdg_hub-0.1.4/web_interface/static/css/style.css +0 -171
  340. sdg_hub-0.1.4/web_interface/static/js/app.js +0 -518
  341. sdg_hub-0.1.4/web_interface/templates/index.html +0 -119
  342. sdg_hub-0.1.4/web_interface/test_block_types.py +0 -75
  343. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/.github/actionlint.yaml +0 -0
  344. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/.github/actions/free-disk-space/action.yml +0 -0
  345. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/.github/dependabot.yml +0 -0
  346. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/.github/mergify.yml +0 -0
  347. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/.github/workflows/actionlint.dockerfile +0 -0
  348. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/.github/workflows/actionlint.yml +0 -0
  349. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/.github/workflows/docs.yml +0 -0
  350. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/.github/workflows/e2e.yml +0 -0
  351. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/.github/workflows/matchers/actionlint.json +0 -0
  352. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/.github/workflows/matchers/pylint.json +0 -0
  353. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/.github/workflows/test.yml +0 -0
  354. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/.gitignore +0 -0
  355. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/.isort.cfg +0 -0
  356. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/.markdownlint-cli2.yaml +0 -0
  357. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/.pre-commit-config.yaml +0 -0
  358. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/.pylintrc +0 -0
  359. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/LICENSE +0 -0
  360. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/Makefile +0 -0
  361. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/docs/.nojekyll +0 -0
  362. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/examples/knowledge_tuning/instructlab/docparser.py +0 -0
  363. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/examples/knowledge_tuning/instructlab/docparser_v2.py +0 -0
  364. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.json +0 -0
  365. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.md +0 -0
  366. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.pdf +0 -0
  367. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/qna.yaml +0 -0
  368. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/setup.cfg +0 -0
  369. {sdg_hub-0.1.4/src/sdg_hub → sdg_hub-0.2.1/src/sdg_hub/core/utils}/logger_config.py +1 -1
  370. /sdg_hub-0.1.4/scripts/__init__.py → /sdg_hub-0.2.1/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/README.md +0 -0
  371. {sdg_hub-0.1.4/src/sdg_hub/configs → sdg_hub-0.2.1/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab}/__init__.py +0 -0
  372. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/src/sdg_hub/py.typed +0 -0
  373. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/src/sdg_hub.egg-info/dependency_links.txt +0 -0
  374. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/src/sdg_hub.egg-info/top_level.txt +0 -0
  375. {sdg_hub-0.1.4/src/sdg_hub/configs/annotations → sdg_hub-0.2.1/tests}/__init__.py +0 -0
  376. {sdg_hub-0.1.4 → sdg_hub-0.2.1}/tests/blocks/testdata/test_config.yaml +0 -0
@@ -5,7 +5,7 @@ name: Lint, Format, and MyPy
5
5
  on:
6
6
  push:
7
7
  branches:
8
- - "main-disabled"
8
+ - "main"
9
9
  paths:
10
10
  - '**.py'
11
11
  - 'pyproject.toml'
@@ -15,7 +15,7 @@ on:
15
15
  - '.github/**'
16
16
  pull_request:
17
17
  branches:
18
- - "main-disabled"
18
+ - "main"
19
19
  paths:
20
20
  - '**.py'
21
21
  - 'pyproject.toml'
@@ -57,13 +57,15 @@ jobs:
57
57
  run: |
58
58
  tox -e ruff -- check
59
59
 
60
- - name: Run linting
61
- if: ${{ !cancelled() && (steps.deps.outcome == 'success') }}
62
- run: |
63
- echo "::add-matcher::.github/workflows/matchers/pylint.json"
64
- tox -e lint
60
+ # Pylint disabled for now - may re-enable as non-blocking check in future
61
+ # - name: Run linting
62
+ # if: ${{ !cancelled() && (steps.deps.outcome == 'success') }}
63
+ # run: |
64
+ # echo "::add-matcher::.github/workflows/matchers/pylint.json"
65
+ # tox -e lint
65
66
 
66
- - name: Run mypy type checks
67
- if: ${{ !cancelled() && (steps.deps.outcome == 'success') }}
68
- run: |
69
- tox -e mypy
67
+ # MyPy type checking disabled for now - may re-enable as non-blocking check in future
68
+ # - name: Run mypy type checks
69
+ # if: ${{ !cancelled() && (steps.deps.outcome == 'success') }}
70
+ # run: |
71
+ # tox -e mypy
@@ -72,7 +72,7 @@ jobs:
72
72
  egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs
73
73
 
74
74
  - name: "Download build artifacts"
75
- uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
75
+ uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
76
76
  with:
77
77
  name: Packages
78
78
  path: dist
@@ -104,7 +104,7 @@ jobs:
104
104
  egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs
105
105
 
106
106
  - name: "Download build artifacts"
107
- uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
107
+ uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
108
108
  with:
109
109
  name: Packages
110
110
  path: dist
@@ -0,0 +1,171 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Project Overview
6
+
7
+ **Requirements:** Python 3.9+
8
+
9
+ SDG Hub is a Python framework for synthetic data generation using composable blocks and flows. Transform datasets through **building-block composition** - mix and match LLM-powered and traditional processing blocks like Lego pieces to create sophisticated data generation workflows.
10
+
11
+ **Core Concepts:**
12
+ - **Blocks** are composable units that transform datasets - think data processing Lego pieces
13
+ - **Flows** orchestrate multiple blocks into complete pipelines defined in YAML
14
+ - Simple concept: `dataset → Block₁ → Block₂ → Block₃ → enriched_dataset`
15
+
16
+ ## Development Commands
17
+
18
+ **Use `uv` for all Python commands and package management.**
19
+
20
+ ### Setup and Installation
21
+ ```bash
22
+ # Install core dependencies
23
+ uv pip install .
24
+
25
+ # Install with development dependencies
26
+ uv pip install .[dev]
27
+ # Alternative: uv sync --extra dev
28
+
29
+ # Install with optional vLLM support
30
+ uv pip install .[vllm]
31
+ # Alternative: uv sync --extra vllm
32
+
33
+ # Install with examples dependencies
34
+ uv pip install .[examples]
35
+ # Alternative: uv sync --extra examples
36
+ ```
37
+
38
+ ### Testing
39
+ ```bash
40
+ # Run all tests
41
+ tox -e py3-unit
42
+
43
+ # Run tests with coverage
44
+ tox -e py3-unitcov
45
+
46
+ # Run specific test file
47
+ pytest tests/test_specific_file.py
48
+
49
+ # Run tests matching pattern
50
+ pytest -k "test_pattern"
51
+ ```
52
+
53
+ ### Linting and Formatting
54
+ ```bash
55
+ # Run full verification (lint, mypy, ruff)
56
+ make verify
57
+
58
+ # Individual lint commands
59
+ tox -e lint # Full pylint check
60
+ tox -e fastlint # Fast pylint (without 3rd party)
61
+ tox -e ruff # Ruff formatting and fixes
62
+ tox -e mypy # Type checking
63
+
64
+ # Format code with ruff
65
+ tox -e ruff -- fix
66
+
67
+ # Check code formatting
68
+ tox -e ruff -- check
69
+ ```
70
+
71
+ ### Other Make targets
72
+ ```bash
73
+ make actionlint # Lint GitHub Actions
74
+ make md-lint # Lint markdown files
75
+ ```
76
+
77
+ ## Core Architecture
78
+
79
+ ### Block System
80
+ The framework is built around a modular block system with **composability at its core** - mix and match blocks to build simple transformations or complex multi-stage pipelines:
81
+
82
+ - **BaseBlock** (`src/sdg_hub/core/blocks/base.py`): Abstract base class for all processing blocks with Pydantic validation
83
+ - **BlockRegistry** (`src/sdg_hub/core/blocks/registry.py`): Auto-discovery system for organizing blocks with zero setup
84
+ - Blocks are organized in categories:
85
+ - `llm/`: LLM-powered blocks (chat, prompt building, text parsing) with async execution
86
+ - `transform/`: Data transformation blocks (column operations, text manipulation)
87
+ - `filtering/`: Data filtering blocks with quality thresholds
88
+ - `evaluation/`: Quality evaluation blocks (faithfulness, relevancy assessment)
89
+ - `deprecated_blocks/`: Legacy blocks maintained for backward compatibility
90
+
91
+ **Key Benefits**: Type-safe composition, automatic validation, rich logging, and high-performance async processing.
92
+
93
+ ### Flow System
94
+ Flows orchestrate multiple blocks into data processing pipelines:
95
+
96
+ - **Flow** (`src/sdg_hub/core/flow/base.py`): Main flow execution class with Pydantic validation
97
+ - **FlowRegistry** (`src/sdg_hub/core/flow/registry.py`): Registry for flow discovery
98
+ - **FlowMetadata** (`src/sdg_hub/core/flow/metadata.py`): Metadata and parameter definitions
99
+ - **FlowValidator** (`src/sdg_hub/core/flow/validation.py`): YAML structure validation
100
+ - **FlowMigration** (`src/sdg_hub/core/flow/migration.py`): Backward compatibility for old flow formats
101
+
102
+ ### Flow Configuration
103
+ Flows are defined in YAML files with this structure:
104
+ ```yaml
105
+ metadata:
106
+ name: "flow_name"
107
+ version: "1.0.0"
108
+ author: "Author Name"
109
+ description: "Flow description"
110
+
111
+ parameters:
112
+ param_name:
113
+ type: "string"
114
+ default: "default_value"
115
+ description: "Parameter description"
116
+
117
+ blocks:
118
+ - block_type: "BlockTypeName"
119
+ block_config:
120
+ block_name: "unique_block_name"
121
+ # block-specific configuration
122
+ ```
123
+
124
+ ### Built-in Flow Discovery
125
+ The framework includes auto-discovery for flows in `src/sdg_hub/flows/`. Example flow structure:
126
+ ```
127
+ flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/
128
+ ├── flow.yaml # Main flow definition
129
+ ├── atomic_facts.yaml # Sub-flow configurations
130
+ ├── detailed_summary.yaml
131
+ └── generate_questions_responses.yaml
132
+ ```
133
+
134
+ ## Key Patterns
135
+
136
+ ### Block Development
137
+ When creating new blocks:
138
+ 1. Inherit from `BaseBlock` and implement the `generate()` method
139
+ 2. Use Pydantic field validation for configuration
140
+ 3. Follow the standardized column handling patterns (`input_cols`, `output_cols`)
141
+ 4. Register blocks in appropriate category directories
142
+ 5. Include proper error handling and logging
143
+
144
+ ### Dataset Processing
145
+ All blocks operate on HuggingFace `datasets.Dataset` objects:
146
+ - Input validation ensures required columns exist
147
+ - Output validation prevents column collisions
148
+ - Rich logging provides processing summaries
149
+ - Empty dataset handling with appropriate errors
150
+
151
+ ### Backward Compatibility
152
+ The framework maintains compatibility with legacy formats:
153
+ - Deprecated blocks are preserved in `deprecated_blocks/`
154
+ - Flow migration automatically converts old YAML formats
155
+ - Legacy LLMBlocks receive special handling during execution
156
+
157
+ ## Testing Guidelines
158
+
159
+ - Tests are organized by block category under `tests/blocks/`
160
+ - Use `pytest` fixtures for common test data
161
+ - Test configuration files are in `tests/blocks/testdata/`
162
+ - Follow the existing pattern of testing both success and error cases
163
+ - Mock LLM clients when testing LLM-powered blocks
164
+
165
+ ## Important Notes
166
+
167
+ - Always use `uv` for Python package management
168
+ - The framework uses Pydantic extensively for validation and configuration
169
+ - LLM clients are managed through the `client_manager.py` system
170
+ - Path resolution is handled centrally in `utils/path_resolution.py`
171
+ - Error handling follows custom exception patterns in `utils/error_handling.py`
@@ -0,0 +1,251 @@
1
+ # Contributing to SDG Hub
2
+
3
+ Welcome to SDG Hub development! This guide covers everything you need to know about contributing blocks, flows, and other improvements to the SDG Hub ecosystem.
4
+
5
+ For detailed documentation including examples and advanced patterns, see our comprehensive [Development Guide](docs/development.md).
6
+
7
+ ## 🚀 Quick Start
8
+
9
+ ### Development Setup
10
+
11
+ 1. **Clone the Repository**
12
+ ```bash
13
+ git clone https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub.git
14
+ cd sdg_hub
15
+ ```
16
+
17
+ 2. **Install Development Dependencies**
18
+ ```bash
19
+ # Using uv (recommended)
20
+ uv sync --extra dev
21
+
22
+ # Or using pip
23
+ pip install .[dev]
24
+ ```
25
+
26
+ ## 🛠️ Development Tools
27
+
28
+ ### Linting and Code Quality
29
+
30
+ **Primary linting tools** (required for all contributions):
31
+ ```bash
32
+ tox -e lint # Full pylint check
33
+ tox -e fastlint # Quick pylint check
34
+ tox -e mypy # Type checking
35
+
36
+ # Ruff (code formatting and linting)
37
+ tox -e ruff # Format and fix issues (development mode)
38
+ tox -e ruff -- check # Check only, no fixes (CI mode)
39
+ ./scripts/ruff.sh # Direct script - format and fix
40
+ ./scripts/ruff.sh check # Direct script - check only
41
+ ./scripts/ruff.sh --help # Pass custom arguments to ruff
42
+ ```
43
+
44
+ **Optional development tools** (require additional dependencies):
45
+ ```bash
46
+ make actionlint # Lint GitHub Actions (requires: actionlint, shellcheck)
47
+ make md-lint # Lint markdown files (requires: podman/docker)
48
+ make verify # Run extended checks: pylint, mypy, ruff (may differ from CI)
49
+ ```
50
+
51
+ ### Testing
52
+
53
+ SDG Hub uses [tox](https://tox.wiki/) for test automation and [pytest](https://docs.pytest.org/) as a test framework:
54
+
55
+ ```bash
56
+ # Run all tests
57
+ tox -e py3-unit
58
+
59
+ # Run with coverage
60
+ tox -e py3-unitcov
61
+
62
+ # Run specific tests
63
+ pytest tests/test_specific_file.py
64
+ pytest -k "test_pattern"
65
+ ```
66
+
67
+ ## 🧱 Contributing Blocks
68
+
69
+ Blocks are the core processing units of SDG Hub. To contribute a new block:
70
+
71
+ 1. **Choose the appropriate category**: `llm`, `transform`, `filtering`, or `evaluation`
72
+ 2. **Implement your block** following the [Custom Blocks Guide](docs/blocks/custom-blocks.md)
73
+ 3. **Add comprehensive tests** in `tests/blocks/[category]/`
74
+ 4. **Update documentation** in the relevant block category page
75
+
76
+ ### Example Block Structure
77
+
78
+ ```python
79
+ from sdg_hub.core.blocks.base import BaseBlock
80
+ from sdg_hub.core.blocks.registry import BlockRegistry
81
+
82
+ @BlockRegistry.register("MyNewBlock", "category", "Description")
83
+ class MyNewBlock(BaseBlock):
84
+ """Comprehensive docstring with examples."""
85
+
86
+ def generate(self, samples: Dataset, **kwargs: Any) -> Dataset:
87
+ # Your implementation here
88
+ pass
89
+ ```
90
+
91
+ ## 🌊 Contributing Flows
92
+
93
+ Flows orchestrate multiple blocks into complete pipelines. To contribute a new flow:
94
+
95
+ 1. **Design your flow** with clear use case and objectives
96
+ 2. **Create flow directory structure** under `src/sdg_hub/flows/[category]/`
97
+ 3. **Implement the flow** with comprehensive YAML configuration
98
+ 4. **Add tests** and documentation
99
+
100
+ ### Flow Directory Structure
101
+
102
+ ```
103
+ src/sdg_hub/flows/[category]/[use_case]/[variant]/
104
+ ├── flow.yaml # Main flow definition
105
+ ├── prompt_template_1.yaml # Supporting templates
106
+ └── README.md # Flow documentation
107
+ ```
108
+
109
+ ## 📋 Contribution Checklist
110
+
111
+ ### For New Blocks
112
+ - [ ] Block placed in correct category directory
113
+ - [ ] Inherits from `BaseBlock` and implements `generate()`
114
+ - [ ] Registered with `@BlockRegistry.register()`
115
+ - [ ] Comprehensive docstring with examples
116
+ - [ ] Proper Pydantic field validation
117
+ - [ ] Comprehensive test suite
118
+ - [ ] Documentation updated
119
+ - [ ] All linting checks pass
120
+ - [ ] All tests pass
121
+
122
+ ### For New Flows
123
+ - [ ] Flow directory structure follows conventions
124
+ - [ ] Complete metadata in `flow.yaml`
125
+ - [ ] Required input columns documented
126
+ - [ ] Supporting templates included
127
+ - [ ] Flow-specific README created
128
+ - [ ] Integration tests written
129
+ - [ ] Documentation updated
130
+
131
+ ## 🔄 Development Workflow
132
+
133
+ ### Git Workflow
134
+
135
+ **Branch Naming:**
136
+ - `feature/block-name-implementation` - New blocks
137
+ - `feature/flow-name-implementation` - New flows
138
+ - `fix/issue-description` - Bug fixes
139
+ - `docs/section-updates` - Documentation updates
140
+
141
+ **Commit Messages:**
142
+ Follow conventional commits:
143
+ ```
144
+ feat(blocks): add TextSummarizerBlock for document summarization
145
+ fix(flows): correct parameter validation in QA generation flow
146
+ docs(blocks): update LLM block examples with new model config
147
+ ```
148
+
149
+ **Pull Request Process:**
150
+ 1. Create feature branch from `main`
151
+ 2. Implement changes with tests and documentation
152
+ 3. Run full verification: `make verify && tox -e py3-unit`
153
+ 4. Create PR with clear description
154
+ 5. Address review feedback
155
+ 6. Squash and merge when approved
156
+
157
+ ## 🤝 Community Guidelines
158
+
159
+ - Be respectful and inclusive
160
+ - Provide constructive feedback
161
+ - Help newcomers get started
162
+ - Follow the project's coding standards
163
+ - Report issues responsibly
164
+
165
+ ## 📚 Documentation
166
+
167
+ For comprehensive guides and examples:
168
+
169
+ - **[Development Guide](docs/development.md)** - Complete development documentation
170
+ - **[Custom Blocks](docs/blocks/custom-blocks.md)** - Building custom blocks
171
+ - **[Flow Configuration](docs/flows/yaml-configuration.md)** - YAML configuration guide
172
+ - **[Block System Overview](docs/blocks/overview.md)** - Understanding the block architecture
173
+ - **[Flow System Overview](docs/flows/overview.md)** - Understanding flow orchestration
174
+
175
+ ## 🚀 Getting Help
176
+
177
+ - **GitHub Issues** - Report bugs, request features
178
+ - **GitHub Discussions** - Ask questions, share ideas
179
+ - **Documentation** - Check existing docs first
180
+ - **Code Examples** - Look at existing implementations
181
+
182
+ You can run all tests by simply running the `tox -e py3-unit` command.
183
+
184
+ ## Documentation Guidelines
185
+
186
+ ### NumPy-Style Docstrings
187
+
188
+ If you choose to add docstrings to your functions, we recommend following the NumPy docstring format for consistency with the scientific Python ecosystem.
189
+
190
+ #### Basic Structure
191
+
192
+ ```python
193
+ def example_function(param1, param2=None):
194
+ """Brief description of the function.
195
+
196
+ Longer description providing more context about what the function does,
197
+ its purpose, and any important behavioral notes.
198
+
199
+ Parameters
200
+ ----------
201
+ param1 : str
202
+ Description of the first parameter
203
+ param2 : int, optional
204
+ Description of the second parameter (default: None)
205
+
206
+ Returns
207
+ -------
208
+ bool
209
+ Description of what the function returns
210
+
211
+ Raises
212
+ ------
213
+ ValueError
214
+ When invalid input is provided
215
+
216
+ Examples
217
+ --------
218
+ >>> result = example_function("hello", 42)
219
+ >>> print(result)
220
+ True
221
+ """
222
+ ```
223
+
224
+ #### Key Guidelines
225
+
226
+ - **Summary**: Start with a concise one-line description
227
+ - **Parameters**: Document all function parameters with types and descriptions
228
+ - **Returns**: Describe return values with types and meaning
229
+ - **Types**: Use standard Python types (`str`, `int`, `list`, `dict`, etc.)
230
+ - **Optional parameters**: Mark parameters that have default values as "optional"
231
+ - **Examples**: Include simple usage examples when helpful
232
+
233
+ #### When to Add Docstrings
234
+
235
+ Docstrings are **optional** but recommended for:
236
+ - Public API functions and classes
237
+ - Complex functions with multiple parameters
238
+ - Functions that might be confusing to other developers
239
+ - Core framework components
240
+
241
+ #### When to Skip Docstrings
242
+
243
+ You may skip docstrings for:
244
+ - Simple utility functions with obvious behavior
245
+ - Private/internal functions (starting with `_`)
246
+ - Functions with self-explanatory names and simple parameters
247
+
248
+ **Remember**: Quality over quantity. A well-written docstring is better than a verbose one, and no docstring is better than a poor one.
249
+
250
+
251
+ Thank you for contributing to SDG Hub! 🎉