sdg-hub 0.1.4__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (364)
  1. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/.github/workflows/lint.yml +13 -11
  2. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/.github/workflows/pypi.yaml +2 -2
  3. sdg_hub-0.2.0/CLAUDE.md +171 -0
  4. sdg_hub-0.2.0/CONTRIBUTING.md +251 -0
  5. sdg_hub-0.2.0/PKG-INFO +218 -0
  6. sdg_hub-0.2.0/README.md +150 -0
  7. sdg_hub-0.2.0/docs/README.md +76 -0
  8. sdg_hub-0.2.0/docs/_coverpage.md +14 -0
  9. sdg_hub-0.2.0/docs/_navbar.md +3 -0
  10. sdg_hub-0.2.0/docs/_sidebar.md +23 -0
  11. sdg_hub-0.2.0/docs/api-reference.md +14 -0
  12. sdg_hub-0.2.0/docs/blocks/custom-blocks.md +136 -0
  13. sdg_hub-0.2.0/docs/blocks/evaluation-blocks.md +22 -0
  14. sdg_hub-0.2.0/docs/blocks/filtering-blocks.md +16 -0
  15. sdg_hub-0.2.0/docs/blocks/llm-blocks.md +196 -0
  16. sdg_hub-0.2.0/docs/blocks/overview.md +152 -0
  17. sdg_hub-0.2.0/docs/blocks/transform-blocks.md +31 -0
  18. sdg_hub-0.2.0/docs/concepts.md +173 -0
  19. sdg_hub-0.2.0/docs/development.md +495 -0
  20. sdg_hub-0.2.0/docs/flows/discovery.md +169 -0
  21. sdg_hub-0.2.0/docs/flows/overview.md +289 -0
  22. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/docs/index.html +25 -15
  23. sdg_hub-0.2.0/docs/installation.md +121 -0
  24. sdg_hub-0.2.0/docs/quick-start.md +169 -0
  25. sdg_hub-0.2.0/examples/knowledge_tuning/instructlab/.gitignore +1 -0
  26. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/examples/knowledge_tuning/instructlab/README.md +4 -2
  27. sdg_hub-0.2.0/examples/knowledge_tuning/instructlab/docling_v2_config.yaml +19 -0
  28. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/examples/knowledge_tuning/instructlab/document_pre_processing.ipynb +16 -18
  29. sdg_hub-0.2.0/examples/knowledge_tuning/instructlab/knowledge_generation_and_mixing.ipynb +233 -0
  30. sdg_hub-0.2.0/examples/knowledge_tuning/instructlab/logger_config.py +15 -0
  31. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/examples/knowledge_tuning/knowledge_utils.py +103 -43
  32. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/pyproject.toml +24 -19
  33. sdg_hub-0.2.0/scripts/ruff.sh +23 -0
  34. sdg_hub-0.2.0/src/sdg_hub/__init__.py +30 -0
  35. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/src/sdg_hub/_version.py +2 -2
  36. sdg_hub-0.2.0/src/sdg_hub/core/__init__.py +22 -0
  37. sdg_hub-0.2.0/src/sdg_hub/core/blocks/__init__.py +58 -0
  38. sdg_hub-0.2.0/src/sdg_hub/core/blocks/base.py +313 -0
  39. sdg_hub-0.2.0/src/sdg_hub/core/blocks/deprecated_blocks/__init__.py +29 -0
  40. sdg_hub-0.2.0/src/sdg_hub/core/blocks/deprecated_blocks/combine_columns.py +93 -0
  41. sdg_hub-0.2.0/src/sdg_hub/core/blocks/deprecated_blocks/duplicate_columns.py +88 -0
  42. sdg_hub-0.2.0/src/sdg_hub/core/blocks/deprecated_blocks/filter_by_value.py +103 -0
  43. sdg_hub-0.2.0/src/sdg_hub/core/blocks/deprecated_blocks/flatten_columns.py +94 -0
  44. sdg_hub-0.2.0/src/sdg_hub/core/blocks/deprecated_blocks/llmblock.py +479 -0
  45. sdg_hub-0.2.0/src/sdg_hub/core/blocks/deprecated_blocks/rename_columns.py +88 -0
  46. sdg_hub-0.2.0/src/sdg_hub/core/blocks/deprecated_blocks/sample_populator.py +58 -0
  47. sdg_hub-0.2.0/src/sdg_hub/core/blocks/deprecated_blocks/selector.py +97 -0
  48. sdg_hub-0.2.0/src/sdg_hub/core/blocks/deprecated_blocks/set_to_majority_value.py +88 -0
  49. sdg_hub-0.2.0/src/sdg_hub/core/blocks/evaluation/__init__.py +9 -0
  50. sdg_hub-0.2.0/src/sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +564 -0
  51. sdg_hub-0.2.0/src/sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +564 -0
  52. sdg_hub-0.2.0/src/sdg_hub/core/blocks/evaluation/verify_question_block.py +564 -0
  53. sdg_hub-0.2.0/src/sdg_hub/core/blocks/filtering/__init__.py +12 -0
  54. sdg_hub-0.2.0/src/sdg_hub/core/blocks/filtering/column_value_filter.py +188 -0
  55. sdg_hub-0.2.0/src/sdg_hub/core/blocks/llm/__init__.py +25 -0
  56. sdg_hub-0.2.0/src/sdg_hub/core/blocks/llm/client_manager.py +398 -0
  57. sdg_hub-0.2.0/src/sdg_hub/core/blocks/llm/config.py +336 -0
  58. sdg_hub-0.2.0/src/sdg_hub/core/blocks/llm/error_handler.py +368 -0
  59. sdg_hub-0.2.0/src/sdg_hub/core/blocks/llm/llm_chat_block.py +542 -0
  60. sdg_hub-0.2.0/src/sdg_hub/core/blocks/llm/prompt_builder_block.py +368 -0
  61. sdg_hub-0.2.0/src/sdg_hub/core/blocks/llm/text_parser_block.py +310 -0
  62. sdg_hub-0.2.0/src/sdg_hub/core/blocks/registry.py +331 -0
  63. sdg_hub-0.2.0/src/sdg_hub/core/blocks/transform/__init__.py +23 -0
  64. sdg_hub-0.2.0/src/sdg_hub/core/blocks/transform/duplicate_columns.py +88 -0
  65. sdg_hub-0.2.0/src/sdg_hub/core/blocks/transform/index_based_mapper.py +225 -0
  66. sdg_hub-0.2.0/src/sdg_hub/core/blocks/transform/melt_columns.py +126 -0
  67. sdg_hub-0.2.0/src/sdg_hub/core/blocks/transform/rename_columns.py +69 -0
  68. sdg_hub-0.2.0/src/sdg_hub/core/blocks/transform/text_concat.py +102 -0
  69. sdg_hub-0.2.0/src/sdg_hub/core/blocks/transform/uniform_col_val_setter.py +101 -0
  70. sdg_hub-0.2.0/src/sdg_hub/core/flow/__init__.py +20 -0
  71. sdg_hub-0.2.0/src/sdg_hub/core/flow/base.py +980 -0
  72. sdg_hub-0.2.0/src/sdg_hub/core/flow/metadata.py +344 -0
  73. sdg_hub-0.2.0/src/sdg_hub/core/flow/migration.py +187 -0
  74. sdg_hub-0.2.0/src/sdg_hub/core/flow/registry.py +330 -0
  75. sdg_hub-0.2.0/src/sdg_hub/core/flow/validation.py +265 -0
  76. {sdg_hub-0.1.4/src/sdg_hub → sdg_hub-0.2.0/src/sdg_hub/core}/utils/__init__.py +6 -4
  77. {sdg_hub-0.1.4/src/sdg_hub → sdg_hub-0.2.0/src/sdg_hub/core}/utils/datautils.py +1 -3
  78. sdg_hub-0.2.0/src/sdg_hub/core/utils/error_handling.py +208 -0
  79. {sdg_hub-0.1.4/src/sdg_hub → sdg_hub-0.2.0/src/sdg_hub/core}/utils/path_resolution.py +2 -2
  80. sdg_hub-0.2.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/atomic_facts.yaml +40 -0
  81. sdg_hub-0.2.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/detailed_summary.yaml +13 -0
  82. sdg_hub-0.2.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_faithfulness.yaml +64 -0
  83. sdg_hub-0.2.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_question.yaml +29 -0
  84. sdg_hub-0.2.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_relevancy.yaml +81 -0
  85. sdg_hub-0.2.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/extractive_summary.yaml +13 -0
  86. sdg_hub-0.2.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +191 -0
  87. sdg_hub-0.2.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/generate_questions_responses.yaml +54 -0
  88. sdg_hub-0.2.0/src/sdg_hub.egg-info/PKG-INFO +218 -0
  89. sdg_hub-0.2.0/src/sdg_hub.egg-info/SOURCES.txt +163 -0
  90. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/src/sdg_hub.egg-info/requires.txt +14 -5
  91. sdg_hub-0.2.0/tests/blocks/deprecated/test_llmblock.py +148 -0
  92. {sdg_hub-0.1.4/src/sdg_hub → sdg_hub-0.2.0/tests/blocks/evaluation}/__init__.py +1 -2
  93. sdg_hub-0.2.0/tests/blocks/evaluation/test_evaluate_faithfulness_block.py +496 -0
  94. sdg_hub-0.2.0/tests/blocks/evaluation/test_evaluate_relevancy_block.py +493 -0
  95. sdg_hub-0.2.0/tests/blocks/evaluation/test_verify_question_block.py +480 -0
  96. sdg_hub-0.2.0/tests/blocks/filtering/test_columnvaluefilter.py +361 -0
  97. sdg_hub-0.2.0/tests/blocks/llm/test_llm_chat_block.py +985 -0
  98. sdg_hub-0.2.0/tests/blocks/llm/test_promptbuilderblock.py +552 -0
  99. sdg_hub-0.2.0/tests/blocks/llm/test_textparserblock.py +1029 -0
  100. sdg_hub-0.2.0/tests/blocks/test_base_block.py +982 -0
  101. sdg_hub-0.2.0/tests/blocks/test_registry.py +398 -0
  102. sdg_hub-0.2.0/tests/blocks/testdata/test_evaluate_faithfulness.yaml +17 -0
  103. sdg_hub-0.2.0/tests/blocks/testdata/test_evaluate_relevancy.yaml +24 -0
  104. sdg_hub-0.2.0/tests/blocks/testdata/test_prompt_format_config.yaml +9 -0
  105. sdg_hub-0.2.0/tests/blocks/testdata/test_prompt_format_no_system.yaml +6 -0
  106. sdg_hub-0.2.0/tests/blocks/testdata/test_prompt_format_strict.yaml +6 -0
  107. sdg_hub-0.2.0/tests/blocks/testdata/test_prompt_invalid_final_role.yaml +5 -0
  108. sdg_hub-0.2.0/tests/blocks/testdata/test_prompt_no_user_messages.yaml +5 -0
  109. sdg_hub-0.2.0/tests/blocks/testdata/test_verify_question.yaml +27 -0
  110. sdg_hub-0.2.0/tests/blocks/transform/test_index_based_mapper.py +303 -0
  111. sdg_hub-0.2.0/tests/blocks/transform/test_melt_columns.py +273 -0
  112. sdg_hub-0.2.0/tests/blocks/transform/test_text_concat.py +122 -0
  113. sdg_hub-0.2.0/tests/blocks/transform/test_uniform_col_val_setter.py +397 -0
  114. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/tests/blocks/utilblocks/test_combinecolumns.py +5 -3
  115. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/tests/blocks/utilblocks/test_duplicatecolumnsblock.py +13 -13
  116. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/tests/blocks/utilblocks/test_flattenblock.py +2 -2
  117. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/tests/blocks/utilblocks/test_renameblock.py +2 -17
  118. sdg_hub-0.2.0/tests/blocks/utilblocks/test_samplepopulatorblock.py +37 -0
  119. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/tests/blocks/utilblocks/test_selectorblock.py +12 -14
  120. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/tests/blocks/utilblocks/test_settomajority.py +7 -4
  121. sdg_hub-0.2.0/tests/flow/__init__.py +2 -0
  122. sdg_hub-0.2.0/tests/flow/conftest.py +194 -0
  123. sdg_hub-0.2.0/tests/flow/test_base.py +942 -0
  124. sdg_hub-0.2.0/tests/flow/test_integration.py +515 -0
  125. sdg_hub-0.2.0/tests/flow/test_metadata.py +421 -0
  126. sdg_hub-0.2.0/tests/flow/test_migration.py +366 -0
  127. sdg_hub-0.2.0/tests/flow/test_registry.py +384 -0
  128. sdg_hub-0.2.0/tests/flow/test_validation.py +348 -0
  129. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/tests/utils/test_error_handling.py +4 -4
  130. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/tests/utils/test_path_resolution.py +3 -4
  131. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/tox.ini +1 -2
  132. sdg_hub-0.1.4/CLAUDE.md +0 -100
  133. sdg_hub-0.1.4/CONTRIBUTING.md +0 -30
  134. sdg_hub-0.1.4/MANIFEST.in +0 -2
  135. sdg_hub-0.1.4/PKG-INFO +0 -190
  136. sdg_hub-0.1.4/README.md +0 -131
  137. sdg_hub-0.1.4/assets/imgs/IL_skills_pipeline.png +0 -0
  138. sdg_hub-0.1.4/assets/imgs/fig-workflow.png +0 -0
  139. sdg_hub-0.1.4/assets/imgs/instructlab-banner.png +0 -0
  140. sdg_hub-0.1.4/assets/imgs/overview.png +0 -0
  141. sdg_hub-0.1.4/docs/README.md +0 -51
  142. sdg_hub-0.1.4/docs/_coverpage.md +0 -11
  143. sdg_hub-0.1.4/docs/_navbar.md +0 -5
  144. sdg_hub-0.1.4/docs/_sidebar.md +0 -27
  145. sdg_hub-0.1.4/docs/architecture.md +0 -149
  146. sdg_hub-0.1.4/docs/blocks.md +0 -537
  147. sdg_hub-0.1.4/docs/changelog.md +0 -82
  148. sdg_hub-0.1.4/docs/configuration.md +0 -201
  149. sdg_hub-0.1.4/docs/development.md +0 -367
  150. sdg_hub-0.1.4/docs/examples.md +0 -191
  151. sdg_hub-0.1.4/docs/installation.md +0 -100
  152. sdg_hub-0.1.4/docs/prompts.md +0 -370
  153. sdg_hub-0.1.4/docs/quick-start.md +0 -128
  154. sdg_hub-0.1.4/docs/web-interface.md +0 -230
  155. sdg_hub-0.1.4/examples/knowledge_tuning/README.md +0 -115
  156. sdg_hub-0.1.4/examples/knowledge_tuning/data-generation-with-llama-70b/data-generation-with-llama-70b.ipynb +0 -340
  157. sdg_hub-0.1.4/examples/knowledge_tuning/data-generation-with-llama-70b/synth_knowledge1.5_llama3.3.yaml +0 -136
  158. sdg_hub-0.1.4/examples/knowledge_tuning/instructlab/knowledge_generation_and_mixing.ipynb +0 -196
  159. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/README.md +0 -311
  160. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/assets/customized_nano_quality_results.png +0 -0
  161. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/blocks/blocks.py +0 -60
  162. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/flows/synth_knowledge1.5_nemotron_super_49b.yaml +0 -174
  163. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/flows/synth_knowledge_reasoning_nemotron_super_49b.yaml +0 -178
  164. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/flows/synth_knowledge_reasoning_nemotron_super_49b_rewrite_with_diversity.yaml +0 -118
  165. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/flows/synth_knowledge_reasoning_nemotron_super_49b_summary_diversity.yaml +0 -118
  166. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/flows/synth_knowledge_reasoning_nemotron_super_49b_summary_diversity_cot.yaml +0 -118
  167. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/generate.py +0 -194
  168. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/prompts/generate_answers.yaml +0 -51
  169. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/prompts/generate_answers_cot.yaml +0 -31
  170. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/prompts/generate_doc_rewrite_inst.yaml +0 -25
  171. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/prompts/generate_document_rewrite.yaml +0 -20
  172. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/prompts/generate_questions.yaml +0 -47
  173. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/prompts/generate_questions_responses.yaml +0 -59
  174. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/prompts/generate_summary.yaml +0 -20
  175. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/prompts/generate_summary_inst.yaml +0 -24
  176. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/reasoning_sdg.ipynb +0 -1251
  177. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/reasoning_sdg_data_mixing.ipynb +0 -471
  178. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/reasoning_sdg_financebench.ipynb +0 -1078
  179. sdg_hub-0.1.4/examples/knowledge_tuning/knowledge_tuning_with_reasoning_model/utils.py +0 -121
  180. sdg_hub-0.1.4/examples/skills_tuning/instructlab/README.md +0 -118
  181. sdg_hub-0.1.4/examples/skills_tuning/instructlab/annotation_classification.ipynb +0 -543
  182. sdg_hub-0.1.4/examples/skills_tuning/instructlab/blocks/__init__.py +0 -3
  183. sdg_hub-0.1.4/examples/skills_tuning/instructlab/blocks/add_question.py +0 -91
  184. sdg_hub-0.1.4/examples/skills_tuning/instructlab/blocks/docling_parse_pdf.py +0 -91
  185. sdg_hub-0.1.4/examples/skills_tuning/instructlab/blocks/json_format.py +0 -133
  186. sdg_hub-0.1.4/examples/skills_tuning/instructlab/flows/detailed_annotation.yaml +0 -16
  187. sdg_hub-0.1.4/examples/skills_tuning/instructlab/flows/grounded_summary_extraction.yaml +0 -53
  188. sdg_hub-0.1.4/examples/skills_tuning/instructlab/flows/simple_annotation.yaml +0 -16
  189. sdg_hub-0.1.4/examples/skills_tuning/instructlab/flows/unstructured_to_structured.yaml +0 -47
  190. sdg_hub-0.1.4/examples/skills_tuning/instructlab/prompts/keywords.yaml +0 -29
  191. sdg_hub-0.1.4/examples/skills_tuning/instructlab/prompts/named_entities.yaml +0 -40
  192. sdg_hub-0.1.4/examples/skills_tuning/instructlab/prompts/sentiment.yaml +0 -28
  193. sdg_hub-0.1.4/examples/skills_tuning/instructlab/prompts/summary.yaml +0 -29
  194. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/09b5b62d328d3d0719b6825357fdfb48.pdf +0 -169
  195. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/0d631e444d1c22f0be99a69f5deaff94.pdf +0 -112
  196. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/1270f7f67f406b52a2ee86584b452bff.pdf +0 -74
  197. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/14f3d2486b21e639a953afb7ad03d90c.pdf +0 -112
  198. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/1689b94530eca82b7758c86b4cf3125f.pdf +0 -112
  199. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/171fd9df333ddd814c764843ed624121.pdf +0 -150
  200. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/1949bd0c9c4c23d495d880c4c552bfe1.pdf +0 -131
  201. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/2b626b620ef42f716c6028c74ee4187b.pdf +0 -74
  202. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/3877b1983229ec488c6349a188bccf92.pdf +0 -207
  203. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/3bc6d3e1c0a117340d288c289bf7f679.pdf +0 -93
  204. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/3e714a49937be1672aa48244ba7254ce.pdf +0 -74
  205. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/6064088db0200b32f3f3e848047c5ab6.pdf +0 -74
  206. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/73c60e60043b8775dac929320839a8c6.pdf +0 -93
  207. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/77423f08f0208d476dea73c639f6293a.pdf +0 -169
  208. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/78cf0d3e40caba622d8914916f0f9146.pdf +0 -93
  209. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/7a29e2dcd505f944b16d1e3173cb1c01.pdf +0 -93
  210. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/8c1b4f4af2af2847a240041390e31399.pdf +0 -93
  211. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/8cd753ed00aeee0ed32d03823eef3f7e.pdf +0 -93
  212. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/a24a661c2eb55542903c72391ec09f9b.pdf +0 -112
  213. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/b3d7bc295d09d9927e465213612c0192.pdf +0 -150
  214. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/b7050f62f52a3d2803beea21404f7af6.pdf +0 -112
  215. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/b9b40b0c1e92fb226067bdceacbdab5c.pdf +0 -74
  216. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/c20824ea6f927fe380f48a904cf4821b.pdf +0 -93
  217. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/c2bad61ce58687fad602549f6048004b.pdf +0 -93
  218. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/c47a92e006b54d014a79b447528c55a7.pdf +0 -112
  219. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/da879f8ea1c23aa6565cccaacac271fc.pdf +0 -169
  220. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/e52e6870e8a04339ef969543fc0f0329.pdf +0 -74
  221. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/ecd8e1f1c0fa27dfdd24b358cb65012f.pdf +0 -74
  222. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/f28832481653818f8062a497655fb09e.pdf +0 -74
  223. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts/ff898f396d49760343d08575ea773b54.pdf +0 -93
  224. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/financial_call_transcripts.jsonl +0 -30
  225. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/table_manipulation_qna.yaml +0 -97
  226. sdg_hub-0.1.4/examples/skills_tuning/instructlab/seed_data/unstructured_to_structured_qna.yaml +0 -49
  227. sdg_hub-0.1.4/examples/skills_tuning/instructlab/structured_summary.ipynb +0 -555
  228. sdg_hub-0.1.4/examples/skills_tuning/instructlab/table_manipulation.ipynb +0 -1034
  229. sdg_hub-0.1.4/examples/skills_tuning/instructlab/unstructured_to_structured.ipynb +0 -591
  230. sdg_hub-0.1.4/scripts/ruff.sh +0 -54
  231. sdg_hub-0.1.4/src/sdg_hub/blocks/__init__.py +0 -42
  232. sdg_hub-0.1.4/src/sdg_hub/blocks/block.py +0 -96
  233. sdg_hub-0.1.4/src/sdg_hub/blocks/llmblock.py +0 -375
  234. sdg_hub-0.1.4/src/sdg_hub/blocks/openaichatblock.py +0 -556
  235. sdg_hub-0.1.4/src/sdg_hub/blocks/utilblocks.py +0 -597
  236. sdg_hub-0.1.4/src/sdg_hub/checkpointer.py +0 -139
  237. sdg_hub-0.1.4/src/sdg_hub/configs/annotations/cot_reflection.yaml +0 -34
  238. sdg_hub-0.1.4/src/sdg_hub/configs/annotations/detailed_annotations.yaml +0 -28
  239. sdg_hub-0.1.4/src/sdg_hub/configs/annotations/detailed_description.yaml +0 -10
  240. sdg_hub-0.1.4/src/sdg_hub/configs/annotations/detailed_description_icl.yaml +0 -32
  241. sdg_hub-0.1.4/src/sdg_hub/configs/annotations/simple_annotations.yaml +0 -9
  242. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/__init__.py +0 -0
  243. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/atomic_facts.yaml +0 -46
  244. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/auxilary_instructions.yaml +0 -35
  245. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/detailed_summary.yaml +0 -18
  246. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/evaluate_faithfulness.yaml +0 -68
  247. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/evaluate_question.yaml +0 -38
  248. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/evaluate_relevancy.yaml +0 -84
  249. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/extractive_summary.yaml +0 -18
  250. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/generate_code_questions_responses.yaml +0 -39
  251. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/generate_questions.yaml +0 -82
  252. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/generate_questions_responses.yaml +0 -56
  253. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/generate_responses.yaml +0 -86
  254. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/mcq_generation.yaml +0 -83
  255. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/router.yaml +0 -12
  256. sdg_hub-0.1.4/src/sdg_hub/configs/knowledge/simple_generate_qa.yaml +0 -34
  257. sdg_hub-0.1.4/src/sdg_hub/configs/reasoning/__init__.py +0 -0
  258. sdg_hub-0.1.4/src/sdg_hub/configs/reasoning/dynamic_cot.yaml +0 -40
  259. sdg_hub-0.1.4/src/sdg_hub/configs/skills/__init__.py +0 -0
  260. sdg_hub-0.1.4/src/sdg_hub/configs/skills/analyzer.yaml +0 -48
  261. sdg_hub-0.1.4/src/sdg_hub/configs/skills/annotation.yaml +0 -36
  262. sdg_hub-0.1.4/src/sdg_hub/configs/skills/contexts.yaml +0 -28
  263. sdg_hub-0.1.4/src/sdg_hub/configs/skills/critic.yaml +0 -60
  264. sdg_hub-0.1.4/src/sdg_hub/configs/skills/evaluate_freeform_pair.yaml +0 -111
  265. sdg_hub-0.1.4/src/sdg_hub/configs/skills/evaluate_freeform_questions.yaml +0 -78
  266. sdg_hub-0.1.4/src/sdg_hub/configs/skills/evaluate_grounded_pair.yaml +0 -119
  267. sdg_hub-0.1.4/src/sdg_hub/configs/skills/evaluate_grounded_questions.yaml +0 -51
  268. sdg_hub-0.1.4/src/sdg_hub/configs/skills/freeform_questions.yaml +0 -34
  269. sdg_hub-0.1.4/src/sdg_hub/configs/skills/freeform_responses.yaml +0 -39
  270. sdg_hub-0.1.4/src/sdg_hub/configs/skills/grounded_questions.yaml +0 -38
  271. sdg_hub-0.1.4/src/sdg_hub/configs/skills/grounded_responses.yaml +0 -59
  272. sdg_hub-0.1.4/src/sdg_hub/configs/skills/icl_examples/STEM.yaml +0 -56
  273. sdg_hub-0.1.4/src/sdg_hub/configs/skills/icl_examples/__init__.py +0 -0
  274. sdg_hub-0.1.4/src/sdg_hub/configs/skills/icl_examples/coding.yaml +0 -97
  275. sdg_hub-0.1.4/src/sdg_hub/configs/skills/icl_examples/extraction.yaml +0 -36
  276. sdg_hub-0.1.4/src/sdg_hub/configs/skills/icl_examples/humanities.yaml +0 -71
  277. sdg_hub-0.1.4/src/sdg_hub/configs/skills/icl_examples/math.yaml +0 -85
  278. sdg_hub-0.1.4/src/sdg_hub/configs/skills/icl_examples/reasoning.yaml +0 -30
  279. sdg_hub-0.1.4/src/sdg_hub/configs/skills/icl_examples/roleplay.yaml +0 -45
  280. sdg_hub-0.1.4/src/sdg_hub/configs/skills/icl_examples/writing.yaml +0 -80
  281. sdg_hub-0.1.4/src/sdg_hub/configs/skills/judge.yaml +0 -53
  282. sdg_hub-0.1.4/src/sdg_hub/configs/skills/planner.yaml +0 -67
  283. sdg_hub-0.1.4/src/sdg_hub/configs/skills/respond.yaml +0 -8
  284. sdg_hub-0.1.4/src/sdg_hub/configs/skills/revised_responder.yaml +0 -78
  285. sdg_hub-0.1.4/src/sdg_hub/configs/skills/router.yaml +0 -59
  286. sdg_hub-0.1.4/src/sdg_hub/configs/skills/simple_generate_qa_freeform.yaml +0 -27
  287. sdg_hub-0.1.4/src/sdg_hub/configs/skills/simple_generate_qa_grounded.yaml +0 -31
  288. sdg_hub-0.1.4/src/sdg_hub/flow.py +0 -477
  289. sdg_hub-0.1.4/src/sdg_hub/flow_runner.py +0 -450
  290. sdg_hub-0.1.4/src/sdg_hub/flows/generation/knowledge/mmlu_bench.yaml +0 -13
  291. sdg_hub-0.1.4/src/sdg_hub/flows/generation/knowledge/simple_knowledge.yaml +0 -12
  292. sdg_hub-0.1.4/src/sdg_hub/flows/generation/knowledge/synth_knowledge.yaml +0 -89
  293. sdg_hub-0.1.4/src/sdg_hub/flows/generation/knowledge/synth_knowledge1.5.yaml +0 -136
  294. sdg_hub-0.1.4/src/sdg_hub/flows/generation/skills/improve_responses.yaml +0 -103
  295. sdg_hub-0.1.4/src/sdg_hub/flows/generation/skills/simple_freeform_skill.yaml +0 -12
  296. sdg_hub-0.1.4/src/sdg_hub/flows/generation/skills/simple_grounded_skill.yaml +0 -12
  297. sdg_hub-0.1.4/src/sdg_hub/flows/generation/skills/synth_grounded_skills.yaml +0 -80
  298. sdg_hub-0.1.4/src/sdg_hub/flows/generation/skills/synth_skills.yaml +0 -59
  299. sdg_hub-0.1.4/src/sdg_hub/pipeline.py +0 -121
  300. sdg_hub-0.1.4/src/sdg_hub/prompts.py +0 -80
  301. sdg_hub-0.1.4/src/sdg_hub/registry.py +0 -122
  302. sdg_hub-0.1.4/src/sdg_hub/sdg.py +0 -206
  303. sdg_hub-0.1.4/src/sdg_hub/utils/config_validation.py +0 -91
  304. sdg_hub-0.1.4/src/sdg_hub/utils/error_handling.py +0 -94
  305. sdg_hub-0.1.4/src/sdg_hub/utils/validation_result.py +0 -10
  306. sdg_hub-0.1.4/src/sdg_hub.egg-info/PKG-INFO +0 -190
  307. sdg_hub-0.1.4/src/sdg_hub.egg-info/SOURCES.txt +0 -253
  308. sdg_hub-0.1.4/tests/__init__.py +0 -0
  309. sdg_hub-0.1.4/tests/blocks/test_llmblock.py +0 -343
  310. sdg_hub-0.1.4/tests/blocks/test_openaichatblock.py +0 -647
  311. sdg_hub-0.1.4/tests/blocks/utilblocks/test_filterblock.py +0 -157
  312. sdg_hub-0.1.4/tests/blocks/utilblocks/test_samplepopulatorblock.py +0 -195
  313. sdg_hub-0.1.4/tests/flows/test_flow.py +0 -257
  314. sdg_hub-0.1.4/tests/flows/test_flow_column_validation.py +0 -72
  315. sdg_hub-0.1.4/tests/flows/test_flow_path.py +0 -109
  316. sdg_hub-0.1.4/tests/flows/test_flow_validation.py +0 -46
  317. sdg_hub-0.1.4/tests/flows/testdata/test_config_1.yaml +0 -7
  318. sdg_hub-0.1.4/tests/flows/testdata/test_flow_1.yaml +0 -7
  319. sdg_hub-0.1.4/tests/flows/testdata/test_flow_2.yaml +0 -10
  320. sdg_hub-0.1.4/tests/test_checkpointer.py +0 -175
  321. sdg_hub-0.1.4/tests/test_flowrunner.py +0 -899
  322. sdg_hub-0.1.4/tests/test_pipeline.py +0 -201
  323. sdg_hub-0.1.4/tests/test_sdg.py +0 -413
  324. sdg_hub-0.1.4/tests/utils/test_config_validation.py +0 -174
  325. sdg_hub-0.1.4/web_interface/README.md +0 -133
  326. sdg_hub-0.1.4/web_interface/app.py +0 -227
  327. sdg_hub-0.1.4/web_interface/static/css/style.css +0 -171
  328. sdg_hub-0.1.4/web_interface/static/js/app.js +0 -518
  329. sdg_hub-0.1.4/web_interface/templates/index.html +0 -119
  330. sdg_hub-0.1.4/web_interface/test_block_types.py +0 -75
  331. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/.github/actionlint.yaml +0 -0
  332. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/.github/actions/free-disk-space/action.yml +0 -0
  333. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/.github/dependabot.yml +0 -0
  334. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/.github/mergify.yml +0 -0
  335. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/.github/workflows/actionlint.dockerfile +0 -0
  336. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/.github/workflows/actionlint.yml +0 -0
  337. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/.github/workflows/docs.yml +0 -0
  338. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/.github/workflows/e2e.yml +0 -0
  339. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/.github/workflows/matchers/actionlint.json +0 -0
  340. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/.github/workflows/matchers/pylint.json +0 -0
  341. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/.github/workflows/test.yml +0 -0
  342. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/.gitignore +0 -0
  343. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/.isort.cfg +0 -0
  344. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/.markdownlint-cli2.yaml +0 -0
  345. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/.pre-commit-config.yaml +0 -0
  346. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/.pylintrc +0 -0
  347. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/LICENSE +0 -0
  348. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/Makefile +0 -0
  349. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/docs/.nojekyll +0 -0
  350. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/examples/knowledge_tuning/instructlab/docparser.py +0 -0
  351. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/examples/knowledge_tuning/instructlab/docparser_v2.py +0 -0
  352. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.json +0 -0
  353. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.md +0 -0
  354. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.pdf +0 -0
  355. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/qna.yaml +0 -0
  356. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/setup.cfg +0 -0
  357. {sdg_hub-0.1.4/src/sdg_hub → sdg_hub-0.2.0/src/sdg_hub/core/utils}/logger_config.py +1 -1
  358. /sdg_hub-0.1.4/scripts/__init__.py → /sdg_hub-0.2.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/README.md +0 -0
  359. {sdg_hub-0.1.4/src/sdg_hub/configs → sdg_hub-0.2.0/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab}/__init__.py +0 -0
  360. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/src/sdg_hub/py.typed +0 -0
  361. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/src/sdg_hub.egg-info/dependency_links.txt +0 -0
  362. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/src/sdg_hub.egg-info/top_level.txt +0 -0
  363. {sdg_hub-0.1.4/src/sdg_hub/configs/annotations → sdg_hub-0.2.0/tests}/__init__.py +0 -0
  364. {sdg_hub-0.1.4 → sdg_hub-0.2.0}/tests/blocks/testdata/test_config.yaml +0 -0
@@ -5,7 +5,7 @@ name: Lint, Format, and MyPy
5
5
  on:
6
6
  push:
7
7
  branches:
8
- - "main-disabled"
8
+ - "main"
9
9
  paths:
10
10
  - '**.py'
11
11
  - 'pyproject.toml'
@@ -15,7 +15,7 @@ on:
15
15
  - '.github/**'
16
16
  pull_request:
17
17
  branches:
18
- - "main-disabled"
18
+ - "main"
19
19
  paths:
20
20
  - '**.py'
21
21
  - 'pyproject.toml'
@@ -57,13 +57,15 @@ jobs:
57
57
  run: |
58
58
  tox -e ruff -- check
59
59
 
60
- - name: Run linting
61
- if: ${{ !cancelled() && (steps.deps.outcome == 'success') }}
62
- run: |
63
- echo "::add-matcher::.github/workflows/matchers/pylint.json"
64
- tox -e lint
60
+ # Pylint disabled for now - may re-enable as non-blocking check in future
61
+ # - name: Run linting
62
+ # if: ${{ !cancelled() && (steps.deps.outcome == 'success') }}
63
+ # run: |
64
+ # echo "::add-matcher::.github/workflows/matchers/pylint.json"
65
+ # tox -e lint
65
66
 
66
- - name: Run mypy type checks
67
- if: ${{ !cancelled() && (steps.deps.outcome == 'success') }}
68
- run: |
69
- tox -e mypy
67
+ # MyPy type checking disabled for now - may re-enable as non-blocking check in future
68
+ # - name: Run mypy type checks
69
+ # if: ${{ !cancelled() && (steps.deps.outcome == 'success') }}
70
+ # run: |
71
+ # tox -e mypy
@@ -72,7 +72,7 @@ jobs:
72
72
  egress-policy: audit # TODO: change to 'egress-policy: block' after a couple of runs
73
73
 
74
74
  - name: "Download build artifacts"
75
- uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
75
+ uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
76
76
  with:
77
77
  name: Packages
78
78
  path: dist
@@ -104,7 +104,7 @@ jobs:
104
104
  egress-policy: audit # TODO: change to 'egress-policy: block' after a couple of runs
105
105
 
106
106
  - name: "Download build artifacts"
107
- uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
107
+ uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
108
108
  with:
109
109
  name: Packages
110
110
  path: dist
@@ -0,0 +1,171 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Project Overview
6
+
7
+ **Requirements:** Python 3.9+
8
+
9
+ SDG Hub is a Python framework for synthetic data generation using composable blocks and flows. Transform datasets through **building-block composition** - mix and match LLM-powered and traditional processing blocks like Lego pieces to create sophisticated data generation workflows.
10
+
11
+ **Core Concepts:**
12
+ - **Blocks** are composable units that transform datasets - think data processing Lego pieces
13
+ - **Flows** orchestrate multiple blocks into complete pipelines defined in YAML
14
+ - Simple concept: `dataset → Block₁ → Block₂ → Block₃ → enriched_dataset`
15
+
16
+ ## Development Commands
17
+
18
+ **Use `uv` for all Python commands and package management.**
19
+
20
+ ### Setup and Installation
21
+ ```bash
22
+ # Install core dependencies
23
+ uv pip install .
24
+
25
+ # Install with development dependencies
26
+ uv pip install .[dev]
27
+ # Alternative: uv sync --extra dev
28
+
29
+ # Install with optional vLLM support
30
+ uv pip install .[vllm]
31
+ # Alternative: uv sync --extra vllm
32
+
33
+ # Install with examples dependencies
34
+ uv pip install .[examples]
35
+ # Alternative: uv sync --extra examples
36
+ ```
37
+
38
+ ### Testing
39
+ ```bash
40
+ # Run all tests
41
+ tox -e py3-unit
42
+
43
+ # Run tests with coverage
44
+ tox -e py3-unitcov
45
+
46
+ # Run specific test file
47
+ pytest tests/test_specific_file.py
48
+
49
+ # Run tests matching pattern
50
+ pytest -k "test_pattern"
51
+ ```
52
+
53
+ ### Linting and Formatting
54
+ ```bash
55
+ # Run full verification (lint, mypy, ruff)
56
+ make verify
57
+
58
+ # Individual lint commands
59
+ tox -e lint # Full pylint check
60
+ tox -e fastlint # Fast pylint (without 3rd party)
61
+ tox -e ruff # Ruff formatting and fixes
62
+ tox -e mypy # Type checking
63
+
64
+ # Format code with ruff
65
+ tox -e ruff -- fix
66
+
67
+ # Check code formatting
68
+ tox -e ruff -- check
69
+ ```
70
+
71
+ ### Other Make targets
72
+ ```bash
73
+ make actionlint # Lint GitHub Actions
74
+ make md-lint # Lint markdown files
75
+ ```
76
+
77
+ ## Core Architecture
78
+
79
+ ### Block System
80
+ The framework is built around a modular block system with **composability at its core** - mix and match blocks to build simple transformations or complex multi-stage pipelines:
81
+
82
+ - **BaseBlock** (`src/sdg_hub/core/blocks/base.py`): Abstract base class for all processing blocks with Pydantic validation
83
+ - **BlockRegistry** (`src/sdg_hub/core/blocks/registry.py`): Auto-discovery system for organizing blocks with zero setup
84
+ - Blocks are organized in categories:
85
+ - `llm/`: LLM-powered blocks (chat, prompt building, text parsing) with async execution
86
+ - `transform/`: Data transformation blocks (column operations, text manipulation)
87
+ - `filtering/`: Data filtering blocks with quality thresholds
88
+ - `evaluation/`: Quality evaluation blocks (faithfulness, relevancy assessment)
89
+ - `deprecated_blocks/`: Legacy blocks maintained for backward compatibility
90
+
91
+ **Key Benefits**: Type-safe composition, automatic validation, rich logging, and high-performance async processing.
92
+
93
+ ### Flow System
94
+ Flows orchestrate multiple blocks into data processing pipelines:
95
+
96
+ - **Flow** (`src/sdg_hub/core/flow/base.py`): Main flow execution class with Pydantic validation
97
+ - **FlowRegistry** (`src/sdg_hub/core/flow/registry.py`): Registry for flow discovery
98
+ - **FlowMetadata** (`src/sdg_hub/core/flow/metadata.py`): Metadata and parameter definitions
99
+ - **FlowValidator** (`src/sdg_hub/core/flow/validation.py`): YAML structure validation
100
+ - **FlowMigration** (`src/sdg_hub/core/flow/migration.py`): Backward compatibility for old flow formats
101
+
102
+ ### Flow Configuration
103
+ Flows are defined in YAML files with this structure:
104
+ ```yaml
105
+ metadata:
106
+ name: "flow_name"
107
+ version: "1.0.0"
108
+ author: "Author Name"
109
+ description: "Flow description"
110
+
111
+ parameters:
112
+ param_name:
113
+ type: "string"
114
+ default: "default_value"
115
+ description: "Parameter description"
116
+
117
+ blocks:
118
+ - block_type: "BlockTypeName"
119
+ block_config:
120
+ block_name: "unique_block_name"
121
+ # block-specific configuration
122
+ ```
123
+
124
+ ### Built-in Flow Discovery
125
+ The framework includes auto-discovery for flows in `src/sdg_hub/flows/`. Example flow structure:
126
+ ```
127
+ flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/
128
+ ├── flow.yaml # Main flow definition
129
+ ├── atomic_facts.yaml # Sub-flow configurations
130
+ ├── detailed_summary.yaml
131
+ └── generate_questions_responses.yaml
132
+ ```
133
+
134
+ ## Key Patterns
135
+
136
+ ### Block Development
137
+ When creating new blocks:
138
+ 1. Inherit from `BaseBlock` and implement the `generate()` method
139
+ 2. Use Pydantic field validation for configuration
140
+ 3. Follow the standardized column handling patterns (`input_cols`, `output_cols`)
141
+ 4. Register blocks in appropriate category directories
142
+ 5. Include proper error handling and logging
143
+
144
+ ### Dataset Processing
145
+ All blocks operate on HuggingFace `datasets.Dataset` objects:
146
+ - Input validation ensures required columns exist
147
+ - Output validation prevents column collisions
148
+ - Rich logging provides processing summaries
149
+ - Empty dataset handling with appropriate errors
150
+
151
+ ### Backward Compatibility
152
+ The framework maintains compatibility with legacy formats:
153
+ - Deprecated blocks are preserved in `deprecated_blocks/`
154
+ - Flow migration automatically converts old YAML formats
155
+ - Legacy LLMBlocks receive special handling during execution
156
+
157
+ ## Testing Guidelines
158
+
159
+ - Tests are organized by block category under `tests/blocks/`
160
+ - Use `pytest` fixtures for common test data
161
+ - Test configuration files are in `tests/blocks/testdata/`
162
+ - Follow the existing pattern of testing both success and error cases
163
+ - Mock LLM clients when testing LLM-powered blocks
164
+
165
+ ## Important Notes
166
+
167
+ - Always use `uv` for Python package management
168
+ - The framework uses Pydantic extensively for validation and configuration
169
+ - LLM clients are managed through the `client_manager.py` system
170
+ - Path resolution is handled centrally in `utils/path_resolution.py`
171
+ - Error handling follows custom exception patterns in `utils/error_handling.py`
@@ -0,0 +1,251 @@
1
+ # Contributing to SDG Hub
2
+
3
+ Welcome to SDG Hub development! This guide covers everything you need to know about contributing blocks, flows, and other improvements to the SDG Hub ecosystem.
4
+
5
+ For detailed documentation including examples and advanced patterns, see our comprehensive [Development Guide](docs/development.md).
6
+
7
+ ## 🚀 Quick Start
8
+
9
+ ### Development Setup
10
+
11
+ 1. **Clone the Repository**
12
+ ```bash
13
+ git clone https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub.git
14
+ cd sdg_hub
15
+ ```
16
+
17
+ 2. **Install Development Dependencies**
18
+ ```bash
19
+ # Using uv (recommended)
20
+ uv sync --extra dev
21
+
22
+ # Or using pip
23
+ pip install .[dev]
24
+ ```
25
+
26
+ ## 🛠️ Development Tools
27
+
28
+ ### Linting and Code Quality
29
+
30
+ **Primary linting tools** (required for all contributions):
31
+ ```bash
32
+ tox -e lint # Full pylint check
33
+ tox -e fastlint # Quick pylint check
34
+ tox -e mypy # Type checking
35
+
36
+ # Ruff (code formatting and linting)
37
+ tox -e ruff # Format and fix issues (development mode)
38
+ tox -e ruff -- check # Check only, no fixes (CI mode)
39
+ ./scripts/ruff.sh # Direct script - format and fix
40
+ ./scripts/ruff.sh check # Direct script - check only
41
+ ./scripts/ruff.sh --help # Pass custom arguments to ruff
42
+ ```
43
+
44
+ **Optional development tools** (require additional dependencies):
45
+ ```bash
46
+ make actionlint # Lint GitHub Actions (requires: actionlint, shellcheck)
47
+ make md-lint # Lint markdown files (requires: podman/docker)
48
+ make verify # Run extended checks: pylint, mypy, ruff (may differ from CI)
49
+ ```
50
+
51
+ ### Testing
52
+
53
+ SDG Hub uses [tox](https://tox.wiki/) for test automation and [pytest](https://docs.pytest.org/) as a test framework:
54
+
55
+ ```bash
56
+ # Run all tests
57
+ tox -e py3-unit
58
+
59
+ # Run with coverage
60
+ tox -e py3-unitcov
61
+
62
+ # Run specific tests
63
+ pytest tests/test_specific_file.py
64
+ pytest -k "test_pattern"
65
+ ```
66
+
67
+ ## 🧱 Contributing Blocks
68
+
69
+ Blocks are the core processing units of SDG Hub. To contribute a new block:
70
+
71
+ 1. **Choose the appropriate category**: `llm`, `transform`, `filtering`, or `evaluation`
72
+ 2. **Implement your block** following the [Custom Blocks Guide](docs/blocks/custom-blocks.md)
73
+ 3. **Add comprehensive tests** in `tests/blocks/[category]/`
74
+ 4. **Update documentation** in the relevant block category page
75
+
76
+ ### Example Block Structure
77
+
78
+ ```python
79
+ from sdg_hub.core.blocks.base import BaseBlock
80
+ from sdg_hub.core.blocks.registry import BlockRegistry
81
+
82
+ @BlockRegistry.register("MyNewBlock", "category", "Description")
83
+ class MyNewBlock(BaseBlock):
84
+ """Comprehensive docstring with examples."""
85
+
86
+ def generate(self, samples: Dataset, **kwargs: Any) -> Dataset:
87
+ # Your implementation here
88
+ pass
89
+ ```
90
+
91
+ ## 🌊 Contributing Flows
92
+
93
+ Flows orchestrate multiple blocks into complete pipelines. To contribute a new flow:
94
+
95
+ 1. **Design your flow** with clear use case and objectives
96
+ 2. **Create flow directory structure** under `src/sdg_hub/flows/[category]/`
97
+ 3. **Implement the flow** with comprehensive YAML configuration
98
+ 4. **Add tests** and documentation
99
+
100
+ ### Flow Directory Structure
101
+
102
+ ```
103
+ src/sdg_hub/flows/[category]/[use_case]/[variant]/
104
+ ├── flow.yaml # Main flow definition
105
+ ├── prompt_template_1.yaml # Supporting templates
106
+ └── README.md # Flow documentation
107
+ ```
108
+
109
+ ## 📋 Contribution Checklist
110
+
111
+ ### For New Blocks
112
+ - [ ] Block placed in correct category directory
113
+ - [ ] Inherits from `BaseBlock` and implements `generate()`
114
+ - [ ] Registered with `@BlockRegistry.register()`
115
+ - [ ] Comprehensive docstring with examples
116
+ - [ ] Proper Pydantic field validation
117
+ - [ ] Comprehensive test suite
118
+ - [ ] Documentation updated
119
+ - [ ] All linting checks pass
120
+ - [ ] All tests pass
121
+
122
+ ### For New Flows
123
+ - [ ] Flow directory structure follows conventions
124
+ - [ ] Complete metadata in `flow.yaml`
125
+ - [ ] Required input columns documented
126
+ - [ ] Supporting templates included
127
+ - [ ] Flow-specific README created
128
+ - [ ] Integration tests written
129
+ - [ ] Documentation updated
130
+
131
+ ## 🔄 Development Workflow
132
+
133
+ ### Git Workflow
134
+
135
+ **Branch Naming:**
136
+ - `feature/block-name-implementation` - New blocks
137
+ - `feature/flow-name-implementation` - New flows
138
+ - `fix/issue-description` - Bug fixes
139
+ - `docs/section-updates` - Documentation updates
140
+
141
+ **Commit Messages:**
142
+ Follow conventional commits:
143
+ ```
144
+ feat(blocks): add TextSummarizerBlock for document summarization
145
+ fix(flows): correct parameter validation in QA generation flow
146
+ docs(blocks): update LLM block examples with new model config
147
+ ```
148
+
149
+ **Pull Request Process:**
150
+ 1. Create feature branch from `main`
151
+ 2. Implement changes with tests and documentation
152
+ 3. Run full verification: `make verify && tox -e py3-unit`
153
+ 4. Create PR with clear description
154
+ 5. Address review feedback
155
+ 6. Squash and merge when approved
156
+
157
+ ## 🤝 Community Guidelines
158
+
159
+ - Be respectful and inclusive
160
+ - Provide constructive feedback
161
+ - Help newcomers get started
162
+ - Follow the project's coding standards
163
+ - Report issues responsibly
164
+
165
+ ## 📚 Documentation
166
+
167
+ For comprehensive guides and examples:
168
+
169
+ - **[Development Guide](docs/development.md)** - Complete development documentation
170
+ - **[Custom Blocks](docs/blocks/custom-blocks.md)** - Building custom blocks
171
+ - **[Flow Configuration](docs/flows/yaml-configuration.md)** - YAML configuration guide
172
+ - **[Block System Overview](docs/blocks/overview.md)** - Understanding the block architecture
173
+ - **[Flow System Overview](docs/flows/overview.md)** - Understanding flow orchestration
174
+
175
+ ## 🚀 Getting Help
176
+
177
+ - **GitHub Issues** - Report bugs, request features
178
+ - **GitHub Discussions** - Ask questions, share ideas
179
+ - **Documentation** - Check existing docs first
180
+ - **Code Examples** - Look at existing implementations
181
+
182
+ Tip: to run the full test suite locally, use the `tox -e py3-unit` command.
183
+
184
+ ## Documentation Guidelines
185
+
186
+ ### NumPy-Style Docstrings
187
+
188
+ If you choose to add docstrings to your functions, we recommend following the NumPy docstring format for consistency with the scientific Python ecosystem.
189
+
190
+ #### Basic Structure
191
+
192
+ ```python
193
+ def example_function(param1, param2=None):
194
+ """Brief description of the function.
195
+
196
+ Longer description providing more context about what the function does,
197
+ its purpose, and any important behavioral notes.
198
+
199
+ Parameters
200
+ ----------
201
+ param1 : str
202
+ Description of the first parameter
203
+ param2 : int, optional
204
+ Description of the second parameter (default: None)
205
+
206
+ Returns
207
+ -------
208
+ bool
209
+ Description of what the function returns
210
+
211
+ Raises
212
+ ------
213
+ ValueError
214
+ When invalid input is provided
215
+
216
+ Examples
217
+ --------
218
+ >>> result = example_function("hello", 42)
219
+ >>> print(result)
220
+ True
221
+ """
222
+ ```
223
+
224
+ #### Key Guidelines
225
+
226
+ - **Summary**: Start with a concise one-line description
227
+ - **Parameters**: Document all function parameters with types and descriptions
228
+ - **Returns**: Describe return values with types and meaning
229
+ - **Types**: Use standard Python types (`str`, `int`, `list`, `dict`, etc.)
230
+ - **Optional parameters**: Mark parameters that have default values as "optional"
231
+ - **Examples**: Include simple usage examples when helpful
232
+
233
+ #### When to Add Docstrings
234
+
235
+ Docstrings are **optional** but recommended for:
236
+ - Public API functions and classes
237
+ - Complex functions with multiple parameters
238
+ - Functions that might be confusing to other developers
239
+ - Core framework components
240
+
241
+ #### When to Skip Docstrings
242
+
243
+ You may skip docstrings for:
244
+ - Simple utility functions with obvious behavior
245
+ - Private/internal functions (starting with `_`)
246
+ - Functions with self-explanatory names and simple parameters
247
+
248
+ **Remember**: Quality over quantity. A well-written docstring is better than a verbose one, and no docstring is better than a poor one.
249
+
250
+
251
+ Thank you for contributing to SDG Hub! 🎉