sdg-hub 0.7.2__tar.gz → 0.7.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214) hide show
  1. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.github/workflows/actionlint.dockerfile +1 -1
  2. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/PKG-INFO +2 -2
  3. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/blocks/llm-blocks.md +2 -2
  4. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/flows/overview.md +3 -3
  5. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/text_analysis/structured_insights_demo.ipynb +3 -3
  6. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/pyproject.toml +1 -1
  7. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/_version.py +3 -3
  8. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/__init__.py +9 -2
  9. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/base.py +4 -1
  10. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/filtering/column_value_filter.py +2 -0
  11. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/llm/__init__.py +3 -2
  12. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/llm/llm_chat_block.py +2 -0
  13. sdg_hub-0.7.2/src/sdg_hub/core/blocks/llm/llm_parser_block.py → sdg_hub-0.7.3/src/sdg_hub/core/blocks/llm/llm_response_extractor_block.py +32 -9
  14. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/llm/prompt_builder_block.py +2 -0
  15. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/llm/text_parser_block.py +2 -0
  16. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/transform/duplicate_columns.py +2 -0
  17. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/transform/index_based_mapper.py +2 -0
  18. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/transform/json_structure_block.py +2 -0
  19. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/transform/melt_columns.py +2 -0
  20. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/transform/rename_columns.py +2 -0
  21. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/transform/text_concat.py +2 -0
  22. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/transform/uniform_col_val_setter.py +2 -0
  23. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/flow/base.py +7 -31
  24. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/utils/flow_metrics.py +3 -3
  25. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/evaluation/rag/flow.yaml +6 -6
  26. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml +4 -4
  27. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/flow.yaml +3 -3
  28. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml +4 -4
  29. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml +2 -2
  30. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +7 -7
  31. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml +7 -7
  32. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/text_analysis/structured_insights/flow.yaml +4 -4
  33. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub.egg-info/PKG-INFO +2 -2
  34. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub.egg-info/SOURCES.txt +2 -2
  35. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub.egg-info/requires.txt +1 -1
  36. sdg_hub-0.7.2/tests/blocks/llm/test_llm_parser_block.py → sdg_hub-0.7.3/tests/blocks/llm/test_llm_response_extractor_block.py +55 -52
  37. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/llm/test_promptbuilderblock.py +1 -1
  38. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/test_base_block.py +4 -3
  39. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/flow/test_base.py +28 -0
  40. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/utils/test_flow_metrics.py +11 -11
  41. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.github/actionlint.yaml +0 -0
  42. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.github/actions/free-disk-space/action.yml +0 -0
  43. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.github/dependabot.yml +0 -0
  44. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.github/mergify.yml +0 -0
  45. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.github/workflows/actionlint.yml +0 -0
  46. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.github/workflows/docs.yml +0 -0
  47. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.github/workflows/integration-test.yml +0 -0
  48. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.github/workflows/lint.yml +0 -0
  49. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.github/workflows/matchers/actionlint.json +0 -0
  50. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.github/workflows/matchers/pylint.json +0 -0
  51. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.github/workflows/packer.yml +0 -0
  52. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.github/workflows/pypi.yaml +0 -0
  53. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.github/workflows/test.yml +0 -0
  54. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.gitignore +0 -0
  55. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.isort.cfg +0 -0
  56. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.markdownlint-cli2.yaml +0 -0
  57. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.pre-commit-config.yaml +0 -0
  58. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/.pylintrc +0 -0
  59. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/CLAUDE.md +0 -0
  60. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/CONTRIBUTING.md +0 -0
  61. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/LICENSE +0 -0
  62. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/Makefile +0 -0
  63. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/README.md +0 -0
  64. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/.nojekyll +0 -0
  65. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/README.md +0 -0
  66. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/_coverpage.md +0 -0
  67. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/_navbar.md +0 -0
  68. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/_sidebar.md +0 -0
  69. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/api-reference.md +0 -0
  70. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/assets/logo.png +0 -0
  71. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/assets/sdg-hub-cover.png +0 -0
  72. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/blocks/custom-blocks.md +0 -0
  73. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/blocks/filtering-blocks.md +0 -0
  74. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/blocks/overview.md +0 -0
  75. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/blocks/transform-blocks.md +0 -0
  76. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/concepts.md +0 -0
  77. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/development.md +0 -0
  78. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/flows/available-flows.md +0 -0
  79. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/flows/custom-flows.md +0 -0
  80. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/flows/discovery.md +0 -0
  81. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/index.html +0 -0
  82. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/installation.md +0 -0
  83. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/docs/quick-start.md +0 -0
  84. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/.env.example +0 -0
  85. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/README.md +0 -0
  86. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/document_pre_processing.ipynb +0 -0
  87. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/imgs/quality_benchmark_accuracy.png +0 -0
  88. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/knowledge_generation.ipynb +0 -0
  89. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/knowledge_mixing.ipynb +0 -0
  90. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/knowledge_mixing_utils.py +0 -0
  91. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/raft_builder.py +0 -0
  92. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/instructlab/.gitignore +0 -0
  93. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/instructlab/README.md +0 -0
  94. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/instructlab/assets/imgs/instructlab-banner.png +0 -0
  95. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/instructlab/docling_v2_config.yaml +0 -0
  96. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/instructlab/docparser.py +0 -0
  97. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/instructlab/docparser_v2.py +0 -0
  98. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.json +0 -0
  99. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.md +0 -0
  100. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/ibm-annual-report-2024.pdf +0 -0
  101. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/instructlab/document_collection/ibm-annual-report/qna.yaml +0 -0
  102. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/instructlab/document_pre_processing.ipynb +0 -0
  103. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/instructlab/knowledge_generation_and_mixing.ipynb +0 -0
  104. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/instructlab/knowledge_generation_ja.ipynb +0 -0
  105. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/instructlab/logger_config.py +0 -0
  106. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/knowledge_tuning/knowledge_utils.py +0 -0
  107. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/rag_evaluation/ibm-annual-report-2024.pdf +0 -0
  108. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/rag_evaluation/rag_evaluation_dataset_generation.ipynb +0 -0
  109. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/text_analysis/README.md +0 -0
  110. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/examples/text_analysis/extract_stock_tickers.yaml +0 -0
  111. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/scripts/packer/centos.pkr.hcl +0 -0
  112. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/scripts/packer/setup-centos.sh +0 -0
  113. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/scripts/ruff.sh +0 -0
  114. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/scripts/snyk_notebook_scan.sh +0 -0
  115. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/setup.cfg +0 -0
  116. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/__init__.py +0 -0
  117. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/__init__.py +0 -0
  118. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/filtering/__init__.py +0 -0
  119. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/llm/error_handler.py +0 -0
  120. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/registry.py +0 -0
  121. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/blocks/transform/__init__.py +0 -0
  122. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/flow/__init__.py +0 -0
  123. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/flow/checkpointer.py +0 -0
  124. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/flow/metadata.py +0 -0
  125. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/flow/registry.py +0 -0
  126. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/flow/validation.py +0 -0
  127. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/utils/__init__.py +0 -0
  128. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/utils/datautils.py +0 -0
  129. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/utils/error_handling.py +0 -0
  130. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/utils/flow_id_words.yaml +0 -0
  131. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/utils/flow_identifier.py +0 -0
  132. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/utils/logger_config.py +0 -0
  133. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/utils/path_resolution.py +0 -0
  134. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/utils/time_estimator.py +0 -0
  135. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/core/utils/yaml_utils.py +0 -0
  136. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/evaluation/rag/__init__.py +0 -0
  137. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/evaluation/rag/answer_generation.yaml +0 -0
  138. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/evaluation/rag/conceptual_qa_generation.yaml +0 -0
  139. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/evaluation/rag/context_extraction.yaml +0 -0
  140. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/evaluation/rag/groundedness_critic.yaml +0 -0
  141. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/evaluation/rag/question_evolution.yaml +0 -0
  142. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/evaluation/rag/topic_generation.yaml +0 -0
  143. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/__init__.py +0 -0
  144. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/__init__.py +0 -0
  145. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/detailed_summary.yaml +0 -0
  146. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/__init__.py +0 -0
  147. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/__init__.py +0 -0
  148. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/extractive_summary.yaml +0 -0
  149. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_answers.yaml +0 -0
  150. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_multiple_qa.yaml +0 -0
  151. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_question_list.yaml +0 -0
  152. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/__init__.py +0 -0
  153. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/key_facts_summary.yaml +0 -0
  154. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/README.md +0 -0
  155. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/__init__.py +0 -0
  156. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/atomic_facts.yaml +0 -0
  157. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/detailed_summary.yaml +0 -0
  158. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_faithfulness.yaml +0 -0
  159. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_question.yaml +0 -0
  160. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_relevancy.yaml +0 -0
  161. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/extractive_summary.yaml +0 -0
  162. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/generate_questions_responses.yaml +0 -0
  163. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/README.md +0 -0
  164. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/__init__.py +0 -0
  165. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/atomic_facts_ja.yaml +0 -0
  166. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/detailed_summary_ja.yaml +0 -0
  167. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/extractive_summary_ja.yaml +0 -0
  168. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/generate_questions_responses_ja.yaml +0 -0
  169. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/text_analysis/__init__.py +0 -0
  170. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/text_analysis/structured_insights/__init__.py +0 -0
  171. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/text_analysis/structured_insights/analyze_sentiment.yaml +0 -0
  172. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/text_analysis/structured_insights/extract_entities.yaml +0 -0
  173. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/text_analysis/structured_insights/extract_keywords.yaml +0 -0
  174. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/flows/text_analysis/structured_insights/summarize.yaml +0 -0
  175. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub/py.typed +0 -0
  176. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub.egg-info/dependency_links.txt +0 -0
  177. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/src/sdg_hub.egg-info/top_level.txt +0 -0
  178. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/__init__.py +0 -0
  179. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/filtering/test_columnvaluefilter.py +0 -0
  180. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/llm/test_llm_chat_block.py +0 -0
  181. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/llm/test_textparserblock.py +0 -0
  182. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/test_registry.py +0 -0
  183. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/testdata/test_config.yaml +0 -0
  184. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/testdata/test_prompt_format_config.yaml +0 -0
  185. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/testdata/test_prompt_format_no_system.yaml +0 -0
  186. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/testdata/test_prompt_format_strict.yaml +0 -0
  187. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/testdata/test_prompt_invalid_final_role.yaml +0 -0
  188. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/testdata/test_prompt_no_user_messages.yaml +0 -0
  189. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/transform/test_index_based_mapper.py +0 -0
  190. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/transform/test_json_structure_block.py +0 -0
  191. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/transform/test_melt_columns.py +0 -0
  192. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/transform/test_rename_columns.py +0 -0
  193. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/transform/test_text_concat.py +0 -0
  194. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/blocks/transform/test_uniform_col_val_setter.py +0 -0
  195. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/flow/__init__.py +0 -0
  196. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/flow/conftest.py +0 -0
  197. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/flow/test_checkpointer.py +0 -0
  198. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/flow/test_dataset_requirements.py +0 -0
  199. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/flow/test_integration.py +0 -0
  200. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/flow/test_metadata.py +0 -0
  201. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/flow/test_registry.py +0 -0
  202. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/flow/test_time_estimation.py +0 -0
  203. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/flow/test_validation.py +0 -0
  204. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/integration/README.md +0 -0
  205. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/integration/__init__.py +0 -0
  206. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/integration/knowledge_tuning/enhanced_summary_knowledge_tuning/README.md +0 -0
  207. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/integration/knowledge_tuning/enhanced_summary_knowledge_tuning/__init__.py +0 -0
  208. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/integration/knowledge_tuning/enhanced_summary_knowledge_tuning/conftest.py +0 -0
  209. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/integration/knowledge_tuning/enhanced_summary_knowledge_tuning/test_data/test_seed_data.jsonl +0 -0
  210. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/integration/knowledge_tuning/enhanced_summary_knowledge_tuning/test_functional.py +0 -0
  211. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/utils/test_datautils.py +0 -0
  212. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/utils/test_error_handling.py +0 -0
  213. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tests/utils/test_path_resolution.py +0 -0
  214. {sdg_hub-0.7.2 → sdg_hub-0.7.3}/tox.ini +0 -0
@@ -1,3 +1,3 @@
1
1
  # Since dependabot cannot update workflows using docker,
2
2
  # we use this indirection since dependabot can update this file.
3
- FROM rhysd/actionlint:1.7.9@sha256:a0383f60d92601e2694e24b24d37df7b6a40bed7cedbc447611c50009bf02d94
3
+ FROM rhysd/actionlint:1.7.10@sha256:ef8299f97635c4c30e2298f48f30763ab782a4ad2c95b744649439a039421e36
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sdg_hub
3
- Version: 0.7.2
3
+ Version: 0.7.3
4
4
  Summary: Synthetic Data Generation
5
5
  Author-email: Red Hat AI Innovation <abhandwa@redhat.com>
6
6
  License: Apache-2.0
@@ -26,7 +26,7 @@ Requires-Dist: click<9.0.0,>=8.1.7
26
26
  Requires-Dist: datasets>=4.0.0
27
27
  Requires-Dist: httpx<1.0.0,>=0.25.0
28
28
  Requires-Dist: jinja2
29
- Requires-Dist: litellm<1.75.0,>=1.73.0
29
+ Requires-Dist: litellm<2.0.0,>=1.73.0
30
30
  Requires-Dist: rich
31
31
  Requires-Dist: pandas
32
32
  Requires-Dist: pydantic<3.0.0,>=2.0.0
@@ -603,7 +603,7 @@ print(result["judgment"]) # ['YES']
603
603
  TextParserBlock is commonly used after LLMChatBlock to structure responses:
604
604
 
605
605
  ```python
606
- from sdg_hub.core.blocks import LLMChatBlock, LLMParserBlock, TextParserBlock
606
+ from sdg_hub.core.blocks import LLMChatBlock, LLMResponseExtractorBlock, TextParserBlock
607
607
 
608
608
  # Step 1: Generate LLM response
609
609
  chat_block = LLMChatBlock(
@@ -615,7 +615,7 @@ chat_block = LLMChatBlock(
615
615
 
616
616
  # Step 2: Extract content from response object
617
617
  # Use field_prefix="" to get cleaner column names
618
- llm_parser = LLMParserBlock(
618
+ llm_parser = LLMResponseExtractorBlock(
619
619
  block_name="extract_eval",
620
620
  input_cols=["eval_response"],
621
621
  extract_content=True,
@@ -316,7 +316,7 @@ blocks:
316
316
  output_cols: ["eval_response"]
317
317
  async_mode: true
318
318
 
319
- - block_type: "LLMParserBlock"
319
+ - block_type: "LLMResponseExtractorBlock"
320
320
  block_config:
321
321
  block_name: "extract_eval_content"
322
322
  input_cols: ["eval_response"]
@@ -537,7 +537,7 @@ result = flow.generate(
537
537
  | | `top_p` | Nucleus sampling threshold | `0.0` - `1.0` |
538
538
  | | `frequency_penalty` | Penalize token repetition | `-2.0` - `2.0` |
539
539
  | | `presence_penalty` | Penalize new topics | `-2.0` - `2.0` |
540
- | **LLMParserBlock** | `extract_content` | Extract main content field | `True`, `False` |
540
+ | **LLMResponseExtractorBlock** | `extract_content` | Extract main content field | `True`, `False` |
541
541
  | | `extract_reasoning_content` | Extract reasoning/thinking | `True`, `False` |
542
542
  | | `extract_tool_calls` | Extract tool call data | `True`, `False` |
543
543
  | | `field_prefix` | Prefix for output fields | `"llm_"`, `"parsed_"` |
@@ -752,7 +752,7 @@ result = flow.generate(dataset)
752
752
  │ │ generate_question │ LLMChatBlock │ 45.30s │ 100 → 100 │ +1 │ ✓││
753
753
  │ │ generate_answer │ LLMChatBlock │ 78.45s │ 100 → 100 │ +1 │ ✓││
754
754
  │ │ eval_faithfulness... │ LLMChatBlock │ 52.20s │ 100 → 100 │ +1 │ ✓││
755
- │ │ extract_eval_con... │ LLMParserBlock │ 0.15s │ 100 → 100 │ +2 │ ✓││
755
+ │ │ extract_eval_con... │ LLMResponseExtractorBlock │ 0.15s │ 100 → 100 │ +2 │ ✓││
756
756
  │ │ parse_evaluation │ TextParserBlock │ 0.22s │ 100 → 100 │ +2 │ ✓││
757
757
  │ │ filter_faithful │ ColumnValueF... │ 0.08s │ 100 → 87 │ — │ ✓││
758
758
  │ ├──────────────────────┼─────────────────┼──────────┼──────────────┼─────────┼──┤│
@@ -332,7 +332,7 @@
332
332
  " LLMChatBlock,\n",
333
333
  " PromptBuilderBlock,\n",
334
334
  " TextParserBlock,\n",
335
- " LLMParserBlock,\n",
335
+ " LLMResponseExtractorBlock,\n",
336
336
  ")\n",
337
337
  "from sdg_hub.core.blocks.transform import JSONStructureBlock\n",
338
338
  "\n",
@@ -355,7 +355,7 @@
355
355
  " temperature=0.1, # Low temperature for more consistent extraction\n",
356
356
  ")\n",
357
357
  "\n",
358
- "ticker_llm_parser_block = LLMParserBlock(\n",
358
+ "ticker_llm_response_extractor_block = LLMResponseExtractorBlock(\n",
359
359
  " block_name=\"extract_stock_tickers\",\n",
360
360
  " input_cols=[\"raw_stock_tickers\"],\n",
361
361
  " extract_content=True,\n",
@@ -406,7 +406,7 @@
406
406
  "ticker_blocks = [\n",
407
407
  " ticker_prompt_block,\n",
408
408
  " ticker_llm_block,\n",
409
- " ticker_llm_parser_block,\n",
409
+ " ticker_llm_response_extractor_block,\n",
410
410
  " ticker_parser_block,\n",
411
411
  " enhanced_json_block,\n",
412
412
  "]\n",
@@ -33,7 +33,7 @@ dependencies = [
33
33
  "datasets>=4.0.0",
34
34
  "httpx>=0.25.0,<1.0.0",
35
35
  "jinja2",
36
- "litellm>=1.73.0,<1.75.0",
36
+ "litellm>=1.73.0,<2.0.0", # cap raised: tests pass without 'backoff'-related errors; restore the <1.75.0 cap if such errors surface
37
37
  "rich",
38
38
  "pandas",
39
39
  "pydantic>=2.0.0,<3.0.0", # cap before v3; adjust the lower bound to the minimum v2.x you’ve tested
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.7.2'
32
- __version_tuple__ = version_tuple = (0, 7, 2)
31
+ __version__ = version = '0.7.3'
32
+ __version_tuple__ = version_tuple = (0, 7, 3)
33
33
 
34
- __commit_id__ = commit_id = 'g99a40a268'
34
+ __commit_id__ = commit_id = 'g97824a47f'
@@ -6,7 +6,13 @@ This package provides various block implementations for data generation, process
6
6
  # Local
7
7
  from .base import BaseBlock
8
8
  from .filtering import ColumnValueFilterBlock
9
- from .llm import LLMChatBlock, LLMParserBlock, PromptBuilderBlock, TextParserBlock
9
+ from .llm import (
10
+ LLMChatBlock,
11
+ LLMParserBlock,
12
+ LLMResponseExtractorBlock,
13
+ PromptBuilderBlock,
14
+ TextParserBlock,
15
+ )
10
16
  from .registry import BlockRegistry
11
17
  from .transform import (
12
18
  DuplicateColumnsBlock,
@@ -28,7 +34,8 @@ __all__ = [
28
34
  "TextConcatBlock",
29
35
  "UniformColumnValueSetter",
30
36
  "LLMChatBlock",
31
- "LLMParserBlock",
37
+ "LLMParserBlock", # Deprecated alias for LLMResponseExtractorBlock
38
+ "LLMResponseExtractorBlock",
32
39
  "TextParserBlock",
33
40
  "PromptBuilderBlock",
34
41
  ]
@@ -49,6 +49,9 @@ class BaseBlock(BaseModel, ABC):
49
49
  block_name: str = Field(
50
50
  ..., description="Unique identifier for this block instance"
51
51
  )
52
+ block_type: Optional[str] = Field(
53
+ None, description="Block type (e.g., 'llm', 'transform', 'parser', 'filtering')"
54
+ )
52
55
  input_cols: Union[str, list[str], dict[str, Any], None] = Field(
53
56
  None, description="Input columns: str, list, or dict"
54
57
  )
@@ -366,5 +369,5 @@ class BaseBlock(BaseModel, ABC):
366
369
  Dict[str, Any]
367
370
  """
368
371
  config = self.get_config()
369
- config["block_type"] = self.__class__.__name__
372
+ config["block_class"] = self.__class__.__name__
370
373
  return config
@@ -46,6 +46,8 @@ DTYPE_MAP = {
46
46
  "Filters datasets based on column values using various comparison operations",
47
47
  )
48
48
  class ColumnValueFilterBlock(BaseBlock):
49
+ block_type: str = "filtering"
50
+
49
51
  """A block for filtering datasets based on column values.
50
52
 
51
53
  This block allows filtering of datasets using various operations (e.g., equals, contains)
@@ -9,7 +9,7 @@ local models (vLLM, Ollama), and more.
9
9
  # Local
10
10
  from .error_handler import ErrorCategory, LLMErrorHandler
11
11
  from .llm_chat_block import LLMChatBlock
12
- from .llm_parser_block import LLMParserBlock
12
+ from .llm_response_extractor_block import LLMParserBlock, LLMResponseExtractorBlock
13
13
  from .prompt_builder_block import PromptBuilderBlock
14
14
  from .text_parser_block import TextParserBlock
15
15
 
@@ -17,7 +17,8 @@ __all__ = [
17
17
  "LLMErrorHandler",
18
18
  "ErrorCategory",
19
19
  "LLMChatBlock",
20
- "LLMParserBlock",
20
+ "LLMParserBlock", # Deprecated alias for LLMResponseExtractorBlock
21
+ "LLMResponseExtractorBlock",
21
22
  "PromptBuilderBlock",
22
23
  "TextParserBlock",
23
24
  ]
@@ -32,6 +32,8 @@ logger = setup_logger(__name__)
32
32
  class LLMChatBlock(BaseBlock):
33
33
  model_config = ConfigDict(extra="allow")
34
34
 
35
+ block_type: str = "llm"
36
+
35
37
  """Unified LLM chat block supporting all providers via LiteLLM.
36
38
 
37
39
  This block provides a minimal wrapper around LiteLLM's completion API,
@@ -1,7 +1,7 @@
1
1
  # SPDX-License-Identifier: Apache-2.0
2
- """LLM parser block for extracting fields from LLM response objects.
2
+ """LLM response extractor block for extracting fields from LLM response objects.
3
3
 
4
- This module provides the LLMParserBlock for extracting specific fields
4
+ This module provides the LLMResponseExtractorBlock for extracting specific fields
5
5
  (content, reasoning_content, tool_calls) from chat completion response objects.
6
6
  """
7
7
 
@@ -22,13 +22,15 @@ logger = setup_logger(__name__)
22
22
 
23
23
 
24
24
  @BlockRegistry.register(
25
- "LLMParserBlock",
25
+ "LLMResponseExtractorBlock",
26
26
  "llm",
27
27
  "Extracts specified fields from LLM response objects",
28
28
  )
29
- class LLMParserBlock(BaseBlock):
29
+ class LLMResponseExtractorBlock(BaseBlock):
30
30
  _flow_requires_jsonl_tmp: bool = True
31
31
 
32
+ block_type: str = "llm_util"
33
+
32
34
  """Block for extracting fields from LLM response objects.
33
35
 
34
36
  This block extracts specified fields from chat completion response objects.
@@ -88,7 +90,7 @@ class LLMParserBlock(BaseBlock):
88
90
  ]
89
91
  ):
90
92
  raise ValueError(
91
- "LLMParserBlock requires at least one extraction field to be enabled: "
93
+ "LLMResponseExtractorBlock requires at least one extraction field to be enabled: "
92
94
  "extract_content, extract_reasoning_content, or extract_tool_calls"
93
95
  )
94
96
 
@@ -106,7 +108,7 @@ class LLMParserBlock(BaseBlock):
106
108
  return self
107
109
 
108
110
  def _validate_custom(self, dataset: pd.DataFrame) -> None:
109
- """Validate LLMParserBlock specific requirements.
111
+ """Validate LLMResponseExtractorBlock specific requirements.
110
112
 
111
113
  Parameters
112
114
  ----------
@@ -116,14 +118,16 @@ class LLMParserBlock(BaseBlock):
116
118
  Raises
117
119
  ------
118
120
  ValueError
119
- If LLMParserBlock requirements are not met.
121
+ If LLMResponseExtractorBlock requirements are not met.
120
122
  """
121
123
  # Require at least one input column; if more than one is given, warn that only the first is used
122
124
  if len(self.input_cols) == 0:
123
- raise ValueError("LLMParserBlock expects at least one input column")
125
+ raise ValueError(
126
+ "LLMResponseExtractorBlock expects at least one input column"
127
+ )
124
128
  if len(self.input_cols) > 1:
125
129
  logger.warning(
126
- f"LLMParserBlock expects exactly one input column, but got {len(self.input_cols)}. "
130
+ f"LLMResponseExtractorBlock expects exactly one input column, but got {len(self.input_cols)}. "
127
131
  f"Using the first column: {self.input_cols[0]}"
128
132
  )
129
133
 
@@ -324,3 +328,22 @@ class LLMParserBlock(BaseBlock):
324
328
  new_data.extend(self._generate(sample))
325
329
 
326
330
  return pd.DataFrame(new_data)
331
+
332
+
333
+ # Backwards compatibility alias (deprecated)
334
+ # Register deprecated alias in BlockRegistry so old YAML flows still work
335
+ @BlockRegistry.register(
336
+ "LLMParserBlock",
337
+ "llm",
338
+ "Deprecated: Use LLMResponseExtractorBlock instead",
339
+ deprecated=True,
340
+ replacement="LLMResponseExtractorBlock",
341
+ )
342
+ class LLMParserBlock(LLMResponseExtractorBlock):
343
+ """Deprecated alias for LLMResponseExtractorBlock.
344
+
345
+ This class exists for backwards compatibility with existing code and YAML flows.
346
+ Use LLMResponseExtractorBlock instead.
347
+ """
348
+
349
+ pass
@@ -222,6 +222,8 @@ class PromptRenderer:
222
222
  "Formats prompts into structured chat messages or plain text using Jinja templates",
223
223
  )
224
224
  class PromptBuilderBlock(BaseBlock):
225
+ block_type: str = "llm_util"
226
+
225
227
  """Block for formatting prompts into structured chat messages or plain text.
226
228
 
227
229
  This block takes input from dataset columns, applies Jinja templates from a YAML config
@@ -30,6 +30,8 @@ logger = setup_logger(__name__)
30
30
  class TextParserBlock(BaseBlock):
31
31
  _flow_requires_jsonl_tmp: bool = True
32
32
 
33
+ block_type: str = "parser"
34
+
33
35
  """Block for parsing and post-processing text content.
34
36
 
35
37
  This block handles text parsing using start/end tags, custom regex patterns,
@@ -27,6 +27,8 @@ logger = setup_logger(__name__)
27
27
  "Duplicates existing columns with new names according to a mapping specification",
28
28
  )
29
29
  class DuplicateColumnsBlock(BaseBlock):
30
+ block_type: str = "transform"
31
+
30
32
  """Block for duplicating existing columns with new names.
31
33
 
32
34
  This block creates copies of existing columns with new names according to a mapping specification.
@@ -28,6 +28,8 @@ logger = setup_logger(__name__)
28
28
  "Maps values from source columns to output columns based on choice columns using shared mapping",
29
29
  )
30
30
  class IndexBasedMapperBlock(BaseBlock):
31
+ block_type: str = "transform"
32
+
31
33
  """Block for mapping values from source columns to output columns based on choice columns.
32
34
 
33
35
  This block uses a shared mapping dictionary to select values from source columns and
@@ -28,6 +28,8 @@ logger = setup_logger(__name__)
28
28
  "Combines multiple columns into a single column containing a structured JSON object",
29
29
  )
30
30
  class JSONStructureBlock(BaseBlock):
31
+ block_type: str = "transform"
32
+
31
33
  """Block for combining multiple columns into a structured JSON object.
32
34
 
33
35
  This block takes values from multiple input columns and combines them into a single
@@ -28,6 +28,8 @@ logger = setup_logger(__name__)
28
28
  "Transforms wide dataset format into long format by melting columns into rows",
29
29
  )
30
30
  class MeltColumnsBlock(BaseBlock):
31
+ block_type: str = "transform"
32
+
31
33
  """Block for flattening multiple columns into a long format.
32
34
 
33
35
  This block transforms a wide dataset format into a long format by melting
@@ -27,6 +27,8 @@ logger = setup_logger(__name__)
27
27
  "Renames columns in a dataset according to a mapping specification",
28
28
  )
29
29
  class RenameColumnsBlock(BaseBlock):
30
+ block_type: str = "transform"
31
+
30
32
  """Block for renaming columns in a dataset.
31
33
 
32
34
  This block renames columns in a dataset according to a mapping specification.
@@ -27,6 +27,8 @@ logger = setup_logger(__name__)
27
27
  "Combines multiple columns into a single column using a specified separator",
28
28
  )
29
29
  class TextConcatBlock(BaseBlock):
30
+ block_type: str = "transform"
31
+
30
32
  """Block for combining multiple columns into a single column.
31
33
 
32
34
  This block concatenates values from multiple columns into a single output column,
@@ -28,6 +28,8 @@ logger = setup_logger(__name__)
28
28
  "Replaces all values in a column with a single summary statistic (e.g., mode, mean, median)",
29
29
  )
30
30
  class UniformColumnValueSetter(BaseBlock):
31
+ block_type: str = "transform"
32
+
31
33
  """Block that replaces all values in a column with a single aggregate value.
32
34
 
33
35
  Supported strategies include: mode, min, max, mean, median.
@@ -679,7 +679,7 @@ class Flow(BaseModel):
679
679
  self._block_metrics.append(
680
680
  {
681
681
  "block_name": block.block_name,
682
- "block_type": block.__class__.__name__,
682
+ "block_class": block.__class__.__name__,
683
683
  "execution_time": execution_time,
684
684
  "input_rows": input_rows,
685
685
  "output_rows": output_rows,
@@ -701,7 +701,7 @@ class Flow(BaseModel):
701
701
  self._block_metrics.append(
702
702
  {
703
703
  "block_name": block.block_name,
704
- "block_type": block.__class__.__name__,
704
+ "block_class": block.__class__.__name__,
705
705
  "execution_time": execution_time,
706
706
  "input_rows": input_rows,
707
707
  "output_rows": 0,
@@ -882,38 +882,14 @@ class Flow(BaseModel):
882
882
  )
883
883
 
884
884
  def _detect_llm_blocks(self) -> list[str]:
885
- """Detect LLM blocks in the flow by checking for model-related attribute existence.
886
-
887
- LLM blocks are identified by having model, api_base, or api_key attributes,
888
- regardless of their values (they may be None until set_model_config() is called).
885
+ """Detect blocks with block_type='llm'.
889
886
 
890
887
  Returns
891
888
  -------
892
889
  List[str]
893
- List of block names that have LLM-related attributes.
890
+ List of block names that are LLM blocks.
894
891
  """
895
- llm_blocks = []
896
-
897
- for block in self.blocks:
898
- block_type = block.__class__.__name__
899
- block_name = block.block_name
900
-
901
- # Check by attribute existence (not value) - LLM blocks have these attributes even if None
902
- has_model_attr = hasattr(block, "model")
903
- has_api_base_attr = hasattr(block, "api_base")
904
- has_api_key_attr = hasattr(block, "api_key")
905
-
906
- # A block is considered an LLM block if it has any LLM-related attributes
907
- is_llm_block = has_model_attr or has_api_base_attr or has_api_key_attr
908
-
909
- if is_llm_block:
910
- llm_blocks.append(block_name)
911
- logger.debug(
912
- f"Detected LLM block '{block_name}' ({block_type}): "
913
- f"has_model_attr={has_model_attr}, has_api_base_attr={has_api_base_attr}, has_api_key_attr={has_api_key_attr}"
914
- )
915
-
916
- return llm_blocks
892
+ return [block.block_name for block in self.blocks if block.block_type == "llm"]
917
893
 
918
894
  def is_model_config_required(self) -> bool:
919
895
  """Check if model configuration is required for this flow.
@@ -1152,7 +1128,7 @@ class Flow(BaseModel):
1152
1128
  # Record block execution info
1153
1129
  block_info = {
1154
1130
  "block_name": block.block_name,
1155
- "block_type": block.__class__.__name__,
1131
+ "block_class": block.__class__.__name__,
1156
1132
  "execution_time_seconds": block_execution_time,
1157
1133
  "input_rows": input_rows,
1158
1134
  "output_rows": len(current_dataset),
@@ -1341,7 +1317,7 @@ class Flow(BaseModel):
1341
1317
  "metadata": self.metadata.model_dump(),
1342
1318
  "blocks": [
1343
1319
  {
1344
- "block_type": block.__class__.__name__,
1320
+ "block_class": block.__class__.__name__,
1345
1321
  "block_name": block.block_name,
1346
1322
  "input_cols": getattr(block, "input_cols", None),
1347
1323
  "output_cols": getattr(block, "output_cols", None),
@@ -31,12 +31,12 @@ def aggregate_block_metrics(entries: list[dict[str, Any]]) -> list[dict[str, Any
31
31
  """
32
32
  agg: dict[tuple[str, str], dict[str, Any]] = {}
33
33
  for m in entries:
34
- key = (m.get("block_name"), m.get("block_type"))
34
+ key = (m.get("block_name"), m.get("block_class"))
35
35
  a = agg.setdefault(
36
36
  key,
37
37
  {
38
38
  "block_name": key[0],
39
- "block_type": key[1],
39
+ "block_class": key[1],
40
40
  "execution_time": 0.0,
41
41
  "input_rows": 0,
42
42
  "output_rows": 0,
@@ -138,7 +138,7 @@ def display_metrics_summary(
138
138
 
139
139
  table.add_row(
140
140
  metrics["block_name"],
141
- metrics["block_type"],
141
+ metrics["block_class"],
142
142
  duration,
143
143
  row_change,
144
144
  col_change,
@@ -41,7 +41,7 @@ blocks:
41
41
  max_tokens: 2048
42
42
  temperature: 0.7
43
43
 
44
- - block_type: LLMParserBlock
44
+ - block_type: LLMResponseExtractorBlock
45
45
  block_config:
46
46
  block_name: parse_topic
47
47
  input_cols: topic_response
@@ -73,7 +73,7 @@ blocks:
73
73
  max_tokens: 2048
74
74
  temperature: 0.7
75
75
 
76
- - block_type: LLMParserBlock
76
+ - block_type: LLMResponseExtractorBlock
77
77
  block_config:
78
78
  block_name: parse_question
79
79
  input_cols: question_response
@@ -97,7 +97,7 @@ blocks:
97
97
  max_tokens: 4096
98
98
  temperature: 0.7
99
99
 
100
- - block_type: LLMParserBlock
100
+ - block_type: LLMResponseExtractorBlock
101
101
  block_config:
102
102
  block_name: parse_evolved_question
103
103
  input_cols: evolution_response
@@ -123,7 +123,7 @@ blocks:
123
123
  max_tokens: 4096
124
124
  temperature: 0.2
125
125
 
126
- - block_type: LLMParserBlock
126
+ - block_type: LLMResponseExtractorBlock
127
127
  block_config:
128
128
  block_name: parse_answer
129
129
  input_cols: answer_response
@@ -150,7 +150,7 @@ blocks:
150
150
  max_tokens: 512
151
151
  temperature: 0.0
152
152
 
153
- - block_type: LLMParserBlock
153
+ - block_type: LLMResponseExtractorBlock
154
154
  block_config:
155
155
  block_name: parse_critic_score
156
156
  input_cols: critic_response
@@ -185,7 +185,7 @@ blocks:
185
185
  max_tokens: 4096
186
186
  temperature: 0.0
187
187
 
188
- - block_type: LLMParserBlock
188
+ - block_type: LLMResponseExtractorBlock
189
189
  block_config:
190
190
  block_name: parse_extracted_context
191
191
  input_cols: extraction_response
@@ -60,7 +60,7 @@ blocks:
60
60
  temperature: 0.7
61
61
  n: 50
62
62
  async_mode: true
63
- - block_type: LLMParserBlock
63
+ - block_type: LLMResponseExtractorBlock
64
64
  block_config:
65
65
  block_name: extract_detailed_summary
66
66
  input_cols: raw_summary
@@ -108,7 +108,7 @@ blocks:
108
108
  temperature: 0.7
109
109
  n: 1
110
110
  async_mode: true
111
- - block_type: LLMParserBlock
111
+ - block_type: LLMResponseExtractorBlock
112
112
  block_config:
113
113
  block_name: extract_questions
114
114
  input_cols: question_list
@@ -142,7 +142,7 @@ blocks:
142
142
  temperature: 0.7
143
143
  n: 1
144
144
  async_mode: true
145
- - block_type: LLMParserBlock
145
+ - block_type: LLMResponseExtractorBlock
146
146
  block_config:
147
147
  block_name: extract_answers
148
148
  input_cols: response_dict
@@ -174,7 +174,7 @@ blocks:
174
174
  output_cols: eval_faithful_response_dict
175
175
  n: 1
176
176
  async_mode: true
177
- - block_type: LLMParserBlock
177
+ - block_type: LLMResponseExtractorBlock
178
178
  block_config:
179
179
  block_name: extract_eval_faithful
180
180
  input_cols: eval_faithful_response_dict
@@ -64,7 +64,7 @@ blocks:
64
64
  temperature: 1.0
65
65
  n: 1
66
66
  async_mode: true
67
- - block_type: LLMParserBlock
67
+ - block_type: LLMResponseExtractorBlock
68
68
  block_config:
69
69
  block_name: extract_questions
70
70
  input_cols: question_list
@@ -98,7 +98,7 @@ blocks:
98
98
  temperature: 1.0
99
99
  n: 1
100
100
  async_mode: true
101
- - block_type: LLMParserBlock
101
+ - block_type: LLMResponseExtractorBlock
102
102
  block_config:
103
103
  block_name: extract_answer
104
104
  input_cols: response_dict
@@ -130,7 +130,7 @@ blocks:
130
130
  output_cols: eval_faithful_response_dict
131
131
  n: 1
132
132
  async_mode: true
133
- - block_type: LLMParserBlock
133
+ - block_type: LLMResponseExtractorBlock
134
134
  block_config:
135
135
  block_name: extract_eval_faithful
136
136
  input_cols: eval_faithful_response_dict
@@ -62,7 +62,7 @@ blocks:
62
62
  temperature: 0.7
63
63
  n: 50
64
64
  async_mode: true
65
- - block_type: LLMParserBlock
65
+ - block_type: LLMResponseExtractorBlock
66
66
  block_config:
67
67
  block_name: extract_extractive_summary
68
68
  input_cols: raw_summary
@@ -110,7 +110,7 @@ blocks:
110
110
  temperature: 0.7
111
111
  n: 1
112
112
  async_mode: true
113
- - block_type: LLMParserBlock
113
+ - block_type: LLMResponseExtractorBlock
114
114
  block_config:
115
115
  block_name: extract_questions
116
116
  input_cols: question_list
@@ -144,7 +144,7 @@ blocks:
144
144
  temperature: 0.7
145
145
  n: 1
146
146
  async_mode: true
147
- - block_type: LLMParserBlock
147
+ - block_type: LLMResponseExtractorBlock
148
148
  block_config:
149
149
  block_name: extract_answers
150
150
  input_cols: response_dict
@@ -176,7 +176,7 @@ blocks:
176
176
  output_cols: eval_faithful_response_dict
177
177
  n: 1
178
178
  async_mode: true
179
- - block_type: LLMParserBlock
179
+ - block_type: LLMResponseExtractorBlock
180
180
  block_config:
181
181
  block_name: extract_eval_faithful
182
182
  input_cols: eval_faithful_response_dict
@@ -49,7 +49,7 @@ blocks:
49
49
  temperature: 0.7
50
50
  n: 1
51
51
  async_mode: true
52
- - block_type: LLMParserBlock
52
+ - block_type: LLMResponseExtractorBlock
53
53
  block_config:
54
54
  block_name: extract_atomic_facts
55
55
  input_cols: raw_summary
@@ -98,7 +98,7 @@ blocks:
98
98
  temperature: 0.7
99
99
  n: 1
100
100
  async_mode: true
101
- - block_type: LLMParserBlock
101
+ - block_type: LLMResponseExtractorBlock
102
102
  block_config:
103
103
  block_name: extract_key_fact_qa
104
104
  input_cols: raw_key_fact_qa