sdg-hub 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. sdg_hub/_version.py +2 -2
  2. sdg_hub/core/blocks/__init__.py +2 -4
  3. sdg_hub/core/blocks/base.py +61 -6
  4. sdg_hub/core/blocks/filtering/column_value_filter.py +3 -2
  5. sdg_hub/core/blocks/llm/__init__.py +2 -4
  6. sdg_hub/core/blocks/llm/llm_chat_block.py +251 -265
  7. sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +216 -98
  8. sdg_hub/core/blocks/llm/llm_parser_block.py +320 -0
  9. sdg_hub/core/blocks/llm/text_parser_block.py +53 -152
  10. sdg_hub/core/flow/base.py +7 -4
  11. sdg_hub/core/utils/datautils.py +40 -22
  12. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml +51 -11
  13. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/__init__.py +0 -0
  14. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/flow.yaml +159 -0
  15. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml +51 -11
  16. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml +14 -2
  17. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +146 -26
  18. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/README.md +0 -0
  19. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/__init__.py +0 -0
  20. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/atomic_facts_ja.yaml +41 -0
  21. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/detailed_summary_ja.yaml +14 -0
  22. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/extractive_summary_ja.yaml +14 -0
  23. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml +304 -0
  24. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/generate_questions_responses_ja.yaml +55 -0
  25. sdg_hub/flows/text_analysis/structured_insights/flow.yaml +28 -4
  26. {sdg_hub-0.3.0.dist-info → sdg_hub-0.4.0.dist-info}/METADATA +1 -1
  27. {sdg_hub-0.3.0.dist-info → sdg_hub-0.4.0.dist-info}/RECORD +30 -26
  28. sdg_hub/core/blocks/evaluation/__init__.py +0 -9
  29. sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +0 -323
  30. sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +0 -323
  31. sdg_hub/core/blocks/evaluation/verify_question_block.py +0 -329
  32. sdg_hub/core/blocks/llm/client_manager.py +0 -447
  33. sdg_hub/core/blocks/llm/config.py +0 -337
  34. {sdg_hub-0.3.0.dist-info → sdg_hub-0.4.0.dist-info}/WHEEL +0 -0
  35. {sdg_hub-0.3.0.dist-info → sdg_hub-0.4.0.dist-info}/licenses/LICENSE +0 -0
  36. {sdg_hub-0.3.0.dist-info → sdg_hub-0.4.0.dist-info}/top_level.txt +0 -0
@@ -50,10 +50,16 @@ blocks:
50
50
  max_tokens: 1024
51
51
  temperature: 0.3
52
52
  async_mode: true
53
+ - block_type: "LLMParserBlock"
54
+ block_config:
55
+ block_name: "extract_summary"
56
+ input_cols: "raw_summary"
57
+ extract_content: true
58
+ expand_lists: true
53
59
  - block_type: "TextParserBlock"
54
60
  block_config:
55
61
  block_name: "parse_summary"
56
- input_cols: "raw_summary"
62
+ input_cols: "extract_summary_content"
57
63
  output_cols: "summary"
58
64
  start_tags:
59
65
  - "[SUMMARY]"
@@ -76,10 +82,16 @@ blocks:
76
82
  max_tokens: 512
77
83
  temperature: 0.3
78
84
  async_mode: true
85
+ - block_type: "LLMParserBlock"
86
+ block_config:
87
+ block_name: "extract_keywords"
88
+ input_cols: "raw_keywords"
89
+ extract_content: true
90
+ expand_lists: true
79
91
  - block_type: "TextParserBlock"
80
92
  block_config:
81
93
  block_name: "parse_keywords"
82
- input_cols: "raw_keywords"
94
+ input_cols: "extract_keywords_content"
83
95
  output_cols: "keywords"
84
96
  start_tags:
85
97
  - "[KEYWORDS]"
@@ -102,10 +114,16 @@ blocks:
102
114
  max_tokens: 1024
103
115
  temperature: 0.3
104
116
  async_mode: true
117
+ - block_type: "LLMParserBlock"
118
+ block_config:
119
+ block_name: "extract_entities"
120
+ input_cols: "raw_entities"
121
+ extract_content: true
122
+ expand_lists: true
105
123
  - block_type: "TextParserBlock"
106
124
  block_config:
107
125
  block_name: "parse_entities"
108
- input_cols: "raw_entities"
126
+ input_cols: "extract_entities_content"
109
127
  output_cols: "entities"
110
128
  start_tags:
111
129
  - "[ENTITIES]"
@@ -128,10 +146,16 @@ blocks:
128
146
  max_tokens: 256
129
147
  temperature: 0.1
130
148
  async_mode: true
149
+ - block_type: "LLMParserBlock"
150
+ block_config:
151
+ block_name: "extract_sentiment"
152
+ input_cols: "raw_sentiment"
153
+ extract_content: true
154
+ expand_lists: true
131
155
  - block_type: "TextParserBlock"
132
156
  block_config:
133
157
  block_name: "parse_sentiment"
134
- input_cols: "raw_sentiment"
158
+ input_cols: "extract_sentiment_content"
135
159
  output_cols: "sentiment"
136
160
  start_tags:
137
161
  - "[SENTIMENT]"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sdg_hub
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: Synthetic Data Generation
5
5
  Author-email: Red Hat AI Innovation <abhandwa@redhat.com>
6
6
  License: Apache-2.0
@@ -1,9 +1,9 @@
1
1
  sdg_hub/__init__.py,sha256=Tw-6R5a8_W1kJcTAsW3R9ltBDP1dy5-fe7Tvt3cSyCQ,550
2
- sdg_hub/_version.py,sha256=5zTqm8rgXsWYBpB2M3Zw_K1D-aV8wP7NsBLrmMKkrAQ,704
2
+ sdg_hub/_version.py,sha256=2_0GUP7yBCXRus-qiJKxQD62z172WSs1sQ6DVpPsbmM,704
3
3
  sdg_hub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  sdg_hub/core/__init__.py,sha256=NwqB4fwhC29W50VW7QXZssLxx122YvgO9LHDLdgAnrI,496
5
- sdg_hub/core/blocks/__init__.py,sha256=9sCkCvDQzJGSedaePVlEIpbNwrkBz_K500VW_6FLhuE,1601
6
- sdg_hub/core/blocks/base.py,sha256=TrzUAkG7Tiquk0Z3SOFsb5mRnHd1IbHH6gFPVH1P7T8,10424
5
+ sdg_hub/core/blocks/__init__.py,sha256=5FsbkcO-dmBv6MqO96TPn9FKKPTQZQCv20j4wR7UvQw,1502
6
+ sdg_hub/core/blocks/base.py,sha256=-SOdBpJwtRTMsrmCEuLjUBQMRCo_PLYlHEBRrz8sF9g,13031
7
7
  sdg_hub/core/blocks/registry.py,sha256=FuEN_pnq-nSH1LguY3_oCubT6Kz3SuJjk3TcUpLT-lw,10695
8
8
  sdg_hub/core/blocks/deprecated_blocks/__init__.py,sha256=RDu3MWFStDQko-TKkx8tGoB1UTatP_RSldZK43zHDvY,889
9
9
  sdg_hub/core/blocks/deprecated_blocks/combine_columns.py,sha256=HCvpaYsAwgx1Dm0vIshcWsKoVsRT0KrmKp9j4oqtByc,2757
@@ -15,20 +15,15 @@ sdg_hub/core/blocks/deprecated_blocks/rename_columns.py,sha256=thp-mHtkRmUw_nYKp
15
15
  sdg_hub/core/blocks/deprecated_blocks/sample_populator.py,sha256=UdueMApxOmPWaxxMrw7b1v74fKJBfqqRATEBqgmVtNw,1737
16
16
  sdg_hub/core/blocks/deprecated_blocks/selector.py,sha256=nWecsVsW8DvBcqAF_LOqXmW-5MQ28uN3d1y6wkSy38c,2960
17
17
  sdg_hub/core/blocks/deprecated_blocks/set_to_majority_value.py,sha256=44TQu-rK5isia-otMVB1zHd8D-wWmu3C8CI1NLtfY5s,2729
18
- sdg_hub/core/blocks/evaluation/__init__.py,sha256=kFXee-vsVVdU2XtLio9qHgPx_a0zoB_rQr509EKBGJc,357
19
- sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py,sha256=vFi3YIxVPNnzgdenIeAl7yUb4OOUY_uUOXS-pWLsDmw,12223
20
- sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py,sha256=NXT1lixR-JnOXNlBCbMjULcpu4kh2SthhwCWEobiBt0,12115
21
- sdg_hub/core/blocks/evaluation/verify_question_block.py,sha256=LKoIHdxUuTVO24n_M9cAliEj56uEe2kQAecKTRz65zI,12465
22
18
  sdg_hub/core/blocks/filtering/__init__.py,sha256=isxSVSvDqkMjG8dQSl3Q2M4g5c1t9fTjBSA21icf-yA,275
23
- sdg_hub/core/blocks/filtering/column_value_filter.py,sha256=H8Gif0q9Wc_d1TnVow8Zpsg7blJOFGN1EZmV6OPpkcg,5971
24
- sdg_hub/core/blocks/llm/__init__.py,sha256=N6-Prgd4X85oWbMQzhYMrq7OX-NTJm57cghowK-val0,844
25
- sdg_hub/core/blocks/llm/client_manager.py,sha256=IpMUwECL9_oNFC3yxg9A6BRqMcdg0Wdpzx28BhX45Xo,14742
26
- sdg_hub/core/blocks/llm/config.py,sha256=gc4xp5D20MSlKMFEos0QAaKUwgbZpBtMGXmn6LsIk78,11289
19
+ sdg_hub/core/blocks/filtering/column_value_filter.py,sha256=2Z9j_CiiTn5mHZ9gfXU-itLXDmeXSh0UI0x1x7j-LQ0,6001
20
+ sdg_hub/core/blocks/llm/__init__.py,sha256=AyS0dd3pkPPXH5a9aj4mT5HsKjX2vjXfkmQc6rkFV4A,795
27
21
  sdg_hub/core/blocks/llm/error_handler.py,sha256=7T-019ZFB9qgZoX1ybIiXyaLjPzrF96qcKmUu6vmO6g,12178
28
- sdg_hub/core/blocks/llm/llm_chat_block.py,sha256=9ytjxjADM0FydkLapZPSQPfzjjrFIdFONs3EJEoKnaw,23007
29
- sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py,sha256=H7LqYpEiFO1g2cxncAl4vkLhQxAYgGpV6cUyQTSG03k,27628
22
+ sdg_hub/core/blocks/llm/llm_chat_block.py,sha256=MHhI2x9i6LrfDXgvAy2_6YxgyoD7j6BpCgNGsM69xDg,22194
23
+ sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py,sha256=DW4b09IqXmcshvXawFheDyaLp3rz7vpO5VBrKdUQYW8,31703
24
+ sdg_hub/core/blocks/llm/llm_parser_block.py,sha256=aoHqsDDhaIgCDfPpv7acc0DVN-zUgzFflRVB4win0aM,12012
30
25
  sdg_hub/core/blocks/llm/prompt_builder_block.py,sha256=fkJd718X1oYlMY1cjo_8WCO16Gl8Tm0bUPWR78E_uws,13935
31
- sdg_hub/core/blocks/llm/text_parser_block.py,sha256=8oRlXEkw8ULA8XVa7WtQZUojodl_ihs1omZpvbwoJQE,17165
26
+ sdg_hub/core/blocks/llm/text_parser_block.py,sha256=975HK6NfXiU9Any4UDMpBNidRpyhHmc76BXUN69SVyc,12566
32
27
  sdg_hub/core/blocks/transform/__init__.py,sha256=lF9InjOzA6p_mjiwV-a2Kwstq9kqRiQ-dEwbsmR9yQs,825
33
28
  sdg_hub/core/blocks/transform/duplicate_columns.py,sha256=SaP7rIF4ZFEFFa50aU2xGNIuddXaEZrKxdWfHjzFpVI,2833
34
29
  sdg_hub/core/blocks/transform/index_based_mapper.py,sha256=XC_a7Skbd3mu7f4ra8fGWPxMwqUMSjJkQ7Ag7vflwJA,8235
@@ -38,14 +33,14 @@ sdg_hub/core/blocks/transform/rename_columns.py,sha256=qeB5L2utqDQnutUetH1VKZSqD
38
33
  sdg_hub/core/blocks/transform/text_concat.py,sha256=_-B__Hob1WwgwkILPIZvTnsDzuwtoX1hKviyzHlnnes,3149
39
34
  sdg_hub/core/blocks/transform/uniform_col_val_setter.py,sha256=XnjiT29z3PzIPy8M-mmE2w-Miab6Ed5ahy32SaxTCTE,3263
40
35
  sdg_hub/core/flow/__init__.py,sha256=N2NZGngvd7qpT5FI_knKukUFM0IkD9K5jdTi-gDeUI4,475
41
- sdg_hub/core/flow/base.py,sha256=8Xacytg9M82Mbv8r2GLbQgNltH-hCtFS1Fa1WpfFlSw,56488
36
+ sdg_hub/core/flow/base.py,sha256=6UlQ7ymVNs03UQ4NNgD15Y6eFyKPcl5JpuWOZuY70Mo,56654
42
37
  sdg_hub/core/flow/checkpointer.py,sha256=stm5ZtjjEiLk9ZkAAnoQQn5Y8Yl_d7qCsQLZTrCXR48,11867
43
38
  sdg_hub/core/flow/metadata.py,sha256=h9jpvAzWsF5n4ztZMzwa9ZNgnzKTHmFWdn7YbyJLHCw,12977
44
39
  sdg_hub/core/flow/migration.py,sha256=6and-RBqV0t2gRipr1GiOOVnyBJdtyyjw1kO08Z--d4,7558
45
40
  sdg_hub/core/flow/registry.py,sha256=DzCqEEgwhvwnCBAGLogoMVdwXh4pCHrxOWqoxam7O8I,12162
46
41
  sdg_hub/core/flow/validation.py,sha256=pUJvgaUjLpKNwvW6djcqVOF-HShOjegEmGOnUnoX4BA,9722
47
42
  sdg_hub/core/utils/__init__.py,sha256=C2FzLn3dHprwGJDEgI4fyFS3aoCJR-9PhHsunxropJ8,351
48
- sdg_hub/core/utils/datautils.py,sha256=q94NzBEtNwRFhzpk3FHofgJJU0gVRgAV3AAWZ1MroFk,3860
43
+ sdg_hub/core/utils/datautils.py,sha256=__HkUe1DxcJVHKrFX68z_hDXwxJygBlJDfjJLnj7rHc,4230
49
44
  sdg_hub/core/utils/error_handling.py,sha256=yku8cGj_nKCyXDsnb-mHCpgukkkAMucJ4iAUrIzqysc,5510
50
45
  sdg_hub/core/utils/flow_id_words.yaml,sha256=5QHpQdP7zwahRuooyAlJIwBY7WcDR7vtbJXxVJqujbg,2317
51
46
  sdg_hub/core/utils/flow_identifier.py,sha256=aAHfK_G9AwEtMglLRMdMpi_AI1dciub5UqBGm4yb2HE,2841
@@ -59,12 +54,14 @@ sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/gener
59
54
  sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_question_list.yaml,sha256=qHOgUNrQz2vjUjJiEHNGWxDDXwjJlP1kofTxeGgLyPI,1461
60
55
  sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
61
56
  sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/detailed_summary.yaml,sha256=Ik6gAml0O-jPq8jpXBAkURzYkQuFOnDZb4LDwjmfAiE,381
62
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml,sha256=va9ESTlEaZozy8pXTJ8OICjRg08KSP4l305YUKFuGAE,4417
57
+ sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml,sha256=_h_EFdxen842BeJd20soaCeR4eccccxAerUV6myUefE,5567
58
+ sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
59
+ sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/flow.yaml,sha256=OJDlm8uGNqGPertACSG5pKKVGOKdfsQ6RMeh4UHZMJs,4442
63
60
  sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
64
61
  sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/extractive_summary.yaml,sha256=SeapWoOx3fhN5SvWYuHss_9prLE8xSkOic7JkbDHSR0,4081
65
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml,sha256=Iv4AlbE9PFtTn6teekgiNtrTiYio_nYWS8gyD6eFLUA,4580
62
+ sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml,sha256=Yy6-2Vytdr4FPxC5wTQkcv7Amy-DBMA3H8vOx9tBB9U,5735
66
63
  sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
67
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml,sha256=Rv0c4s5vim2I5jKzQgjcUfVMdla6czzmZUU67hlTAbg,2898
64
+ sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml,sha256=QYN-zNl0YtqKXCTpMJBD9vbYsTf-30cap9ziiDwxKk0,3248
68
65
  sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/key_facts_summary.yaml,sha256=YKMX_CuvcThG_bdNCAIXdVBkMvB72I89RGq2ltSSgc8,3298
69
66
  sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
70
67
  sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -74,17 +71,24 @@ sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/ev
74
71
  sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_question.yaml,sha256=zwzklXup6khRkR88avgrJTcjaMcV1wnbeYaML5oPuNs,1767
75
72
  sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_relevancy.yaml,sha256=cA8igo7jMrRXaWW6k0of6KOp7YnxLtPj0fP4DbrmZNQ,3647
76
73
  sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/extractive_summary.yaml,sha256=fcMV7LaCFZo4D29nwhGJXqFFuZMYVLo9XYjv8zcU6zs,364
77
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml,sha256=oyrLRjEnmioMa_G_sd9yQK_nBt4arwWV5fvKgzYE2ds,6090
74
+ sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml,sha256=QOhucXsokNEXGdXtk38qxQnSDwiCngUciXRjBqDcnDU,9088
78
75
  sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/generate_questions_responses.yaml,sha256=yX8aLY8dJSDML9ZJhnj9RzPbN8tH2xfcM4Gc6xZuwqQ,2596
76
+ sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
77
+ sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
78
+ sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/atomic_facts_ja.yaml,sha256=OjPZaSCOSLxEWgW3pmNwF7mmLhGhFGTmKL_3rKdqeW4,2488
79
+ sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/detailed_summary_ja.yaml,sha256=nEy_RcotHGiiENrmUANpKkbIFsrARAeSwECrBeHi2so,391
80
+ sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/extractive_summary_ja.yaml,sha256=V90W0IeJQZTFThA8v0UOs3DtZbtU3BI9jkpChw1BULo,402
81
+ sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml,sha256=ittFo_tyvG_1eqooO_9NK4jqepafgpHFGy2fuVfjFto,9207
82
+ sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/generate_questions_responses_ja.yaml,sha256=96SQqXG7fmb-50SdX85sgVtrFcQ-oNKe_0BoQdZmY5g,2638
79
83
  sdg_hub/flows/text_analysis/__init__.py,sha256=WStks4eM_KHNTVsHglcj8vFghmI0PH9P1hUrijBLbwc,125
80
84
  sdg_hub/flows/text_analysis/structured_insights/__init__.py,sha256=_DT4NR05JD9CZoSWROPr2lC6se0VjSqQPZJJlEV79mk,274
81
85
  sdg_hub/flows/text_analysis/structured_insights/analyze_sentiment.yaml,sha256=1YGPypFJYS8qfYFj2J6ERTgodKJvMF4YHNGt_vOF5qc,1000
82
86
  sdg_hub/flows/text_analysis/structured_insights/extract_entities.yaml,sha256=Q_SDy14Zu-qS2sbKfUBmGlYj3k7CUg6HzzXlFCXRKuU,1169
83
87
  sdg_hub/flows/text_analysis/structured_insights/extract_keywords.yaml,sha256=_nPPMdHnxag_lYbhYUjGJGo-CvRwWvwdGX7cQhdZ1S0,847
84
- sdg_hub/flows/text_analysis/structured_insights/flow.yaml,sha256=2HuGTyNwYe6a8Ev-QdKZXwe29NL4wOkq4ecEV9a7NDg,4221
88
+ sdg_hub/flows/text_analysis/structured_insights/flow.yaml,sha256=Qpo9WPtl0PWhBF1stIM8OjaTvhtw3dn4eDADt-xj5cA,4965
85
89
  sdg_hub/flows/text_analysis/structured_insights/summarize.yaml,sha256=WXwQak1pF8e1OwnOoI1EHu8QB6iUNW89rfkTdi1Oq54,687
86
- sdg_hub-0.3.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
87
- sdg_hub-0.3.0.dist-info/METADATA,sha256=eVLM1fK2-9uD_eWhSRW5VTbdUs-XIn_Va3Z-rY31Utk,9735
88
- sdg_hub-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
89
- sdg_hub-0.3.0.dist-info/top_level.txt,sha256=TqI7d-HE1n6zkXFkU0nF3A1Ct0P0pBaqI675uFokhx4,8
90
- sdg_hub-0.3.0.dist-info/RECORD,,
90
+ sdg_hub-0.4.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
91
+ sdg_hub-0.4.0.dist-info/METADATA,sha256=SPjLdht-43yAyDwZzdk91SYoQn8jRbsCTr4qBkXVVlw,9735
92
+ sdg_hub-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
93
+ sdg_hub-0.4.0.dist-info/top_level.txt,sha256=TqI7d-HE1n6zkXFkU0nF3A1Ct0P0pBaqI675uFokhx4,8
94
+ sdg_hub-0.4.0.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- # SPDX-License-Identifier: Apache-2.0
2
- """Evaluation blocks for SDG Hub."""
3
-
4
- # Local
5
- from .evaluate_faithfulness_block import EvaluateFaithfulnessBlock
6
- from .evaluate_relevancy_block import EvaluateRelevancyBlock
7
- from .verify_question_block import VerifyQuestionBlock
8
-
9
- __all__ = ["EvaluateFaithfulnessBlock", "EvaluateRelevancyBlock", "VerifyQuestionBlock"]
@@ -1,323 +0,0 @@
1
- # SPDX-License-Identifier: Apache-2.0
2
- """Thin wrapper for faithfulness evaluation using 4 composed blocks.
3
-
4
- This module provides a simple, lightweight wrapper that composes:
5
- - PromptBuilderBlock: builds evaluation prompts
6
- - LLMChatBlock: generates LLM responses
7
- - TextParserBlock: parses structured output
8
- - ColumnValueFilterBlock: filters based on judgment
9
-
10
- The wrapper exposes minimal LLM interface for flow detection while
11
- delegating all functionality to the internal blocks.
12
- """
13
-
14
- # Standard
15
- from typing import Any, Optional
16
-
17
- # Third Party
18
- from datasets import Dataset
19
- from pydantic import ConfigDict, Field, field_validator
20
-
21
- # Local
22
- from ...utils.error_handling import BlockValidationError
23
- from ...utils.logger_config import setup_logger
24
- from ..base import BaseBlock
25
- from ..filtering.column_value_filter import ColumnValueFilterBlock
26
- from ..llm.llm_chat_block import LLMChatBlock
27
- from ..llm.prompt_builder_block import PromptBuilderBlock
28
- from ..llm.text_parser_block import TextParserBlock
29
- from ..registry import BlockRegistry
30
-
31
- logger = setup_logger(__name__)
32
-
33
-
34
- @BlockRegistry.register(
35
- "EvaluateFaithfulnessBlock",
36
- "evaluation",
37
- "Thin wrapper composing 4 blocks for faithfulness evaluation",
38
- )
39
- class EvaluateFaithfulnessBlock(BaseBlock):
40
- """Thin wrapper for faithfulness evaluation using composed blocks.
41
-
42
- Composes PromptBuilderBlock + LLMChatBlock + TextParserBlock + ColumnValueFilterBlock
43
- into a single evaluation pipeline with smart parameter routing.
44
-
45
- Parameters
46
- ----------
47
- block_name : str
48
- Name of the block.
49
- input_cols : List[str]
50
- Input columns: ["document", "response"]
51
- output_cols : List[str]
52
- Output columns: ["faithfulness_explanation", "faithfulness_judgment"]
53
- model : Optional[str]
54
- LLM model identifier.
55
- api_base : Optional[str]
56
- API base URL.
57
- api_key : Optional[str]
58
- API key.
59
- prompt_config_path : str
60
- Path to YAML prompt template file (required).
61
- **kwargs : Any
62
- All other parameters are automatically routed to appropriate internal blocks
63
- based on each block's accepted parameters. This includes all LLM parameters
64
- (temperature, max_tokens, extra_body, extra_headers, etc.), text parser
65
- parameters, and filter parameters.
66
- """
67
-
68
- model_config = ConfigDict(
69
- extra="allow"
70
- ) # Allow extra fields for dynamic forwarding
71
-
72
- # --- Core configuration ---
73
- prompt_config_path: str = Field(
74
- ...,
75
- description="Path to YAML file containing the faithfulness evaluation prompt template",
76
- )
77
-
78
- # --- LLM interface (for flow detection) ---
79
- model: Optional[str] = Field(None, description="LLM model identifier")
80
- api_base: Optional[str] = Field(None, description="API base URL")
81
- api_key: Optional[str] = Field(None, description="API key")
82
-
83
- # --- Filter configuration ---
84
- filter_value: str = Field(
85
- "YES", description="Value to filter on for faithfulness judgment"
86
- )
87
- operation: str = Field("eq", description="Filter operation")
88
- convert_dtype: Optional[str] = Field(
89
- None, description="Data type conversion for filter column"
90
- )
91
-
92
- # --- Parser configuration ---
93
- start_tags: list[str] = Field(
94
- ["[Start of Explanation]", "[Start of Answer]"],
95
- description="Start tags for parsing explanation and judgment",
96
- )
97
- end_tags: list[str] = Field(
98
- ["[End of Explanation]", "[End of Answer]"],
99
- description="End tags for parsing explanation and judgment",
100
- )
101
- parsing_pattern: Optional[str] = Field(
102
- None,
103
- description="Regex pattern for custom parsing. If provided, takes precedence over tag-based parsing",
104
- )
105
-
106
- # --- Internal blocks (composition) ---
107
- prompt_builder: PromptBuilderBlock = Field(None, exclude=True) # type: ignore
108
- llm_chat: LLMChatBlock = Field(None, exclude=True) # type: ignore
109
- text_parser: TextParserBlock = Field(None, exclude=True) # type: ignore
110
- filter_block: ColumnValueFilterBlock = Field(None, exclude=True) # type: ignore
111
-
112
- @field_validator("input_cols")
113
- @classmethod
114
- def validate_input_cols(cls, v):
115
- """Validate input columns."""
116
- if v != ["document", "response"]:
117
- raise ValueError(
118
- f"EvaluateFaithfulnessBlock expects input_cols ['document', 'response'], got {v}"
119
- )
120
- return v
121
-
122
- @field_validator("output_cols")
123
- @classmethod
124
- def validate_output_cols(cls, v):
125
- """Validate output columns."""
126
- expected = ["faithfulness_explanation", "faithfulness_judgment"]
127
- if v != expected:
128
- raise ValueError(
129
- f"EvaluateFaithfulnessBlock expects output_cols {expected}, got {v}"
130
- )
131
- return v
132
-
133
- def __init__(self, **kwargs):
134
- """Initialize with smart parameter routing."""
135
- super().__init__(**kwargs)
136
- self._create_internal_blocks(**kwargs)
137
-
138
- # Log initialization if model is configured
139
- if self.model:
140
- logger.info(
141
- f"Initialized EvaluateFaithfulnessBlock '{self.block_name}' with model '{self.model}'"
142
- )
143
-
144
- def _extract_params(self, kwargs: dict, block_class) -> dict:
145
- """Extract parameters for specific block class based on its model_fields."""
146
- # Exclude parameters that are handled by this wrapper's structure
147
- wrapper_params = {
148
- "block_name",
149
- "input_cols",
150
- "output_cols",
151
- }
152
-
153
- # Extract parameters that the target block accepts
154
- params = {
155
- k: v
156
- for k, v in kwargs.items()
157
- if k in block_class.model_fields and k not in wrapper_params
158
- }
159
-
160
- # Also include declared fields from this composite block that the target block accepts
161
- for field_name in self.__class__.model_fields:
162
- if (
163
- field_name in block_class.model_fields
164
- and field_name not in wrapper_params
165
- ):
166
- field_value = getattr(self, field_name)
167
- if field_value is not None: # Only forward non-None values
168
- params[field_name] = field_value
169
-
170
- return params
171
-
172
- def _create_internal_blocks(self, **kwargs):
173
- """Create internal blocks with parameter routing."""
174
- # Route parameters to appropriate blocks
175
- prompt_params = self._extract_params(kwargs, PromptBuilderBlock)
176
- llm_params = self._extract_params(kwargs, LLMChatBlock)
177
- parser_params = self._extract_params(kwargs, TextParserBlock)
178
- filter_params = self._extract_params(kwargs, ColumnValueFilterBlock)
179
-
180
- self.prompt_builder = PromptBuilderBlock(
181
- block_name=f"{self.block_name}_prompt_builder",
182
- input_cols=["document", "response"],
183
- output_cols=["eval_faithfulness_prompt"],
184
- **prompt_params,
185
- )
186
-
187
- # Create LLM chat block with dynamic LLM parameter forwarding
188
- llm_config = {
189
- "block_name": f"{self.block_name}_llm_chat",
190
- "input_cols": ["eval_faithfulness_prompt"],
191
- "output_cols": ["raw_eval_faithfulness"],
192
- **llm_params,
193
- }
194
-
195
- # Only add LLM parameters if they are provided
196
- if self.model is not None:
197
- llm_config["model"] = self.model
198
- if self.api_base is not None:
199
- llm_config["api_base"] = self.api_base
200
- if self.api_key is not None:
201
- llm_config["api_key"] = self.api_key
202
-
203
- self.llm_chat = LLMChatBlock(**llm_config)
204
-
205
- # Create text parser
206
- self.text_parser = TextParserBlock(
207
- block_name=f"{self.block_name}_text_parser",
208
- input_cols=["raw_eval_faithfulness"],
209
- output_cols=["faithfulness_explanation", "faithfulness_judgment"],
210
- **parser_params,
211
- )
212
-
213
- self.filter_block = ColumnValueFilterBlock(
214
- block_name=f"{self.block_name}_filter",
215
- input_cols=["faithfulness_judgment"],
216
- output_cols=[], # Filter doesn't create new columns
217
- **filter_params,
218
- )
219
-
220
- def generate(self, samples: Dataset, **kwargs: Any) -> Dataset:
221
- """Execute the 4-block faithfulness evaluation pipeline.
222
-
223
- Parameters
224
- ----------
225
- samples : Dataset
226
- Input dataset with 'document' and 'response' columns.
227
- **kwargs : Any
228
- Additional arguments passed to internal blocks.
229
-
230
- Returns
231
- -------
232
- Dataset
233
- Filtered dataset with faithfulness evaluation results.
234
- """
235
- # Validate model is configured
236
- if not self.model:
237
- raise BlockValidationError(
238
- f"Model not configured for block '{self.block_name}'. "
239
- f"Call flow.set_model_config() before generating."
240
- )
241
-
242
- logger.info(
243
- f"Starting faithfulness evaluation for {len(samples)} samples",
244
- extra={"block_name": self.block_name, "model": self.model},
245
- )
246
-
247
- try:
248
- # Execute 4-block pipeline with validation delegation
249
- result = self.prompt_builder(samples, **kwargs)
250
- result = self.llm_chat(result, **kwargs)
251
- result = self.text_parser(result, **kwargs)
252
- result = self.filter_block(result, **kwargs)
253
-
254
- logger.info(
255
- f"Faithfulness evaluation completed: {len(samples)} → {len(result)} samples",
256
- extra={"block_name": self.block_name},
257
- )
258
-
259
- return result
260
-
261
- except Exception as e:
262
- logger.error(
263
- f"Error during faithfulness evaluation: {e}",
264
- extra={"block_name": self.block_name, "error": str(e)},
265
- )
266
- raise
267
-
268
- def __getattr__(self, name: str) -> Any:
269
- """Forward attribute access to appropriate internal block."""
270
- # Check each internal block to see which one has this parameter
271
- for block_attr, block_class in [
272
- ("prompt_builder", PromptBuilderBlock),
273
- ("llm_chat", LLMChatBlock),
274
- ("text_parser", TextParserBlock),
275
- ("filter_block", ColumnValueFilterBlock),
276
- ]:
277
- if hasattr(self, block_attr) and name in block_class.model_fields:
278
- internal_block = getattr(self, block_attr)
279
- if internal_block is not None:
280
- return getattr(internal_block, name)
281
- raise AttributeError(
282
- f"'{self.__class__.__name__}' object has no attribute '{name}'"
283
- )
284
-
285
- def __setattr__(self, name: str, value: Any) -> None:
286
- """Handle dynamic parameter updates from flow.set_model_config()."""
287
- super().__setattr__(name, value)
288
-
289
- # Forward to appropriate internal blocks
290
- for block_attr, block_class in [
291
- ("prompt_builder", PromptBuilderBlock),
292
- ("llm_chat", LLMChatBlock),
293
- ("text_parser", TextParserBlock),
294
- ("filter_block", ColumnValueFilterBlock),
295
- ]:
296
- if hasattr(self, block_attr) and name in block_class.model_fields:
297
- setattr(getattr(self, block_attr), name, value)
298
-
299
- def _reinitialize_client_manager(self) -> None:
300
- """Reinitialize internal LLM block's client manager."""
301
- if hasattr(self.llm_chat, "_reinitialize_client_manager"):
302
- self.llm_chat._reinitialize_client_manager()
303
-
304
- def get_internal_blocks_info(self) -> dict[str, Any]:
305
- """Get information about internal blocks."""
306
- return {
307
- "prompt_builder": self.prompt_builder.get_info(),
308
- "llm_chat": self.llm_chat.get_info(),
309
- "text_parser": self.text_parser.get_info(),
310
- "filter": self.filter_block.get_info(),
311
- }
312
-
313
- def __repr__(self) -> str:
314
- """String representation of the block."""
315
- filter_value = (
316
- getattr(self.filter_block, "filter_value", "YES")
317
- if hasattr(self, "filter_block")
318
- else "YES"
319
- )
320
- return (
321
- f"EvaluateFaithfulnessBlock(name='{self.block_name}', "
322
- f"model='{self.model}', filter_value='{filter_value}')"
323
- )