sdg-hub 0.3.1__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sdg_hub/__init__.py +0 -2
- sdg_hub/_version.py +2 -2
- sdg_hub/core/__init__.py +1 -2
- sdg_hub/core/blocks/__init__.py +2 -4
- sdg_hub/core/blocks/base.py +61 -6
- sdg_hub/core/blocks/filtering/column_value_filter.py +3 -2
- sdg_hub/core/blocks/llm/__init__.py +2 -4
- sdg_hub/core/blocks/llm/llm_chat_block.py +251 -265
- sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +216 -98
- sdg_hub/core/blocks/llm/llm_parser_block.py +320 -0
- sdg_hub/core/blocks/llm/text_parser_block.py +53 -152
- sdg_hub/core/flow/__init__.py +3 -4
- sdg_hub/core/flow/base.py +11 -73
- sdg_hub/core/flow/metadata.py +1 -68
- sdg_hub/core/flow/registry.py +0 -1
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml +51 -12
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/__init__.py +0 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/flow.yaml +158 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml +51 -12
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml +14 -3
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +147 -28
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/README.md +0 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/__init__.py +0 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/atomic_facts_ja.yaml +41 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/detailed_summary_ja.yaml +14 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/extractive_summary_ja.yaml +14 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml +303 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/generate_questions_responses_ja.yaml +55 -0
- sdg_hub/flows/text_analysis/structured_insights/flow.yaml +28 -5
- {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.1.dist-info}/METADATA +2 -1
- {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.1.dist-info}/RECORD +34 -30
- sdg_hub/core/blocks/evaluation/__init__.py +0 -9
- sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +0 -323
- sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +0 -323
- sdg_hub/core/blocks/evaluation/verify_question_block.py +0 -329
- sdg_hub/core/blocks/llm/client_manager.py +0 -472
- sdg_hub/core/blocks/llm/config.py +0 -337
- {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.1.dist-info}/WHEEL +0 -0
- {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,55 @@
|
|
1
|
+
- role: system
|
2
|
+
content: You are a very knowledgeable AI Assistant that will faithfully assist the user with their task.
|
3
|
+
|
4
|
+
- role: user
|
5
|
+
content: |
|
6
|
+
Develop a series of as many educational question and answer pairs as possible from a chapter in a {{domain}} textbook.
|
7
|
+
|
8
|
+
The questions should:
|
9
|
+
* Be self-contained, not requiring references to tables, figures, or specific sections in the text for understanding.
|
10
|
+
* Focus on teaching and reinforcing the key knowledge and concepts presented in the chapter.
|
11
|
+
* Avoid sections with minimal educational content like index pages or prefaces. In such cases, respond with [UNANSWERABLE].
|
12
|
+
* Be directly relevant to the textbook's domain. For instance, in a science textbook, questions should revolve around scientific terms, definitions, and practical applications, while in a legal textbook, they should cover legal principles, case law, and precedents.
|
13
|
+
* Be formulated to allow for independent answers, avoiding direct references to specific theorems or text sections. For example, rather than asking 'Under what conditions is the fixed point of a function unique according to Theorem 3.1.5?', ask 'How does the Fixed Point Iteration method contribute to understanding function uniqueness?'
|
14
|
+
* Span a range of difficulty levels to accommodate a diverse student audience, from basic understanding to advanced comprehension.
|
15
|
+
* Include a variety of question types such as multiple-choice for basic recall, short answer for deeper understanding, and essay or problem-solving questions to test application and analysis skills.
|
16
|
+
* Align closely with the learning objectives of the textbook or the specific chapter, ensuring that the questions test the fundamental concepts and skills that the chapter aims to impart.
|
17
|
+
* Be in Japanese.
|
18
|
+
|
19
|
+
Strictly follow this format for each question answer pair you generate while responding:
|
20
|
+
|
21
|
+
[QUESTION]
|
22
|
+
<Insert question here>
|
23
|
+
[ANSWER]
|
24
|
+
<Insert answer here>
|
25
|
+
[END]
|
26
|
+
|
27
|
+
Each question and answer pair should stand alone as a mini-lesson, encapsulating a key concept or idea from the chapter in a way that is accessible and informative without requiring the reader to refer back to the textbook.
|
28
|
+
|
29
|
+
Here are some examples of questions:
|
30
|
+
|
31
|
+
[Document]
|
32
|
+
{{icl_document}}
|
33
|
+
|
34
|
+
[QUESTION]
|
35
|
+
{{icl_query_1}}
|
36
|
+
[ANSWER]
|
37
|
+
{{icl_response_1}}
|
38
|
+
[END]
|
39
|
+
|
40
|
+
[QUESTION]
|
41
|
+
{{icl_query_2}}
|
42
|
+
[ANSWER]
|
43
|
+
{{icl_response_2}}
|
44
|
+
[END]
|
45
|
+
|
46
|
+
[QUESTION]
|
47
|
+
{{icl_query_3}}
|
48
|
+
[ANSWER]
|
49
|
+
{{icl_response_3}}
|
50
|
+
[END]
|
51
|
+
|
52
|
+
Now, here is the document:
|
53
|
+
[DOCUMENT]
|
54
|
+
{{document_outline}}
|
55
|
+
{{document}}
|
@@ -24,7 +24,6 @@ metadata:
|
|
24
24
|
- "entity-extraction"
|
25
25
|
- "keyword-extraction"
|
26
26
|
license: "Apache-2.0"
|
27
|
-
min_sdg_hub_version: "0.2.0"
|
28
27
|
dataset_requirements:
|
29
28
|
required_columns:
|
30
29
|
- "text"
|
@@ -50,10 +49,16 @@ blocks:
|
|
50
49
|
max_tokens: 1024
|
51
50
|
temperature: 0.3
|
52
51
|
async_mode: true
|
52
|
+
- block_type: "LLMParserBlock"
|
53
|
+
block_config:
|
54
|
+
block_name: "extract_summary"
|
55
|
+
input_cols: "raw_summary"
|
56
|
+
extract_content: true
|
57
|
+
expand_lists: true
|
53
58
|
- block_type: "TextParserBlock"
|
54
59
|
block_config:
|
55
60
|
block_name: "parse_summary"
|
56
|
-
input_cols: "raw_summary"
|
61
|
+
input_cols: "extract_summary_content"
|
57
62
|
output_cols: "summary"
|
58
63
|
start_tags:
|
59
64
|
- "[SUMMARY]"
|
@@ -76,10 +81,16 @@ blocks:
|
|
76
81
|
max_tokens: 512
|
77
82
|
temperature: 0.3
|
78
83
|
async_mode: true
|
84
|
+
- block_type: "LLMParserBlock"
|
85
|
+
block_config:
|
86
|
+
block_name: "extract_keywords"
|
87
|
+
input_cols: "raw_keywords"
|
88
|
+
extract_content: true
|
89
|
+
expand_lists: true
|
79
90
|
- block_type: "TextParserBlock"
|
80
91
|
block_config:
|
81
92
|
block_name: "parse_keywords"
|
82
|
-
input_cols: "raw_keywords"
|
93
|
+
input_cols: "extract_keywords_content"
|
83
94
|
output_cols: "keywords"
|
84
95
|
start_tags:
|
85
96
|
- "[KEYWORDS]"
|
@@ -102,10 +113,16 @@ blocks:
|
|
102
113
|
max_tokens: 1024
|
103
114
|
temperature: 0.3
|
104
115
|
async_mode: true
|
116
|
+
- block_type: "LLMParserBlock"
|
117
|
+
block_config:
|
118
|
+
block_name: "extract_entities"
|
119
|
+
input_cols: "raw_entities"
|
120
|
+
extract_content: true
|
121
|
+
expand_lists: true
|
105
122
|
- block_type: "TextParserBlock"
|
106
123
|
block_config:
|
107
124
|
block_name: "parse_entities"
|
108
|
-
input_cols: "raw_entities"
|
125
|
+
input_cols: "extract_entities_content"
|
109
126
|
output_cols: "entities"
|
110
127
|
start_tags:
|
111
128
|
- "[ENTITIES]"
|
@@ -128,10 +145,16 @@ blocks:
|
|
128
145
|
max_tokens: 256
|
129
146
|
temperature: 0.1
|
130
147
|
async_mode: true
|
148
|
+
- block_type: "LLMParserBlock"
|
149
|
+
block_config:
|
150
|
+
block_name: "extract_sentiment"
|
151
|
+
input_cols: "raw_sentiment"
|
152
|
+
extract_content: true
|
153
|
+
expand_lists: true
|
131
154
|
- block_type: "TextParserBlock"
|
132
155
|
block_config:
|
133
156
|
block_name: "parse_sentiment"
|
134
|
-
input_cols: "raw_sentiment"
|
157
|
+
input_cols: "extract_sentiment_content"
|
135
158
|
output_cols: "sentiment"
|
136
159
|
start_tags:
|
137
160
|
- "[SENTIMENT]"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: sdg_hub
|
3
|
-
Version: 0.3.1
|
3
|
+
Version: 0.4.1
|
4
4
|
Summary: Synthetic Data Generation
|
5
5
|
Author-email: Red Hat AI Innovation <abhandwa@redhat.com>
|
6
6
|
License: Apache-2.0
|
@@ -65,6 +65,7 @@ Requires-Dist: pytest-html; extra == "dev"
|
|
65
65
|
Requires-Dist: tox<5,>=4.4.2; extra == "dev"
|
66
66
|
Requires-Dist: ruff; extra == "dev"
|
67
67
|
Requires-Dist: pytest-env; extra == "dev"
|
68
|
+
Requires-Dist: nbconvert>=7.0.0; extra == "dev"
|
68
69
|
Dynamic: license-file
|
69
70
|
|
70
71
|
# `sdg_hub`: Synthetic Data Generation Toolkit
|
@@ -1,9 +1,9 @@
|
|
1
|
-
sdg_hub/__init__.py,sha256=
|
2
|
-
sdg_hub/_version.py,sha256=
|
1
|
+
sdg_hub/__init__.py,sha256=TlkZT40-70urdcWLqv3kupaJj8s-SVgd2QyvlSFwb4A,510
|
2
|
+
sdg_hub/_version.py,sha256=k7cu0JKra64gmMNU_UfA5sw2eNc_GRvf3QmesiYAy8g,704
|
3
3
|
sdg_hub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
-
sdg_hub/core/__init__.py,sha256=
|
5
|
-
sdg_hub/core/blocks/__init__.py,sha256=
|
6
|
-
sdg_hub/core/blocks/base.py,sha256
|
4
|
+
sdg_hub/core/__init__.py,sha256=e3BoejbqjYhasf9t__L4qE52lkD9EBjx4o--2kqKdro,460
|
5
|
+
sdg_hub/core/blocks/__init__.py,sha256=5FsbkcO-dmBv6MqO96TPn9FKKPTQZQCv20j4wR7UvQw,1502
|
6
|
+
sdg_hub/core/blocks/base.py,sha256=-SOdBpJwtRTMsrmCEuLjUBQMRCo_PLYlHEBRrz8sF9g,13031
|
7
7
|
sdg_hub/core/blocks/registry.py,sha256=FuEN_pnq-nSH1LguY3_oCubT6Kz3SuJjk3TcUpLT-lw,10695
|
8
8
|
sdg_hub/core/blocks/deprecated_blocks/__init__.py,sha256=RDu3MWFStDQko-TKkx8tGoB1UTatP_RSldZK43zHDvY,889
|
9
9
|
sdg_hub/core/blocks/deprecated_blocks/combine_columns.py,sha256=HCvpaYsAwgx1Dm0vIshcWsKoVsRT0KrmKp9j4oqtByc,2757
|
@@ -15,20 +15,15 @@ sdg_hub/core/blocks/deprecated_blocks/rename_columns.py,sha256=thp-mHtkRmUw_nYKp
|
|
15
15
|
sdg_hub/core/blocks/deprecated_blocks/sample_populator.py,sha256=UdueMApxOmPWaxxMrw7b1v74fKJBfqqRATEBqgmVtNw,1737
|
16
16
|
sdg_hub/core/blocks/deprecated_blocks/selector.py,sha256=nWecsVsW8DvBcqAF_LOqXmW-5MQ28uN3d1y6wkSy38c,2960
|
17
17
|
sdg_hub/core/blocks/deprecated_blocks/set_to_majority_value.py,sha256=44TQu-rK5isia-otMVB1zHd8D-wWmu3C8CI1NLtfY5s,2729
|
18
|
-
sdg_hub/core/blocks/evaluation/__init__.py,sha256=kFXee-vsVVdU2XtLio9qHgPx_a0zoB_rQr509EKBGJc,357
|
19
|
-
sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py,sha256=vFi3YIxVPNnzgdenIeAl7yUb4OOUY_uUOXS-pWLsDmw,12223
|
20
|
-
sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py,sha256=NXT1lixR-JnOXNlBCbMjULcpu4kh2SthhwCWEobiBt0,12115
|
21
|
-
sdg_hub/core/blocks/evaluation/verify_question_block.py,sha256=LKoIHdxUuTVO24n_M9cAliEj56uEe2kQAecKTRz65zI,12465
|
22
18
|
sdg_hub/core/blocks/filtering/__init__.py,sha256=isxSVSvDqkMjG8dQSl3Q2M4g5c1t9fTjBSA21icf-yA,275
|
23
|
-
sdg_hub/core/blocks/filtering/column_value_filter.py,sha256=
|
24
|
-
sdg_hub/core/blocks/llm/__init__.py,sha256=
|
25
|
-
sdg_hub/core/blocks/llm/client_manager.py,sha256=6RNqYvFIh4SF6jopI6tTY5MA01y8Qo-tAhsE0GeHZZ0,16109
|
26
|
-
sdg_hub/core/blocks/llm/config.py,sha256=gc4xp5D20MSlKMFEos0QAaKUwgbZpBtMGXmn6LsIk78,11289
|
19
|
+
sdg_hub/core/blocks/filtering/column_value_filter.py,sha256=2Z9j_CiiTn5mHZ9gfXU-itLXDmeXSh0UI0x1x7j-LQ0,6001
|
20
|
+
sdg_hub/core/blocks/llm/__init__.py,sha256=AyS0dd3pkPPXH5a9aj4mT5HsKjX2vjXfkmQc6rkFV4A,795
|
27
21
|
sdg_hub/core/blocks/llm/error_handler.py,sha256=7T-019ZFB9qgZoX1ybIiXyaLjPzrF96qcKmUu6vmO6g,12178
|
28
|
-
sdg_hub/core/blocks/llm/llm_chat_block.py,sha256=
|
29
|
-
sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py,sha256=
|
22
|
+
sdg_hub/core/blocks/llm/llm_chat_block.py,sha256=MHhI2x9i6LrfDXgvAy2_6YxgyoD7j6BpCgNGsM69xDg,22194
|
23
|
+
sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py,sha256=DW4b09IqXmcshvXawFheDyaLp3rz7vpO5VBrKdUQYW8,31703
|
24
|
+
sdg_hub/core/blocks/llm/llm_parser_block.py,sha256=aoHqsDDhaIgCDfPpv7acc0DVN-zUgzFflRVB4win0aM,12012
|
30
25
|
sdg_hub/core/blocks/llm/prompt_builder_block.py,sha256=fkJd718X1oYlMY1cjo_8WCO16Gl8Tm0bUPWR78E_uws,13935
|
31
|
-
sdg_hub/core/blocks/llm/text_parser_block.py,sha256=
|
26
|
+
sdg_hub/core/blocks/llm/text_parser_block.py,sha256=975HK6NfXiU9Any4UDMpBNidRpyhHmc76BXUN69SVyc,12566
|
32
27
|
sdg_hub/core/blocks/transform/__init__.py,sha256=lF9InjOzA6p_mjiwV-a2Kwstq9kqRiQ-dEwbsmR9yQs,825
|
33
28
|
sdg_hub/core/blocks/transform/duplicate_columns.py,sha256=SaP7rIF4ZFEFFa50aU2xGNIuddXaEZrKxdWfHjzFpVI,2833
|
34
29
|
sdg_hub/core/blocks/transform/index_based_mapper.py,sha256=XC_a7Skbd3mu7f4ra8fGWPxMwqUMSjJkQ7Ag7vflwJA,8235
|
@@ -37,12 +32,12 @@ sdg_hub/core/blocks/transform/melt_columns.py,sha256=vaYa5Taq6GhNZYWFL4uPK3-SfN2
|
|
37
32
|
sdg_hub/core/blocks/transform/rename_columns.py,sha256=qeB5L2utqDQnutUetH1VKZSqDiJSH_yUp5EFCV-XCVI,1998
|
38
33
|
sdg_hub/core/blocks/transform/text_concat.py,sha256=_-B__Hob1WwgwkILPIZvTnsDzuwtoX1hKviyzHlnnes,3149
|
39
34
|
sdg_hub/core/blocks/transform/uniform_col_val_setter.py,sha256=XnjiT29z3PzIPy8M-mmE2w-Miab6Ed5ahy32SaxTCTE,3263
|
40
|
-
sdg_hub/core/flow/__init__.py,sha256=
|
41
|
-
sdg_hub/core/flow/base.py,sha256=
|
35
|
+
sdg_hub/core/flow/__init__.py,sha256=0_m_htuZfPxk8xQ9IKfp0Pz-JRE4O7lYMUFrKyLNoLA,409
|
36
|
+
sdg_hub/core/flow/base.py,sha256=IRnNEZ3laDmR4sW_MTseL4syhLuUylyHY_0tS5QaS-A,54084
|
42
37
|
sdg_hub/core/flow/checkpointer.py,sha256=stm5ZtjjEiLk9ZkAAnoQQn5Y8Yl_d7qCsQLZTrCXR48,11867
|
43
|
-
sdg_hub/core/flow/metadata.py,sha256=
|
38
|
+
sdg_hub/core/flow/metadata.py,sha256=cFrpJjWOaK87aCuRFyC3Pdf83oYU93mrmZEMdUnhsN8,10540
|
44
39
|
sdg_hub/core/flow/migration.py,sha256=6and-RBqV0t2gRipr1GiOOVnyBJdtyyjw1kO08Z--d4,7558
|
45
|
-
sdg_hub/core/flow/registry.py,sha256=
|
40
|
+
sdg_hub/core/flow/registry.py,sha256=N6KfX-L7QRkooznIFxDuhRZYuDA5g3N5zC-KRm2jVhk,12109
|
46
41
|
sdg_hub/core/flow/validation.py,sha256=pUJvgaUjLpKNwvW6djcqVOF-HShOjegEmGOnUnoX4BA,9722
|
47
42
|
sdg_hub/core/utils/__init__.py,sha256=C2FzLn3dHprwGJDEgI4fyFS3aoCJR-9PhHsunxropJ8,351
|
48
43
|
sdg_hub/core/utils/datautils.py,sha256=__HkUe1DxcJVHKrFX68z_hDXwxJygBlJDfjJLnj7rHc,4230
|
@@ -59,12 +54,14 @@ sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/gener
|
|
59
54
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_question_list.yaml,sha256=qHOgUNrQz2vjUjJiEHNGWxDDXwjJlP1kofTxeGgLyPI,1461
|
60
55
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
61
56
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/detailed_summary.yaml,sha256=Ik6gAml0O-jPq8jpXBAkURzYkQuFOnDZb4LDwjmfAiE,381
|
62
|
-
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml,sha256=
|
57
|
+
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml,sha256=fUdzY9dtU69o99Uq8FIPycgVWdLD-1kbY97Bh-Vo2A0,5538
|
58
|
+
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
59
|
+
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/flow.yaml,sha256=smPWVUZRCt58EagWDmJVmTBQj8qMcjpzh-Q3GSuFrz0,4413
|
63
60
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
64
61
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/extractive_summary.yaml,sha256=SeapWoOx3fhN5SvWYuHss_9prLE8xSkOic7JkbDHSR0,4081
|
65
|
-
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml,sha256=
|
62
|
+
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml,sha256=iNNIfofFE7awK7iivtIFWxjfjy8QviMugOPPnOTySKA,5706
|
66
63
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
67
|
-
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml,sha256=
|
64
|
+
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml,sha256=CIUZNYhvszT-jpz1Hvh6nS2y5W34P529ZOMp8thEQ9k,3219
|
68
65
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/key_facts_summary.yaml,sha256=YKMX_CuvcThG_bdNCAIXdVBkMvB72I89RGq2ltSSgc8,3298
|
69
66
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
70
67
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -74,17 +71,24 @@ sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/ev
|
|
74
71
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_question.yaml,sha256=zwzklXup6khRkR88avgrJTcjaMcV1wnbeYaML5oPuNs,1767
|
75
72
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_relevancy.yaml,sha256=cA8igo7jMrRXaWW6k0of6KOp7YnxLtPj0fP4DbrmZNQ,3647
|
76
73
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/extractive_summary.yaml,sha256=fcMV7LaCFZo4D29nwhGJXqFFuZMYVLo9XYjv8zcU6zs,364
|
77
|
-
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml,sha256=
|
74
|
+
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml,sha256=HR8sf7RUZKr8UqKztBj-nlvyrve1UMUu8x8qgYM6O14,9055
|
78
75
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/generate_questions_responses.yaml,sha256=yX8aLY8dJSDML9ZJhnj9RzPbN8tH2xfcM4Gc6xZuwqQ,2596
|
76
|
+
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
77
|
+
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
78
|
+
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/atomic_facts_ja.yaml,sha256=OjPZaSCOSLxEWgW3pmNwF7mmLhGhFGTmKL_3rKdqeW4,2488
|
79
|
+
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/detailed_summary_ja.yaml,sha256=nEy_RcotHGiiENrmUANpKkbIFsrARAeSwECrBeHi2so,391
|
80
|
+
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/extractive_summary_ja.yaml,sha256=V90W0IeJQZTFThA8v0UOs3DtZbtU3BI9jkpChw1BULo,402
|
81
|
+
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml,sha256=iY1N6CY97fEkqI5oqaamSfqmiXpHPhWH_aOppsMxVjY,9176
|
82
|
+
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/generate_questions_responses_ja.yaml,sha256=96SQqXG7fmb-50SdX85sgVtrFcQ-oNKe_0BoQdZmY5g,2638
|
79
83
|
sdg_hub/flows/text_analysis/__init__.py,sha256=WStks4eM_KHNTVsHglcj8vFghmI0PH9P1hUrijBLbwc,125
|
80
84
|
sdg_hub/flows/text_analysis/structured_insights/__init__.py,sha256=_DT4NR05JD9CZoSWROPr2lC6se0VjSqQPZJJlEV79mk,274
|
81
85
|
sdg_hub/flows/text_analysis/structured_insights/analyze_sentiment.yaml,sha256=1YGPypFJYS8qfYFj2J6ERTgodKJvMF4YHNGt_vOF5qc,1000
|
82
86
|
sdg_hub/flows/text_analysis/structured_insights/extract_entities.yaml,sha256=Q_SDy14Zu-qS2sbKfUBmGlYj3k7CUg6HzzXlFCXRKuU,1169
|
83
87
|
sdg_hub/flows/text_analysis/structured_insights/extract_keywords.yaml,sha256=_nPPMdHnxag_lYbhYUjGJGo-CvRwWvwdGX7cQhdZ1S0,847
|
84
|
-
sdg_hub/flows/text_analysis/structured_insights/flow.yaml,sha256=
|
88
|
+
sdg_hub/flows/text_analysis/structured_insights/flow.yaml,sha256=BBV18SdvuVTAESjwkJ7V1jbb-cSTBvNl3SCycd0oEQ4,4934
|
85
89
|
sdg_hub/flows/text_analysis/structured_insights/summarize.yaml,sha256=WXwQak1pF8e1OwnOoI1EHu8QB6iUNW89rfkTdi1Oq54,687
|
86
|
-
sdg_hub-0.
|
87
|
-
sdg_hub-0.
|
88
|
-
sdg_hub-0.
|
89
|
-
sdg_hub-0.
|
90
|
-
sdg_hub-0.
|
90
|
+
sdg_hub-0.4.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
91
|
+
sdg_hub-0.4.1.dist-info/METADATA,sha256=pLRs5oOsVI9515UEZxcUEZFZhCoZ0kli0KLpBPPPB7w,9783
|
92
|
+
sdg_hub-0.4.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
93
|
+
sdg_hub-0.4.1.dist-info/top_level.txt,sha256=TqI7d-HE1n6zkXFkU0nF3A1Ct0P0pBaqI675uFokhx4,8
|
94
|
+
sdg_hub-0.4.1.dist-info/RECORD,,
|
@@ -1,9 +0,0 @@
|
|
1
|
-
# SPDX-License-Identifier: Apache-2.0
|
2
|
-
"""Evaluation blocks for SDG Hub."""
|
3
|
-
|
4
|
-
# Local
|
5
|
-
from .evaluate_faithfulness_block import EvaluateFaithfulnessBlock
|
6
|
-
from .evaluate_relevancy_block import EvaluateRelevancyBlock
|
7
|
-
from .verify_question_block import VerifyQuestionBlock
|
8
|
-
|
9
|
-
__all__ = ["EvaluateFaithfulnessBlock", "EvaluateRelevancyBlock", "VerifyQuestionBlock"]
|
@@ -1,323 +0,0 @@
|
|
1
|
-
# SPDX-License-Identifier: Apache-2.0
|
2
|
-
"""Thin wrapper for faithfulness evaluation using 4 composed blocks.
|
3
|
-
|
4
|
-
This module provides a simple, lightweight wrapper that composes:
|
5
|
-
- PromptBuilderBlock: builds evaluation prompts
|
6
|
-
- LLMChatBlock: generates LLM responses
|
7
|
-
- TextParserBlock: parses structured output
|
8
|
-
- ColumnValueFilterBlock: filters based on judgment
|
9
|
-
|
10
|
-
The wrapper exposes minimal LLM interface for flow detection while
|
11
|
-
delegating all functionality to the internal blocks.
|
12
|
-
"""
|
13
|
-
|
14
|
-
# Standard
|
15
|
-
from typing import Any, Optional
|
16
|
-
|
17
|
-
# Third Party
|
18
|
-
from datasets import Dataset
|
19
|
-
from pydantic import ConfigDict, Field, field_validator
|
20
|
-
|
21
|
-
# Local
|
22
|
-
from ...utils.error_handling import BlockValidationError
|
23
|
-
from ...utils.logger_config import setup_logger
|
24
|
-
from ..base import BaseBlock
|
25
|
-
from ..filtering.column_value_filter import ColumnValueFilterBlock
|
26
|
-
from ..llm.llm_chat_block import LLMChatBlock
|
27
|
-
from ..llm.prompt_builder_block import PromptBuilderBlock
|
28
|
-
from ..llm.text_parser_block import TextParserBlock
|
29
|
-
from ..registry import BlockRegistry
|
30
|
-
|
31
|
-
logger = setup_logger(__name__)
|
32
|
-
|
33
|
-
|
34
|
-
@BlockRegistry.register(
|
35
|
-
"EvaluateFaithfulnessBlock",
|
36
|
-
"evaluation",
|
37
|
-
"Thin wrapper composing 4 blocks for faithfulness evaluation",
|
38
|
-
)
|
39
|
-
class EvaluateFaithfulnessBlock(BaseBlock):
|
40
|
-
"""Thin wrapper for faithfulness evaluation using composed blocks.
|
41
|
-
|
42
|
-
Composes PromptBuilderBlock + LLMChatBlock + TextParserBlock + ColumnValueFilterBlock
|
43
|
-
into a single evaluation pipeline with smart parameter routing.
|
44
|
-
|
45
|
-
Parameters
|
46
|
-
----------
|
47
|
-
block_name : str
|
48
|
-
Name of the block.
|
49
|
-
input_cols : List[str]
|
50
|
-
Input columns: ["document", "response"]
|
51
|
-
output_cols : List[str]
|
52
|
-
Output columns: ["faithfulness_explanation", "faithfulness_judgment"]
|
53
|
-
model : Optional[str]
|
54
|
-
LLM model identifier.
|
55
|
-
api_base : Optional[str]
|
56
|
-
API base URL.
|
57
|
-
api_key : Optional[str]
|
58
|
-
API key.
|
59
|
-
prompt_config_path : str
|
60
|
-
Path to YAML prompt template file (required).
|
61
|
-
**kwargs : Any
|
62
|
-
All other parameters are automatically routed to appropriate internal blocks
|
63
|
-
based on each block's accepted parameters. This includes all LLM parameters
|
64
|
-
(temperature, max_tokens, extra_body, extra_headers, etc.), text parser
|
65
|
-
parameters, and filter parameters.
|
66
|
-
"""
|
67
|
-
|
68
|
-
model_config = ConfigDict(
|
69
|
-
extra="allow"
|
70
|
-
) # Allow extra fields for dynamic forwarding
|
71
|
-
|
72
|
-
# --- Core configuration ---
|
73
|
-
prompt_config_path: str = Field(
|
74
|
-
...,
|
75
|
-
description="Path to YAML file containing the faithfulness evaluation prompt template",
|
76
|
-
)
|
77
|
-
|
78
|
-
# --- LLM interface (for flow detection) ---
|
79
|
-
model: Optional[str] = Field(None, description="LLM model identifier")
|
80
|
-
api_base: Optional[str] = Field(None, description="API base URL")
|
81
|
-
api_key: Optional[str] = Field(None, description="API key")
|
82
|
-
|
83
|
-
# --- Filter configuration ---
|
84
|
-
filter_value: str = Field(
|
85
|
-
"YES", description="Value to filter on for faithfulness judgment"
|
86
|
-
)
|
87
|
-
operation: str = Field("eq", description="Filter operation")
|
88
|
-
convert_dtype: Optional[str] = Field(
|
89
|
-
None, description="Data type conversion for filter column"
|
90
|
-
)
|
91
|
-
|
92
|
-
# --- Parser configuration ---
|
93
|
-
start_tags: list[str] = Field(
|
94
|
-
["[Start of Explanation]", "[Start of Answer]"],
|
95
|
-
description="Start tags for parsing explanation and judgment",
|
96
|
-
)
|
97
|
-
end_tags: list[str] = Field(
|
98
|
-
["[End of Explanation]", "[End of Answer]"],
|
99
|
-
description="End tags for parsing explanation and judgment",
|
100
|
-
)
|
101
|
-
parsing_pattern: Optional[str] = Field(
|
102
|
-
None,
|
103
|
-
description="Regex pattern for custom parsing. If provided, takes precedence over tag-based parsing",
|
104
|
-
)
|
105
|
-
|
106
|
-
# --- Internal blocks (composition) ---
|
107
|
-
prompt_builder: PromptBuilderBlock = Field(None, exclude=True) # type: ignore
|
108
|
-
llm_chat: LLMChatBlock = Field(None, exclude=True) # type: ignore
|
109
|
-
text_parser: TextParserBlock = Field(None, exclude=True) # type: ignore
|
110
|
-
filter_block: ColumnValueFilterBlock = Field(None, exclude=True) # type: ignore
|
111
|
-
|
112
|
-
@field_validator("input_cols")
|
113
|
-
@classmethod
|
114
|
-
def validate_input_cols(cls, v):
|
115
|
-
"""Validate input columns."""
|
116
|
-
if v != ["document", "response"]:
|
117
|
-
raise ValueError(
|
118
|
-
f"EvaluateFaithfulnessBlock expects input_cols ['document', 'response'], got {v}"
|
119
|
-
)
|
120
|
-
return v
|
121
|
-
|
122
|
-
@field_validator("output_cols")
|
123
|
-
@classmethod
|
124
|
-
def validate_output_cols(cls, v):
|
125
|
-
"""Validate output columns."""
|
126
|
-
expected = ["faithfulness_explanation", "faithfulness_judgment"]
|
127
|
-
if v != expected:
|
128
|
-
raise ValueError(
|
129
|
-
f"EvaluateFaithfulnessBlock expects output_cols {expected}, got {v}"
|
130
|
-
)
|
131
|
-
return v
|
132
|
-
|
133
|
-
def __init__(self, **kwargs):
|
134
|
-
"""Initialize with smart parameter routing."""
|
135
|
-
super().__init__(**kwargs)
|
136
|
-
self._create_internal_blocks(**kwargs)
|
137
|
-
|
138
|
-
# Log initialization if model is configured
|
139
|
-
if self.model:
|
140
|
-
logger.info(
|
141
|
-
f"Initialized EvaluateFaithfulnessBlock '{self.block_name}' with model '{self.model}'"
|
142
|
-
)
|
143
|
-
|
144
|
-
def _extract_params(self, kwargs: dict, block_class) -> dict:
|
145
|
-
"""Extract parameters for specific block class based on its model_fields."""
|
146
|
-
# Exclude parameters that are handled by this wrapper's structure
|
147
|
-
wrapper_params = {
|
148
|
-
"block_name",
|
149
|
-
"input_cols",
|
150
|
-
"output_cols",
|
151
|
-
}
|
152
|
-
|
153
|
-
# Extract parameters that the target block accepts
|
154
|
-
params = {
|
155
|
-
k: v
|
156
|
-
for k, v in kwargs.items()
|
157
|
-
if k in block_class.model_fields and k not in wrapper_params
|
158
|
-
}
|
159
|
-
|
160
|
-
# Also include declared fields from this composite block that the target block accepts
|
161
|
-
for field_name in self.__class__.model_fields:
|
162
|
-
if (
|
163
|
-
field_name in block_class.model_fields
|
164
|
-
and field_name not in wrapper_params
|
165
|
-
):
|
166
|
-
field_value = getattr(self, field_name)
|
167
|
-
if field_value is not None: # Only forward non-None values
|
168
|
-
params[field_name] = field_value
|
169
|
-
|
170
|
-
return params
|
171
|
-
|
172
|
-
def _create_internal_blocks(self, **kwargs):
|
173
|
-
"""Create internal blocks with parameter routing."""
|
174
|
-
# Route parameters to appropriate blocks
|
175
|
-
prompt_params = self._extract_params(kwargs, PromptBuilderBlock)
|
176
|
-
llm_params = self._extract_params(kwargs, LLMChatBlock)
|
177
|
-
parser_params = self._extract_params(kwargs, TextParserBlock)
|
178
|
-
filter_params = self._extract_params(kwargs, ColumnValueFilterBlock)
|
179
|
-
|
180
|
-
self.prompt_builder = PromptBuilderBlock(
|
181
|
-
block_name=f"{self.block_name}_prompt_builder",
|
182
|
-
input_cols=["document", "response"],
|
183
|
-
output_cols=["eval_faithfulness_prompt"],
|
184
|
-
**prompt_params,
|
185
|
-
)
|
186
|
-
|
187
|
-
# Create LLM chat block with dynamic LLM parameter forwarding
|
188
|
-
llm_config = {
|
189
|
-
"block_name": f"{self.block_name}_llm_chat",
|
190
|
-
"input_cols": ["eval_faithfulness_prompt"],
|
191
|
-
"output_cols": ["raw_eval_faithfulness"],
|
192
|
-
**llm_params,
|
193
|
-
}
|
194
|
-
|
195
|
-
# Only add LLM parameters if they are provided
|
196
|
-
if self.model is not None:
|
197
|
-
llm_config["model"] = self.model
|
198
|
-
if self.api_base is not None:
|
199
|
-
llm_config["api_base"] = self.api_base
|
200
|
-
if self.api_key is not None:
|
201
|
-
llm_config["api_key"] = self.api_key
|
202
|
-
|
203
|
-
self.llm_chat = LLMChatBlock(**llm_config)
|
204
|
-
|
205
|
-
# Create text parser
|
206
|
-
self.text_parser = TextParserBlock(
|
207
|
-
block_name=f"{self.block_name}_text_parser",
|
208
|
-
input_cols=["raw_eval_faithfulness"],
|
209
|
-
output_cols=["faithfulness_explanation", "faithfulness_judgment"],
|
210
|
-
**parser_params,
|
211
|
-
)
|
212
|
-
|
213
|
-
self.filter_block = ColumnValueFilterBlock(
|
214
|
-
block_name=f"{self.block_name}_filter",
|
215
|
-
input_cols=["faithfulness_judgment"],
|
216
|
-
output_cols=[], # Filter doesn't create new columns
|
217
|
-
**filter_params,
|
218
|
-
)
|
219
|
-
|
220
|
-
def generate(self, samples: Dataset, **kwargs: Any) -> Dataset:
|
221
|
-
"""Execute the 4-block faithfulness evaluation pipeline.
|
222
|
-
|
223
|
-
Parameters
|
224
|
-
----------
|
225
|
-
samples : Dataset
|
226
|
-
Input dataset with 'document' and 'response' columns.
|
227
|
-
**kwargs : Any
|
228
|
-
Additional arguments passed to internal blocks.
|
229
|
-
|
230
|
-
Returns
|
231
|
-
-------
|
232
|
-
Dataset
|
233
|
-
Filtered dataset with faithfulness evaluation results.
|
234
|
-
"""
|
235
|
-
# Validate model is configured
|
236
|
-
if not self.model:
|
237
|
-
raise BlockValidationError(
|
238
|
-
f"Model not configured for block '{self.block_name}'. "
|
239
|
-
f"Call flow.set_model_config() before generating."
|
240
|
-
)
|
241
|
-
|
242
|
-
logger.info(
|
243
|
-
f"Starting faithfulness evaluation for {len(samples)} samples",
|
244
|
-
extra={"block_name": self.block_name, "model": self.model},
|
245
|
-
)
|
246
|
-
|
247
|
-
try:
|
248
|
-
# Execute 4-block pipeline with validation delegation
|
249
|
-
result = self.prompt_builder(samples, **kwargs)
|
250
|
-
result = self.llm_chat(result, **kwargs)
|
251
|
-
result = self.text_parser(result, **kwargs)
|
252
|
-
result = self.filter_block(result, **kwargs)
|
253
|
-
|
254
|
-
logger.info(
|
255
|
-
f"Faithfulness evaluation completed: {len(samples)} → {len(result)} samples",
|
256
|
-
extra={"block_name": self.block_name},
|
257
|
-
)
|
258
|
-
|
259
|
-
return result
|
260
|
-
|
261
|
-
except Exception as e:
|
262
|
-
logger.error(
|
263
|
-
f"Error during faithfulness evaluation: {e}",
|
264
|
-
extra={"block_name": self.block_name, "error": str(e)},
|
265
|
-
)
|
266
|
-
raise
|
267
|
-
|
268
|
-
def __getattr__(self, name: str) -> Any:
|
269
|
-
"""Forward attribute access to appropriate internal block."""
|
270
|
-
# Check each internal block to see which one has this parameter
|
271
|
-
for block_attr, block_class in [
|
272
|
-
("prompt_builder", PromptBuilderBlock),
|
273
|
-
("llm_chat", LLMChatBlock),
|
274
|
-
("text_parser", TextParserBlock),
|
275
|
-
("filter_block", ColumnValueFilterBlock),
|
276
|
-
]:
|
277
|
-
if hasattr(self, block_attr) and name in block_class.model_fields:
|
278
|
-
internal_block = getattr(self, block_attr)
|
279
|
-
if internal_block is not None:
|
280
|
-
return getattr(internal_block, name)
|
281
|
-
raise AttributeError(
|
282
|
-
f"'{self.__class__.__name__}' object has no attribute '{name}'"
|
283
|
-
)
|
284
|
-
|
285
|
-
def __setattr__(self, name: str, value: Any) -> None:
|
286
|
-
"""Handle dynamic parameter updates from flow.set_model_config()."""
|
287
|
-
super().__setattr__(name, value)
|
288
|
-
|
289
|
-
# Forward to appropriate internal blocks
|
290
|
-
for block_attr, block_class in [
|
291
|
-
("prompt_builder", PromptBuilderBlock),
|
292
|
-
("llm_chat", LLMChatBlock),
|
293
|
-
("text_parser", TextParserBlock),
|
294
|
-
("filter_block", ColumnValueFilterBlock),
|
295
|
-
]:
|
296
|
-
if hasattr(self, block_attr) and name in block_class.model_fields:
|
297
|
-
setattr(getattr(self, block_attr), name, value)
|
298
|
-
|
299
|
-
def _reinitialize_client_manager(self) -> None:
|
300
|
-
"""Reinitialize internal LLM block's client manager."""
|
301
|
-
if hasattr(self.llm_chat, "_reinitialize_client_manager"):
|
302
|
-
self.llm_chat._reinitialize_client_manager()
|
303
|
-
|
304
|
-
def get_internal_blocks_info(self) -> dict[str, Any]:
|
305
|
-
"""Get information about internal blocks."""
|
306
|
-
return {
|
307
|
-
"prompt_builder": self.prompt_builder.get_info(),
|
308
|
-
"llm_chat": self.llm_chat.get_info(),
|
309
|
-
"text_parser": self.text_parser.get_info(),
|
310
|
-
"filter": self.filter_block.get_info(),
|
311
|
-
}
|
312
|
-
|
313
|
-
def __repr__(self) -> str:
|
314
|
-
"""String representation of the block."""
|
315
|
-
filter_value = (
|
316
|
-
getattr(self.filter_block, "filter_value", "YES")
|
317
|
-
if hasattr(self, "filter_block")
|
318
|
-
else "YES"
|
319
|
-
)
|
320
|
-
return (
|
321
|
-
f"EvaluateFaithfulnessBlock(name='{self.block_name}', "
|
322
|
-
f"model='{self.model}', filter_value='{filter_value}')"
|
323
|
-
)
|