sdg-hub 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sdg_hub/_version.py +2 -2
- sdg_hub/core/blocks/__init__.py +2 -4
- sdg_hub/core/blocks/base.py +61 -6
- sdg_hub/core/blocks/filtering/column_value_filter.py +3 -2
- sdg_hub/core/blocks/llm/__init__.py +2 -4
- sdg_hub/core/blocks/llm/llm_chat_block.py +251 -265
- sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +216 -98
- sdg_hub/core/blocks/llm/llm_parser_block.py +320 -0
- sdg_hub/core/blocks/llm/text_parser_block.py +53 -152
- sdg_hub/core/flow/base.py +7 -4
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml +51 -11
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/__init__.py +0 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/flow.yaml +159 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml +51 -11
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml +14 -2
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +146 -26
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/README.md +0 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/__init__.py +0 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/atomic_facts_ja.yaml +41 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/detailed_summary_ja.yaml +14 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/extractive_summary_ja.yaml +14 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml +304 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/generate_questions_responses_ja.yaml +55 -0
- sdg_hub/flows/text_analysis/structured_insights/flow.yaml +28 -4
- {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.0.dist-info}/METADATA +1 -1
- {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.0.dist-info}/RECORD +29 -25
- sdg_hub/core/blocks/evaluation/__init__.py +0 -9
- sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +0 -323
- sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +0 -323
- sdg_hub/core/blocks/evaluation/verify_question_block.py +0 -329
- sdg_hub/core/blocks/llm/client_manager.py +0 -472
- sdg_hub/core/blocks/llm/config.py +0 -337
- {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.0.dist-info}/WHEEL +0 -0
- {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.0.dist-info}/top_level.txt +0 -0
@@ -61,10 +61,16 @@ blocks:
|
|
61
61
|
temperature: 0.7
|
62
62
|
n: 50
|
63
63
|
async_mode: true
|
64
|
+
- block_type: LLMParserBlock
|
65
|
+
block_config:
|
66
|
+
block_name: extract_detailed_summary
|
67
|
+
input_cols: raw_summary
|
68
|
+
extract_content: true
|
69
|
+
expand_lists: true
|
64
70
|
- block_type: TextParserBlock
|
65
71
|
block_config:
|
66
72
|
block_name: parse_detailed_summary
|
67
|
-
input_cols:
|
73
|
+
input_cols: extract_detailed_summary_content
|
68
74
|
output_cols: summary
|
69
75
|
start_tags:
|
70
76
|
- ''
|
@@ -99,10 +105,16 @@ blocks:
|
|
99
105
|
temperature: 0.7
|
100
106
|
n: 1
|
101
107
|
async_mode: true
|
108
|
+
- block_type: LLMParserBlock
|
109
|
+
block_config:
|
110
|
+
block_name: extract_questions
|
111
|
+
input_cols: question_list
|
112
|
+
extract_content: true
|
113
|
+
expand_lists: true
|
102
114
|
- block_type: TextParserBlock
|
103
115
|
block_config:
|
104
116
|
block_name: parse_question_list
|
105
|
-
input_cols:
|
117
|
+
input_cols: extract_questions_content
|
106
118
|
output_cols: question
|
107
119
|
start_tags:
|
108
120
|
- '[QUESTION]'
|
@@ -127,33 +139,61 @@ blocks:
|
|
127
139
|
temperature: 0.7
|
128
140
|
n: 1
|
129
141
|
async_mode: true
|
142
|
+
- block_type: LLMParserBlock
|
143
|
+
block_config:
|
144
|
+
block_name: extract_answers
|
145
|
+
input_cols: response_dict
|
146
|
+
extract_content: true
|
147
|
+
expand_lists: true
|
130
148
|
- block_type: TextParserBlock
|
131
149
|
block_config:
|
132
150
|
block_name: parse_response_dict
|
133
|
-
input_cols:
|
151
|
+
input_cols: extract_answers_content
|
134
152
|
output_cols: response
|
135
153
|
start_tags:
|
136
154
|
- ''
|
137
155
|
end_tags:
|
138
156
|
- ''
|
139
157
|
save_reasoning_content: true
|
140
|
-
- block_type:
|
158
|
+
- block_type: PromptBuilderBlock
|
141
159
|
block_config:
|
142
|
-
block_name:
|
160
|
+
block_name: eval_faithful_prompt
|
143
161
|
input_cols:
|
144
162
|
- document
|
145
163
|
- response
|
146
|
-
output_cols:
|
147
|
-
- faithfulness_explanation
|
148
|
-
- faithfulness_judgment
|
164
|
+
output_cols: eval_faithful_prompt
|
149
165
|
prompt_config_path: ../../multi_summary_qa/instructlab/evaluate_faithfulness.yaml
|
150
|
-
filter_value: 'YES'
|
151
|
-
operation: eq
|
152
|
-
async_mode: true
|
153
166
|
format_as_messages: true
|
167
|
+
- block_type: LLMChatBlock
|
168
|
+
block_config:
|
169
|
+
block_name: eval_faithful_llm_chat
|
170
|
+
input_cols: eval_faithful_prompt
|
171
|
+
output_cols: eval_faithful_response_dict
|
172
|
+
n: 1
|
173
|
+
async_mode: true
|
174
|
+
- block_type: LLMParserBlock
|
175
|
+
block_config:
|
176
|
+
block_name: extract_eval_faithful
|
177
|
+
input_cols: eval_faithful_response_dict
|
178
|
+
extract_content: true
|
179
|
+
|
180
|
+
- block_type: TextParserBlock
|
181
|
+
block_config:
|
182
|
+
block_name: parse_eval_faithful
|
183
|
+
input_cols: extract_eval_faithful_content
|
184
|
+
output_cols:
|
185
|
+
- faithfulness_explanation
|
186
|
+
- faithfulness_judgment
|
154
187
|
start_tags:
|
155
188
|
- '[Start of Explanation]'
|
156
189
|
- '[Start of Answer]'
|
157
190
|
end_tags:
|
158
191
|
- '[End of Explanation]'
|
159
192
|
- '[End of Answer]'
|
193
|
+
- block_type: ColumnValueFilterBlock
|
194
|
+
block_config:
|
195
|
+
block_name: eval_faithful_filter
|
196
|
+
input_cols:
|
197
|
+
- faithfulness_judgment
|
198
|
+
filter_value: 'YES'
|
199
|
+
operation: eq
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/__init__.py
ADDED
File without changes
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/flow.yaml
ADDED
@@ -0,0 +1,159 @@
|
|
1
|
+
metadata:
|
2
|
+
name: Document Based Knowledge Tuning Dataset Generation Flow
|
3
|
+
description: Directly generates QA pairs from the raw document.
|
4
|
+
version: 2.0.0
|
5
|
+
author: SDG Hub Contributors
|
6
|
+
recommended_models:
|
7
|
+
default: openai/gpt-oss-120b
|
8
|
+
compatible:
|
9
|
+
- meta-llama/Llama-3.3-70B-Instruct
|
10
|
+
- microsoft/phi-4
|
11
|
+
- mistralai/Mixtral-8x7B-Instruct-v0.1
|
12
|
+
experimental: []
|
13
|
+
tags:
|
14
|
+
- knowledge-tuning
|
15
|
+
- document-internalization
|
16
|
+
- question-generation
|
17
|
+
- qa-pairs
|
18
|
+
- detailed-summaries
|
19
|
+
license: Apache-2.0
|
20
|
+
min_sdg_hub_version: 0.2.0
|
21
|
+
dataset_requirements:
|
22
|
+
required_columns:
|
23
|
+
- document
|
24
|
+
- document_outline
|
25
|
+
- domain
|
26
|
+
- icl_document
|
27
|
+
- icl_query_1
|
28
|
+
- icl_query_2
|
29
|
+
- icl_query_3
|
30
|
+
description: 'Input dataset should contain documents with text content and domain classification. Each document should be substantial enough for meaningful question generation (minimum 100 words recommended). The flow generates questions directly
|
31
|
+
from each raw document, answers them, and keeps only the QA pairs judged faithful to the document, to help LLMs internalize document knowledge for knowledge tuning.'
|
32
|
+
output_columns:
|
33
|
+
- question
|
34
|
+
- response
|
35
|
+
- raw_document
|
36
|
+
- faithfulness_explanation
|
37
|
+
- faithfulness_judgment
|
38
|
+
id: stellar-peak-605
|
39
|
+
blocks:
|
40
|
+
- block_type: DuplicateColumnsBlock
|
41
|
+
block_config:
|
42
|
+
block_name: duplicate_document_col
|
43
|
+
input_cols:
|
44
|
+
document: base_document
|
45
|
+
- block_type: PromptBuilderBlock
|
46
|
+
block_config:
|
47
|
+
block_name: question_generation_prompt
|
48
|
+
input_cols:
|
49
|
+
- domain
|
50
|
+
- document
|
51
|
+
- document_outline
|
52
|
+
- icl_document
|
53
|
+
- icl_query_1
|
54
|
+
- icl_query_2
|
55
|
+
- icl_query_3
|
56
|
+
output_cols: question_generation_prompt
|
57
|
+
prompt_config_path: ../generate_question_list.yaml
|
58
|
+
format_as_messages: true
|
59
|
+
- block_type: LLMChatBlock
|
60
|
+
block_config:
|
61
|
+
block_name: question_generation
|
62
|
+
input_cols: question_generation_prompt
|
63
|
+
output_cols: question_list
|
64
|
+
max_tokens: 256
|
65
|
+
temperature: 1.0
|
66
|
+
n: 1
|
67
|
+
async_mode: true
|
68
|
+
- block_type: LLMParserBlock
|
69
|
+
block_config:
|
70
|
+
block_name: extract_questions
|
71
|
+
input_cols: question_list
|
72
|
+
extract_content: true
|
73
|
+
expand_lists: true
|
74
|
+
- block_type: TextParserBlock
|
75
|
+
block_config:
|
76
|
+
block_name: parse_question_list
|
77
|
+
input_cols: extract_questions_content
|
78
|
+
output_cols: question
|
79
|
+
start_tags:
|
80
|
+
- '[QUESTION]'
|
81
|
+
end_tags:
|
82
|
+
- '[END]'
|
83
|
+
- block_type: PromptBuilderBlock
|
84
|
+
block_config:
|
85
|
+
block_name: answer_generation_prompt
|
86
|
+
input_cols:
|
87
|
+
- question
|
88
|
+
- document
|
89
|
+
- document_outline
|
90
|
+
output_cols: answer_generation_prompt
|
91
|
+
prompt_config_path: ../generate_answers.yaml
|
92
|
+
format_as_messages: true
|
93
|
+
- block_type: LLMChatBlock
|
94
|
+
block_config:
|
95
|
+
block_name: answer_generation
|
96
|
+
input_cols: answer_generation_prompt
|
97
|
+
output_cols: response_dict
|
98
|
+
max_tokens: 4096
|
99
|
+
temperature: 1.0
|
100
|
+
n: 1
|
101
|
+
async_mode: true
|
102
|
+
- block_type: LLMParserBlock
|
103
|
+
block_config:
|
104
|
+
block_name: extract_answer
|
105
|
+
input_cols: response_dict
|
106
|
+
extract_content: true
|
107
|
+
expand_lists: true
|
108
|
+
- block_type: TextParserBlock
|
109
|
+
block_config:
|
110
|
+
block_name: parse_response_dict
|
111
|
+
input_cols: extract_answer_content
|
112
|
+
output_cols: response
|
113
|
+
start_tags:
|
114
|
+
- ''
|
115
|
+
end_tags:
|
116
|
+
- ''
|
117
|
+
save_reasoning_content: true
|
118
|
+
- block_type: PromptBuilderBlock
|
119
|
+
block_config:
|
120
|
+
block_name: eval_faithful_prompt
|
121
|
+
input_cols:
|
122
|
+
- document
|
123
|
+
- response
|
124
|
+
output_cols: eval_faithful_prompt
|
125
|
+
prompt_config_path: ../../multi_summary_qa/instructlab/evaluate_faithfulness.yaml
|
126
|
+
format_as_messages: true
|
127
|
+
- block_type: LLMChatBlock
|
128
|
+
block_config:
|
129
|
+
block_name: eval_faithful_llm_chat
|
130
|
+
input_cols: eval_faithful_prompt
|
131
|
+
output_cols: eval_faithful_response_dict
|
132
|
+
n: 1
|
133
|
+
async_mode: true
|
134
|
+
- block_type: LLMParserBlock
|
135
|
+
block_config:
|
136
|
+
block_name: extract_eval_faithful
|
137
|
+
input_cols: eval_faithful_response_dict
|
138
|
+
extract_content: true
|
139
|
+
|
140
|
+
- block_type: TextParserBlock
|
141
|
+
block_config:
|
142
|
+
block_name: parse_eval_faithful
|
143
|
+
input_cols: extract_eval_faithful_content
|
144
|
+
output_cols:
|
145
|
+
- faithfulness_explanation
|
146
|
+
- faithfulness_judgment
|
147
|
+
start_tags:
|
148
|
+
- '[Start of Explanation]'
|
149
|
+
- '[Start of Answer]'
|
150
|
+
end_tags:
|
151
|
+
- '[End of Explanation]'
|
152
|
+
- '[End of Answer]'
|
153
|
+
- block_type: ColumnValueFilterBlock
|
154
|
+
block_config:
|
155
|
+
block_name: eval_faithful_filter
|
156
|
+
input_cols:
|
157
|
+
- faithfulness_judgment
|
158
|
+
filter_value: 'YES'
|
159
|
+
operation: eq
|
@@ -63,10 +63,16 @@ blocks:
|
|
63
63
|
temperature: 0.7
|
64
64
|
n: 50
|
65
65
|
async_mode: true
|
66
|
+
- block_type: LLMParserBlock
|
67
|
+
block_config:
|
68
|
+
block_name: extract_extractive_summary
|
69
|
+
input_cols: raw_summary
|
70
|
+
extract_content: true
|
71
|
+
expand_lists: true
|
66
72
|
- block_type: TextParserBlock
|
67
73
|
block_config:
|
68
74
|
block_name: parse_extractive_summary
|
69
|
-
input_cols:
|
75
|
+
input_cols: extract_extractive_summary_content
|
70
76
|
output_cols: summary
|
71
77
|
start_tags:
|
72
78
|
- ''
|
@@ -101,10 +107,16 @@ blocks:
|
|
101
107
|
temperature: 0.7
|
102
108
|
n: 1
|
103
109
|
async_mode: true
|
110
|
+
- block_type: LLMParserBlock
|
111
|
+
block_config:
|
112
|
+
block_name: extract_questions
|
113
|
+
input_cols: question_list
|
114
|
+
extract_content: true
|
115
|
+
expand_lists: true
|
104
116
|
- block_type: TextParserBlock
|
105
117
|
block_config:
|
106
118
|
block_name: parse_question_list
|
107
|
-
input_cols:
|
119
|
+
input_cols: extract_questions_content
|
108
120
|
output_cols: question
|
109
121
|
start_tags:
|
110
122
|
- '[QUESTION]'
|
@@ -129,33 +141,61 @@ blocks:
|
|
129
141
|
temperature: 0.7
|
130
142
|
n: 1
|
131
143
|
async_mode: true
|
144
|
+
- block_type: LLMParserBlock
|
145
|
+
block_config:
|
146
|
+
block_name: extract_answers
|
147
|
+
input_cols: response_dict
|
148
|
+
extract_content: true
|
149
|
+
expand_lists: true
|
132
150
|
- block_type: TextParserBlock
|
133
151
|
block_config:
|
134
152
|
block_name: parse_response_dict
|
135
|
-
input_cols:
|
153
|
+
input_cols: extract_answers_content
|
136
154
|
output_cols: response
|
137
155
|
start_tags:
|
138
156
|
- ''
|
139
157
|
end_tags:
|
140
158
|
- ''
|
141
159
|
save_reasoning_content: true
|
142
|
-
- block_type:
|
160
|
+
- block_type: PromptBuilderBlock
|
143
161
|
block_config:
|
144
|
-
block_name:
|
162
|
+
block_name: eval_faithful_prompt
|
145
163
|
input_cols:
|
146
164
|
- document
|
147
165
|
- response
|
148
|
-
output_cols:
|
149
|
-
- faithfulness_explanation
|
150
|
-
- faithfulness_judgment
|
166
|
+
output_cols: eval_faithful_prompt
|
151
167
|
prompt_config_path: ../../multi_summary_qa/instructlab/evaluate_faithfulness.yaml
|
152
|
-
filter_value: 'YES'
|
153
|
-
operation: eq
|
154
|
-
async_mode: true
|
155
168
|
format_as_messages: true
|
169
|
+
- block_type: LLMChatBlock
|
170
|
+
block_config:
|
171
|
+
block_name: eval_faithful_llm_chat
|
172
|
+
input_cols: eval_faithful_prompt
|
173
|
+
output_cols: eval_faithful_response_dict
|
174
|
+
n: 1
|
175
|
+
async_mode: true
|
176
|
+
- block_type: LLMParserBlock
|
177
|
+
block_config:
|
178
|
+
block_name: extract_eval_faithful
|
179
|
+
input_cols: eval_faithful_response_dict
|
180
|
+
extract_content: true
|
181
|
+
|
182
|
+
- block_type: TextParserBlock
|
183
|
+
block_config:
|
184
|
+
block_name: parse_eval_faithful
|
185
|
+
input_cols: extract_eval_faithful_content
|
186
|
+
output_cols:
|
187
|
+
- faithfulness_explanation
|
188
|
+
- faithfulness_judgement
|
156
189
|
start_tags:
|
157
190
|
- '[Start of Explanation]'
|
158
191
|
- '[Start of Answer]'
|
159
192
|
end_tags:
|
160
193
|
- '[End of Explanation]'
|
161
194
|
- '[End of Answer]'
|
195
|
+
- block_type: ColumnValueFilterBlock
|
196
|
+
block_config:
|
197
|
+
block_name: eval_faithful_filter
|
198
|
+
input_cols:
|
199
|
+
- faithfulness_judgement
|
200
|
+
filter_value: 'YES'
|
201
|
+
operation: eq
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml
CHANGED
@@ -50,10 +50,16 @@ blocks:
|
|
50
50
|
temperature: 0.7
|
51
51
|
n: 1
|
52
52
|
async_mode: true
|
53
|
+
- block_type: LLMParserBlock
|
54
|
+
block_config:
|
55
|
+
block_name: extract_atomic_facts
|
56
|
+
input_cols: raw_summary
|
57
|
+
extract_content: true
|
58
|
+
expand_lists: true
|
53
59
|
- block_type: TextParserBlock
|
54
60
|
block_config:
|
55
61
|
block_name: parse_atomic_facts
|
56
|
-
input_cols:
|
62
|
+
input_cols: extract_atomic_facts_content
|
57
63
|
output_cols: atomic_facts
|
58
64
|
start_tags:
|
59
65
|
- '### Key Facts With Context'
|
@@ -89,10 +95,16 @@ blocks:
|
|
89
95
|
temperature: 0.7
|
90
96
|
n: 1
|
91
97
|
async_mode: true
|
98
|
+
- block_type: LLMParserBlock
|
99
|
+
block_config:
|
100
|
+
block_name: extract_key_fact_qa
|
101
|
+
input_cols: raw_key_fact_qa
|
102
|
+
extract_content: true
|
103
|
+
expand_lists: true
|
92
104
|
- block_type: TextParserBlock
|
93
105
|
block_config:
|
94
106
|
block_name: parse_key_fact_qa
|
95
|
-
input_cols:
|
107
|
+
input_cols: extract_key_fact_qa_content
|
96
108
|
output_cols:
|
97
109
|
- question
|
98
110
|
- response
|
@@ -54,11 +54,19 @@ blocks:
|
|
54
54
|
output_cols: raw_summary_detailed
|
55
55
|
max_tokens: 2048
|
56
56
|
async_mode: true
|
57
|
+
n: 2
|
58
|
+
|
59
|
+
- block_type: LLMParserBlock
|
60
|
+
block_config:
|
61
|
+
block_name: detailed_summary
|
62
|
+
input_cols: raw_summary_detailed
|
63
|
+
extract_content: true
|
64
|
+
extract_reasoning_content: true
|
57
65
|
|
58
66
|
- block_type: TextParserBlock
|
59
67
|
block_config:
|
60
68
|
block_name: parse_detailed_summary
|
61
|
-
input_cols:
|
69
|
+
input_cols: detailed_summary_content
|
62
70
|
output_cols: summary_detailed
|
63
71
|
start_tags: [""]
|
64
72
|
end_tags: [""]
|
@@ -78,10 +86,16 @@ blocks:
|
|
78
86
|
max_tokens: 2048
|
79
87
|
async_mode: true
|
80
88
|
|
89
|
+
- block_type: LLMParserBlock
|
90
|
+
block_config:
|
91
|
+
block_name: atomic_facts
|
92
|
+
input_cols: raw_atomic_facts
|
93
|
+
extract_content: true
|
94
|
+
|
81
95
|
- block_type: TextParserBlock
|
82
96
|
block_config:
|
83
97
|
block_name: parse_atomic_facts
|
84
|
-
input_cols:
|
98
|
+
input_cols: atomic_facts_content
|
85
99
|
output_cols: summary_atomic_facts
|
86
100
|
start_tags: [""]
|
87
101
|
end_tags: [""]
|
@@ -101,10 +115,16 @@ blocks:
|
|
101
115
|
max_tokens: 2048
|
102
116
|
async_mode: true
|
103
117
|
|
118
|
+
- block_type: LLMParserBlock
|
119
|
+
block_config:
|
120
|
+
block_name: extractive_summary
|
121
|
+
input_cols: raw_summary_extractive
|
122
|
+
extract_content: true
|
123
|
+
|
104
124
|
- block_type: TextParserBlock
|
105
125
|
block_config:
|
106
126
|
block_name: parse_extractive_summary
|
107
|
-
input_cols:
|
127
|
+
input_cols: extractive_summary_content
|
108
128
|
output_cols: summary_extractive
|
109
129
|
start_tags: [""]
|
110
130
|
end_tags: [""]
|
@@ -136,50 +156,150 @@ blocks:
|
|
136
156
|
max_tokens: 2048
|
137
157
|
async_mode: true
|
138
158
|
|
159
|
+
- block_type: LLMParserBlock
|
160
|
+
block_config:
|
161
|
+
block_name: get_knowledge_generation
|
162
|
+
input_cols: raw_knowledge_generation
|
163
|
+
extract_content: true
|
164
|
+
|
139
165
|
- block_type: TextParserBlock
|
140
166
|
block_config:
|
141
167
|
block_name: parse_knowledge_generation
|
142
|
-
input_cols:
|
168
|
+
input_cols: get_knowledge_generation_content
|
143
169
|
output_cols: [question, response]
|
144
170
|
parsing_pattern: "\\[(?:Question|QUESTION)\\]\\s*(.*?)\\s*\\[(?:Answer|ANSWER)\\]\\s*(.*?)\\s*(?=\\[(?:Question|QUESTION)\\]|$)"
|
145
171
|
parser_cleanup_tags: ["[END]"]
|
146
172
|
|
147
|
-
- block_type:
|
173
|
+
- block_type: PromptBuilderBlock
|
148
174
|
block_config:
|
149
|
-
block_name:
|
175
|
+
block_name: eval_faithful_prompt
|
150
176
|
input_cols: [document, response]
|
151
|
-
output_cols:
|
177
|
+
output_cols: eval_faithful_prompt
|
152
178
|
prompt_config_path: evaluate_faithfulness.yaml
|
179
|
+
format_as_messages: true
|
180
|
+
|
181
|
+
- block_type: LLMChatBlock
|
182
|
+
block_config:
|
183
|
+
block_name: eval_faithful_llm_chat
|
184
|
+
input_cols: eval_faithful_prompt
|
185
|
+
output_cols: eval_faithful_response_dict
|
186
|
+
max_tokens: 2048
|
187
|
+
n: 1
|
188
|
+
async_mode: true
|
189
|
+
|
190
|
+
- block_type: LLMParserBlock
|
191
|
+
block_config:
|
192
|
+
block_name: extract_eval_faithful
|
193
|
+
input_cols: eval_faithful_response_dict
|
194
|
+
extract_content: true
|
195
|
+
|
196
|
+
- block_type: TextParserBlock
|
197
|
+
block_config:
|
198
|
+
block_name: parse_eval_faithful
|
199
|
+
input_cols: extract_eval_faithful_content
|
200
|
+
output_cols:
|
201
|
+
- faithfulness_explanation
|
202
|
+
- faithfulness_judgment
|
203
|
+
start_tags:
|
204
|
+
- '[Start of Explanation]'
|
205
|
+
- '[Start of Answer]'
|
206
|
+
end_tags:
|
207
|
+
- '[End of Explanation]'
|
208
|
+
- '[End of Answer]'
|
209
|
+
|
210
|
+
- block_type: ColumnValueFilterBlock
|
211
|
+
block_config:
|
212
|
+
block_name: eval_faithful_filter
|
213
|
+
input_cols:
|
214
|
+
- faithfulness_judgment
|
153
215
|
filter_value: "YES"
|
154
216
|
operation: eq
|
155
|
-
async_mode: true
|
156
|
-
start_tags: ["[Start of Explanation]", "[Start of Answer]"]
|
157
|
-
end_tags: ["[End of Explanation]", "[End of Answer]"]
|
158
217
|
|
159
|
-
- block_type:
|
218
|
+
- block_type: PromptBuilderBlock
|
160
219
|
block_config:
|
161
|
-
block_name:
|
162
|
-
input_cols:
|
163
|
-
|
220
|
+
block_name: eval_relevancy_prompt
|
221
|
+
input_cols:
|
222
|
+
- question
|
223
|
+
- response
|
224
|
+
output_cols: eval_relevancy_prompt
|
164
225
|
prompt_config_path: evaluate_relevancy.yaml
|
226
|
+
format_as_messages: true
|
227
|
+
- block_type: LLMChatBlock
|
228
|
+
block_config:
|
229
|
+
block_name: eval_relevancy_llm_chat
|
230
|
+
input_cols: eval_relevancy_prompt
|
231
|
+
output_cols: eval_relevancy_response_dict
|
232
|
+
max_tokens: 2048
|
233
|
+
n: 1
|
234
|
+
async_mode: true
|
235
|
+
- block_type: LLMParserBlock
|
236
|
+
block_config:
|
237
|
+
block_name: extract_eval_relevancy
|
238
|
+
input_cols: eval_relevancy_response_dict
|
239
|
+
extract_content: true
|
240
|
+
|
241
|
+
- block_type: TextParserBlock
|
242
|
+
block_config:
|
243
|
+
block_name: parse_eval_relevancy
|
244
|
+
input_cols: extract_eval_relevancy_content
|
245
|
+
output_cols:
|
246
|
+
- relevancy_explanation
|
247
|
+
- relevancy_score
|
248
|
+
start_tags:
|
249
|
+
- '[Start of Feedback]'
|
250
|
+
- '[Start of Score]'
|
251
|
+
end_tags:
|
252
|
+
- '[End of Feedback]'
|
253
|
+
- '[End of Score]'
|
254
|
+
- block_type: ColumnValueFilterBlock
|
255
|
+
block_config:
|
256
|
+
block_name: eval_relevancy_filter
|
257
|
+
input_cols:
|
258
|
+
- relevancy_score
|
165
259
|
filter_value: 2.0
|
166
260
|
operation: eq
|
167
261
|
convert_dtype: float
|
168
|
-
max_tokens: 2048
|
169
|
-
async_mode: true
|
170
|
-
start_tags: ["[Start of Feedback]", "[Start of Score]"]
|
171
|
-
end_tags: ["[End of Feedback]", "[End of Score]"]
|
172
262
|
|
173
|
-
- block_type:
|
263
|
+
- block_type: PromptBuilderBlock
|
174
264
|
block_config:
|
175
|
-
block_name:
|
176
|
-
input_cols:
|
177
|
-
|
265
|
+
block_name: verify_question_prompt
|
266
|
+
input_cols:
|
267
|
+
- question
|
268
|
+
output_cols: verify_question_prompt
|
178
269
|
prompt_config_path: evaluate_question.yaml
|
270
|
+
format_as_messages: true
|
271
|
+
- block_type: LLMChatBlock
|
272
|
+
block_config:
|
273
|
+
block_name: verify_question_llm_chat
|
274
|
+
input_cols: verify_question_prompt
|
275
|
+
output_cols: verify_question_response_dict
|
276
|
+
max_tokens: 2048
|
277
|
+
n: 1
|
278
|
+
async_mode: true
|
279
|
+
- block_type: LLMParserBlock
|
280
|
+
block_config:
|
281
|
+
block_name: extract_verify_question
|
282
|
+
input_cols: verify_question_response_dict
|
283
|
+
extract_content: true
|
284
|
+
|
285
|
+
- block_type: TextParserBlock
|
286
|
+
block_config:
|
287
|
+
block_name: parse_verify_question
|
288
|
+
input_cols: extract_verify_question_content
|
289
|
+
output_cols:
|
290
|
+
- verification_explanation
|
291
|
+
- verification_rating
|
292
|
+
start_tags:
|
293
|
+
- '[Start of Explanation]'
|
294
|
+
- '[Start of Rating]'
|
295
|
+
end_tags:
|
296
|
+
- '[End of Explanation]'
|
297
|
+
- '[End of Rating]'
|
298
|
+
- block_type: ColumnValueFilterBlock
|
299
|
+
block_config:
|
300
|
+
block_name: verify_question_filter
|
301
|
+
input_cols:
|
302
|
+
- verification_rating
|
179
303
|
filter_value: 1.0
|
180
304
|
operation: ge
|
181
305
|
convert_dtype: float
|
182
|
-
max_tokens: 2048
|
183
|
-
async_mode: true
|
184
|
-
start_tags: ["[Start of Explanation]", "[Start of Rating]"]
|
185
|
-
end_tags: ["[End of Explanation]", "[End of Rating]"]
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/README.md
ADDED
File without changes
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/__init__.py
ADDED
File without changes
|
@@ -0,0 +1,41 @@
|
|
1
|
+
- role: system
|
2
|
+
content: You are an AI assistant knowledgeable about the {{domain}} domain. Be accurate but concise in your response.
|
3
|
+
|
4
|
+
- role: user
|
5
|
+
content: |
|
6
|
+
Please break down the following snippet from an article about {{domain}} into atomic facts.
|
7
|
+
|
8
|
+
1. Make sure each fact is grounded in the given text.
|
9
|
+
2. Include any necessary information needed to explain the fact or concept
|
10
|
+
3. The atomic facts should be as simple as possible; if a fact is a compound sentence, break it down one more time
|
11
|
+
4. For clarity, avoid using pronouns like 'it', 'he', 'she', 'this', 'that' etc., and instead use the full names or titles.
|
12
|
+
5. Focus only on key concepts and facts. Skip any question or problems mentioned in the passage.
|
13
|
+
6. Output the response in Japanese.
|
14
|
+
|
15
|
+
To help you understand the task, here is an example:
|
16
|
+
[Passage]
|
17
|
+
The tournament was contested by ten national teams, maintaining the same format used in 2019. After six weeks of round-robin matches, India, South Africa, Australia, and New Zealand finished as the top four and qualified for the knockout stage. In the knockout stage, India and Australia beat New Zealand and South Africa, respectively, to advance to the final, played on 19 November at the Narendra Modi Stadium in Ahmedabad. Australia won the final by six wickets, winning their sixth Cricket World Cup title.
|
18
|
+
[Facts]
|
19
|
+
1. The tournament was contested by ten national teams.
|
20
|
+
2. The tournament maintained the same format used in 2019.
|
21
|
+
3. The round-robin matches lasted for six weeks.
|
22
|
+
4. India finished as one of the top four teams.
|
23
|
+
5. South Africa finished as one of the top four teams.
|
24
|
+
6. Australia finished as one of the top four teams.
|
25
|
+
7. New Zealand finished as one of the top four teams.
|
26
|
+
8. India, South Africa, Australia, and New Zealand qualified for the knockout stage.
|
27
|
+
9. In the knockout stage, India beat New Zealand.
|
28
|
+
10. In the knockout stage, Australia beat South Africa.
|
29
|
+
11. India advanced to the final.
|
30
|
+
12. Australia advanced to the final.
|
31
|
+
13. The final was played on 19 November.
|
32
|
+
14. The final was held at the Narendra Modi Stadium in Ahmedabad.
|
33
|
+
15. Australia won the final by six wickets.
|
34
|
+
16. Australia won their sixth Cricket World Cup title.
|
35
|
+
[End]
|
36
|
+
|
37
|
+
Now it's your turn: break down the following snippet from an article about {{domain}} into atomic facts, following a similar style to the example above
|
38
|
+
[Passage]
|
39
|
+
{{document_outline}}
|
40
|
+
{{document}}
|
41
|
+
[Facts]
|
@@ -0,0 +1,14 @@
|
|
1
|
+
- role: system
|
2
|
+
content: You are an AI assistant that is an expert at summarizing text.
|
3
|
+
|
4
|
+
- role: user
|
5
|
+
content: |
|
6
|
+
Give me a detailed summary of the document below, making sure all key points are covered.
|
7
|
+
|
8
|
+
Do not add any new information.
|
9
|
+
Do not miss any key points from the provided document.
|
10
|
+
Output the response in Japanese.
|
11
|
+
|
12
|
+
Document:
|
13
|
+
{{document_outline}}
|
14
|
+
{{document}}
|