sdg-hub 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. sdg_hub/_version.py +2 -2
  2. sdg_hub/core/blocks/__init__.py +2 -4
  3. sdg_hub/core/blocks/base.py +61 -6
  4. sdg_hub/core/blocks/filtering/column_value_filter.py +3 -2
  5. sdg_hub/core/blocks/llm/__init__.py +2 -4
  6. sdg_hub/core/blocks/llm/llm_chat_block.py +251 -265
  7. sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +216 -98
  8. sdg_hub/core/blocks/llm/llm_parser_block.py +320 -0
  9. sdg_hub/core/blocks/llm/text_parser_block.py +53 -152
  10. sdg_hub/core/flow/base.py +7 -4
  11. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml +51 -11
  12. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/__init__.py +0 -0
  13. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/flow.yaml +159 -0
  14. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml +51 -11
  15. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml +14 -2
  16. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +146 -26
  17. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/README.md +0 -0
  18. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/__init__.py +0 -0
  19. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/atomic_facts_ja.yaml +41 -0
  20. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/detailed_summary_ja.yaml +14 -0
  21. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/extractive_summary_ja.yaml +14 -0
  22. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml +304 -0
  23. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/generate_questions_responses_ja.yaml +55 -0
  24. sdg_hub/flows/text_analysis/structured_insights/flow.yaml +28 -4
  25. {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.0.dist-info}/METADATA +1 -1
  26. {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.0.dist-info}/RECORD +29 -25
  27. sdg_hub/core/blocks/evaluation/__init__.py +0 -9
  28. sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +0 -323
  29. sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +0 -323
  30. sdg_hub/core/blocks/evaluation/verify_question_block.py +0 -329
  31. sdg_hub/core/blocks/llm/client_manager.py +0 -472
  32. sdg_hub/core/blocks/llm/config.py +0 -337
  33. {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.0.dist-info}/WHEEL +0 -0
  34. {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.0.dist-info}/licenses/LICENSE +0 -0
  35. {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.0.dist-info}/top_level.txt +0 -0
@@ -61,10 +61,16 @@ blocks:
61
61
  temperature: 0.7
62
62
  n: 50
63
63
  async_mode: true
64
+ - block_type: LLMParserBlock
65
+ block_config:
66
+ block_name: extract_detailed_summary
67
+ input_cols: raw_summary
68
+ extract_content: true
69
+ expand_lists: true
64
70
  - block_type: TextParserBlock
65
71
  block_config:
66
72
  block_name: parse_detailed_summary
67
- input_cols: raw_summary
73
+ input_cols: extract_detailed_summary_content
68
74
  output_cols: summary
69
75
  start_tags:
70
76
  - ''
@@ -99,10 +105,16 @@ blocks:
99
105
  temperature: 0.7
100
106
  n: 1
101
107
  async_mode: true
108
+ - block_type: LLMParserBlock
109
+ block_config:
110
+ block_name: extract_questions
111
+ input_cols: question_list
112
+ extract_content: true
113
+ expand_lists: true
102
114
  - block_type: TextParserBlock
103
115
  block_config:
104
116
  block_name: parse_question_list
105
- input_cols: question_list
117
+ input_cols: extract_questions_content
106
118
  output_cols: question
107
119
  start_tags:
108
120
  - '[QUESTION]'
@@ -127,33 +139,61 @@ blocks:
127
139
  temperature: 0.7
128
140
  n: 1
129
141
  async_mode: true
142
+ - block_type: LLMParserBlock
143
+ block_config:
144
+ block_name: extract_answers
145
+ input_cols: response_dict
146
+ extract_content: true
147
+ expand_lists: true
130
148
  - block_type: TextParserBlock
131
149
  block_config:
132
150
  block_name: parse_response_dict
133
- input_cols: response_dict
151
+ input_cols: extract_answers_content
134
152
  output_cols: response
135
153
  start_tags:
136
154
  - ''
137
155
  end_tags:
138
156
  - ''
139
157
  save_reasoning_content: true
140
- - block_type: EvaluateFaithfulnessBlock
158
+ - block_type: PromptBuilderBlock
141
159
  block_config:
142
- block_name: eval_faithfulness
160
+ block_name: eval_faithful_prompt
143
161
  input_cols:
144
162
  - document
145
163
  - response
146
- output_cols:
147
- - faithfulness_explanation
148
- - faithfulness_judgment
164
+ output_cols: eval_faithful_prompt
149
165
  prompt_config_path: ../../multi_summary_qa/instructlab/evaluate_faithfulness.yaml
150
- filter_value: 'YES'
151
- operation: eq
152
- async_mode: true
153
166
  format_as_messages: true
167
+ - block_type: LLMChatBlock
168
+ block_config:
169
+ block_name: eval_faithful_llm_chat
170
+ input_cols: eval_faithful_prompt
171
+ output_cols: eval_faithful_response_dict
172
+ n: 1
173
+ async_mode: true
174
+ - block_type: LLMParserBlock
175
+ block_config:
176
+ block_name: extract_eval_faithful
177
+ input_cols: eval_faithful_response_dict
178
+ extract_content: true
179
+
180
+ - block_type: TextParserBlock
181
+ block_config:
182
+ block_name: parse_eval_faithful
183
+ input_cols: extract_eval_faithful_content
184
+ output_cols:
185
+ - faithfulness_explanation
186
+ - faithfulness_judgment
154
187
  start_tags:
155
188
  - '[Start of Explanation]'
156
189
  - '[Start of Answer]'
157
190
  end_tags:
158
191
  - '[End of Explanation]'
159
192
  - '[End of Answer]'
193
+ - block_type: ColumnValueFilterBlock
194
+ block_config:
195
+ block_name: eval_faithful_filter
196
+ input_cols:
197
+ - faithfulness_judgment
198
+ filter_value: 'YES'
199
+ operation: eq
@@ -0,0 +1,159 @@
1
+ metadata:
2
+ name: Document Based Knowledge Tuning Dataset Generation Flow
3
+ description: Directly generates QA pairs from the raw document.
4
+ version: 2.0.0
5
+ author: SDG Hub Contributors
6
+ recommended_models:
7
+ default: openai/gpt-oss-120b
8
+ compatible:
9
+ - meta-llama/Llama-3.3-70B-Instruct
10
+ - microsoft/phi-4
11
+ - mistralai/Mixtral-8x7B-Instruct-v0.1
12
+ experimental: []
13
+ tags:
14
+ - knowledge-tuning
15
+ - document-internalization
16
+ - question-generation
17
+ - qa-pairs
18
+ - detailed-summaries
19
+ license: Apache-2.0
20
+ min_sdg_hub_version: 0.2.0
21
+ dataset_requirements:
22
+ required_columns:
23
+ - document
24
+ - document_outline
25
+ - domain
26
+ - icl_document
27
+ - icl_query_1
28
+ - icl_query_2
29
+ - icl_query_3
30
+ description: 'Input dataset should contain documents with text content and domain classification. Each document should be substantial enough for meaningful question generation (minimum 100 words recommended). The flow directly generates
31
+ QA pairs from the raw document, followed by a faithfulness evaluation and filtering step, producing question/response pairs designed to help LLMs internalize document knowledge for knowledge tuning.'
32
+ output_columns:
33
+ - question
34
+ - response
35
+ - raw_document
36
+ - faithfulness_explanation
37
+ - faithfulness_judgment
38
+ id: stellar-peak-605
39
+ blocks:
40
+ - block_type: DuplicateColumnsBlock
41
+ block_config:
42
+ block_name: duplicate_document_col
43
+ input_cols:
44
+ document: base_document
45
+ - block_type: PromptBuilderBlock
46
+ block_config:
47
+ block_name: question_generation_prompt
48
+ input_cols:
49
+ - domain
50
+ - document
51
+ - document_outline
52
+ - icl_document
53
+ - icl_query_1
54
+ - icl_query_2
55
+ - icl_query_3
56
+ output_cols: question_generation_prompt
57
+ prompt_config_path: ../generate_question_list.yaml
58
+ format_as_messages: true
59
+ - block_type: LLMChatBlock
60
+ block_config:
61
+ block_name: question_generation
62
+ input_cols: question_generation_prompt
63
+ output_cols: question_list
64
+ max_tokens: 256
65
+ temperature: 1.0
66
+ n: 1
67
+ async_mode: true
68
+ - block_type: LLMParserBlock
69
+ block_config:
70
+ block_name: extract_questions
71
+ input_cols: question_list
72
+ extract_content: true
73
+ expand_lists: true
74
+ - block_type: TextParserBlock
75
+ block_config:
76
+ block_name: parse_question_list
77
+ input_cols: extract_questions_content
78
+ output_cols: question
79
+ start_tags:
80
+ - '[QUESTION]'
81
+ end_tags:
82
+ - '[END]'
83
+ - block_type: PromptBuilderBlock
84
+ block_config:
85
+ block_name: answer_generation_prompt
86
+ input_cols:
87
+ - question
88
+ - document
89
+ - document_outline
90
+ output_cols: answer_generation_prompt
91
+ prompt_config_path: ../generate_answers.yaml
92
+ format_as_messages: true
93
+ - block_type: LLMChatBlock
94
+ block_config:
95
+ block_name: answer_generation
96
+ input_cols: answer_generation_prompt
97
+ output_cols: response_dict
98
+ max_tokens: 4096
99
+ temperature: 1.0
100
+ n: 1
101
+ async_mode: true
102
+ - block_type: LLMParserBlock
103
+ block_config:
104
+ block_name: extract_answer
105
+ input_cols: response_dict
106
+ extract_content: true
107
+ expand_lists: true
108
+ - block_type: TextParserBlock
109
+ block_config:
110
+ block_name: parse_response_dict
111
+ input_cols: extract_answer_content
112
+ output_cols: response
113
+ start_tags:
114
+ - ''
115
+ end_tags:
116
+ - ''
117
+ save_reasoning_content: true
118
+ - block_type: PromptBuilderBlock
119
+ block_config:
120
+ block_name: eval_faithful_prompt
121
+ input_cols:
122
+ - document
123
+ - response
124
+ output_cols: eval_faithful_prompt
125
+ prompt_config_path: ../../multi_summary_qa/instructlab/evaluate_faithfulness.yaml
126
+ format_as_messages: true
127
+ - block_type: LLMChatBlock
128
+ block_config:
129
+ block_name: eval_faithful_llm_chat
130
+ input_cols: eval_faithful_prompt
131
+ output_cols: eval_faithful_response_dict
132
+ n: 1
133
+ async_mode: true
134
+ - block_type: LLMParserBlock
135
+ block_config:
136
+ block_name: extract_eval_faithful
137
+ input_cols: eval_faithful_response_dict
138
+ extract_content: true
139
+
140
+ - block_type: TextParserBlock
141
+ block_config:
142
+ block_name: parse_eval_faithful
143
+ input_cols: extract_eval_faithful_content
144
+ output_cols:
145
+ - faithfulness_explanation
146
+ - faithfulness_judgment
147
+ start_tags:
148
+ - '[Start of Explanation]'
149
+ - '[Start of Answer]'
150
+ end_tags:
151
+ - '[End of Explanation]'
152
+ - '[End of Answer]'
153
+ - block_type: ColumnValueFilterBlock
154
+ block_config:
155
+ block_name: eval_faithful_filter
156
+ input_cols:
157
+ - faithfulness_judgment
158
+ filter_value: 'YES'
159
+ operation: eq
@@ -63,10 +63,16 @@ blocks:
63
63
  temperature: 0.7
64
64
  n: 50
65
65
  async_mode: true
66
+ - block_type: LLMParserBlock
67
+ block_config:
68
+ block_name: extract_extractive_summary
69
+ input_cols: raw_summary
70
+ extract_content: true
71
+ expand_lists: true
66
72
  - block_type: TextParserBlock
67
73
  block_config:
68
74
  block_name: parse_extractive_summary
69
- input_cols: raw_summary
75
+ input_cols: extract_extractive_summary_content
70
76
  output_cols: summary
71
77
  start_tags:
72
78
  - ''
@@ -101,10 +107,16 @@ blocks:
101
107
  temperature: 0.7
102
108
  n: 1
103
109
  async_mode: true
110
+ - block_type: LLMParserBlock
111
+ block_config:
112
+ block_name: extract_questions
113
+ input_cols: question_list
114
+ extract_content: true
115
+ expand_lists: true
104
116
  - block_type: TextParserBlock
105
117
  block_config:
106
118
  block_name: parse_question_list
107
- input_cols: question_list
119
+ input_cols: extract_questions_content
108
120
  output_cols: question
109
121
  start_tags:
110
122
  - '[QUESTION]'
@@ -129,33 +141,61 @@ blocks:
129
141
  temperature: 0.7
130
142
  n: 1
131
143
  async_mode: true
144
+ - block_type: LLMParserBlock
145
+ block_config:
146
+ block_name: extract_answers
147
+ input_cols: response_dict
148
+ extract_content: true
149
+ expand_lists: true
132
150
  - block_type: TextParserBlock
133
151
  block_config:
134
152
  block_name: parse_response_dict
135
- input_cols: response_dict
153
+ input_cols: extract_answers_content
136
154
  output_cols: response
137
155
  start_tags:
138
156
  - ''
139
157
  end_tags:
140
158
  - ''
141
159
  save_reasoning_content: true
142
- - block_type: EvaluateFaithfulnessBlock
160
+ - block_type: PromptBuilderBlock
143
161
  block_config:
144
- block_name: eval_faithfulness
162
+ block_name: eval_faithful_prompt
145
163
  input_cols:
146
164
  - document
147
165
  - response
148
- output_cols:
149
- - faithfulness_explanation
150
- - faithfulness_judgment
166
+ output_cols: eval_faithful_prompt
151
167
  prompt_config_path: ../../multi_summary_qa/instructlab/evaluate_faithfulness.yaml
152
- filter_value: 'YES'
153
- operation: eq
154
- async_mode: true
155
168
  format_as_messages: true
169
+ - block_type: LLMChatBlock
170
+ block_config:
171
+ block_name: eval_faithful_llm_chat
172
+ input_cols: eval_faithful_prompt
173
+ output_cols: eval_faithful_response_dict
174
+ n: 1
175
+ async_mode: true
176
+ - block_type: LLMParserBlock
177
+ block_config:
178
+ block_name: extract_eval_faithful
179
+ input_cols: eval_faithful_response_dict
180
+ extract_content: true
181
+
182
+ - block_type: TextParserBlock
183
+ block_config:
184
+ block_name: parse_eval_faithful
185
+ input_cols: extract_eval_faithful_content
186
+ output_cols:
187
+ - faithfulness_explanation
188
+ - faithfulness_judgment
156
189
  start_tags:
157
190
  - '[Start of Explanation]'
158
191
  - '[Start of Answer]'
159
192
  end_tags:
160
193
  - '[End of Explanation]'
161
194
  - '[End of Answer]'
195
+ - block_type: ColumnValueFilterBlock
196
+ block_config:
197
+ block_name: eval_faithful_filter
198
+ input_cols:
199
+ - faithfulness_judgment
200
+ filter_value: 'YES'
201
+ operation: eq
@@ -50,10 +50,16 @@ blocks:
50
50
  temperature: 0.7
51
51
  n: 1
52
52
  async_mode: true
53
+ - block_type: LLMParserBlock
54
+ block_config:
55
+ block_name: extract_atomic_facts
56
+ input_cols: raw_summary
57
+ extract_content: true
58
+ expand_lists: true
53
59
  - block_type: TextParserBlock
54
60
  block_config:
55
61
  block_name: parse_atomic_facts
56
- input_cols: raw_summary
62
+ input_cols: extract_atomic_facts_content
57
63
  output_cols: atomic_facts
58
64
  start_tags:
59
65
  - '### Key Facts With Context'
@@ -89,10 +95,16 @@ blocks:
89
95
  temperature: 0.7
90
96
  n: 1
91
97
  async_mode: true
98
+ - block_type: LLMParserBlock
99
+ block_config:
100
+ block_name: extract_key_fact_qa
101
+ input_cols: raw_key_fact_qa
102
+ extract_content: true
103
+ expand_lists: true
92
104
  - block_type: TextParserBlock
93
105
  block_config:
94
106
  block_name: parse_key_fact_qa
95
- input_cols: raw_key_fact_qa
107
+ input_cols: extract_key_fact_qa_content
96
108
  output_cols:
97
109
  - question
98
110
  - response
@@ -54,11 +54,19 @@ blocks:
54
54
  output_cols: raw_summary_detailed
55
55
  max_tokens: 2048
56
56
  async_mode: true
57
+ n: 2
58
+
59
+ - block_type: LLMParserBlock
60
+ block_config:
61
+ block_name: detailed_summary
62
+ input_cols: raw_summary_detailed
63
+ extract_content: true
64
+ extract_reasoning_content: true
57
65
 
58
66
  - block_type: TextParserBlock
59
67
  block_config:
60
68
  block_name: parse_detailed_summary
61
- input_cols: raw_summary_detailed
69
+ input_cols: detailed_summary_content
62
70
  output_cols: summary_detailed
63
71
  start_tags: [""]
64
72
  end_tags: [""]
@@ -78,10 +86,16 @@ blocks:
78
86
  max_tokens: 2048
79
87
  async_mode: true
80
88
 
89
+ - block_type: LLMParserBlock
90
+ block_config:
91
+ block_name: atomic_facts
92
+ input_cols: raw_atomic_facts
93
+ extract_content: true
94
+
81
95
  - block_type: TextParserBlock
82
96
  block_config:
83
97
  block_name: parse_atomic_facts
84
- input_cols: raw_atomic_facts
98
+ input_cols: atomic_facts_content
85
99
  output_cols: summary_atomic_facts
86
100
  start_tags: [""]
87
101
  end_tags: [""]
@@ -101,10 +115,16 @@ blocks:
101
115
  max_tokens: 2048
102
116
  async_mode: true
103
117
 
118
+ - block_type: LLMParserBlock
119
+ block_config:
120
+ block_name: extractive_summary
121
+ input_cols: raw_summary_extractive
122
+ extract_content: true
123
+
104
124
  - block_type: TextParserBlock
105
125
  block_config:
106
126
  block_name: parse_extractive_summary
107
- input_cols: raw_summary_extractive
127
+ input_cols: extractive_summary_content
108
128
  output_cols: summary_extractive
109
129
  start_tags: [""]
110
130
  end_tags: [""]
@@ -136,50 +156,150 @@ blocks:
136
156
  max_tokens: 2048
137
157
  async_mode: true
138
158
 
159
+ - block_type: LLMParserBlock
160
+ block_config:
161
+ block_name: get_knowledge_generation
162
+ input_cols: raw_knowledge_generation
163
+ extract_content: true
164
+
139
165
  - block_type: TextParserBlock
140
166
  block_config:
141
167
  block_name: parse_knowledge_generation
142
- input_cols: raw_knowledge_generation
168
+ input_cols: get_knowledge_generation_content
143
169
  output_cols: [question, response]
144
170
  parsing_pattern: "\\[(?:Question|QUESTION)\\]\\s*(.*?)\\s*\\[(?:Answer|ANSWER)\\]\\s*(.*?)\\s*(?=\\[(?:Question|QUESTION)\\]|$)"
145
171
  parser_cleanup_tags: ["[END]"]
146
172
 
147
- - block_type: EvaluateFaithfulnessBlock
173
+ - block_type: PromptBuilderBlock
148
174
  block_config:
149
- block_name: eval_faithfulness
175
+ block_name: eval_faithful_prompt
150
176
  input_cols: [document, response]
151
- output_cols: [faithfulness_explanation, faithfulness_judgment]
177
+ output_cols: eval_faithful_prompt
152
178
  prompt_config_path: evaluate_faithfulness.yaml
179
+ format_as_messages: true
180
+
181
+ - block_type: LLMChatBlock
182
+ block_config:
183
+ block_name: eval_faithful_llm_chat
184
+ input_cols: eval_faithful_prompt
185
+ output_cols: eval_faithful_response_dict
186
+ max_tokens: 2048
187
+ n: 1
188
+ async_mode: true
189
+
190
+ - block_type: LLMParserBlock
191
+ block_config:
192
+ block_name: extract_eval_faithful
193
+ input_cols: eval_faithful_response_dict
194
+ extract_content: true
195
+
196
+ - block_type: TextParserBlock
197
+ block_config:
198
+ block_name: parse_eval_faithful
199
+ input_cols: extract_eval_faithful_content
200
+ output_cols:
201
+ - faithfulness_explanation
202
+ - faithfulness_judgment
203
+ start_tags:
204
+ - '[Start of Explanation]'
205
+ - '[Start of Answer]'
206
+ end_tags:
207
+ - '[End of Explanation]'
208
+ - '[End of Answer]'
209
+
210
+ - block_type: ColumnValueFilterBlock
211
+ block_config:
212
+ block_name: eval_faithful_filter
213
+ input_cols:
214
+ - faithfulness_judgment
153
215
  filter_value: "YES"
154
216
  operation: eq
155
- async_mode: true
156
- start_tags: ["[Start of Explanation]", "[Start of Answer]"]
157
- end_tags: ["[End of Explanation]", "[End of Answer]"]
158
217
 
159
- - block_type: EvaluateRelevancyBlock
218
+ - block_type: PromptBuilderBlock
160
219
  block_config:
161
- block_name: eval_relevancy
162
- input_cols: [question, response]
163
- output_cols: [relevancy_explanation, relevancy_score]
220
+ block_name: eval_relevancy_prompt
221
+ input_cols:
222
+ - question
223
+ - response
224
+ output_cols: eval_relevancy_prompt
164
225
  prompt_config_path: evaluate_relevancy.yaml
226
+ format_as_messages: true
227
+ - block_type: LLMChatBlock
228
+ block_config:
229
+ block_name: eval_relevancy_llm_chat
230
+ input_cols: eval_relevancy_prompt
231
+ output_cols: eval_relevancy_response_dict
232
+ max_tokens: 2048
233
+ n: 1
234
+ async_mode: true
235
+ - block_type: LLMParserBlock
236
+ block_config:
237
+ block_name: extract_eval_relevancy
238
+ input_cols: eval_relevancy_response_dict
239
+ extract_content: true
240
+
241
+ - block_type: TextParserBlock
242
+ block_config:
243
+ block_name: parse_eval_relevancy
244
+ input_cols: extract_eval_relevancy_content
245
+ output_cols:
246
+ - relevancy_explanation
247
+ - relevancy_score
248
+ start_tags:
249
+ - '[Start of Feedback]'
250
+ - '[Start of Score]'
251
+ end_tags:
252
+ - '[End of Feedback]'
253
+ - '[End of Score]'
254
+ - block_type: ColumnValueFilterBlock
255
+ block_config:
256
+ block_name: eval_relevancy_filter
257
+ input_cols:
258
+ - relevancy_score
165
259
  filter_value: 2.0
166
260
  operation: eq
167
261
  convert_dtype: float
168
- max_tokens: 2048
169
- async_mode: true
170
- start_tags: ["[Start of Feedback]", "[Start of Score]"]
171
- end_tags: ["[End of Feedback]", "[End of Score]"]
172
262
 
173
- - block_type: VerifyQuestionBlock
263
+ - block_type: PromptBuilderBlock
174
264
  block_config:
175
- block_name: verify_question
176
- input_cols: [question]
177
- output_cols: [verification_explanation, verification_rating]
265
+ block_name: verify_question_prompt
266
+ input_cols:
267
+ - question
268
+ output_cols: verify_question_prompt
178
269
  prompt_config_path: evaluate_question.yaml
270
+ format_as_messages: true
271
+ - block_type: LLMChatBlock
272
+ block_config:
273
+ block_name: verify_question_llm_chat
274
+ input_cols: verify_question_prompt
275
+ output_cols: verify_question_response_dict
276
+ max_tokens: 2048
277
+ n: 1
278
+ async_mode: true
279
+ - block_type: LLMParserBlock
280
+ block_config:
281
+ block_name: extract_verify_question
282
+ input_cols: verify_question_response_dict
283
+ extract_content: true
284
+
285
+ - block_type: TextParserBlock
286
+ block_config:
287
+ block_name: parse_verify_question
288
+ input_cols: extract_verify_question_content
289
+ output_cols:
290
+ - verification_explanation
291
+ - verification_rating
292
+ start_tags:
293
+ - '[Start of Explanation]'
294
+ - '[Start of Rating]'
295
+ end_tags:
296
+ - '[End of Explanation]'
297
+ - '[End of Rating]'
298
+ - block_type: ColumnValueFilterBlock
299
+ block_config:
300
+ block_name: verify_question_filter
301
+ input_cols:
302
+ - verification_rating
179
303
  filter_value: 1.0
180
304
  operation: ge
181
305
  convert_dtype: float
182
- max_tokens: 2048
183
- async_mode: true
184
- start_tags: ["[Start of Explanation]", "[Start of Rating]"]
185
- end_tags: ["[End of Explanation]", "[End of Rating]"]
@@ -0,0 +1,41 @@
1
+ - role: system
2
+ content: You are an AI assistant knowledgeable about {{domain}} domain. Be accurate but concise in response.
3
+
4
+ - role: user
5
+ content: |
6
+ Please break down the following snippet from an article about {{domain}} into atomic facts.
7
+
8
+ 1. Make sure each fact is grounded in the given text.
9
+ 2. Include any necessary information needed to explain the fact or concept
10
+ 3. The atomic facts should be as simple as possible; if a fact is a compound sentence, break it down one more time
11
+ 4. For clarity, avoid using pronouns like 'it', 'he', 'she', 'this', 'that' etc., and instead use the full names or titles.
12
+ 5. Focus only on key concepts and facts. Skip any question or problems mentioned in the passage.
13
+ 6. Output the response in Japanese.
14
+
15
+ To help you understand the task, here is an example:
16
+ [Passage]
17
+ The tournament was contested by ten national teams, maintaining the same format used in 2019. After six weeks of round-robin matches, India, South Africa, Australia, and New Zealand finished as the top four and qualified for the knockout stage. In the knockout stage, India and Australia beat New Zealand and South Africa, respectively, to advance to the final, played on 19 November at the Narendra Modi Stadium in Ahmedabad. Australia won the final by six wickets, winning their sixth Cricket World Cup title.
18
+ [Facts]
19
+ 1. The tournament was contested by ten national teams.
20
+ 2. The tournament maintained the same format used in 2019.
21
+ 3. The round-robin matches lasted for six weeks.
22
+ 4. India finished as one of the top four teams.
23
+ 5. South Africa finished as one of the top four teams.
24
+ 6. Australia finished as one of the top four teams.
25
+ 7. New Zealand finished as one of the top four teams.
26
+ 8. India, South Africa, Australia, and New Zealand qualified for the knockout stage.
27
+ 9. In the knockout stage, India beat New Zealand.
28
+ 10. In the knockout stage, Australia beat South Africa.
29
+ 11. India advanced to the final.
30
+ 12. Australia advanced to the final.
31
+ 13. The final was played on 19 November.
32
+ 14. The final was held at the Narendra Modi Stadium in Ahmedabad.
33
+ 15. Australia won the final by six wickets.
34
+ 16. Australia won their sixth Cricket World Cup title.
35
+ [End]
36
+
37
+ Now it's your turn to break down the following snippet from an article about {{domain}} into atomic facts, following a similar style as the above examples
38
+ [Passage]
39
+ {{document_outline}}
40
+ {{document}}
41
+ [Facts]
@@ -0,0 +1,14 @@
1
+ - role: system
2
+ content: You are an AI assistant that is expert at summarizing text.
3
+
4
+ - role: user
5
+ content: |
6
+ Give me detailed summary for below document, making sure all key points are covered.
7
+
8
+ Do not add any new information.
9
+ Do not miss any key points from the provided document.
10
+ Output the response in Japanese.
11
+
12
+ Document:
13
+ {{document_outline}}
14
+ {{document}}