sdg-hub 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. sdg_hub/_version.py +2 -2
  2. sdg_hub/core/blocks/__init__.py +2 -4
  3. sdg_hub/core/blocks/base.py +61 -6
  4. sdg_hub/core/blocks/filtering/column_value_filter.py +3 -2
  5. sdg_hub/core/blocks/llm/__init__.py +2 -4
  6. sdg_hub/core/blocks/llm/llm_chat_block.py +251 -265
  7. sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +216 -98
  8. sdg_hub/core/blocks/llm/llm_parser_block.py +320 -0
  9. sdg_hub/core/blocks/llm/text_parser_block.py +53 -152
  10. sdg_hub/core/flow/base.py +7 -4
  11. sdg_hub/core/utils/datautils.py +40 -22
  12. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml +51 -11
  13. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/__init__.py +0 -0
  14. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/flow.yaml +159 -0
  15. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml +51 -11
  16. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml +14 -2
  17. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +146 -26
  18. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/README.md +0 -0
  19. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/__init__.py +0 -0
  20. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/atomic_facts_ja.yaml +41 -0
  21. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/detailed_summary_ja.yaml +14 -0
  22. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/extractive_summary_ja.yaml +14 -0
  23. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml +304 -0
  24. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/generate_questions_responses_ja.yaml +55 -0
  25. sdg_hub/flows/text_analysis/structured_insights/flow.yaml +28 -4
  26. {sdg_hub-0.3.0.dist-info → sdg_hub-0.4.0.dist-info}/METADATA +1 -1
  27. {sdg_hub-0.3.0.dist-info → sdg_hub-0.4.0.dist-info}/RECORD +30 -26
  28. sdg_hub/core/blocks/evaluation/__init__.py +0 -9
  29. sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +0 -323
  30. sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +0 -323
  31. sdg_hub/core/blocks/evaluation/verify_question_block.py +0 -329
  32. sdg_hub/core/blocks/llm/client_manager.py +0 -447
  33. sdg_hub/core/blocks/llm/config.py +0 -337
  34. {sdg_hub-0.3.0.dist-info → sdg_hub-0.4.0.dist-info}/WHEEL +0 -0
  35. {sdg_hub-0.3.0.dist-info → sdg_hub-0.4.0.dist-info}/licenses/LICENSE +0 -0
  36. {sdg_hub-0.3.0.dist-info → sdg_hub-0.4.0.dist-info}/top_level.txt +0 -0
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml
@@ -54,11 +54,19 @@ blocks:
       output_cols: raw_summary_detailed
       max_tokens: 2048
       async_mode: true
+      n: 2
+
+  - block_type: LLMParserBlock
+    block_config:
+      block_name: detailed_summary
+      input_cols: raw_summary_detailed
+      extract_content: true
+      extract_reasoning_content: true
 
   - block_type: TextParserBlock
     block_config:
       block_name: parse_detailed_summary
-      input_cols: raw_summary_detailed
+      input_cols: detailed_summary_content
       output_cols: summary_detailed
       start_tags: [""]
       end_tags: [""]
@@ -78,10 +86,16 @@ blocks:
       max_tokens: 2048
       async_mode: true
 
+  - block_type: LLMParserBlock
+    block_config:
+      block_name: atomic_facts
+      input_cols: raw_atomic_facts
+      extract_content: true
+
   - block_type: TextParserBlock
     block_config:
       block_name: parse_atomic_facts
-      input_cols: raw_atomic_facts
+      input_cols: atomic_facts_content
       output_cols: summary_atomic_facts
       start_tags: [""]
       end_tags: [""]
@@ -101,10 +115,16 @@ blocks:
       max_tokens: 2048
       async_mode: true
 
+  - block_type: LLMParserBlock
+    block_config:
+      block_name: extractive_summary
+      input_cols: raw_summary_extractive
+      extract_content: true
+
   - block_type: TextParserBlock
     block_config:
       block_name: parse_extractive_summary
-      input_cols: raw_summary_extractive
+      input_cols: extractive_summary_content
       output_cols: summary_extractive
       start_tags: [""]
       end_tags: [""]
@@ -136,50 +156,150 @@ blocks:
       max_tokens: 2048
       async_mode: true
 
+  - block_type: LLMParserBlock
+    block_config:
+      block_name: get_knowledge_generation
+      input_cols: raw_knowledge_generation
+      extract_content: true
+
   - block_type: TextParserBlock
     block_config:
       block_name: parse_knowledge_generation
-      input_cols: raw_knowledge_generation
+      input_cols: get_knowledge_generation_content
       output_cols: [question, response]
       parsing_pattern: "\\[(?:Question|QUESTION)\\]\\s*(.*?)\\s*\\[(?:Answer|ANSWER)\\]\\s*(.*?)\\s*(?=\\[(?:Question|QUESTION)\\]|$)"
       parser_cleanup_tags: ["[END]"]
 
-  - block_type: EvaluateFaithfulnessBlock
+  - block_type: PromptBuilderBlock
     block_config:
-      block_name: eval_faithfulness
+      block_name: eval_faithful_prompt
       input_cols: [document, response]
-      output_cols: [faithfulness_explanation, faithfulness_judgment]
+      output_cols: eval_faithful_prompt
       prompt_config_path: evaluate_faithfulness.yaml
+      format_as_messages: true
+
+  - block_type: LLMChatBlock
+    block_config:
+      block_name: eval_faithful_llm_chat
+      input_cols: eval_faithful_prompt
+      output_cols: eval_faithful_response_dict
+      max_tokens: 2048
+      n: 1
+      async_mode: true
+
+  - block_type: LLMParserBlock
+    block_config:
+      block_name: extract_eval_faithful
+      input_cols: eval_faithful_response_dict
+      extract_content: true
+
+  - block_type: TextParserBlock
+    block_config:
+      block_name: parse_eval_faithful
+      input_cols: extract_eval_faithful_content
+      output_cols:
+        - faithfulness_explanation
+        - faithfulness_judgment
+      start_tags:
+        - '[Start of Explanation]'
+        - '[Start of Answer]'
+      end_tags:
+        - '[End of Explanation]'
+        - '[End of Answer]'
+
+  - block_type: ColumnValueFilterBlock
+    block_config:
+      block_name: eval_faithful_filter
+      input_cols:
+        - faithfulness_judgment
       filter_value: "YES"
       operation: eq
-      async_mode: true
-      start_tags: ["[Start of Explanation]", "[Start of Answer]"]
-      end_tags: ["[End of Explanation]", "[End of Answer]"]
 
-  - block_type: EvaluateRelevancyBlock
+  - block_type: PromptBuilderBlock
     block_config:
-      block_name: eval_relevancy
-      input_cols: [question, response]
-      output_cols: [relevancy_explanation, relevancy_score]
+      block_name: eval_relevancy_prompt
+      input_cols:
+        - question
+        - response
+      output_cols: eval_relevancy_prompt
       prompt_config_path: evaluate_relevancy.yaml
+      format_as_messages: true
+  - block_type: LLMChatBlock
+    block_config:
+      block_name: eval_relevancy_llm_chat
+      input_cols: eval_relevancy_prompt
+      output_cols: eval_relevancy_response_dict
+      max_tokens: 2048
+      n: 1
+      async_mode: true
+  - block_type: LLMParserBlock
+    block_config:
+      block_name: extract_eval_relevancy
+      input_cols: eval_relevancy_response_dict
+      extract_content: true
+
+  - block_type: TextParserBlock
+    block_config:
+      block_name: parse_eval_relevancy
+      input_cols: extract_eval_relevancy_content
+      output_cols:
+        - relevancy_explanation
+        - relevancy_score
+      start_tags:
+        - '[Start of Feedback]'
+        - '[Start of Score]'
+      end_tags:
+        - '[End of Feedback]'
+        - '[End of Score]'
+  - block_type: ColumnValueFilterBlock
+    block_config:
+      block_name: eval_relevancy_filter
+      input_cols:
+        - relevancy_score
       filter_value: 2.0
       operation: eq
       convert_dtype: float
-      max_tokens: 2048
-      async_mode: true
-      start_tags: ["[Start of Feedback]", "[Start of Score]"]
-      end_tags: ["[End of Feedback]", "[End of Score]"]
 
-  - block_type: VerifyQuestionBlock
+  - block_type: PromptBuilderBlock
     block_config:
-      block_name: verify_question
-      input_cols: [question]
-      output_cols: [verification_explanation, verification_rating]
+      block_name: verify_question_prompt
+      input_cols:
+        - question
+      output_cols: verify_question_prompt
       prompt_config_path: evaluate_question.yaml
+      format_as_messages: true
+  - block_type: LLMChatBlock
+    block_config:
+      block_name: verify_question_llm_chat
+      input_cols: verify_question_prompt
+      output_cols: verify_question_response_dict
+      max_tokens: 2048
+      n: 1
+      async_mode: true
+  - block_type: LLMParserBlock
+    block_config:
+      block_name: extract_verify_question
+      input_cols: verify_question_response_dict
+      extract_content: true
+
+  - block_type: TextParserBlock
+    block_config:
+      block_name: parse_verify_question
+      input_cols: extract_verify_question_content
+      output_cols:
+        - verification_explanation
+        - verification_rating
+      start_tags:
+        - '[Start of Explanation]'
+        - '[Start of Rating]'
+      end_tags:
+        - '[End of Explanation]'
+        - '[End of Rating]'
+  - block_type: ColumnValueFilterBlock
+    block_config:
+      block_name: verify_question_filter
+      input_cols:
+        - verification_rating
       filter_value: 1.0
       operation: ge
      convert_dtype: float
-      max_tokens: 2048
-      async_mode: true
-      start_tags: ["[Start of Explanation]", "[Start of Rating]"]
-      end_tags: ["[End of Explanation]", "[End of Rating]"]
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/atomic_facts_ja.yaml
@@ -0,0 +1,41 @@
+- role: system
+  content: You are an AI assistant knowledgeable about the {{domain}} domain. Be accurate but concise in your responses.
+
+- role: user
+  content: |
+    Please break down the following snippet from an article about {{domain}} into atomic facts.
+
+    1. Make sure each fact is grounded in the given text.
+    2. Include any information needed to explain the fact or concept.
+    3. Keep each atomic fact as simple as possible; if a sentence is compound, break it down further.
+    4. For clarity, avoid pronouns like 'it', 'he', 'she', 'this', and 'that'; use full names or titles instead.
+    5. Focus only on key concepts and facts; skip any questions or problems mentioned in the passage.
+    6. Output the response in Japanese.
+
+    To help you understand the task, here is an example:
+    [Passage]
+    The tournament was contested by ten national teams, maintaining the same format used in 2019. After six weeks of round-robin matches, India, South Africa, Australia, and New Zealand finished as the top four and qualified for the knockout stage. In the knockout stage, India and Australia beat New Zealand and South Africa, respectively, to advance to the final, played on 19 November at the Narendra Modi Stadium in Ahmedabad. Australia won the final by six wickets, winning their sixth Cricket World Cup title.
+    [Facts]
+    1. The tournament was contested by ten national teams.
+    2. The tournament maintained the same format used in 2019.
+    3. The round-robin matches lasted for six weeks.
+    4. India finished as one of the top four teams.
+    5. South Africa finished as one of the top four teams.
+    6. Australia finished as one of the top four teams.
+    7. New Zealand finished as one of the top four teams.
+    8. India, South Africa, Australia, and New Zealand qualified for the knockout stage.
+    9. In the knockout stage, India beat New Zealand.
+    10. In the knockout stage, Australia beat South Africa.
+    11. India advanced to the final.
+    12. Australia advanced to the final.
+    13. The final was played on 19 November.
+    14. The final was held at the Narendra Modi Stadium in Ahmedabad.
+    15. Australia won the final by six wickets.
+    16. Australia won their sixth Cricket World Cup title.
+    [End]
+
+    Now it's your turn: break down the following snippet from an article about {{domain}} into atomic facts in the same style as the example above.
+    [Passage]
+    {{document_outline}}
+    {{document}}
+    [Facts]
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/detailed_summary_ja.yaml
@@ -0,0 +1,14 @@
+- role: system
+  content: You are an AI assistant that is an expert at summarizing text.
+
+- role: user
+  content: |
+    Give me a detailed summary of the document below, making sure all key points are covered.
+
+    Do not add any new information.
+    Do not miss any key points from the provided document.
+    Output the response in Japanese.
+
+    Document:
+    {{document_outline}}
+    {{document}}
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/extractive_summary_ja.yaml
@@ -0,0 +1,14 @@
+- role: system
+  content: You are an AI assistant that is an expert at summarizing text.
+
+- role: user
+  content: |
+    Give me a detailed extractive summary of the document below, making sure all key points are covered.
+
+    Do not add any new information.
+    Do not miss any key points from the provided document.
+    Output the response in Japanese.
+
+    Document:
+    {{document_outline}}
+    {{document}}
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml
@@ -0,0 +1,304 @@
+metadata:
+  id: clean-shadow-397
+  name: "Advanced Japanese Document Grounded Question-Answer Generation Flow for Knowledge Tuning"
+  description: "A comprehensive flow that generates high-quality question-answer pairs from Japanese input documents using multiple LLM blocks for question generation, answer synthesis, and quality evaluation."
+  version: "1.0.0"
+  author: "SDG Hub Contributors"
+
+  recommended_models:
+    default: "microsoft/phi-4"
+    compatible: ["meta-llama/Llama-3.3-70B-Instruct", "mistralai/Mixtral-8x7B-Instruct-v0.1"]
+    experimental: []
+
+  tags:
+    - "question-generation"
+    - "knowledge-extraction"
+    - "qa-pairs"
+    - "document-processing"
+    - "educational"
+    - "japanese"
+
+  license: "Apache-2.0"
+  min_sdg_hub_version: "0.2.0"
+
+  dataset_requirements:
+    required_columns:
+      - "document"
+      - "document_outline"
+      - "domain"
+      - "icl_document"
+      - "icl_query_1"
+      - "icl_response_1"
+      - "icl_query_2"
+      - "icl_response_2"
+      - "icl_query_3"
+      - "icl_response_3"
+    description: "Input dataset should contain documents with Japanese text content and domain classification. Each document should be substantial enough for meaningful question generation (minimum 100 words recommended)."
+
+blocks:
+  - block_type: DuplicateColumnsBlock
+    block_config:
+      block_name: duplicate_document_col
+      input_cols: {document: base_document}
+
+  - block_type: PromptBuilderBlock
+    block_config:
+      block_name: detailed_summary_prompt
+      input_cols: [document, document_outline]
+      output_cols: summary_prompt
+      prompt_config_path: detailed_summary_ja.yaml
+
+  - block_type: LLMChatBlock
+    block_config:
+      block_name: gen_detailed_summary
+      input_cols: summary_prompt
+      output_cols: raw_summary_detailed
+      max_tokens: 2048
+      async_mode: true
+
+  - block_type: LLMParserBlock
+    block_config:
+      block_name: extract_detailed_summary
+      input_cols: raw_summary_detailed
+      extract_content: true
+
+  - block_type: TextParserBlock
+    block_config:
+      block_name: parse_detailed_summary
+      input_cols: extract_detailed_summary_content
+      output_cols: summary_detailed
+      start_tags: [""]
+      end_tags: [""]
+
+  - block_type: PromptBuilderBlock
+    block_config:
+      block_name: atomic_facts_prompt
+      input_cols: [document, document_outline, domain]
+      output_cols: atomic_facts_prompt
+      prompt_config_path: atomic_facts_ja.yaml
+
+  - block_type: LLMChatBlock
+    block_config:
+      block_name: gen_atomic_facts
+      input_cols: atomic_facts_prompt
+      output_cols: raw_atomic_facts
+      max_tokens: 2048
+      async_mode: true
+
+  - block_type: LLMParserBlock
+    block_config:
+      block_name: extract_atomic_facts
+      input_cols: raw_atomic_facts
+      extract_content: true
+
+  - block_type: TextParserBlock
+    block_config:
+      block_name: parse_atomic_facts
+      input_cols: extract_atomic_facts_content
+      output_cols: summary_atomic_facts
+      start_tags: [""]
+      end_tags: [""]
+
+  - block_type: PromptBuilderBlock
+    block_config:
+      block_name: extractive_summary_prompt
+      input_cols: [document, document_outline]
+      output_cols: extractive_summary_prompt
+      prompt_config_path: extractive_summary_ja.yaml
+
+  - block_type: LLMChatBlock
+    block_config:
+      block_name: gen_extractive_summary
+      input_cols: extractive_summary_prompt
+      output_cols: raw_summary_extractive
+      max_tokens: 2048
+      async_mode: true
+
+  - block_type: LLMParserBlock
+    block_config:
+      block_name: extract_extractive_summary
+      input_cols: raw_summary_extractive
+      extract_content: true
+
+  - block_type: TextParserBlock
+    block_config:
+      block_name: parse_extractive_summary
+      input_cols: extract_extractive_summary_content
+      output_cols: summary_extractive
+      start_tags: [""]
+      end_tags: [""]
+
+  - block_type: MeltColumnsBlock
+    block_config:
+      block_name: melt_summary_columns
+      input_cols: [summary_detailed, summary_extractive, summary_atomic_facts, base_document]
+      output_cols: [summary, dataset_type]
+
+  - block_type: RenameColumnsBlock
+    block_config:
+      block_name: rename_to_document_column
+      input_cols: {document: raw_document, summary: document}
+
+  - block_type: PromptBuilderBlock
+    block_config:
+      block_name: knowledge_generation_prompt
+      input_cols: [domain, document, document_outline, icl_document, icl_query_1, icl_response_1, icl_query_2, icl_response_2, icl_query_3, icl_response_3]
+      output_cols: knowledge_generation_prompt
+      prompt_config_path: generate_questions_responses_ja.yaml
+
+  - block_type: LLMChatBlock
+    block_config:
+      block_name: knowledge_generation
+      input_cols: knowledge_generation_prompt
+      output_cols: raw_knowledge_generation
+      temperature: 0.0
+      max_tokens: 2048
+      async_mode: true
+
+  - block_type: LLMParserBlock
+    block_config:
+      block_name: extract_knowledge_generation
+      input_cols: raw_knowledge_generation
+      extract_content: true
+
+  - block_type: TextParserBlock
+    block_config:
+      block_name: parse_knowledge_generation
+      input_cols: extract_knowledge_generation_content
+      output_cols: [question, response]
+      parsing_pattern: "\\[(?:Question|QUESTION)\\]\\s*(.*?)\\s*\\[(?:Answer|ANSWER)\\]\\s*(.*?)\\s*(?=\\[(?:Question|QUESTION)\\]|$)"
+      parser_cleanup_tags: ["[END]"]
+
+  - block_type: PromptBuilderBlock
+    block_config:
+      block_name: eval_faithful_prompt
+      input_cols: [document, response]
+      output_cols: eval_faithful_prompt
+      prompt_config_path: ../../instructlab/evaluate_faithfulness.yaml
+      format_as_messages: true
+
+  - block_type: LLMChatBlock
+    block_config:
+      block_name: eval_faithful_llm_chat
+      input_cols: eval_faithful_prompt
+      output_cols: eval_faithful_response_dict
+      max_tokens: 2048
+      n: 1
+      async_mode: true
+
+  - block_type: LLMParserBlock
+    block_config:
+      block_name: extract_eval_faithful
+      input_cols: eval_faithful_response_dict
+      extract_content: true
+
+  - block_type: TextParserBlock
+    block_config:
+      block_name: parse_eval_faithful
+      input_cols: extract_eval_faithful_content
+      output_cols:
+        - faithfulness_explanation
+        - faithfulness_judgment
+      start_tags:
+        - '[Start of Explanation]'
+        - '[Start of Answer]'
+      end_tags:
+        - '[End of Explanation]'
+        - '[End of Answer]'
+
+  - block_type: ColumnValueFilterBlock
+    block_config:
+      block_name: eval_faithful_filter
+      input_cols:
+        - faithfulness_judgment
+      filter_value: "YES"
+      operation: eq
+
+  - block_type: PromptBuilderBlock
+    block_config:
+      block_name: eval_relevancy_prompt
+      input_cols:
+        - question
+        - response
+      output_cols: eval_relevancy_prompt
+      prompt_config_path: ../../instructlab/evaluate_relevancy.yaml
+      format_as_messages: true
+  - block_type: LLMChatBlock
+    block_config:
+      block_name: eval_relevancy_llm_chat
+      input_cols: eval_relevancy_prompt
+      output_cols: eval_relevancy_response_dict
+      max_tokens: 2048
+      n: 1
+      async_mode: true
+  - block_type: LLMParserBlock
+    block_config:
+      block_name: extract_eval_relevancy
+      input_cols: eval_relevancy_response_dict
+      extract_content: true
+
+  - block_type: TextParserBlock
+    block_config:
+      block_name: parse_eval_relevancy
+      input_cols: extract_eval_relevancy_content
+      output_cols:
+        - relevancy_explanation
+        - relevancy_score
+      start_tags:
+        - '[Start of Feedback]'
+        - '[Start of Score]'
+      end_tags:
+        - '[End of Feedback]'
+        - '[End of Score]'
+  - block_type: ColumnValueFilterBlock
+    block_config:
+      block_name: eval_relevancy_filter
+      input_cols:
+        - relevancy_score
+      filter_value: 2.0
+      operation: eq
+      convert_dtype: float
+
+  - block_type: PromptBuilderBlock
+    block_config:
+      block_name: verify_question_prompt
+      input_cols:
+        - question
+      output_cols: verify_question_prompt
+      prompt_config_path: ../../instructlab/evaluate_question.yaml
+      format_as_messages: true
+  - block_type: LLMChatBlock
+    block_config:
+      block_name: verify_question_llm_chat
+      input_cols: verify_question_prompt
+      output_cols: verify_question_response_dict
+      max_tokens: 2048
+      n: 1
+      async_mode: true
+  - block_type: LLMParserBlock
+    block_config:
+      block_name: extract_verify_question
+      input_cols: verify_question_response_dict
+      extract_content: true
+
+  - block_type: TextParserBlock
+    block_config:
+      block_name: parse_verify_question
+      input_cols: extract_verify_question_content
+      output_cols:
+        - verification_explanation
+        - verification_rating
+      start_tags:
+        - '[Start of Explanation]'
+        - '[Start of Rating]'
+      end_tags:
+        - '[End of Explanation]'
+        - '[End of Rating]'
+  - block_type: ColumnValueFilterBlock
+    block_config:
+      block_name: verify_question_filter
+      input_cols:
+        - verification_rating
+      filter_value: 1.0
+      operation: ge
+      convert_dtype: float
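
For orientation, a hedged sketch of driving this new Japanese flow end to end. The Flow.from_yaml / set_model_config / generate calls follow the usage pattern shown in sdg_hub's README; treat the exact import path, method names, and endpoint values as assumptions rather than a verified 0.4.0 API:

from datasets import Dataset
from sdg_hub import Flow  # assumed import path; see the sdg_hub README

# Minimal input satisfying the dataset_requirements above.
dataset = Dataset.from_dict({
    "document": ["<Japanese source text>"],
    "document_outline": ["<outline>"],
    "domain": ["cricket"],
    "icl_document": ["<example passage>"],
    "icl_query_1": ["<example question 1>"], "icl_response_1": ["<example answer 1>"],
    "icl_query_2": ["<example question 2>"], "icl_response_2": ["<example answer 2>"],
    "icl_query_3": ["<example question 3>"], "icl_response_3": ["<example answer 3>"],
})

flow = Flow.from_yaml(
    "sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/"
    "multilingual/japanese/flow.yaml"
)
flow.set_model_config(
    model="hosted_vllm/microsoft/phi-4",  # default recommended model above
    api_base="http://localhost:8000/v1",  # placeholder endpoint
    api_key="EMPTY",
)
result = flow.generate(dataset)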
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/generate_questions_responses_ja.yaml
@@ -0,0 +1,55 @@
+- role: system
+  content: You are a very knowledgeable AI Assistant that will faithfully assist the user with their task.
+
+- role: user
+  content: |
+    Develop as many educational question and answer pairs as possible from a chapter in a {{domain}} textbook.
+
+    The questions should:
+    * Be self-contained, not requiring references to tables, figures, or specific sections in the text for understanding.
+    * Focus on teaching and reinforcing the key knowledge and concepts presented in the chapter.
+    * Avoid sections with minimal educational content like index pages or prefaces. In such cases, respond with [UNANSWERABLE].
+    * Be directly relevant to the textbook's domain. For instance, in a science textbook, questions should revolve around scientific terms, definitions, and practical applications, while in a legal textbook, they should cover legal principles, case law, and precedents.
+    * Be formulated to allow for independent answers, avoiding direct references to specific theorems or text sections. For example, rather than asking 'Under what conditions is the fixed point of a function unique according to Theorem 3.1.5?', ask 'How does the Fixed Point Iteration method contribute to understanding function uniqueness?'
+    * Span a range of difficulty levels to accommodate a diverse student audience, from basic understanding to advanced comprehension.
+    * Include a variety of question types such as multiple-choice for basic recall, short answer for deeper understanding, and essay or problem-solving questions to test application and analysis skills.
+    * Align closely with the learning objectives of the textbook or the specific chapter, ensuring that the questions test the fundamental concepts and skills that the chapter aims to impart.
+    * Be in Japanese.
+
+    Strictly follow this format for each question-answer pair you generate while responding:
+
+    [QUESTION]
+    <Insert question here>
+    [ANSWER]
+    <Insert answer here>
+    [END]
+
+    Each question and answer pair should stand alone as a mini-lesson, encapsulating a key concept or idea from the chapter in a way that is accessible and informative without requiring the reader to refer back to the textbook.
+
+    Here are some examples of questions:
+
+    [Document]
+    {{icl_document}}
+
+    [QUESTION]
+    {{icl_query_1}}
+    [ANSWER]
+    {{icl_response_1}}
+    [END]
+
+    [QUESTION]
+    {{icl_query_2}}
+    [ANSWER]
+    {{icl_response_2}}
+    [END]
+
+    [QUESTION]
+    {{icl_query_3}}
+    [ANSWER]
+    {{icl_response_3}}
+    [END]
+
+    Now, here is the document:
+    [DOCUMENT]
+    {{document_outline}}
+    {{document}}
+ {{document}}