sdg-hub 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sdg_hub/__init__.py +3 -0
- sdg_hub/_version.py +21 -0
- sdg_hub/blocks/__init__.py +36 -0
- sdg_hub/blocks/block.py +96 -0
- sdg_hub/blocks/llmblock.py +375 -0
- sdg_hub/blocks/utilblocks.py +597 -0
- sdg_hub/checkpointer.py +139 -0
- sdg_hub/configs/__init__.py +0 -0
- sdg_hub/configs/annotations/__init__.py +0 -0
- sdg_hub/configs/annotations/cot_reflection.yaml +34 -0
- sdg_hub/configs/annotations/detailed_annotations.yaml +28 -0
- sdg_hub/configs/annotations/detailed_description.yaml +10 -0
- sdg_hub/configs/annotations/detailed_description_icl.yaml +32 -0
- sdg_hub/configs/annotations/simple_annotations.yaml +9 -0
- sdg_hub/configs/knowledge/__init__.py +0 -0
- sdg_hub/configs/knowledge/atomic_facts.yaml +45 -0
- sdg_hub/configs/knowledge/auxilary_instructions.yaml +35 -0
- sdg_hub/configs/knowledge/detailed_summary.yaml +17 -0
- sdg_hub/configs/knowledge/evaluate_faithfulness.yaml +68 -0
- sdg_hub/configs/knowledge/evaluate_question.yaml +38 -0
- sdg_hub/configs/knowledge/evaluate_relevancy.yaml +85 -0
- sdg_hub/configs/knowledge/extractive_summary.yaml +17 -0
- sdg_hub/configs/knowledge/generate_code_questions_responses.yaml +39 -0
- sdg_hub/configs/knowledge/generate_questions_responses.yaml +56 -0
- sdg_hub/configs/knowledge/mcq_generation.yaml +83 -0
- sdg_hub/configs/knowledge/router.yaml +12 -0
- sdg_hub/configs/knowledge/simple_generate_qa.yaml +34 -0
- sdg_hub/configs/reasoning/__init__.py +0 -0
- sdg_hub/configs/reasoning/dynamic_cot.yaml +40 -0
- sdg_hub/configs/skills/__init__.py +0 -0
- sdg_hub/configs/skills/analyzer.yaml +48 -0
- sdg_hub/configs/skills/annotation.yaml +36 -0
- sdg_hub/configs/skills/contexts.yaml +28 -0
- sdg_hub/configs/skills/critic.yaml +60 -0
- sdg_hub/configs/skills/evaluate_freeform_pair.yaml +111 -0
- sdg_hub/configs/skills/evaluate_freeform_questions.yaml +78 -0
- sdg_hub/configs/skills/evaluate_grounded_pair.yaml +119 -0
- sdg_hub/configs/skills/evaluate_grounded_questions.yaml +51 -0
- sdg_hub/configs/skills/freeform_questions.yaml +34 -0
- sdg_hub/configs/skills/freeform_responses.yaml +39 -0
- sdg_hub/configs/skills/grounded_questions.yaml +38 -0
- sdg_hub/configs/skills/grounded_responses.yaml +59 -0
- sdg_hub/configs/skills/icl_examples/STEM.yaml +56 -0
- sdg_hub/configs/skills/icl_examples/__init__.py +0 -0
- sdg_hub/configs/skills/icl_examples/coding.yaml +97 -0
- sdg_hub/configs/skills/icl_examples/extraction.yaml +36 -0
- sdg_hub/configs/skills/icl_examples/humanities.yaml +71 -0
- sdg_hub/configs/skills/icl_examples/math.yaml +85 -0
- sdg_hub/configs/skills/icl_examples/reasoning.yaml +30 -0
- sdg_hub/configs/skills/icl_examples/roleplay.yaml +45 -0
- sdg_hub/configs/skills/icl_examples/writing.yaml +80 -0
- sdg_hub/configs/skills/judge.yaml +53 -0
- sdg_hub/configs/skills/planner.yaml +67 -0
- sdg_hub/configs/skills/respond.yaml +8 -0
- sdg_hub/configs/skills/revised_responder.yaml +78 -0
- sdg_hub/configs/skills/router.yaml +59 -0
- sdg_hub/configs/skills/simple_generate_qa_freeform.yaml +27 -0
- sdg_hub/configs/skills/simple_generate_qa_grounded.yaml +31 -0
- sdg_hub/flow.py +306 -0
- sdg_hub/flow_runner.py +204 -0
- sdg_hub/flows/generation/knowledge/mmlu_bench.yaml +13 -0
- sdg_hub/flows/generation/knowledge/simple_knowledge.yaml +12 -0
- sdg_hub/flows/generation/knowledge/synth_knowledge.yaml +89 -0
- sdg_hub/flows/generation/knowledge/synth_knowledge1.5.yaml +136 -0
- sdg_hub/flows/generation/skills/improve_responses.yaml +103 -0
- sdg_hub/flows/generation/skills/simple_freeform_skill.yaml +12 -0
- sdg_hub/flows/generation/skills/simple_grounded_skill.yaml +12 -0
- sdg_hub/flows/generation/skills/synth_grounded_skills.yaml +80 -0
- sdg_hub/flows/generation/skills/synth_skills.yaml +59 -0
- sdg_hub/logger_config.py +20 -0
- sdg_hub/pipeline.py +121 -0
- sdg_hub/prompts.py +43 -0
- sdg_hub/py.typed +0 -0
- sdg_hub/registry.py +122 -0
- sdg_hub/sdg.py +206 -0
- sdg_hub/utils/__init__.py +5 -0
- sdg_hub/utils/datautils.py +14 -0
- sdg_hub-0.1.0.dist-info/METADATA +190 -0
- sdg_hub-0.1.0.dist-info/RECORD +82 -0
- sdg_hub-0.1.0.dist-info/WHEEL +5 -0
- sdg_hub-0.1.0.dist-info/licenses/LICENSE +201 -0
- sdg_hub-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,83 @@
system: You are a helpful assistant, that is an expert at generating question and answers based on given guidelines.

introduction: Create a series of multiple choice questions by following the given guidelines

principles: |
  Guidelines for generation:
  * Create Multiple Choice Questions based on the data presented in the documents provided.
  * Each question should be accompanied by a correct answer that accurately interprets the data.
  * Ensure that the question and the answer are grounded in the provided document.
  * Return the question between the [Start of Question] and [End of Question] tags.
  * Return the answer within the [Start of Answer] and [End of Answer] tags.

  Follow this structure for each example:

  [Start of Document]
  The boiling point of water is the temperature at which it changes from liquid to gas. This occurs at 100 degrees Celsius under standard atmospheric pressure.
  [End of Document]

  [Start of Question]
  What does the boiling point of water represent?

  A) Solidification
  B) Evaporation
  C) Condensation
  D) Freezing
  [End of Question]

  [Start of Answer]
  B) Evaporation
  [End of Answer]

examples: |

  Example 1:
  [Start of Document]
  Photosynthesis is a process used by plants, algae, and certain bacteria to convert light energy into chemical energy. This process involves the absorption of light by chlorophyll, conversion of inorganic carbon dioxide (CO2) into organic compounds, and release of oxygen (O2) as a byproduct. The general equation for photosynthesis can be represented as
  6CO2 + 6H2O + light energy → C6H12O6 + 6O2.
  [Start of Document]

  [Start of Question]
  What is the primary function of photosynthesis in plants?

  A) To produce carbon dioxide
  B) To convert light energy into chemical energy
  C) To absorb oxygen from the atmosphere
  D) To release carbon dioxide into the environment
  [End of Question]

  [Start of Answer]
  B) To convert light energy into chemical energy
  [End of Answer]

  Example 2:
  [Start of Document]
  E-commerce, short for electronic commerce, refers to the buying and selling of goods and services over the Internet. It encompasses a variety of transactions, including B2B (business to business), B2C (business to consumer), and C2C (consumer to consumer). E-commerce platforms can be purely digital or may combine online and physical operations.
  [End of Document]

  [Start of Question]
  E-commerce primarily involves what kind of transactions?

  A) Digital
  B) Local
  C) Manual
  D) Verbal
  [End of Question]

  [Start of Answer]
  A) Digital
  [End of Answer]

generation: |
  Follow the guidelines and structure given above to create series of Multiple choice question, along with correct answers, based on the provided document.
  * Return the question between the [Start of Question] and [End of Question] tags.
  * Return the answer within the [Start of Answer] and [End of Answer] tags.

  Here is the document:
  [Start of Document]
  {{document_outline}}
  {{document}}
  [End of Document]

start_tags: ["[Start of Question]", "[Start of Answer]"]
end_tags: ["[End of Question]", "[End of Answer]"]
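The `start_tags` / `end_tags` lists at the bottom of this config suggest that the generated question/answer pairs are recovered from raw model output by slicing the text between matching tags. The following is only a minimal sketch of that idea (the helper name and regex are illustrative; the actual parsing is handled inside the sdg_hub block code listed above, which is not shown in this diff):

```python
import re
from typing import List

def extract_tagged(text: str, start_tag: str, end_tag: str) -> List[str]:
    """Return every span found between start_tag and end_tag (tags excluded)."""
    pattern = re.escape(start_tag) + r"(.*?)" + re.escape(end_tag)
    return [m.strip() for m in re.findall(pattern, text, flags=re.DOTALL)]

# Example using the tags declared in the config above (sample text is made up).
raw = (
    "[Start of Question]\nWhat does the boiling point of water represent?\n[End of Question]\n"
    "[Start of Answer]\nB) Evaporation\n[End of Answer]"
)
questions = extract_tagged(raw, "[Start of Question]", "[End of Question]")
answers = extract_tagged(raw, "[Start of Answer]", "[End of Answer]")
print(questions, answers)
```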
@@ -0,0 +1,34 @@
system: You are a very knowledgeable AI Assistant that will faithfully assist the user with their task.

introduction: Develop a series of educational question and answer pairs from a chapter in a {{domain}} textbook.

principles: |
  Here are the requirements:
  1. Try not to repeat the verb for each instruction to maximize diversity.
  2. The language used for the instruction also should be diverse. For example, you should combine questions with imperative instructions.
  3. The type of instructions should be similar to provided examples. The generated instruction and the output should be grounded in the provided document.
  4. A GPT language model should be able to complete the instruction. For example, do not ask the assistant to create any visual or audio output. For another example, do not ask the assistant to wake you up at 5pm or set a reminder because it cannot perform any action.
  5. The instructions should be in English.
  6. The instructions should be 1 to 2 sentences long. Either an imperative sentence or a question is permitted.
  7. The output should be an appropriate response to the input and the instruction. Long outputs are preferable.

examples: |
  Here are some examples to help you understand the type of questions that are asked for this document:

  {{icl_query_1}}
  {{icl_response_1}}

  {{icl_query_2}}
  {{icl_response_2}}

  {{icl_query_3}}
  {{icl_response_3}}

  Here is the document:
  {{document}}

generation: |
  Provide a single question and answer pair based on the document.

start_tags: [""]
end_tags: [""]
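Configs in this family are split into `system`, `introduction`, `principles`, `examples`, and `generation` sections with Jinja-style placeholders such as `{{domain}}` and `{{document}}`. A rough, assumption-laden sketch of loading such a file and rendering it into one prompt string (using PyYAML and Jinja2; the path, section order, and concatenation scheme are hypothetical, not sdg_hub's actual loader):

```python
import yaml
from jinja2 import Template

def render_prompt(config_path: str, **variables) -> str:
    """Concatenate the prompt sections of a config file and fill in placeholders.

    The system message is left out here on the assumption that it is sent
    separately as the chat system prompt.
    """
    with open(config_path, encoding="utf-8") as f:
        cfg = yaml.safe_load(f)
    sections = ["introduction", "principles", "examples", "generation"]
    body = "\n".join(cfg[s] for s in sections if cfg.get(s))
    return Template(body).render(**variables)

# Hypothetical usage with a knowledge QA config like the one above.
prompt = render_prompt(
    "knowledge_qa.yaml",  # illustrative path
    domain="biology",
    document="Photosynthesis converts light energy into chemical energy.",
    icl_query_1="Q1", icl_response_1="A1",
    icl_query_2="Q2", icl_response_2="A2",
    icl_query_3="Q3", icl_response_3="A3",
)
```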
File without changes
@@ -0,0 +1,40 @@
system: You are an AI assistant that uses dynamic Chain of Thought (CoT), reflection, and verbal reinforcement learning for problem-solving. Your responses must adhere to the following instructions

principles: |
  1. Break down the solution into clear steps, providing a descriptive title and content for each step to ensure logical progression.
  2. Adjust your reasoning dynamically based on intermediate results and reflections, adapting your strategy as needed.
  3. Regularly evaluate your progress, being critical and honest about your reasoning. After every three steps, perform a detailed self-reflection to identify potential biases and consider alternative strategies.
  4. For mathematical problems, show all work explicitly using LaTeX notation and provide detailed proofs.
  5. Explore multiple solutions individually when possible, comparing approaches during reflections.
  6. Use a scratchpad to document calculations, reasoning, and any intermediate thoughts explicitly.
  7. Stay aware of your limitations as an AI, clearly communicating what you can and cannot do.

examples: |
  Respond in JSON format, with each response containing the following keys:
  - current_action: Indicates the current action being taken, chosen from:
    * think: Engage in thoughtful planning about how to approach or solve the task, considering potential strategies and identifying crucial elements.
    * reflect: Pause to evaluate and reconsider your reasoning, assessing potential biases or errors.
    * backtrack: Revert to a previous step and try a different solution path.
    * generate: Present the final answer if confident.
    * terminate: Conclude the process if no further action is needed.
  - title: Describes the focus of the current step.
  - content: Provides a detailed explanation of the step.
  - confidence: A number between 0 and 1 representing your confidence in the content produced for the current action.
  - next_action: Suggests the next action to be taken, chosen from the same set of actions.

  Example of a valid JSON response:
  ```json
  {
    "current_action": "think",
    "title": "Identifying Key Information",
    "content": "To begin solving this problem, we need to carefully examine the given information and identify the crucial elements that will guide our solution process. This involves...",
    "confidence": 0.8,
    "next_action": "reflect"
  }
  ```

generation: |
  Your goal is to demonstrate a thorough, adaptive, and self-reflective problem-solving process, emphasizing dynamic thinking and learning from your reasoning.
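This prompt asks the model to emit one JSON object per step with `current_action`, `title`, `content`, `confidence`, and `next_action` keys. A minimal consumer of that format might validate each step as sketched below (illustrative only; not part of the package):

```python
import json

REQUIRED_KEYS = {"current_action", "title", "content", "confidence", "next_action"}
VALID_ACTIONS = {"think", "reflect", "backtrack", "generate", "terminate"}

def parse_cot_step(raw: str) -> dict:
    """Parse one JSON step emitted under the dynamic CoT prompt and sanity-check it."""
    step = json.loads(raw)
    missing = REQUIRED_KEYS - step.keys()
    if missing:
        raise ValueError(f"step is missing keys: {sorted(missing)}")
    if step["current_action"] not in VALID_ACTIONS:
        raise ValueError(f"unknown action: {step['current_action']}")
    if not 0 <= step["confidence"] <= 1:
        raise ValueError("confidence must be between 0 and 1")
    return step

# Example step mirroring the JSON sample shown in the config above.
example = (
    '{"current_action": "think", "title": "Identifying Key Information", '
    '"content": "Examine the given information...", "confidence": 0.8, '
    '"next_action": "reflect"}'
)
print(parse_cot_step(example)["next_action"])  # -> reflect
```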
File without changes
@@ -0,0 +1,48 @@
system: You are a very knowledgeable AI Assistant that will faithfully assist the user with their task.

introduction: |
  You will assume the role of an question analyzer. Given an user question your task is to analyze the question and generate an analysis including the domain of the task, a brief description of the task, and 5 domain specific rubric to evaluate the response.

principles: |
  1. Analysis:
  * Briefly describe the task of the question, identify the domain of the task, and provide a clear understanding of the user's request.
  * Look for any keywords or phrases that indicate the user's specific requirements when it comes to the format or type of response.
  * The analysis should be clear, concise and unambiguous.
  * Return the analysis between [Start of Analysis] and [End of Analysis] tags.

  2. Rubric:
  * Generate 3 to 5 domain specific rubric to evaluate the response.
  * The generated rubric should be clear, concise and unambiguous.
  * The rubric should be specific to the domain of the question and should not be generic.
  * The rubric should be actionable and feasible.
  * The rubric should satisfy all the criteria provided in the question. For instance, input and output format, type of response, etc.
  * Return the rubric between [Start of Rubric] and [End of Rubric] tags.

  As a general guideline, generate all the required information without any explanation or reasoning.

examples: |
  To help you understand the task, here is an example:

  [Start of Question]
  {{ icl_query }}
  [End of Question]

  [Start of Analysis]
  {{ icl_analysis }}
  [End of Analysis]

  [Start of Rubric]
  {{ icl_rubric }}
  [End of Rubric]

generation: |
  Now it's your turn to analyze the following question. Remember to follow the paradigm and return the analysis and rubric in the respective sections. Strictly format the response using the specified tags.
  * Return the analysis between [Start of Analysis] and [End of Analysis] tags.
  * Return the rubric between [Start of Rubric] and [End of Rubric] tags.

  [Start of Question]
  {{ question }}
  [End of Question]

start_tags: ["[Start of Analysis]", "[Start of Rubric]"]
end_tags: ["[End of Analysis]", "[End of Rubric]"]
@@ -0,0 +1,36 @@
system: |
  {{ system }}
introduction: |
  {{ task_description }}
principles: |
  {{ principles }}
examples: |
  To better assist you with this task, here are some examples:
  {% if seed_samples is defined %}
  {% for sample in seed_samples %}
  [Start of Question]
  {{ sample.seed_question }}
  [End of Question]

  [Start of Response]
  {{ sample.seed_response }}
  [End of Response]
  {% endfor %}
  {% else %}
  [Start of Question]
  {{ seed_question }}
  [End of Question]

  [Start of Response]
  {{ seed_response }}
  [End of Response]
  {% endif %}
generation: |
  Remember to follow the principles mentioned above and use the same format as the examples.
  [Start of Question]
  {{ question }}
  [End of Question]

  Generate the response to the question above and return it in between the [Start of Response] and [End of Response] tags.
start_tags: ["[Start of Response]"]
end_tags: ["[End of Response]"]
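The `examples` template above branches on `{% if seed_samples is defined %}`, so the same config can render either a list of seed samples or a single seed question/response pair. The snippet below exercises both branches with Jinja2 directly (the harness is illustrative; only the template text and variable names come from the config):

```python
from jinja2 import Template

# Trimmed copy of the examples block from the config above.
examples_block = """{% if seed_samples is defined %}
{% for sample in seed_samples %}
[Start of Question]
{{ sample.seed_question }}
[End of Question]
[Start of Response]
{{ sample.seed_response }}
[End of Response]
{% endfor %}
{% else %}
[Start of Question]
{{ seed_question }}
[End of Question]
[Start of Response]
{{ seed_response }}
[End of Response]
{% endif %}"""

tpl = Template(examples_block)

# Multi-example branch: seed_samples is defined, so the for-loop runs.
print(tpl.render(seed_samples=[
    {"seed_question": "Q1", "seed_response": "A1"},
    {"seed_question": "Q2", "seed_response": "A2"},
]))

# Single-example branch: seed_samples is undefined, so the else clause renders.
print(tpl.render(seed_question="Q", seed_response="A"))
```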
@@ -0,0 +1,28 @@
system: You are a highly capable AI Assistant that specializes in generating high-quality content tailored to specific tasks.

introduction: |
  Your task is to write a rich, relevant, and well-structured **context** for the following task:
  Task Description: {{task_description}}

principles: |
  Please follow these guiding principles when generating the context:
  * The context should be coherent, informative, and closely aligned with the task description.
  * Do not include any greetings, explanations, or meta commentary.
  * Maintain a natural, human-like tone suitable for the domain.
  * Follow the formatting shown in the example exactly.
  * Wrap the output between the tags: [Start of Context] and [End of Context].

examples: |
  To guide you, here is an example of a well-structured context:

  [Start of Context]
  {{seed_context}}
  [End of Context]

generation: |
  Now generate a new context following the same structure and principles.
  Begin your output with [Start of Context] and end with [End of Context].
  Do not include any additional text outside these tags.

start_tags: ["[Start of Context]"]
end_tags: ["[End of Context]"]
@@ -0,0 +1,60 @@
system: |
  You are a very knowledgeable AI Assistant that will faithfully assist the user with their task.

introduction: |
  You will assume the role of a critic. You will be given an analysis of a query which includes a rubric, and a response to the query generated by an AI assistant. Your task is to evaluate the response based on the rubric provided.

principles: |
  Use the following step-by-step process to evaluate the response:
  * Using the domain as a part of the analysis, assume the role of an expert in that domain.
  * Understand the task description provided in the analysis.
  * Using the Rubric provided, evaluate the response generated by the AI assistant.
  * For each item in the rubric, your evaluation should include how well the response meets the criteria and any feedback for improvement.
  * Only evaluate the response based on the rubric provided, do not create your own criteria.

examples: |
  To help you understand the task, here is an example:

  [Start of Query]
  {{ icl_query }}
  [End of Query]

  [Start of Response]
  {{ icl_response }}
  [End of Response]

  [Start of Analysis]
  {{ icl_analysis }}
  [End of Analysis]

  [Start of Rubric]
  {{ icl_rubric }}
  [End of Rubric]

  [Start of Critique]
  {{ icl_critique }}
  [End of Critique]

generation: |
  Now it's your turn to analyze the following query.

  [Start of Query]
  {{ question }}
  [End of Query]

  [Start of Response]
  {{ response }}
  [End of Response]

  [Start of Analysis]
  {{ analysis }}
  [End of Analysis]

  [Start of Rubric]
  {{ rubric }}
  [End of Rubric]

  Remember to follow the paradigm and return the critique based on the rubric provided, between [Start of Critique] and [End of Critique] tags.

start_tags: ["[Start of Critique]"]
end_tags: ["[End of Critique]"]
@@ -0,0 +1,111 @@
system: You are a highly knowledgeable and impartial AI Assistant tasked with evaluating the quality of responses to user questions.

introduction: |
  Please act as an objective evaluator and assess whether the AI Assistant's answer correctly follows formatting requirements and answers the user's question. Use the scoring rubric below and assign a score from 1 to 3.

principles: |
  Use the following 3-point scale to score the answer:

  **1 — Poor Quality**
  - The output is incorrectly formatted, contains hallucinations, or ignores required tags/structure.
  - The answer may be off-topic, incomplete, or inconsistent with the task.
  - The output introduces unsafe or inappropriate content, or violates structural instructions.

  **2 — Acceptable but Minimal**
  - The answer is structurally valid and safe, but lacks polish, clarity, or minor formatting correctness.
  - It meets the task minimally, but may show inconsistencies or lack proper use of spacing, tags, or conventions.
  - It's not harmful, but also not ready for use without revision.

  **3 — Excellent Answer**
  - The answer is fully correct, clearly written, and **strictly adheres to formatting instructions**.
  - It uses all required tags, markdown syntax, or structure accurately and consistently.
  - It directly fulfills the task with precision, professionalism, and completeness.

examples: |
  Example 1 — Score: 1 (Poor Formatting, Unsafe)

  Task Description: Generate a markdown table of 3 planets with columns: Name, Gravity, and Moons.

  [Start of Question]
  Create a markdown table with 3 planets and their gravity/moons.
  [End of Question]

  [Start of Answer]
  Mars - 3.7 - 2; Earth - 9.8 - 1; Jupiter - 24.8 - 79
  [End of Answer]

  [Start of Evaluation]
  The answer is improperly formatted (not a markdown table) and unstructured. It violates task instructions both structurally and in tone.
  [End of Evaluation]

  [Start of Score]
  1
  [End of Score]

  Example 2 — Score: 2 (Correct but Sloppy Formatting)

  Task Description: Generate a markdown table of 3 planets with columns: Name, Gravity, and Moons.

  [Start of Question]
  Create a markdown table with 3 planets and their gravity/moons.
  [End of Question]

  [Start of Answer]
  | Name | Gravity | Moons |
  |-------|---------|-------|
  | Mars | 3.7 | 2 |
  | Earth| 9.8| 1 |
  | Jupiter |24.8 |79|
  [End of Answer]

  [Start of Evaluation]
  The table has correct content but inconsistent spacing and pipe alignment. It fulfills the task but lacks polish and readability. It's usable, but not clean or well-structured.
  [End of Evaluation]

  [Start of Score]
  2
  [End of Score]

  Example 3 — Score: 3 (Flawless Markdown Table)

  Task Description: Generate a markdown table of 3 planets with columns: Name, Gravity, and Moons.

  [Start of Question]
  Create a markdown table with 3 planets and their gravity/moons.
  [End of Question]

  [Start of Answer]
  | Name | Gravity (m/s²) | Moons |
  |----------|----------------|-------|
  | Mars | 3.7 | 2 |
  | Earth | 9.8 | 1 |
  | Jupiter | 24.8 | 79 |
  [End of Answer]

  [Start of Evaluation]
  The answer uses proper markdown syntax, alignment, and column headers. The formatting is clean, readable, and consistent with markdown table standards. It meets the task precisely.
  [End of Evaluation]

  [Start of Score]
  3
  [End of Score]

generation: |
  Now begin your evaluation of the following QA pair. Use the rubric above and be objective and concise in your reasoning.

  Task Description: {{task_description}}

  [Start of Question]
  {{ question }}
  [End of Question]

  [Start of Answer]
  {{ response }}
  [End of Answer]

  * Provide your evaluation between [Start of Evaluation] and [End of Evaluation] tags.
  * Provide the score between [Start of Score] and [End of Score] tags.
  * Do not include any content outside these tags.

start_tags: ["[Start of Evaluation]", "[Start of Score]"]
end_tags: ["[End of Evaluation]", "[End of Score]"]
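Judge configs such as this one return a short evaluation plus an integer score wrapped in `[Start of Score]` / `[End of Score]` tags, which downstream filtering can key on. A hedged sketch of extracting that score and keeping only high-scoring samples (the threshold, field names, and helpers are illustrative, not sdg_hub's):

```python
import re
from typing import Optional

SCORE_RE = re.compile(r"\[Start of Score\]\s*(\d+)\s*\[End of Score\]")

def extract_score(judge_output: str) -> Optional[int]:
    """Pull the integer between the score tags, or None if the tags are missing."""
    match = SCORE_RE.search(judge_output)
    return int(match.group(1)) if match else None

def keep_good_pairs(samples: list, min_score: int = 3) -> list:
    """Keep only samples whose judge output scored at least min_score (1-3 scale above)."""
    return [s for s in samples if (extract_score(s["evaluation"]) or 0) >= min_score]

# Hypothetical judge outputs for two generated QA pairs.
samples = [
    {"question": "Q1", "response": "R1",
     "evaluation": "[Start of Evaluation]...[End of Evaluation]\n[Start of Score]\n3\n[End of Score]"},
    {"question": "Q2", "response": "R2",
     "evaluation": "[Start of Evaluation]...[End of Evaluation]\n[Start of Score]\n1\n[End of Score]"},
]
print(len(keep_good_pairs(samples)))  # -> 1
```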
@@ -0,0 +1,78 @@
system: You are a very knowledgeable AI Assistant that will faithfully assist the user with their task.

introduction: |
  Please act as an impartial and detail-oriented evaluator of synthetic questions. Your job is to assess whether the given question meets the defined quality and formatting standards for the task described. Assign a score using a strict binary 0/1 scale.

principles: |
  A valid question must satisfy **all** of the following requirements:
  * The question should be answerable via text (not require visual/audio input).
  * It must be **clearly relevant to the task description** ({{task_description}}).
  * It should **not contain placeholder text**, incomplete sentences, or formatting artifacts.

  If the question satisfies **all** of the above, assign a score of `1`. Otherwise, assign `0`.

examples: |
  Example 1 - Valid question:

  Task Description: Extract the main idea of a paragraph.

  [Start of Question]
  What is the central message conveyed by the paragraph?
  [End of Question]

  [Start of Evaluation]
  The question is clear, concise, grammatically correct, and directly related to the task. It follows formatting rules and is appropriate in tone.
  [End of Evaluation]

  [Start of Score]
  1
  [End of Score]

  Example 2 - Invalid question (bad formatting):

  Task Description: Extract the main idea of a paragraph.

  [Start of Question]
  main idea??
  [End of Question]

  [Start of Evaluation]
  The question lacks proper capitalization, punctuation, and complete sentence structure. It does not meet the formatting standards.
  [End of Evaluation]

  [Start of Score]
  0
  [End of Score]

  Example 3 - Invalid question (off-topic):

  Task Description: Extract the main idea of a paragraph.

  [Start of Question]
  What's your favorite type of movie and why?
  [End of Question]

  [Start of Evaluation]
  The question is unrelated to the given task description. It fails the relevance requirement.
  [End of Evaluation]

  [Start of Score]
  0
  [End of Score]

generation: |
  Here's the question you need to evaluate:

  Task Description: {{task_description}}

  [Start of Question]
  {{question}}
  [End of Question]

  Now begin your evaluation:
  * First, provide a brief explanation between [Start of Evaluation] and [End of Evaluation] tags.
  * Then return a binary score (0 or 1) between [Start of Score] and [End of Score] tags.
  * Do not include any content outside these tags.

start_tags: ["[Start of Evaluation]", "[Start of Score]"]
end_tags: ["[End of Evaluation]", "[End of Score]"]
@@ -0,0 +1,119 @@
system: You are a highly knowledgeable and impartial AI Assistant tasked with evaluating the quality of responses to user questions.

introduction: |
  Please act as an objective evaluator and assess the quality of the AI Assistant's answer to the user's question. Your job is to determine whether the answer is complete, correct, relevant, safe, and well-written according to the task. Use the scoring rubric below and assign a score from 1 to 3.

principles: |
  Use the following 3-point scale to score the answer:

  **1 — Poor Quality**
  - The answer is factually incorrect, off-topic, incomplete, or nonsensical.
  - It may contain hallucinations, hanging statements, or irrelevant content.
  - It does not align with the task description or question intent.

  **2 — Acceptable but Minimal**
  - The answer is correct and safe, but short and lacks depth or clarity.
  - It may be technically correct but overly terse or missing explanation.
  - It satisfies the user's question at a surface level without context.

  **3 — Excellent Answer**
  - The answer is complete, correct, relevant, and insightful.
  - It provides clear reasoning, well-structured writing, and deep understanding.
  - It directly addresses the question with expert-level knowledge.

examples: |
  Example 1 — Score: 1 (Poor Quality)

  Task Description: Summarize the key point of a news paragraph in one sentence.

  [Start of Context]
  The mayor of Springfield unveiled a new plan to reduce urban pollution, which includes a ban on gas-powered scooters, expansion of bike lanes, and free public transit passes for residents.
  [End of Context]

  [Start of Question]
  What is the main point of the paragraph?
  [End of Question]

  [Start of Answer]
  Scooters are fun but kind of dangerous if you go too fast lol.
  [End of Answer]

  [Start of Evaluation]
  The answer is irrelevant and nonsensical in context. It does not reflect the content of the paragraph and includes casual language and commentary that breaks task intent. It is an example of a hallucinated, off-topic response.
  [End of Evaluation]

  [Start of Score]
  1
  [End of Score]

  Example 2 — Score: 2 (Acceptable but Minimal)

  Task Description: Summarize the key point of a news paragraph in one sentence.

  [Start of Context]
  The mayor of Springfield unveiled a new plan to reduce urban pollution, which includes a ban on gas-powered scooters, expansion of bike lanes, and free public transit passes for residents.
  [End of Context]

  [Start of Question]
  What is the main point of the paragraph?
  [End of Question]

  [Start of Answer]
  The city is taking steps to reduce pollution.
  [End of Answer]

  [Start of Evaluation]
  The answer is factually correct and safe, but vague. It lacks depth and omits the key details about *how* the city plans to reduce pollution. It does not demonstrate full understanding of the context or question.
  [End of Evaluation]

  [Start of Score]
  2
  [End of Score]

  Example 3 — Score: 3 (Excellent Answer)

  Task Description: Summarize the key point of a news paragraph in one sentence.

  [Start of Context]
  The mayor of Springfield unveiled a new plan to reduce urban pollution, which includes a ban on gas-powered scooters, expansion of bike lanes, and free public transit passes for residents.
  [End of Context]

  [Start of Question]
  What is the main point of the paragraph?
  [End of Question]

  [Start of Answer]
  The mayor of Springfield introduced a pollution-reduction plan that bans gas-powered scooters, expands bike lanes, and provides free transit passes to residents.
  [End of Answer]

  [Start of Evaluation]
  The answer is complete, relevant, and clearly written. It captures the full intent and detail of the paragraph in a single well-structured sentence, demonstrating both factual understanding and clarity. It aligns perfectly with the task instruction.
  [End of Evaluation]

  [Start of Score]
  3
  [End of Score]

generation: |
  Now begin your evaluation of the following QA pair. Use the rubric above and be objective and concise in your reasoning.

  Task Description: {{task_description}}

  [Start of Context]
  {{ context }}
  [End of Context]

  [Start of Question]
  {{ question }}
  [End of Question]

  [Start of Answer]
  {{ response }}
  [End of Answer]

  * Provide your evaluation between [Start of Evaluation] and [End of Evaluation] tags.
  * Provide the score between [Start of Score] and [End of Score] tags.
  * Do not include any content outside these tags.

start_tags: ["[Start of Evaluation]", "[Start of Score]"]
end_tags: ["[End of Evaluation]", "[End of Score]"]