sdg-hub 0.1.0a2.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sdg_hub/__init__.py +4 -0
- sdg_hub/_version.py +21 -0
- sdg_hub/blocks/__init__.py +6 -0
- sdg_hub/blocks/block.py +54 -0
- sdg_hub/blocks/filterblock.py +76 -0
- sdg_hub/blocks/iterblock.py +31 -0
- sdg_hub/blocks/llmblock.py +430 -0
- sdg_hub/blocks/rmblocks.py +194 -0
- sdg_hub/blocks/utilblocks.py +140 -0
- sdg_hub/configs/__init__.py +0 -0
- sdg_hub/configs/annotations/__init__.py +0 -0
- sdg_hub/configs/annotations/cot_reflection.yaml +34 -0
- sdg_hub/configs/annotations/detailed_description.yaml +10 -0
- sdg_hub/configs/annotations/detailed_description_icl.yaml +32 -0
- sdg_hub/configs/annotations/simple.yaml +10 -0
- sdg_hub/configs/knowledge/__init__.py +0 -0
- sdg_hub/configs/knowledge/atomic_facts.yaml +45 -0
- sdg_hub/configs/knowledge/auxilary_instructions.yaml +35 -0
- sdg_hub/configs/knowledge/data_recipe/__init__.py +0 -0
- sdg_hub/configs/knowledge/data_recipe/default_recipe.yaml +3 -0
- sdg_hub/configs/knowledge/detailed_summary.yaml +17 -0
- sdg_hub/configs/knowledge/evaluate_faithfulness.yaml +68 -0
- sdg_hub/configs/knowledge/evaluate_question.yaml +38 -0
- sdg_hub/configs/knowledge/evaluate_relevancy.yaml +85 -0
- sdg_hub/configs/knowledge/extractive_summary.yaml +17 -0
- sdg_hub/configs/knowledge/generate_code_questions_responses.yaml +39 -0
- sdg_hub/configs/knowledge/generate_questions_responses.yaml +56 -0
- sdg_hub/configs/knowledge/mcq_generation.yaml +83 -0
- sdg_hub/configs/knowledge/router.yaml +12 -0
- sdg_hub/configs/knowledge/simple_generate_qa.yaml +34 -0
- sdg_hub/configs/reasoning/dynamic_cot.yaml +40 -0
- sdg_hub/configs/skills/_A_.yaml +97 -0
- sdg_hub/configs/skills/_B_.yaml +36 -0
- sdg_hub/configs/skills/_C_.yaml +71 -0
- sdg_hub/configs/skills/_D_.yaml +85 -0
- sdg_hub/configs/skills/_E_.yaml +30 -0
- sdg_hub/configs/skills/_F_.yaml +45 -0
- sdg_hub/configs/skills/_G_.yaml +56 -0
- sdg_hub/configs/skills/_H_.yaml +80 -0
- sdg_hub/configs/skills/__init__.py +0 -0
- sdg_hub/configs/skills/analyzer.yaml +48 -0
- sdg_hub/configs/skills/annotation.yaml +36 -0
- sdg_hub/configs/skills/contexts.yaml +21 -0
- sdg_hub/configs/skills/critic.yaml +60 -0
- sdg_hub/configs/skills/data_recipe/__init__.py +0 -0
- sdg_hub/configs/skills/data_recipe/default_recipe.yaml +6 -0
- sdg_hub/configs/skills/evaluate_freeform_pair.yaml +44 -0
- sdg_hub/configs/skills/evaluate_freeform_questions.yaml +46 -0
- sdg_hub/configs/skills/evaluate_grounded_pair.yaml +54 -0
- sdg_hub/configs/skills/evaluate_grounded_questions.yaml +51 -0
- sdg_hub/configs/skills/freeform_questions.yaml +29 -0
- sdg_hub/configs/skills/freeform_responses.yaml +45 -0
- sdg_hub/configs/skills/grounded_questions.yaml +38 -0
- sdg_hub/configs/skills/grounded_responses.yaml +59 -0
- sdg_hub/configs/skills/judge.yaml +53 -0
- sdg_hub/configs/skills/planner.yaml +67 -0
- sdg_hub/configs/skills/respond.yaml +8 -0
- sdg_hub/configs/skills/revised_responder.yaml +78 -0
- sdg_hub/configs/skills/router.yaml +12 -0
- sdg_hub/configs/skills/simple_generate_qa_freeform.yaml +27 -0
- sdg_hub/configs/skills/simple_generate_qa_grounded.yaml +31 -0
- sdg_hub/flow.py +127 -0
- sdg_hub/flows/annotation/emotion/detailed_description.yaml +19 -0
- sdg_hub/flows/annotation/emotion/detailed_description_icl.yaml +19 -0
- sdg_hub/flows/annotation/emotion/simple.yaml +19 -0
- sdg_hub/flows/generation/knowledge/mmlu_bench.yaml +13 -0
- sdg_hub/flows/generation/knowledge/simple_knowledge.yaml +12 -0
- sdg_hub/flows/generation/knowledge/synth_knowledge.yaml +89 -0
- sdg_hub/flows/generation/knowledge/synth_knowledge1.5.yaml +136 -0
- sdg_hub/flows/generation/skills/agentic_improve_skill.yaml +108 -0
- sdg_hub/flows/generation/skills/simple_freeform_skill.yaml +12 -0
- sdg_hub/flows/generation/skills/simple_grounded_skill.yaml +12 -0
- sdg_hub/flows/generation/skills/synth_grounded_skills.yaml +80 -0
- sdg_hub/flows/generation/skills/synth_skills.yaml +59 -0
- sdg_hub/logger_config.py +20 -0
- sdg_hub/pipeline.py +66 -0
- sdg_hub/prompts.py +17 -0
- sdg_hub/py.typed +0 -0
- sdg_hub/registry.py +122 -0
- sdg_hub/sdg.py +164 -0
- sdg_hub/utils/__init__.py +5 -0
- sdg_hub/utils/chunking.py +73 -0
- sdg_hub/utils/datamixing.py +123 -0
- sdg_hub/utils/datautils.py +14 -0
- sdg_hub/utils/docprocessor.py +357 -0
- sdg_hub/utils/json.py +48 -0
- sdg_hub/utils/models.py +31 -0
- sdg_hub/utils/parse_and_convert.py +392 -0
- sdg_hub/utils/taxonomy.py +489 -0
- sdg_hub-0.1.0a2.dev0.dist-info/METADATA +154 -0
- sdg_hub-0.1.0a2.dev0.dist-info/RECORD +94 -0
- sdg_hub-0.1.0a2.dev0.dist-info/WHEEL +5 -0
- sdg_hub-0.1.0a2.dev0.dist-info/licenses/LICENSE +201 -0
- sdg_hub-0.1.0a2.dev0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,194 @@
|
|
1
|
+
"""Module containing blocks for scoring responses using Reward Models."""
|
2
|
+
|
3
|
+
# Standard
from typing import Dict, List, Optional
from urllib.parse import urljoin
import json

# Third Party
from datasets import Dataset
import requests

# Local
from .block import Block
from ..logger_config import setup_logger
from ..registry import BlockRegistry
|
16
|
+
|
17
|
+
logger = setup_logger(__name__)
|
18
|
+
|
19
|
+
|
20
|
+
@BlockRegistry.register("PRMBlock")
class PRMBlock(Block):
    """A block for scoring responses using a Process Reward Model (PRM) via HTTP API.

    This block sends prompts and responses to a PRM endpoint and returns reward scores
    for each step in the response.
    """

    def __init__(
        self,
        block_name: str,
        host: str,
        port: int,
        model_name: str,
        prompt_col: str,
        response_col: str,
        output_col: str = "step_rewards",
        system_prompt: Optional[str] = None,
        endpoint: str = "pooling",
        step_separator: str = "\n\n",
        step_fill_token: str = "<extra_0>",
        request_timeout: float = 60.0,
    ) -> None:
        r"""Initialize the PRM (Process Reward Model) Block.

        Parameters
        ----------
        block_name : str
            Name of the block
        host : str
            Hostname of the PRM service (e.g., "0.0.0.0" or "localhost")
        port : int
            Port number the service is running on
        model_name : str
            Name of the PRM model to use
        prompt_col : str
            Column name containing the prompt
        response_col : str
            Column name containing the response
        output_col : str, optional
            Column name to store the reward scores, by default "step_rewards"
        system_prompt : str, optional
            Optional system prompt to use for scoring, by default None
        endpoint : str, optional
            API endpoint name, by default "pooling"
        step_separator : str, optional
            Separator between steps in the response, by default "\n\n"
        step_fill_token : str, optional
            Model specific fill token for steps in the response, by default "<extra_0>" used by Qwen2.5-Math-PRM
        request_timeout : float, optional
            Timeout in seconds for each HTTP request to the PRM service, by default 60.0.
            Without a timeout an unresponsive server would hang the pipeline indefinitely.
        """
        super().__init__(block_name)
        # Construct base URL from host and port
        self.base_url = f"http://{host.strip('/')}:{port}/"
        self.endpoint = endpoint.strip("/")

        # Construct the full API URL using urljoin
        self.api_url = urljoin(self.base_url, self.endpoint)
        logger.info(f"Initialized PRMBlock with API URL: {self.api_url}")

        self.model_name = model_name
        self.prompt_col = prompt_col
        self.response_col = response_col
        self.output_col = output_col
        self.system_prompt = system_prompt
        self.step_separator = step_separator
        self.step_fill_token = step_fill_token
        self.request_timeout = request_timeout

    def _post_request(self, messages: List[Dict]) -> requests.Response:
        """Make POST request to PRM API endpoint.

        Parameters
        ----------
        messages : List[Dict]
            List of message dictionaries to send to the API

        Returns
        -------
        requests.Response
            Response from the API

        Raises
        ------
        requests.RequestException
            If the request fails or exceeds ``request_timeout``.
        """
        headers = {"User-Agent": "PRMBlock Client"}
        payload = {"model": self.model_name, "messages": messages}
        # Explicit timeout so a dead/unreachable PRM server cannot block forever.
        response = requests.post(
            self.api_url, headers=headers, json=payload, timeout=self.request_timeout
        )
        return response

    def _format_messages(self, sample: Dict) -> List[Dict]:
        """Format input sample into messages for the PRM API.

        The assistant turn has each step separated by ``step_fill_token`` (with a
        trailing token), which is the placeholder the PRM scores per step.

        Parameters
        ----------
        sample : Dict
            Input sample containing prompt and response

        Returns
        -------
        List[Dict]
            Formatted messages for the API
        """
        messages = []
        if self.system_prompt:
            messages.append({"role": "system", "content": self.system_prompt})

        messages.append({"role": "user", "content": sample[self.prompt_col]})
        messages.append(
            {
                "role": "assistant",
                "content": self.step_fill_token.join(sample[self.response_col].split(self.step_separator))
                + self.step_fill_token,
            }
        )
        return messages

    def _extract_rewards(self, response: requests.Response) -> List[float]:
        """Extract reward scores from API response.

        Parameters
        ----------
        response : requests.Response
            Response from the API

        Returns
        -------
        List[float]
            List of reward scores; empty list if the response is malformed.
        """
        try:
            response_data = response.json()
            # Each entry is a (token_id-ish, score) pair; the score is index 1.
            rewards = [x[1] for x in response_data["data"][0]["data"]]
            return rewards
        # ValueError covers json.JSONDecodeError (its subclass) and also the plain
        # ValueError raised by response.json() on older versions of requests.
        except (KeyError, IndexError, TypeError, ValueError) as e:
            logger.error(f"Error extracting rewards from response: {e}")
            return []

    def _generate(self, sample: dict) -> dict:
        """Generate reward scores for a single input sample.

        Parameters
        ----------
        sample : dict
            Input sample to score

        Returns
        -------
        dict
            Dictionary with added reward scores column
        """
        messages = self._format_messages(sample)
        rm_response = self._post_request(messages)

        if rm_response.status_code != 200:
            logger.error(f"API request failed with status {rm_response.status_code}")
            rewards = [0.0] * len(
                sample[self.response_col].split(self.step_separator)
            )  # Default to 0 scores on failure
        else:
            rewards = self._extract_rewards(rm_response)

        sample[self.output_col] = rewards
        return sample

    def generate(self, samples: Dataset, batch_size: int = 4) -> Dataset:
        """Generate reward scores for the input samples.

        Parameters
        ----------
        samples : Dataset
            Input dataset containing samples to score
        batch_size : int, optional
            Number of processes to use for parallel processing, by default 4

        Returns
        -------
        Dataset
            Dataset with added reward scores
        """
        return samples.map(self._generate, num_proc=batch_size)
|
@@ -0,0 +1,140 @@
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
2
|
+
# Third Party
|
3
|
+
from datasets import Dataset
|
4
|
+
|
5
|
+
# Local
|
6
|
+
from .block import Block
|
7
|
+
from ..registry import BlockRegistry
|
8
|
+
from ..logger_config import setup_logger
|
9
|
+
|
10
|
+
logger = setup_logger(__name__)
|
11
|
+
|
12
|
+
|
13
|
+
@BlockRegistry.register("SamplePopulatorBlock")
class SamplePopulatorBlock(Block):
    """Populate each sample with the config whose key matches a chosen column."""

    def __init__(self, config_paths, column_name, post_fix="", **batch_kwargs) -> None:
        # Base class records the block name; we reuse the class name here.
        super().__init__(block_name=self.__class__.__name__)
        self.configs = {}
        for path in config_paths:
            # An optional postfix selects a variant file, e.g. foo.yaml -> foo_v2.yaml.
            resolved = path.replace(".yaml", f"_{post_fix}.yaml") if post_fix else path
            # Config key is the bare file stem (no directory, no extension).
            key = path.split("/")[-1].split(".")[0]
            self.configs[key] = self._load_config(resolved)
        self.column_name = column_name
        self.num_procs = batch_kwargs.get("num_procs", 8)

    def _generate(self, sample) -> dict:
        # Merge the matching config into the sample; config values win on clashes.
        return {**sample, **self.configs[sample[self.column_name]]}

    def generate(self, samples) -> Dataset:
        """Apply the per-key config merge across the whole dataset."""
        return samples.map(self._generate, num_proc=self.num_procs)
|
37
|
+
|
38
|
+
|
39
|
+
@BlockRegistry.register("SelectorBlock")
class SelectorBlock(Block):
    """Copy a value into an output column from a source column chosen per row."""

    def __init__(self, choice_map, choice_col, output_col, **batch_kwargs) -> None:
        super().__init__(block_name=self.__class__.__name__)
        self.choice_map = choice_map
        self.choice_col = choice_col
        self.output_col = output_col
        self.num_procs = batch_kwargs.get("num_procs", 8)

    def _generate(self, sample) -> dict:
        # The choice column's value indexes choice_map to pick the source column.
        source_column = self.choice_map[sample[self.choice_col]]
        sample[self.output_col] = sample[source_column]
        return sample

    def generate(self, samples: Dataset) -> Dataset:
        """Apply the per-row column selection across the dataset."""
        return samples.map(self._generate, num_proc=self.num_procs)
|
55
|
+
|
56
|
+
|
57
|
+
@BlockRegistry.register("CombineColumnsBlock")
class CombineColumnsBlock(Block):
    """Concatenate several columns into one output column, joined by a separator."""

    def __init__(self, columns, output_col, separator="\n\n", **batch_kwargs) -> None:
        super().__init__(block_name=self.__class__.__name__)
        self.columns = columns
        self.output_col = output_col
        self.separator = separator
        self.num_procs = batch_kwargs.get("num_procs", 8)

    def _generate(self, sample) -> dict:
        # Join the configured columns' values in order.
        pieces = [sample[name] for name in self.columns]
        sample[self.output_col] = self.separator.join(pieces)
        return sample

    def generate(self, samples: Dataset) -> Dataset:
        """Apply the column concatenation across the whole dataset."""
        return samples.map(self._generate, num_proc=self.num_procs)
|
75
|
+
|
76
|
+
|
77
|
+
@BlockRegistry.register("FlattenColumnsBlock")
class FlattenColumnsBlock(Block):
    """Unpivot a set of columns into (variable, value) rows (wide-to-long melt)."""

    def __init__(self, block_name: str, var_cols: list, value_name: str, var_name: str) -> None:
        super().__init__(block_name=block_name)
        self.var_cols = var_cols
        self.value_name = value_name
        self.var_name = var_name

    def generate(self, samples: Dataset) -> Dataset:
        """Melt ``var_cols``; every other column is kept as an identifier."""
        frame = samples.to_pandas()
        identifiers = [c for c in samples.column_names if c not in self.var_cols]
        melted = frame.melt(
            id_vars=identifiers,
            value_vars=self.var_cols,
            value_name=self.value_name,
            var_name=self.var_name,
        )
        return Dataset.from_pandas(melted)
|
94
|
+
|
95
|
+
|
96
|
+
@BlockRegistry.register("DuplicateColumns")
class DuplicateColumns(Block):
    def __init__(self, block_name: str, columns_map: dict) -> None:
        """Create duplicate of columns specified in column map.

        Args:
            columns_map (dict): mapping of existing column to new column names
        """
        super().__init__(block_name=block_name)
        self.columns_map = columns_map

    def generate(self, samples: Dataset):
        """Add one duplicated column per (source, duplicate) entry in the map."""
        for source, duplicate in self.columns_map.items():
            samples = samples.add_column(duplicate, samples[source])
        return samples
|
112
|
+
|
113
|
+
|
114
|
+
@BlockRegistry.register("RenameColumns")
class RenameColumns(Block):
    def __init__(self, block_name: str, columns_map: dict) -> None:
        """Rename dataset columns.

        Args:
            columns_map (dict): mapping of existing column to new column names
        """
        self.columns_map = columns_map
        super().__init__(block_name=block_name)

    def generate(self, samples: Dataset):
        """Return the dataset with columns renamed according to ``columns_map``."""
        return samples.rename_columns(self.columns_map)
|
129
|
+
|
130
|
+
|
131
|
+
@BlockRegistry.register("SetToMajorityValue")
class SetToMajorityValue(Block):
    """Overwrite a column so every row holds the column's most frequent value."""

    def __init__(self, block_name: str, col_name) -> None:
        self.col_name = col_name
        super().__init__(block_name)

    def generate(self, samples: Dataset):
        """Replace ``col_name`` everywhere with its modal value (first mode on ties)."""
        frame = samples.to_pandas()
        frame[self.col_name] = frame[self.col_name].mode()[0]
        return Dataset.from_pandas(frame)
|
File without changes
|
File without changes
|
@@ -0,0 +1,34 @@
|
|
1
|
+
system: You are an AI assistant that uses a Chain of Thought (CoT) approach with reflection to answer queries.
|
2
|
+
introduction: |
|
3
|
+
Task Description: {{ task_description }}
|
4
|
+
principles: |
|
5
|
+
Follow these steps:
|
6
|
+
1. Think through the problem step by step within the [Start of Thinking] and [End of Thinking] tags.
|
7
|
+
2. Reflect on your thinking to check for any errors or improvements within the [Start of Reflection] and [End of Reflection] tags.
|
8
|
+
3. Make any necessary adjustments based on your reflection.
|
9
|
+
4. You can iterate on your thinking and reflection before providing the final answer.
|
10
|
+
5. Provide your final, concise answer within the [Start of Output] and [End of Output] tags.
|
11
|
+
examples: ~
|
12
|
+
generation: |
|
13
|
+
Important: The Thinking, and Reflection sections are for your internal reasoning process only.
|
14
|
+
Do not include any part of the final answer in these sections.
|
15
|
+
The actual response to the query must be entirely contained within the Output tags. Use the following format for your response:
|
16
|
+
|
17
|
+
[Start of Thinking]
|
18
|
+
[Your step-by-step reasoning goes here. This is your internal thought process, not the final answer.]
|
19
|
+
[End of Thinking]
|
20
|
+
[Start of Reflection]
|
21
|
+
[Your reflection on your reasoning, checking for errors or improvements]
|
22
|
+
[End of Reflection]
|
23
|
+
[Iterate your thinking and reflection to make any necessary adjustments based on your reflection.]
|
24
|
+
|
25
|
+
[Start of Output]
|
26
|
+
[Your final, concise answer to the query. This is the only part that will be shown to the user.]
|
27
|
+
[End of Output]
|
28
|
+
[Stop generating any text after the End of Output tag.]
|
29
|
+
|
30
|
+
Here is the query for annotation:
|
31
|
+
{{ prompt }}
|
32
|
+
|
33
|
+
start_tags: ["[Start of Thinking]", "[Start of Output]"]
|
34
|
+
end_tags: ["[End of Reflection]", "[End of Output]"]
|
@@ -0,0 +1,32 @@
|
|
1
|
+
system: ~
|
2
|
+
introduction: |
|
3
|
+
Task Description: {{ task_description }}
|
4
|
+
principles: ~
|
5
|
+
examples: |
|
6
|
+
To better assist you with this task, here are some examples:
|
7
|
+
{% if questions_and_answers is defined %}
|
8
|
+
{% for sample in questions_and_answers %}
|
9
|
+
[Start of Question]
|
10
|
+
{{ sample.question }}
|
11
|
+
[End of Question]
|
12
|
+
|
13
|
+
[Start of Output]
|
14
|
+
{{ sample.answer }}
|
15
|
+
[End of Output]
|
16
|
+
{% endfor %}
|
17
|
+
{% else %}
|
18
|
+
[Start of Question]
|
19
|
+
{{ seed_question }}
|
20
|
+
[End of Question]
|
21
|
+
|
22
|
+
[Start of Output]
|
23
|
+
{{ seed_response }}
|
24
|
+
[End of Output]
|
25
|
+
{% endif %}
|
26
|
+
generation: |
|
27
|
+
Here is the query for annotation:
|
28
|
+
[Start of Question]
|
29
|
+
{{ prompt }}
|
30
|
+
[End of Question]
|
31
|
+
start_tags: [""]
|
32
|
+
end_tags: [""]
|
File without changes
|
@@ -0,0 +1,45 @@
|
|
1
|
+
system: You are an AI assistant knowledgeable about {{domain}} domain. Be accurate but concise in response.
|
2
|
+
|
3
|
+
introduction: |
|
4
|
+
Please break down the following snippet from an article about {{domain}} into atomic facts.
|
5
|
+
|
6
|
+
principles: |
|
7
|
+
1. Make sure each fact is grounded in the given text.
|
8
|
+
2. Include any necessary information needed to explain the fact or concept
|
9
|
+
3. The atomic facts should be as simple as possible; if a fact is a compound sentence, break it down one more time.
|
10
|
+
4. For clarity, avoid using pronouns like 'it', 'he', 'she', 'this', 'that' etc., and instead use the full names or titles.
|
11
|
+
5. Focus only on key concepts and facts. Skip any question or problems mentioned in the passage.
|
12
|
+
|
13
|
+
examples: |
|
14
|
+
To help you understand the task, here is an example:
|
15
|
+
[Passage]
|
16
|
+
The tournament was contested by ten national teams, maintaining the same format used in 2019. After six weeks of round-robin matches, India, South Africa, Australia, and New Zealand finished as the top four and qualified for the knockout stage. In the knockout stage, India and Australia beat New Zealand and South Africa, respectively, to advance to the final, played on 19 November at the Narendra Modi Stadium in Ahmedabad. Australia won the final by six wickets, winning their sixth Cricket World Cup title.
|
17
|
+
[Facts]
|
18
|
+
1. The tournament was contested by ten national teams.
|
19
|
+
2. The tournament maintained the same format used in 2019.
|
20
|
+
3. The round-robin matches lasted for six weeks.
|
21
|
+
4. India finished as one of the top four teams.
|
22
|
+
5. South Africa finished as one of the top four teams.
|
23
|
+
6. Australia finished as one of the top four teams.
|
24
|
+
7. New Zealand finished as one of the top four teams.
|
25
|
+
8. India, South Africa, Australia, and New Zealand qualified for the knockout stage.
|
26
|
+
9. In the knockout stage, India beat New Zealand.
|
27
|
+
10. In the knockout stage, Australia beat South Africa.
|
28
|
+
11. India advanced to the final.
|
29
|
+
12. Australia advanced to the final.
|
30
|
+
13. The final was played on 19 November.
|
31
|
+
14. The final was held at the Narendra Modi Stadium in Ahmedabad.
|
32
|
+
15. Australia won the final by six wickets.
|
33
|
+
16. Australia won their sixth Cricket World Cup title.
|
34
|
+
[End]
|
35
|
+
|
36
|
+
|
37
|
+
generation: |
|
38
|
+
Now it's your turn: break down the following snippet from an article about {{domain}} into atomic facts, following a similar style as the above examples.
|
39
|
+
[Passage]
|
40
|
+
{{document}}
|
41
|
+
[Facts]
|
42
|
+
|
43
|
+
|
44
|
+
start_tags: [""]
|
45
|
+
end_tags: [""]
|
@@ -0,0 +1,35 @@
|
|
1
|
+
summary_detailed:
|
2
|
+
- Provide me with a comprehensive summary of the given document.
|
3
|
+
- Prepare a detailed breakdown of the contents of the document for me.
|
4
|
+
- Summarize the document thoroughly, covering all important points.
|
5
|
+
- Create a detailed executive summary of the provided document.
|
6
|
+
- Compose a comprehensive overview of the document's content.
|
7
|
+
- Deliver a detailed synopsis of the material presented in the document.
|
8
|
+
- Furnish me with a detailed analysis of the document's key points.
|
9
|
+
- Generate a thorough summary of the main ideas in the document.
|
10
|
+
- Offer a detailed digest of the information contained in the document.
|
11
|
+
- Supply me with a comprehensive rundown of the document's contents.
|
12
|
+
|
13
|
+
summary_extractive:
|
14
|
+
- Provide me with a summary of the document using extractive methods.
|
15
|
+
- Create an extractive summary for the given document.
|
16
|
+
- Generate an extractive summary from the document that was given to you.
|
17
|
+
- Summarize the document using extractive techniques.
|
18
|
+
- Create a summary of the provided document using extractive methods.
|
19
|
+
- Generate an extractive summary for the document provided.
|
20
|
+
- Using extractive techniques, summarize the given document.
|
21
|
+
- Create a summary of the document using extractive summarization.
|
22
|
+
- Generate an extractive summary of the document that was provided.
|
23
|
+
- Summarize the provided document using extractive summarization techniques.
|
24
|
+
|
25
|
+
summary_atomic_facts:
|
26
|
+
- Identify and list all atomic facts from the document.
|
27
|
+
- Extract all key facts from the given document.
|
28
|
+
- List all the important facts from the provided document.
|
29
|
+
- Highlight all the atomic facts present in the document.
|
30
|
+
- Identify and enumerate all key facts from the given text.
|
31
|
+
- List out all the critical information from the document.
|
32
|
+
- Highlight all the essential facts from the provided text.
|
33
|
+
- Identify and summarize all the important details from the document.
|
34
|
+
- Extract all the atomic facts from the given document.
|
35
|
+
- List all the key takeaways from the provided text.
|
File without changes
|
@@ -0,0 +1,17 @@
|
|
1
|
+
system: You are an AI assistant that is expert at summarizing text.
|
2
|
+
|
3
|
+
introduction: |
|
4
|
+
Give me detailed summary for below document, making sure all key points are covered.
|
5
|
+
|
6
|
+
principles: |
|
7
|
+
Do not add any new information.
|
8
|
+
Do not miss any key points from the provided document
|
9
|
+
|
10
|
+
examples: ""
|
11
|
+
|
12
|
+
generation: |
|
13
|
+
Document:
|
14
|
+
{{document}}
|
15
|
+
|
16
|
+
start_tags: [""]
|
17
|
+
end_tags: [""]
|
@@ -0,0 +1,68 @@
|
|
1
|
+
system: You are a very knowledgeable AI Assistant that will faithfully assist the user with their task.
|
2
|
+
|
3
|
+
introduction: |
|
4
|
+
Determine if the provided information is corroborated by the given context. Respond with YES if the context substantiates the information, even partially. Answer NO if the context does not support the information.
|
5
|
+
|
6
|
+
principles: |
|
7
|
+
Guidelines
|
8
|
+
- Answer YES when the context provides either direct or indirect evidence supporting the information. Indirect evidence may include contextual implications or inferred connections that reasonably support the information.
|
9
|
+
- Answer NO if the context lacks any supportive evidence, clearly contradicts the information, or if the support provided by the context is too vague or speculative to establish a solid connection to the information.
|
10
|
+
- Avoid using "partially" in your response. If the context provides any reasonable support (direct or indirect) for the information, consider it as a YES.
|
11
|
+
|
12
|
+
Strictly answer in this format
|
13
|
+
[Start of Context]
|
14
|
+
...
|
15
|
+
[End of Context]
|
16
|
+
[Start of Response]
|
17
|
+
...
|
18
|
+
[End of Response]
|
19
|
+
[Start of Explanation]
|
20
|
+
...
|
21
|
+
[End of Explanation]
|
22
|
+
[Start of Answer]
|
23
|
+
...
|
24
|
+
[End of Answer]
|
25
|
+
|
26
|
+
examples: |
|
27
|
+
Example 1:
|
28
|
+
[Start of Context]
|
29
|
+
An apple pie is a fruit pie with apples as the main filling. It's often served with whipped cream, ice cream, custard, or cheddar cheese. Typically, it has a double crust, with pastry above and below the filling. The upper crust can be solid or latticed.
|
30
|
+
[End of Context]
|
31
|
+
[Start of Response]
|
32
|
+
Apple pie is generally double-crusted.
|
33
|
+
[End of Response]
|
34
|
+
[Start of Explanation]
|
35
|
+
The context directly supports the information by stating that an apple pie "typically" has "a double crust," which matches the claim that apple pie is generally double-crusted.
|
36
|
+
[End of Explanation]
|
37
|
+
[Start of Answer]
|
38
|
+
YES
|
39
|
+
[End of Answer]
|
40
|
+
|
41
|
+
Example 2:
|
42
|
+
[Start of Context]
|
43
|
+
An apple pie is a fruit pie with apples as the main filling. It's often served with whipped cream, ice cream, custard, or cheddar cheese. Typically, it has a double crust, with pastry above and below the filling. The upper crust can be solid or latticed.
|
44
|
+
[End of Context]
|
45
|
+
[Start of Response]
|
46
|
+
Apple pies taste bad.
|
47
|
+
[End of Response]
|
48
|
+
[Start of Explanation]
|
49
|
+
The context does not provide any information about the taste of apple pies. The statement "Apple pies taste bad" is a subjective opinion and is not supported or mentioned in the given context.
|
50
|
+
[End of Explanation]
|
51
|
+
[Start of Answer]
|
52
|
+
NO
|
53
|
+
[End of Answer]
|
54
|
+
|
55
|
+
generation: |
|
56
|
+
Now, based on the above examples and guidelines, determine if the following information is supported by the context provided. Answer YES or NO.
|
57
|
+
* Return the explanation within the [Start of Explanation] and [End of Explanation] tags.
|
58
|
+
* Return the answer between [Start of Answer] and [End of Answer] tags.
|
59
|
+
|
60
|
+
[Start of Context]
|
61
|
+
{{document}}
|
62
|
+
[End of Context]
|
63
|
+
[Start of Response]
|
64
|
+
{{response}}
|
65
|
+
[End of Response]
|
66
|
+
|
67
|
+
start_tags: ["[Start of Explanation]", "[Start of Answer]"]
|
68
|
+
end_tags: ["[End of Explanation]", "[End of Answer]"]
|
@@ -0,0 +1,38 @@
|
|
1
|
+
system: You are a very knowledgeable AI Assistant that will faithfully assist the user with their task.
|
2
|
+
|
3
|
+
introduction: |
|
4
|
+
Given below question can you verify if it meets below requirements and based on them give a rating of 1 if it meets all of them or 0 otherwise.
|
5
|
+
|
6
|
+
principles: |
|
7
|
+
Here are the requirements:
|
8
|
+
|
9
|
+
Non-Referential Clarity and Contextual Independence: Ensure that the question is self-explanatory and does not rely on specific, unprovided external content, such as particular documents, specific tables, or detailed datasets. The question should be structured to be understandable and clear without requiring direct access to or knowledge of these specific external sources.
|
10
|
+
|
11
|
+
Subject-Aware Completeness: The question should be crafted to be answerable on its own, given a reasonable level of specialized knowledge in the relevant subject area. It is acceptable and encouraged for the question to require specialized understanding pertinent to the topic; however, it should not depend on unique, external information not provided in the question itself. This distinction allows for questions that necessitate a deep understanding of a subject while ensuring they are not tied to specific external content like a particular dataset or a line in a document.
|
12
|
+
|
13
|
+
Please give your answer as short explanation followed by rating of either 0 or 1 as below.
|
14
|
+
|
15
|
+
* Return a short explanation within the [Start of Explanation] and [End of Explanation] tags.
|
16
|
+
* Return the rating on a binary 0/1 scale between [Start of Rating] and [End of Rating] tags.
|
17
|
+
|
18
|
+
[Start of Question]
|
19
|
+
...
|
20
|
+
[End of Question]
|
21
|
+
|
22
|
+
[Start of Explanation]
|
23
|
+
...
|
24
|
+
[End of Explanation]
|
25
|
+
|
26
|
+
[Start of Rating]
|
27
|
+
...
|
28
|
+
[End of Rating]
|
29
|
+
|
30
|
+
examples: ""
|
31
|
+
|
32
|
+
generation: |
|
33
|
+
[Start of Question]
|
34
|
+
{{question}}
|
35
|
+
[End of Question]
|
36
|
+
|
37
|
+
start_tags: ["[Start of Explanation]", "[Start of Rating]"]
|
38
|
+
end_tags: ["[End of Explanation]", "[End of Rating]"]
|