sdg-hub 0.6.1__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sdg_hub/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
  commit_id: COMMIT_ID
  __commit_id__: COMMIT_ID

- __version__ = version = '0.6.1'
- __version_tuple__ = version_tuple = (0, 6, 1)
+ __version__ = version = '0.7.0'
+ __version_tuple__ = version_tuple = (0, 7, 0)

  __commit_id__ = commit_id = None
sdg_hub/flows/evaluation/rag/__init__.py ADDED
File without changes (empty module marker)
sdg_hub/flows/evaluation/rag/answer_generation.yaml ADDED
@@ -0,0 +1,21 @@
+ - role: system
+   content: |
+     You are an extractive question-answering system. Your answers must be FULLY GROUNDED in the provided context.
+
+     Strict Rules:
+     1. Use ONLY information explicitly stated in the context
+     2. Do NOT make inferences, assumptions, or add general knowledge
+     3. Do NOT elaborate beyond what the context says
+     4. Quote or paraphrase the context directly
+     5. If information is missing, acknowledge it
+
+ - role: user
+   content: |
+     Context:
+     {{context}}
+
+     Question:
+     {{question}}
+
+     Provide a direct, extractive answer using ONLY the information stated above.
+
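These prompt configs are plain lists of chat messages with Jinja2-style placeholders; inside the flow they are consumed by PromptBuilderBlock via prompt_config_path. As a rough illustration only (not sdg_hub's internal code), a template like the one above could be rendered with PyYAML and Jinja2; the file name and the variable values below are made up for the example:

# Sketch: render a prompt config such as answer_generation.yaml into chat messages.
# Assumes PyYAML and Jinja2 are installed; values are illustrative, not from the package.
import yaml
from jinja2 import Template

with open("answer_generation.yaml", encoding="utf-8") as f:
    messages = yaml.safe_load(f)  # list of {"role": ..., "content": ...}

variables = {
    "context": "The Eiffel Tower is 330 metres tall.",
    "question": "How tall is the Eiffel Tower?",
}

rendered = [
    {"role": m["role"], "content": Template(m["content"]).render(**variables)}
    for m in messages
]
print(rendered[1]["content"])  # the filled-in user message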
sdg_hub/flows/evaluation/rag/conceptual_qa_generation.yaml ADDED
@@ -0,0 +1,25 @@
+ - role: system
+   content: |
+     A "reasoning" question is a question with the following properties:
+     - It is a natural language question.
+     - It requires the reader to think critically and make an inference or draw a conclusion based on the information provided.
+
+     I will provide you with an abstract description of some content, the actual text content, and a specific topic to focus on.
+
+     Your Task:
+     1. Focus on the following topic: {{topic}}
+     2. Think of a "reasoning" question about this topic that can be answered using ONLY the provided text.
+
+     ## Abstract Description of Content
+     {{document_outline}}
+
+     ## Text Content
+     {{document}}
+
+     ## Topic to Focus On
+     {{topic}}
+
+     State the generated question. Do not say anything other than the question.
+ - role: user
+   content: |
+     Generate the question.
sdg_hub/flows/evaluation/rag/context_extraction.yaml ADDED
@@ -0,0 +1,23 @@
+ - role: system
+   content: |
+     You are an expert data annotator. Your task is to extract the EXACT sentences from the provided context that answer the question.
+
+     Rules:
+     1. Extract ONLY sentences that contain information used to answer the question.
+     2. Do NOT modify the sentences - copy them exactly as they appear.
+     3. If multiple sentences are needed, output them one per line.
+     4. If no sentence directly answers the question (based on the provided answer), output "No relevant sentences found."
+     5. Output ONLY the sentences, no other text.
+
+ - role: user
+   content: |
+     Context:
+     {{context}}
+
+     Question:
+     {{question}}
+
+     Answer:
+     {{answer}}
+
+     Relevant Sentences:
sdg_hub/flows/evaluation/rag/flow.yaml ADDED
@@ -0,0 +1,201 @@
+ metadata:
+   name: RAG Evaluation Dataset Flow
+   description: Generates Q&A pairs for RAG evaluation.
+   version: 1.0.0
+   author: "Red Hat AI RAG Contributors"
+   license: "Apache-2.0"
+   recommended_models:
+     default: "openai/gpt-oss-120b"
+     compatible:
+       - "meta-llama/Llama-3.3-70B-Instruct"
+       - "microsoft/phi-4"
+   tags:
+     - rag-evaluation
+     - qa-pairs
+   dataset_requirements:
+     required_columns:
+       - document
+       - document_outline
+     description: Input dataset should contain documents with text content and document outlines.
+   id: loud-dawn-245
+ blocks:
+   - block_type: DuplicateColumnsBlock
+     block_config:
+       block_name: duplicate_to_context
+       input_cols: {document: context}
+
+   - block_type: PromptBuilderBlock
+     block_config:
+       block_name: topic_prompt
+       input_cols: [document]
+       output_cols: topic_messages
+       prompt_config_path: topic_generation.yaml
+
+   - block_type: LLMChatBlock
+     block_config:
+       block_name: gen_topic
+       input_cols: topic_messages
+       output_cols: topic_response
+       async_mode: true
+       n: 1
+       max_tokens: 2048
+       temperature: 0.7
+
+   - block_type: LLMParserBlock
+     block_config:
+       block_name: parse_topic
+       input_cols: topic_response
+       field_prefix: topic_
+       extract_content: true
+
+   - block_type: RenameColumnsBlock
+     block_config:
+       block_name: rename_topic
+       input_cols: {topic_content: topic}
+
+   - block_type: PromptBuilderBlock
+     block_config:
+       block_name: conceptual_prompt
+       input_cols:
+         document: document
+         document_outline: document_outline
+         topic: topic
+       output_cols: conceptual_messages
+       prompt_config_path: conceptual_qa_generation.yaml
+
+   - block_type: LLMChatBlock
+     block_config:
+       block_name: gen_conceptual_question
+       input_cols: conceptual_messages
+       output_cols: question_response
+       async_mode: true
+       n: 1
+       max_tokens: 2048
+       temperature: 0.7
+
+   - block_type: LLMParserBlock
+     block_config:
+       block_name: parse_question
+       input_cols: question_response
+       field_prefix: question_
+       extract_content: true
+
+   - block_type: PromptBuilderBlock
+     block_config:
+       block_name: evolution_prompt
+       input_cols: {question_content: question}
+       output_cols: evolution_messages
+       prompt_config_path: question_evolution.yaml
+
+   - block_type: LLMChatBlock
+     block_config:
+       block_name: evolve_question
+       input_cols: evolution_messages
+       output_cols: evolution_response
+       async_mode: true
+       n: 1
+       max_tokens: 4096
+       temperature: 0.7
+
+   - block_type: LLMParserBlock
+     block_config:
+       block_name: parse_evolved_question
+       input_cols: evolution_response
+       field_prefix: "evolved_"
+       extract_content: true
+
+   - block_type: PromptBuilderBlock
+     block_config:
+       block_name: answer_prompt
+       input_cols:
+         context: context
+         evolved_content: question
+       output_cols: answer_messages
+       prompt_config_path: answer_generation.yaml
+
+   - block_type: LLMChatBlock
+     block_config:
+       block_name: gen_answer
+       input_cols: answer_messages
+       output_cols: answer_response
+       async_mode: true
+       n: 1
+       max_tokens: 4096
+       temperature: 0.2
+
+   - block_type: LLMParserBlock
+     block_config:
+       block_name: parse_answer
+       input_cols: answer_response
+       field_prefix: "answer_"
+       extract_content: true
+
+   - block_type: PromptBuilderBlock
+     block_config:
+       block_name: critic_prompt
+       input_cols:
+         context: context
+         evolved_content: question
+         answer_content: answer
+       output_cols: critic_messages
+       prompt_config_path: groundedness_critic.yaml
+
+   - block_type: LLMChatBlock
+     block_config:
+       block_name: gen_critic_score
+       input_cols: critic_messages
+       output_cols: critic_response
+       async_mode: true
+       n: 1
+       max_tokens: 512
+       temperature: 0.0
+
+   - block_type: LLMParserBlock
+     block_config:
+       block_name: parse_critic_score
+       input_cols: critic_response
+       field_prefix: "critic_"
+       extract_content: true
+
+   - block_type: ColumnValueFilterBlock
+     block_config:
+       block_name: filter_ungrounded
+       input_cols: critic_content
+       filter_value: [4, 5]
+       operation: "eq"
+       convert_dtype: "int"
+
+   - block_type: PromptBuilderBlock
+     block_config:
+       block_name: extraction_prompt
+       input_cols:
+         context: context
+         evolved_content: question
+         answer_content: answer
+       output_cols: extraction_messages
+       prompt_config_path: context_extraction.yaml
+
+   - block_type: LLMChatBlock
+     block_config:
+       block_name: extract_context
+       input_cols: extraction_messages
+       output_cols: extraction_response
+       async_mode: true
+       n: 1
+       max_tokens: 4096
+       temperature: 0.0
+
+   - block_type: LLMParserBlock
+     block_config:
+       block_name: parse_extracted_context
+       input_cols: extraction_response
+       field_prefix: "ground_truth_"
+       extract_content: true
+
+   - block_type: RenameColumnsBlock
+     block_config:
+       block_name: rename_final_columns
+       input_cols:
+         evolved_content: question
+         answer_content: response
+         ground_truth_content: ground_truth_context
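The flow above chains topic generation, question generation, question evolution, answer generation, a groundedness critic, filtering, and ground-truth context extraction. As a reading aid only, the sketch below (assuming PyYAML is available) loads flow.yaml and prints the block pipeline in order; it merely inspects the configuration and does not execute the flow:

# Sketch: list the block pipeline defined in flow.yaml without running it.
import yaml

with open("flow.yaml", encoding="utf-8") as f:
    flow = yaml.safe_load(f)

print(flow["metadata"]["name"])
for i, block in enumerate(flow["blocks"], start=1):
    cfg = block["block_config"]
    print(f"{i:2d}. {block['block_type']:<24s} {cfg['block_name']}")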
sdg_hub/flows/evaluation/rag/groundedness_critic.yaml ADDED
@@ -0,0 +1,24 @@
+ - role: system
+   content: |
+     You are a strict evaluator for a RAG system. Your task is to rate how well the provided Answer is supported by the Context.
+
+     Score 1: The answer is completely unsupported or contradicts the context.
+     Score 2: The answer relies heavily on external knowledge or weak inferences.
+     Score 3: The answer is partially supported but includes some unsupported details.
+     Score 4: The answer is mostly supported, with only minor inferences.
+     Score 5: The answer is fully and explicitly supported by the context.
+
+     Output ONLY the integer score (1, 2, 3, 4, or 5). Do not output any other text.
+
+ - role: user
+   content: |
+     Context:
+     {{context}}
+
+     Question:
+     {{question}}
+
+     Answer:
+     {{answer}}
+
+     Score:
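In flow.yaml, the critic's output feeds the filter_ungrounded block (filter_value: [4, 5], operation "eq", convert_dtype "int"), so only Q&A pairs scored 4 or 5 survive. The snippet below is an illustrative sketch of that filtering logic, not sdg_hub's ColumnValueFilterBlock implementation; the rows are made-up examples:

# Sketch of the filter_ungrounded step's effect: keep rows whose critic score is 4 or 5.
rows = [
    {"question": "Q1", "response": "A1", "critic_content": "5"},
    {"question": "Q2", "response": "A2", "critic_content": "2"},
]

def is_grounded(row, accepted=(4, 5)):
    try:
        return int(row["critic_content"]) in accepted  # convert_dtype: "int"
    except ValueError:
        return False  # non-integer critic output gets dropped

kept = [r for r in rows if is_grounded(r)]
print(len(kept))  # -> 1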
sdg_hub/flows/evaluation/rag/question_evolution.yaml ADDED
@@ -0,0 +1,18 @@
+ - role: system
+   content: |
+     You are an experienced linguistics expert for building testsets for large language model applications.
+
+     Your task is to rewrite the following question in a more indirect and compressed form, following these rules:
+     1. Make the question more indirect
+     2. Make the question shorter
+     3. Use abbreviations if possible
+     4. Keep the core meaning intact so it remains answerable
+
+     Output ONLY the rewritten question with a question mark "?" at the end. Do not provide any other explanation or text.
+
+ - role: user
+   content: |
+     Question to rewrite:
+     {{question}}
+
+     Rewritten Question:
sdg_hub/flows/evaluation/rag/topic_generation.yaml ADDED
@@ -0,0 +1,12 @@
+ - role: system
+   content: |
+     You are a helpful assistant that identifies a specific topic within a text.
+     Output only the topic. Do not include "The topic is" or any other text.
+ - role: user
+   content: |
+     Identify a specific topic in the following text.
+
+     Text:
+     {{document}}
+
+     Topic:
sdg_hub-0.6.1.dist-info/METADATA → sdg_hub-0.7.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: sdg_hub
- Version: 0.6.1
+ Version: 0.7.0
  Summary: Synthetic Data Generation
  Author-email: Red Hat AI Innovation <abhandwa@redhat.com>
  License: Apache-2.0
sdg_hub-0.6.1.dist-info/RECORD → sdg_hub-0.7.0.dist-info/RECORD CHANGED
@@ -1,5 +1,5 @@
  sdg_hub/__init__.py,sha256=TlkZT40-70urdcWLqv3kupaJj8s-SVgd2QyvlSFwb4A,510
- sdg_hub/_version.py,sha256=7vNQiXfKffK0nbqts6Xy6-E1b1YOm4EGigvgaHr83o4,704
+ sdg_hub/_version.py,sha256=uLbRjFSUZAgfl7V7O8zKV5Db36k7tz87ZIVq3l2SWs0,704
  sdg_hub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sdg_hub/core/__init__.py,sha256=e3BoejbqjYhasf9t__L4qE52lkD9EBjx4o--2kqKdro,460
  sdg_hub/core/blocks/__init__.py,sha256=8Rn1SglH8V3jGmTD_cG-h7qk9ktAab2eaBdyk7RN_hY,865
@@ -37,6 +37,14 @@ sdg_hub/core/utils/logger_config.py,sha256=6_cnsIHtSAdq1iTTZ7Q7nAJ1dmldlxSZ0AB49
  sdg_hub/core/utils/path_resolution.py,sha256=yWof4kGNpQ5dKcrVHg0h9KfOKLZ6ROjdfsLAZsQT5rM,2000
  sdg_hub/core/utils/time_estimator.py,sha256=rM3_R-Ka5DEtvOtlJoA_5pXSyQ6tT6t4h6qh3_5BCZo,12639
  sdg_hub/core/utils/yaml_utils.py,sha256=tShCd-FFkp0xlKnLe7dXsMOR4AvT9d2qRUmu4ZnPSEY,1458
+ sdg_hub/flows/evaluation/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ sdg_hub/flows/evaluation/rag/answer_generation.yaml,sha256=dxsHIPyEs14e9fH6JeEJgnrLIV-nLqXmnynj0XF_4os,624
+ sdg_hub/flows/evaluation/rag/conceptual_qa_generation.yaml,sha256=cvU8P3EUj9-Cr19Y3ASxkxEBh9ll_NYMC3s6-x1Monc,847
+ sdg_hub/flows/evaluation/rag/context_extraction.yaml,sha256=StAAU8yCTzaeGFKieJKFIDRfe21aqk7VIekMH1oEuxA,724
+ sdg_hub/flows/evaluation/rag/flow.yaml,sha256=ZDkCrQaN9WfvwWaMjgfA2qUrTVz7pCw-PiHzOyzXKio,5276
+ sdg_hub/flows/evaluation/rag/groundedness_critic.yaml,sha256=r5zqetGnNvg4UxCuENTzdWhCFbG6TnkY-seDMVRBBko,782
+ sdg_hub/flows/evaluation/rag/question_evolution.yaml,sha256=d3G11dQ3Wkgz0JBNyqTi-6QMGIdODOVcGNw1x9OnTEE,649
+ sdg_hub/flows/evaluation/rag/topic_generation.yaml,sha256=DhY_Wt7NzzjfirYlQQqABrXn73vMQj9W2XLZZEaofKc,303
  sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_answers.yaml,sha256=THRT3cY44KGI_69B2wqt2Q89EknnOSE7B4A_jdnxlIU,330
  sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_multiple_qa.yaml,sha256=Cs-yeiXs4yac3dZsurdXBZj-kkwWdK-xBywjvBlgtGI,669
@@ -76,8 +84,8 @@ sdg_hub/flows/text_analysis/structured_insights/extract_entities.yaml,sha256=Q_S
  sdg_hub/flows/text_analysis/structured_insights/extract_keywords.yaml,sha256=_nPPMdHnxag_lYbhYUjGJGo-CvRwWvwdGX7cQhdZ1S0,847
  sdg_hub/flows/text_analysis/structured_insights/flow.yaml,sha256=BBV18SdvuVTAESjwkJ7V1jbb-cSTBvNl3SCycd0oEQ4,4934
  sdg_hub/flows/text_analysis/structured_insights/summarize.yaml,sha256=WXwQak1pF8e1OwnOoI1EHu8QB6iUNW89rfkTdi1Oq54,687
- sdg_hub-0.6.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- sdg_hub-0.6.1.dist-info/METADATA,sha256=JQxLH1YwDrV5D1cAaaRziFFiF17buxN-fnyse5lQVV8,9584
- sdg_hub-0.6.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- sdg_hub-0.6.1.dist-info/top_level.txt,sha256=TqI7d-HE1n6zkXFkU0nF3A1Ct0P0pBaqI675uFokhx4,8
- sdg_hub-0.6.1.dist-info/RECORD,,
+ sdg_hub-0.7.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ sdg_hub-0.7.0.dist-info/METADATA,sha256=ABg2y-NjvyUPbMdqyDgrzQhpxdnv4oCwuOarTT86ahI,9584
+ sdg_hub-0.7.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ sdg_hub-0.7.0.dist-info/top_level.txt,sha256=TqI7d-HE1n6zkXFkU0nF3A1Ct0P0pBaqI675uFokhx4,8
+ sdg_hub-0.7.0.dist-info/RECORD,,
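Each RECORD line above follows the standard wheel format "path,sha256=<urlsafe-base64 digest>,<size in bytes>". Purely as an illustration of how those entries are derived (not sdg_hub code), a RECORD-style entry for a locally available file could be recomputed like this:

# Sketch: recompute a wheel RECORD entry ("path,sha256=<digest>,<size>") for one file.
import base64
import hashlib
from pathlib import Path

def record_entry(path: str) -> str:
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=").decode()
    return f"{path},sha256={digest},{len(data)}"

print(record_entry("sdg_hub/_version.py"))  # path must exist in the unpacked wheel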