sdg-hub 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. sdg_hub/__init__.py +28 -1
  2. sdg_hub/_version.py +2 -2
  3. sdg_hub/core/__init__.py +22 -0
  4. sdg_hub/core/blocks/__init__.py +58 -0
  5. sdg_hub/core/blocks/base.py +313 -0
  6. sdg_hub/core/blocks/deprecated_blocks/__init__.py +29 -0
  7. sdg_hub/core/blocks/deprecated_blocks/combine_columns.py +93 -0
  8. sdg_hub/core/blocks/deprecated_blocks/duplicate_columns.py +88 -0
  9. sdg_hub/core/blocks/deprecated_blocks/filter_by_value.py +103 -0
  10. sdg_hub/core/blocks/deprecated_blocks/flatten_columns.py +94 -0
  11. sdg_hub/core/blocks/deprecated_blocks/llmblock.py +479 -0
  12. sdg_hub/core/blocks/deprecated_blocks/rename_columns.py +88 -0
  13. sdg_hub/core/blocks/deprecated_blocks/sample_populator.py +58 -0
  14. sdg_hub/core/blocks/deprecated_blocks/selector.py +97 -0
  15. sdg_hub/core/blocks/deprecated_blocks/set_to_majority_value.py +88 -0
  16. sdg_hub/core/blocks/evaluation/__init__.py +9 -0
  17. sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +564 -0
  18. sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +564 -0
  19. sdg_hub/core/blocks/evaluation/verify_question_block.py +564 -0
  20. sdg_hub/core/blocks/filtering/__init__.py +12 -0
  21. sdg_hub/core/blocks/filtering/column_value_filter.py +188 -0
  22. sdg_hub/core/blocks/llm/__init__.py +25 -0
  23. sdg_hub/core/blocks/llm/client_manager.py +398 -0
  24. sdg_hub/core/blocks/llm/config.py +336 -0
  25. sdg_hub/core/blocks/llm/error_handler.py +368 -0
  26. sdg_hub/core/blocks/llm/llm_chat_block.py +542 -0
  27. sdg_hub/core/blocks/llm/prompt_builder_block.py +368 -0
  28. sdg_hub/core/blocks/llm/text_parser_block.py +310 -0
  29. sdg_hub/core/blocks/registry.py +331 -0
  30. sdg_hub/core/blocks/transform/__init__.py +23 -0
  31. sdg_hub/core/blocks/transform/duplicate_columns.py +88 -0
  32. sdg_hub/core/blocks/transform/index_based_mapper.py +225 -0
  33. sdg_hub/core/blocks/transform/melt_columns.py +126 -0
  34. sdg_hub/core/blocks/transform/rename_columns.py +69 -0
  35. sdg_hub/core/blocks/transform/text_concat.py +102 -0
  36. sdg_hub/core/blocks/transform/uniform_col_val_setter.py +101 -0
  37. sdg_hub/core/flow/__init__.py +20 -0
  38. sdg_hub/core/flow/base.py +980 -0
  39. sdg_hub/core/flow/metadata.py +344 -0
  40. sdg_hub/core/flow/migration.py +187 -0
  41. sdg_hub/core/flow/registry.py +330 -0
  42. sdg_hub/core/flow/validation.py +265 -0
  43. sdg_hub/{utils → core/utils}/__init__.py +6 -4
  44. sdg_hub/{utils → core/utils}/datautils.py +1 -3
  45. sdg_hub/core/utils/error_handling.py +208 -0
  46. sdg_hub/{utils → core/utils}/path_resolution.py +2 -2
  47. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/atomic_facts.yaml +40 -0
  48. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/detailed_summary.yaml +13 -0
  49. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_faithfulness.yaml +64 -0
  50. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_question.yaml +29 -0
  51. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_relevancy.yaml +81 -0
  52. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/extractive_summary.yaml +13 -0
  53. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +191 -0
  54. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/generate_questions_responses.yaml +54 -0
  55. sdg_hub-0.2.0.dist-info/METADATA +218 -0
  56. sdg_hub-0.2.0.dist-info/RECORD +63 -0
  57. sdg_hub/blocks/__init__.py +0 -42
  58. sdg_hub/blocks/block.py +0 -96
  59. sdg_hub/blocks/llmblock.py +0 -375
  60. sdg_hub/blocks/openaichatblock.py +0 -556
  61. sdg_hub/blocks/utilblocks.py +0 -597
  62. sdg_hub/checkpointer.py +0 -139
  63. sdg_hub/configs/annotations/cot_reflection.yaml +0 -34
  64. sdg_hub/configs/annotations/detailed_annotations.yaml +0 -28
  65. sdg_hub/configs/annotations/detailed_description.yaml +0 -10
  66. sdg_hub/configs/annotations/detailed_description_icl.yaml +0 -32
  67. sdg_hub/configs/annotations/simple_annotations.yaml +0 -9
  68. sdg_hub/configs/knowledge/__init__.py +0 -0
  69. sdg_hub/configs/knowledge/atomic_facts.yaml +0 -46
  70. sdg_hub/configs/knowledge/auxilary_instructions.yaml +0 -35
  71. sdg_hub/configs/knowledge/detailed_summary.yaml +0 -18
  72. sdg_hub/configs/knowledge/evaluate_faithfulness.yaml +0 -68
  73. sdg_hub/configs/knowledge/evaluate_question.yaml +0 -38
  74. sdg_hub/configs/knowledge/evaluate_relevancy.yaml +0 -84
  75. sdg_hub/configs/knowledge/extractive_summary.yaml +0 -18
  76. sdg_hub/configs/knowledge/generate_code_questions_responses.yaml +0 -39
  77. sdg_hub/configs/knowledge/generate_questions.yaml +0 -82
  78. sdg_hub/configs/knowledge/generate_questions_responses.yaml +0 -56
  79. sdg_hub/configs/knowledge/generate_responses.yaml +0 -86
  80. sdg_hub/configs/knowledge/mcq_generation.yaml +0 -83
  81. sdg_hub/configs/knowledge/router.yaml +0 -12
  82. sdg_hub/configs/knowledge/simple_generate_qa.yaml +0 -34
  83. sdg_hub/configs/reasoning/__init__.py +0 -0
  84. sdg_hub/configs/reasoning/dynamic_cot.yaml +0 -40
  85. sdg_hub/configs/skills/__init__.py +0 -0
  86. sdg_hub/configs/skills/analyzer.yaml +0 -48
  87. sdg_hub/configs/skills/annotation.yaml +0 -36
  88. sdg_hub/configs/skills/contexts.yaml +0 -28
  89. sdg_hub/configs/skills/critic.yaml +0 -60
  90. sdg_hub/configs/skills/evaluate_freeform_pair.yaml +0 -111
  91. sdg_hub/configs/skills/evaluate_freeform_questions.yaml +0 -78
  92. sdg_hub/configs/skills/evaluate_grounded_pair.yaml +0 -119
  93. sdg_hub/configs/skills/evaluate_grounded_questions.yaml +0 -51
  94. sdg_hub/configs/skills/freeform_questions.yaml +0 -34
  95. sdg_hub/configs/skills/freeform_responses.yaml +0 -39
  96. sdg_hub/configs/skills/grounded_questions.yaml +0 -38
  97. sdg_hub/configs/skills/grounded_responses.yaml +0 -59
  98. sdg_hub/configs/skills/icl_examples/STEM.yaml +0 -56
  99. sdg_hub/configs/skills/icl_examples/__init__.py +0 -0
  100. sdg_hub/configs/skills/icl_examples/coding.yaml +0 -97
  101. sdg_hub/configs/skills/icl_examples/extraction.yaml +0 -36
  102. sdg_hub/configs/skills/icl_examples/humanities.yaml +0 -71
  103. sdg_hub/configs/skills/icl_examples/math.yaml +0 -85
  104. sdg_hub/configs/skills/icl_examples/reasoning.yaml +0 -30
  105. sdg_hub/configs/skills/icl_examples/roleplay.yaml +0 -45
  106. sdg_hub/configs/skills/icl_examples/writing.yaml +0 -80
  107. sdg_hub/configs/skills/judge.yaml +0 -53
  108. sdg_hub/configs/skills/planner.yaml +0 -67
  109. sdg_hub/configs/skills/respond.yaml +0 -8
  110. sdg_hub/configs/skills/revised_responder.yaml +0 -78
  111. sdg_hub/configs/skills/router.yaml +0 -59
  112. sdg_hub/configs/skills/simple_generate_qa_freeform.yaml +0 -27
  113. sdg_hub/configs/skills/simple_generate_qa_grounded.yaml +0 -31
  114. sdg_hub/flow.py +0 -477
  115. sdg_hub/flow_runner.py +0 -450
  116. sdg_hub/flows/generation/knowledge/mmlu_bench.yaml +0 -13
  117. sdg_hub/flows/generation/knowledge/simple_knowledge.yaml +0 -12
  118. sdg_hub/flows/generation/knowledge/synth_knowledge.yaml +0 -89
  119. sdg_hub/flows/generation/knowledge/synth_knowledge1.5.yaml +0 -136
  120. sdg_hub/flows/generation/skills/improve_responses.yaml +0 -103
  121. sdg_hub/flows/generation/skills/simple_freeform_skill.yaml +0 -12
  122. sdg_hub/flows/generation/skills/simple_grounded_skill.yaml +0 -12
  123. sdg_hub/flows/generation/skills/synth_grounded_skills.yaml +0 -80
  124. sdg_hub/flows/generation/skills/synth_skills.yaml +0 -59
  125. sdg_hub/pipeline.py +0 -121
  126. sdg_hub/prompts.py +0 -80
  127. sdg_hub/registry.py +0 -122
  128. sdg_hub/sdg.py +0 -206
  129. sdg_hub/utils/config_validation.py +0 -91
  130. sdg_hub/utils/error_handling.py +0 -94
  131. sdg_hub/utils/validation_result.py +0 -10
  132. sdg_hub-0.1.4.dist-info/METADATA +0 -190
  133. sdg_hub-0.1.4.dist-info/RECORD +0 -89
  134. sdg_hub/{logger_config.py → core/utils/logger_config.py} +1 -1
  135. /sdg_hub/{configs/__init__.py → flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/README.md} +0 -0
  136. /sdg_hub/{configs/annotations → flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab}/__init__.py +0 -0
  137. {sdg_hub-0.1.4.dist-info → sdg_hub-0.2.0.dist-info}/WHEEL +0 -0
  138. {sdg_hub-0.1.4.dist-info → sdg_hub-0.2.0.dist-info}/licenses/LICENSE +0 -0
  139. {sdg_hub-0.1.4.dist-info → sdg_hub-0.2.0.dist-info}/top_level.txt +0 -0
sdg_hub/core/utils/error_handling.py
@@ -0,0 +1,208 @@
+"""Custom exception classes for SDG Hub error handling."""
+
+# Standard
+from typing import Optional
+
+
+class SDGHubError(Exception):
+    """Base exception class for all SDG Hub errors."""
+
+    def __init__(self, message: str, details: Optional[str] = None):
+        """Initialize SDGHubError.
+
+        Parameters
+        ----------
+        message : str
+            The main error message.
+        details : str, optional
+            Additional details about the error.
+        """
+        self.message = message
+        self.details = details
+        full_message = message
+        if details:
+            full_message = f"{message}\nDetails: {details}"
+        super().__init__(full_message)
+
+
+class FlowRunnerError(SDGHubError):
+    """Base exception class for flow runner errors."""
+
+    pass
+
+
+class DatasetLoadError(FlowRunnerError):
+    """Raised when dataset loading fails."""
+
+    pass
+
+
+class FlowConfigurationError(FlowRunnerError):
+    """Raised when flow configuration is invalid."""
+
+    pass
+
+
+class APIConnectionError(FlowRunnerError):
+    """Raised when API connection fails."""
+
+    pass
+
+
+class DataGenerationError(FlowRunnerError):
+    """Raised when data generation fails."""
+
+    pass
+
+
+class DataSaveError(FlowRunnerError):
+    """Raised when saving generated data fails."""
+
+    pass
+
+
+class BlockError(SDGHubError):
+    """Base exception class for block-related errors."""
+
+    pass
+
+
+class BlockConfigurationError(BlockError):
+    """Raised when block configuration is invalid."""
+
+    pass
+
+
+class BlockExecutionError(BlockError):
+    """Raised when block execution fails."""
+
+    pass
+
+
+class BlockValidationError(BlockError):
+    """Base exception class for block validation errors."""
+
+    pass
+
+
+class MissingColumnError(BlockValidationError):
+    """Raised when required input columns are missing from dataset."""
+
+    def __init__(
+        self, block_name: str, missing_columns: list[str], available_columns: list[str]
+    ):
+        """Initialize MissingColumnError.
+
+        Parameters
+        ----------
+        block_name : str
+            Name of the block that failed validation.
+        missing_columns : List[str]
+            List of missing column names.
+        available_columns : List[str]
+            List of available column names in the dataset.
+        """
+        self.block_name = block_name
+        self.missing_columns = missing_columns
+        self.available_columns = available_columns
+
+        message = (
+            f"Block '{block_name}' missing required input columns: {missing_columns}"
+        )
+        details = f"Available columns: {available_columns}"
+
+        super().__init__(message, details)
+
+
+class EmptyDatasetError(BlockValidationError):
+    """Raised when an empty dataset is provided to a block."""
+
+    def __init__(self, block_name: str):
+        """Initialize EmptyDatasetError.
+
+        Parameters
+        ----------
+        block_name : str
+            Name of the block that received the empty dataset.
+        """
+        self.block_name = block_name
+
+        message = f"Block '{block_name}' received an empty dataset"
+        details = "Dataset must contain at least one sample for processing"
+
+        super().__init__(message, details)
+
+
+class OutputColumnCollisionError(BlockValidationError):
+    """Raised when output columns would overwrite existing dataset columns."""
+
+    def __init__(
+        self, block_name: str, collision_columns: list[str], existing_columns: list[str]
+    ):
+        """Initialize OutputColumnCollisionError.
+
+        Parameters
+        ----------
+        block_name : str
+            Name of the block that has column collisions.
+        collision_columns : List[str]
+            List of output columns that collide with existing columns.
+        existing_columns : List[str]
+            List of existing column names in the dataset.
+        """
+        self.block_name = block_name
+        self.collision_columns = collision_columns
+        self.existing_columns = existing_columns
+
+        message = f"Block '{block_name}' output columns would overwrite existing data: {collision_columns}"
+        details = f"Existing columns: {existing_columns}"
+
+        super().__init__(message, details)
+
+
+class TemplateValidationError(BlockValidationError):
+    """Raised when template validation fails due to missing variables."""
+
+    def __init__(
+        self,
+        block_name: str,
+        missing_variables: list[str],
+        available_variables: list[str],
+    ):
+        """Initialize TemplateValidationError.
+
+        Parameters
+        ----------
+        block_name : str
+            Name of the block that failed template validation.
+        missing_variables : List[str]
+            List of missing template variable names.
+        available_variables : List[str]
+            List of available template variable names.
+        """
+        self.block_name = block_name
+        self.missing_variables = missing_variables
+        self.available_variables = available_variables
+
+        message = f"Block '{block_name}' template validation failed - missing required variables: {missing_variables}"
+        details = f"Available variables: {available_variables}"
+
+        super().__init__(message, details)
+
+
+class FlowError(SDGHubError):
+    """Base exception class for flow-related errors."""
+
+    pass
+
+
+class FlowValidationError(FlowError):
+    """Raised when flow validation fails."""
+
+    pass
+
+
+class FlowExecutionError(FlowError):
+    """Raised when flow execution fails."""
+
+    pass
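
The hunk above defines the full exception hierarchy, so raising and catching these errors is mechanical: each validation error carries structured fields (block name, column lists) on top of the formatted message. A minimal sketch of how a block might surface a MissingColumnError; the validate_columns helper and the column names are hypothetical, for illustration only:

# Illustrative only -- validate_columns is a hypothetical helper, not part of sdg_hub.
from sdg_hub.core.utils.error_handling import MissingColumnError

def validate_columns(block_name, required, dataset_columns):
    missing = [col for col in required if col not in dataset_columns]
    if missing:
        raise MissingColumnError(block_name, missing, list(dataset_columns))

try:
    validate_columns("gen_detailed_summary", ["document", "document_outline"], ["document"])
except MissingColumnError as err:
    # str(err) is the message plus a "Details: ..." line (see SDGHubError.__init__)
    print(err)
    print(err.missing_columns)  # ['document_outline']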
sdg_hub/core/utils/path_resolution.py
@@ -7,11 +7,11 @@ search paths.
 """
 
 # Standard
-from typing import List, Union
+from typing import Union
 import os
 
 
-def resolve_path(filename: str, search_dirs: Union[str, List[str]]) -> str:
+def resolve_path(filename: str, search_dirs: Union[str, list[str]]) -> str:
     """Resolve a file path relative to one or more search directories.
 
     Files are checked in the following order:
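
Only the signature modernization (typing.List to the builtin list) is visible in this hunk, but it shows the call shape: search_dirs may be a single directory or a list of them. A usage sketch; the paths are illustrative, and the exact resolution order lives in the docstring truncated above:

from sdg_hub.core.utils.path_resolution import resolve_path

# Check a single directory, or several in order, for the named file.
config_path = resolve_path("flow.yaml", ["./my_flows", "/usr/share/sdg_hub/flows"])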
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/atomic_facts.yaml
@@ -0,0 +1,40 @@
+- role: system
+  content: You are an AI assistant knowledgeable about the {{domain}} domain. Be accurate but concise in your responses.
+
+- role: user
+  content: |
+    Please break down the following snippet from an article about {{domain}} into atomic facts.
+
+    1. Make sure each fact is grounded in the given text.
+    2. Include any information needed to explain the fact or concept.
+    3. Keep the atomic facts as simple as possible; if a sentence is compound, break it down one more time.
+    4. For clarity, avoid pronouns like 'it', 'he', 'she', 'this', and 'that'; use full names or titles instead.
+    5. Focus only on key concepts and facts. Skip any questions or problems mentioned in the passage.
+
+    To help you understand the task, here is an example:
+    [Passage]
+    The tournament was contested by ten national teams, maintaining the same format used in 2019. After six weeks of round-robin matches, India, South Africa, Australia, and New Zealand finished as the top four and qualified for the knockout stage. In the knockout stage, India and Australia beat New Zealand and South Africa, respectively, to advance to the final, played on 19 November at the Narendra Modi Stadium in Ahmedabad. Australia won the final by six wickets, winning their sixth Cricket World Cup title.
+    [Facts]
+    1. The tournament was contested by ten national teams.
+    2. The tournament maintained the same format used in 2019.
+    3. The round-robin matches lasted for six weeks.
+    4. India finished as one of the top four teams.
+    5. South Africa finished as one of the top four teams.
+    6. Australia finished as one of the top four teams.
+    7. New Zealand finished as one of the top four teams.
+    8. India, South Africa, Australia, and New Zealand qualified for the knockout stage.
+    9. In the knockout stage, India beat New Zealand.
+    10. In the knockout stage, Australia beat South Africa.
+    11. India advanced to the final.
+    12. Australia advanced to the final.
+    13. The final was played on 19 November.
+    14. The final was held at the Narendra Modi Stadium in Ahmedabad.
+    15. Australia won the final by six wickets.
+    16. Australia won their sixth Cricket World Cup title.
+    [End]
+
+    Now it's your turn: break down the following snippet from an article about {{domain}} into atomic facts, following the same style as the example above.
+    [Passage]
+    {{document_outline}}
+    {{document}}
+    [Facts]
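
The {{...}} placeholders in these prompt configs are filled per row by PromptBuilderBlock (each prompt_config_path in flow.yaml below points at one of these files). How that block renders them internally is not shown in this diff; the sketch below assumes Jinja-style substitution and uses jinja2 directly, with made-up row values:

import yaml
from jinja2 import Template

# Load the message list and render its placeholders with sample row values
# (the values below are illustrative, not from the package).
with open("atomic_facts.yaml") as f:
    messages = yaml.safe_load(f)

row = {"domain": "cricket", "document_outline": "2023 Cricket World Cup", "document": "..."}
rendered = [
    {"role": m["role"], "content": Template(m["content"]).render(**row)}
    for m in messages
]
print(rendered[0]["content"])  # system prompt with {{domain}} filled in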
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/detailed_summary.yaml
@@ -0,0 +1,13 @@
+- role: system
+  content: You are an AI assistant that is an expert at summarizing text.
+
+- role: user
+  content: |
+    Give me a detailed summary of the document below, making sure all key points are covered.
+
+    Do not add any new information.
+    Do not miss any key points from the provided document.
+
+    Document:
+    {{document_outline}}
+    {{document}}
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_faithfulness.yaml
@@ -0,0 +1,64 @@
+- role: system
+  content: You are a very knowledgeable AI Assistant that will faithfully assist the user with their task.
+
+- role: user
+  content: |
+    Determine if the provided information is corroborated by the given context. Respond with YES if the context substantiates the information, even partially. Answer NO if the context does not support the information.
+
+    Guidelines:
+    - Answer YES when the context provides either direct or indirect evidence supporting the information. Indirect evidence may include contextual implications or inferred connections that reasonably support the information.
+    - Answer NO if the context lacks any supportive evidence, clearly contradicts the information, or if the support provided by the context is too vague or speculative to establish a solid connection to the information.
+    - Avoid using "partially" in your response. If the context provides any reasonable support (direct or indirect) for the information, consider it a YES.
+
+    Strictly answer in this format:
+    [Start of Context]
+    ...
+    [End of Context]
+    [Start of Response]
+    ...
+    [End of Response]
+    [Start of Explanation]
+    ...
+    [End of Explanation]
+    [Start of Answer]
+    ...
+    [End of Answer]
+
+    Example 1:
+    [Start of Context]
+    An apple pie is a fruit pie with apples as the main filling. It's often served with whipped cream, ice cream, custard, or cheddar cheese. Typically, it has a double crust, with pastry above and below the filling. The upper crust can be solid or latticed.
+    [End of Context]
+    [Start of Response]
+    Apple pie is generally double-crusted.
+    [End of Response]
+    [Start of Explanation]
+    The context directly supports the response by stating that an apple pie "typically ... has a double crust, with pastry above and below the filling," which matches the claim that apple pie is generally double-crusted.
+    [End of Explanation]
+    [Start of Answer]
+    YES
+    [End of Answer]
+
+    Example 2:
+    [Start of Context]
+    An apple pie is a fruit pie with apples as the main filling. It's often served with whipped cream, ice cream, custard, or cheddar cheese. Typically, it has a double crust, with pastry above and below the filling. The upper crust can be solid or latticed.
+    [End of Context]
+    [Start of Response]
+    Apple pies taste bad.
+    [End of Response]
+    [Start of Explanation]
+    The context does not provide any information about the taste of apple pies. The statement "Apple pies taste bad" is a subjective opinion and is not supported or mentioned in the given context.
+    [End of Explanation]
+    [Start of Answer]
+    NO
+    [End of Answer]
+
+    Now, based on the above examples and guidelines, determine if the following information is supported by the context provided. Answer YES or NO.
+    * Return the explanation within the [Start of Explanation] and [End of Explanation] tags.
+    * Return the answer between the [Start of Answer] and [End of Answer] tags.
+
+    [Start of Context]
+    {{document}}
+    [End of Context]
+    [Start of Response]
+    {{response}}
+    [End of Response]
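
flow.yaml (further below) wires this prompt into EvaluateFaithfulnessBlock with start_tags/end_tags that slice the explanation and answer out of the model's reply. A standalone sketch of that kind of tag-pair extraction; this is not the block's actual implementation, just the idea:

def extract_between(text: str, start_tag: str, end_tag: str):
    """Illustrative helper: return the text between one tag pair, or None."""
    start = text.find(start_tag)
    end = text.find(end_tag, start)
    if start == -1 or end == -1:
        return None
    return text[start + len(start_tag):end].strip()

raw = ("[Start of Explanation] The context supports it. [End of Explanation]\n"
       "[Start of Answer] YES [End of Answer]")
print(extract_between(raw, "[Start of Answer]", "[End of Answer]"))  # YES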
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_question.yaml
@@ -0,0 +1,29 @@
+- role: system
+  content: You are a very knowledgeable AI Assistant that will faithfully assist the user with their task.
+
+- role: user
+  content: |
+    For the question below, verify whether it meets the following requirements, and give a rating of 1 if it meets all of them or 0 otherwise.
+
+    Here are the requirements:
+
+    Non-Referential Clarity and Contextual Independence: Ensure that the question is self-explanatory and does not rely on specific, unprovided external content, such as particular documents, specific tables, or detailed datasets. The question should be structured to be understandable and clear without requiring direct access to or knowledge of these specific external sources.
+
+    Subject-Aware Completeness: The question should be crafted to be answerable on its own, given a reasonable level of specialized knowledge in the relevant subject area. It is acceptable and encouraged for the question to require specialized understanding pertinent to the topic; however, it should not depend on unique, external information not provided in the question itself. This distinction allows for questions that necessitate a deep understanding of a subject while ensuring they are not tied to specific external content like a particular dataset or a line in a document.
+
+    Please give your answer as a short explanation followed by a rating of either 0 or 1, as below.
+
+    * Return a short explanation within the [Start of Explanation] and [End of Explanation] tags.
+    * Return the rating on a binary 0/1 scale between the [Start of Rating] and [End of Rating] tags.
+
+    [Start of Question]
+    {{question}}
+    [End of Question]
+
+    [Start of Explanation]
+    ...
+    [End of Explanation]
+
+    [Start of Rating]
+    ...
+    [End of Rating]
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_relevancy.yaml
@@ -0,0 +1,81 @@
+- role: system
+  content: You are a very knowledgeable AI Assistant that will faithfully assist the user with their task.
+
+- role: user
+  content: |
+    Your task is to assess the relevance of a given response to a specific query. This evaluation should be conducted methodically by answering two key questions:
+
+    1. Subject Matter Relevance: Does the provided response accurately match the subject matter of the user's query? This question aims to determine if the response is directly related to the main topic or issue presented in the query.
+    2. Focus and Perspective Addressing: Does the provided response effectively address the focus or perspective on the subject matter as outlined in the user's query? This question seeks to evaluate whether the response not only matches the subject matter but also aligns with the specific angle or concern raised by the user.
+
+    For each question, assign a score of 1 point if the response meets the criteria and 0 points if it does not. After evaluating each question, provide detailed feedback explaining your reasoning behind the scores awarded.
+
+    Conclude your evaluation with a total score: the sum of the points assigned for each question, with a maximum possible score of 2 points.
+    Evaluate the response only against the above criteria; do not create new questions.
+
+    Example 1:
+    [Start of Question]
+    What is the impact of global warming on polar bears?
+    [End of Question]
+
+    [Start of Response]
+    Global warming leads to melting ice caps, reducing the habitat of polar bears and negatively impacting their hunting grounds.
+    [End of Response]
+
+    [Start of Feedback]
+    - Subject Matter Relevance Score: 1 (The response is directly related to the impact of global warming on polar bears.)
+    - Alignment with Query's Focus Score: 1 (The response specifically addresses how global warming affects polar bears' habitat and hunting grounds.)
+    [End of Feedback]
+
+    [Start of Score]
+    2
+    [End of Score]
+
+    Example 2:
+    [Start of Question]
+    How does photosynthesis work?
+    [End of Question]
+
+    [Start of Response]
+    Plants require sunlight and water to grow.
+    [End of Response]
+
+    [Start of Feedback]
+    - Subject Matter Relevance Score: 0 (The response is related to plant growth, but does not specifically address the process of photosynthesis.)
+    - Alignment with Query's Focus Score: 0 (The response fails to detail the photosynthesis process, missing the specific focus of the query.)
+    [End of Feedback]
+
+    [Start of Score]
+    0
+    [End of Score]
+
+    Example 3:
+    [Start of Question]
+    What are the benefits of electric vehicles?
+    [End of Question]
+
+    [Start of Response]
+    Electric vehicles reduce dependency on fossil fuels and decrease greenhouse gas emissions.
+    [End of Response]
+
+    [Start of Feedback]
+    - Subject Matter Relevance Score: 1 (The response matches the query's subject on the benefits of electric vehicles.)
+    - Alignment with Query's Focus Score: 1 (The response effectively addresses the environmental benefits of electric vehicles, aligning with the query's focus.)
+    [End of Feedback]
+
+    [Start of Score]
+    2
+    [End of Score]
+
+    Begin your response by providing the feedback, followed by the score. Be as objective as possible.
+
+    [Start of Question]
+    {{question}}
+    [End of Question]
+
+    [Start of Response]
+    {{response}}
+    [End of Response]
+
+    * Return the feedback within the [Start of Feedback] and [End of Feedback] tags.
+    * Return the final score between the [Start of Score] and [End of Score] tags.
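
In flow.yaml below, the score parsed from between these tags feeds EvaluateRelevancyBlock's filter: convert_dtype: float plus operation: eq against filter_value: 2.0. The mapping of the YAML operation names onto Python's operator module is an assumption here, but it makes the filter semantics concrete:

import operator

# Hypothetical rendering of the relevancy filter in flow.yaml below:
# convert the parsed score to float, then keep only rows equal to 2.0.
parsed_score = "2"                         # text found between the score tags
keep = operator.eq(float(parsed_score), 2.0)
print(keep)  # True -> only responses scoring the full 2 points survive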
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/extractive_summary.yaml
@@ -0,0 +1,13 @@
+- role: system
+  content: You are an AI assistant that is an expert at summarizing text.
+
+- role: user
+  content: |
+    Give me a detailed extractive summary of the document below, making sure all key points are covered.
+
+    Do not add any new information.
+    Do not miss any key points from the provided document.
+
+    Document:
+    {{document_outline}}
+    {{document}}
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml
@@ -0,0 +1,191 @@
+metadata:
+  name: "Advanced Document Grounded Question-Answer Generation Flow for Knowledge Tuning"
+  description: "A comprehensive flow that generates high-quality question-answer pairs from input documents using multiple LLM blocks for question generation, answer synthesis, and quality evaluation."
+  version: "1.0.0"
+  author: "SDG Hub Contributors"
+
+  recommended_models:
+    default: "meta-llama/Llama-3.3-70B-Instruct"
+    compatible: ["microsoft/phi-4", "mistralai/Mixtral-8x7B-Instruct-v0.1"]
+    experimental: []
+
+  tags:
+    - "question-generation"
+    - "knowledge-extraction"
+    - "qa-pairs"
+    - "document-processing"
+    - "educational"
+
+  license: "Apache-2.0"
+  min_sdg_hub_version: "0.2.0"
+
+  dataset_requirements:
+    required_columns:
+      - "document"
+      - "document_outline"
+      - "domain"
+      - "icl_document"
+      - "icl_query_1"
+      - "icl_response_1"
+      - "icl_query_2"
+      - "icl_response_2"
+      - "icl_query_3"
+      - "icl_response_3"
+    description: "Input dataset should contain documents with text content and domain classification. Each document should be substantial enough for meaningful question generation (minimum 100 words recommended)."
+
+blocks:
+  - block_type: DuplicateColumnsBlock
+    block_config:
+      block_name: duplicate_document_col
+      input_cols: {document: base_document}
+
+  - block_type: PromptBuilderBlock
+    block_config:
+      block_name: detailed_summary_prompt
+      input_cols: [document, document_outline]
+      output_cols: summary_prompt
+      prompt_config_path: detailed_summary.yaml
+      format_as_messages: true
+
+  - block_type: LLMChatBlock
+    block_config:
+      block_name: gen_detailed_summary
+      input_cols: summary_prompt
+      output_cols: raw_summary_detailed
+      max_tokens: 2048
+      async_mode: true
+
+  - block_type: TextParserBlock
+    block_config:
+      block_name: parse_detailed_summary
+      input_cols: raw_summary_detailed
+      output_cols: summary_detailed
+      start_tags: [""]
+      end_tags: [""]
+
+  - block_type: PromptBuilderBlock
+    block_config:
+      block_name: atomic_facts_prompt
+      input_cols: [document, document_outline, domain]
+      output_cols: atomic_facts_prompt
+      prompt_config_path: atomic_facts.yaml
+      format_as_messages: true
+
+  - block_type: LLMChatBlock
+    block_config:
+      block_name: gen_atomic_facts
+      input_cols: atomic_facts_prompt
+      output_cols: raw_atomic_facts
+      max_tokens: 2048
+      async_mode: true
+
+  - block_type: TextParserBlock
+    block_config:
+      block_name: parse_atomic_facts
+      input_cols: raw_atomic_facts
+      output_cols: summary_atomic_facts
+      start_tags: [""]
+      end_tags: [""]
+
+  - block_type: PromptBuilderBlock
+    block_config:
+      block_name: extractive_summary_prompt
+      input_cols: [document, document_outline]
+      output_cols: extractive_summary_prompt
+      prompt_config_path: extractive_summary.yaml
+      format_as_messages: true
+
+  - block_type: LLMChatBlock
+    block_config:
+      block_name: gen_extractive_summary
+      input_cols: extractive_summary_prompt
+      output_cols: raw_summary_extractive
+      max_tokens: 2048
+      async_mode: true
+
+  - block_type: TextParserBlock
+    block_config:
+      block_name: parse_extractive_summary
+      input_cols: raw_summary_extractive
+      output_cols: summary_extractive
+      start_tags: [""]
+      end_tags: [""]
+
+  - block_type: MeltColumnsBlock
+    block_config:
+      block_name: melt_summary_columns
+      input_cols: [summary_detailed, summary_extractive, summary_atomic_facts, base_document]
+      output_cols: [summary, dataset_type]
+
+  - block_type: RenameColumnsBlock
+    block_config:
+      block_name: rename_to_document_column
+      input_cols: {document: raw_document, summary: document}
+
+  - block_type: PromptBuilderBlock
+    block_config:
+      block_name: knowledge_generation_prompt
+      input_cols: [domain, document, document_outline, icl_document, icl_query_1, icl_response_1, icl_query_2, icl_response_2, icl_query_3, icl_response_3]
+      output_cols: knowledge_generation_prompt
+      prompt_config_path: generate_questions_responses.yaml
+      format_as_messages: true
+
+  - block_type: LLMChatBlock
+    block_config:
+      block_name: knowledge_generation
+      input_cols: knowledge_generation_prompt
+      output_cols: raw_knowledge_generation
+      temperature: 0.0
+      max_tokens: 2048
+      async_mode: true
+
+  - block_type: TextParserBlock
+    block_config:
+      block_name: parse_knowledge_generation
+      input_cols: raw_knowledge_generation
+      output_cols: [question, response]
+      parsing_pattern: "\\[(?:Question|QUESTION)\\]\\s*(.*?)\\s*\\[(?:Answer|ANSWER)\\]\\s*(.*?)\\s*(?=\\[(?:Question|QUESTION)\\]|$)"
+      parser_cleanup_tags: ["[END]"]
+
+  - block_type: EvaluateFaithfulnessBlock
+    block_config:
+      block_name: eval_faithfulness
+      input_cols: [document, response]
+      output_cols: [faithfulness_explanation, faithfulness_judgment]
+      prompt_config_path: evaluate_faithfulness.yaml
+      filter_value: "YES"
+      operation: eq
+      async_mode: true
+      format_as_messages: true
+      start_tags: ["[Start of Explanation]", "[Start of Answer]"]
+      end_tags: ["[End of Explanation]", "[End of Answer]"]
+
+  - block_type: EvaluateRelevancyBlock
+    block_config:
+      block_name: eval_relevancy
+      input_cols: [question, response]
+      output_cols: [relevancy_explanation, relevancy_score]
+      prompt_config_path: evaluate_relevancy.yaml
+      filter_value: 2.0
+      operation: eq
+      convert_dtype: float
+      max_tokens: 2048
+      async_mode: true
+      format_as_messages: true
+      start_tags: ["[Start of Feedback]", "[Start of Score]"]
+      end_tags: ["[End of Feedback]", "[End of Score]"]
+
+  - block_type: VerifyQuestionBlock
+    block_config:
+      block_name: verify_question
+      input_cols: [question]
+      output_cols: [verification_explanation, verification_rating]
+      prompt_config_path: evaluate_question.yaml
+      filter_value: 1.0
+      operation: ge
+      convert_dtype: float
+      max_tokens: 2048
+      async_mode: true
+      format_as_messages: true
+      start_tags: ["[Start of Explanation]", "[Start of Rating]"]
+      end_tags: ["[End of Explanation]", "[End of Rating]"]
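
The parsing_pattern on parse_knowledge_generation is an ordinary Python regex (double-escaped for YAML), so it can be sanity-checked outside the flow. A sketch applying the same pattern to a made-up model response; the re.DOTALL flag is an assumption about how the block runs it:

import re

# Same pattern as the YAML above, single-escaped as a Python raw string.
pattern = r"\[(?:Question|QUESTION)\]\s*(.*?)\s*\[(?:Answer|ANSWER)\]\s*(.*?)\s*(?=\[(?:Question|QUESTION)\]|$)"

raw = """[Question] What format did the 2023 tournament use?
[Answer] A ten-team round robin followed by a knockout stage.
[Question] Who won the final?
[Answer] Australia, by six wickets. [END]"""

for question, answer in re.findall(pattern, raw, flags=re.DOTALL):
    # flow.yaml also lists "[END]" under parser_cleanup_tags, so strip it here too
    print(question, "->", answer.replace("[END]", "").strip())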