sdg-hub 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sdg_hub/_version.py +2 -2
- sdg_hub/core/blocks/__init__.py +2 -4
- sdg_hub/core/blocks/base.py +61 -6
- sdg_hub/core/blocks/filtering/column_value_filter.py +3 -2
- sdg_hub/core/blocks/llm/__init__.py +2 -4
- sdg_hub/core/blocks/llm/llm_chat_block.py +251 -265
- sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +216 -98
- sdg_hub/core/blocks/llm/llm_parser_block.py +320 -0
- sdg_hub/core/blocks/llm/text_parser_block.py +53 -152
- sdg_hub/core/flow/base.py +7 -4
- sdg_hub/core/utils/datautils.py +40 -22
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml +51 -11
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/__init__.py +0 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/flow.yaml +159 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml +51 -11
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml +14 -2
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +146 -26
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/README.md +0 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/__init__.py +0 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/atomic_facts_ja.yaml +41 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/detailed_summary_ja.yaml +14 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/extractive_summary_ja.yaml +14 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml +304 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/generate_questions_responses_ja.yaml +55 -0
- sdg_hub/flows/text_analysis/structured_insights/flow.yaml +28 -4
- {sdg_hub-0.3.0.dist-info → sdg_hub-0.4.0.dist-info}/METADATA +1 -1
- {sdg_hub-0.3.0.dist-info → sdg_hub-0.4.0.dist-info}/RECORD +30 -26
- sdg_hub/core/blocks/evaluation/__init__.py +0 -9
- sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +0 -323
- sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +0 -323
- sdg_hub/core/blocks/evaluation/verify_question_block.py +0 -329
- sdg_hub/core/blocks/llm/client_manager.py +0 -447
- sdg_hub/core/blocks/llm/config.py +0 -337
- {sdg_hub-0.3.0.dist-info → sdg_hub-0.4.0.dist-info}/WHEEL +0 -0
- {sdg_hub-0.3.0.dist-info → sdg_hub-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {sdg_hub-0.3.0.dist-info → sdg_hub-0.4.0.dist-info}/top_level.txt +0 -0
sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py (deleted)
@@ -1,323 +0,0 @@
# SPDX-License-Identifier: Apache-2.0
"""Thin wrapper for relevancy evaluation using 4 composed blocks.

This module provides a simple, lightweight wrapper that composes:
- PromptBuilderBlock: builds evaluation prompts
- LLMChatBlock: generates LLM responses
- TextParserBlock: parses structured output
- ColumnValueFilterBlock: filters based on score

The wrapper exposes minimal LLM interface for flow detection while
delegating all functionality to the internal blocks.
"""

# Standard
from typing import Any, Optional, Union

# Third Party
from datasets import Dataset
from pydantic import ConfigDict, Field, field_validator

# Local
from ...utils.error_handling import BlockValidationError
from ...utils.logger_config import setup_logger
from ..base import BaseBlock
from ..filtering.column_value_filter import ColumnValueFilterBlock
from ..llm.llm_chat_block import LLMChatBlock
from ..llm.prompt_builder_block import PromptBuilderBlock
from ..llm.text_parser_block import TextParserBlock
from ..registry import BlockRegistry

logger = setup_logger(__name__)


@BlockRegistry.register(
    "EvaluateRelevancyBlock",
    "evaluation",
    "Thin wrapper composing 4 blocks for relevancy evaluation",
)
class EvaluateRelevancyBlock(BaseBlock):
    """Thin wrapper for relevancy evaluation using composed blocks.

    Composes PromptBuilderBlock + LLMChatBlock + TextParserBlock + ColumnValueFilterBlock
    into a single evaluation pipeline with smart parameter routing.

    Parameters
    ----------
    block_name : str
        Name of the block.
    input_cols : List[str]
        Input columns: ["question", "response"]
    output_cols : List[str]
        Output columns: ["relevancy_explanation", "relevancy_score"]
    model : Optional[str]
        LLM model identifier.
    api_base : Optional[str]
        API base URL.
    api_key : Optional[str]
        API key.
    prompt_config_path : str
        Path to YAML prompt template file (required).
    **kwargs : Any
        All other parameters are automatically routed to appropriate internal blocks
        based on each block's accepted parameters. This includes all LLM parameters
        (temperature, max_tokens, extra_body, extra_headers, etc.), text parser
        parameters, and filter parameters.
    """

    model_config = ConfigDict(
        extra="allow"
    )  # Allow extra fields for dynamic forwarding

    # --- Core configuration ---
    prompt_config_path: str = Field(
        ...,
        description="Path to YAML file containing the relevancy evaluation prompt template",
    )

    # --- LLM interface (for flow detection) ---
    model: Optional[str] = Field(None, description="LLM model identifier")
    api_base: Optional[str] = Field(None, description="API base URL")
    api_key: Optional[str] = Field(None, description="API key")

    # --- Filter configuration ---
    filter_value: Union[str, int, float] = Field(
        2.0, description="Value to filter on for relevancy score"
    )
    operation: str = Field("eq", description="Filter operation")
    convert_dtype: Optional[str] = Field(
        "float", description="Data type conversion for filter column"
    )

    # --- Parser configuration ---
    start_tags: list[str] = Field(
        ["[Start of Feedback]", "[Start of Score]"],
        description="Start tags for parsing feedback and score",
    )
    end_tags: list[str] = Field(
        ["[End of Feedback]", "[End of Score]"],
        description="End tags for parsing feedback and score",
    )
    parsing_pattern: Optional[str] = Field(
        None,
        description="Regex pattern for custom parsing. If provided, takes precedence over tag-based parsing",
    )

    # --- Internal blocks (composition) ---
    prompt_builder: PromptBuilderBlock = Field(None, exclude=True)  # type: ignore
    llm_chat: LLMChatBlock = Field(None, exclude=True)  # type: ignore
    text_parser: TextParserBlock = Field(None, exclude=True)  # type: ignore
    filter_block: ColumnValueFilterBlock = Field(None, exclude=True)  # type: ignore

    @field_validator("input_cols")
    @classmethod
    def validate_input_cols(cls, v):
        """Validate input columns."""
        if v != ["question", "response"]:
            raise ValueError(
                f"EvaluateRelevancyBlock expects input_cols ['question', 'response'], got {v}"
            )
        return v

    @field_validator("output_cols")
    @classmethod
    def validate_output_cols(cls, v):
        """Validate output columns."""
        expected = ["relevancy_explanation", "relevancy_score"]
        if v != expected:
            raise ValueError(
                f"EvaluateRelevancyBlock expects output_cols {expected}, got {v}"
            )
        return v

    def __init__(self, **kwargs):
        """Initialize with smart parameter routing."""
        super().__init__(**kwargs)
        self._create_internal_blocks(**kwargs)

        # Log initialization if model is configured
        if self.model:
            logger.info(
                f"Initialized EvaluateRelevancyBlock '{self.block_name}' with model '{self.model}'"
            )

    def _extract_params(self, kwargs: dict, block_class) -> dict:
        """Extract parameters for specific block class based on its model_fields."""
        # Exclude parameters that are handled by this wrapper
        wrapper_params = {
            "block_name",
            "input_cols",
            "output_cols",
        }

        # Extract parameters that the target block accepts
        params = {
            k: v
            for k, v in kwargs.items()
            if k in block_class.model_fields and k not in wrapper_params
        }

        # Also include declared fields from this composite block that the target block accepts
        for field_name in self.__class__.model_fields:
            if (
                field_name in block_class.model_fields
                and field_name not in wrapper_params
            ):
                field_value = getattr(self, field_name)
                if field_value is not None:  # Only forward non-None values
                    params[field_name] = field_value

        return params

    def _create_internal_blocks(self, **kwargs):
        """Create internal blocks with parameter routing."""
        # Route parameters to appropriate blocks
        prompt_params = self._extract_params(kwargs, PromptBuilderBlock)
        llm_params = self._extract_params(kwargs, LLMChatBlock)
        parser_params = self._extract_params(kwargs, TextParserBlock)
        filter_params = self._extract_params(kwargs, ColumnValueFilterBlock)

        self.prompt_builder = PromptBuilderBlock(
            block_name=f"{self.block_name}_prompt_builder",
            input_cols=["question", "response"],
            output_cols=["eval_relevancy_prompt"],
            **prompt_params,
        )

        # Create LLM chat block with dynamic LLM parameter forwarding
        llm_config = {
            "block_name": f"{self.block_name}_llm_chat",
            "input_cols": ["eval_relevancy_prompt"],
            "output_cols": ["raw_eval_relevancy"],
            **llm_params,
        }

        # Only add LLM parameters if they are provided
        if self.model is not None:
            llm_config["model"] = self.model
        if self.api_base is not None:
            llm_config["api_base"] = self.api_base
        if self.api_key is not None:
            llm_config["api_key"] = self.api_key

        self.llm_chat = LLMChatBlock(**llm_config)

        # Create text parser
        self.text_parser = TextParserBlock(
            block_name=f"{self.block_name}_text_parser",
            input_cols=["raw_eval_relevancy"],
            output_cols=["relevancy_explanation", "relevancy_score"],
            **parser_params,
        )

        self.filter_block = ColumnValueFilterBlock(
            block_name=f"{self.block_name}_filter",
            input_cols=["relevancy_score"],
            output_cols=[],  # Filter doesn't create new columns
            **filter_params,
        )

    def generate(self, samples: Dataset, **kwargs: Any) -> Dataset:
        """Execute the 4-block relevancy evaluation pipeline.

        Parameters
        ----------
        samples : Dataset
            Input dataset with 'question' and 'response' columns.
        **kwargs : Any
            Additional arguments passed to internal blocks.

        Returns
        -------
        Dataset
            Filtered dataset with relevancy evaluation results.
        """
        # Validate model is configured
        if not self.model:
            raise BlockValidationError(
                f"Model not configured for block '{self.block_name}'. "
                f"Call flow.set_model_config() before generating."
            )

        logger.info(
            f"Starting relevancy evaluation for {len(samples)} samples",
            extra={"block_name": self.block_name, "model": self.model},
        )

        try:
            # Execute 4-block pipeline with validation delegation
            result = self.prompt_builder(samples, **kwargs)
            result = self.llm_chat(result, **kwargs)
            result = self.text_parser(result, **kwargs)
            result = self.filter_block(result, **kwargs)

            logger.info(
                f"Relevancy evaluation completed: {len(samples)} → {len(result)} samples",
                extra={"block_name": self.block_name},
            )

            return result

        except Exception as e:
            logger.error(
                f"Error during relevancy evaluation: {e}",
                extra={"block_name": self.block_name, "error": str(e)},
            )
            raise

    def __getattr__(self, name: str) -> Any:
        """Forward attribute access to appropriate internal block."""
        # Check each internal block to see which one has this parameter
        for block_attr, block_class in [
            ("prompt_builder", PromptBuilderBlock),
            ("llm_chat", LLMChatBlock),
            ("text_parser", TextParserBlock),
            ("filter_block", ColumnValueFilterBlock),
        ]:
            if hasattr(self, block_attr) and name in block_class.model_fields:
                internal_block = getattr(self, block_attr)
                if internal_block is not None:
                    return getattr(internal_block, name)
        raise AttributeError(
            f"'{self.__class__.__name__}' object has no attribute '{name}'"
        )

    def __setattr__(self, name: str, value: Any) -> None:
        """Handle dynamic parameter updates from flow.set_model_config()."""
        super().__setattr__(name, value)

        # Forward to appropriate internal blocks
        for block_attr, block_class in [
            ("prompt_builder", PromptBuilderBlock),
            ("llm_chat", LLMChatBlock),
            ("text_parser", TextParserBlock),
            ("filter_block", ColumnValueFilterBlock),
        ]:
            if hasattr(self, block_attr) and name in block_class.model_fields:
                setattr(getattr(self, block_attr), name, value)

    def _reinitialize_client_manager(self) -> None:
        """Reinitialize internal LLM block's client manager."""
        if hasattr(self.llm_chat, "_reinitialize_client_manager"):
            self.llm_chat._reinitialize_client_manager()

    def get_internal_blocks_info(self) -> dict[str, Any]:
        """Get information about internal blocks."""
        return {
            "prompt_builder": self.prompt_builder.get_info(),
            "llm_chat": self.llm_chat.get_info(),
            "text_parser": self.text_parser.get_info(),
            "filter": self.filter_block.get_info(),
        }

    def __repr__(self) -> str:
        """String representation of the block."""
        filter_value = (
            getattr(self.filter_block, "filter_value", 2.0)
            if hasattr(self, "filter_block")
            else 2.0
        )
        return (
            f"EvaluateRelevancyBlock(name='{self.block_name}', "
            f"model='{self.model}', filter_value='{filter_value}')"
        )
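For reference, a minimal sketch of how this removed wrapper could be driven on its own in 0.3.0, based only on the constructor fields and `generate()` shown above. The prompt path, model identifier, and endpoint are placeholders; in practice the block was normally configured through a flow via `flow.set_model_config()`.

```python
# Hypothetical standalone use of the removed EvaluateRelevancyBlock (0.3.0 API).
from datasets import Dataset
from sdg_hub.core.blocks.evaluation.evaluate_relevancy_block import EvaluateRelevancyBlock

block = EvaluateRelevancyBlock(
    block_name="eval_relevancy",
    input_cols=["question", "response"],                       # enforced by the input validator
    output_cols=["relevancy_explanation", "relevancy_score"],  # enforced by the output validator
    prompt_config_path="evaluate_relevancy_prompt.yaml",       # placeholder path
    model="hosted_vllm/example-model",                         # placeholder model id
    api_base="http://localhost:8000/v1",                       # placeholder endpoint
    api_key="EMPTY",
)

samples = Dataset.from_list(
    [{"question": "What does sdg_hub do?", "response": "It generates synthetic data."}]
)

# Runs prompt building, LLM chat, tag-based parsing, and score filtering in sequence.
result = block.generate(samples)
```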
sdg_hub/core/blocks/evaluation/verify_question_block.py (deleted)
@@ -1,329 +0,0 @@
# SPDX-License-Identifier: Apache-2.0
"""Thin wrapper for question verification using 4 composed blocks.

This module provides a simple, lightweight wrapper that composes:
- PromptBuilderBlock: builds verification prompts
- LLMChatBlock: generates LLM responses
- TextParserBlock: parses structured output
- ColumnValueFilterBlock: filters based on rating

The wrapper exposes minimal LLM interface for flow detection while
delegating all functionality to the internal blocks.
"""

# Standard
from typing import Any, Optional, Union

# Third Party
from datasets import Dataset
from pydantic import ConfigDict, Field, field_validator

# Local
from ...utils.error_handling import BlockValidationError
from ...utils.logger_config import setup_logger
from ..base import BaseBlock
from ..filtering.column_value_filter import ColumnValueFilterBlock
from ..llm.llm_chat_block import LLMChatBlock
from ..llm.prompt_builder_block import PromptBuilderBlock
from ..llm.text_parser_block import TextParserBlock
from ..registry import BlockRegistry

logger = setup_logger(__name__)


@BlockRegistry.register(
    "VerifyQuestionBlock",
    "evaluation",
    "Thin wrapper composing 4 blocks for question verification",
)
class VerifyQuestionBlock(BaseBlock):
    """Thin wrapper for question verification using composed blocks.

    Composes PromptBuilderBlock + LLMChatBlock + TextParserBlock + ColumnValueFilterBlock
    into a single verification pipeline with smart parameter routing.

    Parameters
    ----------
    block_name : str
        Name of the block.
    input_cols : List[str]
        Input columns: ["question"]
    output_cols : List[str]
        Output columns: ["verification_explanation", "verification_rating"]
    model : Optional[str]
        LLM model identifier.
    api_base : Optional[str]
        API base URL.
    api_key : Optional[str]
        API key.
    prompt_config_path : str
        Path to YAML prompt template file (required).
    **kwargs : Any
        All other parameters are automatically routed to appropriate internal blocks
        based on each block's accepted parameters. This includes all LLM parameters
        (temperature, max_tokens, extra_body, extra_headers, etc.), text parser
        parameters, and filter parameters.
    """

    model_config = ConfigDict(
        extra="allow"
    )  # Allow extra fields for dynamic forwarding

    # --- Core configuration ---
    prompt_config_path: str = Field(
        ...,
        description="Path to YAML file containing the question verification prompt template",
    )

    # --- LLM interface (for flow detection) ---
    model: Optional[str] = Field(None, description="LLM model identifier")
    api_base: Optional[str] = Field(None, description="API base URL")
    api_key: Optional[str] = Field(None, description="API key")

    # --- Filter configuration ---
    filter_value: Union[str, int, float] = Field(
        1.0, description="Value to filter on for verification rating"
    )
    operation: str = Field("eq", description="Filter operation")
    convert_dtype: Optional[str] = Field(
        "float", description="Data type conversion for filter column"
    )

    # --- Parser configuration ---
    start_tags: list[str] = Field(
        ["[Start of Explanation]", "[Start of Rating]"],
        description="Start tags for parsing explanation and rating",
    )
    end_tags: list[str] = Field(
        ["[End of Explanation]", "[End of Rating]"],
        description="End tags for parsing explanation and rating",
    )
    parsing_pattern: Optional[str] = Field(
        None,
        description="Regex pattern for custom parsing. If provided, takes precedence over tag-based parsing",
    )

    # Store parameters for internal blocks
    prompt_params: dict[str, Any] = Field(default_factory=dict, exclude=True)
    llm_params: dict[str, Any] = Field(default_factory=dict, exclude=True)
    parser_params: dict[str, Any] = Field(default_factory=dict, exclude=True)
    filter_params: dict[str, Any] = Field(default_factory=dict, exclude=True)

    # --- Internal blocks (composition) ---
    prompt_builder: PromptBuilderBlock = Field(None, exclude=True)  # type: ignore
    llm_chat: LLMChatBlock = Field(None, exclude=True)  # type: ignore
    text_parser: TextParserBlock = Field(None, exclude=True)  # type: ignore
    filter_block: ColumnValueFilterBlock = Field(None, exclude=True)  # type: ignore

    @field_validator("input_cols")
    @classmethod
    def validate_input_cols(cls, v):
        """Validate input columns."""
        if v != ["question"]:
            raise ValueError(
                f"VerifyQuestionBlock expects input_cols ['question'], got {v}"
            )
        return v

    @field_validator("output_cols")
    @classmethod
    def validate_output_cols(cls, v):
        """Validate output columns."""
        expected = ["verification_explanation", "verification_rating"]
        if v != expected:
            raise ValueError(
                f"VerifyQuestionBlock expects output_cols {expected}, got {v}"
            )
        return v

    def __init__(self, **kwargs):
        """Initialize with smart parameter routing."""
        super().__init__(**kwargs)
        self._create_internal_blocks(**kwargs)

        # Log initialization if model is configured
        if self.model:
            logger.info(
                f"Initialized VerifyQuestionBlock '{self.block_name}' with model '{self.model}'"
            )

    def _extract_params(self, kwargs: dict, block_class) -> dict:
        """Extract parameters for specific block class based on its model_fields."""
        # Exclude parameters that are handled by this wrapper's structure
        wrapper_params = {
            "block_name",
            "input_cols",
            "output_cols",
        }

        # Extract parameters that the target block accepts
        params = {
            k: v
            for k, v in kwargs.items()
            if k in block_class.model_fields and k not in wrapper_params
        }

        # Also include declared fields from this composite block that the target block accepts
        for field_name in self.__class__.model_fields:
            if (
                field_name in block_class.model_fields
                and field_name not in wrapper_params
            ):
                field_value = getattr(self, field_name)
                if field_value is not None:  # Only forward non-None values
                    params[field_name] = field_value

        return params

    def _create_internal_blocks(self, **kwargs):
        """Create internal blocks with parameter routing."""
        # Route parameters to appropriate blocks
        prompt_params = self._extract_params(kwargs, PromptBuilderBlock)
        llm_params = self._extract_params(kwargs, LLMChatBlock)
        parser_params = self._extract_params(kwargs, TextParserBlock)
        filter_params = self._extract_params(kwargs, ColumnValueFilterBlock)

        self.prompt_builder = PromptBuilderBlock(
            block_name=f"{self.block_name}_prompt_builder",
            input_cols=["question"],
            output_cols=["verify_question_prompt"],
            **prompt_params,
        )

        # Create LLM chat block with dynamic LLM parameter forwarding
        llm_config = {
            "block_name": f"{self.block_name}_llm_chat",
            "input_cols": ["verify_question_prompt"],
            "output_cols": ["raw_verify_question"],
            **llm_params,
        }

        # Only add LLM parameters if they are provided
        if self.model is not None:
            llm_config["model"] = self.model
        if self.api_base is not None:
            llm_config["api_base"] = self.api_base
        if self.api_key is not None:
            llm_config["api_key"] = self.api_key

        self.llm_chat = LLMChatBlock(**llm_config)

        # Create text parser
        self.text_parser = TextParserBlock(
            block_name=f"{self.block_name}_text_parser",
            input_cols=["raw_verify_question"],
            output_cols=["verification_explanation", "verification_rating"],
            **parser_params,
        )

        self.filter_block = ColumnValueFilterBlock(
            block_name=f"{self.block_name}_filter",
            input_cols=["verification_rating"],
            output_cols=[],  # Filter doesn't create new columns
            **filter_params,
        )

    def generate(self, samples: Dataset, **kwargs: Any) -> Dataset:
        """Execute the 4-block question verification pipeline.

        Parameters
        ----------
        samples : Dataset
            Input dataset with 'question' column.
        **kwargs : Any
            Additional arguments passed to internal blocks.

        Returns
        -------
        Dataset
            Filtered dataset with question verification results.
        """
        # Validate model is configured
        if not self.model:
            raise BlockValidationError(
                f"Model not configured for block '{self.block_name}'. "
                f"Call flow.set_model_config() before generating."
            )

        logger.info(
            f"Starting question verification for {len(samples)} samples",
            extra={"block_name": self.block_name, "model": self.model},
        )

        try:
            # Execute 4-block pipeline with validation delegation
            result = self.prompt_builder(samples, **kwargs)
            result = self.llm_chat(result, **kwargs)
            result = self.text_parser(result, **kwargs)
            result = self.filter_block(result, **kwargs)

            logger.info(
                f"Question verification completed: {len(samples)} → {len(result)} samples",
                extra={"block_name": self.block_name},
            )

            return result

        except Exception as e:
            logger.error(
                f"Error during question verification: {e}",
                extra={"block_name": self.block_name, "error": str(e)},
            )
            raise

    def __getattr__(self, name: str) -> Any:
        """Forward attribute access to appropriate internal block."""
        # Check each internal block to see which one has this parameter
        for block_attr, block_class in [
            ("prompt_builder", PromptBuilderBlock),
            ("llm_chat", LLMChatBlock),
            ("text_parser", TextParserBlock),
            ("filter_block", ColumnValueFilterBlock),
        ]:
            if hasattr(self, block_attr) and name in block_class.model_fields:
                internal_block = getattr(self, block_attr)
                if internal_block is not None:
                    return getattr(internal_block, name)
        raise AttributeError(
            f"'{self.__class__.__name__}' object has no attribute '{name}'"
        )

    def __setattr__(self, name: str, value: Any) -> None:
        """Handle dynamic parameter updates from flow.set_model_config()."""
        super().__setattr__(name, value)

        # Forward to appropriate internal blocks
        for block_attr, block_class in [
            ("prompt_builder", PromptBuilderBlock),
            ("llm_chat", LLMChatBlock),
            ("text_parser", TextParserBlock),
            ("filter_block", ColumnValueFilterBlock),
        ]:
            if hasattr(self, block_attr) and name in block_class.model_fields:
                setattr(getattr(self, block_attr), name, value)

    def _reinitialize_client_manager(self) -> None:
        """Reinitialize internal LLM block's client manager."""
        if hasattr(self.llm_chat, "_reinitialize_client_manager"):
            self.llm_chat._reinitialize_client_manager()

    def get_internal_blocks_info(self) -> dict[str, Any]:
        """Get information about internal blocks."""
        return {
            "prompt_builder": self.prompt_builder.get_info(),
            "llm_chat": self.llm_chat.get_info(),
            "text_parser": self.text_parser.get_info(),
            "filter": self.filter_block.get_info(),
        }

    def __repr__(self) -> str:
        """String representation of the block."""
        filter_value = (
            getattr(self.filter_block, "filter_value", "1.0")
            if hasattr(self, "filter_block")
            else "1.0"
        )
        return (
            f"VerifyQuestionBlock(name='{self.block_name}', "
            f"model='{self.model}', filter_value='{filter_value}')"
        )
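Both removed wrappers also forwarded late configuration to their internal blocks through `__setattr__`, which is how `flow.set_model_config()` reached the inner `LLMChatBlock` in 0.3.0. A minimal sketch of that behavior, assuming `model`, `api_base`, and `api_key` are declared fields on `LLMChatBlock`; all values below are placeholders, not values from this diff:

```python
# Hypothetical deferred configuration of the removed VerifyQuestionBlock (0.3.0 API).
from sdg_hub.core.blocks.evaluation.verify_question_block import VerifyQuestionBlock

block = VerifyQuestionBlock(
    block_name="verify_question",
    input_cols=["question"],
    output_cols=["verification_explanation", "verification_rating"],
    prompt_config_path="verify_question_prompt.yaml",  # placeholder path
)

# Each assignment is forwarded by __setattr__ to any internal block whose
# model_fields include that name, so the inner LLMChatBlock picks these up.
block.model = "hosted_vllm/example-model"    # placeholder model id
block.api_base = "http://localhost:8000/v1"  # placeholder endpoint
block.api_key = "EMPTY"

# The flow would then refresh the inner LLM client before calling generate().
block._reinitialize_client_manager()
```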