sdg-hub 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sdg_hub/_version.py +16 -3
- sdg_hub/core/blocks/deprecated_blocks/selector.py +1 -1
- sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +175 -416
- sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +174 -415
- sdg_hub/core/blocks/evaluation/verify_question_block.py +180 -415
- sdg_hub/core/blocks/llm/__init__.py +2 -0
- sdg_hub/core/blocks/llm/client_manager.py +61 -24
- sdg_hub/core/blocks/llm/config.py +1 -0
- sdg_hub/core/blocks/llm/llm_chat_block.py +62 -7
- sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +653 -0
- sdg_hub/core/blocks/llm/text_parser_block.py +75 -30
- sdg_hub/core/blocks/registry.py +49 -35
- sdg_hub/core/blocks/transform/index_based_mapper.py +1 -1
- sdg_hub/core/flow/base.py +370 -20
- sdg_hub/core/flow/checkpointer.py +333 -0
- sdg_hub/core/flow/metadata.py +45 -0
- sdg_hub/core/flow/migration.py +12 -1
- sdg_hub/core/flow/registry.py +121 -58
- sdg_hub/core/flow/validation.py +12 -0
- sdg_hub/core/utils/__init__.py +2 -1
- sdg_hub/core/utils/datautils.py +81 -1
- sdg_hub/core/utils/flow_id_words.yaml +231 -0
- sdg_hub/core/utils/flow_identifier.py +94 -0
- sdg_hub/core/utils/yaml_utils.py +59 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +1 -7
- {sdg_hub-0.2.0.dist-info → sdg_hub-0.2.2.dist-info}/METADATA +59 -31
- {sdg_hub-0.2.0.dist-info → sdg_hub-0.2.2.dist-info}/RECORD +30 -25
- {sdg_hub-0.2.0.dist-info → sdg_hub-0.2.2.dist-info}/WHEEL +0 -0
- {sdg_hub-0.2.0.dist-info → sdg_hub-0.2.2.dist-info}/licenses/LICENSE +0 -0
- {sdg_hub-0.2.0.dist-info → sdg_hub-0.2.2.dist-info}/top_level.txt +0 -0
@@ -1,19 +1,25 @@
 # SPDX-License-Identifier: Apache-2.0
-"""
+"""Thin wrapper for faithfulness evaluation using 4 composed blocks.
 
-This module provides
-
-
+This module provides a simple, lightweight wrapper that composes:
+- PromptBuilderBlock: builds evaluation prompts
+- LLMChatBlock: generates LLM responses
+- TextParserBlock: parses structured output
+- ColumnValueFilterBlock: filters based on judgment
+
+The wrapper exposes minimal LLM interface for flow detection while
+delegating all functionality to the internal blocks.
 """
 
 # Standard
-from typing import Any, Optional
+from typing import Any, Optional
 
 # Third Party
 from datasets import Dataset
 from pydantic import ConfigDict, Field, field_validator
 
 # Local
+from ...utils.error_handling import BlockValidationError
 from ...utils.logger_config import setup_logger
 from ..base import BaseBlock
 from ..filtering.column_value_filter import ColumnValueFilterBlock
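
The new module docstring above replaces the old monolithic implementation with a composition of four existing blocks. A minimal sketch of that chaining pattern, using stand-in callables rather than the actual sdg_hub classes, looks like this (the real generate() implementation appears in the last hunk of this diff):

```python
# Conceptual sketch only: the four stages mirror the composed blocks named in
# the docstring; any callable taking and returning a Dataset would fit here.
from datasets import Dataset


def run_faithfulness_pipeline(
    samples: Dataset, prompt_builder, llm_chat, text_parser, filter_block
) -> Dataset:
    result = prompt_builder(samples)  # build evaluation prompts
    result = llm_chat(result)         # generate raw LLM judgments
    result = text_parser(result)      # parse explanation / judgment columns
    return filter_block(result)       # keep rows whose judgment passes
```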
@@ -28,16 +34,13 @@ logger = setup_logger(__name__)
 @BlockRegistry.register(
     "EvaluateFaithfulnessBlock",
     "evaluation",
-    "
+    "Thin wrapper composing 4 blocks for faithfulness evaluation",
 )
 class EvaluateFaithfulnessBlock(BaseBlock):
-    """
+    """Thin wrapper for faithfulness evaluation using composed blocks.
 
-
-
-    2. LLMChatBlock - generates faithfulness evaluation using LLM
-    3. TextParserBlock - parses explanation and judgment from raw output
-    4. ColumnValueFilterBlock - filters based on faithfulness judgment
+    Composes PromptBuilderBlock + LLMChatBlock + TextParserBlock + ColumnValueFilterBlock
+    into a single evaluation pipeline with smart parameter routing.
 
     Parameters
     ----------
@@ -47,88 +50,37 @@ class EvaluateFaithfulnessBlock(BaseBlock):
         Input columns: ["document", "response"]
     output_cols : List[str]
         Output columns: ["faithfulness_explanation", "faithfulness_judgment"]
-
-
-    model : str
-        Model identifier in LiteLLM format (e.g., "hosted_vllm/meta-llama/Llama-3.3-70B-Instruct")
+    model : Optional[str]
+        LLM model identifier.
     api_base : Optional[str]
-
+        API base URL.
     api_key : Optional[str]
-        API key
-
-
-    operation : str, optional
-        Filter operation (default: "eq")
-    convert_dtype : Optional[str], optional
-        Data type conversion for filter column (default: None)
-    async_mode : bool, optional
-        Whether to use async processing (default: True)
-    format_as_messages : bool, optional
-        Whether to format prompt as messages (default: True)
-    start_tags : List[str], optional
-        Start tags for parsing (default: ["[Start of Explanation]", "[Start of Answer]"])
-    end_tags : List[str], optional
-        End tags for parsing (default: ["[End of Explanation]", "[End of Answer]"])
-    parsing_pattern : Optional[str], optional
-        Regex pattern for custom parsing. If provided, takes precedence over tag-based parsing.
-    parser_cleanup_tags : Optional[List[str]], optional
-        List of tags to clean from parsed output.
-
-    ### LLM Generation Parameters ###
-    temperature : Optional[float], optional
-        Sampling temperature (0.0 to 2.0).
-    max_tokens : Optional[int], optional
-        Maximum tokens to generate.
-    top_p : Optional[float], optional
-        Nucleus sampling parameter (0.0 to 1.0).
-    frequency_penalty : Optional[float], optional
-        Frequency penalty (-2.0 to 2.0).
-    presence_penalty : Optional[float], optional
-        Presence penalty (-2.0 to 2.0).
-    stop : Optional[Union[str, List[str]]], optional
-        Stop sequences.
-    seed : Optional[int], optional
-        Random seed for reproducible outputs.
-    response_format : Optional[Dict[str, Any]], optional
-        Response format specification (e.g., JSON mode).
-    stream : Optional[bool], optional
-        Whether to stream responses.
-    n : Optional[int], optional
-        Number of completions to generate. When n > 1, the output column will contain
-        a list of responses for each input sample.
-    logprobs : Optional[bool], optional
-        Whether to return log probabilities.
-    top_logprobs : Optional[int], optional
-        Number of top log probabilities to return.
-    user : Optional[str], optional
-        End-user identifier.
-    extra_headers : Optional[Dict[str, str]], optional
-        Additional headers to send with requests.
-    extra_body : Optional[Dict[str, Any]], optional
-        Additional parameters for the request body.
-    timeout : float, optional
-        Request timeout in seconds (default: 120.0).
-    max_retries : int, optional
-        Maximum number of retry attempts (default: 6).
+        API key.
+    prompt_config_path : str
+        Path to YAML prompt template file (required).
     **kwargs : Any
-
+        All other parameters are automatically routed to appropriate internal blocks
+        based on each block's accepted parameters. This includes all LLM parameters
+        (temperature, max_tokens, extra_body, extra_headers, etc.), text parser
+        parameters, and filter parameters.
     """
 
-    model_config = ConfigDict(
+    model_config = ConfigDict(
+        extra="allow"
+    )  # Allow extra fields for dynamic forwarding
 
-    # Core configuration
+    # --- Core configuration ---
     prompt_config_path: str = Field(
         ...,
         description="Path to YAML file containing the faithfulness evaluation prompt template",
     )
-    model: Optional[str] = Field(None, description="Model identifier in LiteLLM format")
-    api_base: Optional[str] = Field(None, description="Base URL for the API")
-    api_key: Optional[str] = Field(
-        None,
-        description="API key for the provider. Falls back to environment variables.",
-    )
 
-    #
+    # --- LLM interface (for flow detection) ---
+    model: Optional[str] = Field(None, description="LLM model identifier")
+    api_base: Optional[str] = Field(None, description="API base URL")
+    api_key: Optional[str] = Field(None, description="API key")
+
+    # --- Filter configuration ---
     filter_value: str = Field(
         "YES", description="Value to filter on for faithfulness judgment"
     )
@@ -137,13 +89,7 @@ class EvaluateFaithfulnessBlock(BaseBlock):
         None, description="Data type conversion for filter column"
     )
 
-    #
-    async_mode: bool = Field(True, description="Whether to use async processing")
-    format_as_messages: bool = Field(
-        True, description="Whether to format prompt as messages"
-    )
-
-    # Parser configuration
+    # --- Parser configuration ---
     start_tags: list[str] = Field(
         ["[Start of Explanation]", "[Start of Answer]"],
         description="Start tags for parsing explanation and judgment",
@@ -156,409 +102,222 @@ class EvaluateFaithfulnessBlock(BaseBlock):
         None,
         description="Regex pattern for custom parsing. If provided, takes precedence over tag-based parsing",
     )
-    parser_cleanup_tags: Optional[list[str]] = Field(
-        None, description="List of tags to clean from parsed output"
-    )
-
-    # LLM generation parameters
-    temperature: Optional[float] = Field(
-        None, description="Sampling temperature (0.0 to 2.0)"
-    )
-    max_tokens: Optional[int] = Field(None, description="Maximum tokens to generate")
-    top_p: Optional[float] = Field(
-        None, description="Nucleus sampling parameter (0.0 to 1.0)"
-    )
-    frequency_penalty: Optional[float] = Field(
-        None, description="Frequency penalty (-2.0 to 2.0)"
-    )
-    presence_penalty: Optional[float] = Field(
-        None, description="Presence penalty (-2.0 to 2.0)"
-    )
-    stop: Optional[Union[str, list[str]]] = Field(None, description="Stop sequences")
-    seed: Optional[int] = Field(
-        None, description="Random seed for reproducible outputs"
-    )
-    response_format: Optional[dict[str, Any]] = Field(
-        None, description="Response format specification (e.g., JSON mode)"
-    )
-    stream: Optional[bool] = Field(None, description="Whether to stream responses")
-    n: Optional[int] = Field(
-        None,
-        description="Number of completions to generate. When n > 1, the output column will contain a list of responses for each input sample",
-    )
-    logprobs: Optional[bool] = Field(
-        None, description="Whether to return log probabilities"
-    )
-    top_logprobs: Optional[int] = Field(
-        None, description="Number of top log probabilities to return"
-    )
-    user: Optional[str] = Field(None, description="End-user identifier")
-    extra_headers: Optional[dict[str, str]] = Field(
-        None, description="Additional headers to send with requests"
-    )
-    extra_body: Optional[dict[str, Any]] = Field(
-        None, description="Additional parameters for the request body"
-    )
-    timeout: float = Field(120.0, description="Request timeout in seconds")
-    max_retries: int = Field(6, description="Maximum number of retry attempts")
 
-    #
-
-
-    )
-
-    # Internal blocks - excluded from serialization
-    prompt_builder: Optional[PromptBuilderBlock] = Field(None, exclude=True)
-    llm_chat: Optional[LLMChatBlock] = Field(None, exclude=True)
-    text_parser: Optional[TextParserBlock] = Field(None, exclude=True)
-    filter_block: Optional[ColumnValueFilterBlock] = Field(None, exclude=True)
+    # --- Internal blocks (composition) ---
+    prompt_builder: PromptBuilderBlock = Field(None, exclude=True)  # type: ignore
+    llm_chat: LLMChatBlock = Field(None, exclude=True)  # type: ignore
+    text_parser: TextParserBlock = Field(None, exclude=True)  # type: ignore
+    filter_block: ColumnValueFilterBlock = Field(None, exclude=True)  # type: ignore
 
     @field_validator("input_cols")
     @classmethod
     def validate_input_cols(cls, v):
-        """Validate
-
-        if v != expected:
+        """Validate input columns."""
+        if v != ["document", "response"]:
             raise ValueError(
-                f"EvaluateFaithfulnessBlock expects input_cols
+                f"EvaluateFaithfulnessBlock expects input_cols ['document', 'response'], got {v}"
             )
         return v
 
     @field_validator("output_cols")
     @classmethod
     def validate_output_cols(cls, v):
-        """Validate
-        expected = [
-            "faithfulness_explanation",
-            "faithfulness_judgment",
-        ]
+        """Validate output columns."""
+        expected = ["faithfulness_explanation", "faithfulness_judgment"]
         if v != expected:
             raise ValueError(
-                f"EvaluateFaithfulnessBlock expects output_cols
+                f"EvaluateFaithfulnessBlock expects output_cols {expected}, got {v}"
             )
         return v
 
-    def
-        """Initialize
-        super().
-
-        # Create internal blocks
-        self._create_internal_blocks()
+    def __init__(self, **kwargs):
+        """Initialize with smart parameter routing."""
+        super().__init__(**kwargs)
+        self._create_internal_blocks(**kwargs)
 
-        # Log initialization
+        # Log initialization if model is configured
         if self.model:
             logger.info(
-                f"Initialized EvaluateFaithfulnessBlock '{self.block_name}' with model '{self.model}'"
-                extra={
-                    "block_name": self.block_name,
-                    "model": self.model,
-                    "async_mode": self.async_mode,
-                    "filter_value": self.filter_value,
-                },
+                f"Initialized EvaluateFaithfulnessBlock '{self.block_name}' with model '{self.model}'"
             )
 
-    def
-        """
-        #
+    def _extract_params(self, kwargs: dict, block_class) -> dict:
+        """Extract parameters for specific block class based on its model_fields."""
+        # Exclude parameters that are handled by this wrapper's structure
+        wrapper_params = {
+            "block_name",
+            "input_cols",
+            "output_cols",
+        }
+
+        # Extract parameters that the target block accepts
+        params = {
+            k: v
+            for k, v in kwargs.items()
+            if k in block_class.model_fields and k not in wrapper_params
+        }
+
+        # Also include declared fields from this composite block that the target block accepts
+        for field_name in self.__class__.model_fields:
+            if (
+                field_name in block_class.model_fields
+                and field_name not in wrapper_params
+            ):
+                field_value = getattr(self, field_name)
+                if field_value is not None:  # Only forward non-None values
+                    params[field_name] = field_value
+
+        return params
+
+    def _create_internal_blocks(self, **kwargs):
+        """Create internal blocks with parameter routing."""
+        # Route parameters to appropriate blocks
+        prompt_params = self._extract_params(kwargs, PromptBuilderBlock)
+        llm_params = self._extract_params(kwargs, LLMChatBlock)
+        parser_params = self._extract_params(kwargs, TextParserBlock)
+        filter_params = self._extract_params(kwargs, ColumnValueFilterBlock)
+
         self.prompt_builder = PromptBuilderBlock(
             block_name=f"{self.block_name}_prompt_builder",
             input_cols=["document", "response"],
             output_cols=["eval_faithfulness_prompt"],
-
-            format_as_messages=self.format_as_messages,
+            **prompt_params,
         )
 
-        #
-
+        # Create LLM chat block with dynamic LLM parameter forwarding
+        llm_config = {
             "block_name": f"{self.block_name}_llm_chat",
             "input_cols": ["eval_faithfulness_prompt"],
             "output_cols": ["raw_eval_faithfulness"],
-
-            "api_base": self.api_base,
-            "api_key": self.api_key,
-            "async_mode": self.async_mode,
-            "timeout": self.timeout,
-            "max_retries": self.max_retries,
-        }
-
-        # Add generation parameters if specified
-        if self.temperature is not None:
-            llm_kwargs["temperature"] = self.temperature
-        if self.max_tokens is not None:
-            llm_kwargs["max_tokens"] = self.max_tokens
-        if self.top_p is not None:
-            llm_kwargs["top_p"] = self.top_p
-        if self.frequency_penalty is not None:
-            llm_kwargs["frequency_penalty"] = self.frequency_penalty
-        if self.presence_penalty is not None:
-            llm_kwargs["presence_penalty"] = self.presence_penalty
-        if self.stop is not None:
-            llm_kwargs["stop"] = self.stop
-        if self.seed is not None:
-            llm_kwargs["seed"] = self.seed
-        if self.response_format is not None:
-            llm_kwargs["response_format"] = self.response_format
-        if self.stream is not None:
-            llm_kwargs["stream"] = self.stream
-        if self.n is not None:
-            llm_kwargs["n"] = self.n
-        if self.logprobs is not None:
-            llm_kwargs["logprobs"] = self.logprobs
-        if self.top_logprobs is not None:
-            llm_kwargs["top_logprobs"] = self.top_logprobs
-        if self.user is not None:
-            llm_kwargs["user"] = self.user
-        if self.extra_headers is not None:
-            llm_kwargs["extra_headers"] = self.extra_headers
-        if self.extra_body is not None:
-            llm_kwargs["extra_body"] = self.extra_body
-
-        # Add any additional kwargs
-        llm_kwargs.update(self.llm_kwargs)
-
-        self.llm_chat = LLMChatBlock(**llm_kwargs)
-
-        # 3. TextParserBlock
-        text_parser_kwargs = {
-            "block_name": f"{self.block_name}_text_parser",
-            "input_cols": ["raw_eval_faithfulness"],
-            "output_cols": ["faithfulness_explanation", "faithfulness_judgment"],
-            "start_tags": self.start_tags,
-            "end_tags": self.end_tags,
+            **llm_params,
         }
 
-        #
-        if self.
-
-        if self.
-
-
-
-
-
-
-
-
-            "
-            "
-            "
-
-
-        if self.convert_dtype is not None:
-            filter_kwargs["convert_dtype"] = self.convert_dtype
-
-        self.filter_block = ColumnValueFilterBlock(**filter_kwargs)
-
-    def _reinitialize_client_manager(self) -> None:
-        """Reinitialize the internal LLM chat block's client manager.
+        # Only add LLM parameters if they are provided
+        if self.model is not None:
+            llm_config["model"] = self.model
+        if self.api_base is not None:
+            llm_config["api_base"] = self.api_base
+        if self.api_key is not None:
+            llm_config["api_key"] = self.api_key
+
+        self.llm_chat = LLMChatBlock(**llm_config)
+
+        # Create text parser
+        self.text_parser = TextParserBlock(
+            block_name=f"{self.block_name}_text_parser",
+            input_cols=["raw_eval_faithfulness"],
+            output_cols=["faithfulness_explanation", "faithfulness_judgment"],
+            **parser_params,
+        )
 
-
-
-
-
-
-
-        self.llm_chat.api_base = self.api_base
-        self.llm_chat.api_key = self.api_key
-        # Reinitialize its client manager
-        self.llm_chat._reinitialize_client_manager()
+        self.filter_block = ColumnValueFilterBlock(
+            block_name=f"{self.block_name}_filter",
+            input_cols=["faithfulness_judgment"],
+            output_cols=[],  # Filter doesn't create new columns
+            **filter_params,
+        )
 
     def generate(self, samples: Dataset, **kwargs: Any) -> Dataset:
-        """
-
-        This method chains the four internal blocks in sequence:
-        1. Build faithfulness evaluation prompts
-        2. Generate LLM responses
-        3. Parse explanation and judgment
-        4. Filter based on judgment
+        """Execute the 4-block faithfulness evaluation pipeline.
 
         Parameters
         ----------
         samples : Dataset
-            Input dataset
+            Input dataset with 'document' and 'response' columns.
         **kwargs : Any
-            Additional
+            Additional arguments passed to internal blocks.
 
         Returns
         -------
         Dataset
-
-
-        Raises
-        ------
-        BlockValidationError
-            If model is not configured before calling generate().
+            Filtered dataset with faithfulness evaluation results.
         """
-        # Validate
+        # Validate model is configured
         if not self.model:
-            # Local
-            from ...utils.error_handling import BlockValidationError
-
             raise BlockValidationError(
                 f"Model not configured for block '{self.block_name}'. "
                 f"Call flow.set_model_config() before generating."
             )
+
         logger.info(
             f"Starting faithfulness evaluation for {len(samples)} samples",
-            extra={
-                "block_name": self.block_name,
-                "model": self.model,
-                "batch_size": len(samples),
-            },
+            extra={"block_name": self.block_name, "model": self.model},
        )
 
-        current_dataset = samples
-
         try:
-            #
-
-
-
-
-            logger.debug("Step 2: Generating LLM responses")
-            current_dataset = self.llm_chat.generate(current_dataset, **kwargs)
-
-            # Step 3: Parse responses
-            logger.debug("Step 3: Parsing faithfulness evaluation responses")
-            current_dataset = self.text_parser.generate(current_dataset, **kwargs)
-
-            # Step 4: Filter based on judgment
-            logger.debug("Step 4: Filtering based on faithfulness judgment")
-            original_count = len(current_dataset)
-            current_dataset = self.filter_block.generate(current_dataset, **kwargs)
-            filtered_count = len(current_dataset)
+            # Execute 4-block pipeline with validation delegation
+            result = self.prompt_builder(samples, **kwargs)
+            result = self.llm_chat(result, **kwargs)
+            result = self.text_parser(result, **kwargs)
+            result = self.filter_block(result, **kwargs)
 
             logger.info(
-                f"Faithfulness evaluation completed: {
-
-                extra={
-                    "block_name": self.block_name,
-                    "original_count": original_count,
-                    "filtered_count": filtered_count,
-                    "filter_rate": (original_count - filtered_count) / original_count
-                    if original_count > 0
-                    else 0,
-                },
+                f"Faithfulness evaluation completed: {len(samples)} → {len(result)} samples",
+                extra={"block_name": self.block_name},
             )
 
-            return
+            return result
 
         except Exception as e:
             logger.error(
                 f"Error during faithfulness evaluation: {e}",
-                extra={
-                    "block_name": self.block_name,
-                    "model": self.model,
-                    "error": str(e),
-                },
+                extra={"block_name": self.block_name, "error": str(e)},
             )
             raise
 
-    def
-        """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        # Validate the entire chain of internal blocks
-        if not all(
-            [self.prompt_builder, self.llm_chat, self.text_parser, self.filter_block]
-        ):
-            raise ValueError(
-                "All internal blocks must be initialized before validation"
-            )
+    def __getattr__(self, name: str) -> Any:
+        """Forward attribute access to appropriate internal block."""
+        # Check each internal block to see which one has this parameter
+        for block_attr, block_class in [
+            ("prompt_builder", PromptBuilderBlock),
+            ("llm_chat", LLMChatBlock),
+            ("text_parser", TextParserBlock),
+            ("filter_block", ColumnValueFilterBlock),
+        ]:
+            if hasattr(self, block_attr) and name in block_class.model_fields:
+                internal_block = getattr(self, block_attr)
+                if internal_block is not None:
+                    return getattr(internal_block, name)
+        raise AttributeError(
+            f"'{self.__class__.__name__}' object has no attribute '{name}'"
+        )
 
-
-
+    def __setattr__(self, name: str, value: Any) -> None:
+        """Handle dynamic parameter updates from flow.set_model_config()."""
+        super().__setattr__(name, value)
 
-
-
-
-
-
-
-
-        if
-
-            temp_data = []
-            for sample in current_dataset:
-                temp_sample = dict(sample)
-                temp_sample["eval_faithfulness_prompt"] = [
-                    {"role": "user", "content": "test"}
-                ]
-                temp_data.append(temp_sample)
-            current_dataset = Dataset.from_list(temp_data)
-
-        # 2. Validate LLMChatBlock
-        logger.debug("Validating LLM chat block")
-        self.llm_chat._validate_custom(current_dataset)
-
-        # Simulate LLM chat output for next validation
-        if "raw_eval_faithfulness" not in current_dataset.column_names:
-            temp_data = []
-            for sample in current_dataset:
-                temp_sample = dict(sample)
-                temp_sample["raw_eval_faithfulness"] = (
-                    "[Start of Explanation]Test explanation[End of Explanation]\n[Start of Answer]YES[End of Answer]"
-                )
-                temp_data.append(temp_sample)
-            current_dataset = Dataset.from_list(temp_data)
-
-        # 3. Validate TextParserBlock
-        logger.debug("Validating text parser block")
-        self.text_parser._validate_custom(current_dataset)
-
-        # Simulate text parser output for final validation
-        if "faithfulness_judgment" not in current_dataset.column_names:
-            temp_data = []
-            for sample in current_dataset:
-                temp_sample = dict(sample)
-                temp_sample["faithfulness_explanation"] = "Test explanation"
-                temp_sample["faithfulness_judgment"] = "YES"
-                temp_data.append(temp_sample)
-            current_dataset = Dataset.from_list(temp_data)
-
-        # 4. Validate ColumnValueFilterBlock
-        logger.debug("Validating filter block")
-        self.filter_block._validate_custom(current_dataset)
-
-        logger.debug("All internal blocks validated successfully")
+        # Forward to appropriate internal blocks
+        for block_attr, block_class in [
+            ("prompt_builder", PromptBuilderBlock),
+            ("llm_chat", LLMChatBlock),
+            ("text_parser", TextParserBlock),
+            ("filter_block", ColumnValueFilterBlock),
+        ]:
+            if hasattr(self, block_attr) and name in block_class.model_fields:
+                setattr(getattr(self, block_attr), name, value)
 
-
-
-
+    def _reinitialize_client_manager(self) -> None:
+        """Reinitialize internal LLM block's client manager."""
+        if hasattr(self.llm_chat, "_reinitialize_client_manager"):
+            self.llm_chat._reinitialize_client_manager()
 
     def get_internal_blocks_info(self) -> dict[str, Any]:
-        """Get information about
-
-        Returns
-        -------
-        Dict[str, Any]
-            Information about each internal block.
-        """
+        """Get information about internal blocks."""
         return {
-            "prompt_builder": self.prompt_builder.get_info()
-
-
-            "
-            "text_parser": self.text_parser.get_info() if self.text_parser else None,
-            "filter": self.filter_block.get_info() if self.filter_block else None,
+            "prompt_builder": self.prompt_builder.get_info(),
+            "llm_chat": self.llm_chat.get_info(),
+            "text_parser": self.text_parser.get_info(),
+            "filter": self.filter_block.get_info(),
         }
 
     def __repr__(self) -> str:
         """String representation of the block."""
+        filter_value = (
+            getattr(self.filter_block, "filter_value", "YES")
+            if hasattr(self, "filter_block")
+            else "YES"
+        )
         return (
             f"EvaluateFaithfulnessBlock(name='{self.block_name}', "
-            f"model='{self.model}', filter_value='{
+            f"model='{self.model}', filter_value='{filter_value}')"
         )
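
The parameter routing in _extract_params keys off pydantic's model_fields mapping: a keyword argument is forwarded to an internal block only if that block's model declares a field with the same name, and wrapper-owned names (block_name, input_cols, output_cols) are held back. A standalone sketch of the technique with toy pydantic models (not the sdg_hub block classes):

```python
from pydantic import BaseModel


class LLMConfig(BaseModel):
    model: str = "none"
    temperature: float = 0.0


class FilterConfig(BaseModel):
    filter_value: str = "YES"


def route(kwargs: dict, target: type[BaseModel], reserved: set[str]) -> dict:
    # Forward only the keys the target model declares, minus wrapper-owned names.
    return {
        k: v
        for k, v in kwargs.items()
        if k in target.model_fields and k not in reserved
    }


params = {"temperature": 0.2, "filter_value": "NO", "block_name": "demo"}
print(route(params, LLMConfig, {"block_name"}))     # {'temperature': 0.2}
print(route(params, FilterConfig, {"block_name"}))  # {'filter_value': 'NO'}
```

The same model_fields lookup drives the new __getattr__/__setattr__ forwarding, which is how flow.set_model_config() can update model, api_base, or api_key on the wrapper and have the change reach the internal LLMChatBlock.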