sdg-hub 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. sdg_hub/_version.py +16 -3
  2. sdg_hub/core/blocks/deprecated_blocks/selector.py +1 -1
  3. sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +175 -416
  4. sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +174 -415
  5. sdg_hub/core/blocks/evaluation/verify_question_block.py +180 -415
  6. sdg_hub/core/blocks/llm/client_manager.py +92 -43
  7. sdg_hub/core/blocks/llm/config.py +1 -0
  8. sdg_hub/core/blocks/llm/llm_chat_block.py +74 -16
  9. sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +277 -115
  10. sdg_hub/core/blocks/llm/text_parser_block.py +88 -23
  11. sdg_hub/core/blocks/registry.py +48 -34
  12. sdg_hub/core/blocks/transform/__init__.py +2 -0
  13. sdg_hub/core/blocks/transform/index_based_mapper.py +1 -1
  14. sdg_hub/core/blocks/transform/json_structure_block.py +142 -0
  15. sdg_hub/core/flow/base.py +326 -62
  16. sdg_hub/core/utils/datautils.py +54 -0
  17. sdg_hub/core/utils/flow_metrics.py +261 -0
  18. sdg_hub/core/utils/logger_config.py +50 -9
  19. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/__init__.py +0 -0
  20. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/__init__.py +0 -0
  21. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/detailed_summary.yaml +11 -0
  22. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml +159 -0
  23. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/__init__.py +0 -0
  24. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/extractive_summary.yaml +65 -0
  25. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml +161 -0
  26. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_answers.yaml +15 -0
  27. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_multiple_qa.yaml +21 -0
  28. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_question_list.yaml +44 -0
  29. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/__init__.py +0 -0
  30. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml +104 -0
  31. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/key_facts_summary.yaml +61 -0
  32. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +0 -7
  33. sdg_hub/flows/text_analysis/__init__.py +2 -0
  34. sdg_hub/flows/text_analysis/structured_insights/__init__.py +6 -0
  35. sdg_hub/flows/text_analysis/structured_insights/analyze_sentiment.yaml +27 -0
  36. sdg_hub/flows/text_analysis/structured_insights/extract_entities.yaml +38 -0
  37. sdg_hub/flows/text_analysis/structured_insights/extract_keywords.yaml +21 -0
  38. sdg_hub/flows/text_analysis/structured_insights/flow.yaml +153 -0
  39. sdg_hub/flows/text_analysis/structured_insights/summarize.yaml +21 -0
  40. {sdg_hub-0.2.1.dist-info → sdg_hub-0.3.0.dist-info}/METADATA +42 -15
  41. {sdg_hub-0.2.1.dist-info → sdg_hub-0.3.0.dist-info}/RECORD +44 -22
  42. {sdg_hub-0.2.1.dist-info → sdg_hub-0.3.0.dist-info}/WHEEL +0 -0
  43. {sdg_hub-0.2.1.dist-info → sdg_hub-0.3.0.dist-info}/licenses/LICENSE +0 -0
  44. {sdg_hub-0.2.1.dist-info → sdg_hub-0.3.0.dist-info}/top_level.txt +0 -0
sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py
@@ -1,9 +1,14 @@
  # SPDX-License-Identifier: Apache-2.0
- """Composite block for relevancy evaluation of question-answer pairs.
+ """Thin wrapper for relevancy evaluation using 4 composed blocks.

- This module provides the EvaluateRelevancyBlock that encapsulates the complete
- relevancy evaluation workflow, combining prompt building, LLM chat, text parsing,
- and filtering into a single block for simplified configuration.
+ This module provides a simple, lightweight wrapper that composes:
+ - PromptBuilderBlock: builds evaluation prompts
+ - LLMChatBlock: generates LLM responses
+ - TextParserBlock: parses structured output
+ - ColumnValueFilterBlock: filters based on score
+
+ The wrapper exposes minimal LLM interface for flow detection while
+ delegating all functionality to the internal blocks.
  """

  # Standard
@@ -14,6 +19,7 @@ from datasets import Dataset
  from pydantic import ConfigDict, Field, field_validator

  # Local
+ from ...utils.error_handling import BlockValidationError
  from ...utils.logger_config import setup_logger
  from ..base import BaseBlock
  from ..filtering.column_value_filter import ColumnValueFilterBlock
@@ -28,16 +34,13 @@ logger = setup_logger(__name__)
  @BlockRegistry.register(
      "EvaluateRelevancyBlock",
      "evaluation",
-     "Composite block for relevancy evaluation of question-answer pairs",
+     "Thin wrapper composing 4 blocks for relevancy evaluation",
  )
  class EvaluateRelevancyBlock(BaseBlock):
-     """Composite block for relevancy evaluation workflow.
+     """Thin wrapper for relevancy evaluation using composed blocks.

-     This block combines four separate blocks into a single cohesive evaluation block:
-     1. PromptBuilderBlock - builds evaluation prompt from question and response
-     2. LLMChatBlock - generates relevancy evaluation using LLM
-     3. TextParserBlock - parses feedback and score from raw output
-     4. ColumnValueFilterBlock - filters based on relevancy score
+     Composes PromptBuilderBlock + LLMChatBlock + TextParserBlock + ColumnValueFilterBlock
+     into a single evaluation pipeline with smart parameter routing.

      Parameters
      ----------
@@ -47,88 +50,37 @@ class EvaluateRelevancyBlock(BaseBlock):
          Input columns: ["question", "response"]
      output_cols : List[str]
          Output columns: ["relevancy_explanation", "relevancy_score"]
-     prompt_config_path : str
-         Path to YAML file containing the relevancy evaluation prompt template.
-     model : str
-         Model identifier in LiteLLM format (e.g., "hosted_vllm/meta-llama/Llama-3.3-70B-Instruct")
+     model : Optional[str]
+         LLM model identifier.
      api_base : Optional[str]
-         Base URL for the API. Required for local models.
+         API base URL.
      api_key : Optional[str]
-         API key for the provider. Falls back to environment variables.
-     filter_value : Union[str, int, float], optional
-         Value to filter on for relevancy score (default: 2.0)
-     operation : str, optional
-         Filter operation (default: "eq")
-     convert_dtype : Optional[str], optional
-         Data type conversion for filter column (default: "float")
-     async_mode : bool, optional
-         Whether to use async processing (default: True)
-     format_as_messages : bool, optional
-         Whether to format prompt as messages (default: True)
-     start_tags : List[str], optional
-         Start tags for parsing (default: ["[Start of Feedback]", "[Start of Score]"])
-     end_tags : List[str], optional
-         End tags for parsing (default: ["[End of Feedback]", "[End of Score]"])
-     parsing_pattern : Optional[str], optional
-         Regex pattern for custom parsing. If provided, takes precedence over tag-based parsing.
-     parser_cleanup_tags : Optional[List[str]], optional
-         List of tags to clean from parsed output.
-
-     ### LLM Generation Parameters ###
-     temperature : Optional[float], optional
-         Sampling temperature (0.0 to 2.0).
-     max_tokens : Optional[int], optional
-         Maximum tokens to generate.
-     top_p : Optional[float], optional
-         Nucleus sampling parameter (0.0 to 1.0).
-     frequency_penalty : Optional[float], optional
-         Frequency penalty (-2.0 to 2.0).
-     presence_penalty : Optional[float], optional
-         Presence penalty (-2.0 to 2.0).
-     stop : Optional[Union[str, List[str]]], optional
-         Stop sequences.
-     seed : Optional[int], optional
-         Random seed for reproducible outputs.
-     response_format : Optional[Dict[str, Any]], optional
-         Response format specification (e.g., JSON mode).
-     stream : Optional[bool], optional
-         Whether to stream responses.
-     n : Optional[int], optional
-         Number of completions to generate. When n > 1, the output column will contain
-         a list of responses for each input sample.
-     logprobs : Optional[bool], optional
-         Whether to return log probabilities.
-     top_logprobs : Optional[int], optional
-         Number of top log probabilities to return.
-     user : Optional[str], optional
-         End-user identifier.
-     extra_headers : Optional[Dict[str, str]], optional
-         Additional headers to send with requests.
-     extra_body : Optional[Dict[str, Any]], optional
-         Additional parameters for the request body.
-     timeout : float, optional
-         Request timeout in seconds (default: 120.0).
-     max_retries : int, optional
-         Maximum number of retry attempts (default: 6).
+         API key.
+     prompt_config_path : str
+         Path to YAML prompt template file (required).
      **kwargs : Any
-         Additional provider-specific parameters.
+         All other parameters are automatically routed to appropriate internal blocks
+         based on each block's accepted parameters. This includes all LLM parameters
+         (temperature, max_tokens, extra_body, extra_headers, etc.), text parser
+         parameters, and filter parameters.
      """

-     model_config = ConfigDict(extra="forbid")
+     model_config = ConfigDict(
+         extra="allow"
+     )  # Allow extra fields for dynamic forwarding

-     # Core configuration
+     # --- Core configuration ---
      prompt_config_path: str = Field(
          ...,
          description="Path to YAML file containing the relevancy evaluation prompt template",
      )
-     model: Optional[str] = Field(None, description="Model identifier in LiteLLM format")
-     api_base: Optional[str] = Field(None, description="Base URL for the API")
-     api_key: Optional[str] = Field(
-         None,
-         description="API key for the provider. Falls back to environment variables.",
-     )

-     # Filter configuration
+     # --- LLM interface (for flow detection) ---
+     model: Optional[str] = Field(None, description="LLM model identifier")
+     api_base: Optional[str] = Field(None, description="API base URL")
+     api_key: Optional[str] = Field(None, description="API key")
+
+     # --- Filter configuration ---
      filter_value: Union[str, int, float] = Field(
          2.0, description="Value to filter on for relevancy score"
      )
@@ -137,13 +89,7 @@ class EvaluateRelevancyBlock(BaseBlock):
          "float", description="Data type conversion for filter column"
      )

-     # Processing configuration
-     async_mode: bool = Field(True, description="Whether to use async processing")
-     format_as_messages: bool = Field(
-         True, description="Whether to format prompt as messages"
-     )
-
-     # Parser configuration
+     # --- Parser configuration ---
      start_tags: list[str] = Field(
          ["[Start of Feedback]", "[Start of Score]"],
          description="Start tags for parsing feedback and score",
@@ -156,409 +102,222 @@
          None,
          description="Regex pattern for custom parsing. If provided, takes precedence over tag-based parsing",
      )
-     parser_cleanup_tags: Optional[list[str]] = Field(
-         None, description="List of tags to clean from parsed output"
-     )
-
-     # LLM generation parameters
-     temperature: Optional[float] = Field(
-         None, description="Sampling temperature (0.0 to 2.0)"
-     )
-     max_tokens: Optional[int] = Field(None, description="Maximum tokens to generate")
-     top_p: Optional[float] = Field(
-         None, description="Nucleus sampling parameter (0.0 to 1.0)"
-     )
-     frequency_penalty: Optional[float] = Field(
-         None, description="Frequency penalty (-2.0 to 2.0)"
-     )
-     presence_penalty: Optional[float] = Field(
-         None, description="Presence penalty (-2.0 to 2.0)"
-     )
-     stop: Optional[Union[str, list[str]]] = Field(None, description="Stop sequences")
-     seed: Optional[int] = Field(
-         None, description="Random seed for reproducible outputs"
-     )
-     response_format: Optional[dict[str, Any]] = Field(
-         None, description="Response format specification (e.g., JSON mode)"
-     )
-     stream: Optional[bool] = Field(None, description="Whether to stream responses")
-     n: Optional[int] = Field(
-         None,
-         description="Number of completions to generate. When n > 1, the output column will contain a list of responses for each input sample",
-     )
-     logprobs: Optional[bool] = Field(
-         None, description="Whether to return log probabilities"
-     )
-     top_logprobs: Optional[int] = Field(
-         None, description="Number of top log probabilities to return"
-     )
-     user: Optional[str] = Field(None, description="End-user identifier")
-     extra_headers: Optional[dict[str, str]] = Field(
-         None, description="Additional headers to send with requests"
-     )
-     extra_body: Optional[dict[str, Any]] = Field(
-         None, description="Additional parameters for the request body"
-     )
-     timeout: float = Field(120.0, description="Request timeout in seconds")
-     max_retries: int = Field(6, description="Maximum number of retry attempts")

-     # Additional provider-specific parameters
-     llm_kwargs: dict[str, Any] = Field(
-         default_factory=dict, description="Additional provider-specific parameters"
-     )
-
-     # Internal blocks - excluded from serialization
-     prompt_builder: Optional[PromptBuilderBlock] = Field(None, exclude=True)
-     llm_chat: Optional[LLMChatBlock] = Field(None, exclude=True)
-     text_parser: Optional[TextParserBlock] = Field(None, exclude=True)
-     filter_block: Optional[ColumnValueFilterBlock] = Field(None, exclude=True)
+     # --- Internal blocks (composition) ---
+     prompt_builder: PromptBuilderBlock = Field(None, exclude=True)  # type: ignore
+     llm_chat: LLMChatBlock = Field(None, exclude=True)  # type: ignore
+     text_parser: TextParserBlock = Field(None, exclude=True)  # type: ignore
+     filter_block: ColumnValueFilterBlock = Field(None, exclude=True)  # type: ignore

      @field_validator("input_cols")
      @classmethod
      def validate_input_cols(cls, v):
-         """Validate that input columns are exactly ["question", "response"]."""
-         expected = ["question", "response"]
-         if v != expected:
+         """Validate input columns."""
+         if v != ["question", "response"]:
              raise ValueError(
-                 f"EvaluateRelevancyBlock expects input_cols={expected}, got {v}"
+                 f"EvaluateRelevancyBlock expects input_cols ['question', 'response'], got {v}"
              )
          return v

      @field_validator("output_cols")
      @classmethod
      def validate_output_cols(cls, v):
-         """Validate that output columns are exactly ["relevancy_explanation", "relevancy_score"]."""
-         expected = [
-             "relevancy_explanation",
-             "relevancy_score",
-         ]
+         """Validate output columns."""
+         expected = ["relevancy_explanation", "relevancy_score"]
          if v != expected:
              raise ValueError(
-                 f"EvaluateRelevancyBlock expects output_cols={expected}, got {v}"
+                 f"EvaluateRelevancyBlock expects output_cols {expected}, got {v}"
              )
          return v

-     def model_post_init(self, __context: Any) -> None:
-         """Initialize the internal blocks after Pydantic validation."""
-         super().model_post_init(__context)
-
-         # Create internal blocks
-         self._create_internal_blocks()
+     def __init__(self, **kwargs):
+         """Initialize with smart parameter routing."""
+         super().__init__(**kwargs)
+         self._create_internal_blocks(**kwargs)

-         # Log initialization only when model is configured
+         # Log initialization if model is configured
          if self.model:
              logger.info(
-                 f"Initialized EvaluateRelevancyBlock '{self.block_name}' with model '{self.model}'",
-                 extra={
-                     "block_name": self.block_name,
-                     "model": self.model,
-                     "async_mode": self.async_mode,
-                     "filter_value": self.filter_value,
-                 },
+                 f"Initialized EvaluateRelevancyBlock '{self.block_name}' with model '{self.model}'"
              )

-     def _create_internal_blocks(self) -> None:
-         """Create and configure the internal blocks."""
-         # 1. PromptBuilderBlock
+     def _extract_params(self, kwargs: dict, block_class) -> dict:
+         """Extract parameters for specific block class based on its model_fields."""
+         # Exclude parameters that are handled by this wrapper
+         wrapper_params = {
+             "block_name",
+             "input_cols",
+             "output_cols",
+         }
+
+         # Extract parameters that the target block accepts
+         params = {
+             k: v
+             for k, v in kwargs.items()
+             if k in block_class.model_fields and k not in wrapper_params
+         }
+
+         # Also include declared fields from this composite block that the target block accepts
+         for field_name in self.__class__.model_fields:
+             if (
+                 field_name in block_class.model_fields
+                 and field_name not in wrapper_params
+             ):
+                 field_value = getattr(self, field_name)
+                 if field_value is not None:  # Only forward non-None values
+                     params[field_name] = field_value
+
+         return params
+
+     def _create_internal_blocks(self, **kwargs):
+         """Create internal blocks with parameter routing."""
+         # Route parameters to appropriate blocks
+         prompt_params = self._extract_params(kwargs, PromptBuilderBlock)
+         llm_params = self._extract_params(kwargs, LLMChatBlock)
+         parser_params = self._extract_params(kwargs, TextParserBlock)
+         filter_params = self._extract_params(kwargs, ColumnValueFilterBlock)
+
          self.prompt_builder = PromptBuilderBlock(
              block_name=f"{self.block_name}_prompt_builder",
              input_cols=["question", "response"],
             output_cols=["eval_relevancy_prompt"],
-             prompt_config_path=self.prompt_config_path,
-             format_as_messages=self.format_as_messages,
+             **prompt_params,
          )

-         # 2. LLMChatBlock
-         llm_kwargs = {
+         # Create LLM chat block with dynamic LLM parameter forwarding
+         llm_config = {
              "block_name": f"{self.block_name}_llm_chat",
              "input_cols": ["eval_relevancy_prompt"],
              "output_cols": ["raw_eval_relevancy"],
-             "model": self.model,
-             "api_base": self.api_base,
-             "api_key": self.api_key,
-             "async_mode": self.async_mode,
-             "timeout": self.timeout,
-             "max_retries": self.max_retries,
-         }
-
-         # Add generation parameters if specified
-         if self.temperature is not None:
-             llm_kwargs["temperature"] = self.temperature
-         if self.max_tokens is not None:
-             llm_kwargs["max_tokens"] = self.max_tokens
-         if self.top_p is not None:
-             llm_kwargs["top_p"] = self.top_p
-         if self.frequency_penalty is not None:
-             llm_kwargs["frequency_penalty"] = self.frequency_penalty
-         if self.presence_penalty is not None:
-             llm_kwargs["presence_penalty"] = self.presence_penalty
-         if self.stop is not None:
-             llm_kwargs["stop"] = self.stop
-         if self.seed is not None:
-             llm_kwargs["seed"] = self.seed
-         if self.response_format is not None:
-             llm_kwargs["response_format"] = self.response_format
-         if self.stream is not None:
-             llm_kwargs["stream"] = self.stream
-         if self.n is not None:
-             llm_kwargs["n"] = self.n
-         if self.logprobs is not None:
-             llm_kwargs["logprobs"] = self.logprobs
-         if self.top_logprobs is not None:
-             llm_kwargs["top_logprobs"] = self.top_logprobs
-         if self.user is not None:
-             llm_kwargs["user"] = self.user
-         if self.extra_headers is not None:
-             llm_kwargs["extra_headers"] = self.extra_headers
-         if self.extra_body is not None:
-             llm_kwargs["extra_body"] = self.extra_body
-
-         # Add any additional kwargs
-         llm_kwargs.update(self.llm_kwargs)
-
-         self.llm_chat = LLMChatBlock(**llm_kwargs)
-
-         # 3. TextParserBlock
-         text_parser_kwargs = {
-             "block_name": f"{self.block_name}_text_parser",
-             "input_cols": ["raw_eval_relevancy"],
-             "output_cols": ["relevancy_explanation", "relevancy_score"],
-             "start_tags": self.start_tags,
-             "end_tags": self.end_tags,
+             **llm_params,
          }

-         # Add optional TextParserBlock parameters if specified
-         if self.parsing_pattern is not None:
-             text_parser_kwargs["parsing_pattern"] = self.parsing_pattern
-         if self.parser_cleanup_tags is not None:
-             text_parser_kwargs["parser_cleanup_tags"] = self.parser_cleanup_tags
-
-         self.text_parser = TextParserBlock(**text_parser_kwargs)
-
-         # 4. ColumnValueFilterBlock
-         filter_kwargs = {
-             "block_name": f"{self.block_name}_filter",
-             "input_cols": ["relevancy_score"],
-             "output_cols": [],  # Filter blocks don't create new columns
-             "filter_value": self.filter_value,
-             "operation": self.operation,
-         }
-
-         if self.convert_dtype is not None:
-             filter_kwargs["convert_dtype"] = self.convert_dtype
-
-         self.filter_block = ColumnValueFilterBlock(**filter_kwargs)
-
-     def _reinitialize_client_manager(self) -> None:
-         """Reinitialize the internal LLM chat block's client manager.
+         # Only add LLM parameters if they are provided
+         if self.model is not None:
+             llm_config["model"] = self.model
+         if self.api_base is not None:
+             llm_config["api_base"] = self.api_base
+         if self.api_key is not None:
+             llm_config["api_key"] = self.api_key
+
+         self.llm_chat = LLMChatBlock(**llm_config)
+
+         # Create text parser
+         self.text_parser = TextParserBlock(
+             block_name=f"{self.block_name}_text_parser",
+             input_cols=["raw_eval_relevancy"],
+             output_cols=["relevancy_explanation", "relevancy_score"],
+             **parser_params,
+         )

-         This should be called after model configuration changes to ensure
-         the internal LLM chat block uses the updated model configuration.
-         """
-         if self.llm_chat and hasattr(self.llm_chat, "_reinitialize_client_manager"):
-             # Update the internal LLM chat block's model config
-             self.llm_chat.model = self.model
-             self.llm_chat.api_base = self.api_base
-             self.llm_chat.api_key = self.api_key
-             # Reinitialize its client manager
-             self.llm_chat._reinitialize_client_manager()
+         self.filter_block = ColumnValueFilterBlock(
+             block_name=f"{self.block_name}_filter",
+             input_cols=["relevancy_score"],
+             output_cols=[],  # Filter doesn't create new columns
+             **filter_params,
+         )

      def generate(self, samples: Dataset, **kwargs: Any) -> Dataset:
-         """Generate relevancy evaluation for all samples.
-
-         This method chains the four internal blocks in sequence:
-         1. Build relevancy evaluation prompts
-         2. Generate LLM responses
-         3. Parse explanation and score
-         4. Filter based on score
+         """Execute the 4-block relevancy evaluation pipeline.

          Parameters
          ----------
          samples : Dataset
-             Input dataset containing 'question' and 'response' columns.
+             Input dataset with 'question' and 'response' columns.
          **kwargs : Any
-             Additional keyword arguments passed to internal blocks.
+             Additional arguments passed to internal blocks.

          Returns
          -------
          Dataset
-             Dataset with relevancy evaluation results and filtering applied.
-
-         Raises
-         ------
-         BlockValidationError
-             If model is not configured before calling generate().
+             Filtered dataset with relevancy evaluation results.
          """
-         # Validate that model is configured
+         # Validate model is configured
          if not self.model:
-             # Local
-             from ...utils.error_handling import BlockValidationError
-
              raise BlockValidationError(
                  f"Model not configured for block '{self.block_name}'. "
                  f"Call flow.set_model_config() before generating."
             )
+
          logger.info(
              f"Starting relevancy evaluation for {len(samples)} samples",
-             extra={
-                 "block_name": self.block_name,
-                 "model": self.model,
-                 "batch_size": len(samples),
-             },
+             extra={"block_name": self.block_name, "model": self.model},
         )

-         current_dataset = samples
-
          try:
-             # Step 1: Build prompts
-             logger.debug("Step 1: Building relevancy evaluation prompts")
-             current_dataset = self.prompt_builder.generate(current_dataset, **kwargs)
-
-             # Step 2: Generate LLM responses
-             logger.debug("Step 2: Generating LLM responses")
-             current_dataset = self.llm_chat.generate(current_dataset, **kwargs)
-
-             # Step 3: Parse responses
-             logger.debug("Step 3: Parsing relevancy evaluation responses")
-             current_dataset = self.text_parser.generate(current_dataset, **kwargs)
-
-             # Step 4: Filter based on score
-             logger.debug("Step 4: Filtering based on relevancy score")
-             original_count = len(current_dataset)
-             current_dataset = self.filter_block.generate(current_dataset, **kwargs)
-             filtered_count = len(current_dataset)
+             # Execute 4-block pipeline with validation delegation
+             result = self.prompt_builder(samples, **kwargs)
+             result = self.llm_chat(result, **kwargs)
+             result = self.text_parser(result, **kwargs)
+             result = self.filter_block(result, **kwargs)

              logger.info(
-                 f"Relevancy evaluation completed: {original_count} → {filtered_count} samples "
-                 f"(filtered {original_count - filtered_count} samples)",
-                 extra={
-                     "block_name": self.block_name,
-                     "original_count": original_count,
-                     "filtered_count": filtered_count,
-                     "filter_rate": (original_count - filtered_count) / original_count
-                     if original_count > 0
-                     else 0,
-                 },
+                 f"Relevancy evaluation completed: {len(samples)} → {len(result)} samples",
+                 extra={"block_name": self.block_name},
             )

-             return current_dataset
+             return result

          except Exception as e:
              logger.error(
                  f"Error during relevancy evaluation: {e}",
-                 extra={
-                     "block_name": self.block_name,
-                     "model": self.model,
-                     "error": str(e),
-                 },
+                 extra={"block_name": self.block_name, "error": str(e)},
             )
             raise

-     def _validate_custom(self, dataset: Dataset) -> None:
-         """Custom validation for relevancy evaluation.
-
-         This method validates the entire chain of internal blocks by simulating
-         the data flow through each block to ensure they can all process the data correctly.
-         """
-         # Validate that required columns exist
-         required_columns = ["question", "response"]
-         missing_columns = [
-             col for col in required_columns if col not in dataset.column_names
-         ]
-         if missing_columns:
-             raise ValueError(
-                 f"EvaluateRelevancyBlock requires columns {required_columns}, "
-                 f"missing: {missing_columns}"
-             )
-
-         # Validate the entire chain of internal blocks
-         if not all(
-             [self.prompt_builder, self.llm_chat, self.text_parser, self.filter_block]
-         ):
-             raise ValueError(
-                 "All internal blocks must be initialized before validation"
-             )
+     def __getattr__(self, name: str) -> Any:
+         """Forward attribute access to appropriate internal block."""
+         # Check each internal block to see which one has this parameter
+         for block_attr, block_class in [
+             ("prompt_builder", PromptBuilderBlock),
+             ("llm_chat", LLMChatBlock),
+             ("text_parser", TextParserBlock),
+             ("filter_block", ColumnValueFilterBlock),
+         ]:
+             if hasattr(self, block_attr) and name in block_class.model_fields:
+                 internal_block = getattr(self, block_attr)
+                 if internal_block is not None:
+                     return getattr(internal_block, name)
+         raise AttributeError(
+             f"'{self.__class__.__name__}' object has no attribute '{name}'"
+         )

-         # Simulate data flow through the chain to validate each block
-         current_dataset = dataset
+     def __setattr__(self, name: str, value: Any) -> None:
+         """Handle dynamic parameter updates from flow.set_model_config()."""
+         super().__setattr__(name, value)

-         try:
-             # 1. Validate PromptBuilderBlock
-             logger.debug("Validating prompt builder block")
-             self.prompt_builder._validate_custom(current_dataset)
-
-             # Simulate prompt builder output for next validation
-             # Add the expected output column temporarily for validation
-             if "eval_relevancy_prompt" not in current_dataset.column_names:
-                 # Create a temporary dataset with the expected column for validation
-                 temp_data = []
-                 for sample in current_dataset:
-                     temp_sample = dict(sample)
-                     temp_sample["eval_relevancy_prompt"] = [
-                         {"role": "user", "content": "test"}
-                     ]
-                     temp_data.append(temp_sample)
-                 current_dataset = Dataset.from_list(temp_data)
-
-             # 2. Validate LLMChatBlock
-             logger.debug("Validating LLM chat block")
-             self.llm_chat._validate_custom(current_dataset)
-
-             # Simulate LLM chat output for next validation
-             if "raw_eval_relevancy" not in current_dataset.column_names:
-                 temp_data = []
-                 for sample in current_dataset:
-                     temp_sample = dict(sample)
-                     temp_sample["raw_eval_relevancy"] = (
-                         "[Start of Feedback]Test feedback[End of Feedback]\n[Start of Score]2.0[End of Score]"
-                     )
-                     temp_data.append(temp_sample)
-                 current_dataset = Dataset.from_list(temp_data)
-
-             # 3. Validate TextParserBlock
-             logger.debug("Validating text parser block")
-             self.text_parser._validate_custom(current_dataset)
-
-             # Simulate text parser output for final validation
-             if "relevancy_score" not in current_dataset.column_names:
-                 temp_data = []
-                 for sample in current_dataset:
-                     temp_sample = dict(sample)
-                     temp_sample["relevancy_explanation"] = "Test feedback"
-                     temp_sample["relevancy_score"] = "2.0"
-                     temp_data.append(temp_sample)
-                 current_dataset = Dataset.from_list(temp_data)
-
-             # 4. Validate ColumnValueFilterBlock
-             logger.debug("Validating filter block")
-             self.filter_block._validate_custom(current_dataset)
-
-             logger.debug("All internal blocks validated successfully")
+         # Forward to appropriate internal blocks
+         for block_attr, block_class in [
+             ("prompt_builder", PromptBuilderBlock),
+             ("llm_chat", LLMChatBlock),
+             ("text_parser", TextParserBlock),
+             ("filter_block", ColumnValueFilterBlock),
+         ]:
+             if hasattr(self, block_attr) and name in block_class.model_fields:
+                 setattr(getattr(self, block_attr), name, value)

-         except Exception as e:
-             logger.error(f"Validation failed in internal blocks: {e}")
-             raise ValueError(f"Internal block validation failed: {e}") from e
+     def _reinitialize_client_manager(self) -> None:
+         """Reinitialize internal LLM block's client manager."""
+         if hasattr(self.llm_chat, "_reinitialize_client_manager"):
+             self.llm_chat._reinitialize_client_manager()

      def get_internal_blocks_info(self) -> dict[str, Any]:
-         """Get information about the internal blocks.
-
-         Returns
-         -------
-         Dict[str, Any]
-             Information about each internal block.
-         """
+         """Get information about internal blocks."""
          return {
-             "prompt_builder": self.prompt_builder.get_info()
-             if self.prompt_builder
-             else None,
-             "llm_chat": self.llm_chat.get_info() if self.llm_chat else None,
-             "text_parser": self.text_parser.get_info() if self.text_parser else None,
-             "filter": self.filter_block.get_info() if self.filter_block else None,
+             "prompt_builder": self.prompt_builder.get_info(),
+             "llm_chat": self.llm_chat.get_info(),
+             "text_parser": self.text_parser.get_info(),
+             "filter": self.filter_block.get_info(),
         }

      def __repr__(self) -> str:
          """String representation of the block."""
+         filter_value = (
+             getattr(self.filter_block, "filter_value", 2.0)
+             if hasattr(self, "filter_block")
+             else 2.0
+         )
          return (
              f"EvaluateRelevancyBlock(name='{self.block_name}', "
-             f"model='{self.model}', filter_value='{self.filter_value}')"
+             f"model='{self.model}', filter_value='{filter_value}')"
         )
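
Note on the refactor above: the sketch below illustrates how the reworked EvaluateRelevancyBlock is meant to be driven after this change. It is an assumption-based example, not an excerpt from the package docs: the import path is inferred from the file location in the file list, and the prompt path and model name are placeholders. The fixed column names, the required prompt_config_path, and the routing of all other keyword arguments to the internal blocks come from the diff itself.

# Hypothetical usage sketch for the 0.3.0 wrapper-style block (illustrative only).
# Keyword arguments that are not declared fields of the wrapper are matched against
# the model_fields of the internal PromptBuilderBlock / LLMChatBlock /
# TextParserBlock / ColumnValueFilterBlock and forwarded to whichever block accepts
# them, per _extract_params() in the diff above.
from datasets import Dataset

# Import path inferred from the file layout in the "Files changed" list (assumption).
from sdg_hub.core.blocks.evaluation.evaluate_relevancy_block import EvaluateRelevancyBlock

block = EvaluateRelevancyBlock(
    block_name="relevancy_eval",
    input_cols=["question", "response"],                       # enforced by validate_input_cols
    output_cols=["relevancy_explanation", "relevancy_score"],  # enforced by validate_output_cols
    prompt_config_path="prompts/evaluate_relevancy.yaml",      # placeholder path
    model="hosted_vllm/meta-llama/Llama-3.3-70B-Instruct",     # placeholder model id
    temperature=0.0,   # routed to the internal LLMChatBlock
    max_tokens=512,    # routed to the internal LLMChatBlock
    filter_value=2.0,  # used by the internal ColumnValueFilterBlock
)

samples = Dataset.from_list(
    [{"question": "What does sdg-hub provide?", "response": "Blocks and flows for synthetic data generation."}]
)
filtered = block.generate(samples)  # prompt build -> LLM chat -> parse -> score filter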