sdg-hub 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,14 @@
1
1
  # SPDX-License-Identifier: Apache-2.0
2
- """Composite block for question verification and quality assessment.
2
+ """Thin wrapper for question verification using 4 composed blocks.
3
3
 
4
- This module provides the VerifyQuestionBlock that encapsulates the complete
5
- question verification workflow, combining prompt building, LLM chat, text parsing,
6
- and filtering into a single block for simplified configuration.
4
+ This module provides a simple, lightweight wrapper that composes:
5
+ - PromptBuilderBlock: builds verification prompts
6
+ - LLMChatBlock: generates LLM responses
7
+ - TextParserBlock: parses structured output
8
+ - ColumnValueFilterBlock: filters based on rating
9
+
10
+ The wrapper exposes a minimal LLM interface for flow detection while
11
+ delegating all functionality to the internal blocks.
7
12
  """
8
13
 
9
14
  # Standard
@@ -14,6 +19,7 @@ from datasets import Dataset
14
19
  from pydantic import ConfigDict, Field, field_validator
15
20
 
16
21
  # Local
22
+ from ...utils.error_handling import BlockValidationError
17
23
  from ...utils.logger_config import setup_logger
18
24
  from ..base import BaseBlock
19
25
  from ..filtering.column_value_filter import ColumnValueFilterBlock
@@ -28,16 +34,13 @@ logger = setup_logger(__name__)
28
34
  @BlockRegistry.register(
29
35
  "VerifyQuestionBlock",
30
36
  "evaluation",
31
- "Composite block for question verification and quality assessment",
37
+ "Thin wrapper composing 4 blocks for question verification",
32
38
  )
33
39
  class VerifyQuestionBlock(BaseBlock):
34
- """Composite block for question verification workflow.
40
+ """Thin wrapper for question verification using composed blocks.
35
41
 
36
- This block combines four separate blocks into a single cohesive verification block:
37
- 1. PromptBuilderBlock - builds verification prompt from question
38
- 2. LLMChatBlock - generates question quality assessment using LLM
39
- 3. TextParserBlock - parses explanation and rating from raw output
40
- 4. ColumnValueFilterBlock - filters based on verification rating
42
+ Composes PromptBuilderBlock + LLMChatBlock + TextParserBlock + ColumnValueFilterBlock
43
+ into a single verification pipeline with smart parameter routing.
41
44
 
42
45
  Parameters
43
46
  ----------
@@ -47,103 +50,46 @@ class VerifyQuestionBlock(BaseBlock):
47
50
  Input columns: ["question"]
48
51
  output_cols : List[str]
49
52
  Output columns: ["verification_explanation", "verification_rating"]
50
- prompt_config_path : str
51
- Path to YAML file containing the question verification prompt template.
52
- model : str
53
- Model identifier in LiteLLM format (e.g., "hosted_vllm/meta-llama/Llama-3.3-70B-Instruct")
53
+ model : Optional[str]
54
+ LLM model identifier.
54
55
  api_base : Optional[str]
55
- Base URL for the API. Required for local models.
56
+ API base URL.
56
57
  api_key : Optional[str]
57
- API key for the provider. Falls back to environment variables.
58
- filter_value : Union[str, int, float], optional
59
- Value to filter on for verification rating (default: 1.0)
60
- operation : str, optional
61
- Filter operation (default: "ge")
62
- convert_dtype : Optional[str], optional
63
- Data type conversion for filter column (default: "float")
64
- async_mode : bool, optional
65
- Whether to use async processing (default: True)
66
- format_as_messages : bool, optional
67
- Whether to format prompt as messages (default: True)
68
- start_tags : List[str], optional
69
- Start tags for parsing (default: ["[Start of Explanation]", "[Start of Rating]"])
70
- end_tags : List[str], optional
71
- End tags for parsing (default: ["[End of Explanation]", "[End of Rating]"])
72
- parsing_pattern : Optional[str], optional
73
- Regex pattern for custom parsing. If provided, takes precedence over tag-based parsing.
74
- parser_cleanup_tags : Optional[List[str]], optional
75
- List of tags to clean from parsed output.
76
-
77
- ### LLM Generation Parameters ###
78
- temperature : Optional[float], optional
79
- Sampling temperature (0.0 to 2.0).
80
- max_tokens : Optional[int], optional
81
- Maximum tokens to generate.
82
- top_p : Optional[float], optional
83
- Nucleus sampling parameter (0.0 to 1.0).
84
- frequency_penalty : Optional[float], optional
85
- Frequency penalty (-2.0 to 2.0).
86
- presence_penalty : Optional[float], optional
87
- Presence penalty (-2.0 to 2.0).
88
- stop : Optional[Union[str, List[str]]], optional
89
- Stop sequences.
90
- seed : Optional[int], optional
91
- Random seed for reproducible outputs.
92
- response_format : Optional[Dict[str, Any]], optional
93
- Response format specification (e.g., JSON mode).
94
- stream : Optional[bool], optional
95
- Whether to stream responses.
96
- n : Optional[int], optional
97
- Number of completions to generate. When n > 1, the output column will contain
98
- a list of responses for each input sample.
99
- logprobs : Optional[bool], optional
100
- Whether to return log probabilities.
101
- top_logprobs : Optional[int], optional
102
- Number of top log probabilities to return.
103
- user : Optional[str], optional
104
- End-user identifier.
105
- extra_headers : Optional[Dict[str, str]], optional
106
- Additional headers to send with requests.
107
- extra_body : Optional[Dict[str, Any]], optional
108
- Additional parameters for the request body.
109
- timeout : float, optional
110
- Request timeout in seconds (default: 120.0).
111
- max_retries : int, optional
112
- Maximum number of retry attempts (default: 6).
58
+ API key.
59
+ prompt_config_path : str
60
+ Path to YAML prompt template file (required).
113
61
  **kwargs : Any
114
- Additional provider-specific parameters.
62
+ All other parameters are automatically routed to the appropriate internal blocks
63
+ based on each block's accepted parameters. This includes all LLM parameters
64
+ (temperature, max_tokens, extra_body, extra_headers, etc.), text parser
65
+ parameters, and filter parameters.
115
66
  """
116
67
 
117
- model_config = ConfigDict(extra="forbid")
68
+ model_config = ConfigDict(
69
+ extra="allow"
70
+ ) # Allow extra fields for dynamic forwarding
118
71
 
119
- # Core configuration
72
+ # --- Core configuration ---
120
73
  prompt_config_path: str = Field(
121
74
  ...,
122
75
  description="Path to YAML file containing the question verification prompt template",
123
76
  )
124
- model: Optional[str] = Field(None, description="Model identifier in LiteLLM format")
125
- api_base: Optional[str] = Field(None, description="Base URL for the API")
126
- api_key: Optional[str] = Field(
127
- None,
128
- description="API key for the provider. Falls back to environment variables.",
129
- )
130
77
 
131
- # Filter configuration
78
+ # --- LLM interface (for flow detection) ---
79
+ model: Optional[str] = Field(None, description="LLM model identifier")
80
+ api_base: Optional[str] = Field(None, description="API base URL")
81
+ api_key: Optional[str] = Field(None, description="API key")
82
+
83
+ # --- Filter configuration ---
132
84
  filter_value: Union[str, int, float] = Field(
133
85
  1.0, description="Value to filter on for verification rating"
134
86
  )
135
- operation: str = Field("ge", description="Filter operation")
87
+ operation: str = Field("eq", description="Filter operation")
136
88
  convert_dtype: Optional[str] = Field(
137
89
  "float", description="Data type conversion for filter column"
138
90
  )
139
91
 
140
- # Processing configuration
141
- async_mode: bool = Field(True, description="Whether to use async processing")
142
- format_as_messages: bool = Field(
143
- True, description="Whether to format prompt as messages"
144
- )
145
-
146
- # Parser configuration
92
+ # --- Parser configuration ---
147
93
  start_tags: list[str] = Field(
148
94
  ["[Start of Explanation]", "[Start of Rating]"],
149
95
  description="Start tags for parsing explanation and rating",
@@ -156,409 +102,228 @@ class VerifyQuestionBlock(BaseBlock):
156
102
  None,
157
103
  description="Regex pattern for custom parsing. If provided, takes precedence over tag-based parsing",
158
104
  )
159
- parser_cleanup_tags: Optional[list[str]] = Field(
160
- None, description="List of tags to clean from parsed output"
161
- )
162
105
 
163
- # LLM generation parameters
164
- temperature: Optional[float] = Field(
165
- None, description="Sampling temperature (0.0 to 2.0)"
166
- )
167
- max_tokens: Optional[int] = Field(None, description="Maximum tokens to generate")
168
- top_p: Optional[float] = Field(
169
- None, description="Nucleus sampling parameter (0.0 to 1.0)"
170
- )
171
- frequency_penalty: Optional[float] = Field(
172
- None, description="Frequency penalty (-2.0 to 2.0)"
173
- )
174
- presence_penalty: Optional[float] = Field(
175
- None, description="Presence penalty (-2.0 to 2.0)"
176
- )
177
- stop: Optional[Union[str, list[str]]] = Field(None, description="Stop sequences")
178
- seed: Optional[int] = Field(
179
- None, description="Random seed for reproducible outputs"
180
- )
181
- response_format: Optional[dict[str, Any]] = Field(
182
- None, description="Response format specification (e.g., JSON mode)"
183
- )
184
- stream: Optional[bool] = Field(None, description="Whether to stream responses")
185
- n: Optional[int] = Field(
186
- None,
187
- description="Number of completions to generate. When n > 1, the output column will contain a list of responses for each input sample",
188
- )
189
- logprobs: Optional[bool] = Field(
190
- None, description="Whether to return log probabilities"
191
- )
192
- top_logprobs: Optional[int] = Field(
193
- None, description="Number of top log probabilities to return"
194
- )
195
- user: Optional[str] = Field(None, description="End-user identifier")
196
- extra_headers: Optional[dict[str, str]] = Field(
197
- None, description="Additional headers to send with requests"
198
- )
199
- extra_body: Optional[dict[str, Any]] = Field(
200
- None, description="Additional parameters for the request body"
201
- )
202
- timeout: float = Field(120.0, description="Request timeout in seconds")
203
- max_retries: int = Field(6, description="Maximum number of retry attempts")
106
+ # Store parameters for internal blocks
107
+ prompt_params: dict[str, Any] = Field(default_factory=dict, exclude=True)
108
+ llm_params: dict[str, Any] = Field(default_factory=dict, exclude=True)
109
+ parser_params: dict[str, Any] = Field(default_factory=dict, exclude=True)
110
+ filter_params: dict[str, Any] = Field(default_factory=dict, exclude=True)
204
111
 
205
- # Additional provider-specific parameters
206
- llm_kwargs: dict[str, Any] = Field(
207
- default_factory=dict, description="Additional provider-specific parameters"
208
- )
209
-
210
- # Internal blocks - excluded from serialization
211
- prompt_builder: Optional[PromptBuilderBlock] = Field(None, exclude=True)
212
- llm_chat: Optional[LLMChatBlock] = Field(None, exclude=True)
213
- text_parser: Optional[TextParserBlock] = Field(None, exclude=True)
214
- filter_block: Optional[ColumnValueFilterBlock] = Field(None, exclude=True)
112
+ # --- Internal blocks (composition) ---
113
+ prompt_builder: PromptBuilderBlock = Field(None, exclude=True) # type: ignore
114
+ llm_chat: LLMChatBlock = Field(None, exclude=True) # type: ignore
115
+ text_parser: TextParserBlock = Field(None, exclude=True) # type: ignore
116
+ filter_block: ColumnValueFilterBlock = Field(None, exclude=True) # type: ignore
215
117
 
216
118
  @field_validator("input_cols")
217
119
  @classmethod
218
120
  def validate_input_cols(cls, v):
219
- """Validate that input columns are exactly ["question"]."""
220
- expected = ["question"]
221
- if v != expected:
121
+ """Ensure input_cols is exactly ["question"]."""
122
+ if v != ["question"]:
222
123
  raise ValueError(
223
- f"VerifyQuestionBlock expects input_cols={expected}, got {v}"
124
+ f"VerifyQuestionBlock expects input_cols ['question'], got {v}"
224
125
  )
225
126
  return v
226
127
 
227
128
  @field_validator("output_cols")
228
129
  @classmethod
229
130
  def validate_output_cols(cls, v):
230
- """Validate that output columns are exactly ["verification_explanation", "verification_rating"]."""
231
- expected = [
232
- "verification_explanation",
233
- "verification_rating",
234
- ]
131
+ """Ensure output_cols is exactly ["verification_explanation", "verification_rating"]."""
132
+ expected = ["verification_explanation", "verification_rating"]
235
133
  if v != expected:
236
134
  raise ValueError(
237
- f"VerifyQuestionBlock expects output_cols={expected}, got {v}"
135
+ f"VerifyQuestionBlock expects output_cols {expected}, got {v}"
238
136
  )
239
137
  return v
240
138
 
241
- def model_post_init(self, __context: Any) -> None:
242
- """Initialize the internal blocks after Pydantic validation."""
243
- super().model_post_init(__context)
244
-
245
- # Create internal blocks
246
- self._create_internal_blocks()
139
+ def __init__(self, **kwargs):
140
+ """Initialize with smart parameter routing."""
141
+ super().__init__(**kwargs)
142
+ self._create_internal_blocks(**kwargs)
247
143
 
248
- # Log initialization only when model is configured
144
+ # Log initialization if model is configured
249
145
  if self.model:
250
146
  logger.info(
251
- f"Initialized VerifyQuestionBlock '{self.block_name}' with model '{self.model}'",
252
- extra={
253
- "block_name": self.block_name,
254
- "model": self.model,
255
- "async_mode": self.async_mode,
256
- "filter_value": self.filter_value,
257
- },
147
+ f"Initialized VerifyQuestionBlock '{self.block_name}' with model '{self.model}'"
258
148
  )
259
149
 
260
- def _create_internal_blocks(self) -> None:
261
- """Create and configure the internal blocks."""
262
- # 1. PromptBuilderBlock
150
+ def _extract_params(self, kwargs: dict, block_class) -> dict:
151
+ """Extract the parameters that a specific block class accepts, based on its model_fields."""
152
+ # Exclude parameters that are handled by this wrapper's structure
153
+ wrapper_params = {
154
+ "block_name",
155
+ "input_cols",
156
+ "output_cols",
157
+ }
158
+
159
+ # Extract parameters that the target block accepts
160
+ params = {
161
+ k: v
162
+ for k, v in kwargs.items()
163
+ if k in block_class.model_fields and k not in wrapper_params
164
+ }
165
+
166
+ # Also include declared fields from this composite block that the target block accepts
167
+ for field_name in self.__class__.model_fields:
168
+ if (
169
+ field_name in block_class.model_fields
170
+ and field_name not in wrapper_params
171
+ ):
172
+ field_value = getattr(self, field_name)
173
+ if field_value is not None: # Only forward non-None values
174
+ params[field_name] = field_value
175
+
176
+ return params
177
+
178
+ def _create_internal_blocks(self, **kwargs):
179
+ """Create internal blocks with parameter routing."""
180
+ # Route parameters to appropriate blocks
181
+ prompt_params = self._extract_params(kwargs, PromptBuilderBlock)
182
+ llm_params = self._extract_params(kwargs, LLMChatBlock)
183
+ parser_params = self._extract_params(kwargs, TextParserBlock)
184
+ filter_params = self._extract_params(kwargs, ColumnValueFilterBlock)
185
+
263
186
  self.prompt_builder = PromptBuilderBlock(
264
187
  block_name=f"{self.block_name}_prompt_builder",
265
188
  input_cols=["question"],
266
189
  output_cols=["verify_question_prompt"],
267
- prompt_config_path=self.prompt_config_path,
268
- format_as_messages=self.format_as_messages,
190
+ **prompt_params,
269
191
  )
270
192
 
271
- # 2. LLMChatBlock
272
- llm_kwargs = {
193
+ # Create LLM chat block with dynamic LLM parameter forwarding
194
+ llm_config = {
273
195
  "block_name": f"{self.block_name}_llm_chat",
274
196
  "input_cols": ["verify_question_prompt"],
275
197
  "output_cols": ["raw_verify_question"],
276
- "model": self.model,
277
- "api_base": self.api_base,
278
- "api_key": self.api_key,
279
- "async_mode": self.async_mode,
280
- "timeout": self.timeout,
281
- "max_retries": self.max_retries,
282
- }
283
-
284
- # Add generation parameters if specified
285
- if self.temperature is not None:
286
- llm_kwargs["temperature"] = self.temperature
287
- if self.max_tokens is not None:
288
- llm_kwargs["max_tokens"] = self.max_tokens
289
- if self.top_p is not None:
290
- llm_kwargs["top_p"] = self.top_p
291
- if self.frequency_penalty is not None:
292
- llm_kwargs["frequency_penalty"] = self.frequency_penalty
293
- if self.presence_penalty is not None:
294
- llm_kwargs["presence_penalty"] = self.presence_penalty
295
- if self.stop is not None:
296
- llm_kwargs["stop"] = self.stop
297
- if self.seed is not None:
298
- llm_kwargs["seed"] = self.seed
299
- if self.response_format is not None:
300
- llm_kwargs["response_format"] = self.response_format
301
- if self.stream is not None:
302
- llm_kwargs["stream"] = self.stream
303
- if self.n is not None:
304
- llm_kwargs["n"] = self.n
305
- if self.logprobs is not None:
306
- llm_kwargs["logprobs"] = self.logprobs
307
- if self.top_logprobs is not None:
308
- llm_kwargs["top_logprobs"] = self.top_logprobs
309
- if self.user is not None:
310
- llm_kwargs["user"] = self.user
311
- if self.extra_headers is not None:
312
- llm_kwargs["extra_headers"] = self.extra_headers
313
- if self.extra_body is not None:
314
- llm_kwargs["extra_body"] = self.extra_body
315
-
316
- # Add any additional kwargs
317
- llm_kwargs.update(self.llm_kwargs)
318
-
319
- self.llm_chat = LLMChatBlock(**llm_kwargs)
320
-
321
- # 3. TextParserBlock
322
- text_parser_kwargs = {
323
- "block_name": f"{self.block_name}_text_parser",
324
- "input_cols": ["raw_verify_question"],
325
- "output_cols": ["verification_explanation", "verification_rating"],
326
- "start_tags": self.start_tags,
327
- "end_tags": self.end_tags,
328
- }
329
-
330
- # Add optional TextParserBlock parameters if specified
331
- if self.parsing_pattern is not None:
332
- text_parser_kwargs["parsing_pattern"] = self.parsing_pattern
333
- if self.parser_cleanup_tags is not None:
334
- text_parser_kwargs["parser_cleanup_tags"] = self.parser_cleanup_tags
335
-
336
- self.text_parser = TextParserBlock(**text_parser_kwargs)
337
-
338
- # 4. ColumnValueFilterBlock
339
- filter_kwargs = {
340
- "block_name": f"{self.block_name}_filter",
341
- "input_cols": ["verification_rating"],
342
- "output_cols": [], # Filter blocks don't create new columns
343
- "filter_value": self.filter_value,
344
- "operation": self.operation,
198
+ **llm_params,
345
199
  }
346
200
 
347
- if self.convert_dtype is not None:
348
- filter_kwargs["convert_dtype"] = self.convert_dtype
349
-
350
- self.filter_block = ColumnValueFilterBlock(**filter_kwargs)
351
-
352
- def _reinitialize_client_manager(self) -> None:
353
- """Reinitialize the internal LLM chat block's client manager.
201
+ # Only add LLM parameters if they are provided
202
+ if self.model is not None:
203
+ llm_config["model"] = self.model
204
+ if self.api_base is not None:
205
+ llm_config["api_base"] = self.api_base
206
+ if self.api_key is not None:
207
+ llm_config["api_key"] = self.api_key
208
+
209
+ self.llm_chat = LLMChatBlock(**llm_config)
210
+
211
+ # Create text parser
212
+ self.text_parser = TextParserBlock(
213
+ block_name=f"{self.block_name}_text_parser",
214
+ input_cols=["raw_verify_question"],
215
+ output_cols=["verification_explanation", "verification_rating"],
216
+ **parser_params,
217
+ )
354
218
 
355
- This should be called after model configuration changes to ensure
356
- the internal LLM chat block uses the updated model configuration.
357
- """
358
- if self.llm_chat and hasattr(self.llm_chat, "_reinitialize_client_manager"):
359
- # Update the internal LLM chat block's model config
360
- self.llm_chat.model = self.model
361
- self.llm_chat.api_base = self.api_base
362
- self.llm_chat.api_key = self.api_key
363
- # Reinitialize its client manager
364
- self.llm_chat._reinitialize_client_manager()
219
+ self.filter_block = ColumnValueFilterBlock(
220
+ block_name=f"{self.block_name}_filter",
221
+ input_cols=["verification_rating"],
222
+ output_cols=[], # Filter doesn't create new columns
223
+ **filter_params,
224
+ )
365
225
 
366
226
  def generate(self, samples: Dataset, **kwargs: Any) -> Dataset:
367
- """Generate question verification for all samples.
368
-
369
- This method chains the four internal blocks in sequence:
370
- 1. Build question verification prompts
371
- 2. Generate LLM responses
372
- 3. Parse explanation and rating
373
- 4. Filter based on rating
227
+ """Execute the 4-block question verification pipeline.
374
228
 
375
229
  Parameters
376
230
  ----------
377
231
  samples : Dataset
378
- Input dataset containing 'question' column.
232
+ Input dataset with a 'question' column.
379
233
  **kwargs : Any
380
- Additional keyword arguments passed to internal blocks.
234
+ Additional arguments passed to internal blocks.
381
235
 
382
236
  Returns
383
237
  -------
384
238
  Dataset
385
- Dataset with question verification results and filtering applied.
386
-
387
- Raises
388
- ------
389
- BlockValidationError
390
- If model is not configured before calling generate().
239
+ Filtered dataset with question verification results.
391
240
  """
392
- # Validate that model is configured
241
+ # Validate model is configured
393
242
  if not self.model:
394
- # Local
395
- from ...utils.error_handling import BlockValidationError
396
-
397
243
  raise BlockValidationError(
398
244
  f"Model not configured for block '{self.block_name}'. "
399
245
  f"Call flow.set_model_config() before generating."
400
246
  )
247
+
401
248
  logger.info(
402
249
  f"Starting question verification for {len(samples)} samples",
403
- extra={
404
- "block_name": self.block_name,
405
- "model": self.model,
406
- "batch_size": len(samples),
407
- },
250
+ extra={"block_name": self.block_name, "model": self.model},
408
251
  )
409
252
 
410
- current_dataset = samples
411
-
412
253
  try:
413
- # Step 1: Build prompts
414
- logger.debug("Step 1: Building question verification prompts")
415
- current_dataset = self.prompt_builder.generate(current_dataset, **kwargs)
416
-
417
- # Step 2: Generate LLM responses
418
- logger.debug("Step 2: Generating LLM responses")
419
- current_dataset = self.llm_chat.generate(current_dataset, **kwargs)
420
-
421
- # Step 3: Parse responses
422
- logger.debug("Step 3: Parsing question verification responses")
423
- current_dataset = self.text_parser.generate(current_dataset, **kwargs)
424
-
425
- # Step 4: Filter based on rating
426
- logger.debug("Step 4: Filtering based on verification rating")
427
- original_count = len(current_dataset)
428
- current_dataset = self.filter_block.generate(current_dataset, **kwargs)
429
- filtered_count = len(current_dataset)
254
+ # Execute 4-block pipeline with validation delegation
255
+ result = self.prompt_builder(samples, **kwargs)
256
+ result = self.llm_chat(result, **kwargs)
257
+ result = self.text_parser(result, **kwargs)
258
+ result = self.filter_block(result, **kwargs)
430
259
 
431
260
  logger.info(
432
- f"Question verification completed: {original_count} → {filtered_count} samples "
433
- f"(filtered {original_count - filtered_count} samples)",
434
- extra={
435
- "block_name": self.block_name,
436
- "original_count": original_count,
437
- "filtered_count": filtered_count,
438
- "filter_rate": (original_count - filtered_count) / original_count
439
- if original_count > 0
440
- else 0,
441
- },
261
+ f"Question verification completed: {len(samples)} → {len(result)} samples",
262
+ extra={"block_name": self.block_name},
442
263
  )
443
264
 
444
- return current_dataset
265
+ return result
445
266
 
446
267
  except Exception as e:
447
268
  logger.error(
448
269
  f"Error during question verification: {e}",
449
- extra={
450
- "block_name": self.block_name,
451
- "model": self.model,
452
- "error": str(e),
453
- },
270
+ extra={"block_name": self.block_name, "error": str(e)},
454
271
  )
455
272
  raise
456
273
 
457
- def _validate_custom(self, dataset: Dataset) -> None:
458
- """Custom validation for question verification.
459
-
460
- This method validates the entire chain of internal blocks by simulating
461
- the data flow through each block to ensure they can all process the data correctly.
462
- """
463
- # Validate that required columns exist
464
- required_columns = ["question"]
465
- missing_columns = [
466
- col for col in required_columns if col not in dataset.column_names
467
- ]
468
- if missing_columns:
469
- raise ValueError(
470
- f"VerifyQuestionBlock requires columns {required_columns}, "
471
- f"missing: {missing_columns}"
472
- )
473
-
474
- # Validate the entire chain of internal blocks
475
- if not all(
476
- [self.prompt_builder, self.llm_chat, self.text_parser, self.filter_block]
477
- ):
478
- raise ValueError(
479
- "All internal blocks must be initialized before validation"
480
- )
274
+ def __getattr__(self, name: str) -> Any:
275
+ """Forward attribute access to appropriate internal block."""
276
+ # Check each internal block to see which one has this parameter
277
+ for block_attr, block_class in [
278
+ ("prompt_builder", PromptBuilderBlock),
279
+ ("llm_chat", LLMChatBlock),
280
+ ("text_parser", TextParserBlock),
281
+ ("filter_block", ColumnValueFilterBlock),
282
+ ]:
283
+ if hasattr(self, block_attr) and name in block_class.model_fields:
284
+ internal_block = getattr(self, block_attr)
285
+ if internal_block is not None:
286
+ return getattr(internal_block, name)
287
+ raise AttributeError(
288
+ f"'{self.__class__.__name__}' object has no attribute '{name}'"
289
+ )
481
290
 
482
- # Simulate data flow through the chain to validate each block
483
- current_dataset = dataset
291
+ def __setattr__(self, name: str, value: Any) -> None:
292
+ """Handle dynamic parameter updates from flow.set_model_config()."""
293
+ super().__setattr__(name, value)
484
294
 
485
- try:
486
- # 1. Validate PromptBuilderBlock
487
- logger.debug("Validating prompt builder block")
488
- self.prompt_builder._validate_custom(current_dataset)
489
-
490
- # Simulate prompt builder output for next validation
491
- # Add the expected output column temporarily for validation
492
- if "verify_question_prompt" not in current_dataset.column_names:
493
- # Create a temporary dataset with the expected column for validation
494
- temp_data = []
495
- for sample in current_dataset:
496
- temp_sample = dict(sample)
497
- temp_sample["verify_question_prompt"] = [
498
- {"role": "user", "content": "test"}
499
- ]
500
- temp_data.append(temp_sample)
501
- current_dataset = Dataset.from_list(temp_data)
502
-
503
- # 2. Validate LLMChatBlock
504
- logger.debug("Validating LLM chat block")
505
- self.llm_chat._validate_custom(current_dataset)
506
-
507
- # Simulate LLM chat output for next validation
508
- if "raw_verify_question" not in current_dataset.column_names:
509
- temp_data = []
510
- for sample in current_dataset:
511
- temp_sample = dict(sample)
512
- temp_sample["raw_verify_question"] = (
513
- "[Start of Explanation]Test explanation[End of Explanation]\n[Start of Rating]1.0[End of Rating]"
514
- )
515
- temp_data.append(temp_sample)
516
- current_dataset = Dataset.from_list(temp_data)
517
-
518
- # 3. Validate TextParserBlock
519
- logger.debug("Validating text parser block")
520
- self.text_parser._validate_custom(current_dataset)
521
-
522
- # Simulate text parser output for final validation
523
- if "verification_rating" not in current_dataset.column_names:
524
- temp_data = []
525
- for sample in current_dataset:
526
- temp_sample = dict(sample)
527
- temp_sample["verification_explanation"] = "Test explanation"
528
- temp_sample["verification_rating"] = "1.0"
529
- temp_data.append(temp_sample)
530
- current_dataset = Dataset.from_list(temp_data)
531
-
532
- # 4. Validate ColumnValueFilterBlock
533
- logger.debug("Validating filter block")
534
- self.filter_block._validate_custom(current_dataset)
535
-
536
- logger.debug("All internal blocks validated successfully")
295
+ # Forward to appropriate internal blocks
296
+ for block_attr, block_class in [
297
+ ("prompt_builder", PromptBuilderBlock),
298
+ ("llm_chat", LLMChatBlock),
299
+ ("text_parser", TextParserBlock),
300
+ ("filter_block", ColumnValueFilterBlock),
301
+ ]:
302
+ if hasattr(self, block_attr) and name in block_class.model_fields:
303
+ setattr(getattr(self, block_attr), name, value)
537
304
 
538
- except Exception as e:
539
- logger.error(f"Validation failed in internal blocks: {e}")
540
- raise ValueError(f"Internal block validation failed: {e}") from e
305
+ def _reinitialize_client_manager(self) -> None:
306
+ """Reinitialize internal LLM block's client manager."""
307
+ if hasattr(self.llm_chat, "_reinitialize_client_manager"):
308
+ self.llm_chat._reinitialize_client_manager()
541
309
 
542
310
  def get_internal_blocks_info(self) -> dict[str, Any]:
543
- """Get information about the internal blocks.
544
-
545
- Returns
546
- -------
547
- Dict[str, Any]
548
- Information about each internal block.
549
- """
311
+ """Get information about internal blocks."""
550
312
  return {
551
- "prompt_builder": self.prompt_builder.get_info()
552
- if self.prompt_builder
553
- else None,
554
- "llm_chat": self.llm_chat.get_info() if self.llm_chat else None,
555
- "text_parser": self.text_parser.get_info() if self.text_parser else None,
556
- "filter": self.filter_block.get_info() if self.filter_block else None,
313
+ "prompt_builder": self.prompt_builder.get_info(),
314
+ "llm_chat": self.llm_chat.get_info(),
315
+ "text_parser": self.text_parser.get_info(),
316
+ "filter": self.filter_block.get_info(),
557
317
  }
558
318
 
559
319
  def __repr__(self) -> str:
560
320
  """String representation of the block."""
321
+ filter_value = (
322
+ getattr(self.filter_block, "filter_value", "1.0")
323
+ if hasattr(self, "filter_block")
324
+ else "1.0"
325
+ )
561
326
  return (
562
327
  f"VerifyQuestionBlock(name='{self.block_name}', "
563
- f"model='{self.model}', filter_value='{self.filter_value}')"
328
+ f"model='{self.model}', filter_value='{filter_value}')"
564
329
  )