sdg-hub 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. sdg_hub/_version.py +2 -2
  2. sdg_hub/core/blocks/__init__.py +2 -4
  3. sdg_hub/core/blocks/base.py +61 -6
  4. sdg_hub/core/blocks/filtering/column_value_filter.py +3 -2
  5. sdg_hub/core/blocks/llm/__init__.py +2 -4
  6. sdg_hub/core/blocks/llm/llm_chat_block.py +251 -265
  7. sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +216 -98
  8. sdg_hub/core/blocks/llm/llm_parser_block.py +320 -0
  9. sdg_hub/core/blocks/llm/text_parser_block.py +53 -152
  10. sdg_hub/core/flow/base.py +7 -4
  11. sdg_hub/core/utils/datautils.py +40 -22
  12. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml +51 -11
  13. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/__init__.py +0 -0
  14. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/flow.yaml +159 -0
  15. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml +51 -11
  16. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml +14 -2
  17. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +146 -26
  18. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/README.md +0 -0
  19. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/__init__.py +0 -0
  20. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/atomic_facts_ja.yaml +41 -0
  21. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/detailed_summary_ja.yaml +14 -0
  22. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/extractive_summary_ja.yaml +14 -0
  23. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml +304 -0
  24. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/generate_questions_responses_ja.yaml +55 -0
  25. sdg_hub/flows/text_analysis/structured_insights/flow.yaml +28 -4
  26. {sdg_hub-0.3.0.dist-info → sdg_hub-0.4.0.dist-info}/METADATA +1 -1
  27. {sdg_hub-0.3.0.dist-info → sdg_hub-0.4.0.dist-info}/RECORD +30 -26
  28. sdg_hub/core/blocks/evaluation/__init__.py +0 -9
  29. sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +0 -323
  30. sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +0 -323
  31. sdg_hub/core/blocks/evaluation/verify_question_block.py +0 -329
  32. sdg_hub/core/blocks/llm/client_manager.py +0 -447
  33. sdg_hub/core/blocks/llm/config.py +0 -337
  34. {sdg_hub-0.3.0.dist-info → sdg_hub-0.4.0.dist-info}/WHEEL +0 -0
  35. {sdg_hub-0.3.0.dist-info → sdg_hub-0.4.0.dist-info}/licenses/LICENSE +0 -0
  36. {sdg_hub-0.3.0.dist-info → sdg_hub-0.4.0.dist-info}/top_level.txt +0 -0
@@ -1,323 +0,0 @@
1
- # SPDX-License-Identifier: Apache-2.0
2
- """Thin wrapper for relevancy evaluation using 4 composed blocks.
3
-
4
- This module provides a simple, lightweight wrapper that composes:
5
- - PromptBuilderBlock: builds evaluation prompts
6
- - LLMChatBlock: generates LLM responses
7
- - TextParserBlock: parses structured output
8
- - ColumnValueFilterBlock: filters based on score
9
-
10
- The wrapper exposes minimal LLM interface for flow detection while
11
- delegating all functionality to the internal blocks.
12
- """
13
-
14
- # Standard
15
- from typing import Any, Optional, Union
16
-
17
- # Third Party
18
- from datasets import Dataset
19
- from pydantic import ConfigDict, Field, field_validator
20
-
21
- # Local
22
- from ...utils.error_handling import BlockValidationError
23
- from ...utils.logger_config import setup_logger
24
- from ..base import BaseBlock
25
- from ..filtering.column_value_filter import ColumnValueFilterBlock
26
- from ..llm.llm_chat_block import LLMChatBlock
27
- from ..llm.prompt_builder_block import PromptBuilderBlock
28
- from ..llm.text_parser_block import TextParserBlock
29
- from ..registry import BlockRegistry
30
-
31
- logger = setup_logger(__name__)
32
-
33
-
34
- @BlockRegistry.register(
35
- "EvaluateRelevancyBlock",
36
- "evaluation",
37
- "Thin wrapper composing 4 blocks for relevancy evaluation",
38
- )
39
- class EvaluateRelevancyBlock(BaseBlock):
40
- """Thin wrapper for relevancy evaluation using composed blocks.
41
-
42
- Composes PromptBuilderBlock + LLMChatBlock + TextParserBlock + ColumnValueFilterBlock
43
- into a single evaluation pipeline with smart parameter routing.
44
-
45
- Parameters
46
- ----------
47
- block_name : str
48
- Name of the block.
49
- input_cols : List[str]
50
- Input columns: ["question", "response"]
51
- output_cols : List[str]
52
- Output columns: ["relevancy_explanation", "relevancy_score"]
53
- model : Optional[str]
54
- LLM model identifier.
55
- api_base : Optional[str]
56
- API base URL.
57
- api_key : Optional[str]
58
- API key.
59
- prompt_config_path : str
60
- Path to YAML prompt template file (required).
61
- **kwargs : Any
62
- All other parameters are automatically routed to appropriate internal blocks
63
- based on each block's accepted parameters. This includes all LLM parameters
64
- (temperature, max_tokens, extra_body, extra_headers, etc.), text parser
65
- parameters, and filter parameters.
66
- """
67
-
68
- model_config = ConfigDict(
69
- extra="allow"
70
- ) # Allow extra fields for dynamic forwarding
71
-
72
- # --- Core configuration ---
73
- prompt_config_path: str = Field(
74
- ...,
75
- description="Path to YAML file containing the relevancy evaluation prompt template",
76
- )
77
-
78
- # --- LLM interface (for flow detection) ---
79
- model: Optional[str] = Field(None, description="LLM model identifier")
80
- api_base: Optional[str] = Field(None, description="API base URL")
81
- api_key: Optional[str] = Field(None, description="API key")
82
-
83
- # --- Filter configuration ---
84
- filter_value: Union[str, int, float] = Field(
85
- 2.0, description="Value to filter on for relevancy score"
86
- )
87
- operation: str = Field("eq", description="Filter operation")
88
- convert_dtype: Optional[str] = Field(
89
- "float", description="Data type conversion for filter column"
90
- )
91
-
92
- # --- Parser configuration ---
93
- start_tags: list[str] = Field(
94
- ["[Start of Feedback]", "[Start of Score]"],
95
- description="Start tags for parsing feedback and score",
96
- )
97
- end_tags: list[str] = Field(
98
- ["[End of Feedback]", "[End of Score]"],
99
- description="End tags for parsing feedback and score",
100
- )
101
- parsing_pattern: Optional[str] = Field(
102
- None,
103
- description="Regex pattern for custom parsing. If provided, takes precedence over tag-based parsing",
104
- )
105
-
106
- # --- Internal blocks (composition) ---
107
- prompt_builder: PromptBuilderBlock = Field(None, exclude=True) # type: ignore
108
- llm_chat: LLMChatBlock = Field(None, exclude=True) # type: ignore
109
- text_parser: TextParserBlock = Field(None, exclude=True) # type: ignore
110
- filter_block: ColumnValueFilterBlock = Field(None, exclude=True) # type: ignore
111
-
112
- @field_validator("input_cols")
113
- @classmethod
114
- def validate_input_cols(cls, v):
115
- """Validate input columns."""
116
- if v != ["question", "response"]:
117
- raise ValueError(
118
- f"EvaluateRelevancyBlock expects input_cols ['question', 'response'], got {v}"
119
- )
120
- return v
121
-
122
- @field_validator("output_cols")
123
- @classmethod
124
- def validate_output_cols(cls, v):
125
- """Validate output columns."""
126
- expected = ["relevancy_explanation", "relevancy_score"]
127
- if v != expected:
128
- raise ValueError(
129
- f"EvaluateRelevancyBlock expects output_cols {expected}, got {v}"
130
- )
131
- return v
132
-
133
- def __init__(self, **kwargs):
134
- """Initialize with smart parameter routing."""
135
- super().__init__(**kwargs)
136
- self._create_internal_blocks(**kwargs)
137
-
138
- # Log initialization if model is configured
139
- if self.model:
140
- logger.info(
141
- f"Initialized EvaluateRelevancyBlock '{self.block_name}' with model '{self.model}'"
142
- )
143
-
144
- def _extract_params(self, kwargs: dict, block_class) -> dict:
145
- """Extract parameters for specific block class based on its model_fields."""
146
- # Exclude parameters that are handled by this wrapper
147
- wrapper_params = {
148
- "block_name",
149
- "input_cols",
150
- "output_cols",
151
- }
152
-
153
- # Extract parameters that the target block accepts
154
- params = {
155
- k: v
156
- for k, v in kwargs.items()
157
- if k in block_class.model_fields and k not in wrapper_params
158
- }
159
-
160
- # Also include declared fields from this composite block that the target block accepts
161
- for field_name in self.__class__.model_fields:
162
- if (
163
- field_name in block_class.model_fields
164
- and field_name not in wrapper_params
165
- ):
166
- field_value = getattr(self, field_name)
167
- if field_value is not None: # Only forward non-None values
168
- params[field_name] = field_value
169
-
170
- return params
171
-
172
- def _create_internal_blocks(self, **kwargs):
173
- """Create internal blocks with parameter routing."""
174
- # Route parameters to appropriate blocks
175
- prompt_params = self._extract_params(kwargs, PromptBuilderBlock)
176
- llm_params = self._extract_params(kwargs, LLMChatBlock)
177
- parser_params = self._extract_params(kwargs, TextParserBlock)
178
- filter_params = self._extract_params(kwargs, ColumnValueFilterBlock)
179
-
180
- self.prompt_builder = PromptBuilderBlock(
181
- block_name=f"{self.block_name}_prompt_builder",
182
- input_cols=["question", "response"],
183
- output_cols=["eval_relevancy_prompt"],
184
- **prompt_params,
185
- )
186
-
187
- # Create LLM chat block with dynamic LLM parameter forwarding
188
- llm_config = {
189
- "block_name": f"{self.block_name}_llm_chat",
190
- "input_cols": ["eval_relevancy_prompt"],
191
- "output_cols": ["raw_eval_relevancy"],
192
- **llm_params,
193
- }
194
-
195
- # Only add LLM parameters if they are provided
196
- if self.model is not None:
197
- llm_config["model"] = self.model
198
- if self.api_base is not None:
199
- llm_config["api_base"] = self.api_base
200
- if self.api_key is not None:
201
- llm_config["api_key"] = self.api_key
202
-
203
- self.llm_chat = LLMChatBlock(**llm_config)
204
-
205
- # Create text parser
206
- self.text_parser = TextParserBlock(
207
- block_name=f"{self.block_name}_text_parser",
208
- input_cols=["raw_eval_relevancy"],
209
- output_cols=["relevancy_explanation", "relevancy_score"],
210
- **parser_params,
211
- )
212
-
213
- self.filter_block = ColumnValueFilterBlock(
214
- block_name=f"{self.block_name}_filter",
215
- input_cols=["relevancy_score"],
216
- output_cols=[], # Filter doesn't create new columns
217
- **filter_params,
218
- )
219
-
220
- def generate(self, samples: Dataset, **kwargs: Any) -> Dataset:
221
- """Execute the 4-block relevancy evaluation pipeline.
222
-
223
- Parameters
224
- ----------
225
- samples : Dataset
226
- Input dataset with 'question' and 'response' columns.
227
- **kwargs : Any
228
- Additional arguments passed to internal blocks.
229
-
230
- Returns
231
- -------
232
- Dataset
233
- Filtered dataset with relevancy evaluation results.
234
- """
235
- # Validate model is configured
236
- if not self.model:
237
- raise BlockValidationError(
238
- f"Model not configured for block '{self.block_name}'. "
239
- f"Call flow.set_model_config() before generating."
240
- )
241
-
242
- logger.info(
243
- f"Starting relevancy evaluation for {len(samples)} samples",
244
- extra={"block_name": self.block_name, "model": self.model},
245
- )
246
-
247
- try:
248
- # Execute 4-block pipeline with validation delegation
249
- result = self.prompt_builder(samples, **kwargs)
250
- result = self.llm_chat(result, **kwargs)
251
- result = self.text_parser(result, **kwargs)
252
- result = self.filter_block(result, **kwargs)
253
-
254
- logger.info(
255
- f"Relevancy evaluation completed: {len(samples)} → {len(result)} samples",
256
- extra={"block_name": self.block_name},
257
- )
258
-
259
- return result
260
-
261
- except Exception as e:
262
- logger.error(
263
- f"Error during relevancy evaluation: {e}",
264
- extra={"block_name": self.block_name, "error": str(e)},
265
- )
266
- raise
267
-
268
- def __getattr__(self, name: str) -> Any:
269
- """Forward attribute access to appropriate internal block."""
270
- # Check each internal block to see which one has this parameter
271
- for block_attr, block_class in [
272
- ("prompt_builder", PromptBuilderBlock),
273
- ("llm_chat", LLMChatBlock),
274
- ("text_parser", TextParserBlock),
275
- ("filter_block", ColumnValueFilterBlock),
276
- ]:
277
- if hasattr(self, block_attr) and name in block_class.model_fields:
278
- internal_block = getattr(self, block_attr)
279
- if internal_block is not None:
280
- return getattr(internal_block, name)
281
- raise AttributeError(
282
- f"'{self.__class__.__name__}' object has no attribute '{name}'"
283
- )
284
-
285
- def __setattr__(self, name: str, value: Any) -> None:
286
- """Handle dynamic parameter updates from flow.set_model_config()."""
287
- super().__setattr__(name, value)
288
-
289
- # Forward to appropriate internal blocks
290
- for block_attr, block_class in [
291
- ("prompt_builder", PromptBuilderBlock),
292
- ("llm_chat", LLMChatBlock),
293
- ("text_parser", TextParserBlock),
294
- ("filter_block", ColumnValueFilterBlock),
295
- ]:
296
- if hasattr(self, block_attr) and name in block_class.model_fields:
297
- setattr(getattr(self, block_attr), name, value)
298
-
299
- def _reinitialize_client_manager(self) -> None:
300
- """Reinitialize internal LLM block's client manager."""
301
- if hasattr(self.llm_chat, "_reinitialize_client_manager"):
302
- self.llm_chat._reinitialize_client_manager()
303
-
304
- def get_internal_blocks_info(self) -> dict[str, Any]:
305
- """Get information about internal blocks."""
306
- return {
307
- "prompt_builder": self.prompt_builder.get_info(),
308
- "llm_chat": self.llm_chat.get_info(),
309
- "text_parser": self.text_parser.get_info(),
310
- "filter": self.filter_block.get_info(),
311
- }
312
-
313
- def __repr__(self) -> str:
314
- """String representation of the block."""
315
- filter_value = (
316
- getattr(self.filter_block, "filter_value", 2.0)
317
- if hasattr(self, "filter_block")
318
- else 2.0
319
- )
320
- return (
321
- f"EvaluateRelevancyBlock(name='{self.block_name}', "
322
- f"model='{self.model}', filter_value='{filter_value}')"
323
- )
@@ -1,329 +0,0 @@
1
- # SPDX-License-Identifier: Apache-2.0
2
- """Thin wrapper for question verification using 4 composed blocks.
3
-
4
- This module provides a simple, lightweight wrapper that composes:
5
- - PromptBuilderBlock: builds verification prompts
6
- - LLMChatBlock: generates LLM responses
7
- - TextParserBlock: parses structured output
8
- - ColumnValueFilterBlock: filters based on rating
9
-
10
- The wrapper exposes minimal LLM interface for flow detection while
11
- delegating all functionality to the internal blocks.
12
- """
13
-
14
- # Standard
15
- from typing import Any, Optional, Union
16
-
17
- # Third Party
18
- from datasets import Dataset
19
- from pydantic import ConfigDict, Field, field_validator
20
-
21
- # Local
22
- from ...utils.error_handling import BlockValidationError
23
- from ...utils.logger_config import setup_logger
24
- from ..base import BaseBlock
25
- from ..filtering.column_value_filter import ColumnValueFilterBlock
26
- from ..llm.llm_chat_block import LLMChatBlock
27
- from ..llm.prompt_builder_block import PromptBuilderBlock
28
- from ..llm.text_parser_block import TextParserBlock
29
- from ..registry import BlockRegistry
30
-
31
- logger = setup_logger(__name__)
32
-
33
-
34
- @BlockRegistry.register(
35
- "VerifyQuestionBlock",
36
- "evaluation",
37
- "Thin wrapper composing 4 blocks for question verification",
38
- )
39
- class VerifyQuestionBlock(BaseBlock):
40
- """Thin wrapper for question verification using composed blocks.
41
-
42
- Composes PromptBuilderBlock + LLMChatBlock + TextParserBlock + ColumnValueFilterBlock
43
- into a single verification pipeline with smart parameter routing.
44
-
45
- Parameters
46
- ----------
47
- block_name : str
48
- Name of the block.
49
- input_cols : List[str]
50
- Input columns: ["question"]
51
- output_cols : List[str]
52
- Output columns: ["verification_explanation", "verification_rating"]
53
- model : Optional[str]
54
- LLM model identifier.
55
- api_base : Optional[str]
56
- API base URL.
57
- api_key : Optional[str]
58
- API key.
59
- prompt_config_path : str
60
- Path to YAML prompt template file (required).
61
- **kwargs : Any
62
- All other parameters are automatically routed to appropriate internal blocks
63
- based on each block's accepted parameters. This includes all LLM parameters
64
- (temperature, max_tokens, extra_body, extra_headers, etc.), text parser
65
- parameters, and filter parameters.
66
- """
67
-
68
- model_config = ConfigDict(
69
- extra="allow"
70
- ) # Allow extra fields for dynamic forwarding
71
-
72
- # --- Core configuration ---
73
- prompt_config_path: str = Field(
74
- ...,
75
- description="Path to YAML file containing the question verification prompt template",
76
- )
77
-
78
- # --- LLM interface (for flow detection) ---
79
- model: Optional[str] = Field(None, description="LLM model identifier")
80
- api_base: Optional[str] = Field(None, description="API base URL")
81
- api_key: Optional[str] = Field(None, description="API key")
82
-
83
- # --- Filter configuration ---
84
- filter_value: Union[str, int, float] = Field(
85
- 1.0, description="Value to filter on for verification rating"
86
- )
87
- operation: str = Field("eq", description="Filter operation")
88
- convert_dtype: Optional[str] = Field(
89
- "float", description="Data type conversion for filter column"
90
- )
91
-
92
- # --- Parser configuration ---
93
- start_tags: list[str] = Field(
94
- ["[Start of Explanation]", "[Start of Rating]"],
95
- description="Start tags for parsing explanation and rating",
96
- )
97
- end_tags: list[str] = Field(
98
- ["[End of Explanation]", "[End of Rating]"],
99
- description="End tags for parsing explanation and rating",
100
- )
101
- parsing_pattern: Optional[str] = Field(
102
- None,
103
- description="Regex pattern for custom parsing. If provided, takes precedence over tag-based parsing",
104
- )
105
-
106
- # Store parameters for internal blocks
107
- prompt_params: dict[str, Any] = Field(default_factory=dict, exclude=True)
108
- llm_params: dict[str, Any] = Field(default_factory=dict, exclude=True)
109
- parser_params: dict[str, Any] = Field(default_factory=dict, exclude=True)
110
- filter_params: dict[str, Any] = Field(default_factory=dict, exclude=True)
111
-
112
- # --- Internal blocks (composition) ---
113
- prompt_builder: PromptBuilderBlock = Field(None, exclude=True) # type: ignore
114
- llm_chat: LLMChatBlock = Field(None, exclude=True) # type: ignore
115
- text_parser: TextParserBlock = Field(None, exclude=True) # type: ignore
116
- filter_block: ColumnValueFilterBlock = Field(None, exclude=True) # type: ignore
117
-
118
- @field_validator("input_cols")
119
- @classmethod
120
- def validate_input_cols(cls, v):
121
- """Validate input columns."""
122
- if v != ["question"]:
123
- raise ValueError(
124
- f"VerifyQuestionBlock expects input_cols ['question'], got {v}"
125
- )
126
- return v
127
-
128
- @field_validator("output_cols")
129
- @classmethod
130
- def validate_output_cols(cls, v):
131
- """Validate output columns."""
132
- expected = ["verification_explanation", "verification_rating"]
133
- if v != expected:
134
- raise ValueError(
135
- f"VerifyQuestionBlock expects output_cols {expected}, got {v}"
136
- )
137
- return v
138
-
139
- def __init__(self, **kwargs):
140
- """Initialize with smart parameter routing."""
141
- super().__init__(**kwargs)
142
- self._create_internal_blocks(**kwargs)
143
-
144
- # Log initialization if model is configured
145
- if self.model:
146
- logger.info(
147
- f"Initialized VerifyQuestionBlock '{self.block_name}' with model '{self.model}'"
148
- )
149
-
150
- def _extract_params(self, kwargs: dict, block_class) -> dict:
151
- """Extract parameters for specific block class based on its model_fields."""
152
- # Exclude parameters that are handled by this wrapper's structure
153
- wrapper_params = {
154
- "block_name",
155
- "input_cols",
156
- "output_cols",
157
- }
158
-
159
- # Extract parameters that the target block accepts
160
- params = {
161
- k: v
162
- for k, v in kwargs.items()
163
- if k in block_class.model_fields and k not in wrapper_params
164
- }
165
-
166
- # Also include declared fields from this composite block that the target block accepts
167
- for field_name in self.__class__.model_fields:
168
- if (
169
- field_name in block_class.model_fields
170
- and field_name not in wrapper_params
171
- ):
172
- field_value = getattr(self, field_name)
173
- if field_value is not None: # Only forward non-None values
174
- params[field_name] = field_value
175
-
176
- return params
177
-
178
- def _create_internal_blocks(self, **kwargs):
179
- """Create internal blocks with parameter routing."""
180
- # Route parameters to appropriate blocks
181
- prompt_params = self._extract_params(kwargs, PromptBuilderBlock)
182
- llm_params = self._extract_params(kwargs, LLMChatBlock)
183
- parser_params = self._extract_params(kwargs, TextParserBlock)
184
- filter_params = self._extract_params(kwargs, ColumnValueFilterBlock)
185
-
186
- self.prompt_builder = PromptBuilderBlock(
187
- block_name=f"{self.block_name}_prompt_builder",
188
- input_cols=["question"],
189
- output_cols=["verify_question_prompt"],
190
- **prompt_params,
191
- )
192
-
193
- # Create LLM chat block with dynamic LLM parameter forwarding
194
- llm_config = {
195
- "block_name": f"{self.block_name}_llm_chat",
196
- "input_cols": ["verify_question_prompt"],
197
- "output_cols": ["raw_verify_question"],
198
- **llm_params,
199
- }
200
-
201
- # Only add LLM parameters if they are provided
202
- if self.model is not None:
203
- llm_config["model"] = self.model
204
- if self.api_base is not None:
205
- llm_config["api_base"] = self.api_base
206
- if self.api_key is not None:
207
- llm_config["api_key"] = self.api_key
208
-
209
- self.llm_chat = LLMChatBlock(**llm_config)
210
-
211
- # Create text parser
212
- self.text_parser = TextParserBlock(
213
- block_name=f"{self.block_name}_text_parser",
214
- input_cols=["raw_verify_question"],
215
- output_cols=["verification_explanation", "verification_rating"],
216
- **parser_params,
217
- )
218
-
219
- self.filter_block = ColumnValueFilterBlock(
220
- block_name=f"{self.block_name}_filter",
221
- input_cols=["verification_rating"],
222
- output_cols=[], # Filter doesn't create new columns
223
- **filter_params,
224
- )
225
-
226
- def generate(self, samples: Dataset, **kwargs: Any) -> Dataset:
227
- """Execute the 4-block question verification pipeline.
228
-
229
- Parameters
230
- ----------
231
- samples : Dataset
232
- Input dataset with 'question' column.
233
- **kwargs : Any
234
- Additional arguments passed to internal blocks.
235
-
236
- Returns
237
- -------
238
- Dataset
239
- Filtered dataset with question verification results.
240
- """
241
- # Validate model is configured
242
- if not self.model:
243
- raise BlockValidationError(
244
- f"Model not configured for block '{self.block_name}'. "
245
- f"Call flow.set_model_config() before generating."
246
- )
247
-
248
- logger.info(
249
- f"Starting question verification for {len(samples)} samples",
250
- extra={"block_name": self.block_name, "model": self.model},
251
- )
252
-
253
- try:
254
- # Execute 4-block pipeline with validation delegation
255
- result = self.prompt_builder(samples, **kwargs)
256
- result = self.llm_chat(result, **kwargs)
257
- result = self.text_parser(result, **kwargs)
258
- result = self.filter_block(result, **kwargs)
259
-
260
- logger.info(
261
- f"Question verification completed: {len(samples)} → {len(result)} samples",
262
- extra={"block_name": self.block_name},
263
- )
264
-
265
- return result
266
-
267
- except Exception as e:
268
- logger.error(
269
- f"Error during question verification: {e}",
270
- extra={"block_name": self.block_name, "error": str(e)},
271
- )
272
- raise
273
-
274
- def __getattr__(self, name: str) -> Any:
275
- """Forward attribute access to appropriate internal block."""
276
- # Check each internal block to see which one has this parameter
277
- for block_attr, block_class in [
278
- ("prompt_builder", PromptBuilderBlock),
279
- ("llm_chat", LLMChatBlock),
280
- ("text_parser", TextParserBlock),
281
- ("filter_block", ColumnValueFilterBlock),
282
- ]:
283
- if hasattr(self, block_attr) and name in block_class.model_fields:
284
- internal_block = getattr(self, block_attr)
285
- if internal_block is not None:
286
- return getattr(internal_block, name)
287
- raise AttributeError(
288
- f"'{self.__class__.__name__}' object has no attribute '{name}'"
289
- )
290
-
291
- def __setattr__(self, name: str, value: Any) -> None:
292
- """Handle dynamic parameter updates from flow.set_model_config()."""
293
- super().__setattr__(name, value)
294
-
295
- # Forward to appropriate internal blocks
296
- for block_attr, block_class in [
297
- ("prompt_builder", PromptBuilderBlock),
298
- ("llm_chat", LLMChatBlock),
299
- ("text_parser", TextParserBlock),
300
- ("filter_block", ColumnValueFilterBlock),
301
- ]:
302
- if hasattr(self, block_attr) and name in block_class.model_fields:
303
- setattr(getattr(self, block_attr), name, value)
304
-
305
- def _reinitialize_client_manager(self) -> None:
306
- """Reinitialize internal LLM block's client manager."""
307
- if hasattr(self.llm_chat, "_reinitialize_client_manager"):
308
- self.llm_chat._reinitialize_client_manager()
309
-
310
- def get_internal_blocks_info(self) -> dict[str, Any]:
311
- """Get information about internal blocks."""
312
- return {
313
- "prompt_builder": self.prompt_builder.get_info(),
314
- "llm_chat": self.llm_chat.get_info(),
315
- "text_parser": self.text_parser.get_info(),
316
- "filter": self.filter_block.get_info(),
317
- }
318
-
319
- def __repr__(self) -> str:
320
- """String representation of the block."""
321
- filter_value = (
322
- getattr(self.filter_block, "filter_value", "1.0")
323
- if hasattr(self, "filter_block")
324
- else "1.0"
325
- )
326
- return (
327
- f"VerifyQuestionBlock(name='{self.block_name}', "
328
- f"model='{self.model}', filter_value='{filter_value}')"
329
- )