sdg-hub 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sdg_hub/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE
 
- __version__ = version = '0.2.0'
- __version_tuple__ = version_tuple = (0, 2, 0)
+ __version__ = version = '0.2.1'
+ __version_tuple__ = version_tuple = (0, 2, 1)
@@ -11,6 +11,7 @@ from .client_manager import LLMClientManager
  from .config import LLMConfig
  from .error_handler import ErrorCategory, LLMErrorHandler
  from .llm_chat_block import LLMChatBlock
+ from .llm_chat_with_parsing_retry_block import LLMChatWithParsingRetryBlock
  from .prompt_builder_block import PromptBuilderBlock
  from .text_parser_block import TextParserBlock
 
@@ -20,6 +21,7 @@ __all__ = [
  "LLMErrorHandler",
  "ErrorCategory",
  "LLMChatBlock",
+ "LLMChatWithParsingRetryBlock",
  "PromptBuilderBlock",
  "TextParserBlock",
  ]
@@ -0,0 +1,491 @@
+ # SPDX-License-Identifier: Apache-2.0
+ """Composite block combining LLM chat and text parsing with retry logic.
+
+ This module provides the LLMChatWithParsingRetryBlock that encapsulates the complete
+ LLM generation and parsing workflow with automatic retry on parsing failures.
+ """
+
+ # Standard
+ from typing import Any, Optional
+
+ # Third Party
+ from datasets import Dataset
+ from pydantic import ConfigDict, Field, field_validator
+
+ from ...utils.error_handling import BlockValidationError
+
+ # Local
+ from ...utils.logger_config import setup_logger
+ from ..base import BaseBlock
+ from ..registry import BlockRegistry
+ from .llm_chat_block import LLMChatBlock
+ from .text_parser_block import TextParserBlock
+
+ logger = setup_logger(__name__)
+
+
+ class MaxRetriesExceededError(Exception):
+ """Raised when maximum retry attempts are exceeded without achieving target count."""
+
+ def __init__(self, target_count: int, actual_count: int, max_retries: int):
+ self.target_count = target_count
+ self.actual_count = actual_count
+ self.max_retries = max_retries
+ super().__init__(
+ f"Failed to achieve target count {target_count} after {max_retries} retries. "
+ f"Only got {actual_count} successful parses."
+ )
+
+
+ @BlockRegistry.register(
+ "LLMChatWithParsingRetryBlock",
+ "llm",
+ "Composite block combining LLM chat and text parsing with automatic retry on parsing failures",
+ )
+ class LLMChatWithParsingRetryBlock(BaseBlock):
+ """Composite block for LLM generation with parsing retry logic.
+
+ This block combines LLMChatBlock and TextParserBlock into a single cohesive block
+ that automatically retries LLM generation when parsing fails, accumulating successful
+ results until the target count is reached or max retries exceeded.
+
+ Parameters
+ ----------
+ block_name : str
+ Name of the block.
+ input_cols : Union[str, List[str]]
+ Input column name(s). Should contain the messages list.
+ output_cols : Union[str, List[str]]
+ Output column name(s) for parsed results.
+ model : str
+ Model identifier in LiteLLM format.
+ api_base : Optional[str]
+ Base URL for the API. Required for local models.
+ api_key : Optional[str]
+ API key for the provider. Falls back to environment variables.
+ parsing_max_retries : int, optional
+ Maximum number of retry attempts for parsing failures (default: 3).
+ This is different from max_retries, which handles LLM network/API failures.
+
+ ### LLM Generation Parameters ###
+ async_mode : bool, optional
+ Whether to use async processing (default: False).
+ timeout : float, optional
+ Request timeout in seconds (default: 120.0).
+ max_retries : int, optional
+ Maximum number of LLM retry attempts for network failures (default: 6).
+ temperature : Optional[float], optional
+ Sampling temperature (0.0 to 2.0).
+ max_tokens : Optional[int], optional
+ Maximum tokens to generate.
+ top_p : Optional[float], optional
+ Nucleus sampling parameter (0.0 to 1.0).
+ frequency_penalty : Optional[float], optional
+ Frequency penalty (-2.0 to 2.0).
+ presence_penalty : Optional[float], optional
+ Presence penalty (-2.0 to 2.0).
+ stop : Optional[Union[str, List[str]]], optional
+ Stop sequences.
+ seed : Optional[int], optional
+ Random seed for reproducible outputs.
+ response_format : Optional[Dict[str, Any]], optional
+ Response format specification (e.g., JSON mode).
+ stream : Optional[bool], optional
+ Whether to stream responses.
+ n : Optional[int], optional
+ Number of completions to generate per retry attempt.
+ logprobs : Optional[bool], optional
+ Whether to return log probabilities.
+ top_logprobs : Optional[int], optional
+ Number of top log probabilities to return.
+ user : Optional[str], optional
+ End-user identifier.
+ extra_headers : Optional[Dict[str, str]], optional
+ Additional headers to send with requests.
+ extra_body : Optional[Dict[str, Any]], optional
+ Additional parameters for the request body.
+ provider_specific : Optional[Dict[str, Any]], optional
+ Provider-specific parameters.
+
+ ### Text Parser Parameters ###
+ start_tags : List[str], optional
+ List of start tags for tag-based parsing.
+ end_tags : List[str], optional
+ List of end tags for tag-based parsing.
+ parsing_pattern : Optional[str], optional
+ Regex pattern for custom parsing.
+ parser_cleanup_tags : Optional[List[str]], optional
+ List of tags to clean from parsed output.
+
+ Examples
+ --------
+ >>> # Basic JSON parsing with retry
+ >>> block = LLMChatWithParsingRetryBlock(
+ ... block_name="json_retry_block",
+ ... input_cols="messages",
+ ... output_cols="parsed_json",
+ ... model="openai/gpt-4",
+ ... parsing_max_retries=3,
+ ... parsing_pattern=r'"result":\s*"([^"]*)"',
+ ... n=3
+ ... )
+
+ >>> # Tag-based parsing with retry
+ >>> block = LLMChatWithParsingRetryBlock(
+ ... block_name="tag_retry_block",
+ ... input_cols="messages",
+ ... output_cols=["explanation", "answer"],
+ ... model="anthropic/claude-3-sonnet-20240229",
+ ... parsing_max_retries=5,
+ ... start_tags=["<explanation>", "<answer>"],
+ ... end_tags=["</explanation>", "</answer>"],
+ ... n=2
+ ... )
+ """
+
+ model_config = ConfigDict(
+ extra="allow"
+ ) # Allow extra fields for dynamic forwarding
+
+ # Composite-specific parameters only
+ parsing_max_retries: int = Field(
+ 3, description="Maximum number of retry attempts for parsing failures"
+ )
+
+ # Store parameters for internal blocks
+ llm_params: dict[str, Any] = Field(default_factory=dict, exclude=True)
+ parser_params: dict[str, Any] = Field(default_factory=dict, exclude=True)
+
+ # Internal blocks - excluded from serialization
+ llm_chat: Optional[LLMChatBlock] = Field(None, exclude=True)
+ text_parser: Optional[TextParserBlock] = Field(None, exclude=True)
+
+ @field_validator("input_cols")
+ @classmethod
+ def validate_single_input_col(cls, v):
+ """Ensure exactly one input column."""
+ if isinstance(v, str):
+ return [v]
+ if isinstance(v, list) and len(v) == 1:
+ return v
+ if isinstance(v, list) and len(v) != 1:
+ raise ValueError(
+ f"LLMChatWithParsingRetryBlock expects exactly one input column, got {len(v)}: {v}"
+ )
+ raise ValueError(f"Invalid input_cols format: {v}")
+
+ @field_validator("parsing_max_retries")
+ @classmethod
+ def validate_parsing_max_retries(cls, v):
+ """Ensure parsing_max_retries is positive."""
+ if v < 1:
+ raise ValueError("parsing_max_retries must be at least 1")
+ return v
+
+ def __init__(self, **kwargs):
+ """Initialize with dynamic parameter forwarding."""
+ # Extract and store composite-specific params before super().__init__
+ parsing_max_retries = kwargs.pop("parsing_max_retries", 3)
+
+ # Forward parameters to appropriate internal blocks
+ llm_params = {k: v for k, v in kwargs.items() if k in LLMChatBlock.model_fields}
+ parser_params = {
+ k: v for k, v in kwargs.items() if k in TextParserBlock.model_fields
+ }
+
+ # Keep only BaseBlock fields for super().__init__
+ base_params = {k: v for k, v in kwargs.items() if k in BaseBlock.model_fields}
+ base_params["parsing_max_retries"] = parsing_max_retries
+ base_params["llm_params"] = llm_params
+ base_params["parser_params"] = parser_params
+
+ # Initialize parent with all valid parameters
+ super().__init__(**base_params)
+
+ # Create internal blocks with forwarded parameters
+ self._create_internal_blocks()
+
+ # Log initialization only when model is configured
+ model = self.llm_params.get("model")
+ if model:
+ logger.info(
+ f"Initialized LLMChatWithParsingRetryBlock '{self.block_name}' with model '{model}'",
+ extra={
+ "block_name": self.block_name,
+ "model": model,
+ "async_mode": self.llm_params.get("async_mode", False),
+ "parsing_max_retries": self.parsing_max_retries,
+ },
+ )
+
+ def _create_internal_blocks(self) -> None:
+ """Create and configure the internal blocks using dynamic parameter forwarding."""
+ # 1. LLMChatBlock
+ llm_kwargs = {
+ **self.llm_params, # Forward all LLM parameters dynamically first
+ "block_name": f"{self.block_name}_llm_chat", # Override block_name
+ "input_cols": self.input_cols,
+ "output_cols": [f"{self.block_name}_raw_response"],
+ }
+ self.llm_chat = LLMChatBlock(**llm_kwargs)
+
+ # 2. TextParserBlock
+ text_parser_kwargs = {
+ **self.parser_params, # Forward all parser parameters dynamically first
+ "block_name": f"{self.block_name}_text_parser", # Override block_name
+ "input_cols": [f"{self.block_name}_raw_response"],
+ "output_cols": self.output_cols,
+ }
+ self.text_parser = TextParserBlock(**text_parser_kwargs)
+
+ def _reinitialize_client_manager(self) -> None:
+ """Reinitialize the internal LLM chat block's client manager.
+
+ This should be called after model configuration changes to ensure
+ the internal LLM chat block uses the updated model configuration.
+ """
+ if self.llm_chat and hasattr(self.llm_chat, "_reinitialize_client_manager"):
+ # Update the internal LLM chat block's model config from stored params
+ for key in ["model", "api_base", "api_key"]:
+ if key in self.llm_params:
+ setattr(self.llm_chat, key, self.llm_params[key])
+ # Reinitialize its client manager
+ self.llm_chat._reinitialize_client_manager()
+
+ def generate(self, samples: Dataset, **kwargs: Any) -> Dataset:
+ """Generate responses with parsing retry logic.
+
+ For each input sample, this method:
+ 1. Generates LLM responses using the configured n parameter
+ 2. Attempts to parse the responses using TextParserBlock
+ 3. Counts successful parses and retries if below target
+ 4. Accumulates results across retry attempts
+ 5. Returns final dataset with all successful parses
+
+ Parameters
+ ----------
+ samples : Dataset
+ Input dataset containing the messages column.
+ **kwargs : Any
+ Additional keyword arguments passed to internal blocks.
+
+ Returns
+ -------
+ Dataset
+ Dataset with parsed results from successful generations.
+
+ Raises
+ ------
+ BlockValidationError
+ If model is not configured before calling generate().
+ MaxRetriesExceededError
+ If target count not reached after max retries for any sample.
+ """
+ # Validate that model is configured
+ model = self.llm_params.get("model")
+ if not model:
+ raise BlockValidationError(
+ f"Model not configured for block '{self.block_name}'. "
+ f"Call flow.set_model_config() before generating."
+ )
+
+ logger.info(
+ f"Starting LLM generation with parsing retry for {len(samples)} samples",
+ extra={
+ "block_name": self.block_name,
+ "model": model,
+ "batch_size": len(samples),
+ "parsing_max_retries": self.parsing_max_retries,
+ },
+ )
+
+ all_results = []
+
+ # Process each sample independently with retry logic
+ for sample_idx, sample in enumerate(samples):
+ sample_results = []
+ total_parsed_count = 0
+
+ # Determine target count for this sample (number of completions requested)
+ target = kwargs.get("n", self.llm_params.get("n")) or 1
+
+ logger.debug(
+ f"Processing sample {sample_idx} with target count {target}",
+ extra={
+ "block_name": self.block_name,
+ "sample_idx": sample_idx,
+ "target_count": target,
+ },
+ )
+
+ # Retry loop for this sample
+ for attempt in range(self.parsing_max_retries):
+ if total_parsed_count >= target:
+ break # Already reached target
+
+ try:
+ # Generate LLM responses for this sample
+ temp_dataset = Dataset.from_list([sample])
+ llm_result = self.llm_chat.generate(temp_dataset, **kwargs)
+
+ # Parse the responses
+ parsed_result = self.text_parser.generate(llm_result, **kwargs)
+
+ # Count successful parses and accumulate results
+ new_parsed_count = len(parsed_result)
+ total_parsed_count += new_parsed_count
+ sample_results.extend(parsed_result)
+
+ logger.debug(
+ f"Attempt {attempt + 1} for sample {sample_idx}: {new_parsed_count} successful parses "
+ f"(total: {total_parsed_count}/{target})",
+ extra={
+ "block_name": self.block_name,
+ "sample_idx": sample_idx,
+ "attempt": attempt + 1,
+ "new_parses": new_parsed_count,
+ "total_parses": total_parsed_count,
+ "target_count": target,
+ },
+ )
+
+ if total_parsed_count >= target:
+ logger.debug(
+ f"Target reached for sample {sample_idx} after {attempt + 1} attempts",
+ extra={
+ "block_name": self.block_name,
+ "sample_idx": sample_idx,
+ "attempts": attempt + 1,
+ "final_count": total_parsed_count,
+ },
+ )
+ break
+
+ except Exception as e:
+ logger.warning(
+ f"Error during attempt {attempt + 1} for sample {sample_idx}: {e}",
+ extra={
+ "block_name": self.block_name,
+ "sample_idx": sample_idx,
+ "attempt": attempt + 1,
+ "error": str(e),
+ },
+ )
+ # Continue to next attempt
+ continue
+
+ # Check if we reached the target count
+ if total_parsed_count < target:
+ raise MaxRetriesExceededError(
+ target_count=target,
+ actual_count=total_parsed_count,
+ max_retries=self.parsing_max_retries,
+ )
+
+ # Trim results to exact target count if we exceeded it
+ if total_parsed_count > target:
+ sample_results = sample_results[:target]
+ logger.debug(
+ f"Trimmed sample {sample_idx} results from {total_parsed_count} to {target}",
+ extra={
+ "block_name": self.block_name,
+ "sample_idx": sample_idx,
+ "trimmed_from": total_parsed_count,
+ "trimmed_to": target,
+ },
+ )
+
+ # Add this sample's results to final dataset
+ all_results.extend(sample_results)
+
+ logger.info(
+ f"LLM generation with parsing retry completed: {len(samples)} input samples → {len(all_results)} output rows",
+ extra={
+ "block_name": self.block_name,
+ "input_samples": len(samples),
+ "output_rows": len(all_results),
+ "model": model,
+ },
+ )
+
+ return Dataset.from_list(all_results)
+
+ def _validate_custom(self, dataset: Dataset) -> None:
+ """Custom validation for LLMChatWithParsingRetryBlock.
+
+ This method validates the entire chain of internal blocks by simulating
+ the data flow through each block to ensure they can all process the data correctly.
+ """
+ # Validate that required input column exists
+ if len(self.input_cols) != 1:
+ raise ValueError(
+ f"LLMChatWithParsingRetryBlock expects exactly one input column, got {len(self.input_cols)}"
+ )
+
+ input_col = self.input_cols[0]
+ if input_col not in dataset.column_names:
+ raise ValueError(
+ f"Required input column '{input_col}' not found in dataset. "
+ f"Available columns: {dataset.column_names}"
+ )
+
+ # Validate parsing configuration
+ has_regex = self.parser_params.get("parsing_pattern") is not None
+ has_tags = bool(self.parser_params.get("start_tags", [])) or bool(
+ self.parser_params.get("end_tags", [])
+ )
+
+ if not has_regex and not has_tags:
+ raise ValueError(
+ "LLMChatWithParsingRetryBlock requires at least one parsing method: "
+ "either 'parsing_pattern' (regex) or 'start_tags'/'end_tags' (tag-based parsing)"
+ )
+
+ # Validate that internal blocks are initialized
+ if not all([self.llm_chat, self.text_parser]):
+ raise ValueError(
+ "All internal blocks must be initialized before validation"
+ )
+
+ # Validate internal blocks
+ try:
+ logger.debug("Validating internal LLM chat block")
+ self.llm_chat._validate_custom(dataset)
+
+ # Create temporary dataset with expected LLM output for parser validation
+ temp_data = []
+ for sample in dataset:
+ temp_sample = dict(sample)
+ temp_sample[f"{self.block_name}_raw_response"] = "test output"
+ temp_data.append(temp_sample)
+ temp_dataset = Dataset.from_list(temp_data)
+
+ logger.debug("Validating internal text parser block")
+ self.text_parser._validate_custom(temp_dataset)
+
+ logger.debug("All internal blocks validated successfully")
+
+ except Exception as e:
+ logger.error(f"Validation failed in internal blocks: {e}")
+ raise ValueError(f"Internal block validation failed: {e}") from e
+
+ def get_internal_blocks_info(self) -> dict[str, Any]:
+ """Get information about the internal blocks.
+
+ Returns
+ -------
+ Dict[str, Any]
+ Information about each internal block.
+ """
+ return {
+ "llm_chat": self.llm_chat.get_info() if self.llm_chat else None,
+ "text_parser": self.text_parser.get_info() if self.text_parser else None,
+ }
+
+ def __repr__(self) -> str:
+ """String representation of the block."""
+ model = self.llm_params.get("model", "not_configured")
+ return (
+ f"LLMChatWithParsingRetryBlock(name='{self.block_name}', "
+ f"model='{model}', parsing_max_retries={self.parsing_max_retries})"
+ )
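Note: the class docstring above already carries constructor examples; the sketch below is a hedged end-to-end illustration of the retry semantics, not code from the package. The import path, endpoint URL, model id, and dataset contents are assumptions.

    # Hedged usage sketch -- import path, model id, api_base, and data are assumptions.
    from datasets import Dataset
    from sdg_hub.blocks.llm import LLMChatWithParsingRetryBlock  # path may differ by package layout

    block = LLMChatWithParsingRetryBlock(
        block_name="qa_retry",
        input_cols="messages",
        output_cols="answer",
        model="openai/gpt-4o-mini",           # any LiteLLM-style model id
        api_base="http://localhost:8000/v1",  # only needed for local/self-hosted endpoints
        parsing_max_retries=3,                # retry budget for parsing failures
        start_tags=["<answer>"],
        end_tags=["</answer>"],
        n=2,                                  # completions per attempt = target parsed-row count
    )

    samples = Dataset.from_list([
        {"messages": [{"role": "user", "content": "Reply inside <answer> tags: what is 2 + 2?"}]}
    ])

    # Each input row is regenerated until `n` parsed rows accumulate or the retry
    # budget is exhausted, in which case MaxRetriesExceededError is raised.
    parsed = block.generate(samples)
    print(parsed.to_list())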
@@ -48,6 +48,9 @@ class TextParserBlock(BaseBlock):
  Regex pattern for custom parsing.
  parser_cleanup_tags : Optional[List[str]]
  List of tags to clean from parsed output.
+ expand_lists : bool
+ Whether to expand list inputs into individual rows (True) or preserve lists (False).
+ Default is True for backward compatibility.
  """
 
  start_tags: list[str] = Field(
@@ -62,6 +65,10 @@ class TextParserBlock(BaseBlock):
  parser_cleanup_tags: Optional[list[str]] = Field(
  default=None, description="List of tags to clean from parsed output"
  )
+ expand_lists: bool = Field(
+ default=True,
+ description="Whether to expand list inputs into individual rows (True) or preserve lists (False). ",
+ )
 
  @field_validator("start_tags", "end_tags", mode="before")
  @classmethod
@@ -237,36 +244,76 @@ class TextParserBlock(BaseBlock):
  logger.warning(f"Input column '{input_column}' contains empty list")
  return []
 
- all_results = []
- for i, response in enumerate(raw_output):
- if not response or not isinstance(response, str):
- logger.warning(
- f"List item {i} in column '{input_column}' contains invalid data "
- f"(empty or non-string): {type(response)}"
- )
- continue
-
- parsed_outputs = self._parse(response)
-
- if not parsed_outputs or not any(
- len(value) > 0 for value in parsed_outputs.values()
- ):
- logger.warning(
- f"Failed to parse content from list item {i}. Raw output length: {len(response)}, "
- f"parsing method: {'regex' if self.parsing_pattern else 'tags'}"
- )
- continue
-
- # Create output rows for this response
- max_length = max(len(value) for value in parsed_outputs.values())
- for values in zip(
- *(lst[:max_length] for lst in parsed_outputs.values())
- ):
- all_results.append(
- {**sample, **dict(zip(parsed_outputs.keys(), values))}
- )
-
- return all_results
+ if not self.expand_lists:
+ # When expand_lists=False, preserve the list structure
+ # Parse each response in the list and collect results as lists
+ all_parsed_outputs = {col: [] for col in self.output_cols}
+ valid_responses = 0
+
+ for i, response in enumerate(raw_output):
+ if not response or not isinstance(response, str):
+ logger.warning(
+ f"List item {i} in column '{input_column}' contains invalid data "
+ f"(empty or non-string): {type(response)}"
+ )
+ continue
+
+ parsed_outputs = self._parse(response)
+
+ if not parsed_outputs or not any(
+ len(value) > 0 for value in parsed_outputs.values()
+ ):
+ logger.warning(
+ f"Failed to parse content from list item {i}. Raw output length: {len(response)}, "
+ f"parsing method: {'regex' if self.parsing_pattern else 'tags'}"
+ )
+ continue
+
+ valid_responses += 1
+ # Collect all parsed values for each column as lists
+ for col in self.output_cols:
+ all_parsed_outputs[col].extend(parsed_outputs.get(col, []))
+
+ if valid_responses == 0:
+ return []
+
+ # Return single row with lists as values
+ # TODO: This breaks retry counting in LLMChatWithParsingRetryBlock until LLMChatWithParsingRetryBlock is re-based
+ # which expects one row per successful parse for counting
+ return [{**sample, **all_parsed_outputs}]
+
+ else:
+ # When expand_lists=True, use existing expanding behavior
+ all_results = []
+ for i, response in enumerate(raw_output):
+ if not response or not isinstance(response, str):
+ logger.warning(
+ f"List item {i} in column '{input_column}' contains invalid data "
+ f"(empty or non-string): {type(response)}"
+ )
+ continue
+
+ parsed_outputs = self._parse(response)
+
+ if not parsed_outputs or not any(
+ len(value) > 0 for value in parsed_outputs.values()
+ ):
+ logger.warning(
+ f"Failed to parse content from list item {i}. Raw output length: {len(response)}, "
+ f"parsing method: {'regex' if self.parsing_pattern else 'tags'}"
+ )
+ continue
+
+ # Create output rows for this response
+ max_length = max(len(value) for value in parsed_outputs.values())
+ for values in zip(
+ *(lst[:max_length] for lst in parsed_outputs.values())
+ ):
+ all_results.append(
+ {**sample, **dict(zip(parsed_outputs.keys(), values))}
+ )
+
+ return all_results
 
  # Handle string inputs (existing logic)
  elif isinstance(raw_output, str):
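The new expand_lists flag only changes how parsed values from a list-valued input column are laid out. A hedged sketch of the difference (the import path, block names, and column contents are assumptions, not taken from the diff):

    # Hedged sketch -- import path and data are assumptions.
    from datasets import Dataset
    from sdg_hub.blocks.llm import TextParserBlock  # path may differ by package layout

    raw = Dataset.from_list([
        {"raw_response": ["<answer>4</answer>", "<answer>four</answer>"]}
    ])

    common = dict(
        input_cols="raw_response",
        output_cols="answer",
        start_tags=["<answer>"],
        end_tags=["</answer>"],
    )

    # Default behavior: each parsed completion becomes its own output row.
    expanded = TextParserBlock(block_name="parse_expand", expand_lists=True, **common).generate(raw)
    # -> two rows, with answer == "4" and answer == "four"

    # New option: one row per input sample, parsed values preserved as a list.
    preserved = TextParserBlock(block_name="parse_preserve", expand_lists=False, **common).generate(raw)
    # -> one row, with answer == ["4", "four"]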
@@ -291,7 +291,7 @@ class BlockRegistry:
  }
 
  @classmethod
- def show(cls) -> None:
+ def discover_blocks(cls) -> None:
  """Print a Rich-formatted table of all available blocks."""
  if not cls._metadata:
  console.print("[yellow]No blocks registered yet.[/yellow]")
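The rename affects callers of the old helper name; a minimal before/after sketch (the import path is an assumption):

    # Hedged sketch -- import path is an assumption.
    from sdg_hub.blocks.registry import BlockRegistry

    BlockRegistry.discover_blocks()  # 0.2.1: prints the Rich table of registered blocks
    # BlockRegistry.show()           # 0.2.0 name, renamed by this diff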