sdg-hub 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sdg_hub/_version.py +2 -2
- sdg_hub/core/blocks/__init__.py +2 -4
- sdg_hub/core/blocks/base.py +61 -6
- sdg_hub/core/blocks/filtering/column_value_filter.py +3 -2
- sdg_hub/core/blocks/llm/__init__.py +2 -4
- sdg_hub/core/blocks/llm/llm_chat_block.py +251 -265
- sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +216 -98
- sdg_hub/core/blocks/llm/llm_parser_block.py +320 -0
- sdg_hub/core/blocks/llm/text_parser_block.py +53 -152
- sdg_hub/core/flow/base.py +7 -4
- sdg_hub/core/utils/datautils.py +40 -22
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml +51 -11
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/__init__.py +0 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/flow.yaml +159 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml +51 -11
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml +14 -2
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +146 -26
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/README.md +0 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/__init__.py +0 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/atomic_facts_ja.yaml +41 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/detailed_summary_ja.yaml +14 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/extractive_summary_ja.yaml +14 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml +304 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/generate_questions_responses_ja.yaml +55 -0
- sdg_hub/flows/text_analysis/structured_insights/flow.yaml +28 -4
- {sdg_hub-0.3.0.dist-info → sdg_hub-0.4.0.dist-info}/METADATA +1 -1
- {sdg_hub-0.3.0.dist-info → sdg_hub-0.4.0.dist-info}/RECORD +30 -26
- sdg_hub/core/blocks/evaluation/__init__.py +0 -9
- sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +0 -323
- sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +0 -323
- sdg_hub/core/blocks/evaluation/verify_question_block.py +0 -329
- sdg_hub/core/blocks/llm/client_manager.py +0 -447
- sdg_hub/core/blocks/llm/config.py +0 -337
- {sdg_hub-0.3.0.dist-info → sdg_hub-0.4.0.dist-info}/WHEEL +0 -0
- {sdg_hub-0.3.0.dist-info → sdg_hub-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {sdg_hub-0.3.0.dist-info → sdg_hub-0.4.0.dist-info}/top_level.txt +0 -0
All hunks shown below come from sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py (+216 -98), which was reworked around the new LLMParserBlock stage:

```diff
@@ -18,6 +18,7 @@ from ...utils.logger_config import setup_logger
 from ..base import BaseBlock
 from ..registry import BlockRegistry
 from .llm_chat_block import LLMChatBlock
+from .llm_parser_block import LLMParserBlock
 from .text_parser_block import TextParserBlock
 
 logger = setup_logger(__name__)
```
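The retry block now pulls in the new `LLMParserBlock` next to the existing chat and text-parser imports. A minimal sketch of the resulting import surface, assuming the `llm` subpackage re-exports these names (its `__init__.py` is also touched in this release):

```python
# Hypothetical import sketch; assumes sdg_hub.core.blocks.llm re-exports
# these classes, as the __init__.py change in this release suggests.
from sdg_hub.core.blocks.llm import (
    LLMChatBlock,     # sends chat-completion requests via LiteLLM
    LLMParserBlock,   # new in 0.4.0: pulls content/reasoning/tool_calls out of responses
    TextParserBlock,  # tag- or regex-based parsing of the extracted text
)
```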
```diff
@@ -66,45 +67,12 @@ class LLMChatWithParsingRetryBlock(BaseBlock):
         Maximum number of retry attempts for parsing failures (default: 3).
         This is different from max_retries, which handles LLM network/API failures.
 
-
-
-
-
-
-    max_retries : int, optional
-        Maximum number of LLM retry attempts for network failures (default: 6).
-    temperature : Optional[float], optional
-        Sampling temperature (0.0 to 2.0).
-    max_tokens : Optional[int], optional
-        Maximum tokens to generate.
-    top_p : Optional[float], optional
-        Nucleus sampling parameter (0.0 to 1.0).
-    frequency_penalty : Optional[float], optional
-        Frequency penalty (-2.0 to 2.0).
-    presence_penalty : Optional[float], optional
-        Presence penalty (-2.0 to 2.0).
-    stop : Optional[Union[str, List[str]]], optional
-        Stop sequences.
-    seed : Optional[int], optional
-        Random seed for reproducible outputs.
-    response_format : Optional[Dict[str, Any]], optional
-        Response format specification (e.g., JSON mode).
-    stream : Optional[bool], optional
-        Whether to stream responses.
-    n : Optional[int], optional
-        Number of completions to generate per retry attempt.
-    logprobs : Optional[bool], optional
-        Whether to return log probabilities.
-    top_logprobs : Optional[int], optional
-        Number of top log probabilities to return.
-    user : Optional[str], optional
-        End-user identifier.
-    extra_headers : Optional[Dict[str, str]], optional
-        Additional headers to send with requests.
-    extra_body : Optional[Dict[str, Any]], optional
-        Additional parameters for the request body.
-    provider_specific : Optional[Dict[str, Any]], optional
-        Provider-specific parameters.
+    **llm_kwargs : Any
+        Any LiteLLM completion parameters (model, api_base, api_key, temperature,
+        max_tokens, top_p, frequency_penalty, presence_penalty, stop, seed,
+        response_format, stream, n, logprobs, top_logprobs, user, extra_headers,
+        extra_body, async_mode, timeout, num_retries, etc.).
+        See https://docs.litellm.ai/docs/completion/input for full list.
 
     ### Text Parser Parameters ###
     start_tags : List[str], optional
```
```diff
@@ -116,6 +84,18 @@ class LLMChatWithParsingRetryBlock(BaseBlock):
     parser_cleanup_tags : Optional[List[str]], optional
         List of tags to clean from parsed output.
 
+    ### LLMParserBlock Parameters ###
+    extract_content : bool, optional
+        Whether to extract 'content' field from responses.
+    extract_reasoning_content : bool, optional
+        Whether to extract 'reasoning_content' field from responses.
+    extract_tool_calls : bool, optional
+        Whether to extract 'tool_calls' field from responses.
+    expand_lists : bool, optional
+        Whether to expand list inputs into individual rows (True) or preserve lists (False).
+    field_prefix : Optional[str], optional
+        Prefix for the field names in the parsed output.
+
     Examples
     --------
     >>> # Basic JSON parsing with retry
```
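The rewritten docstring shrinks the constructor surface considerably: generation settings now ride along as raw LiteLLM kwargs, while extraction behavior is governed by the new LLMParserBlock flags. A hedged construction sketch based only on the documented parameters (the model identifier, tags, and column names below are invented for illustration):

```python
# Illustrative sketch only: the model identifier, tags, and column names
# are invented; parameter names follow the docstring above.
from sdg_hub.core.blocks.llm.llm_chat_with_parsing_retry_block import (
    LLMChatWithParsingRetryBlock,
)

block = LLMChatWithParsingRetryBlock(
    block_name="qa_gen",
    input_cols=["messages"],
    output_cols=["question", "answer"],
    parsing_max_retries=3,
    # LiteLLM completion parameters, forwarded as **llm_kwargs:
    model="hosted_vllm/meta-llama/Llama-3.1-8B-Instruct",
    temperature=0.7,
    max_tokens=1024,
    # TextParserBlock parameters:
    start_tags=["<question>", "<answer>"],
    end_tags=["</question>", "</answer>"],
    # LLMParserBlock parameters:
    extract_content=True,
    expand_lists=True,
)
```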
```diff
@@ -165,9 +145,30 @@ class LLMChatWithParsingRetryBlock(BaseBlock):
         None, description="List of tags to clean from parsed output"
     )
 
+    ### LLMParserBlock Parameters ###
+    extract_content: bool = Field(
+        default=True, description="Whether to extract 'content' field from responses."
+    )
+    extract_reasoning_content: bool = Field(
+        default=False,
+        description="Whether to extract 'reasoning_content' field from responses.",
+    )
+    extract_tool_calls: bool = Field(
+        default=False,
+        description="Whether to extract 'tool_calls' field from responses.",
+    )
+    expand_lists: bool = Field(
+        default=True,
+        description="Whether to expand list inputs into individual rows (True) or preserve lists (False).",
+    )
+    field_prefix: Optional[str] = Field(
+        default="", description="Prefix for the field names in the parsed output."
+    )
+
     # Internal blocks - excluded from serialization
     llm_chat: Optional[LLMChatBlock] = Field(None, exclude=True)
     text_parser: Optional[TextParserBlock] = Field(None, exclude=True)
+    llm_parser: Optional[LLMParserBlock] = Field(None, exclude=True)
 
     @field_validator("input_cols")
     @classmethod
```
```diff
@@ -197,19 +198,19 @@ class LLMChatWithParsingRetryBlock(BaseBlock):
         self._create_internal_blocks(**kwargs)
 
         # Log initialization if model is configured
-        if self.model:
+        if self.llm_chat and self.llm_chat.model:
             logger.info(
-                f"Initialized LLMChatWithParsingRetryBlock '{self.block_name}' with model '{self.model}'",
+                f"Initialized LLMChatWithParsingRetryBlock '{self.block_name}' with model '{self.llm_chat.model}'",
                 extra={
                     "block_name": self.block_name,
-                    "model": self.model,
+                    "model": self.llm_chat.model,
                     "parsing_max_retries": self.parsing_max_retries,
                 },
             )
 
     def _extract_params(self, kwargs: dict, block_class) -> dict:
-        """Extract parameters for specific block class
-        #
+        """Extract parameters for specific block class."""
+        # Parameters that belong to this wrapper and shouldn't be forwarded
         wrapper_params = {
             "block_name",
             "input_cols",
```
```diff
@@ -217,23 +218,67 @@ class LLMChatWithParsingRetryBlock(BaseBlock):
             "parsing_max_retries",
         }
 
-
-
-
-
-
-
+        if block_class == LLMChatBlock:
+            # LLMChatBlock accepts any parameters via extra="allow"
+            # Forward everything except wrapper-specific and parser-specific params
+            parser_specific_params = {
+                "start_tags",
+                "end_tags",
+                "parsing_pattern",
+                "parser_cleanup_tags",
+            }
+            llm_parser_specific_params = {
+                "extract_content",
+                "extract_reasoning_content",
+                "extract_tool_calls",
+                "expand_lists",
+                "field_prefix",
+            }
+            excluded_params = (
+                wrapper_params | parser_specific_params | llm_parser_specific_params
+            )
 
-
-
-
-
-
-
-
-
+            # Forward all other kwargs
+            params = {k: v for k, v in kwargs.items() if k not in excluded_params}
+
+            # Also forward instance attributes that aren't parser-specific
+            for field_name, field_value in self.__dict__.items():
+                if (
+                    field_name not in excluded_params
+                    and not field_name.startswith("_")
+                    and field_name not in ["llm_chat", "text_parser", "llm_parser"]
+                    and field_value is not None
+                ):
                    params[field_name] = field_value
 
+        else:
+            # For TextParserBlock, only forward known fields and parser-specific params
+            non_llm_chat_params = {
+                "start_tags",
+                "end_tags",
+                "parsing_pattern",
+                "parser_cleanup_tags",
+                "expand_lists",
+                "field_prefix",
+                "extract_content",
+                "extract_reasoning_content",
+                "extract_tool_calls",
+            }
+
+            # Forward parser-specific parameters from kwargs
+            params = {
+                k: v
+                for k, v in kwargs.items()
+                if k in block_class.model_fields and k not in wrapper_params
+            }
+
+            # Forward parser-specific instance attributes
+            for field_name in non_llm_chat_params:
+                if hasattr(self, field_name):
+                    field_value = getattr(self, field_name)
+                    if field_value is not None:
+                        params[field_name] = field_value
+
         return params
 
     def _create_internal_blocks(self, **kwargs):
```
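The routing rule in `_extract_params` is easier to see in isolation: wrapper-owned and parser-owned names are subtracted from what reaches `LLMChatBlock`, while the parser blocks only receive names they declare. A simplified standalone sketch of that partitioning (not the actual method; the parameter sets are copied from the hunk above):

```python
# Simplified sketch of the routing idea, detached from the class.
WRAPPER = {"block_name", "input_cols", "output_cols", "parsing_max_retries"}
TEXT_PARSER = {"start_tags", "end_tags", "parsing_pattern", "parser_cleanup_tags"}
LLM_PARSER = {"extract_content", "extract_reasoning_content",
              "extract_tool_calls", "expand_lists", "field_prefix"}

def route_params(kwargs: dict) -> dict:
    """Partition constructor kwargs into per-block parameter dicts."""
    return {
        # The chat block takes everything that is not claimed elsewhere
        # (it tolerates unknown keys via Pydantic extra="allow").
        "llm_chat": {k: v for k, v in kwargs.items()
                     if k not in WRAPPER | TEXT_PARSER | LLM_PARSER},
        "text_parser": {k: v for k, v in kwargs.items() if k in TEXT_PARSER},
        "llm_parser": {k: v for k, v in kwargs.items() if k in LLM_PARSER},
    }

print(route_params({"model": "m", "temperature": 0.2, "start_tags": ["<a>"],
                    "expand_lists": False, "parsing_max_retries": 5}))
```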
```diff
@@ -241,6 +286,7 @@ class LLMChatWithParsingRetryBlock(BaseBlock):
         # Route parameters to appropriate blocks
         llm_params = self._extract_params(kwargs, LLMChatBlock)
         parser_params = self._extract_params(kwargs, TextParserBlock)
+        llm_parser_params = self._extract_params(kwargs, LLMParserBlock)
 
         # 1. LLMChatBlock
         self.llm_chat = LLMChatBlock(
```
```diff
@@ -250,25 +296,56 @@ class LLMChatWithParsingRetryBlock(BaseBlock):
             **llm_params,
         )
 
+        # 2. LLMParserBlock
+        self.llm_parser = LLMParserBlock(
+            block_name=f"{self.block_name}_llm_parser",
+            input_cols=[f"{self.block_name}_raw_response"],
+            **llm_parser_params,
+        )
+
         # 2. TextParserBlock
         self.text_parser = TextParserBlock(
             block_name=f"{self.block_name}_text_parser",
-            input_cols=[f"{self.block_name}_raw_response"],
+            input_cols=[
+                f"{self.llm_parser.field_prefix if self.llm_parser.field_prefix!='' else self.llm_parser.block_name}_content"
+            ],
             output_cols=self.output_cols,
             **parser_params,
         )
 
     def __getattr__(self, name: str) -> Any:
         """Forward attribute access to appropriate internal block."""
-        #
-
-
-
-
-
-
-
-
+        # Parser-specific parameters go to text_parser
+        parser_params = {
+            "start_tags",
+            "end_tags",
+            "parsing_pattern",
+            "parser_cleanup_tags",
+        }
+        llm_parser_params = {
+            "extract_content",
+            "extract_reasoning_content",
+            "extract_tool_calls",
+            "expand_lists",
+            "field_prefix",
+        }
+
+        if name in parser_params and hasattr(self, "text_parser") and self.text_parser:
+            return getattr(self.text_parser, name)
+
+        if (
+            name in llm_parser_params
+            and hasattr(self, "llm_parser")
+            and self.llm_parser
+        ):
+            return getattr(self.llm_parser, name)
+
+        # Everything else goes to llm_chat (which accepts any parameters via extra="allow")
+        if hasattr(self, "llm_chat") and self.llm_chat:
+            # Always try LLMChatBlock - it will return None for unset attributes
+            # due to extra="allow", which makes hasattr() work correctly
+            return getattr(self.llm_chat, name, None)
+
         raise AttributeError(
             f"'{self.__class__.__name__}' object has no attribute '{name}'"
         )
```
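Reads on the wrapper now resolve in priority order: text-parser fields first, then llm-parser fields, then anything the chat block holds, with unknown names yielding `None` via the chat-block fallback rather than `AttributeError`. A toy analogue of that lookup order (stand-in objects, invented attribute values):

```python
from types import SimpleNamespace

TEXT_PARSER_FIELDS = {"start_tags", "end_tags", "parsing_pattern", "parser_cleanup_tags"}
LLM_PARSER_FIELDS = {"extract_content", "extract_reasoning_content",
                     "extract_tool_calls", "expand_lists", "field_prefix"}

def resolve(wrapper, name):
    """Toy analogue of the wrapper's read-forwarding priority."""
    if name in TEXT_PARSER_FIELDS:
        return getattr(wrapper.text_parser, name)
    if name in LLM_PARSER_FIELDS:
        return getattr(wrapper.llm_parser, name)
    # Fallback: the chat block; unknown names yield None, not AttributeError.
    return getattr(wrapper.llm_chat, name, None)

w = SimpleNamespace(
    text_parser=SimpleNamespace(start_tags=["<q>"]),
    llm_parser=SimpleNamespace(expand_lists=True),
    llm_chat=SimpleNamespace(temperature=0.7),
)
print(resolve(w, "start_tags"), resolve(w, "expand_lists"),
      resolve(w, "temperature"), resolve(w, "no_such_field"))
# ['<q>'] True 0.7 None
```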
```diff
@@ -277,24 +354,51 @@ class LLMChatWithParsingRetryBlock(BaseBlock):
         """Handle dynamic parameter updates from flow.set_model_config()."""
         super().__setattr__(name, value)
 
-        #
-
-
-            ("text_parser", TextParserBlock),
-        ]:
-            if hasattr(self, block_attr) and name in block_class.model_fields:
-                setattr(getattr(self, block_attr), name, value)
+        # Don't forward during initialization or for internal attributes
+        if not hasattr(self, "llm_chat") or name.startswith("_"):
+            return
 
-
-
+        # Parser-specific parameters go to text_parser
+        parser_params = {
+            "start_tags",
+            "end_tags",
+            "parsing_pattern",
+            "parser_cleanup_tags",
+        }
+        llm_parser_params = {
+            "extract_content",
+            "extract_reasoning_content",
+            "extract_tool_calls",
+            "expand_lists",
+            "field_prefix",
+        }
 
-
-
-
-        if
-
-
-            self.
+        if name in parser_params and hasattr(self, "text_parser") and self.text_parser:
+            setattr(self.text_parser, name, value)
+
+        if (
+            name in llm_parser_params
+            and hasattr(self, "llm_parser")
+            and self.llm_parser
+        ):
+            setattr(self.llm_parser, name, value)
+
+        # LLM-related parameters go to llm_chat (which accepts any via extra="allow")
+        elif (
+            hasattr(self, "llm_chat")
+            and self.llm_chat
+            and name
+            not in {
+                "block_name",
+                "input_cols",
+                "output_cols",
+                "parsing_max_retries",
+                "llm_chat",
+                "llm_parser",
+                "text_parser",
+            }
+        ):
+            setattr(self.llm_chat, name, value)
 
     def generate(self, samples: Dataset, **kwargs: Any) -> Dataset:
         """Generate responses with parsing retry logic.
```
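Writes follow the same routing, with one wrinkle: `super().__setattr__` always stores the value on the wrapper first, and because the text-parser branch is a separate `if` rather than part of the `if/elif` chain, text-parser names also fall through to the chat block (harmless, since it accepts extras). A toy classifier of where a write is forwarded, using the sets from the hunk:

```python
TEXT_PARSER_FIELDS = {"start_tags", "end_tags", "parsing_pattern", "parser_cleanup_tags"}
LLM_PARSER_FIELDS = {"extract_content", "extract_reasoning_content",
                     "extract_tool_calls", "expand_lists", "field_prefix"}
WRAPPER_FIELDS = {"block_name", "input_cols", "output_cols", "parsing_max_retries",
                  "llm_chat", "llm_parser", "text_parser"}

def write_targets(name: str) -> list[str]:
    """Toy classification of where __setattr__ forwards a write (simplified)."""
    targets = ["wrapper"]                # super().__setattr__ always runs first
    if name in TEXT_PARSER_FIELDS:
        targets.append("text_parser")
    if name in LLM_PARSER_FIELDS:
        targets.append("llm_parser")
    elif name not in WRAPPER_FIELDS:     # mirrors the if/elif chain in the diff
        targets.append("llm_chat")
    return targets

for attr in ("parsing_pattern", "extract_tool_calls", "model", "parsing_max_retries"):
    print(attr, "->", write_targets(attr))
```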
```diff
@@ -325,8 +429,8 @@ class LLMChatWithParsingRetryBlock(BaseBlock):
         MaxRetriesExceededError
             If target count not reached after max retries for any sample.
         """
-        # Validate that model is configured
-        if not self.model:
+        # Validate that model is configured (check internal LLM block)
+        if not self.llm_chat or not self.llm_chat.model:
             raise BlockValidationError(
                 f"Model not configured for block '{self.block_name}'. "
                 f"Call flow.set_model_config() before generating."
```
```diff
@@ -336,7 +440,7 @@ class LLMChatWithParsingRetryBlock(BaseBlock):
             f"Starting LLM generation with parsing retry for {len(samples)} samples",
             extra={
                 "block_name": self.block_name,
-                "model": self.model,
+                "model": self.llm_chat.model,
                 "batch_size": len(samples),
                 "parsing_max_retries": self.parsing_max_retries,
             },
```
```diff
@@ -358,7 +462,7 @@ class LLMChatWithParsingRetryBlock(BaseBlock):
             },
         )
 
-        if self.expand_lists:
+        if self.llm_parser.expand_lists:
             # Current behavior for expand_lists=True: count rows directly
             sample_results = []
             total_parsed_count = 0
```
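`expand_lists` decides the shape the retry accounting works with: `True` flattens each parsed item into its own row, so progress is measured by row count; `False` keeps one row per sample with lists inside. A toy illustration of the two shapes (invented data):

```python
# Toy illustration of the two output shapes (invented data).
parsed = ["fact 1", "fact 2", "fact 3"]

# expand_lists=True: one row per parsed item, so len(dataset) counts items.
rows_expanded = [{"summary": item} for item in parsed]   # 3 rows

# expand_lists=False: one row per sample with the list preserved, so
# progress is measured by the list length inside the single row.
rows_preserved = [{"summary": parsed}]                   # 1 row

print(len(rows_expanded), len(rows_preserved))           # 3 1
```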
```diff
@@ -372,9 +476,14 @@ class LLMChatWithParsingRetryBlock(BaseBlock):
                 # Generate LLM responses for this sample
                 temp_dataset = Dataset.from_list([sample])
                 llm_result = self.llm_chat.generate(temp_dataset, **kwargs)
+                llm_parser_result = self.llm_parser.generate(
+                    llm_result, **kwargs
+                )
 
                 # Parse the responses
-                parsed_result = self.text_parser.generate(llm_result, **kwargs)
+                parsed_result = self.text_parser.generate(
+                    llm_parser_result, **kwargs
+                )
 
                 # Count successful parses and accumulate results
                 new_parsed_count = len(parsed_result)
```
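The per-sample loop is now a three-stage pipeline: chat generation, message-field extraction, then text parsing. A schematic sketch of a single retry iteration under that staging (method names follow the diff; everything else is illustrative):

```python
from datasets import Dataset

def one_attempt(block, sample: dict, **kwargs) -> Dataset:
    """Schematic single retry iteration, mirroring the staging in the diff."""
    temp = Dataset.from_list([sample])
    chat_out = block.llm_chat.generate(temp, **kwargs)         # raw responses
    extracted = block.llm_parser.generate(chat_out, **kwargs)  # content fields
    return block.text_parser.generate(extracted, **kwargs)     # structured columns
```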
```diff
@@ -433,10 +542,12 @@ class LLMChatWithParsingRetryBlock(BaseBlock):
                 # Generate LLM responses for this sample
                 temp_dataset = Dataset.from_list([sample])
                 llm_result = self.llm_chat.generate(temp_dataset, **kwargs)
-
+                llm_parser_result = self.llm_parser.generate(
+                    llm_result, **kwargs
+                )
                 # Get the raw responses (should be a list when n > 1)
-                raw_response_col = f"{self.block_name}_raw_response"
-                raw_responses = llm_result[0][raw_response_col]
+                raw_response_col = f"{self.llm_parser.field_prefix if self.llm_parser.field_prefix!='' else self.llm_parser.block_name}_content"
+                raw_responses = llm_parser_result[0][raw_response_col]
                 if not isinstance(raw_responses, list):
                     raw_responses = [raw_responses]
 
```
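Both call sites derive the extracted-content column from the same rule: `field_prefix` wins when non-empty, otherwise the llm-parser's block name is used, suffixed with `_content`. A small helper expressing that rule, with the default naming from `_create_internal_blocks` as the example:

```python
def content_column(field_prefix: str, block_name: str) -> str:
    """Naming rule used at both call sites: prefix wins when non-empty."""
    return f"{field_prefix if field_prefix != '' else block_name}_content"

# With the default field_prefix="" the llm-parser's own name is used:
assert content_column("", "qa_gen_llm_parser") == "qa_gen_llm_parser_content"
assert content_column("resp", "qa_gen_llm_parser") == "resp_content"
```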
```diff
@@ -451,9 +562,11 @@ class LLMChatWithParsingRetryBlock(BaseBlock):
                     temp_parse_dataset = Dataset.from_list(temp_parse_data)
 
                     # Force expand_lists=True temporarily to get individual parsed items
-                    original_expand_lists = self.expand_lists
+                    original_expand_lists = self.llm_parser.expand_lists
                     try:
-                        self.expand_lists = True
+                        self.llm_parser.expand_lists = (
+                            self.llm_parser.expand_lists
+                        )
                         parsed_result = self.text_parser.generate(
                             temp_parse_dataset, **kwargs
                         )
```
```diff
@@ -463,7 +576,7 @@ class LLMChatWithParsingRetryBlock(BaseBlock):
                         )
                         continue
                     finally:
-                        self.expand_lists = original_expand_lists
+                        self.llm_parser.expand_lists = original_expand_lists
 
                     # If parsing was successful, accumulate the results
                     if len(parsed_result) > 0:
```
```diff
@@ -545,7 +658,7 @@ class LLMChatWithParsingRetryBlock(BaseBlock):
             )
 
             # For expand_lists=True, trim results to exact target count if we exceeded it
-            if self.expand_lists and total_parsed_count > target:
+            if self.llm_parser.expand_lists and total_parsed_count > target:
                 sample_results = sample_results[:target]
                 logger.debug(
                     f"Trimmed sample {sample_idx} results from {total_parsed_count} to {target}",
```
```diff
@@ -566,7 +679,7 @@ class LLMChatWithParsingRetryBlock(BaseBlock):
                 "block_name": self.block_name,
                 "input_samples": len(samples),
                 "output_rows": len(all_results),
-                "model": self.model,
+                "model": self.llm_chat.model,
             },
         )
 
```
```diff
@@ -641,12 +754,17 @@ class LLMChatWithParsingRetryBlock(BaseBlock):
         """
         return {
             "llm_chat": self.llm_chat.get_info() if self.llm_chat else None,
+            "llm_parser": self.llm_parser.get_info() if self.llm_parser else None,
             "text_parser": self.text_parser.get_info() if self.text_parser else None,
         }
 
     def __repr__(self) -> str:
         """String representation of the block."""
-        model = self.model or "not_configured"
+        model = (
+            self.llm_chat.model
+            if (self.llm_chat and self.llm_chat.model)
+            else "not_configured"
+        )
         return (
             f"LLMChatWithParsingRetryBlock(name='{self.block_name}', "
             f"model='{model}', parsing_max_retries={self.parsing_max_retries})"
```