sdg-hub 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sdg_hub/_version.py +16 -3
- sdg_hub/core/blocks/deprecated_blocks/selector.py +1 -1
- sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +175 -416
- sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +174 -415
- sdg_hub/core/blocks/evaluation/verify_question_block.py +180 -415
- sdg_hub/core/blocks/llm/client_manager.py +92 -43
- sdg_hub/core/blocks/llm/config.py +1 -0
- sdg_hub/core/blocks/llm/llm_chat_block.py +74 -16
- sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +277 -115
- sdg_hub/core/blocks/llm/text_parser_block.py +88 -23
- sdg_hub/core/blocks/registry.py +48 -34
- sdg_hub/core/blocks/transform/__init__.py +2 -0
- sdg_hub/core/blocks/transform/index_based_mapper.py +1 -1
- sdg_hub/core/blocks/transform/json_structure_block.py +142 -0
- sdg_hub/core/flow/base.py +326 -62
- sdg_hub/core/utils/datautils.py +54 -0
- sdg_hub/core/utils/flow_metrics.py +261 -0
- sdg_hub/core/utils/logger_config.py +50 -9
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/__init__.py +0 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/__init__.py +0 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/detailed_summary.yaml +11 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml +159 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/__init__.py +0 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/extractive_summary.yaml +65 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml +161 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_answers.yaml +15 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_multiple_qa.yaml +21 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_question_list.yaml +44 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/__init__.py +0 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml +104 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/key_facts_summary.yaml +61 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +0 -7
- sdg_hub/flows/text_analysis/__init__.py +2 -0
- sdg_hub/flows/text_analysis/structured_insights/__init__.py +6 -0
- sdg_hub/flows/text_analysis/structured_insights/analyze_sentiment.yaml +27 -0
- sdg_hub/flows/text_analysis/structured_insights/extract_entities.yaml +38 -0
- sdg_hub/flows/text_analysis/structured_insights/extract_keywords.yaml +21 -0
- sdg_hub/flows/text_analysis/structured_insights/flow.yaml +153 -0
- sdg_hub/flows/text_analysis/structured_insights/summarize.yaml +21 -0
- {sdg_hub-0.2.1.dist-info → sdg_hub-0.3.0.dist-info}/METADATA +42 -15
- {sdg_hub-0.2.1.dist-info → sdg_hub-0.3.0.dist-info}/RECORD +44 -22
- {sdg_hub-0.2.1.dist-info → sdg_hub-0.3.0.dist-info}/WHEEL +0 -0
- {sdg_hub-0.2.1.dist-info → sdg_hub-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {sdg_hub-0.2.1.dist-info → sdg_hub-0.3.0.dist-info}/top_level.txt +0 -0
sdg_hub/core/blocks/llm/client_manager.py

@@ -107,9 +107,18 @@ class LLMClientManager:
                 f"Could not validate setup for model '{self.config.model}': {e}"
             )
 
+    def _message_to_dict(self, message: Any) -> dict[str, Any]:
+        """Convert a message to a dict."""
+        if hasattr(message, "to_dict"):
+            return message.to_dict()
+        elif hasattr(message, "__dict__"):
+            return message.__dict__
+        else:
+            return dict(message)
+
     def create_completion(
         self, messages: list[dict[str, Any]], **overrides: Any
-    ) -> Union[
+    ) -> Union[dict, list[dict]]:
         """Create a completion using LiteLLM.
 
         Parameters
@@ -121,9 +130,9 @@
 
         Returns
         -------
-        Union[
-            The completion
-            returns a list of
+        Union[dict, List[dict]]
+            The completion response(s). Returns a single response when n=1 or n is None,
+            returns a list of responses when n>1. Response dicts contain 'content' and may contain 'reasoning_content'.
 
         Raises
         ------
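
Note: the return contract above is new in 0.3.0 — message dicts instead of raw message objects. A minimal consumer sketch (hypothetical usage; `manager` stands in for a configured LLMClientManager and the prompt is illustrative):

    messages = [{"role": "user", "content": "Say hello."}]

    result = manager.create_completion(messages)
    if isinstance(result, list):                     # n>1: one dict per choice
        texts = [r["content"] for r in result]
    else:                                            # n=1 or n is None: a single dict
        texts = [result["content"]]
        reasoning = result.get("reasoning_content")  # present only when the model returns it
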
@@ -151,18 +160,80 @@
         # Make the completion call
         response = completion_func(kwargs)
 
-        # Extract
+        # Extract message objects from response
         # Check if n > 1 to determine return type
         n_value = final_config.n or 1
         if n_value > 1:
-            return [
+            return [
+                self._message_to_dict(choice.message) for choice in response.choices
+            ]
         else:
-            return response.choices[0].message
+            return self._message_to_dict(response.choices[0].message)
 
     async def acreate_completion(
+        self,
+        messages: Union[list[dict[str, Any]], list[list[dict[str, Any]]]],
+        max_concurrency: Optional[int] = None,
+        **overrides: Any,
+    ) -> Union[dict, list[dict]] | list[Union[dict, list[dict]]]:
+        """Create async completion(s) using LiteLLM with optional concurrency control.
+
+        Parameters
+        ----------
+        messages : Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]
+            Single message list or list of message lists.
+            - For single: List[Dict[str, Any]] - returns Union[Any, List[Any]]
+            - For multiple: List[List[Dict[str, Any]]] - returns List[Union[Any, List[Any]]]
+        max_concurrency : Optional[int], optional
+            Maximum number of concurrent requests when processing multiple messages.
+            If None, all requests run concurrently.
+        **overrides : Any
+            Runtime parameter overrides.
+
+        Returns
+        -------
+        Union[dict, List[dict], List[Union[dict, List[dict]]]]
+            For single message: completion response (dict when n=1, List[dict] when n>1)
+            For multiple messages: list of completion responses (each element can be dict or List[dict])
+
+        Raises
+        ------
+        Exception
+            If the completion fails after all retries.
+        """
+        # Detect if we have single message or multiple messages
+        if not messages:
+            raise ValueError("messages cannot be empty")
+
+        # Check if first element is a dict (single message) or list (multiple messages)
+        if isinstance(messages[0], dict):
+            # Single message case
+            return await self._acreate_single(messages, **overrides)
+        else:
+            # Multiple messages case
+            messages_list = messages
+
+            if max_concurrency is not None:
+                # Use semaphore for concurrency control
+                semaphore = asyncio.Semaphore(max_concurrency)
+
+                async def _create_with_semaphore(msgs):
+                    async with semaphore:
+                        return await self._acreate_single(msgs, **overrides)
+
+                tasks = [_create_with_semaphore(msgs) for msgs in messages_list]
+                return await asyncio.gather(*tasks)
+            else:
+                # No concurrency limit - process all at once
+                tasks = [
+                    self._acreate_single(msgs, **overrides) for msgs in messages_list
+                ]
+                return await asyncio.gather(*tasks)
+
+    async def _acreate_single(
         self, messages: list[dict[str, Any]], **overrides: Any
-    ) -> Union[
-        """Create
+    ) -> Union[dict, list[dict]]:
+        """Create a single async completion using LiteLLM.
 
         Parameters
         ----------
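
Note: acreate_completion above is the new unified async entry point; when given a list of message lists it bounds in-flight requests with a semaphore. A hedged usage sketch (the wrapper function and prompts are illustrative; `manager` is a configured LLMClientManager):

    import asyncio

    async def summarize_all(manager):
        batches = [
            [{"role": "user", "content": f"Summarize document {i}"}]
            for i in range(100)
        ]
        # At most 8 requests in flight; with max_concurrency=None all 100
        # coroutines are dispatched at once via asyncio.gather.
        results = await manager.acreate_completion(batches, max_concurrency=8)
        return [r["content"] for r in results]  # each element is a dict when n=1

    # asyncio.run(summarize_all(manager))  # run with a configured manager
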
@@ -173,10 +244,9 @@
 
         Returns
         -------
-        Union[
-
-
-
+        Union[dict, List[dict]]
+            List of completion message objects. Each element is a dict when n=1 or n is None,
+            or a list of dicts when n>1. Message dicts contain 'content' and may contain 'reasoning_content'.
         Raises
         ------
         Exception
@@ -203,17 +273,19 @@
         # Make the async completion call
         response = await completion_func(kwargs)
 
-        # Extract
+        # Extract message objects from response
         # Check if n > 1 to determine return type
         n_value = final_config.n or 1
         if n_value > 1:
-            return [
+            return [
+                self._message_to_dict(choice.message) for choice in response.choices
+            ]
         else:
-            return response.choices[0].message
+            return self._message_to_dict(response.choices[0].message)
 
     def create_completions_batch(
         self, messages_list: list[list[dict[str, Any]]], **overrides: Any
-    ) -> list[Union[
+    ) -> list[Union[dict, list[dict]]]:
         """Create multiple completions in batch.
 
         Parameters
@@ -225,9 +297,9 @@
 
         Returns
         -------
-        List[
-            List of completion
-            or a list of
+        List[dict] | List[List[dict]]
+            List of completion responses. Each element is a dict when n=1 or n is None,
+            or a list of dicts when n>1. Response dicts contain 'content' and may contain 'reasoning_content'.
         """
         results = []
         for messages in messages_list:
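
Note: the synchronous batch helper keeps its shape; only the element type changes. A minimal sketch (assuming `manager` and `batches` as in the earlier sketch):

    results = manager.create_completions_batch(batches, temperature=0.0)
    texts = [r["content"] for r in results]  # assumes n=1 per request
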
@@ -235,29 +307,6 @@
             results.append(result)
         return results
 
-    async def acreate_completions_batch(
-        self, messages_list: list[list[dict[str, Any]]], **overrides: Any
-    ) -> list[Union[str, list[str]]]:
-        """Create multiple completions in batch asynchronously.
-
-        Parameters
-        ----------
-        messages_list : List[List[Dict[str, Any]]]
-            List of message lists to process.
-        **overrides : Any
-            Runtime parameter overrides.
-
-        Returns
-        -------
-        List[Union[str, List[str]]]
-            List of completion texts. Each element is a single string when n=1 or n is None,
-            or a list of strings when n>1.
-        """
-        tasks = [
-            self.acreate_completion(messages, **overrides) for messages in messages_list
-        ]
-        return await asyncio.gather(*tasks)
-
     def _build_completion_kwargs(
         self, messages: list[dict[str, Any]], config: LLMConfig
     ) -> dict[str, Any]:
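
Note: acreate_completions_batch is removed in 0.3.0. Callers can pass the list of message lists straight to acreate_completion, which dispatches on input shape; a hedged before/after sketch:

    # before (0.2.1): plain strings came back
    texts = await manager.acreate_completions_batch(messages_list)

    # after (0.3.0): message dicts come back; read the 'content' field
    results = await manager.acreate_completion(messages_list)
    texts = [r["content"] for r in results]
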
sdg_hub/core/blocks/llm/llm_chat_block.py

@@ -42,9 +42,10 @@ class LLMChatBlock(BaseBlock):
         Name of the block.
     input_cols : Union[str, List[str]]
         Input column name(s). Should contain the messages list.
-    output_cols : Union[
+    output_cols : Union[dict, List[dict]]
         Output column name(s) for the response. When n > 1, the column will contain
-        a list of responses instead of a single
+        a list of responses instead of a single response. Responses contain 'content',
+        may contain 'reasoning_content' and other fields if any.
     model : str
         Model identifier in LiteLLM format. Examples:
         - "openai/gpt-4"
@@ -131,7 +132,7 @@
     >>> block = LLMChatBlock(
     ...     block_name="gpt4_multiple",
     ...     input_cols="messages",
-    ...     output_cols="responses",  # Will contain lists of
+    ...     output_cols="responses",  # Will contain lists of responses
     ...     model="openai/gpt-4",
     ...     n=3,  # Generate 3 responses per input
     ...     temperature=0.8
@@ -297,6 +298,10 @@
             temperature, max_tokens, top_p, frequency_penalty, presence_penalty,
             stop, seed, response_format, stream, n, and provider-specific params.
 
+            Special flow-level parameters:
+            _flow_max_concurrency : int, optional
+                Maximum concurrency for async requests (passed by Flow).
+
         Returns
         -------
         Dataset
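
Note: a sketch of how the flow-level knob reaches the block (the parameter name comes from the docstring above; the call site and values are illustrative):

    dataset = block.generate(
        samples,
        _flow_max_concurrency=8,  # popped from override_kwargs before the LLM calls
        temperature=0.2,          # ordinary overrides pass through to LiteLLM
    )
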
@@ -314,27 +319,73 @@
                 f"Call flow.set_model_config() before generating."
             )
 
+        # Extract max_concurrency if provided by flow
+        flow_max_concurrency = override_kwargs.pop("_flow_max_concurrency", None)
+
         # Extract messages
         messages_list = samples[self.input_cols[0]]
 
         # Log generation start
         logger.info(
-            f"Starting {'async' if self.async_mode else 'sync'} generation for {len(messages_list)} samples"
+            f"Starting {'async' if self.async_mode else 'sync'} generation for {len(messages_list)} samples"
+            + (
+                f" (max_concurrency={flow_max_concurrency})"
+                if flow_max_concurrency
+                else ""
+            ),
             extra={
                 "block_name": self.block_name,
                 "model": self.model,
                 "provider": self.client_manager.config.get_provider(),
                 "batch_size": len(messages_list),
                 "async_mode": self.async_mode,
-                "
+                "flow_max_concurrency": flow_max_concurrency,
+                "override_params": {
+                    k: (
+                        "***"
+                        if any(
+                            s in k.lower()
+                            for s in ["key", "token", "secret", "authorization"]
+                        )
+                        else v
+                    )
+                    for k, v in override_kwargs.items()
+                },
             },
         )
 
         # Generate responses
         if self.async_mode:
-
-
-
+            try:
+                # Check if there's already a running event loop
+                loop = asyncio.get_running_loop()
+                # Check if nest_asyncio is applied (allows nested asyncio.run)
+                # Use multiple detection methods for robustness
+                nest_asyncio_applied = (
+                    hasattr(loop, "_nest_patched")
+                    or getattr(asyncio.run, "__module__", "") == "nest_asyncio"
+                )
+
+                if nest_asyncio_applied:
+                    # nest_asyncio is applied, safe to use asyncio.run
+                    responses = asyncio.run(
+                        self._generate_async(
+                            messages_list, flow_max_concurrency, **override_kwargs
+                        )
+                    )
+                else:
+                    # Running inside an event loop without nest_asyncio
+                    raise BlockValidationError(
+                        f"async_mode=True cannot be used from within a running event loop for '{self.block_name}'. "
+                        "Use an async entrypoint, set async_mode=False, or apply nest_asyncio.apply() in notebook environments."
+                    )
+            except RuntimeError:
+                # No running loop; safe to create one
+                responses = asyncio.run(
+                    self._generate_async(
+                        messages_list, flow_max_concurrency, **override_kwargs
+                    )
+                )
         else:
             responses = self._generate_sync(messages_list, **override_kwargs)
 
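
Note: in notebooks an event loop is already running, so the guard above raises BlockValidationError unless nest_asyncio is applied first. A minimal sketch:

    import nest_asyncio

    nest_asyncio.apply()  # patches the running loop so asyncio.run() can nest

    dataset = block.generate(samples)  # async_mode=True now works under Jupyter
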
@@ -356,7 +407,7 @@
         self,
         messages_list: list[list[dict[str, Any]]],
         **override_kwargs: dict[str, Any],
-    ) -> list[Union[
+    ) -> list[Union[dict, list[dict]]]:
         """Generate responses synchronously.
 
         Parameters
@@ -368,8 +419,9 @@
 
         Returns
         -------
-        List[Union[
-            List of
+        List[Union[dict, List[dict]]]
+            List of responses. Each element is a dict when n=1 or n is None,
+            or a list of dicts when n>1. Response dicts contain 'content', may contain 'reasoning_content' and other fields if any.
         """
         responses = []
 
@@ -409,26 +461,32 @@
     async def _generate_async(
         self,
         messages_list: list[list[dict[str, Any]]],
+        flow_max_concurrency: Optional[int] = None,
         **override_kwargs: dict[str, Any],
-    ) -> list[Union[
+    ) -> list[Union[dict, list[dict]]]:
         """Generate responses asynchronously.
 
         Parameters
         ----------
         messages_list : List[List[Dict[str, Any]]]
             List of message lists to process.
+        flow_max_concurrency : Optional[int], optional
+            Maximum concurrency for async requests.
         **override_kwargs : Dict[str, Any]
             Runtime parameter overrides.
 
         Returns
         -------
-        List[Union[
-            List of
+        List[Union[dict, List[dict]]]
+            List of responses. Each element is a dict when n=1 or n is None,
+            or a list of dicts when n>1. Response dicts contain 'content', may contain 'reasoning_content' and other fields if any.
         """
         try:
-
-
+            # Use unified client manager method with optional concurrency control
+            responses = await self.client_manager.acreate_completion(
+                messages_list, max_concurrency=flow_max_concurrency, **override_kwargs
             )
+
             return responses
 
         except Exception as e: