sdg-hub 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -160,9 +160,69 @@ class LLMClientManager:
160
160
  return response.choices[0].message.content
161
161
 
162
162
  async def acreate_completion(
163
+ self,
164
+ messages: Union[list[dict[str, Any]], list[list[dict[str, Any]]]],
165
+ max_concurrency: Optional[int] = None,
166
+ **overrides: Any,
167
+ ) -> Union[str, list[str], list[Union[str, list[str]]]]:
168
+ """Create async completion(s) using LiteLLM with optional concurrency control.
169
+
170
+ Parameters
171
+ ----------
172
+ messages : Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]
173
+ Single message list or list of message lists.
174
+ - For single: List[Dict[str, Any]] - returns Union[str, List[str]]
175
+ - For multiple: List[List[Dict[str, Any]]] - returns List[Union[str, List[str]]]
176
+ max_concurrency : Optional[int], optional
177
+ Maximum number of concurrent requests when processing multiple messages.
178
+ If None, all requests run concurrently.
179
+ **overrides : Any
180
+ Runtime parameter overrides.
181
+
182
+ Returns
183
+ -------
184
+ Union[str, List[str], List[Union[str, List[str]]]]
185
+ For single message: completion text (string when n=1, list when n>1)
186
+ For multiple messages: list of completion texts (each element can be str or List[str])
187
+
188
+ Raises
189
+ ------
190
+ Exception
191
+ If the completion fails after all retries.
192
+ """
193
+ # Detect if we have single message or multiple messages
194
+ if not messages:
195
+ raise ValueError("messages cannot be empty")
196
+
197
+ # Check if first element is a dict (single message) or list (multiple messages)
198
+ if isinstance(messages[0], dict):
199
+ # Single message case
200
+ return await self._acreate_single(messages, **overrides)
201
+ else:
202
+ # Multiple messages case
203
+ messages_list = messages
204
+
205
+ if max_concurrency is not None:
206
+ # Use semaphore for concurrency control
207
+ semaphore = asyncio.Semaphore(max_concurrency)
208
+
209
+ async def _create_with_semaphore(msgs):
210
+ async with semaphore:
211
+ return await self._acreate_single(msgs, **overrides)
212
+
213
+ tasks = [_create_with_semaphore(msgs) for msgs in messages_list]
214
+ return await asyncio.gather(*tasks)
215
+ else:
216
+ # No concurrency limit - process all at once
217
+ tasks = [
218
+ self._acreate_single(msgs, **overrides) for msgs in messages_list
219
+ ]
220
+ return await asyncio.gather(*tasks)
221
+
222
+ async def _acreate_single(
163
223
  self, messages: list[dict[str, Any]], **overrides: Any
164
224
  ) -> Union[str, list[str]]:
165
- """Create an async completion using LiteLLM.
225
+ """Create a single async completion using LiteLLM.
166
226
 
167
227
  Parameters
168
228
  ----------
@@ -235,29 +295,6 @@ class LLMClientManager:
235
295
  results.append(result)
236
296
  return results
237
297
 
238
- async def acreate_completions_batch(
239
- self, messages_list: list[list[dict[str, Any]]], **overrides: Any
240
- ) -> list[Union[str, list[str]]]:
241
- """Create multiple completions in batch asynchronously.
242
-
243
- Parameters
244
- ----------
245
- messages_list : List[List[Dict[str, Any]]]
246
- List of message lists to process.
247
- **overrides : Any
248
- Runtime parameter overrides.
249
-
250
- Returns
251
- -------
252
- List[Union[str, List[str]]]
253
- List of completion texts. Each element is a single string when n=1 or n is None,
254
- or a list of strings when n>1.
255
- """
256
- tasks = [
257
- self.acreate_completion(messages, **overrides) for messages in messages_list
258
- ]
259
- return await asyncio.gather(*tasks)
260
-
261
298
  def _build_completion_kwargs(
262
299
  self, messages: list[dict[str, Any]], config: LLMConfig
263
300
  ) -> dict[str, Any]:
@@ -240,6 +240,7 @@ class LLMConfig:
240
240
  "logprobs",
241
241
  "top_logprobs",
242
242
  "user",
243
+ "timeout",
243
244
  ]:
244
245
  value = getattr(self, param)
245
246
  if value is not None:
@@ -297,6 +297,10 @@ class LLMChatBlock(BaseBlock):
297
297
  temperature, max_tokens, top_p, frequency_penalty, presence_penalty,
298
298
  stop, seed, response_format, stream, n, and provider-specific params.
299
299
 
300
+ Special flow-level parameters:
301
+ _flow_max_concurrency : int, optional
302
+ Maximum concurrency for async requests (passed by Flow).
303
+
300
304
  Returns
301
305
  -------
302
306
  Dataset
@@ -314,27 +318,73 @@ class LLMChatBlock(BaseBlock):
314
318
  f"Call flow.set_model_config() before generating."
315
319
  )
316
320
 
321
+ # Extract max_concurrency if provided by flow
322
+ flow_max_concurrency = override_kwargs.pop("_flow_max_concurrency", None)
323
+
317
324
  # Extract messages
318
325
  messages_list = samples[self.input_cols[0]]
319
326
 
320
327
  # Log generation start
321
328
  logger.info(
322
- f"Starting {'async' if self.async_mode else 'sync'} generation for {len(messages_list)} samples",
329
+ f"Starting {'async' if self.async_mode else 'sync'} generation for {len(messages_list)} samples"
330
+ + (
331
+ f" (max_concurrency={flow_max_concurrency})"
332
+ if flow_max_concurrency
333
+ else ""
334
+ ),
323
335
  extra={
324
336
  "block_name": self.block_name,
325
337
  "model": self.model,
326
338
  "provider": self.client_manager.config.get_provider(),
327
339
  "batch_size": len(messages_list),
328
340
  "async_mode": self.async_mode,
329
- "override_params": override_kwargs,
341
+ "flow_max_concurrency": flow_max_concurrency,
342
+ "override_params": {
343
+ k: (
344
+ "***"
345
+ if any(
346
+ s in k.lower()
347
+ for s in ["key", "token", "secret", "authorization"]
348
+ )
349
+ else v
350
+ )
351
+ for k, v in override_kwargs.items()
352
+ },
330
353
  },
331
354
  )
332
355
 
333
356
  # Generate responses
334
357
  if self.async_mode:
335
- responses = asyncio.run(
336
- self._generate_async(messages_list, **override_kwargs)
337
- )
358
+ try:
359
+ # Check if there's already a running event loop
360
+ loop = asyncio.get_running_loop()
361
+ # Check if nest_asyncio is applied (allows nested asyncio.run)
362
+ # Use multiple detection methods for robustness
363
+ nest_asyncio_applied = (
364
+ hasattr(loop, "_nest_patched")
365
+ or getattr(asyncio.run, "__module__", "") == "nest_asyncio"
366
+ )
367
+
368
+ if nest_asyncio_applied:
369
+ # nest_asyncio is applied, safe to use asyncio.run
370
+ responses = asyncio.run(
371
+ self._generate_async(
372
+ messages_list, flow_max_concurrency, **override_kwargs
373
+ )
374
+ )
375
+ else:
376
+ # Running inside an event loop without nest_asyncio
377
+ raise BlockValidationError(
378
+ f"async_mode=True cannot be used from within a running event loop for '{self.block_name}'. "
379
+ "Use an async entrypoint, set async_mode=False, or apply nest_asyncio.apply() in notebook environments."
380
+ )
381
+ except RuntimeError:
382
+ # No running loop; safe to create one
383
+ responses = asyncio.run(
384
+ self._generate_async(
385
+ messages_list, flow_max_concurrency, **override_kwargs
386
+ )
387
+ )
338
388
  else:
339
389
  responses = self._generate_sync(messages_list, **override_kwargs)
340
390
 
@@ -409,6 +459,7 @@ class LLMChatBlock(BaseBlock):
409
459
  async def _generate_async(
410
460
  self,
411
461
  messages_list: list[list[dict[str, Any]]],
462
+ flow_max_concurrency: Optional[int] = None,
412
463
  **override_kwargs: dict[str, Any],
413
464
  ) -> list[Union[str, list[str]]]:
414
465
  """Generate responses asynchronously.
@@ -417,6 +468,8 @@ class LLMChatBlock(BaseBlock):
417
468
  ----------
418
469
  messages_list : List[List[Dict[str, Any]]]
419
470
  List of message lists to process.
471
+ flow_max_concurrency : Optional[int], optional
472
+ Maximum concurrency for async requests.
420
473
  **override_kwargs : Dict[str, Any]
421
474
  Runtime parameter overrides.
422
475
 
@@ -426,9 +479,11 @@ class LLMChatBlock(BaseBlock):
426
479
  List of response strings or lists of response strings (when n > 1).
427
480
  """
428
481
  try:
429
- responses = await self.client_manager.acreate_completions_batch(
430
- messages_list, **override_kwargs
482
+ # Use unified client manager method with optional concurrency control
483
+ responses = await self.client_manager.acreate_completion(
484
+ messages_list, max_concurrency=flow_max_concurrency, **override_kwargs
431
485
  )
486
+
432
487
  return responses
433
488
 
434
489
  except Exception as e: