lm-deluge 0.0.52__py3-none-any.whl → 0.0.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of lm-deluge might be problematic.

lm_deluge/__init__.py CHANGED
@@ -1,10 +1,9 @@
- from .client import LLMClient, SamplingParams, APIResponse
+ from .client import APIResponse, LLMClient, SamplingParams
+ from .file import File
  from .prompt import Conversation, Message
  from .tool import Tool
- from .file import File
- import dotenv
 
- dotenv.load_dotenv()
+ # dotenv.load_dotenv() - don't do this, fucks with other packages
 
  __all__ = [
  "LLMClient",
@@ -60,7 +60,8 @@ def _build_anthropic_request(
  "type": "enabled",
  "budget_tokens": budget,
  }
- request_json.pop("top_p")
+ if "top_p" in request_json:
+ request_json["top_p"] = max(request_json["top_p"], 0.95)
  request_json["temperature"] = 1.0
  request_json["max_tokens"] += budget
  else:
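An illustrative sketch (not the library's exact code) of the new extended-thinking handling above: top_p is raised to at least 0.95 instead of being removed, temperature is pinned to 1.0, and the thinking budget is added on top of max_tokens.

def apply_thinking_budget(request_json: dict, budget: int) -> dict:
    # Assumed shape of Anthropic's extended-thinking parameter, per the context lines above.
    request_json["thinking"] = {"type": "enabled", "budget_tokens": budget}
    if "top_p" in request_json:
        # Keep top_p but clamp it upward rather than popping it (the change in this hunk).
        request_json["top_p"] = max(request_json["top_p"], 0.95)
    request_json["temperature"] = 1.0
    request_json["max_tokens"] += budget
    return request_json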
@@ -70,6 +71,11 @@ def _build_anthropic_request(
  if system_message is not None:
  request_json["system"] = system_message
 
+ # handle temp + top_p for opus 4.1/sonnet 4.5
+ if model.name in ["claude-sonnet-4-5-20250929", "claude-opus-4-1-20250805"]:
+ if "temperature" in request_json and "top_p" in request_json:
+ request_json.pop("top_p")
+
  if tools:
  mcp_servers = []
  tool_definitions = []
@@ -89,6 +95,9 @@ def _build_anthropic_request(
  _add_beta(base_headers, "computer-use-2025-01-24")
  elif tool["type"] == "code_execution_20250522":
  _add_beta(base_headers, "code-execution-2025-05-22")
+ elif tool["type"] in ["memory_20250818", "clear_tool_uses_20250919"]:
+ _add_beta(base_headers, "context-management-2025-06-27")
+
  elif isinstance(tool, MCPServer):
  _add_beta(base_headers, "mcp-client-2025-04-04")
  mcp_servers.append(tool.for_anthropic())
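The new built-in tool types above ("memory_20250818", "clear_tool_uses_20250919") opt the request into the context-management beta. A hypothetical sketch of what accumulating beta flags looks like (_add_beta's real body is not shown in this diff; the deduplication detail is an assumption):

def add_beta_sketch(headers: dict[str, str], beta: str) -> None:
    # Append a flag to the comma-separated anthropic-beta header, once.
    flags = [b for b in headers.get("anthropic-beta", "").split(",") if b]
    if beta not in flags:
        flags.append(beta)
    headers["anthropic-beta"] = ",".join(flags)

headers: dict[str, str] = {}
add_beta_sketch(headers, "context-management-2025-06-27")
print(headers)  # {'anthropic-beta': 'context-management-2025-06-27'}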
lm_deluge/client.py CHANGED
@@ -80,6 +80,22 @@ class _LLMClient(BaseModel):
  self._tracker.log_final_status()
  self._tracker = None
 
+ def reset_tracker(self):
+ """Reset tracker by closing and reopening with fresh state.
+
+ Useful when reusing a client across multiple batches and you want
+ the progress bar to start from 0 instead of showing cumulative totals.
+ """
+ if self._tracker is None:
+ return
+
+ # Close existing tracker (including progress bar)
+ show_progress = self._tracker.use_progress_bar
+ self.close()
+
+ # Create fresh tracker
+ self.open(total=0, show_progress=show_progress)
+
  # NEW! Builder methods
  def with_model(self, model: str):
  self.model_names = [model]
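A minimal usage sketch for reset_tracker(): reuse one client across batches and restart the progress bar between them. Only reset_tracker() and with_model() appear in this diff; the bare LLMClient() construction and the process_prompts_async method name are assumptions here.

import asyncio

from lm_deluge import LLMClient

async def main():
    client = LLMClient()  # assumed: default construction works
    client.with_model("claude-4.5-sonnet")  # builder method from this diff
    await client.process_prompts_async(["first batch prompt"])  # assumed batch method
    client.reset_tracker()  # progress bar starts from 0 for the next batch
    await client.process_prompts_async(["second batch prompt"])

asyncio.run(main())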
@@ -353,146 +369,61 @@ class _LLMClient(BaseModel):
  cache: CachePattern | None = None,
  use_responses_api: bool = False,
  ) -> list[APIResponse | None] | list[str | None] | dict[str, int]:
- # Convert prompts to Conversations - no upfront cache checking for dynamic caching!
- prompts = prompts_to_conversations(prompts)
- ids = list(range(len(prompts)))
- results: list[APIResponse | None] = [None for _ in range(len(prompts))]
- contexts: list[RequestContext | None] = [None for _ in range(len(prompts))]
- inflight_tasks: set[asyncio.Task[None]] = set()
- # Use existing tracker if client has been opened; otherwise open/close automatically
- tracker: StatusTracker
- tracker_preopened = self._tracker is not None
- if tracker_preopened:
- tracker = self._tracker # type: ignore[assignment]
- tracker.add_to_total(len(prompts))
- else:
- self.open(total=len(prompts), show_progress=show_progress)
- tracker = self._tracker # type: ignore[assignment]
- assert tracker is not None
+ """Process multiple prompts asynchronously using the start_nowait/wait_for_all backend.
 
- # Create retry queue for failed requests
- retry_queue: asyncio.Queue[RequestContext] = asyncio.Queue()
-
- # Calculate sleep time for rate limiting (legacy; gating happens in _wait_for_capacity)
- seconds_to_sleep_each_loop = (60.0 * 0.9) / tracker.max_requests_per_minute
-
- # Main dispatch loop - using original pattern but with all prompts
- next_context = None # Persist across iterations like original
- next_is_retry = False # Track whether next_context is a retry
- prompts_not_finished = True
- prompts_iter = iter(zip(ids, prompts))
-
- while True:
- # Get next context (retry or new) - only if we don't already have one waiting
- if next_context is None:
- if not retry_queue.empty():
- next_context = retry_queue.get_nowait()
- next_is_retry = True
- print(f"Retrying request {next_context.task_id}.")
- elif prompts_not_finished:
- try:
- task_id, prompt = next(prompts_iter)
- model, sampling_params = self._select_model()
- assert isinstance(prompt, Conversation)
- next_context = RequestContext(
- task_id=task_id,
- model_name=model,
- prompt=prompt,
- sampling_params=sampling_params,
- attempts_left=self.max_attempts,
- request_timeout=self.request_timeout,
- status_tracker=tracker,
- tools=tools,
- cache=cache,
- use_responses_api=use_responses_api,
- extra_headers=self.extra_headers,
- force_local_mcp=self.force_local_mcp,
- )
-
- next_is_retry = False
- except StopIteration:
- prompts_not_finished = False
-
- # Dispatch using shared capacity gate (consistent with start_nowait)
- if next_context:
- # Wait here until we have capacity to launch this context
- await self._wait_for_capacity(
- next_context.num_tokens, tracker, retry=next_is_retry
- )
-
- # Launch simplified request processing
- contexts[next_context.task_id] = next_context
-
- async def process_and_store(ctx: RequestContext):
- try:
- response = await self.process_single_request(ctx, retry_queue)
- results[ctx.task_id] = response
- except Exception as e:
- # Create an error response for validation errors and other exceptions
- error_response = APIResponse(
- id=ctx.task_id,
- model_internal=ctx.model_name,
- prompt=ctx.prompt,
- sampling_params=ctx.sampling_params,
- status_code=None,
- is_error=True,
- error_message=str(e),
- )
- results[ctx.task_id] = error_response
- # Mark task as completed so the main loop can finish
- if ctx.status_tracker:
- ctx.status_tracker.task_failed(ctx.task_id)
-
- task = asyncio.create_task(process_and_store(next_context))
- inflight_tasks.add(task)
- task.add_done_callback(inflight_tasks.discard)
- next_context = None # Reset after successful dispatch
- next_is_retry = False
-
- # Update progress - original logic
- tracker.update_pbar()
-
- # Check completion: consider final outcomes, not in-progress count
- # This avoids rare hangs if in-progress is miscounted (e.g., double-increment).
- if (tracker.num_tasks_succeeded + tracker.num_tasks_failed) >= len(
- prompts
- ) and retry_queue.empty():
- break
+ This implementation creates all tasks upfront and waits for them to complete,
+ avoiding issues with tracker state accumulating across multiple calls.
+ """
+ # Convert prompts to Conversations
+ prompts = prompts_to_conversations(prompts)
 
- # Yield briefly to allow in-flight tasks to progress
- await asyncio.sleep(min(0.01, seconds_to_sleep_each_loop))
+ # Ensure tracker exists (start_nowait will call add_to_total for each task)
+ if self._tracker is None:
+ self.open(total=0, show_progress=show_progress)
+ tracker_preopened = False
+ else:
+ tracker_preopened = True
+
+ # Start all tasks using start_nowait - tasks will coordinate via shared capacity lock
+ task_ids = []
+ for prompt in prompts:
+ assert isinstance(prompt, Conversation)
+ task_id = self.start_nowait(
+ prompt,
+ tools=tools,
+ cache=cache,
+ use_responses_api=use_responses_api,
+ )
+ task_ids.append(task_id)
 
- if inflight_tasks:
- await asyncio.gather(*inflight_tasks, return_exceptions=True)
+ # Wait for all tasks to complete
+ results = await self.wait_for_all(task_ids)
 
+ # Close tracker if we opened it
  if not tracker_preopened:
  self.close()
 
+ # Defensive check: This should rarely happen, but provides a safety net
  for idx, response in enumerate(results):
  if response is None:
- ctx = contexts[idx]
- prompt = ctx.prompt if ctx else prompts[idx]
- sampling_params = (
- ctx.sampling_params
- if ctx
- else self.sampling_params[0]
- if self.sampling_params
- else SamplingParams()
+ # This should only happen if there's a bug in _run_context
+ print(
+ f"WARNING: result[{idx}] is None! Creating defensive error response. "
+ f"Please report this bug."
  )
- model_name = ctx.model_name if ctx else self.model_names[0]
- assert isinstance(
- prompt, Conversation
- ), "expected prompt to be a conversation"
  results[idx] = APIResponse(
  id=idx,
- model_internal=model_name,
- prompt=prompt,
- sampling_params=sampling_params,
+ model_internal=self.model_names[0],
+ prompt=prompts[idx], # type: ignore
+ sampling_params=self.sampling_params[0]
+ if self.sampling_params
+ else SamplingParams(),
  status_code=None,
  is_error=True,
  error_message="Internal error: no response produced.",
  )
 
+ # Handle return format
  if return_completions_only:
  return [r.completion if r is not None else None for r in results]
 
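The rewritten method above delegates to machinery a caller can also drive directly. A sketch of that pattern, using only names shown in this diff (start_nowait, wait_for_all, APIResponse.completion); exact signatures beyond these are assumptions:

async def submit_and_collect(client, conversations):
    # start_nowait is called synchronously in the hunk above and returns a task id.
    task_ids = [client.start_nowait(conv) for conv in conversations]
    # wait_for_all gathers the corresponding APIResponse objects.
    responses = await client.wait_for_all(task_ids)
    return [r.completion if r is not None else None for r in responses]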
@@ -1,11 +1,12 @@
  import asyncio
  import io
  import json
+ import os
  from typing import Any
 
+ from lm_deluge.client import _LLMClient
  from lm_deluge.file import File
 
- from ..client import LLMClient
  from ..prompt import Conversation
  from ..util.json import load_json
 
@@ -18,7 +19,7 @@ except ImportError:
  async def extract_async(
  inputs: list[str | Any],
  schema: Any,
- client: LLMClient,
+ client: _LLMClient,
  document_name: str | None = None,
  object_name: str | None = None,
  show_progress: bool = True,
@@ -32,12 +33,13 @@ async def extract_async(
  raise ValueError("schema must be a pydantic model or a dict.")
 
  # warn if json_mode is not True
+ has_warned = os.environ.get("LM_DELUGE_WARN_JSON_MODE", False)
  for sp in client.sampling_params:
- if sp.json_mode is False:
+ if sp.json_mode is False and not has_warned:
  print(
  "Warning: json_mode is False for one or more sampling params. You may get invalid output."
  )
- break
+ os.environ["LM_DELUGE_WARN_JSON_MODE"] = "True"
  # check_schema(schema_dict) -- figure out later
  if document_name is None:
  document_name = "text"
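Per the logic above, LM_DELUGE_WARN_JSON_MODE doubles as an opt-out: the warning fires at most once per process, and pre-setting the variable suppresses it entirely.

import os

# Silence the one-time json_mode warning before calling extract()/extract_async().
os.environ["LM_DELUGE_WARN_JSON_MODE"] = "True"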
@@ -111,7 +113,7 @@ async def extract_async(
  def extract(
  inputs: list[str | Any],
  schema: Any,
- client: LLMClient,
+ client: _LLMClient,
  document_name: str | None = None,
  object_name: str | None = None,
  show_progress: bool = True,
@@ -10,6 +10,18 @@ ANTHROPIC_MODELS = {
  # ░███
  # █████
  #
+ "claude-4.5-sonnet": {
+ "id": "claude-4.5-sonnet",
+ "name": "claude-sonnet-4-5-20250929",
+ "api_base": "https://api.anthropic.com/v1",
+ "api_key_env_var": "ANTHROPIC_API_KEY",
+ "supports_json": False,
+ "api_spec": "anthropic",
+ "input_cost": 3.0,
+ "output_cost": 15.0,
+ "requests_per_minute": 4_000,
+ "tokens_per_minute": 400_000,
+ },
  "claude-4.1-opus": {
  "id": "claude-4.1-opus",
  "name": "claude-opus-4-1-20250805",
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lm_deluge
- Version: 0.0.52
+ Version: 0.0.54
  Summary: Python utility for using LLM API models.
  Author-email: Benjamin Anderson <ben@trytaylor.ai>
  Requires-Python: >=3.10
@@ -1,9 +1,9 @@
- lm_deluge/__init__.py,sha256=mAztMuxINmh7dGbYnT8tsmw1eryQAvd0jpY8yHzd0EE,315
+ lm_deluge/__init__.py,sha256=D01sxqvAuW1QPYQfdSOlBFVhf7QUr78fGgCNPvKXbAc,346
  lm_deluge/agent.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  lm_deluge/batches.py,sha256=rQocJLyIs3Ko_nRdAE9jT__5cKWYxiIRAH_Lw3L0E1k,24653
  lm_deluge/cache.py,sha256=xO2AIYvP3tUpTMKQjwQQYfGRJSRi6e7sMlRhLjsS-u4,4873
  lm_deluge/cli.py,sha256=Ilww5gOw3J5v0NReq_Ra4hhxU4BCIJBl1oTGxJZKedc,12065
- lm_deluge/client.py,sha256=WIz1M6PLZi08Y3SFhGS3Pxs1fP8P0nKSLMYzFUdNnOQ,35814
+ lm_deluge/client.py,sha256=iEyqSgYt_BZ-46nHbAAnqDFcTpZNDmrJV_oLUqixwNo,32018
  lm_deluge/config.py,sha256=H1tQyJDNHGFuwxqQNL5Z-CjWAC0luHSBA3iY_pxmACM,932
  lm_deluge/embed.py,sha256=CO-TOlC5kOTAM8lcnicoG4u4K664vCBwHF1vHa-nAGg,13382
  lm_deluge/errors.py,sha256=oHjt7YnxWbh-eXMScIzov4NvpJMo0-2r5J6Wh5DQ1tk,209
@@ -17,7 +17,7 @@ lm_deluge/tool.py,sha256=3weKo09E_srEKwHlz2WMVhk2BuDr5pJpi1UP0-qlcmo,16210
  lm_deluge/tracker.py,sha256=EHFPsS94NmsON2u97rSE70q1t6pwCsixUmGV-kIphMs,11531
  lm_deluge/usage.py,sha256=VMEKghePFIID5JFBObqYxFpgYxnbYm_dnHy7V1-_T6M,4866
  lm_deluge/api_requests/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
- lm_deluge/api_requests/anthropic.py,sha256=8MledxnN0S-H_fZRq8DGUokcjZPQ154mr8tPWAussJ8,7992
+ lm_deluge/api_requests/anthropic.py,sha256=Iy-AMo1o7xliwWhamFIUc601PZ0YWLhwAgszgfws42I,8467
  lm_deluge/api_requests/base.py,sha256=EVHNFtlttKbN7Tt1MnLaO-NjvKHPSV5CqlRv-OnpVAE,5593
  lm_deluge/api_requests/bedrock.py,sha256=GmVxXz3ERAeQ7e52Nlztt81O4H9eJOQeOnS6b65vjm4,15453
  lm_deluge/api_requests/common.py,sha256=BZ3vRO5TB669_UsNKugkkuFSzoLHOYJIKt4nV4sf4vc,422
@@ -38,13 +38,13 @@ lm_deluge/built_in_tools/anthropic/computer_use.py,sha256=47DEQpj8HBSa-_TImW-5JC
  lm_deluge/built_in_tools/anthropic/editor.py,sha256=DyC_DrHVTm1khU9QDL39vBuhu4tO5mS5H7xMRIT0Ng4,23327
  lm_deluge/llm_tools/__init__.py,sha256=TbZTETq9i_9yYskFWQKOG4pGh5ZiyE_D-h3RArfhGp4,231
  lm_deluge/llm_tools/classify.py,sha256=OdMwV5u4XoPlVhjOHX0sng5KPBIKFJmQeOE2fmnPgLU,21
- lm_deluge/llm_tools/extract.py,sha256=C3drVAMaoFx5jNE38Xi5cXxrqboyoZ9cE7nX5ylWbXw,4482
+ lm_deluge/llm_tools/extract.py,sha256=p61JW8yv5gQxPp4P8Hkm90ERgfD_Ek5IABzjIIlX-M0,4631
  lm_deluge/llm_tools/locate.py,sha256=lYNbKTmy9dTvj0lEQkOQ7yrxyqsgYzjD0C_byJKI_4w,6271
  lm_deluge/llm_tools/ocr.py,sha256=7fDlvs6uUOvbxMasvGGNJx5Fj6biM6z3lijKZaGN26k,23
  lm_deluge/llm_tools/score.py,sha256=9oGA3-k2U5buHQXkXaEI9M4Wb5yysNhTLsPbGeghAlQ,2580
  lm_deluge/llm_tools/translate.py,sha256=iXyYvQZ8bC44FWhBk4qpdqjKM1WFF7Shq-H2PxhPgg4,1452
  lm_deluge/models/__init__.py,sha256=Dh2CuTZeCAddIIXwWJXOjM10B0CpKqjTdMXWYuBP0s8,4289
- lm_deluge/models/anthropic.py,sha256=3pW7fyBY9Xh1m1RtfncU9amWTtKnjGZD0STjpu8iUSQ,5700
+ lm_deluge/models/anthropic.py,sha256=zTOjwNh00OYPZMFiaTniZzTtwL1Vmnlm750GhVaczqk,6112
  lm_deluge/models/bedrock.py,sha256=PIaXvho2agCm1hSSAEy8zHCITjApXT2eUOGDKW425tE,5424
  lm_deluge/models/cerebras.py,sha256=u2FMXJF6xMr0euDRKLKMo_NVTOcvSrrEpehbHr8sSeE,2050
  lm_deluge/models/cohere.py,sha256=M_7cVA9QD4qe1X4sZXCpKEkKrKz2jibaspiTnzsZ1GU,3998
@@ -66,8 +66,8 @@ lm_deluge/util/logprobs.py,sha256=UkBZakOxWluaLqHrjARu7xnJ0uCHVfLGHJdnYlEcutk,11
  lm_deluge/util/spatial.py,sha256=BsF_UKhE-x0xBirc-bV1xSKZRTUhsOBdGqsMKme20C8,4099
  lm_deluge/util/validation.py,sha256=hz5dDb3ebvZrZhnaWxOxbNSVMI6nmaOODBkk0htAUhs,1575
  lm_deluge/util/xml.py,sha256=Ft4zajoYBJR3HHCt2oHwGfymGLdvp_gegVmJ-Wqk4Ck,10547
- lm_deluge-0.0.52.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
- lm_deluge-0.0.52.dist-info/METADATA,sha256=X1JJBjExVA0NNXSaoB2NkOpT9f660AFe9u58BmKdb2w,13443
- lm_deluge-0.0.52.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- lm_deluge-0.0.52.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
- lm_deluge-0.0.52.dist-info/RECORD,,
+ lm_deluge-0.0.54.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
+ lm_deluge-0.0.54.dist-info/METADATA,sha256=5s1jfrJkLOXWWU3P4S_9Eketc8o3lJv137x4ZnyV1CA,13443
+ lm_deluge-0.0.54.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ lm_deluge-0.0.54.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
+ lm_deluge-0.0.54.dist-info/RECORD,,