lm-deluge 0.0.5__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of lm-deluge might be problematic.

lm_deluge/prompt.py CHANGED
```diff
@@ -36,6 +36,9 @@ class Text:
     def gemini(self) -> dict:
         return {"text": self.text}
 
+    def mistral(self) -> dict:
+        return {"type": "text", "text": self.text}
+
 
 ###############################################################################
 # 2. One conversational turn (role + parts) #
@@ -162,6 +165,12 @@ class Message:
         role = "user" if self.role == "user" else "model"
         return {"role": role, "parts": parts}
 
+    def mistral(self) -> dict:
+        parts = [p.mistral() for p in self.parts]
+        # Shortcut: single text becomes a bare string
+        role = self.role
+        return {"role": role, "content": parts}
+
 
 ###############################################################################
 # 3. A whole conversation (ordered list of messages) #
@@ -233,15 +242,8 @@ class Conversation:
         other = [m.gemini() for m in self.messages if m.role != "system"]
         return system_msg, other
 
-    def to_cohere(self) -> list[dict]:
-        messages = []
-        for m in self.messages:
-            if len(m.parts) > 1:
-                raise ValueError("Cohere does not support multi-part messages")
-            if isinstance(m.parts[0], Image):
-                raise ValueError("Cohere does not support images")
-            messages.append({"role": m.role, "text": m.parts[0].text})
-        return messages
+    def to_mistral(self) -> list[dict]:
+        return [m.mistral() for m in self.messages]
 
     # ── misc helpers ----------------------------------------------------------
     _tok = tiktoken.encoding_for_model("gpt-4")
```
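
For orientation, here's a sketch of what the new Mistral serialization should produce, using the `Conversation` and `Message` helpers shown in the README below; the expected output in the comment follows directly from the diff above.

```python
from lm_deluge import Conversation, Message

convo = Conversation.system("You are terse.").add(Message.user("Hi!"))

# Message.mistral() serializes each part with the new Text.mistral() helper,
# so to_mistral() should yield a standard role/content message list:
# [
#     {"role": "system", "content": [{"type": "text", "text": "You are terse."}]},
#     {"role": "user", "content": [{"type": "text", "text": "Hi!"}]},
# ]
print(convo.to_mistral())
```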
lm_deluge/util/json.py CHANGED
```diff
@@ -31,8 +31,8 @@ def strip_json(json_string: str | None) -> str | None:
     if json_string is None:
         return None
     json_string = json_string.strip()
-    if json_string.startswith("```json"):
-        json_string = json_string.split("```json", 1)[1]
+    json_string = json_string.removeprefix("```json")
+    json_string = json_string.removesuffix("```")
     if "```json\n" in json_string:
         json_string = json_string.split("```json\n", 1)[1]
     json_string = json_string.strip("`").strip()
@@ -127,7 +127,8 @@ def load_json(
     if json_string is None:
         raise ValueError("Invalid (None) json_string")
     json_string = strip_json(json_string)
-    raise ValueError("Invalid (empty) json_string")
+    if json_string is None or len(json_string) == 0:
+        raise ValueError("Invalid (empty) json_string")
 
     # Try standard JSON parsing
     try:
```
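
The replacement leans on `str.removeprefix`/`str.removesuffix` (Python 3.9+), which return the string unchanged when the prefix or suffix is absent. A minimal standalone sketch of the new stripping behavior:

```python
# Fenced input: both markers are removed, leaving bare JSON.
fenced = '```json\n{"a": 1}\n```'
print(fenced.removeprefix("```json").removesuffix("```").strip())  # {"a": 1}

# Unfenced input: removeprefix/removesuffix are no-ops, so nothing is lost.
plain = '{"a": 1}'
print(plain.removeprefix("```json").removesuffix("```").strip())  # {"a": 1}
```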
lm_deluge/util/xml.py CHANGED
```diff
@@ -66,26 +66,25 @@ def get_tags(html_string: str, tag: str, return_attributes: bool = False) -> lis
         return []
 
     try:
-        # Regex pattern to match all instances of the tag and capture attributes and content
+        # 1. find the tag + inner HTML exactly as before
         pattern = re.compile(rf"<{tag}([^>]*)>(.*?)</{tag}>", re.DOTALL)
         matches = pattern.findall(html_string)
 
+        # 2. only parse attributes if the caller asked for them
         if not return_attributes:
-            return [
-                match[1] for match in matches
-            ]  # Return just the content inside the tags
+            return [m[1] for m in matches]
 
-        # Parse attributes if return_attributes is True
-        attributes_pattern = re.compile(r'(\w+)\s*=\s*"([^"]*)"')  # Matches key="value"
+        # --- new bits ---------------------------------------------------------------
+        # key   = (\w+)
+        # quote = (['"])   ← remembers whether it was ' or "
+        # value = (.*?)\2  ← capture up to the *same* quote (back-ref \2)
+        attributes_pattern = re.compile(r'(\w+)\s*=\s*([\'"])(.*?)\2')
 
         results = []
-        for match in matches:
-            tag_attributes = match[0]  # The attributes portion of the tag
-            tag_contents = match[1]  # The content portion of the tag
+        for tag_attrs, tag_contents in matches:
+            attrs = {key: val for key, _, val in attributes_pattern.findall(tag_attrs)}
+            results.append({"content": tag_contents, "attributes": attrs})
 
-            # Parse attributes into a dictionary
-            attributes = dict(attributes_pattern.findall(tag_attributes))
-            results.append({"content": tag_contents, "attributes": attributes})
         return results
     except re.error:
         print(f"Failed to compile regular expression for HTML tag '{tag}'")
```
lm_deluge-0.0.6.dist-info/METADATA ADDED
```
Metadata-Version: 2.4
Name: lm_deluge
Version: 0.0.6
Summary: Python utility for using LLM API models.
Author-email: Benjamin Anderson <ben@trytaylor.ai>
Requires-Python: >=3.10
Description-Content-Type: text/markdown
Requires-Dist: python-dotenv
Requires-Dist: json5
Requires-Dist: PyYAML
Requires-Dist: pandas
Requires-Dist: aiohttp
Requires-Dist: tiktoken
Requires-Dist: xxhash
Requires-Dist: tqdm
Requires-Dist: google-auth
Requires-Dist: requests-aws4auth
Requires-Dist: pydantic
Requires-Dist: bs4
Requires-Dist: lxml
Requires-Dist: pdf2image
Requires-Dist: pillow
Provides-Extra: full
Requires-Dist: pymupdf; extra == "full"
Requires-Dist: fasttext-wheel; extra == "full"
Requires-Dist: fasttext-langdetect; extra == "full"
```
# lm_deluge

`lm_deluge` is a lightweight helper library for maxing out your rate limits with LLM providers. It provides the following:

- **Unified client** – Send prompts to all relevant models with a single client.
- **Massive concurrency with throttling** – Set `max_tokens_per_minute` and `max_requests_per_minute` and let it fly. The client will process as many requests as possible while respecting rate limits and retrying failures.
- **Spray across models/providers** – Configure a client with multiple models from any provider(s), and sampling weights. The client samples a model for each request.
- **Caching** – Save completions in a local or distributed cache to avoid repeated LLM calls to process the same input.
- **Convenient message constructor** – No more looking up how to build an Anthropic messages list with images. Our `Conversation` and `Message` classes work great with our client or with the `openai` and `anthropic` packages.
- **Sync and async APIs** – Use the client from sync or async code.

**STREAMING IS NOT IN SCOPE.** There are plenty of packages that let you stream chat completions across providers. The sole purpose of this package is to do very fast batch inference using APIs. Sorry!

## Installation

```bash
pip install lm-deluge
```

There are optional goodies. If you want support for PDFs and language detection via FastText:

```bash
pip install "lm-deluge[full]"
```

The package relies on environment variables for API keys. Typical variables include `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `COHERE_API_KEY`, `META_API_KEY`, and `GOOGLE_API_KEY`. `LLMClient` automatically loads the `.env` file when imported; we recommend using that to set the environment variables.

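For example, a minimal `.env` might look like the following (placeholder values; you only need keys for the providers you actually call):

```bash
OPENAI_API_KEY=sk-...
ANTHROPIC_API_KEY=sk-ant-...
GOOGLE_API_KEY=...
```
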
## Quickstart

The easiest way to get started is with the `.basic` constructor. This uses sensible defaults for rate limits and sampling parameters so that you don't have to provide a ton of arguments.

```python
from lm_deluge import LLMClient

client = LLMClient.basic("gpt-4o-mini")
resps = client.process_prompts_sync(["Hello, world!"])
print(resps[0].completion)
```

## Spraying Across Models

To distribute your requests across models, just provide a list of more than one model to the constructor. The rate limits apply to the client as a whole, not per-model, so you may want to increase them:

```python
from lm_deluge import LLMClient

client = LLMClient.basic(
    ["gpt-4o-mini", "claude-haiku-anthropic"],
    max_requests_per_minute=10_000
)
resps = client.process_prompts_sync(
    ["Hello, ChatGPT!", "Hello, Claude!"]
)
print(resps[0].completion)
```

## Configuration

API calls can be customized in a few ways.

1. **Sampling parameters.** These determine things like structured outputs, maximum completion tokens, nucleus sampling, etc. Provide a custom `SamplingParams` to the `LLMClient` to set temperature, top_p, json_mode, max_new_tokens, and/or reasoning_effort.

   You can pass one `SamplingParams` to use for all models, or a list of `SamplingParams` that's the same length as the list of models (see the sketch after the example below). You can also pass many of these arguments directly to `LLMClient.basic` so you don't have to construct an entire `SamplingParams` object.

2. **Arguments to LLMClient.** This is where you set the request timeout, rate limits, model name(s), model weight(s) for distributing requests across models, retries, and caching.
3. **Arguments to process_prompts.** Per call, you can set verbosity, whether to display progress, and whether to return just completions rather than the full `APIResponse` objects.

Putting it all together:

```python
from lm_deluge import LLMClient, SamplingParams

client = LLMClient(
    "gpt-4",
    max_requests_per_minute=100,
    max_tokens_per_minute=100_000,
    max_concurrent_requests=500,
    sampling_params=SamplingParams(temperature=0.5, max_new_tokens=30)
)

await client.process_prompts_async(
    ["What is the capital of Mars?"],
    show_progress=False,
    return_completions_only=True
)
```

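And a sketch of the per-model case, pairing each model with its own `SamplingParams`; we're assuming here that the `LLMClient` constructor accepts the same model-list form as `LLMClient.basic`:

```python
from lm_deluge import LLMClient, SamplingParams

# One SamplingParams per model, in the same order as the model list.
client = LLMClient(
    ["gpt-4o-mini", "claude-haiku-anthropic"],
    max_requests_per_minute=5_000,
    max_tokens_per_minute=500_000,
    sampling_params=[
        SamplingParams(temperature=0.2, max_new_tokens=256),
        SamplingParams(temperature=0.8, max_new_tokens=256),
    ],
)
```
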
## Multi-Turn Conversations

Constructing conversations to pass to models is notoriously annoying. Each provider has a slightly different way of defining a list of messages, and with the introduction of images/multi-part messages it's only gotten worse. We provide convenience constructors so you don't have to remember all that stuff.

```python
from lm_deluge import LLMClient, Message, Conversation

prompt = Conversation.system("You are a helpful assistant.").add(
    Message.user("What's in this image?").add_image("tests/image.jpg")
)

client = LLMClient.basic("gpt-4.1-mini")
resps = client.process_prompts_sync([prompt])
```

This just works. Images can be local images on disk, URLs, bytes, base64 data URLs... go wild. You can use `Conversation.to_openai` or `Conversation.to_anthropic` to format your messages for the OpenAI or Anthropic clients directly, as in the sketch below.

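For instance, a minimal sketch of handing the same conversation to the official `openai` client (assuming `to_openai` returns a standard chat-completions message list):

```python
from openai import OpenAI

oai = OpenAI()  # reads OPENAI_API_KEY from the environment
response = oai.chat.completions.create(
    model="gpt-4.1-mini",
    messages=prompt.to_openai(),  # `prompt` from the example above
)
print(response.choices[0].message.content)
```
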
## Caching

`lm_deluge.cache` includes LevelDB, SQLite, and custom dictionary-based caches. Pass an instance via `LLMClient(..., cache=my_cache)` and previously seen prompts will not be re-sent across different `process_prompts_[...]` calls, as sketched below.

**IMPORTANT:** Caching does not currently work for prompts in the SAME batch. That is, if you call `process_prompts_sync` with the same prompt 100 times, there will be 0 cache hits. If you call `process_prompts_sync` a *second* time with those same 100 prompts, all 100 will be cache hits. The cache is intended to be persistent and help you save costs across many invocations, but it can't help with a single batch-inference session (yet!).

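A sketch of the intended flow. Note that `SqliteCache` and its constructor below are assumptions for illustration; check `lm_deluge/cache.py` for the real class names:

```python
from lm_deluge import LLMClient
from lm_deluge.cache import SqliteCache  # hypothetical name

cache = SqliteCache("completions.db")  # hypothetical constructor
client = LLMClient.basic("gpt-4o-mini", cache=cache)

# The first call pays for the API request; re-running the same prompt in a
# *later* call (not the same batch) is served from the cache.
resps = client.process_prompts_sync(["What is 2 + 2?"])
resps = client.process_prompts_sync(["What is 2 + 2?"])  # cache hit
```
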
## Asynchronous Client

Use this in asynchronous code, or in a Jupyter notebook. If you try to use the sync client in a Jupyter notebook, you'll have to use `nest-asyncio`, because internally the sync client uses async code. Don't do it! Just use the async client!

```python
import asyncio

from lm_deluge import LLMClient

client = LLMClient.basic("gpt-4o-mini")

async def main():
    responses = await client.process_prompts_async(
        ["an async call"],
        return_completions_only=True,
    )
    print(responses[0])

asyncio.run(main())
```

## Available Models

We support all models in `src/lm_deluge/models.py`. An older version of this client supported Bedrock and Vertex. We plan to re-implement Bedrock support (our previous support was spotty, and we need to figure out cross-region inference in order to support the newest Claude models). Vertex support is not currently planned, since Google allows you to connect your Vertex account to AI Studio, and Vertex authentication is a huge pain (it requires service account credentials, etc.).

## Feature Support

We support structured outputs via the `json_mode` parameter of `SamplingParams`. Structured outputs with a schema are planned. Reasoning models are supported via the `reasoning_effort` parameter, which is translated to a thinking budget for Claude/Gemini. Image models are supported. We don't support tool use yet, but support is planned (keep an eye out for a unified tool definition spec that works for all models!). We support logprobs for OpenAI models that return them via the `logprobs` argument to the `LLMClient`.

## Built-in tools

The `lm_deluge.llm_tools` package exposes a few helper functions:

- `extract` – structure text or images into a Pydantic model based on a schema (sketched below).
- `translate` – translate a list of strings to English.
- `score_llm` – simple yes/no style scoring with optional log-probability output.

Experimental embeddings (`embed.embed_parallel_async`) and document reranking (`rerank.rerank_parallel_async`) clients are also provided.
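
To give a flavor of `extract`, here's a sketch; the call shape below is an assumption for illustration, so check `lm_deluge/llm_tools/extract.py` for the real signature:

```python
from pydantic import BaseModel

from lm_deluge import LLMClient
from lm_deluge.llm_tools import extract

class Invoice(BaseModel):
    vendor: str
    total_usd: float

client = LLMClient.basic("gpt-4o-mini")

# Hypothetical call shape: structure free text into the Invoice schema.
invoices = extract(
    ["ACME Corp billed $1,204.50 for Q3 consulting."],
    schema=Invoice,   # assumed parameter name
    client=client,    # assumed parameter name
)
```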
lm_deluge-0.0.5.dist-info/RECORD → lm_deluge-0.0.6.dist-info/RECORD RENAMED

```diff
@@ -1,37 +1,37 @@
-lm_deluge/__init__.py,sha256=r3Erra7aT5i2Mu5B9lcY8ll1T6q8_xF-xgnataKgKPg,153
+lm_deluge/__init__.py,sha256=rndOr4Rcfnpttz-onWU3vVEm-MM0WDFgz6KexKPAx0k,222
 lm_deluge/cache.py,sha256=VB1kv8rM2t5XWPR60uhszFcxLDnVKOe1oA5hYjVDjIo,4375
-lm_deluge/client.py,sha256=xGeJxLsm1Y-szUAVjNUYbbF51qLzlKcKZGhzNxZ37HE,28489
+lm_deluge/client.py,sha256=ERH0SkNvdM1zc8HYS5dxRGxVxUb4CXpUhveG3mz-w2I,28533
 lm_deluge/embed.py,sha256=m-X8UK4gV9KKD7Wv3yarAceMQaj7gR1JwzD_sB0MOQY,13183
 lm_deluge/errors.py,sha256=oHjt7YnxWbh-eXMScIzov4NvpJMo0-2r5J6Wh5DQ1tk,209
 lm_deluge/gemini_limits.py,sha256=V9mpS9JtXYz7AY6OuKyQp5TuIMRH1BVv9YrSNmGmHNA,1569
-lm_deluge/image.py,sha256=8tIPMZ5-R_JUVHXH8SpPGrmxBMSiU1XU3oFv4AkVr1c,7028
-lm_deluge/models.py,sha256=zAm4mPOTI81cuP6BBGUo1n8yaUhVHYQp97Jq1zLt1aI,40333
-lm_deluge/prompt.py,sha256=AVTvm2n-NV9PqFWJaKGeGYQbXpzTcKQGicBEIW8jfxQ,12710
+lm_deluge/image.py,sha256=hFbRajqEVQbkirAfOxsTPkeq-27Zl-so4AWBFeUbpBI,7161
+lm_deluge/models.py,sha256=67bgQnDoJKIen3B_cmWGtkW8Pi6zUWsnQFfPWMWOErM,41939
+lm_deluge/prompt.py,sha256=bhDAlfUQ_Fq6Wh-L9jOfoiMbDGyVKGkjGicnwKJWpcI,12680
 lm_deluge/rerank.py,sha256=tW1c3gQCAqaF8Ez-r-4qxYAcdKqxnLMxwHApKOUKwk4,11289
 lm_deluge/sampling_params.py,sha256=E2kewh1vz-1Qcy5xNBCzihfGgT_GcHYMfzaWb3FLiXs,739
 lm_deluge/tool.py,sha256=RVUW3E3FW11jCM-R7pIL1GpRs1YKCOjvTkL1D5xPetk,3196
 lm_deluge/tracker.py,sha256=Dk99scN_NeDEO0gkLO5efXiZq11Ga-k6cerUHWN7IWY,1292
 lm_deluge/api_requests/__init__.py,sha256=_aSpD6CJL9g6OpLPoChXiHjl4MH_OlGcKgfZaW8cgLM,71
 lm_deluge/api_requests/anthropic.py,sha256=URbiD-ANn_P3StFJVP2JoDWuoloZVsAUly8CGSyV2Kw,6618
-lm_deluge/api_requests/base.py,sha256=PGI6gMnT0ZDhlkk9YWBRTUiwqPLan-O7yYt0MKyFjy0,14753
-lm_deluge/api_requests/cohere.py,sha256=KyYpbOjJxaTdauQBG_D7WWPLjkqmmk9-YFFnxYyT0vU,4898
-lm_deluge/api_requests/common.py,sha256=ZtUirAlYNE4CJOQPbNip-mWKOMXCJi2_malwYsUcqsg,642
-lm_deluge/api_requests/google.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lm_deluge/api_requests/openai.py,sha256=vjf2WFqQimroyGt_hX730lF8T83Bv29MzSyA8Ek0A9s,5663
-lm_deluge/api_requests/vertex.py,sha256=DxexjkkQOhrMgaDaVNEKo1E_0OhntUx4M_oDGfZUYgU,14609
+lm_deluge/api_requests/base.py,sha256=Yt5Bxd5C5mZrbAMQYDghk0KRhUChSbTEsVI8DoThZBs,14805
+lm_deluge/api_requests/common.py,sha256=EjwTnKrvgBx-HnRVt0kSJZ9RM7CM-QyhlIQkr1jxP-4,220
+lm_deluge/api_requests/mistral.py,sha256=ThlV1jBfhpAwkaqPKhdUq-lIq2OienRbhEaSK4cctvI,5370
+lm_deluge/api_requests/openai.py,sha256=YgJMUio23ks6VLv6BDBZAW6Bnfd2fpidSidaHXzyXFY,6135
 lm_deluge/api_requests/deprecated/bedrock.py,sha256=WrcIShCoO8JCUSlFOCHxg6KQCNTZfw3TpYTvSpYk4mA,11320
+lm_deluge/api_requests/deprecated/cohere.py,sha256=KgDScD6_bWhAzOY5BHZQKSA3kurt4KGENqC4wLsGmcU,5142
 lm_deluge/api_requests/deprecated/deepseek.py,sha256=FEApI93VAWDwuaqTooIyKMgONYqRhdUmiAPBRme-IYs,4582
 lm_deluge/api_requests/deprecated/mistral.py,sha256=pOfOZUM4U35I3Plch84SnAFpDAzouHcSNNMtgxRvjy4,4709
+lm_deluge/api_requests/deprecated/vertex.py,sha256=ygXz2RjdXErPCSBbiHLEWbf5_sSTIi31WoX0UaoYzRI,15275
 lm_deluge/llm_tools/__init__.py,sha256=TbZTETq9i_9yYskFWQKOG4pGh5ZiyE_D-h3RArfhGp4,231
 lm_deluge/llm_tools/extract.py,sha256=-GtyqJUxKvB567tk_NnCMklazz18xZBCPlAjYHTVUWg,3649
 lm_deluge/llm_tools/score.py,sha256=9oGA3-k2U5buHQXkXaEI9M4Wb5yysNhTLsPbGeghAlQ,2580
 lm_deluge/llm_tools/translate.py,sha256=iXyYvQZ8bC44FWhBk4qpdqjKM1WFF7Shq-H2PxhPgg4,1452
-lm_deluge/util/json.py,sha256=KuzyXvBCwoW2PLc6durR0aNA0MjlEcQprA1NTZmqh_g,5326
+lm_deluge/util/json.py,sha256=dCeG9j1D17rXmQJbKJH79X0CGof4Wlqd55TDg4D6ky8,5388
 lm_deluge/util/logprobs.py,sha256=UkBZakOxWluaLqHrjARu7xnJ0uCHVfLGHJdnYlEcutk,11768
 lm_deluge/util/pdf.py,sha256=zBcuh2IJxRfro6JPzQkAqdc6hUcrduFwb9aEoacNG9U,1590
 lm_deluge/util/validation.py,sha256=hz5dDb3ebvZrZhnaWxOxbNSVMI6nmaOODBkk0htAUhs,1575
-lm_deluge/util/xml.py,sha256=-yUKOZwsAY009w4ppQ8FI4hb5gDvLzMwcSgv88aEnaE,10578
-lm_deluge-0.0.5.dist-info/METADATA,sha256=MxI6OVGdYD6MsjeQPyoXDT8pneKB8FcZ652ZW0t_qyk,4957
-lm_deluge-0.0.5.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
-lm_deluge-0.0.5.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
-lm_deluge-0.0.5.dist-info/RECORD,,
+lm_deluge/util/xml.py,sha256=Ft4zajoYBJR3HHCt2oHwGfymGLdvp_gegVmJ-Wqk4Ck,10547
+lm_deluge-0.0.6.dist-info/METADATA,sha256=c7Pof1ic06fW4ZiCZoq8N99Vx_-SIRVT_UeQ4-nWZQo,8267
+lm_deluge-0.0.6.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
+lm_deluge-0.0.6.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
+lm_deluge-0.0.6.dist-info/RECORD,,
```