langroid 0.51.1__py3-none-any.whl → 0.52.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
langroid/agent/base.py CHANGED
@@ -47,6 +47,7 @@ from langroid.language_models.base import (
 )
 from langroid.language_models.openai_gpt import OpenAIGPT, OpenAIGPTConfig
 from langroid.mytypes import Entity
+from langroid.parsing.file_attachment import FileAttachment
 from langroid.parsing.parse_json import extract_top_level_json
 from langroid.parsing.parser import Parser, ParsingConfig
 from langroid.prompts.prompts_config import PromptsConfig
@@ -440,6 +441,7 @@ class Agent(ABC):
     def create_agent_response(
         self,
         content: str | None = None,
+        files: List[FileAttachment] = [],
         content_any: Any = None,
         tool_messages: List[ToolMessage] = [],
         oai_tool_calls: Optional[List[OpenAIToolCall]] = None,
@@ -452,6 +454,7 @@ class Agent(ABC):
         return self.response_template(
             Entity.AGENT,
             content=content,
+            files=files,
             content_any=content_any,
             tool_messages=tool_messages,
             oai_tool_calls=oai_tool_calls,
@@ -689,6 +692,7 @@ class Agent(ABC):
         self,
         e: Entity,
         content: str | None = None,
+        files: List[FileAttachment] = [],
         content_any: Any = None,
         tool_messages: List[ToolMessage] = [],
         oai_tool_calls: Optional[List[OpenAIToolCall]] = None,
@@ -700,6 +704,7 @@ class Agent(ABC):
         """Template for response from entity `e`."""
         return ChatDocument(
             content=content or "",
+            files=files,
             content_any=content_any,
             tool_messages=tool_messages,
             oai_tool_calls=oai_tool_calls,
@@ -714,6 +719,7 @@ class Agent(ABC):
     def create_user_response(
         self,
         content: str | None = None,
+        files: List[FileAttachment] = [],
         content_any: Any = None,
         tool_messages: List[ToolMessage] = [],
         oai_tool_calls: List[OpenAIToolCall] | None = None,
@@ -726,6 +732,7 @@ class Agent(ABC):
         return self.response_template(
             e=Entity.USER,
             content=content,
+            files=files,
             content_any=content_any,
             tool_messages=tool_messages,
             oai_tool_calls=oai_tool_calls,
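
To illustrate: a minimal sketch (not from the package itself) of attaching a file to a user message via the new `files` parameter. The path `report.pdf` is hypothetical, and we assume a `ChatAgent` constructed with a default `ChatAgentConfig`:

```python
import langroid as lr
from langroid.parsing.file_attachment import FileAttachment

# Hypothetical local file; the MIME type is guessed from the extension.
attachment = FileAttachment.from_path("report.pdf")

agent = lr.ChatAgent(lr.ChatAgentConfig())
# `files` is carried on the resulting ChatDocument and, from there,
# into the LLMMessage(s) sent to the API (see chat_document.py below).
user_msg = agent.create_user_response(
    content="Please summarize the attached report.",
    files=[attachment],
)
```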

langroid/agent/chat_agent.py CHANGED
@@ -1511,12 +1511,14 @@ class ChatAgent(Agent):
         output_len = self.config.llm.model_max_output_tokens
         if (
             truncate
-            and self.chat_num_tokens(hist)
-            > self.llm.chat_context_length() - self.config.llm.model_max_output_tokens
+            and output_len > self.llm.chat_context_length() - self.chat_num_tokens(hist)
         ):
             # chat + output > max context length,
-            # so first try to shorten requested output len to fit.
-            output_len = self.llm.chat_context_length() - self.chat_num_tokens(hist)
+            # so first try to shorten requested output len to fit;
+            # use an extra margin of 300 tokens in case our calcs are off
+            output_len = (
+                self.llm.chat_context_length() - self.chat_num_tokens(hist) - 300
+            )
             if output_len < self.config.llm.min_output_tokens:
                 # unacceptably small output len, so drop early parts of conv history
                 # if output_len is still too long, then drop early parts of conv history
@@ -1534,10 +1536,17 @@ class ChatAgent(Agent):
                 # and last message (user msg).
                 raise ValueError(
                     """
-                The message history is longer than the max chat context
-                length allowed, and we have run out of messages to drop.
-                HINT: In your `OpenAIGPTConfig` object, try increasing
-                `chat_context_length` or decreasing `model_max_output_tokens`.
+                The (message history + max_output_tokens) is longer than the
+                max chat context length of this model, and we have tried
+                reducing the requested max output tokens, as well as dropping
+                early parts of the message history, to accommodate the model
+                context length, but we have run out of msgs to drop.
+
+                HINT: In the `llm` field of your `ChatAgentConfig` object,
+                which is of type `LLMConfig/OpenAIGPTConfig`, try
+                - increasing `chat_context_length`
+                  (if accurate for the model), or
+                - decreasing `max_output_tokens`
                     """
                 )
                 # drop the second message, i.e. first msg after the sys msg
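
The rewritten check asks whether history plus the requested output would overflow the context window, then shrinks the output budget with a 300-token safety margin. A self-contained sketch of the arithmetic (plain Python, not langroid code):

```python
def fit_output_len(
    context_length: int,
    history_tokens: int,
    requested_output: int,
    margin: int = 300,
) -> int:
    """Shrink the requested output tokens so history + output fits the window."""
    if requested_output > context_length - history_tokens:
        # history + output would overflow: shorten the output budget,
        # keeping a margin in case token counts are slightly off.
        return context_length - history_tokens - margin
    return requested_output

# An 8192-token window with 7000 tokens of history leaves
# 8192 - 7000 - 300 = 892 tokens for the reply.
assert fit_output_len(8192, 7000, 2000) == 892
```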

langroid/agent/chat_document.py CHANGED
@@ -19,6 +19,7 @@ from langroid.language_models.base import (
 )
 from langroid.mytypes import DocMetaData, Document, Entity
 from langroid.parsing.agent_chats import parse_message
+from langroid.parsing.file_attachment import FileAttachment
 from langroid.parsing.parse_json import extract_top_level_json, top_level_json_field
 from langroid.pydantic_v1 import BaseModel, Extra
 from langroid.utils.object_registry import ObjectRegistry
@@ -119,6 +120,7 @@ class ChatDocument(Document):
 
     reasoning: str = ""  # reasoning produced by a reasoning LLM
    content_any: Any = None  # to hold arbitrary data returned by responders
+    files: List[FileAttachment] = []  # list of file attachments
     oai_tool_calls: Optional[List[OpenAIToolCall]] = None
     oai_tool_id2result: Optional[OrderedDict[str, str]] = None
     oai_tool_choice: ToolChoiceTypes | Dict[str, Dict[str, str] | str] = "auto"
@@ -356,12 +358,8 @@ class ChatDocument(Document):
         Returns:
             List[LLMMessage]: list of LLMMessages corresponding to this ChatDocument.
         """
-        sender_name = None
+
         sender_role = Role.USER
-        fun_call = None
-        oai_tool_calls = None
-        tool_id = ""  # for OpenAI Assistant
-        chat_document_id: str = ""
         if isinstance(message, str):
             message = ChatDocument.from_str(message)
         content = message.content or to_string(message.content_any) or ""
@@ -381,6 +379,8 @@ class ChatDocument(Document):
             # same reasoning as for function-call above
             content += " " + "\n\n".join(str(tc) for tc in oai_tool_calls)
             oai_tool_calls = None
+        # some LLM APIs (e.g. gemini) don't like empty msg
+        content = content or " "
         sender_name = message.metadata.sender_name
         tool_ids = message.metadata.tool_ids
         tool_id = tool_ids[-1] if len(tool_ids) > 0 else ""
@@ -409,6 +409,7 @@ class ChatDocument(Document):
                     role=Role.TOOL,
                     tool_call_id=oai_tools[0].id,
                     content=content,
+                    files=message.files,
                     chat_document_id=chat_document_id,
                 )
             ]
@@ -424,6 +425,7 @@ class ChatDocument(Document):
                     role=Role.TOOL,
                     tool_call_id=message.metadata.oai_tool_id,
                     content=content,
+                    files=message.files,
                     chat_document_id=chat_document_id,
                 )
             ]
@@ -437,7 +439,8 @@ class ChatDocument(Document):
                 LLMMessage(
                     role=Role.TOOL,
                     tool_call_id=tool_id,
-                    content=result,
+                    content=result or " ",
+                    files=message.files,
                    chat_document_id=chat_document_id,
                )
                for tool_id, result in message.oai_tool_id2result.items()
@@ -450,6 +453,7 @@ class ChatDocument(Document):
                 role=sender_role,
                 tool_id=tool_id,  # for OpenAI Assistant
                 content=content,
+                files=message.files,
                 function_call=fun_call,
                 tool_calls=oai_tool_calls,
                 name=sender_name,

langroid/agent/special/doc_chat_agent.py CHANGED
@@ -204,8 +204,8 @@ class DocChatAgentConfig(ChatAgentConfig):
 
     llm: OpenAIGPTConfig = OpenAIGPTConfig(
         type="openai",
-        chat_model=OpenAIChatModel.GPT4,
-        completion_model=OpenAIChatModel.GPT4,
+        chat_model=OpenAIChatModel.GPT4o,
+        completion_model=OpenAIChatModel.GPT4o,
         timeout=40,
     )
     prompts: PromptsConfig = PromptsConfig(

langroid/agent/special/table_chat_agent.py CHANGED
@@ -118,8 +118,8 @@ class TableChatAgentConfig(ChatAgentConfig):
     vecdb: None | VectorStoreConfig = None
     llm: OpenAIGPTConfig = OpenAIGPTConfig(
         type="openai",
-        chat_model=OpenAIChatModel.GPT4,
-        completion_model=OpenAIChatModel.GPT4,
+        chat_model=OpenAIChatModel.GPT4o,
+        completion_model=OpenAIChatModel.GPT4o,
     )
     prompts: PromptsConfig = PromptsConfig(
         max_tokens=1000,

langroid/language_models/base.py CHANGED
@@ -21,6 +21,7 @@ from langroid.cachedb.base import CacheDBConfig
 from langroid.cachedb.redis_cachedb import RedisCacheConfig
 from langroid.language_models.model_info import ModelInfo, get_model_info
 from langroid.parsing.agent_chats import parse_message
+from langroid.parsing.file_attachment import FileAttachment
 from langroid.parsing.parse_json import parse_imperfect_json, top_level_json_field
 from langroid.prompts.dialog import collate_chat_history
 from langroid.pydantic_v1 import BaseModel, BaseSettings, Field
@@ -53,6 +54,13 @@ class StreamEventType(Enum):
     TOOL_ARGS = 5
 
 
+class RetryParams(BaseSettings):
+    max_retries: int = 5
+    initial_delay: float = 1.0
+    exponential_base: float = 1.3
+    jitter: bool = True
+
+
 class LLMConfig(BaseSettings):
     """
     Common configuration for all language models.
@@ -63,7 +71,8 @@ class LLMConfig(BaseSettings):
     streamer_async: Optional[Callable[..., Awaitable[None]]] = async_noop_fn
     api_base: str | None = None
     formatter: None | str = None
-    max_output_tokens: int | None = 8192  # specify None to use model_max_output_tokens
+    # specify None if you want to use the full max output tokens of the model
+    max_output_tokens: int | None = 8192
     timeout: int = 20  # timeout for API requests
     chat_model: str = ""
     completion_model: str = ""
@@ -86,11 +95,13 @@ class LLMConfig(BaseSettings):
     # Dict of model -> (input/prompt cost, output/completion cost)
     chat_cost_per_1k_tokens: Tuple[float, float] = (0.0, 0.0)
     completion_cost_per_1k_tokens: Tuple[float, float] = (0.0, 0.0)
+    retry_params: RetryParams = RetryParams()
 
     @property
     def model_max_output_tokens(self) -> int:
-        return (
-            self.max_output_tokens or get_model_info(self.chat_model).max_output_tokens
+        return min(
+            self.max_output_tokens or get_model_info(self.chat_model).max_output_tokens,
+            get_model_info(self.chat_model).max_output_tokens,
         )
 
 
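A sketch of setting the new `retry_params` knob; the values are illustrative, not recommendations:

```python
import langroid.language_models as lm
from langroid.language_models.base import RetryParams

# Illustrative values: fewer retries, slower start, steeper backoff.
cfg = lm.OpenAIGPTConfig(
    chat_model=lm.OpenAIChatModel.GPT4o,
    retry_params=RetryParams(
        max_retries=3,
        initial_delay=2.0,
        exponential_base=1.5,
        jitter=True,
    ),
)
llm = lm.OpenAIGPT(cfg)
```

Note also that `model_max_output_tokens` is now clamped with `min(...)`, so a configured `max_output_tokens` can no longer exceed the model's own output limit.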

@@ -263,13 +274,14 @@ class LLMMessage(BaseModel):
     tool_call_id: Optional[str] = None  # which OpenAI LLM tool this is a response to
     tool_id: str = ""  # used by OpenAIAssistant
     content: str
+    files: List[FileAttachment] = []
     function_call: Optional[LLMFunctionCall] = None
     tool_calls: Optional[List[OpenAIToolCall]] = None
     timestamp: datetime = Field(default_factory=datetime.utcnow)
     # link to corresponding chat document, for provenance/rewind purposes
     chat_document_id: str = ""
 
-    def api_dict(self, has_system_role: bool = True) -> Dict[str, Any]:
+    def api_dict(self, model: str, has_system_role: bool = True) -> Dict[str, Any]:
         """
         Convert to dictionary for API request, keeping ONLY
         the fields that are expected in an API call!
@@ -283,6 +295,17 @@ class LLMMessage(BaseModel):
             dict: dictionary representation of LLM message
         """
         d = self.dict()
+        files: List[FileAttachment] = d.pop("files")
+        if len(files) > 0 and self.role == Role.USER:
+            # If there are files, then content is an array of
+            # different content-parts
+            d["content"] = [
+                dict(
+                    type="text",
+                    text=self.content,
+                )
+            ] + [f.to_dict(model) for f in self.files]
+
         # if there is a key k = "role" with value "system", change to "user"
         # in case has_system_role is False
         if not has_system_role and "role" in d and d["role"] == "system":
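
Assuming a user message with one PDF attached, the `content` entry produced by `api_dict` becomes a list of content-parts, roughly as follows (shape sketch based on the OpenAI-style fallback branch of `FileAttachment.to_dict`; the data URI is truncated):

```python
content = [
    {"type": "text", "text": "Please summarize the attached report."},
    {
        "type": "file",
        "file": {
            "filename": "report.pdf",
            "file_data": "data:application/pdf;base64,JVBERi0xLjQK...",
        },
    },
]
```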

langroid/language_models/model_info.py CHANGED
@@ -24,13 +24,16 @@ class OpenAIChatModel(ModelName):
     """Enum for OpenAI Chat models"""
 
     GPT3_5_TURBO = "gpt-3.5-turbo-1106"
-    GPT4 = "gpt-4"
+    GPT4 = "gpt-4o"  # avoid deprecated gpt-4
     GPT4_TURBO = "gpt-4-turbo"
     GPT4o = "gpt-4o"
     GPT4o_MINI = "gpt-4o-mini"
     O1 = "o1"
     O1_MINI = "o1-mini"
     O3_MINI = "o3-mini"
+    GPT4_1 = "gpt-4.1"
+    GPT4_1_MINI = "gpt-4.1-mini"
+    GPT4_1_NANO = "gpt-4.1-nano"
 
 
 class OpenAICompletionModel(str, Enum):
@@ -44,6 +47,7 @@ class AnthropicModel(ModelName):
     """Enum for Anthropic models"""
 
     CLAUDE_3_5_SONNET = "claude-3-5-sonnet-latest"
+    CLAUDE_3_7_SONNET = "claude-3-7-sonnet-latest"
     CLAUDE_3_OPUS = "claude-3-opus-latest"
     CLAUDE_3_SONNET = "claude-3-sonnet-20240229"
     CLAUDE_3_HAIKU = "claude-3-haiku-20240307"
@@ -63,6 +67,7 @@ class GeminiModel(ModelName):
     GEMINI_1_5_FLASH = "gemini-1.5-flash"
     GEMINI_1_5_FLASH_8B = "gemini-1.5-flash-8b"
     GEMINI_1_5_PRO = "gemini-1.5-pro"
+    GEMINI_2_5_PRO = "gemini-2.5-pro-exp-02-05"
     GEMINI_2_PRO = "gemini-2.0-pro-exp-02-05"
     GEMINI_2_FLASH = "gemini-2.0-flash"
     GEMINI_2_FLASH_LITE = "gemini-2.0-flash-lite-preview"
@@ -160,6 +165,33 @@ MODEL_INFO: Dict[str, ModelInfo] = {
         output_cost_per_million=30.0,
         description="GPT-4 Turbo",
     ),
+    OpenAIChatModel.GPT4_1_NANO.value: ModelInfo(
+        name=OpenAIChatModel.GPT4_1_NANO.value,
+        provider=ModelProvider.OPENAI,
+        context_length=1_047_576,
+        max_output_tokens=32_768,
+        input_cost_per_million=0.10,
+        output_cost_per_million=0.40,
+        description="GPT-4.1 Nano",
+    ),
+    OpenAIChatModel.GPT4_1_MINI.value: ModelInfo(
+        name=OpenAIChatModel.GPT4_1_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=1_047_576,
+        max_output_tokens=32_768,
+        input_cost_per_million=0.40,
+        output_cost_per_million=1.60,
+        description="GPT-4.1 Mini",
+    ),
+    OpenAIChatModel.GPT4_1.value: ModelInfo(
+        name=OpenAIChatModel.GPT4_1.value,
+        provider=ModelProvider.OPENAI,
+        context_length=1_047_576,
+        max_output_tokens=32_768,
+        input_cost_per_million=2.00,
+        output_cost_per_million=8.00,
+        description="GPT-4.1",
+    ),
     OpenAIChatModel.GPT4o.value: ModelInfo(
         name=OpenAIChatModel.GPT4o.value,
         provider=ModelProvider.OPENAI,
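
The new enum members plug in anywhere a chat model is configured; a minimal sketch:

```python
import langroid.language_models as lm

# GPT-4.1 Mini: ~1M-token context, 32K max output, per the table above.
cfg = lm.OpenAIGPTConfig(chat_model=lm.OpenAIChatModel.GPT4_1_MINI)
mdl = lm.OpenAIGPT(cfg)
print(mdl.info().context_length)  # 1047576
```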

langroid/language_models/openai_gpt.py CHANGED
@@ -91,10 +91,13 @@ LLAMACPP_API_KEY = os.environ.get("LLAMA_API_KEY", DUMMY_API_KEY)
 
 openai_chat_model_pref_list = [
     OpenAIChatModel.GPT4o,
+    OpenAIChatModel.GPT4_1_NANO,
+    OpenAIChatModel.GPT4_1_MINI,
+    OpenAIChatModel.GPT4_1,
     OpenAIChatModel.GPT4o_MINI,
     OpenAIChatModel.O1_MINI,
+    OpenAIChatModel.O3_MINI,
     OpenAIChatModel.O1,
-    OpenAIChatModel.GPT3_5_TURBO,
 ]
 
 openai_completion_model_pref_list = [
@@ -1731,8 +1734,7 @@ class OpenAIGPT(LanguageModel):
             logging.error(friendly_error(e, "Error in OpenAIGPT.achat: "))
             raise e
 
-    @retry_with_exponential_backoff
-    def _chat_completions_with_backoff(self, **kwargs):  # type: ignore
+    def _chat_completions_with_backoff_body(self, **kwargs):  # type: ignore
         cached = False
         hashed_key, result = self._cache_lookup("Completion", **kwargs)
         if result is not None:
@@ -1781,8 +1783,17 @@ class OpenAIGPT(LanguageModel):
             self._cache_store(hashed_key, result.model_dump())
         return cached, hashed_key, result
 
-    @async_retry_with_exponential_backoff
-    async def _achat_completions_with_backoff(self, **kwargs):  # type: ignore
+    def _chat_completions_with_backoff(self, **kwargs):  # type: ignore
+        retry_func = retry_with_exponential_backoff(
+            self._chat_completions_with_backoff_body,
+            initial_delay=self.config.retry_params.initial_delay,
+            max_retries=self.config.retry_params.max_retries,
+            exponential_base=self.config.retry_params.exponential_base,
+            jitter=self.config.retry_params.jitter,
+        )
+        return retry_func(**kwargs)
+
+    async def _achat_completions_with_backoff_body(self, **kwargs):  # type: ignore
         cached = False
         hashed_key, result = self._cache_lookup("Completion", **kwargs)
         if result is not None:
@@ -1836,6 +1847,16 @@ class OpenAIGPT(LanguageModel):
             self._cache_store(hashed_key, result.model_dump())
         return cached, hashed_key, result
 
+    async def _achat_completions_with_backoff(self, **kwargs):  # type: ignore
+        retry_func = async_retry_with_exponential_backoff(
+            self._achat_completions_with_backoff_body,
+            initial_delay=self.config.retry_params.initial_delay,
+            max_retries=self.config.retry_params.max_retries,
+            exponential_base=self.config.retry_params.exponential_base,
+            jitter=self.config.retry_params.jitter,
+        )
+        return await retry_func(**kwargs)
+
     def _prep_chat_completion(
         self,
         messages: Union[str, List[LLMMessage]],
@@ -1876,10 +1897,13 @@ class OpenAIGPT(LanguageModel):
         args: Dict[str, Any] = dict(
             model=chat_model,
             messages=[
-                m.api_dict(has_system_role=self.info().allows_system_message)
+                m.api_dict(
+                    self.config.chat_model,
+                    has_system_role=self.info().allows_system_message,
+                )
                 for m in (llm_messages)
             ],
-            max_tokens=max_tokens,
+            max_completion_tokens=max_tokens,
             stream=self.get_stream(),
         )
         if self.get_stream():
@@ -2073,7 +2097,7 @@ class OpenAIGPT(LanguageModel):
             function_call,
             response_format,
         )
-        cached, hashed_key, response = self._chat_completions_with_backoff(**args)
+        cached, hashed_key, response = self._chat_completions_with_backoff(**args)  # type: ignore
         if self.get_stream() and not cached:
             llm_response, openai_response = self._stream_response(response, chat=True)
             self._cache_store(hashed_key, openai_response)
@@ -2106,7 +2130,7 @@ class OpenAIGPT(LanguageModel):
             function_call,
             response_format,
         )
-        cached, hashed_key, response = await self._achat_completions_with_backoff(
+        cached, hashed_key, response = await self._achat_completions_with_backoff(  # type: ignore
             **args
         )
         if self.get_stream() and not cached:
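
The backoff decorators are replaced by wrappers built at call time, so retry behavior now comes from the instance's `config.retry_params` rather than fixed decorator defaults. A simplified stand-in for the pattern (this is not langroid's actual `retry_with_exponential_backoff`, only a sketch of a decorator factory with the keyword arguments used above):

```python
import random
import time
from typing import Any, Callable


def retry_with_exponential_backoff(
    func: Callable[..., Any],
    *,
    initial_delay: float = 1.0,
    max_retries: int = 5,
    exponential_base: float = 1.3,
    jitter: bool = True,
) -> Callable[..., Any]:
    """Wrap `func` so failures trigger exponentially spaced retries."""

    def wrapper(*args: Any, **kwargs: Any) -> Any:
        delay = initial_delay
        for attempt in range(max_retries + 1):
            try:
                return func(*args, **kwargs)
            except Exception:
                if attempt == max_retries:
                    raise
                # Sleep, optionally jittered, then grow the delay.
                time.sleep(delay * (1 + random.random() * jitter))
                delay *= exponential_base

    return wrapper
```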

langroid/parsing/document_parser.py CHANGED
@@ -31,7 +31,7 @@ if TYPE_CHECKING:
     from PIL import Image
 
 from langroid.mytypes import DocMetaData, Document
-from langroid.parsing.parser import Parser, ParsingConfig
+from langroid.parsing.parser import LLMPdfParserConfig, Parser, ParsingConfig
 
 logger = logging.getLogger(__name__)
 
@@ -1040,7 +1040,8 @@ class LLMPdfParser(DocumentParser):
             raise ValueError(
                 "LLMPdfParser requires a llm-based config in pdf parsing config"
             )
-        self.model_name = config.pdf.llm_parser_config.model_name
+        self.llm_parser_config: LLMPdfParserConfig = config.pdf.llm_parser_config
+        self.model_name = self.llm_parser_config.model_name
 
         # Ensure output directory exists
         self.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
@@ -1059,9 +1060,7 @@ class LLMPdfParser(DocumentParser):
         temp_file.close()
         self.output_filename = Path(temp_file.name)
 
-        self.max_tokens = (
-            config.pdf.llm_parser_config.max_tokens or self.DEFAULT_MAX_TOKENS
-        )
+        self.max_tokens = self.llm_parser_config.max_tokens or self.DEFAULT_MAX_TOKENS
 
         """
         If True, each PDF page is processed as a separate chunk,
@@ -1069,12 +1068,12 @@ class LLMPdfParser(DocumentParser):
         grouped into chunks based on `max_token_limit` before being sent
         to the LLM.
         """
-        self.split_on_page = config.pdf.llm_parser_config.split_on_page or False
+        self.split_on_page = self.llm_parser_config.split_on_page or False
 
         # Rate limiting parameters
         import asyncio
 
-        self.requests_per_minute = config.pdf.llm_parser_config.requests_per_minute or 5
+        self.requests_per_minute = self.llm_parser_config.requests_per_minute or 5
 
         """
         A semaphore to control the number of concurrent requests to the LLM,
@@ -1231,6 +1230,7 @@ class LLMPdfParser(DocumentParser):
         llm_config = OpenAIGPTConfig(
             chat_model=self.model_name,
             max_output_tokens=self.max_tokens,
+            timeout=self.llm_parser_config.timeout,
         )
         llm = OpenAIGPT(config=llm_config)
         page_nums = self._page_num_str(chunk.get("page_numbers", "?"))
@@ -1242,7 +1242,7 @@ class LLMPdfParser(DocumentParser):
                 image_url=dict(url=data_uri),
             )
         elif "claude" in self.model_name.lower():
-            # optimistrally try this: some API proxies like litellm
+            # optimistically try this: some API proxies like litellm
             # support this, and others may not.
             file_content = dict(
                 type="file",
@@ -1259,27 +1259,32 @@ class LLMPdfParser(DocumentParser):
                     file_data=data_uri,
                 ),
             )
+        prompt = (
+            self.llm_parser_config.prompt
+            or self.LLM_PDF_MD_SYSTEM_INSTRUCTION
+        )
+        system_prompt = (
+            self.llm_parser_config.system_prompt
+            or """
+            You are an expert pdf -> markdown converter.
+            Do NOT use any triple backquotes when you present the
+            markdown content, like ```markdown etc.
+            FAITHFULLY CONVERT THE PDF TO MARKDOWN,
+            retaining ALL content as you find it.
+            """
+        )
 
         # Send the request with PDF content and system instructions
         response = await llm.async_client.chat.completions.create(  # type: ignore
             model=self.model_name.split("/")[-1],
             messages=[
-                dict(
-                    role="system",
-                    content="""
-                    You are an expert pdf -> markdown converter.
-                    Do NOT use any triple backquotes when you present the
-                    markdown content,like ```markdown etc.
-                    FAITHFULLY CONVERT THE PDF TO MARKDOWN,
-                    retaining ALL content as you find it.
-                    """,
-                ),
+                dict(role="system", content=system_prompt),
                 dict(  # type: ignore
                     role="user",
                     content=[
                         dict(
                             type="text",
-                            text=self.LLM_PDF_MD_SYSTEM_INSTRUCTION,
+                            text=prompt,
                        ),
                        file_content,
                    ],

langroid/parsing/file_attachment.py ADDED
@@ -0,0 +1,157 @@
+import base64
+import mimetypes
+import uuid
+from pathlib import Path
+from typing import Any, BinaryIO, Dict, Optional, Union
+
+from langroid.pydantic_v1 import BaseModel
+
+
+class FileAttachment(BaseModel):
+    """Represents a file attachment to be sent to an LLM API."""
+
+    content: bytes
+    filename: Optional[str] = None
+    mime_type: str = "application/octet-stream"
+
+    def __init__(self, **data: Any) -> None:
+        """Initialize with sensible defaults for filename if not provided."""
+        if "filename" not in data or data["filename"] is None:
+            # Generate a more readable unique filename
+            unique_id = str(uuid.uuid4())[:8]
+            data["filename"] = f"attachment_{unique_id}.bin"
+        super().__init__(**data)
+
+    @classmethod
+    def from_path(cls, file_path: Union[str, Path]) -> "FileAttachment":
+        """Create a FileAttachment from a file path.
+
+        Args:
+            file_path: Path to the file to attach
+
+        Returns:
+            FileAttachment instance
+        """
+        path = Path(file_path)
+        with open(path, "rb") as f:
+            content = f.read()
+
+        mime_type, _ = mimetypes.guess_type(path)
+        if mime_type is None:
+            mime_type = "application/octet-stream"
+
+        return cls(content=content, filename=path.name, mime_type=mime_type)
+
+    @classmethod
+    def from_bytes(
+        cls,
+        content: bytes,
+        filename: Optional[str] = None,
+        mime_type: Optional[str] = None,
+    ) -> "FileAttachment":
+        """Create a FileAttachment from bytes content.
+
+        Args:
+            content: Raw bytes content
+            filename: Optional name to use for the file
+            mime_type: MIME type of the content, guessed from filename if provided
+
+        Returns:
+            FileAttachment instance
+        """
+        if mime_type is None and filename is not None:
+            mime_type, _ = mimetypes.guess_type(filename)
+
+        return cls(
+            content=content,
+            filename=filename,
+            mime_type=mime_type or "application/octet-stream",
+        )
+
+    @classmethod
+    def from_io(
+        cls,
+        file_obj: BinaryIO,
+        filename: Optional[str] = None,
+        mime_type: Optional[str] = None,
+    ) -> "FileAttachment":
+        """Create a FileAttachment from a file-like object.
+
+        Args:
+            file_obj: File-like object with binary content
+            filename: Optional name to use for the file
+            mime_type: MIME type of the content, guessed from filename if provided
+
+        Returns:
+            FileAttachment instance
+        """
+        content = file_obj.read()
+        return cls.from_bytes(content, filename, mime_type)
+
+    @classmethod
+    def from_text(
+        cls,
+        text: str,
+        filename: Optional[str] = None,
+        mime_type: str = "text/plain",
+        encoding: str = "utf-8",
+    ) -> "FileAttachment":
+        """Create a FileAttachment from text content.
+
+        Args:
+            text: Text content to include
+            filename: Optional name to use for the file
+            mime_type: MIME type of the content
+            encoding: Text encoding to use
+
+        Returns:
+            FileAttachment instance
+        """
+        content = text.encode(encoding)
+        return cls(content=content, filename=filename, mime_type=mime_type)
+
+    def to_base64(self) -> str:
+        """Convert content to base64 encoding.
+
+        Returns:
+            Base64 encoded string
+        """
+        return base64.b64encode(self.content).decode("utf-8")
+
+    def to_data_uri(self) -> str:
+        """Convert content to a data URI.
+
+        Returns:
+            A data URI string containing the base64-encoded content with MIME type
+        """
+        base64_content = self.to_base64()
+        return f"data:{self.mime_type};base64,{base64_content}"
+
+    def to_dict(self, model: str) -> Dict[str, Any]:
+        """
+        Convert to a dictionary suitable for API requests.
+        Tested only for PDF files.
+
+        Returns:
+            Dictionary with file data
+        """
+        if "gemini" in model.lower():
+            return dict(type="image_url", image_url=dict(url=self.to_data_uri()))
+        elif "claude" in model.lower():
+            # optimistically try this: some API proxies like litellm
+            # support this, and others may not.
+            return dict(
+                type="file",
+                file=dict(
+                    file_data=self.to_data_uri(),
+                ),
+            )
+        else:
+            # fallback: assume file upload is similar to OpenAI API
+            return dict(
+                type="file",
+                file=dict(
+                    filename=self.filename,
+                    file_data=self.to_data_uri(),
+                ),
+            )
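
A quick sketch of the new class in use (the file path is hypothetical):

```python
from langroid.parsing.file_attachment import FileAttachment

pdf = FileAttachment.from_path("report.pdf")  # MIME type guessed: application/pdf
note = FileAttachment.from_text("hello", filename="note.txt")

print(pdf.to_data_uri()[:28])                   # data:application/pdf;base64,
print(pdf.to_dict("gpt-4o")["type"])            # "file" (OpenAI-style fallback)
print(pdf.to_dict("gemini-2.0-flash")["type"])  # "image_url" (data-URI branch)
```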

langroid/parsing/parser.py CHANGED
@@ -43,6 +43,9 @@ class LLMPdfParserConfig(BaseSettings):
     max_tokens: Optional[int] = None
     split_on_page: Optional[bool] = True
     requests_per_minute: Optional[int] = 5
+    timeout: int = 60
+    prompt: str = ""  # override with a domain-specific prompt
+    system_prompt: str = ""  # override with a domain-specific system prompt
 
 
 class MarkerConfig(BaseSettings):
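
A sketch of overriding the new fields; the model choice and prompt text are illustrative:

```python
from langroid.parsing.parser import LLMPdfParserConfig

pdf_parser_cfg = LLMPdfParserConfig(
    model_name="gemini-2.0-flash",  # hypothetical choice of parsing model
    timeout=120,  # allow slower responses on large PDFs
    prompt="Extract only the tables in this PDF, as markdown tables.",
    system_prompt="You are a precise PDF-to-markdown table extractor.",
)
```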

langroid-0.52.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: langroid
-Version: 0.51.1
+Version: 0.52.0
 Summary: Harness LLMs with Multi-Agent Programming
 Author-email: Prasad Chalasani <pchalasani@gmail.com>
 License: MIT
@@ -846,7 +846,7 @@ import langroid.language_models as lm
 
 mdl = lm.OpenAIGPT(
     lm.OpenAIGPTConfig(
-        chat_model=lm.OpenAIChatModel.GPT4,  # or, e.g. "ollama/qwen2.5"
+        chat_model=lm.OpenAIChatModel.GPT4o,  # or, e.g. "ollama/qwen2.5"
     ),
 )
 
langroid-0.52.0.dist-info/RECORD CHANGED
@@ -3,10 +3,10 @@ langroid/exceptions.py,sha256=OPjece_8cwg94DLPcOGA1ddzy5bGh65pxzcHMnssTz8,2995
 langroid/mytypes.py,sha256=HIcYAqGeA9OK0Hlscym2FI5Oax9QFljDZoVgRlomhRk,4014
 langroid/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/__init__.py,sha256=ll0Cubd2DZ-fsCMl7e10hf9ZjFGKzphfBco396IKITY,786
-langroid/agent/base.py,sha256=bs5OLCf534mhsdR7Rgf27GqVNuSV2bOVbD46Y86mGFA,79829
+langroid/agent/base.py,sha256=lWR4ivX_elTFejpknLhkO-DlAGT3aG6ojQAVkzDOqMc,80090
 langroid/agent/batch.py,sha256=vi1r5i1-vN80WfqHDSwjEym_KfGsqPGUtwktmiK1nuk,20635
-langroid/agent/chat_agent.py,sha256=BBXGx4nYUG4xgeRGoT4HmVJ9lVi6E3kPt0YYBW52e3E,84557
-langroid/agent/chat_document.py,sha256=gWceR8mcggyGbJePJQgVvqzVivYlfPlFp8pUZ7yUZvg,17821
+langroid/agent/chat_agent.py,sha256=mIkf3kq5m1RPXeBb6U52pXB9itum0ChcpXABC_g-Xfs,85082
+langroid/agent/chat_document.py,sha256=6O20Fp4QrquykaF2jFtwNHkvcoDte1LLwVZNk9mVH9c,18057
 langroid/agent/openai_assistant.py,sha256=JkAcs02bIrgPNVvUWVR06VCthc5-ulla2QMBzux_q6o,34340
 langroid/agent/task.py,sha256=HB6N-Jn80HFqCf0ZYOC1v3Bn3oO7NLjShHQJJFwW0q4,90557
 langroid/agent/tool_message.py,sha256=BhjP-_TfQ2tgxuY4Yo_JHLOwwt0mJ4BwjPnREvEY4vk,14744
@@ -14,13 +14,13 @@ langroid/agent/xml_tool_message.py,sha256=6SshYZJKIfi4mkE-gIoSwjkEYekQ8GwcSiCv7a
 langroid/agent/callbacks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/callbacks/chainlit.py,sha256=UHB6P_J40vsVnssosqkpkOVWRf9NK4TOY0_G2g_Arsg,20900
 langroid/agent/special/__init__.py,sha256=gik_Xtm_zV7U9s30Mn8UX3Gyuy4jTjQe9zjiE3HWmEo,1273
-langroid/agent/special/doc_chat_agent.py,sha256=dOL9Y0xAslkwepCdKU8Dc1m5Vk8qgk-gLbU4JzsmTII,65234
+langroid/agent/special/doc_chat_agent.py,sha256=ALp2rv12J-ChRCxVtflDwz6n0qIbUAymldIy8qpsvrg,65236
 langroid/agent/special/doc_chat_task.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/special/lance_doc_chat_agent.py,sha256=s8xoRs0gGaFtDYFUSIRchsgDVbS5Q3C2b2mr3V1Fd-Q,10419
 langroid/agent/special/lance_tools.py,sha256=qS8x4wi8mrqfbYV2ztFzrcxyhHQ0ZWOc-zkYiH7awj0,2105
 langroid/agent/special/relevance_extractor_agent.py,sha256=zIx8GUdVo1aGW6ASla0NPQjYYIpmriK_TYMijqAx3F8,4796
 langroid/agent/special/retriever_agent.py,sha256=o2UfqiCGME0t85SZ6qjK041_WZYqXSuV1SeH_3KtVuc,1931
-langroid/agent/special/table_chat_agent.py,sha256=d9v2wsblaRx7oMnKhLV7uO_ujvk9gh59pSGvBXyeyNc,9659
+langroid/agent/special/table_chat_agent.py,sha256=ii-xd7pRLLfRhamFZ04zpSkRO4xPn6Rm5qmA4z4N0HA,9661
 langroid/agent/special/arangodb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/special/arangodb/arangodb_agent.py,sha256=12Y54c84c9qXV-YXRBcI5HaqyiY75JR4TmqlURYKJAM,25851
 langroid/agent/special/arangodb/system_messages.py,sha256=udwfLleTdyz_DuxHuoiv2wHEZoAPBPbwdF_ivjIfP5c,6867
@@ -69,11 +69,11 @@ langroid/embedding_models/protoc/embeddings_pb2.pyi,sha256=UkNy7BrNsmQm0vLb3NtGX
 langroid/embedding_models/protoc/embeddings_pb2_grpc.py,sha256=9dYQqkW3JPyBpSEjeGXTNpSqAkC-6FPtBHyteVob2Y8,2452
 langroid/language_models/__init__.py,sha256=3aD2qC1lz8v12HX4B-dilv27gNxYdGdeu1QvDlkqqHs,1095
 langroid/language_models/azure_openai.py,sha256=SW0Fp_y6HpERr9l6TtF6CYsKgKwjUf_hSL_2mhTV4wI,5034
-langroid/language_models/base.py,sha256=sCDC02hqIgjY73KnCvc-YGxZJm_LAs4Z1VVQpIFWLyQ,27754
+langroid/language_models/base.py,sha256=pfN3t-BktKmN_4K8pwmpjC9OdcHxsytM5s5TmsJ-nPg,28560
 langroid/language_models/config.py,sha256=9Q8wk5a7RQr8LGMT_0WkpjY8S4ywK06SalVRjXlfCiI,378
 langroid/language_models/mock_lm.py,sha256=5BgHKDVRWFbUwDT_PFgTZXz9-k8wJSA2e3PZmyDgQ1k,4022
-langroid/language_models/model_info.py,sha256=tfBBxL0iUf2mVN6CjcvqflzFUVg2oZqOJZexZ8jHTYA,12216
-langroid/language_models/openai_gpt.py,sha256=FG3eMWedko0kN-n-SkSbwnrm5hSxoW2wmJSBAvOAOYU,84731
+langroid/language_models/model_info.py,sha256=0NE1zWNUHJwcM5jhwNxUqGjbpek-Nq7ljGdWpM8R3RQ,13380
+langroid/language_models/openai_gpt.py,sha256=KFbG6q143CYt6SP8rU1UEFX2mffn8yXfVOdEyuA5IaY,85854
 langroid/language_models/utils.py,sha256=hC5p61P_Qlrowkm5wMap1A1b5ZUCwK_XhPIzAQk1T1s,5483
 langroid/language_models/prompt_formatter/__init__.py,sha256=2-5cdE24XoFDhifOLl8yiscohil1ogbP1ECkYdBlBsk,372
 langroid/language_models/prompt_formatter/base.py,sha256=eDS1sgRNZVnoajwV_ZIha6cba5Dt8xjgzdRbPITwx3Q,1221
@@ -82,11 +82,12 @@ langroid/language_models/prompt_formatter/llama2_formatter.py,sha256=YdcO88qyBeu
 langroid/parsing/__init__.py,sha256=2oUWJJAxIavq9Wtw5RGlkXLq3GF3zgXeVLLW4j7yeb8,1138
 langroid/parsing/agent_chats.py,sha256=sbZRV9ujdM5QXvvuHVjIi2ysYSYlap-uqfMMUKulrW0,1068
 langroid/parsing/code_parser.py,sha256=5ze0MBytrGGkU69pA_bJDjRm6QZz_QYfPcIwkagUa7U,3796
-langroid/parsing/document_parser.py,sha256=7_pHu-_yQOETtDATv5VRdVSvac9kJRuZiwQ6EbJqJ_o,57403
+langroid/parsing/document_parser.py,sha256=cUcp4JKS_LpsjX7OqnGBhHorDHx7FG5pvKGjRBkQoMw,57685
+langroid/parsing/file_attachment.py,sha256=iIMTmAkfu-TgV9CKnDEB-BiDDN6WOepH51sIz6-PnFw,4826
 langroid/parsing/md_parser.py,sha256=JUgsUpCaeAuBndmtDaJR9HMZaje1gmtXtaLXJHst3i8,21340
 langroid/parsing/para_sentence_split.py,sha256=AJBzZojP3zpB-_IMiiHismhqcvkrVBQ3ZINoQyx_bE4,2000
 langroid/parsing/parse_json.py,sha256=aADo38bAHQhC8on4aWZZzVzSDy-dK35vRLZsFI2ewh8,4756
-langroid/parsing/parser.py,sha256=Tbe1mQ7wp6GVx2xMWv1raIkpepTN0qNrqOxakWY6Zkc,15437
+langroid/parsing/parser.py,sha256=uaAITarcGI2504zcP_dLhp3LjNdh9A6R_yS-o_VcaH8,15599
 langroid/parsing/pdf_utils.py,sha256=rmNJ9UzuBgXTAYwj1TtRJcD8h53x7cizhgyYHKO88I4,1513
 langroid/parsing/repo_loader.py,sha256=NpysuyzRHvgL3F4BB_wGo5sCUnZ3FOlVCJmZ7CaUdbs,30202
 langroid/parsing/routing.py,sha256=-FcnlqldzL4ZoxuDwXjQPNHgBe9F9-F4R6q7b_z9CvI,1232
@@ -129,7 +130,7 @@ langroid/vector_store/pineconedb.py,sha256=otxXZNaBKb9f_H75HTaU3lMHiaR2NUp5MqwLZ
 langroid/vector_store/postgres.py,sha256=wHPtIi2qM4fhO4pMQr95pz1ZCe7dTb2hxl4VYspGZoA,16104
 langroid/vector_store/qdrantdb.py,sha256=O6dSBoDZ0jzfeVBd7LLvsXu083xs2fxXtPa9gGX3JX4,18443
 langroid/vector_store/weaviatedb.py,sha256=Yn8pg139gOy3zkaPfoTbMXEEBCiLiYa1MU5d_3UA1K4,11847
-langroid-0.51.1.dist-info/METADATA,sha256=9E0M5JzLk_fuMOLH918i7fIBwWKMm1O6J3VY8DoG3NM,63641
-langroid-0.51.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-langroid-0.51.1.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
-langroid-0.51.1.dist-info/RECORD,,
+langroid-0.52.0.dist-info/METADATA,sha256=xiHZhRlCgRm6s8wRIZ300xBO5gUW1j2wqWZG9rj1npc,63642
+langroid-0.52.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+langroid-0.52.0.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
+langroid-0.52.0.dist-info/RECORD,,