langroid 0.51.2__tar.gz → 0.52.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langroid-0.51.2 → langroid-0.52.0}/PKG-INFO +1 -1
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/base.py +7 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/chat_document.py +6 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/language_models/base.py +22 -1
- {langroid-0.51.2 → langroid-0.52.0}/langroid/language_models/openai_gpt.py +28 -7
- {langroid-0.51.2 → langroid-0.52.0}/langroid/parsing/document_parser.py +17 -12
- langroid-0.52.0/langroid/parsing/file_attachment.py +157 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/parsing/parser.py +2 -0
- {langroid-0.51.2 → langroid-0.52.0}/pyproject.toml +1 -1
- {langroid-0.51.2 → langroid-0.52.0}/.gitignore +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/LICENSE +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/README.md +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/__init__.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/__init__.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/batch.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/callbacks/__init__.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/callbacks/chainlit.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/chat_agent.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/openai_assistant.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/__init__.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/arangodb/__init__.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/arangodb/arangodb_agent.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/arangodb/system_messages.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/arangodb/tools.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/arangodb/utils.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/doc_chat_agent.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/doc_chat_task.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/lance_doc_chat_agent.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/lance_rag/__init__.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/lance_rag/critic_agent.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/lance_rag/lance_rag_task.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/lance_rag/query_planner_agent.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/lance_tools.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/neo4j/__init__.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/neo4j/csv_kg_chat.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/neo4j/neo4j_chat_agent.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/neo4j/system_messages.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/neo4j/tools.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/relevance_extractor_agent.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/retriever_agent.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/sql/__init__.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/sql/sql_chat_agent.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/sql/utils/__init__.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/sql/utils/description_extractors.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/sql/utils/populate_metadata.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/sql/utils/system_message.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/sql/utils/tools.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/table_chat_agent.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/task.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/tool_message.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/tools/__init__.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/tools/duckduckgo_search_tool.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/tools/exa_search_tool.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/tools/file_tools.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/tools/google_search_tool.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/tools/metaphor_search_tool.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/tools/orchestration.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/tools/recipient_tool.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/tools/retrieval_tool.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/tools/rewind_tool.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/tools/segment_extract_tool.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/tools/tavily_search_tool.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/agent/xml_tool_message.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/cachedb/__init__.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/cachedb/base.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/cachedb/momento_cachedb.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/cachedb/redis_cachedb.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/embedding_models/__init__.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/embedding_models/base.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/embedding_models/models.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/embedding_models/protoc/__init__.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/embedding_models/protoc/embeddings.proto +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/embedding_models/protoc/embeddings_pb2.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/embedding_models/protoc/embeddings_pb2.pyi +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/embedding_models/protoc/embeddings_pb2_grpc.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/embedding_models/remote_embeds.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/exceptions.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/language_models/__init__.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/language_models/azure_openai.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/language_models/config.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/language_models/mock_lm.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/language_models/model_info.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/language_models/prompt_formatter/__init__.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/language_models/prompt_formatter/base.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/language_models/prompt_formatter/hf_formatter.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/language_models/prompt_formatter/llama2_formatter.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/language_models/utils.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/mytypes.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/parsing/__init__.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/parsing/agent_chats.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/parsing/code_parser.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/parsing/md_parser.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/parsing/para_sentence_split.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/parsing/parse_json.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/parsing/pdf_utils.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/parsing/repo_loader.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/parsing/routing.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/parsing/search.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/parsing/spider.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/parsing/table_loader.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/parsing/url_loader.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/parsing/urls.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/parsing/utils.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/parsing/web_search.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/prompts/__init__.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/prompts/dialog.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/prompts/prompts_config.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/prompts/templates.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/py.typed +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/pydantic_v1/__init__.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/pydantic_v1/main.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/utils/__init__.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/utils/algorithms/__init__.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/utils/algorithms/graph.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/utils/configuration.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/utils/constants.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/utils/git_utils.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/utils/globals.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/utils/logging.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/utils/object_registry.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/utils/output/__init__.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/utils/output/citations.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/utils/output/printing.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/utils/output/status.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/utils/pandas_utils.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/utils/pydantic_utils.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/utils/system.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/utils/types.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/vector_store/__init__.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/vector_store/base.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/vector_store/chromadb.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/vector_store/lancedb.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/vector_store/meilisearch.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/vector_store/pineconedb.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/vector_store/postgres.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/vector_store/qdrantdb.py +0 -0
- {langroid-0.51.2 → langroid-0.52.0}/langroid/vector_store/weaviatedb.py +0 -0
@@ -47,6 +47,7 @@ from langroid.language_models.base import (
|
|
47
47
|
)
|
48
48
|
from langroid.language_models.openai_gpt import OpenAIGPT, OpenAIGPTConfig
|
49
49
|
from langroid.mytypes import Entity
|
50
|
+
from langroid.parsing.file_attachment import FileAttachment
|
50
51
|
from langroid.parsing.parse_json import extract_top_level_json
|
51
52
|
from langroid.parsing.parser import Parser, ParsingConfig
|
52
53
|
from langroid.prompts.prompts_config import PromptsConfig
|
@@ -440,6 +441,7 @@ class Agent(ABC):
|
|
440
441
|
def create_agent_response(
|
441
442
|
self,
|
442
443
|
content: str | None = None,
|
444
|
+
files: List[FileAttachment] = [],
|
443
445
|
content_any: Any = None,
|
444
446
|
tool_messages: List[ToolMessage] = [],
|
445
447
|
oai_tool_calls: Optional[List[OpenAIToolCall]] = None,
|
@@ -452,6 +454,7 @@ class Agent(ABC):
|
|
452
454
|
return self.response_template(
|
453
455
|
Entity.AGENT,
|
454
456
|
content=content,
|
457
|
+
files=files,
|
455
458
|
content_any=content_any,
|
456
459
|
tool_messages=tool_messages,
|
457
460
|
oai_tool_calls=oai_tool_calls,
|
@@ -689,6 +692,7 @@ class Agent(ABC):
|
|
689
692
|
self,
|
690
693
|
e: Entity,
|
691
694
|
content: str | None = None,
|
695
|
+
files: List[FileAttachment] = [],
|
692
696
|
content_any: Any = None,
|
693
697
|
tool_messages: List[ToolMessage] = [],
|
694
698
|
oai_tool_calls: Optional[List[OpenAIToolCall]] = None,
|
@@ -700,6 +704,7 @@ class Agent(ABC):
|
|
700
704
|
"""Template for response from entity `e`."""
|
701
705
|
return ChatDocument(
|
702
706
|
content=content or "",
|
707
|
+
files=files,
|
703
708
|
content_any=content_any,
|
704
709
|
tool_messages=tool_messages,
|
705
710
|
oai_tool_calls=oai_tool_calls,
|
@@ -714,6 +719,7 @@ class Agent(ABC):
|
|
714
719
|
def create_user_response(
|
715
720
|
self,
|
716
721
|
content: str | None = None,
|
722
|
+
files: List[FileAttachment] = [],
|
717
723
|
content_any: Any = None,
|
718
724
|
tool_messages: List[ToolMessage] = [],
|
719
725
|
oai_tool_calls: List[OpenAIToolCall] | None = None,
|
@@ -726,6 +732,7 @@ class Agent(ABC):
|
|
726
732
|
return self.response_template(
|
727
733
|
e=Entity.USER,
|
728
734
|
content=content,
|
735
|
+
files=files,
|
729
736
|
content_any=content_any,
|
730
737
|
tool_messages=tool_messages,
|
731
738
|
oai_tool_calls=oai_tool_calls,
|
@@ -19,6 +19,7 @@ from langroid.language_models.base import (
|
|
19
19
|
)
|
20
20
|
from langroid.mytypes import DocMetaData, Document, Entity
|
21
21
|
from langroid.parsing.agent_chats import parse_message
|
22
|
+
from langroid.parsing.file_attachment import FileAttachment
|
22
23
|
from langroid.parsing.parse_json import extract_top_level_json, top_level_json_field
|
23
24
|
from langroid.pydantic_v1 import BaseModel, Extra
|
24
25
|
from langroid.utils.object_registry import ObjectRegistry
|
@@ -119,6 +120,7 @@ class ChatDocument(Document):
|
|
119
120
|
|
120
121
|
reasoning: str = "" # reasoning produced by a reasoning LLM
|
121
122
|
content_any: Any = None # to hold arbitrary data returned by responders
|
123
|
+
files: List[FileAttachment] = [] # list of file attachments
|
122
124
|
oai_tool_calls: Optional[List[OpenAIToolCall]] = None
|
123
125
|
oai_tool_id2result: Optional[OrderedDict[str, str]] = None
|
124
126
|
oai_tool_choice: ToolChoiceTypes | Dict[str, Dict[str, str] | str] = "auto"
|
@@ -407,6 +409,7 @@ class ChatDocument(Document):
|
|
407
409
|
role=Role.TOOL,
|
408
410
|
tool_call_id=oai_tools[0].id,
|
409
411
|
content=content,
|
412
|
+
files=message.files,
|
410
413
|
chat_document_id=chat_document_id,
|
411
414
|
)
|
412
415
|
]
|
@@ -422,6 +425,7 @@ class ChatDocument(Document):
|
|
422
425
|
role=Role.TOOL,
|
423
426
|
tool_call_id=message.metadata.oai_tool_id,
|
424
427
|
content=content,
|
428
|
+
files=message.files,
|
425
429
|
chat_document_id=chat_document_id,
|
426
430
|
)
|
427
431
|
]
|
@@ -436,6 +440,7 @@ class ChatDocument(Document):
|
|
436
440
|
role=Role.TOOL,
|
437
441
|
tool_call_id=tool_id,
|
438
442
|
content=result or " ",
|
443
|
+
files=message.files,
|
439
444
|
chat_document_id=chat_document_id,
|
440
445
|
)
|
441
446
|
for tool_id, result in message.oai_tool_id2result.items()
|
@@ -448,6 +453,7 @@ class ChatDocument(Document):
|
|
448
453
|
role=sender_role,
|
449
454
|
tool_id=tool_id, # for OpenAI Assistant
|
450
455
|
content=content,
|
456
|
+
files=message.files,
|
451
457
|
function_call=fun_call,
|
452
458
|
tool_calls=oai_tool_calls,
|
453
459
|
name=sender_name,
|
@@ -21,6 +21,7 @@ from langroid.cachedb.base import CacheDBConfig
|
|
21
21
|
from langroid.cachedb.redis_cachedb import RedisCacheConfig
|
22
22
|
from langroid.language_models.model_info import ModelInfo, get_model_info
|
23
23
|
from langroid.parsing.agent_chats import parse_message
|
24
|
+
from langroid.parsing.file_attachment import FileAttachment
|
24
25
|
from langroid.parsing.parse_json import parse_imperfect_json, top_level_json_field
|
25
26
|
from langroid.prompts.dialog import collate_chat_history
|
26
27
|
from langroid.pydantic_v1 import BaseModel, BaseSettings, Field
|
@@ -53,6 +54,13 @@ class StreamEventType(Enum):
|
|
53
54
|
TOOL_ARGS = 5
|
54
55
|
|
55
56
|
|
57
|
+
class RetryParams(BaseSettings):
    """Settings that control how failed LLM API calls are retried.

    Each field is forwarded, under the same keyword name, to the
    exponential-backoff retry helper that wraps the chat-completion calls.
    """

    # upper bound on retry attempts handed to the retry helper
    max_retries: int = 5
    # starting delay handed to the retry helper
    # (presumably seconds -- confirm against the helper)
    initial_delay: float = 1.0
    # base of the exponential backoff handed to the retry helper
    exponential_base: float = 1.3
    # whether the retry helper should apply jitter
    jitter: bool = True
|
62
|
+
|
63
|
+
|
56
64
|
class LLMConfig(BaseSettings):
|
57
65
|
"""
|
58
66
|
Common configuration for all language models.
|
@@ -87,6 +95,7 @@ class LLMConfig(BaseSettings):
|
|
87
95
|
# Dict of model -> (input/prompt cost, output/completion cost)
|
88
96
|
chat_cost_per_1k_tokens: Tuple[float, float] = (0.0, 0.0)
|
89
97
|
completion_cost_per_1k_tokens: Tuple[float, float] = (0.0, 0.0)
|
98
|
+
retry_params: RetryParams = RetryParams()
|
90
99
|
|
91
100
|
@property
|
92
101
|
def model_max_output_tokens(self) -> int:
|
@@ -265,13 +274,14 @@ class LLMMessage(BaseModel):
|
|
265
274
|
tool_call_id: Optional[str] = None # which OpenAI LLM tool this is a response to
|
266
275
|
tool_id: str = "" # used by OpenAIAssistant
|
267
276
|
content: str
|
277
|
+
files: List[FileAttachment] = []
|
268
278
|
function_call: Optional[LLMFunctionCall] = None
|
269
279
|
tool_calls: Optional[List[OpenAIToolCall]] = None
|
270
280
|
timestamp: datetime = Field(default_factory=datetime.utcnow)
|
271
281
|
# link to corresponding chat document, for provenance/rewind purposes
|
272
282
|
chat_document_id: str = ""
|
273
283
|
|
274
|
-
def api_dict(self, has_system_role: bool = True) -> Dict[str, Any]:
|
284
|
+
def api_dict(self, model: str, has_system_role: bool = True) -> Dict[str, Any]:
|
275
285
|
"""
|
276
286
|
Convert to dictionary for API request, keeping ONLY
|
277
287
|
the fields that are expected in an API call!
|
@@ -285,6 +295,17 @@ class LLMMessage(BaseModel):
|
|
285
295
|
dict: dictionary representation of LLM message
|
286
296
|
"""
|
287
297
|
d = self.dict()
|
298
|
+
files: List[FileAttachment] = d.pop("files")
|
299
|
+
if len(files) > 0 and self.role == Role.USER:
|
300
|
+
# If there are files, then content is an array of
|
301
|
+
# different content-parts
|
302
|
+
d["content"] = [
|
303
|
+
dict(
|
304
|
+
type="text",
|
305
|
+
text=self.content,
|
306
|
+
)
|
307
|
+
] + [f.to_dict(model) for f in self.files]
|
308
|
+
|
288
309
|
# if there is a key k = "role" with value "system", change to "user"
|
289
310
|
# in case has_system_role is False
|
290
311
|
if not has_system_role and "role" in d and d["role"] == "system":
|
@@ -1734,8 +1734,7 @@ class OpenAIGPT(LanguageModel):
|
|
1734
1734
|
logging.error(friendly_error(e, "Error in OpenAIGPT.achat: "))
|
1735
1735
|
raise e
|
1736
1736
|
|
1737
|
-
|
1738
|
-
def _chat_completions_with_backoff(self, **kwargs): # type: ignore
|
1737
|
+
def _chat_completions_with_backoff_body(self, **kwargs): # type: ignore
|
1739
1738
|
cached = False
|
1740
1739
|
hashed_key, result = self._cache_lookup("Completion", **kwargs)
|
1741
1740
|
if result is not None:
|
@@ -1784,8 +1783,17 @@ class OpenAIGPT(LanguageModel):
|
|
1784
1783
|
self._cache_store(hashed_key, result.model_dump())
|
1785
1784
|
return cached, hashed_key, result
|
1786
1785
|
|
1787
|
-
|
1788
|
-
|
1786
|
+
def _chat_completions_with_backoff(self, **kwargs):  # type: ignore
    """Run the chat-completion body under exponential-backoff retries.

    The retry behavior is taken from ``self.config.retry_params`` at call
    time, so the wrapper is built per call and not fixed at class definition.

    Args:
        **kwargs: passed through unchanged to
            ``_chat_completions_with_backoff_body``.

    Returns:
        Whatever the wrapped body returns.
    """
    params = self.config.retry_params
    wrapped = retry_with_exponential_backoff(
        self._chat_completions_with_backoff_body,
        initial_delay=params.initial_delay,
        max_retries=params.max_retries,
        exponential_base=params.exponential_base,
        jitter=params.jitter,
    )
    return wrapped(**kwargs)
|
1795
|
+
|
1796
|
+
async def _achat_completions_with_backoff_body(self, **kwargs): # type: ignore
|
1789
1797
|
cached = False
|
1790
1798
|
hashed_key, result = self._cache_lookup("Completion", **kwargs)
|
1791
1799
|
if result is not None:
|
@@ -1839,6 +1847,16 @@ class OpenAIGPT(LanguageModel):
|
|
1839
1847
|
self._cache_store(hashed_key, result.model_dump())
|
1840
1848
|
return cached, hashed_key, result
|
1841
1849
|
|
1850
|
+
async def _achat_completions_with_backoff(self, **kwargs):  # type: ignore
    """Async counterpart: await the chat-completion body under retries.

    The retry behavior is taken from ``self.config.retry_params`` at call
    time, so the wrapper is built per call and not fixed at class definition.

    Args:
        **kwargs: passed through unchanged to
            ``_achat_completions_with_backoff_body``.

    Returns:
        Whatever the wrapped coroutine returns.
    """
    params = self.config.retry_params
    wrapped = async_retry_with_exponential_backoff(
        self._achat_completions_with_backoff_body,
        initial_delay=params.initial_delay,
        max_retries=params.max_retries,
        exponential_base=params.exponential_base,
        jitter=params.jitter,
    )
    return await wrapped(**kwargs)
|
1859
|
+
|
1842
1860
|
def _prep_chat_completion(
|
1843
1861
|
self,
|
1844
1862
|
messages: Union[str, List[LLMMessage]],
|
@@ -1879,7 +1897,10 @@ class OpenAIGPT(LanguageModel):
|
|
1879
1897
|
args: Dict[str, Any] = dict(
|
1880
1898
|
model=chat_model,
|
1881
1899
|
messages=[
|
1882
|
-
m.api_dict(
|
1900
|
+
m.api_dict(
|
1901
|
+
self.config.chat_model,
|
1902
|
+
has_system_role=self.info().allows_system_message,
|
1903
|
+
)
|
1883
1904
|
for m in (llm_messages)
|
1884
1905
|
],
|
1885
1906
|
max_completion_tokens=max_tokens,
|
@@ -2076,7 +2097,7 @@ class OpenAIGPT(LanguageModel):
|
|
2076
2097
|
function_call,
|
2077
2098
|
response_format,
|
2078
2099
|
)
|
2079
|
-
cached, hashed_key, response = self._chat_completions_with_backoff(**args)
|
2100
|
+
cached, hashed_key, response = self._chat_completions_with_backoff(**args) # type: ignore
|
2080
2101
|
if self.get_stream() and not cached:
|
2081
2102
|
llm_response, openai_response = self._stream_response(response, chat=True)
|
2082
2103
|
self._cache_store(hashed_key, openai_response)
|
@@ -2109,7 +2130,7 @@ class OpenAIGPT(LanguageModel):
|
|
2109
2130
|
function_call,
|
2110
2131
|
response_format,
|
2111
2132
|
)
|
2112
|
-
cached, hashed_key, response = await self._achat_completions_with_backoff(
|
2133
|
+
cached, hashed_key, response = await self._achat_completions_with_backoff( # type: ignore
|
2113
2134
|
**args
|
2114
2135
|
)
|
2115
2136
|
if self.get_stream() and not cached:
|
@@ -1242,7 +1242,7 @@ class LLMPdfParser(DocumentParser):
|
|
1242
1242
|
image_url=dict(url=data_uri),
|
1243
1243
|
)
|
1244
1244
|
elif "claude" in self.model_name.lower():
|
1245
|
-
#
|
1245
|
+
# optimistically try this: some API proxies like litellm
|
1246
1246
|
# support this, and others may not.
|
1247
1247
|
file_content = dict(
|
1248
1248
|
type="file",
|
@@ -1259,27 +1259,32 @@ class LLMPdfParser(DocumentParser):
|
|
1259
1259
|
file_data=data_uri,
|
1260
1260
|
),
|
1261
1261
|
)
|
1262
|
+
prompt = (
|
1263
|
+
self.llm_parser_config.prompt
|
1264
|
+
or self.LLM_PDF_MD_SYSTEM_INSTRUCTION
|
1265
|
+
)
|
1266
|
+
system_prompt = (
|
1267
|
+
self.llm_parser_config.system_prompt
|
1268
|
+
or """
|
1269
|
+
You are an expert pdf -> markdown converter.
|
1270
|
+
Do NOT use any triple backquotes when you present the
|
1271
|
+
markdown content,like ```markdown etc.
|
1272
|
+
FAITHFULLY CONVERT THE PDF TO MARKDOWN,
|
1273
|
+
retaining ALL content as you find it.
|
1274
|
+
"""
|
1275
|
+
)
|
1262
1276
|
|
1263
1277
|
# Send the request with PDF content and system instructions
|
1264
1278
|
response = await llm.async_client.chat.completions.create( # type: ignore
|
1265
1279
|
model=self.model_name.split("/")[-1],
|
1266
1280
|
messages=[
|
1267
|
-
dict(
|
1268
|
-
role="system",
|
1269
|
-
content="""
|
1270
|
-
You are an expert pdf -> markdown converter.
|
1271
|
-
Do NOT use any triple backquotes when you present the
|
1272
|
-
markdown content,like ```markdown etc.
|
1273
|
-
FAITHFULLY CONVERT THE PDF TO MARKDOWN,
|
1274
|
-
retaining ALL content as you find it.
|
1275
|
-
""",
|
1276
|
-
),
|
1281
|
+
dict(role="system", content=system_prompt),
|
1277
1282
|
dict( # type: ignore
|
1278
1283
|
role="user",
|
1279
1284
|
content=[
|
1280
1285
|
dict(
|
1281
1286
|
type="text",
|
1282
|
-
text=
|
1287
|
+
text=prompt,
|
1283
1288
|
),
|
1284
1289
|
file_content,
|
1285
1290
|
],
|
@@ -0,0 +1,157 @@
|
|
1
|
+
import base64
|
2
|
+
import mimetypes
|
3
|
+
import uuid
|
4
|
+
from pathlib import Path
|
5
|
+
from typing import Any, BinaryIO, Dict, Optional, Union
|
6
|
+
|
7
|
+
from langroid.pydantic_v1 import BaseModel
|
8
|
+
|
9
|
+
|
10
|
+
class FileAttachment(BaseModel):
    """A file (raw bytes plus name and MIME type) to attach to an LLM API call."""

    content: bytes
    filename: Optional[str] = None
    mime_type: str = "application/octet-stream"

    def __init__(self, **data: Any) -> None:
        """Construct the attachment, inventing a filename when none is given.

        A missing or ``None`` filename is replaced by
        ``attachment_<8 chars of a uuid4>.bin``.
        """
        if data.get("filename") is None:
            short_id = str(uuid.uuid4())[:8]
            data["filename"] = f"attachment_{short_id}.bin"
        super().__init__(**data)

    @classmethod
    def from_path(cls, file_path: Union[str, Path]) -> "FileAttachment":
        """Build an attachment by reading a file from disk.

        Args:
            file_path: location of the file to read

        Returns:
            FileAttachment holding the file's bytes, its base name, and a
            MIME type guessed from the path (generic binary type if the
            guess fails)
        """
        path = Path(file_path)
        raw = path.read_bytes()
        guessed = mimetypes.guess_type(path)[0]
        return cls(
            content=raw,
            filename=path.name,
            mime_type="application/octet-stream" if guessed is None else guessed,
        )

    @classmethod
    def from_bytes(
        cls,
        content: bytes,
        filename: Optional[str] = None,
        mime_type: Optional[str] = None,
    ) -> "FileAttachment":
        """Build an attachment from in-memory bytes.

        Args:
            content: raw bytes of the file
            filename: optional file name
            mime_type: MIME type; when omitted it is guessed from
                `filename` (if given), else the generic binary type is used

        Returns:
            FileAttachment instance
        """
        need_guess = mime_type is None and filename is not None
        if need_guess:
            mime_type = mimetypes.guess_type(filename)[0]

        return cls(
            content=content,
            filename=filename,
            mime_type=mime_type or "application/octet-stream",
        )

    @classmethod
    def from_io(
        cls,
        file_obj: BinaryIO,
        filename: Optional[str] = None,
        mime_type: Optional[str] = None,
    ) -> "FileAttachment":
        """Build an attachment by reading everything from a file-like object.

        Args:
            file_obj: binary file-like object; it is read via ``read()``
            filename: optional file name
            mime_type: MIME type; resolved as in `from_bytes` when omitted

        Returns:
            FileAttachment instance
        """
        return cls.from_bytes(file_obj.read(), filename, mime_type)

    @classmethod
    def from_text(
        cls,
        text: str,
        filename: Optional[str] = None,
        mime_type: str = "text/plain",
        encoding: str = "utf-8",
    ) -> "FileAttachment":
        """Build an attachment from a text string.

        Args:
            text: the text to store
            filename: optional file name
            mime_type: MIME type to record for the content
            encoding: codec used to turn `text` into bytes

        Returns:
            FileAttachment instance
        """
        encoded = text.encode(encoding)
        return cls(content=encoded, filename=filename, mime_type=mime_type)

    def to_base64(self) -> str:
        """Return the content as a base64 string."""
        encoded = base64.b64encode(self.content)
        return encoded.decode("utf-8")

    def to_data_uri(self) -> str:
        """Return a ``data:`` URI carrying the MIME type and base64 content."""
        return f"data:{self.mime_type};base64,{self.to_base64()}"

    def to_dict(self, model: str) -> Dict[str, Any]:
        """
        Build the content-part dictionary for an API request.
        Tested only for PDF files.

        Args:
            model: model name; the substrings "gemini" and "claude"
                (case-insensitive) select the payload shape

        Returns:
            Dictionary with file data
        """
        model_lc = model.lower()
        if "gemini" in model_lc:
            return {"type": "image_url", "image_url": {"url": self.to_data_uri()}}
        if "claude" in model_lc:
            # optimistically try this: some API proxies like litellm
            # support this, and others may not.
            return {"type": "file", "file": {"file_data": self.to_data_uri()}}
        # fallback: assume file upload is similar to OpenAI API
        return {
            "type": "file",
            "file": {
                "filename": self.filename,
                "file_data": self.to_data_uri(),
            },
        }
|
@@ -44,6 +44,8 @@ class LLMPdfParserConfig(BaseSettings):
|
|
44
44
|
split_on_page: Optional[bool] = True
|
45
45
|
requests_per_minute: Optional[int] = 5
|
46
46
|
timeout: int = 60
|
47
|
+
prompt: str = "" # override with a domain-specific prompt
|
48
|
+
system_prompt: str = "" # override with a domain-specific system prompt
|
47
49
|
|
48
50
|
|
49
51
|
class MarkerConfig(BaseSettings):
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{langroid-0.51.2 → langroid-0.52.0}/langroid/agent/special/sql/utils/description_extractors.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{langroid-0.51.2 → langroid-0.52.0}/langroid/language_models/prompt_formatter/hf_formatter.py
RENAMED
File without changes
|
{langroid-0.51.2 → langroid-0.52.0}/langroid/language_models/prompt_formatter/llama2_formatter.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|