langroid 0.58.2__py3-none-any.whl → 0.59.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. langroid/agent/base.py +39 -17
  2. langroid/agent/base.py-e +2216 -0
  3. langroid/agent/callbacks/chainlit.py +2 -1
  4. langroid/agent/chat_agent.py +73 -55
  5. langroid/agent/chat_agent.py-e +2086 -0
  6. langroid/agent/chat_document.py +7 -7
  7. langroid/agent/chat_document.py-e +513 -0
  8. langroid/agent/openai_assistant.py +9 -9
  9. langroid/agent/openai_assistant.py-e +882 -0
  10. langroid/agent/special/arangodb/arangodb_agent.py +10 -18
  11. langroid/agent/special/arangodb/arangodb_agent.py-e +648 -0
  12. langroid/agent/special/arangodb/tools.py +3 -3
  13. langroid/agent/special/doc_chat_agent.py +16 -14
  14. langroid/agent/special/lance_rag/critic_agent.py +2 -2
  15. langroid/agent/special/lance_rag/query_planner_agent.py +4 -4
  16. langroid/agent/special/lance_tools.py +6 -5
  17. langroid/agent/special/lance_tools.py-e +61 -0
  18. langroid/agent/special/neo4j/neo4j_chat_agent.py +3 -7
  19. langroid/agent/special/neo4j/neo4j_chat_agent.py-e +430 -0
  20. langroid/agent/special/relevance_extractor_agent.py +1 -1
  21. langroid/agent/special/sql/sql_chat_agent.py +11 -3
  22. langroid/agent/task.py +9 -87
  23. langroid/agent/task.py-e +2418 -0
  24. langroid/agent/tool_message.py +33 -17
  25. langroid/agent/tool_message.py-e +400 -0
  26. langroid/agent/tools/file_tools.py +4 -2
  27. langroid/agent/tools/file_tools.py-e +234 -0
  28. langroid/agent/tools/mcp/fastmcp_client.py +19 -6
  29. langroid/agent/tools/mcp/fastmcp_client.py-e +584 -0
  30. langroid/agent/tools/orchestration.py +22 -17
  31. langroid/agent/tools/orchestration.py-e +301 -0
  32. langroid/agent/tools/recipient_tool.py +3 -3
  33. langroid/agent/tools/task_tool.py +22 -16
  34. langroid/agent/tools/task_tool.py-e +249 -0
  35. langroid/agent/xml_tool_message.py +90 -35
  36. langroid/agent/xml_tool_message.py-e +392 -0
  37. langroid/cachedb/base.py +1 -1
  38. langroid/embedding_models/base.py +2 -2
  39. langroid/embedding_models/models.py +3 -7
  40. langroid/embedding_models/models.py-e +563 -0
  41. langroid/exceptions.py +4 -1
  42. langroid/language_models/azure_openai.py +2 -2
  43. langroid/language_models/azure_openai.py-e +134 -0
  44. langroid/language_models/base.py +6 -4
  45. langroid/language_models/base.py-e +812 -0
  46. langroid/language_models/client_cache.py +64 -0
  47. langroid/language_models/config.py +2 -4
  48. langroid/language_models/config.py-e +18 -0
  49. langroid/language_models/model_info.py +9 -1
  50. langroid/language_models/model_info.py-e +483 -0
  51. langroid/language_models/openai_gpt.py +119 -20
  52. langroid/language_models/openai_gpt.py-e +2280 -0
  53. langroid/language_models/provider_params.py +3 -22
  54. langroid/language_models/provider_params.py-e +153 -0
  55. langroid/mytypes.py +11 -4
  56. langroid/mytypes.py-e +132 -0
  57. langroid/parsing/code_parser.py +1 -1
  58. langroid/parsing/file_attachment.py +1 -1
  59. langroid/parsing/file_attachment.py-e +246 -0
  60. langroid/parsing/md_parser.py +14 -4
  61. langroid/parsing/md_parser.py-e +574 -0
  62. langroid/parsing/parser.py +22 -7
  63. langroid/parsing/parser.py-e +410 -0
  64. langroid/parsing/repo_loader.py +3 -1
  65. langroid/parsing/repo_loader.py-e +812 -0
  66. langroid/parsing/search.py +1 -1
  67. langroid/parsing/url_loader.py +17 -51
  68. langroid/parsing/url_loader.py-e +683 -0
  69. langroid/parsing/urls.py +5 -4
  70. langroid/parsing/urls.py-e +279 -0
  71. langroid/prompts/prompts_config.py +1 -1
  72. langroid/pydantic_v1/__init__.py +45 -6
  73. langroid/pydantic_v1/__init__.py-e +36 -0
  74. langroid/pydantic_v1/main.py +11 -4
  75. langroid/pydantic_v1/main.py-e +11 -0
  76. langroid/utils/configuration.py +13 -11
  77. langroid/utils/configuration.py-e +141 -0
  78. langroid/utils/constants.py +1 -1
  79. langroid/utils/constants.py-e +32 -0
  80. langroid/utils/globals.py +21 -5
  81. langroid/utils/globals.py-e +49 -0
  82. langroid/utils/html_logger.py +2 -1
  83. langroid/utils/html_logger.py-e +825 -0
  84. langroid/utils/object_registry.py +1 -1
  85. langroid/utils/object_registry.py-e +66 -0
  86. langroid/utils/pydantic_utils.py +55 -28
  87. langroid/utils/pydantic_utils.py-e +602 -0
  88. langroid/utils/types.py +2 -2
  89. langroid/utils/types.py-e +113 -0
  90. langroid/vector_store/base.py +3 -3
  91. langroid/vector_store/lancedb.py +5 -5
  92. langroid/vector_store/lancedb.py-e +404 -0
  93. langroid/vector_store/meilisearch.py +2 -2
  94. langroid/vector_store/pineconedb.py +4 -4
  95. langroid/vector_store/pineconedb.py-e +427 -0
  96. langroid/vector_store/postgres.py +1 -1
  97. langroid/vector_store/qdrantdb.py +3 -3
  98. langroid/vector_store/weaviatedb.py +1 -1
  99. {langroid-0.58.2.dist-info → langroid-0.59.0b1.dist-info}/METADATA +3 -2
  100. langroid-0.59.0b1.dist-info/RECORD +181 -0
  101. langroid/agent/special/doc_chat_task.py +0 -0
  102. langroid/mcp/__init__.py +0 -1
  103. langroid/mcp/server/__init__.py +0 -1
  104. langroid-0.58.2.dist-info/RECORD +0 -145
  105. {langroid-0.58.2.dist-info → langroid-0.59.0b1.dist-info}/WHEEL +0 -0
  106. {langroid-0.58.2.dist-info → langroid-0.59.0b1.dist-info}/licenses/LICENSE +0 -0
langroid/language_models/provider_params.py CHANGED
@@ -4,7 +4,7 @@ Provider-specific parameter configurations for various LLM providers.
4
4
 
5
5
  from typing import Any, Dict, Optional
6
6
 
7
- from langroid.pydantic_v1 import BaseSettings
7
+ from pydantic_settings import BaseSettings, SettingsConfigDict
8
8
 
9
9
  # Constants
10
10
  LANGDB_BASE_URL = "https://api.us-east-1.langdb.ai"
@@ -24,10 +24,7 @@ class LangDBParams(BaseSettings):
24
24
  thread_id: Optional[str] = None
25
25
  base_url: str = LANGDB_BASE_URL
26
26
 
27
- class Config:
28
- # allow setting of fields via env vars,
29
- # e.g. LANGDB_PROJECT_ID=1234
30
- env_prefix = "LANGDB_"
27
+ model_config = SettingsConfigDict(env_prefix="LANGDB_")
31
28
 
32
29
 
33
30
  class PortkeyParams(BaseSettings):
@@ -61,10 +58,7 @@ class PortkeyParams(BaseSettings):
61
58
  custom_headers: Optional[Dict[str, str]] = None # Optional: additional headers
62
59
  base_url: str = PORTKEY_BASE_URL
63
60
 
64
- class Config:
65
- # allow setting of fields via env vars,
66
- # e.g. PORTKEY_API_KEY=xxx, PORTKEY_PROVIDER=anthropic
67
- env_prefix = "PORTKEY_"
61
+ model_config = SettingsConfigDict(env_prefix="PORTKEY_")
68
62
 
69
63
  def get_headers(self) -> Dict[str, str]:
70
64
  """Generate Portkey-specific headers from parameters."""
@@ -73,7 +67,6 @@ class PortkeyParams(BaseSettings):
73
67
 
74
68
  headers = {}
75
69
 
76
- # API key - from params or environment
77
70
  if self.api_key and self.api_key != DUMMY_API_KEY:
78
71
  headers["x-portkey-api-key"] = self.api_key
79
72
  else:
@@ -81,45 +74,35 @@ class PortkeyParams(BaseSettings):
81
74
  if portkey_key:
82
75
  headers["x-portkey-api-key"] = portkey_key
83
76
 
84
- # Provider
85
77
  if self.provider:
86
78
  headers["x-portkey-provider"] = self.provider
87
79
 
88
- # Virtual key
89
80
  if self.virtual_key:
90
81
  headers["x-portkey-virtual-key"] = self.virtual_key
91
82
 
92
- # Trace ID
93
83
  if self.trace_id:
94
84
  headers["x-portkey-trace-id"] = self.trace_id
95
85
 
96
- # Metadata
97
86
  if self.metadata:
98
87
  headers["x-portkey-metadata"] = json.dumps(self.metadata)
99
88
 
100
- # Retry configuration
101
89
  if self.retry:
102
90
  headers["x-portkey-retry"] = json.dumps(self.retry)
103
91
 
104
- # Cache configuration
105
92
  if self.cache:
106
93
  headers["x-portkey-cache"] = json.dumps(self.cache)
107
94
 
108
- # Cache force refresh
109
95
  if self.cache_force_refresh is not None:
110
96
  headers["x-portkey-cache-force-refresh"] = str(
111
97
  self.cache_force_refresh
112
98
  ).lower()
113
99
 
114
- # User identifier
115
100
  if self.user:
116
101
  headers["x-portkey-user"] = self.user
117
102
 
118
- # Organization identifier
119
103
  if self.organization:
120
104
  headers["x-portkey-organization"] = self.organization
121
105
 
122
- # Add any custom headers
123
106
  if self.custom_headers:
124
107
  headers.update(self.custom_headers)
125
108
 
@@ -138,7 +121,6 @@ class PortkeyParams(BaseSettings):
138
121
  _, provider, model = parts
139
122
  return provider, model
140
123
  else:
141
- # Fallback: just remove "portkey/" prefix and return empty provider
142
124
  model = model_string.replace("portkey/", "")
143
125
  return "", model
144
126
 
@@ -157,7 +139,6 @@ class PortkeyParams(BaseSettings):
157
139
  """
158
140
  import os
159
141
 
160
- # Common environment variable patterns for different providers
161
142
  env_patterns = [
162
143
  f"{provider.upper()}_API_KEY",
163
144
  f"{provider.upper()}_KEY",
langroid/language_models/provider_params.py-e ADDED
@@ -0,0 +1,153 @@
1
+ """
2
+ Provider-specific parameter configurations for various LLM providers.
3
+ """
4
+
5
+ from typing import Any, Dict, Optional
6
+
7
+ from pydantic import ConfigDict
8
+ from pydantic_settings import BaseSettings
9
+
10
+ # Constants
11
+ LANGDB_BASE_URL = "https://api.us-east-1.langdb.ai"
12
+ PORTKEY_BASE_URL = "https://api.portkey.ai"
13
+ DUMMY_API_KEY = "xxx"
14
+
15
+
16
+ class LangDBParams(BaseSettings):
17
+ """
18
+ Parameters specific to LangDB integration.
19
+ """
20
+
21
+ api_key: str = DUMMY_API_KEY
22
+ project_id: str = ""
23
+ label: Optional[str] = None
24
+ run_id: Optional[str] = None
25
+ thread_id: Optional[str] = None
26
+ base_url: str = LANGDB_BASE_URL
27
+
28
+ model_config = ConfigDict(env_prefix="LANGDB_")
29
+
30
+
31
+ class PortkeyParams(BaseSettings):
32
+ """
33
+ Parameters specific to Portkey integration.
34
+
35
+ Portkey is an AI gateway that provides a unified API for multiple LLM providers,
36
+ with features like automatic retries, fallbacks, load balancing, and observability.
37
+
38
+ Example usage:
39
+ # Use Portkey with Anthropic
40
+ config = OpenAIGPTConfig(
41
+ chat_model="portkey/anthropic/claude-3-sonnet-20240229",
42
+ portkey_params=PortkeyParams(
43
+ api_key="your-portkey-api-key",
44
+ provider="anthropic"
45
+ )
46
+ )
47
+ """
48
+
49
+ api_key: str = DUMMY_API_KEY # Portkey API key
50
+ provider: str = "" # Required: e.g., "openai", "anthropic", "cohere", etc.
51
+ virtual_key: Optional[str] = None # Optional: virtual key for the provider
52
+ trace_id: Optional[str] = None # Optional: trace ID for request tracking
53
+ metadata: Optional[Dict[str, Any]] = None # Optional: metadata for logging
54
+ retry: Optional[Dict[str, Any]] = None # Optional: retry configuration
55
+ cache: Optional[Dict[str, Any]] = None # Optional: cache configuration
56
+ cache_force_refresh: Optional[bool] = None # Optional: force cache refresh
57
+ user: Optional[str] = None # Optional: user identifier
58
+ organization: Optional[str] = None # Optional: organization identifier
59
+ custom_headers: Optional[Dict[str, str]] = None # Optional: additional headers
60
+ base_url: str = PORTKEY_BASE_URL
61
+
62
+ model_config = ConfigDict(env_prefix="PORTKEY_")
63
+
64
+ def get_headers(self) -> Dict[str, str]:
65
+ """Generate Portkey-specific headers from parameters."""
66
+ import json
67
+ import os
68
+
69
+ headers = {}
70
+
71
+ if self.api_key and self.api_key != DUMMY_API_KEY:
72
+ headers["x-portkey-api-key"] = self.api_key
73
+ else:
74
+ portkey_key = os.getenv("PORTKEY_API_KEY", "")
75
+ if portkey_key:
76
+ headers["x-portkey-api-key"] = portkey_key
77
+
78
+ if self.provider:
79
+ headers["x-portkey-provider"] = self.provider
80
+
81
+ if self.virtual_key:
82
+ headers["x-portkey-virtual-key"] = self.virtual_key
83
+
84
+ if self.trace_id:
85
+ headers["x-portkey-trace-id"] = self.trace_id
86
+
87
+ if self.metadata:
88
+ headers["x-portkey-metadata"] = json.dumps(self.metadata)
89
+
90
+ if self.retry:
91
+ headers["x-portkey-retry"] = json.dumps(self.retry)
92
+
93
+ if self.cache:
94
+ headers["x-portkey-cache"] = json.dumps(self.cache)
95
+
96
+ if self.cache_force_refresh is not None:
97
+ headers["x-portkey-cache-force-refresh"] = str(
98
+ self.cache_force_refresh
99
+ ).lower()
100
+
101
+ if self.user:
102
+ headers["x-portkey-user"] = self.user
103
+
104
+ if self.organization:
105
+ headers["x-portkey-organization"] = self.organization
106
+
107
+ if self.custom_headers:
108
+ headers.update(self.custom_headers)
109
+
110
+ return headers
111
+
112
+ def parse_model_string(self, model_string: str) -> tuple[str, str]:
113
+ """
114
+ Parse a model string like "portkey/anthropic/claude-3-sonnet"
115
+ and extract provider and model name.
116
+
117
+ Returns:
118
+ tuple: (provider, model_name)
119
+ """
120
+ parts = model_string.split("/", 2)
121
+ if len(parts) >= 3 and parts[0] == "portkey":
122
+ _, provider, model = parts
123
+ return provider, model
124
+ else:
125
+ model = model_string.replace("portkey/", "")
126
+ return "", model
127
+
128
+ def get_provider_api_key(
129
+ self, provider: str, default_key: str = DUMMY_API_KEY
130
+ ) -> str:
131
+ """
132
+ Get the API key for the provider from environment variables.
133
+
134
+ Args:
135
+ provider: The provider name (e.g., "anthropic", "openai")
136
+ default_key: Default key to return if not found
137
+
138
+ Returns:
139
+ The API key for the provider
140
+ """
141
+ import os
142
+
143
+ env_patterns = [
144
+ f"{provider.upper()}_API_KEY",
145
+ f"{provider.upper()}_KEY",
146
+ ]
147
+
148
+ for pattern in env_patterns:
149
+ key = os.getenv(pattern, "")
150
+ if key:
151
+ return key
152
+
153
+ return default_key
langroid/mytypes.py CHANGED
@@ -3,7 +3,7 @@ from textwrap import dedent
3
3
  from typing import Any, Callable, Dict, List, Union
4
4
  from uuid import uuid4
5
5
 
6
- from langroid.pydantic_v1 import BaseModel, Extra, Field
6
+ from pydantic import BaseModel, ConfigDict, Field, field_validator
7
7
 
8
8
  Number = Union[int, float]
9
9
  Embedding = List[Number]
@@ -51,13 +51,21 @@ class DocMetaData(BaseModel):
51
51
  id: str = Field(default_factory=lambda: str(uuid4()))
52
52
  window_ids: List[str] = [] # for RAG: ids of chunks around this one
53
53
 
54
+ @field_validator("id", mode="before")
55
+ @classmethod
56
+ def convert_id_to_string(cls, v: Any) -> str:
57
+ """Convert id to string if it's not already."""
58
+ if v is None:
59
+ return str(uuid4())
60
+ return str(v)
61
+
54
62
  def dict_bool_int(self, *args: Any, **kwargs: Any) -> Dict[str, Any]:
55
63
  """
56
64
  Special dict method to convert bool fields to int, to appease some
57
65
  downstream libraries, e.g. Chroma which complains about bool fields in
58
66
  metadata.
59
67
  """
60
- original_dict = super().dict(*args, **kwargs)
68
+ original_dict = super().model_dump(*args, **kwargs)
61
69
 
62
70
  for key, value in original_dict.items():
63
71
  if isinstance(value, bool):
@@ -92,8 +100,7 @@ class DocMetaData(BaseModel):
92
100
  )
93
101
  return ", ".join(components)
94
102
 
95
- class Config:
96
- extra = Extra.allow
103
+ model_config = ConfigDict(extra="allow")
97
104
 
98
105
 
99
106
  class Document(BaseModel):
langroid/mytypes.py-e ADDED
@@ -0,0 +1,132 @@
1
+ from enum import Enum
2
+ from textwrap import dedent
3
+ from typing import Any, Callable, Dict, List, Union
4
+ from uuid import uuid4
5
+
6
+ from pydantic import BaseModel, Extra, Field
7
+
8
+ Number = Union[int, float]
9
+ Embedding = List[Number]
10
+ Embeddings = List[Embedding]
11
+ EmbeddingFunction = Callable[[List[str]], Embeddings]
12
+
13
+
14
+ class Entity(str, Enum):
15
+ """
16
+ Enum for the different types of entities that can respond to the current message.
17
+ """
18
+
19
+ AGENT = "Agent"
20
+ LLM = "LLM"
21
+ USER = "User"
22
+ SYSTEM = "System"
23
+
24
+ def __eq__(self, other: object) -> bool:
25
+ """Allow case-insensitive equality (==) comparison with strings."""
26
+ if other is None:
27
+ return False
28
+ if isinstance(other, str):
29
+ return self.value.lower() == other.lower()
30
+ return super().__eq__(other)
31
+
32
+ def __ne__(self, other: object) -> bool:
33
+ """Allow case-insensitive non-equality (!=) comparison with strings."""
34
+ return not self.__eq__(other)
35
+
36
+ def __hash__(self) -> int:
37
+ """Override this to ensure hashability of the enum,
38
+ so it can be used sets and dictionary keys.
39
+ """
40
+ return hash(self.value.lower())
41
+
42
+
43
+ class DocMetaData(BaseModel):
44
+ """Metadata for a document."""
45
+
46
+ source: str = "context" # just reference
47
+ source_content: str = "context" # reference and content
48
+ title: str = "Unknown Title"
49
+ published_date: str = "Unknown Date"
50
+ is_chunk: bool = False # if it is a chunk, don't split
51
+ id: str = Field(default_factory=lambda: str(uuid4()))
52
+ window_ids: List[str] = [] # for RAG: ids of chunks around this one
53
+
54
+ def dict_bool_int(self, *args: Any, **kwargs: Any) -> Dict[str, Any]:
55
+ """
56
+ Special dict method to convert bool fields to int, to appease some
57
+ downstream libraries, e.g. Chroma which complains about bool fields in
58
+ metadata.
59
+ """
60
+ original_dict = super().model_dump(*args, **kwargs)
61
+
62
+ for key, value in original_dict.items():
63
+ if isinstance(value, bool):
64
+ original_dict[key] = 1 * value
65
+
66
+ return original_dict
67
+
68
+ def __str__(self) -> str:
69
+ title_str = (
70
+ ""
71
+ if "unknown" in self.title.lower() or self.title.strip() == ""
72
+ else f"Title: {self.title}"
73
+ )
74
+ date_str = ""
75
+ if (
76
+ "unknown" not in self.published_date.lower()
77
+ and self.published_date.strip() != ""
78
+ ):
79
+ try:
80
+ from dateutil import parser
81
+
82
+ # Try to parse the date string
83
+ date_obj = parser.parse(self.published_date)
84
+ # Format to include only the date part (year-month-day)
85
+ date_only = date_obj.strftime("%Y-%m-%d")
86
+ date_str = f"Date: {date_only}"
87
+ except (ValueError, ImportError, TypeError):
88
+ # If parsing fails, just use the original date
89
+ date_str = f"Date: {self.published_date}"
90
+ components = [self.source] + (
91
+ [] if title_str + date_str == "" else [title_str, date_str]
92
+ )
93
+ return ", ".join(components)
94
+
95
+ model_config = ConfigDict(extra="allow")
96
+
97
+ class Document(BaseModel):
98
+ """Interface for interacting with a document."""
99
+
100
+ content: str
101
+ metadata: DocMetaData
102
+
103
+ def id(self) -> str:
104
+ return self.metadata.id
105
+
106
+ @staticmethod
107
+ def from_string(
108
+ content: str,
109
+ source: str = "context",
110
+ is_chunk: bool = True,
111
+ ) -> "Document":
112
+ return Document(
113
+ content=content,
114
+ metadata=DocMetaData(source=source, is_chunk=is_chunk),
115
+ )
116
+
117
+ def __str__(self) -> str:
118
+ return dedent(
119
+ f"""
120
+ CONTENT: {self.content}
121
+ SOURCE:{str(self.metadata)}
122
+ """
123
+ )
124
+
125
+
126
+ class NonToolAction(str, Enum):
127
+ """
128
+ Possible options to handle non-tool msgs from LLM.
129
+ """
130
+
131
+ FORWARD_USER = "user" # forward msg to user
132
+ DONE = "done" # task done
langroid/parsing/code_parser.py CHANGED
@@ -2,12 +2,12 @@ from functools import reduce
2
2
  from typing import Callable, List
3
3
 
4
4
  import tiktoken
5
+ from pydantic_settings import BaseSettings
5
6
  from pygments import lex
6
7
  from pygments.lexers import get_lexer_by_name
7
8
  from pygments.token import Token
8
9
 
9
10
  from langroid.mytypes import Document
10
- from langroid.pydantic_v1 import BaseSettings
11
11
 
12
12
 
13
13
  def chunk_code(
langroid/parsing/file_attachment.py CHANGED
@@ -5,7 +5,7 @@ from pathlib import Path
5
5
  from typing import Any, BinaryIO, Dict, Optional, Union
6
6
  from urllib.parse import urlparse
7
7
 
8
- from langroid.pydantic_v1 import BaseModel
8
+ from pydantic import BaseModel
9
9
 
10
10
 
11
11
  class FileAttachment(BaseModel):
langroid/parsing/file_attachment.py-e ADDED
@@ -0,0 +1,246 @@
1
+ import base64
2
+ import mimetypes
3
+ import uuid
4
+ from pathlib import Path
5
+ from typing import Any, BinaryIO, Dict, Optional, Union
6
+ from urllib.parse import urlparse
7
+
8
+ from pydantic import BaseModel
9
+
10
+
11
+ class FileAttachment(BaseModel):
12
+ """Represents a file attachment to be sent to an LLM API."""
13
+
14
+ content: bytes
15
+ filename: Optional[str] = None
16
+ mime_type: str = "application/octet-stream"
17
+ url: str | None = None
18
+ detail: str | None = None
19
+
20
+ def __init__(self, **data: Any) -> None:
21
+ """Initialize with sensible defaults for filename if not provided."""
22
+ if "filename" not in data or data["filename"] is None:
23
+ # Generate a more readable unique filename
24
+ unique_id = str(uuid.uuid4())[:8]
25
+ data["filename"] = f"attachment_{unique_id}.bin"
26
+ super().__init__(**data)
27
+
28
+ @classmethod
29
+ def _from_path(
30
+ cls,
31
+ file_path: Union[str, Path],
32
+ detail: Optional[str] = None,
33
+ ) -> "FileAttachment":
34
+ """Create a FileAttachment from a file path.
35
+
36
+ Args:
37
+ file_path: Path to the file to attach
38
+
39
+ Returns:
40
+ FileAttachment instance
41
+ """
42
+ path = Path(file_path)
43
+ with open(path, "rb") as f:
44
+ content = f.read()
45
+
46
+ mime_type, _ = mimetypes.guess_type(path)
47
+ if mime_type is None:
48
+ mime_type = "application/octet-stream"
49
+
50
+ return cls(
51
+ content=content,
52
+ filename=path.name,
53
+ mime_type=mime_type,
54
+ detail=detail,
55
+ )
56
+
57
+ @classmethod
58
+ def _from_url(
59
+ cls,
60
+ url: str,
61
+ content: Optional[bytes] = None,
62
+ filename: Optional[str] = None,
63
+ mime_type: Optional[str] = None,
64
+ detail: Optional[str] = None,
65
+ ) -> "FileAttachment":
66
+ """Create a FileAttachment from a URL.
67
+
68
+ Args:
69
+ url: URL to the file
70
+ content: Optional raw bytes content (if already fetched)
71
+ filename: Optional name to use for the file
72
+ mime_type: MIME type of the content, guessed from filename or url
73
+
74
+ Returns:
75
+ FileAttachment instance
76
+ """
77
+ if filename is None and url:
78
+ # Extract filename from URL if possible
79
+
80
+ parsed_url = urlparse(url)
81
+ path = parsed_url.path
82
+ filename = path.split("/")[-1] if path else None
83
+
84
+ if mime_type is None and filename:
85
+ mime_type, _ = mimetypes.guess_type(filename)
86
+
87
+ return cls(
88
+ content=content or b"", # Empty bytes if no content provided
89
+ filename=filename,
90
+ mime_type=mime_type or "application/octet-stream",
91
+ url=url,
92
+ detail=detail,
93
+ )
94
+
95
+ @classmethod
96
+ def from_path(
97
+ cls,
98
+ path: Union[str, Path],
99
+ detail: str | None = None,
100
+ ) -> "FileAttachment":
101
+ """Create a FileAttachment from either a local file path or a URL.
102
+
103
+ Args:
104
+ path_or_url: Path to the file or URL to fetch
105
+
106
+ Returns:
107
+ FileAttachment instance
108
+ """
109
+ # Convert to string if Path object
110
+ path_str = str(path)
111
+
112
+ # Check if it's a URL
113
+ if path_str.startswith(("http://", "https://", "ftp://")):
114
+ return cls._from_url(url=path_str, detail=detail)
115
+ else:
116
+ # Assume it's a local file path
117
+ return cls._from_path(path_str, detail=detail)
118
+
119
+ @classmethod
120
+ def from_bytes(
121
+ cls,
122
+ content: bytes,
123
+ filename: Optional[str] = None,
124
+ mime_type: Optional[str] = None,
125
+ ) -> "FileAttachment":
126
+ """Create a FileAttachment from bytes content.
127
+
128
+ Args:
129
+ content: Raw bytes content
130
+ filename: Optional name to use for the file
131
+ mime_type: MIME type of the content, guessed from filename if provided
132
+
133
+ Returns:
134
+ FileAttachment instance
135
+ """
136
+ if mime_type is None and filename is not None:
137
+ mime_type, _ = mimetypes.guess_type(filename)
138
+
139
+ return cls(
140
+ content=content,
141
+ filename=filename,
142
+ mime_type=mime_type or "application/octet-stream",
143
+ )
144
+
145
+ @classmethod
146
+ def from_io(
147
+ cls,
148
+ file_obj: BinaryIO,
149
+ filename: Optional[str] = None,
150
+ mime_type: Optional[str] = None,
151
+ ) -> "FileAttachment":
152
+ """Create a FileAttachment from a file-like object.
153
+
154
+ Args:
155
+ file_obj: File-like object with binary content
156
+ filename: Optional name to use for the file
157
+ mime_type: MIME type of the content, guessed from filename if provided
158
+
159
+ Returns:
160
+ FileAttachment instance
161
+ """
162
+ content = file_obj.read()
163
+ return cls.from_bytes(content, filename, mime_type)
164
+
165
+ @classmethod
166
+ def from_text(
167
+ cls,
168
+ text: str,
169
+ filename: Optional[str] = None,
170
+ mime_type: str = "text/plain",
171
+ encoding: str = "utf-8",
172
+ ) -> "FileAttachment":
173
+ """Create a FileAttachment from text content.
174
+
175
+ Args:
176
+ text: Text content to include
177
+ filename: Optional name to use for the file
178
+ mime_type: MIME type of the content
179
+ encoding: Text encoding to use
180
+
181
+ Returns:
182
+ FileAttachment instance
183
+ """
184
+ content = text.encode(encoding)
185
+ return cls(content=content, filename=filename, mime_type=mime_type)
186
+
187
+ def to_base64(self) -> str:
188
+ """Convert content to base64 encoding.
189
+
190
+ Returns:
191
+ Base64 encoded string
192
+ """
193
+ return base64.b64encode(self.content).decode("utf-8")
194
+
195
+ def to_data_uri(self) -> str:
196
+ """Convert content to a data URI.
197
+
198
+ Returns:
199
+ A data URI string containing the base64-encoded content with MIME type
200
+ """
201
+ base64_content = self.to_base64()
202
+ return f"data:{self.mime_type};base64,{base64_content}"
203
+
204
+ def to_dict(self, model: str) -> Dict[str, Any]:
205
+ """
206
+ Convert to a dictionary suitable for API requests.
207
+ Tested only for PDF files.
208
+
209
+ Returns:
210
+ Dictionary with file data
211
+ """
212
+ if (
213
+ self.mime_type
214
+ and self.mime_type.startswith("image/")
215
+ or "gemini" in model.lower()
216
+ ):
217
+ # for gemini models, we use `image_url` for both pdf-files and images
218
+
219
+ image_url_dict = {}
220
+
221
+ # If we have a URL and it's a full http/https URL, use it directly
222
+ if self.url and (
223
+ self.url.startswith("http://") or self.url.startswith("https://")
224
+ ):
225
+ image_url_dict["url"] = self.url
226
+ # Otherwise use base64 data URI
227
+ else:
228
+ image_url_dict["url"] = self.to_data_uri()
229
+
230
+ # Add detail parameter if specified
231
+ if self.detail:
232
+ image_url_dict["detail"] = self.detail
233
+
234
+ return dict(
235
+ type="image_url",
236
+ image_url=image_url_dict,
237
+ )
238
+ else:
239
+ # For non-image files
240
+ return dict(
241
+ type="file",
242
+ file=dict(
243
+ filename=self.filename,
244
+ file_data=self.to_data_uri(),
245
+ ),
246
+ )