langroid 0.56.11__py3-none-any.whl → 0.56.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/agent/base.py +9 -3
- langroid/agent/task.py +27 -11
- langroid/agent/tools/task_tool.py +71 -13
- langroid/language_models/base.py +25 -19
- langroid/language_models/model_info.py +57 -3
- langroid/language_models/openai_gpt.py +45 -14
- {langroid-0.56.11.dist-info → langroid-0.56.13.dist-info}/METADATA +1 -1
- {langroid-0.56.11.dist-info → langroid-0.56.13.dist-info}/RECORD +10 -10
- {langroid-0.56.11.dist-info → langroid-0.56.13.dist-info}/WHEEL +0 -0
- {langroid-0.56.11.dist-info → langroid-0.56.13.dist-info}/licenses/LICENSE +0 -0
langroid/agent/base.py
CHANGED
@@ -137,6 +137,7 @@ class Agent(ABC):
 
     def __init__(self, config: AgentConfig = AgentConfig()):
         self.config = config
+        self.id = ObjectRegistry.new_id()  # Initialize agent ID
         self.lock = asyncio.Lock()  # for async access to update self.llm.usage_cost
         self.dialog: List[Tuple[str, str]] = []  # seq of LLM (prompt, response) tuples
         self.llm_tools_map: Dict[str, Type[ToolMessage]] = {}
@@ -685,6 +686,7 @@ class Agent(ABC):
             results.metadata.tool_ids = (
                 [] if msg is None or isinstance(msg, str) else msg.metadata.tool_ids
            )
+            results.metadata.agent_id = self.id
            return results
        sender_name = self.config.name
        if isinstance(msg, ChatDocument) and msg.function_call is not None:
@@ -703,6 +705,7 @@ class Agent(ABC):
            metadata=ChatDocMetaData(
                source=Entity.AGENT,
                sender=Entity.AGENT,
+                agent_id=self.id,
                sender_name=sender_name,
                oai_tool_id=oai_tool_id,
                # preserve trail of tool_ids for OpenAI Assistant fn-calls
@@ -967,6 +970,7 @@ class Agent(ABC):
        return ChatDocument(
            content=user_msg,
            metadata=ChatDocMetaData(
+                agent_id=self.id,
                source=source,
                sender=sender,
                # preserve trail of tool_ids for OpenAI Assistant fn-calls
@@ -2142,7 +2146,7 @@ class Agent(ABC):
        completion_tokens = self.num_tokens(response.message)
        if response.function_call is not None:
            completion_tokens += self.num_tokens(str(response.function_call))
-        cost = self.compute_token_cost(prompt_tokens, completion_tokens)
+        cost = self.compute_token_cost(prompt_tokens, 0, completion_tokens)
        response.usage = LLMTokenUsage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
@@ -2166,9 +2170,11 @@ class Agent(ABC):
        if print_response_stats:
            print(self.indent + self.token_stats_str)
 
-    def compute_token_cost(self, prompt: int, completion: int) -> float:
+    def compute_token_cost(self, prompt: int, cached: int, completion: int) -> float:
        price = cast(LanguageModel, self.llm).chat_cost()
-        return (
+        return (
+            price[0] * (prompt - cached) + price[1] * cached + price[2] * completion
+        ) / 1000
 
    def ask_agent(
        self,
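With this change, cached prompt tokens are billed at their own (typically discounted) rate. Below is a minimal sketch of the arithmetic behind the new compute_token_cost / _cost_chat_model formula, using the GPT-4o per-million prices that appear later in this diff (2.5 input / 1.25 cached / 10.0 output); the token counts are made-up illustrative values.

# Sketch of the new 3-part cost formula.
# Prices are USD per 1000 tokens: (input, cached-input, output); the GPT-4o entry
# in this diff (2.5 / 1.25 / 10.0 USD per million) gives (0.0025, 0.00125, 0.01).
price = (0.0025, 0.00125, 0.01)
prompt, cached, completion = 1000, 400, 200  # cached is a subset of the prompt tokens

cost = (
    price[0] * (prompt - cached)   # 600 uncached prompt tokens at the full input rate
    + price[1] * cached            # 400 prompt tokens served from the provider cache
    + price[2] * completion        # 200 generated tokens
) / 1000
print(cost)  # 0.004 (USD)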
langroid/agent/task.py
CHANGED
@@ -615,7 +615,10 @@ class Task:
        if isinstance(msg, ChatDocument):
            # carefully deep-copy: fresh metadata.id, register
            # as new obj in registry
+            original_parent_id = msg.metadata.parent_id
            self.pending_message = ChatDocument.deepcopy(msg)
+            # Preserve the parent pointer from the original message
+            self.pending_message.metadata.parent_id = original_parent_id
        if self.pending_message is not None and self.caller is not None:
            # msg may have come from `caller`, so we pretend this is from
            # the CURRENT task's USER entity
@@ -623,7 +626,11 @@ class Task:
        # update parent, child, agent pointers
        if msg is not None:
            msg.metadata.child_id = self.pending_message.metadata.id
-
+            # Only override parent_id if it wasn't already set in the
+            # original message. This preserves parent chains from TaskTool
+            if not msg.metadata.parent_id:
+                self.pending_message.metadata.parent_id = msg.metadata.id
+        if self.pending_message is not None:
            self.pending_message.metadata.agent_id = self.agent.id
 
        self._show_pending_message_if_debug()
@@ -2250,24 +2257,33 @@ class Task:
    def _get_message_chain(
        self, msg: ChatDocument | None, max_depth: Optional[int] = None
    ) -> List[ChatDocument]:
-        """Get the chain of messages
+        """Get the chain of messages using agent's message history."""
        if max_depth is None:
            # Get max depth needed from all sequences
            max_depth = 50  # default fallback
            if self._parsed_done_sequences:
                max_depth = max(len(seq.events) for seq in self._parsed_done_sequences)
 
-
-
-
+        # Get chat document IDs from message history
+        doc_ids = [
+            m.chat_document_id for m in self.agent.message_history if m.chat_document_id
+        ]
+
+        # Add current message ID if it exists and is not already the last one
+        if msg:
+            msg_id = msg.id()
+            if not doc_ids or doc_ids[-1] != msg_id:
+                doc_ids.append(msg_id)
 
-
-
-            current = current.parent
-            depth += 1
+        # Take only the last max_depth elements
+        relevant_ids = doc_ids[-max_depth:]
 
-        #
-        return
+        # Convert IDs to ChatDocuments and filter out None values
+        return [
+            doc
+            for doc_id in relevant_ids
+            if (doc := ChatDocument.from_id(doc_id)) is not None
+        ]
 
    def _matches_event(self, actual: AgentEvent, expected: AgentEvent) -> bool:
        """Check if an actual event matches an expected event pattern."""
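The rewritten _get_message_chain no longer walks parent pointers; it collects chat_document_ids from the agent's message history, appends the in-flight message, keeps only the last max_depth ids, and drops ids that no longer resolve. Below is a self-contained sketch of that pattern, with a plain dict and a from_id function standing in for langroid's real ChatDocument registry (both are hypothetical stand-ins).

from typing import Dict, List, Optional

# Hypothetical registry: doc-id -> document (here just strings for brevity)
REGISTRY: Dict[str, str] = {"d1": "doc-1", "d2": "doc-2", "d4": "doc-4"}

def from_id(doc_id: str) -> Optional[str]:
    # May return None if the id was never registered (like ChatDocument.from_id)
    return REGISTRY.get(doc_id)

def message_chain(doc_ids: List[str], current_id: Optional[str], max_depth: int) -> List[str]:
    if current_id and (not doc_ids or doc_ids[-1] != current_id):
        doc_ids = doc_ids + [current_id]      # append the in-flight message id
    relevant_ids = doc_ids[-max_depth:]       # keep only the most recent max_depth ids
    # resolve ids and silently drop the ones that no longer exist
    return [doc for i in relevant_ids if (doc := from_id(i)) is not None]

print(message_chain(["d1", "d2", "d3"], "d4", max_depth=2))  # ['doc-4']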
langroid/agent/tools/task_tool.py
CHANGED
@@ -38,10 +38,13 @@ class TaskTool(ToolMessage):
    system_message: Optional[str] = Field(
        ...,
        description="""
-            Optional system message to configure the sub-agent's general behavior
+            Optional system message to configure the sub-agent's general behavior and
+            to specify the task and its context.
            A good system message will have these components:
            - Inform the sub-agent of its role, e.g. "You are a financial analyst."
-            - Clear spec of the task
+            - Clear spec of the task, with sufficient context for the sub-agent to
+              understand what it needs to do, since the sub-agent does
+              NOT have access to your conversation history!
            - Any additional general context needed for the task, such as a
              (part of a) document, or data items, etc.
            - Specify when to use certain tools, e.g.
@@ -73,9 +76,10 @@ class TaskTool(ToolMessage):
            A list of tool names to enable for the sub-agent.
            This must be a list of strings referring to the names of tools
            that are known to you.
-            If you want to enable all tools, you
-
-
+            If you want to enable all tools, or you do not have any preference
+            on what tools are enabled for the sub-agent, you can set
+            this field to a singleton list ['ALL']
+            To disable all tools, set it to a singleton list ['NONE']
            """,
    )
    # TODO: ensure valid model name
@@ -113,11 +117,20 @@ class TaskTool(ToolMessage):
        # TODO: Maybe we just copy the parent agent's config and override chat_model?
        # -- but what if parent agent has a MockLMConfig?
        llm_config = lm.OpenAIGPTConfig(
-            chat_model=self.model or
+            chat_model=self.model or lm.OpenAIChatModel.GPT4_1_MINI,
        )
        config = ChatAgentConfig(
            name=agent_name,
            llm=llm_config,
+            handle_llm_no_tool=f"""
+                You forgot to use one of your TOOLs! Remember that you must either:
+                - use a tool, or a sequence of tools, to complete your task, OR
+                - if you are done with your task, use the `{DoneTool.name()}` tool
+                  to return the result.
+
+                As a reminder, this was your task:
+                {self.prompt}
+                """,
            system_message=f"""
                {self.system_message}
 
@@ -138,7 +151,9 @@ class TaskTool(ToolMessage):
            tool_classes = [
                agent.llm_tools_map[t]
                for t in agent.llm_tools_known
-                if t in agent.llm_tools_map
+                if t in agent.llm_tools_map
+                and t != self.request
+                and agent.llm_tools_map[t]._allow_llm_use
                # Exclude the TaskTool itself!
            ]
        elif self.tools == ["NONE"]:
@@ -150,6 +165,7 @@ class TaskTool(ToolMessage):
                agent.llm_tools_map[tool_name]
                for tool_name in self.tools
                if tool_name in agent.llm_tools_map
+                and agent.llm_tools_map[tool_name]._allow_llm_use
            ]
 
        # always enable the DoneTool to signal task completion
@@ -160,7 +176,9 @@ class TaskTool(ToolMessage):
 
        return task
 
-    def handle(
+    def handle(
+        self, agent: ChatAgent, chat_doc: Optional[ChatDocument] = None
+    ) -> Optional[ChatDocument]:
        """
 
        Handle the TaskTool by creating a sub-agent with specified tools
@@ -168,24 +186,64 @@ class TaskTool(ToolMessage):
 
        Args:
            agent: The parent ChatAgent that is handling this tool
+            chat_doc: The ChatDocument containing this tool message
        """
 
        task = self._set_up_task(agent)
-
-
+
+        # Create a ChatDocument for the prompt with parent pointer
+        prompt_doc = None
+        if chat_doc is not None:
+            from langroid.agent.chat_document import ChatDocMetaData
+
+            prompt_doc = ChatDocument(
+                content=self.prompt,
+                metadata=ChatDocMetaData(
+                    parent_id=chat_doc.id(),
+                    agent_id=agent.id,
+                    sender=chat_doc.metadata.sender,
+                ),
+            )
+            # Set bidirectional parent-child relationship
+            chat_doc.metadata.child_id = prompt_doc.id()
+
+        # Run the task with the ChatDocument or string prompt
+        result = task.run(prompt_doc or self.prompt, turns=self.max_iterations or 10)
        return result
 
-    async def handle_async(
+    async def handle_async(
+        self, agent: ChatAgent, chat_doc: Optional[ChatDocument] = None
+    ) -> Optional[ChatDocument]:
        """
        Async method to handle the TaskTool by creating a sub-agent with specified tools
        and running the task non-interactively.
 
        Args:
            agent: The parent ChatAgent that is handling this tool
+            chat_doc: The ChatDocument containing this tool message
        """
        task = self._set_up_task(agent)
-
+
+        # Create a ChatDocument for the prompt with parent pointer
+        prompt_doc = None
+        if chat_doc is not None:
+            from langroid.agent.chat_document import ChatDocMetaData
+
+            prompt_doc = ChatDocument(
+                content=self.prompt,
+                metadata=ChatDocMetaData(
+                    parent_id=chat_doc.id(),
+                    agent_id=agent.id,
+                    sender=chat_doc.metadata.sender,
+                ),
+            )
+            # Set bidirectional parent-child relationship
+            chat_doc.metadata.child_id = prompt_doc.id()
+
+        # Run the task with the ChatDocument or string prompt
        # TODO eventually allow the various task setup configs,
        # including termination conditions
-        result = await task.run_async(
+        result = await task.run_async(
+            prompt_doc or self.prompt, turns=self.max_iterations or 10
+        )
        return result
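Both handle and handle_async now wrap the prompt in a ChatDocument whose parent_id points at the incoming tool-call document, and set child_id on the original, so lineage is traceable across the parent agent and the sub-task. Below is a minimal sketch of that bidirectional linking using hypothetical stand-in classes (not langroid's real ChatDocument API).

from dataclasses import dataclass, field
from itertools import count
from typing import Optional

_ids = count(1)

@dataclass
class Meta:
    parent_id: Optional[str] = None
    child_id: Optional[str] = None

@dataclass
class Doc:  # hypothetical stand-in for a ChatDocument
    content: str
    metadata: Meta = field(default_factory=Meta)
    id: str = field(default_factory=lambda: f"doc-{next(_ids)}")

def link(parent: Doc, child: Doc) -> None:
    """Record the bidirectional parent/child pointers, as TaskTool.handle now does."""
    child.metadata.parent_id = parent.id
    parent.metadata.child_id = child.id

tool_call_doc = Doc("original tool-call message from the parent agent")
prompt_doc = Doc("prompt handed to the sub-agent's Task")
link(tool_call_doc, prompt_doc)
assert prompt_doc.metadata.parent_id == tool_call_doc.id
assert tool_call_doc.metadata.child_id == prompt_doc.id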
langroid/language_models/base.py
CHANGED
@@ -91,10 +91,6 @@ class LLMConfig(BaseSettings):
    # reasoning output from reasoning models
    cache_config: None | CacheDBConfig = RedisCacheConfig()
    thought_delimiters: Tuple[str, str] = ("<think>", "</think>")
-
-    # Dict of model -> (input/prompt cost, output/completion cost)
-    chat_cost_per_1k_tokens: Tuple[float, float] = (0.0, 0.0)
-    completion_cost_per_1k_tokens: Tuple[float, float] = (0.0, 0.0)
    retry_params: RetryParams = RetryParams()
 
    @property
@@ -131,7 +127,7 @@ class LLMFunctionCall(BaseModel):
        if not isinstance(dict_or_list, dict):
            raise ValueError(
                f"""
-                Invalid function args: {fun_args_str}
+                Invalid function args: {fun_args_str}
                parsed as {dict_or_list},
                which is not a valid dict.
                """
@@ -224,12 +220,14 @@ class LLMTokenUsage(BaseModel):
    """
 
    prompt_tokens: int = 0
+    cached_tokens: int = 0
    completion_tokens: int = 0
    cost: float = 0.0
    calls: int = 0  # how many API calls - not used as of 2025-04-04
 
    def reset(self) -> None:
        self.prompt_tokens = 0
+        self.cached_tokens = 0
        self.completion_tokens = 0
        self.cost = 0.0
        self.calls = 0
@@ -237,7 +235,8 @@ class LLMTokenUsage(BaseModel):
    def __str__(self) -> str:
        return (
            f"Tokens = "
-            f"(prompt {self.prompt_tokens},
+            f"(prompt {self.prompt_tokens}, cached {self.cached_tokens}, "
+            f"completion {self.completion_tokens}), "
            f"Cost={self.cost}, Calls={self.calls}"
        )
 
@@ -462,9 +461,9 @@ class LanguageModel(ABC):
        if type(config) is LLMConfig:
            raise ValueError(
                """
-                Cannot create a Language Model object from LLMConfig.
-                Please specify a specific subclass of LLMConfig e.g.,
-                OpenAIGPTConfig. If you are creating a ChatAgent from
+                Cannot create a Language Model object from LLMConfig.
+                Please specify a specific subclass of LLMConfig e.g.,
+                OpenAIGPTConfig. If you are creating a ChatAgent from
                a ChatAgentConfig, please specify the `llm` field of this config
                as a specific subclass of LLMConfig, e.g., OpenAIGPTConfig.
                """
@@ -666,8 +665,15 @@ class LanguageModel(ABC):
    def completion_context_length(self) -> int:
        return self.config.completion_context_length or DEFAULT_CONTEXT_LENGTH
 
-    def chat_cost(self) -> Tuple[float, float]:
-
+    def chat_cost(self) -> Tuple[float, float, float]:
+        """
+        Return the cost per 1000 tokens for chat completions.
+
+        Returns:
+            Tuple[float, float, float]: (input_cost, cached_cost, output_cost)
+                per 1000 tokens
+        """
+        return (0.0, 0.0, 0.0)
 
    def reset_usage_cost(self) -> None:
        for mdl in [self.config.chat_model, self.config.completion_model]:
@@ -754,18 +760,18 @@ class LanguageModel(ABC):
 
        prompt = f"""
        You are an expert at understanding a CHAT HISTORY between an AI Assistant
-        and a User, and you are highly skilled in rephrasing the User's FOLLOW-UP
-        QUESTION/REQUEST as a STANDALONE QUESTION/REQUEST that can be understood
+        and a User, and you are highly skilled in rephrasing the User's FOLLOW-UP
+        QUESTION/REQUEST as a STANDALONE QUESTION/REQUEST that can be understood
        WITHOUT the context of the chat history.
-
-        Below is the CHAT HISTORY. When the User asks you to rephrase a
-        FOLLOW-UP QUESTION/REQUEST, your ONLY task is to simply return the
-        question REPHRASED as a STANDALONE QUESTION/REQUEST, without any additional
+
+        Below is the CHAT HISTORY. When the User asks you to rephrase a
+        FOLLOW-UP QUESTION/REQUEST, your ONLY task is to simply return the
+        question REPHRASED as a STANDALONE QUESTION/REQUEST, without any additional
        text or context.
-
+
        <CHAT_HISTORY>
        {history}
-        </CHAT_HISTORY>
+        </CHAT_HISTORY>
        """.strip()
 
        follow_up_question = f"""
langroid/language_models/model_info.py
CHANGED
@@ -69,7 +69,9 @@ class GeminiModel(ModelName):
    GEMINI_1_5_FLASH = "gemini-1.5-flash"
    GEMINI_1_5_FLASH_8B = "gemini-1.5-flash-8b"
    GEMINI_1_5_PRO = "gemini-1.5-pro"
-    GEMINI_2_5_PRO = "gemini-2.5-pro
+    GEMINI_2_5_PRO = "gemini-2.5-pro"
+    GEMINI_2_5_FLASH = "gemini-2.5-flash"
+    GEMINI_2_5_FLASH_LITE_PREVIEW = "gemini-2.5-flash-lite-preview-06-17"
    GEMINI_2_PRO = "gemini-2.0-pro-exp-02-05"
    GEMINI_2_FLASH = "gemini-2.0-flash"
    GEMINI_2_FLASH_LITE = "gemini-2.0-flash-lite-preview"
@@ -108,6 +110,7 @@ class ModelInfo(BaseModel):
    max_cot_tokens: int = 0  # max chain of thought (thinking) tokens where applicable
    max_output_tokens: int = 8192  # Maximum number of output tokens - model dependent
    input_cost_per_million: float = 0.0  # Cost in USD per million input tokens
+    cached_cost_per_million: float = 0.0  # Cost in USD per million cached tokens
    output_cost_per_million: float = 0.0  # Cost in USD per million output tokens
    allows_streaming: bool = True  # Whether model supports streaming output
    allows_system_message: bool = True  # Whether model supports system messages
@@ -173,6 +176,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=1_047_576,
        max_output_tokens=32_768,
        input_cost_per_million=0.10,
+        cached_cost_per_million=0.025,
        output_cost_per_million=0.40,
        description="GPT-4.1",
    ),
@@ -182,6 +186,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=1_047_576,
        max_output_tokens=32_768,
        input_cost_per_million=0.40,
+        cached_cost_per_million=0.10,
        output_cost_per_million=1.60,
        description="GPT-4.1 Mini",
    ),
@@ -191,6 +196,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=1_047_576,
        max_output_tokens=32_768,
        input_cost_per_million=2.00,
+        cached_cost_per_million=0.50,
        output_cost_per_million=8.00,
        description="GPT-4.1",
    ),
@@ -200,6 +206,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=128_000,
        max_output_tokens=16_384,
        input_cost_per_million=2.5,
+        cached_cost_per_million=1.25,
        output_cost_per_million=10.0,
        has_structured_output=True,
        description="GPT-4o (128K context)",
@@ -210,6 +217,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=128_000,
        max_output_tokens=16_384,
        input_cost_per_million=0.15,
+        cached_cost_per_million=0.075,
        output_cost_per_million=0.60,
        has_structured_output=True,
        description="GPT-4o Mini",
@@ -220,6 +228,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=200_000,
        max_output_tokens=100_000,
        input_cost_per_million=15.0,
+        cached_cost_per_million=7.50,
        output_cost_per_million=60.0,
        allows_streaming=True,
        allows_system_message=False,
@@ -233,8 +242,9 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        provider=ModelProvider.OPENAI,
        context_length=200_000,
        max_output_tokens=100_000,
-        input_cost_per_million=
-
+        input_cost_per_million=2.0,
+        cached_cost_per_million=0.50,
+        output_cost_per_million=8.0,
        allows_streaming=True,
        allows_system_message=False,
        unsupported_params=["temperature"],
@@ -248,6 +258,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=128_000,
        max_output_tokens=65_536,
        input_cost_per_million=1.1,
+        cached_cost_per_million=0.55,
        output_cost_per_million=4.4,
        allows_streaming=False,
        allows_system_message=False,
@@ -262,6 +273,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=200_000,
        max_output_tokens=100_000,
        input_cost_per_million=1.1,
+        cached_cost_per_million=0.55,
        output_cost_per_million=4.4,
        allows_streaming=False,
        allows_system_message=False,
@@ -276,6 +288,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=200_000,
        max_output_tokens=100_000,
        input_cost_per_million=1.10,
+        cached_cost_per_million=0.275,
        output_cost_per_million=4.40,
        allows_streaming=False,
        allows_system_message=False,
@@ -291,6 +304,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=200_000,
        max_output_tokens=8192,
        input_cost_per_million=3.0,
+        cached_cost_per_million=0.30,
        output_cost_per_million=15.0,
        description="Claude 3.5 Sonnet",
    ),
@@ -300,6 +314,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=200_000,
        max_output_tokens=4096,
        input_cost_per_million=15.0,
+        cached_cost_per_million=1.50,
        output_cost_per_million=75.0,
        description="Claude 3 Opus",
    ),
@@ -309,6 +324,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=200_000,
        max_output_tokens=4096,
        input_cost_per_million=3.0,
+        cached_cost_per_million=0.30,
        output_cost_per_million=15.0,
        description="Claude 3 Sonnet",
    ),
@@ -318,6 +334,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=200_000,
        max_output_tokens=4096,
        input_cost_per_million=0.25,
+        cached_cost_per_million=0.03,
        output_cost_per_million=1.25,
        description="Claude 3 Haiku",
    ),
@@ -328,6 +345,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=64_000,
        max_output_tokens=8_000,
        input_cost_per_million=0.27,
+        cached_cost_per_million=0.07,
        output_cost_per_million=1.10,
        description="DeepSeek Chat",
    ),
@@ -337,6 +355,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=64_000,
        max_output_tokens=8_000,
        input_cost_per_million=0.55,
+        cached_cost_per_million=0.14,
        output_cost_per_million=2.19,
        description="DeepSeek-R1 Reasoning LM",
    ),
@@ -347,6 +366,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=1_056_768,
        max_output_tokens=8192,
        input_cost_per_million=0.10,
+        cached_cost_per_million=0.025,
        output_cost_per_million=0.40,
        rename_params={"max_tokens": "max_completion_tokens"},
        description="Gemini 2.0 Flash",
@@ -401,6 +421,40 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        rename_params={"max_tokens": "max_completion_tokens"},
        description="Gemini 2.0 Flash Thinking",
    ),
+    # Gemini 2.5 Models
+    GeminiModel.GEMINI_2_5_PRO.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_5_PRO.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_048_576,
+        max_output_tokens=65_536,
+        input_cost_per_million=1.25,
+        cached_cost_per_million=0.31,
+        output_cost_per_million=10.0,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.5 Pro",
+    ),
+    GeminiModel.GEMINI_2_5_FLASH.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_5_FLASH.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_048_576,
+        max_output_tokens=65_536,
+        input_cost_per_million=0.30,
+        cached_cost_per_million=0.075,
+        output_cost_per_million=2.50,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.5 Flash",
+    ),
+    GeminiModel.GEMINI_2_5_FLASH_LITE_PREVIEW.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_5_FLASH_LITE_PREVIEW.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=65_536,
+        max_output_tokens=65_536,
+        input_cost_per_million=0.10,
+        cached_cost_per_million=0.025,
+        output_cost_per_million=0.40,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.5 Flash Lite Preview",
+    ),
 }
 
 
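The new cached_cost_per_million fields are what chat_cost() (changed below in openai_gpt.py) converts into its per-1K price triple. A quick sanity check on the Gemini 2.5 Flash entry above; this is just arithmetic on the numbers in this diff, not a call into langroid.

# Per-million prices from the Gemini 2.5 Flash entry above
input_cost_per_million = 0.30
cached_cost_per_million = 0.075
output_cost_per_million = 2.50

# chat_cost() reports USD per 1000 tokens as (input, cached, output)
price = (
    input_cost_per_million / 1000,   # 0.0003
    cached_cost_per_million / 1000,  # 0.000075
    output_cost_per_million / 1000,  # 0.0025
)
print(price)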
langroid/language_models/openai_gpt.py
CHANGED
@@ -766,14 +766,21 @@ class OpenAIGPT(LanguageModel):
            or self.completion_info().context_length
        )
 
-    def chat_cost(self) -> Tuple[float, float]:
+    def chat_cost(self) -> Tuple[float, float, float]:
        """
-        (Prompt, Generation) cost per 1000 tokens, for chat-completion
+        (Prompt, Cached, Generation) cost per 1000 tokens, for chat-completion
        models/endpoints.
        Get it from the dict, otherwise fail-over to general method
        """
        info = self.info()
-
+        cached_cost_per_million = info.cached_cost_per_million
+        if not cached_cost_per_million:
+            cached_cost_per_million = info.input_cost_per_million
+        return (
+            info.input_cost_per_million / 1000,
+            cached_cost_per_million / 1000,
+            info.output_cost_per_million / 1000,
+        )
 
    def set_stream(self, stream: bool) -> bool:
        """Enable or disable streaming output from API.
@@ -1429,6 +1436,16 @@ class OpenAIGPT(LanguageModel):
        # and the reasoning may be included in the message content
        # within delimiters like <think> ... </think>
        reasoning, completion = self.get_reasoning_final(completion)
+
+        prompt_tokens = usage.get("prompt_tokens", 0)
+        prompt_tokens_details: Any = usage.get("prompt_tokens_details", {})
+        cached_tokens = (
+            prompt_tokens_details.get("cached_tokens", 0)
+            if isinstance(prompt_tokens_details, dict)
+            else 0
+        )
+        completion_tokens = usage.get("completion_tokens", 0)
+
        return (
            LLMResponse(
                message=completion,
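The stream-completion path above now reads cached-token counts out of the OpenAI-style usage payload, guarding against prompt_tokens_details being missing or not a dict. Below is a standalone sketch of that defensive parsing on a sample usage dict; the shape mirrors OpenAI's chat-completions usage object, the numbers are made up.

from typing import Any, Dict

usage: Dict[str, Any] = {
    "prompt_tokens": 1200,
    "completion_tokens": 150,
    "prompt_tokens_details": {"cached_tokens": 1024},  # may be absent or not a dict
}

prompt_tokens = usage.get("prompt_tokens", 0)
prompt_tokens_details: Any = usage.get("prompt_tokens_details", {})
cached_tokens = (
    prompt_tokens_details.get("cached_tokens", 0)
    if isinstance(prompt_tokens_details, dict)
    else 0
)
completion_tokens = usage.get("completion_tokens", 0)
print(prompt_tokens, cached_tokens, completion_tokens)  # 1200 1024 150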
@@ -1438,11 +1455,13 @@ class OpenAIGPT(LanguageModel):
                oai_tool_calls=tool_calls or None if len(tool_deltas) > 0 else None,
                function_call=function_call if has_function else None,
                usage=LLMTokenUsage(
-                    prompt_tokens=
-
+                    prompt_tokens=prompt_tokens,
+                    cached_tokens=cached_tokens,
+                    completion_tokens=completion_tokens,
                    cost=self._cost_chat_model(
-
-
+                        prompt_tokens,
+                        cached_tokens,
+                        completion_tokens,
                    ),
                ),
            ),
@@ -1479,9 +1498,11 @@ class OpenAIGPT(LanguageModel):
            return hashed_key, None
        return hashed_key, cached_val
 
-    def _cost_chat_model(self, prompt: int, completion: int) -> float:
+    def _cost_chat_model(self, prompt: int, cached: int, completion: int) -> float:
        price = self.chat_cost()
-        return (
+        return (
+            price[0] * (prompt - cached) + price[1] * cached + price[2] * completion
+        ) / 1000
 
    def _get_non_stream_token_usage(
        self, cached: bool, response: Dict[str, Any]
@@ -1499,14 +1520,24 @@ class OpenAIGPT(LanguageModel):
        """
        cost = 0.0
        prompt_tokens = 0
+        cached_tokens = 0
        completion_tokens = 0
-
-
-
-
+
+        usage = response.get("usage")
+        if not cached and not self.get_stream() and usage is not None:
+            prompt_tokens = usage.get("prompt_tokens") or 0
+            prompt_tokens_details = usage.get("prompt_tokens_details", {})
+            cached_tokens = prompt_tokens_details.get("cached_tokens") or 0
+            completion_tokens = usage.get("completion_tokens") or 0
+            cost = self._cost_chat_model(
+                prompt_tokens, cached_tokens, completion_tokens
+            )
 
        return LLMTokenUsage(
-            prompt_tokens=prompt_tokens,
+            prompt_tokens=prompt_tokens,
+            cached_tokens=cached_tokens,
+            completion_tokens=completion_tokens,
+            cost=cost,
        )
 
    def generate(self, prompt: str, max_tokens: int = 200) -> LLMResponse:
{langroid-0.56.11.dist-info → langroid-0.56.13.dist-info}/RECORD
CHANGED
@@ -3,13 +3,13 @@ langroid/exceptions.py,sha256=OPjece_8cwg94DLPcOGA1ddzy5bGh65pxzcHMnssTz8,2995
 langroid/mytypes.py,sha256=HIcYAqGeA9OK0Hlscym2FI5Oax9QFljDZoVgRlomhRk,4014
 langroid/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/__init__.py,sha256=ll0Cubd2DZ-fsCMl7e10hf9ZjFGKzphfBco396IKITY,786
-langroid/agent/base.py,sha256=
+langroid/agent/base.py,sha256=exiOhO0L1StZ8ziPBnunHYiKFvEnRyaLnMpBrc8tyMw,86263
 langroid/agent/batch.py,sha256=wpE9RqCNDVDhAXkCB7wEqfCIEAi6qKcrhaZ-Zr9T4C0,21375
 langroid/agent/chat_agent.py,sha256=pBnLGlAA6d2MK_1qa4GyhFZHnDf_RrUDli7__PKRRz4,88956
 langroid/agent/chat_document.py,sha256=0e6zYkqIorMIVbCsxOul9ziwAPPOWDsBsRV9E8ux-WI,18055
 langroid/agent/done_sequence_parser.py,sha256=oUPzQCkkAo-5qos3ndSV47Lre7O_LoGWwTybjE9sCwc,4381
 langroid/agent/openai_assistant.py,sha256=JkAcs02bIrgPNVvUWVR06VCthc5-ulla2QMBzux_q6o,34340
-langroid/agent/task.py,sha256=
+langroid/agent/task.py,sha256=Fzqu4TbKKIO8CZr9eyppYjhRomMVkppb81ig98U4iHs,102170
 langroid/agent/tool_message.py,sha256=BhjP-_TfQ2tgxuY4Yo_JHLOwwt0mJ4BwjPnREvEY4vk,14744
 langroid/agent/xml_tool_message.py,sha256=oeBKnJNoGaKdtz39XoWGMTNlVyXew2MWH5lgtYeh8wQ,15496
 langroid/agent/callbacks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -54,7 +54,7 @@ langroid/agent/tools/recipient_tool.py,sha256=dr0yTxgNEIoxUYxH6TtaExC4G_8WdJ0xGo
 langroid/agent/tools/retrieval_tool.py,sha256=zcAV20PP_6VzSd-UE-IJcabaBseFL_QNz59Bnig8-lE,946
 langroid/agent/tools/rewind_tool.py,sha256=XAXL3BpNhCmBGYq_qi_sZfHJuIw7NY2jp4wnojJ7WRs,5606
 langroid/agent/tools/segment_extract_tool.py,sha256=__srZ_VGYLVOdPrITUM8S0HpmX4q7r5FHWMDdHdEv8w,1440
-langroid/agent/tools/task_tool.py,sha256=
+langroid/agent/tools/task_tool.py,sha256=Z56QzELMNhU5TWGzI4MgxyYnw__6e75ZhCzBJ0lveqA,9686
 langroid/agent/tools/tavily_search_tool.py,sha256=soI-j0HdgVQLf09wRQScaEK4b5RpAX9C4cwOivRFWWI,1903
 langroid/agent/tools/mcp/__init__.py,sha256=DJNM0VeFnFS3pJKCyFGggT8JVjVu0rBzrGzasT1HaSM,387
 langroid/agent/tools/mcp/decorators.py,sha256=h7dterhsmvWJ8q4mp_OopmuG2DF71ty8cZwOyzdDZuk,1127
@@ -73,12 +73,12 @@ langroid/embedding_models/protoc/embeddings_pb2.pyi,sha256=UkNy7BrNsmQm0vLb3NtGX
 langroid/embedding_models/protoc/embeddings_pb2_grpc.py,sha256=9dYQqkW3JPyBpSEjeGXTNpSqAkC-6FPtBHyteVob2Y8,2452
 langroid/language_models/__init__.py,sha256=3aD2qC1lz8v12HX4B-dilv27gNxYdGdeu1QvDlkqqHs,1095
 langroid/language_models/azure_openai.py,sha256=SW0Fp_y6HpERr9l6TtF6CYsKgKwjUf_hSL_2mhTV4wI,5034
-langroid/language_models/base.py,sha256=
+langroid/language_models/base.py,sha256=r0MckcZGmuv_opKR2xvjzOz94mmWCzn9LJKgqyBjJ7c,28559
 langroid/language_models/client_cache.py,sha256=YtGcpalYkS_ckMU12J7VmUOGmVv1wzuLUBxgIagcpmA,6896
 langroid/language_models/config.py,sha256=9Q8wk5a7RQr8LGMT_0WkpjY8S4ywK06SalVRjXlfCiI,378
 langroid/language_models/mock_lm.py,sha256=tA9JpURznsMZ59iRhFYMmaYQzAc0D0BT-PiJIV58sAk,4079
-langroid/language_models/model_info.py,sha256=
-langroid/language_models/openai_gpt.py,sha256=
+langroid/language_models/model_info.py,sha256=LzRfZsWmOm7WF6KGJfcN0aVdRqk0URNuDGMMz6cFt50,17121
+langroid/language_models/openai_gpt.py,sha256=FFiJa9_j_bTiA8SzBv7xssuc7LGxT_TI7Pcg8XLJnzE,89230
 langroid/language_models/provider_params.py,sha256=fX25NAmYUIc1-nliMKpmTGZO6D6RpyTXtSDdZCZdb5w,5464
 langroid/language_models/utils.py,sha256=n55Oe2_V_4VNGhytvPWLYC-0tFS07RTjN83KWl-p_MI,6032
 langroid/language_models/prompt_formatter/__init__.py,sha256=2-5cdE24XoFDhifOLl8yiscohil1ogbP1ECkYdBlBsk,372
@@ -138,7 +138,7 @@ langroid/vector_store/pineconedb.py,sha256=otxXZNaBKb9f_H75HTaU3lMHiaR2NUp5MqwLZ
 langroid/vector_store/postgres.py,sha256=wHPtIi2qM4fhO4pMQr95pz1ZCe7dTb2hxl4VYspGZoA,16104
 langroid/vector_store/qdrantdb.py,sha256=ZYrT9mxoUCx_67Qzb5xnkWuFG12rfe30yAg4NgG2ueA,19168
 langroid/vector_store/weaviatedb.py,sha256=Yn8pg139gOy3zkaPfoTbMXEEBCiLiYa1MU5d_3UA1K4,11847
-langroid-0.56.
-langroid-0.56.
-langroid-0.56.
-langroid-0.56.
+langroid-0.56.13.dist-info/METADATA,sha256=W_sS_4htB4FnkLzrVk0KYhaRSpkK-rUEnFY1f-t0bAo,65745
+langroid-0.56.13.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+langroid-0.56.13.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
+langroid-0.56.13.dist-info/RECORD,,
{langroid-0.56.11.dist-info → langroid-0.56.13.dist-info}/WHEEL
File without changes
{langroid-0.56.11.dist-info → langroid-0.56.13.dist-info}/licenses/LICENSE
File without changes