litecoo 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
litecoo/__init__.py ADDED
File without changes
litecoo/agent_loop.py ADDED
@@ -0,0 +1,107 @@
1
+ """
2
+ Server-side agentic loop for litecoo.
3
+ When X-Agent-Loop: server header is set on /v1/messages requests,
4
+ litecoo executes tool calls internally instead of returning them to the client.
5
+ The loop continues until stop_reason=end_turn or max_turns is reached.
6
+ """
7
+
8
+ import logging
9
+ from typing import Any, Optional
10
+
11
+ logger = logging.getLogger("litecoo.agent_loop")
12
+
13
+
14
async def run_agent_loop(
    initial_messages: list[dict],
    system: str,
    tools: list[dict],
    kwargs: dict,
    alias: str,
    router: Any,
    session_id: Optional[str] = None,
    max_turns: int = 10,
    execute_tool_fn: Optional[Any] = None,
) -> dict:
    """
    Execute the agentic tool loop server-side.

    Each turn sends the running conversation to ``router.acompletion``,
    converts the reply with ``openai_response_to_anthropic`` and then either
    returns it (``end_turn`` / ``max_tokens``) or runs every requested tool,
    appends the results as OpenAI-style ``tool`` messages and loops again.

    Args:
        initial_messages: Conversation so far; copied, never mutated.
        system: System prompt, prepended unless the first message already
            has the ``system`` role.
        tools: Tool definitions forwarded to the model when non-empty.
        kwargs: Extra completion parameters forwarded on every turn.
        alias: Model alias passed to the router and to the converter.
        router: Object exposing an async ``acompletion(model=..., **kwargs)``.
        session_id: Passed to the tool executor when it declares a
            ``session_id`` parameter.
        max_turns: Upper bound on model calls.
        execute_tool_fn: Tool executor ``fn(name, input[, session_id=...])``;
            may be sync or async. Defaults to ``tools.execute_tool``.

    Returns:
        The final Anthropic-format response dict. When the loop stops because
        of ``max_turns`` or an unrecognised stop reason, the last response is
        returned with ``stop_reason`` forced to ``"end_turn"`` (an empty dict
        if no model call was made).
    """
    import inspect

    from .main import openai_response_to_anthropic  # late import to avoid circular

    if execute_tool_fn is not None:
        tool_fn = execute_tool_fn
    else:
        # The fallback must expose ``session_id`` by name: the dispatch below
        # only passes the session when the signature declares that parameter.
        def tool_fn(name, tool_input, session_id=None):
            return __import__("tools").execute_tool(name, tool_input, session_id)

    # Inspect the executor once instead of on every tool call.
    try:
        accepts_session_id = "session_id" in inspect.signature(tool_fn).parameters
    except (TypeError, ValueError):
        accepts_session_id = False

    messages = list(initial_messages)

    # Ensure system message is prepended if not already present
    if system and (not messages or messages[0].get("role") != "system"):
        messages.insert(0, {"role": "system", "content": system})

    last_anthropic_resp: dict = {}

    for turn in range(max_turns):
        call_kwargs = {**kwargs, "messages": messages, "stream": False}
        # ``model`` is passed explicitly below; a duplicate would be a TypeError.
        call_kwargs.pop("model", None)
        if tools:
            call_kwargs["tools"] = tools

        oai_resp = await router.acompletion(model=alias, **call_kwargs)
        raw = oai_resp.model_dump() if hasattr(oai_resp, "model_dump") else dict(oai_resp)

        anthropic_resp = openai_response_to_anthropic(raw, alias)
        last_anthropic_resp = anthropic_resp
        stop_reason = anthropic_resp.get("stop_reason", "end_turn")

        if stop_reason == "end_turn":
            logger.info(f"[agent_loop] done after {turn + 1} turn(s)")
            return anthropic_resp

        if stop_reason == "max_tokens":
            logger.info(f"[agent_loop] max_tokens hit at turn {turn + 1}")
            return anthropic_resp

        if stop_reason != "tool_use":
            # Unknown stop reason — return what we have
            logger.warning(f"[agent_loop] unexpected stop_reason '{stop_reason}' at turn {turn + 1}")
            break

        # Build the assistant message from the OAI response
        choice = (raw.get("choices") or [{}])[0]
        asst_msg = choice.get("message") or {}
        if hasattr(asst_msg, "model_dump"):
            asst_msg = asst_msg.model_dump()
        messages.append({"role": "assistant", **asst_msg})

        # Execute each tool and append results to messages in OpenAI format
        for block in anthropic_resp.get("content", []):
            if block.get("type") != "tool_use":
                continue
            tool_name = block.get("name", "")
            tool_input = block.get("input") or {}
            tool_id = block.get("id", "")

            logger.info(f"[agent_loop] executing tool '{tool_name}' (turn {turn + 1})")

            if isinstance(tool_input, dict) and "_error_invalid_json" in tool_input:
                # Feed the parse failure back to the model instead of running the tool.
                error_msg = tool_input["_error_invalid_json"]
                raw_args = tool_input.get("raw_arguments", "")
                result = f"Error: Invalid JSON arguments for tool '{tool_name}'. {error_msg}. Raw input was: {raw_args}. Please correct the JSON syntax and try again."
            else:
                if accepts_session_id:
                    result = tool_fn(tool_name, tool_input, session_id=session_id)
                else:
                    result = tool_fn(tool_name, tool_input)
                # Accept both coroutine-returning and plain synchronous executors.
                if inspect.isawaitable(result):
                    result = await result

            messages.append({
                "role": "tool",
                "tool_call_id": tool_id,
                "content": str(result),
            })
    else:
        # Loop ran to completion without returning: max_turns exceeded.
        logger.warning(f"[agent_loop] max_turns ({max_turns}) exceeded")

    # Annotate and return the last response so the client sees a finished turn.
    if last_anthropic_resp:
        last_anthropic_resp["stop_reason"] = "end_turn"
    return last_anthropic_resp
litecoo/cli.py ADDED
@@ -0,0 +1,18 @@
1
+ import argparse
2
+ import sys
3
+ import uvicorn
4
+
5
def main():
    """Parse the command-line options and serve ``litecoo.main:app`` with uvicorn."""
    cli = argparse.ArgumentParser(description="litecoo - Free LLM proxy for Cowork")
    cli.add_argument(
        "--host",
        default="0.0.0.0",
        help="Host to bind (default: 0.0.0.0)",
    )
    cli.add_argument(
        "--port",
        type=int,
        default=8000,
        help="Port to bind (default: 8000)",
    )
    cli.add_argument(
        "--env-file",
        default=".env",
        help="Environment file to load",
    )
    options = cli.parse_args()

    # The app is referenced by import string rather than by object so that
    # uvicorn can re-import it (e.g. for reloading) and behaves the same on
    # every platform.
    uvicorn.run(
        "litecoo.main:app",
        host=options.host,
        port=options.port,
        env_file=options.env_file,
    )


if __name__ == "__main__":
    main()
litecoo/context.py ADDED
@@ -0,0 +1,57 @@
1
+ """
2
+ Sliding window context management for litecoo.
3
+ Prevents ContextWindowExceededError by trimming old messages when needed.
4
+ """
5
+
6
+ import logging
7
+ import os
8
+ from typing import Any
9
+
10
logger = logging.getLogger("litecoo.context")

_DEFAULT_MAX_CONTEXT_TOKENS = 50000


def _read_max_context_tokens() -> int:
    """Read ``litecoo_MAX_CONTEXT_TOKENS``, falling back to the default on bad input."""
    raw_value = os.getenv("litecoo_MAX_CONTEXT_TOKENS", str(_DEFAULT_MAX_CONTEXT_TOKENS))
    try:
        return int(raw_value)
    except ValueError:
        # A malformed setting should not prevent the module from importing.
        logger.warning(
            f"[context] invalid litecoo_MAX_CONTEXT_TOKENS={raw_value!r}; "
            f"using {_DEFAULT_MAX_CONTEXT_TOKENS}"
        )
        return _DEFAULT_MAX_CONTEXT_TOKENS


# Estimated-token budget above which the conversation is compacted.
MAX_TOKENS_ESTIMATE: int = _read_max_context_tokens()
# Minimum number of trailing non-system messages that are always kept.
SUMMARY_KEEP_MESSAGES: int = 6


async def fit_to_context(messages: list[dict], alias: str, router: Any) -> list[dict]:
    """
    Trim messages to stay within the context window.

    The size is estimated as total ``content`` characters divided by four.
    When the estimate reaches ``MAX_TOKENS_ESTIMATE``, all system messages and
    the most recent non-system messages are kept, and everything in between is
    replaced by a single ``user`` note saying how many messages were removed.

    The kept tail never starts with a ``tool`` message: the cut is moved back
    so that tool results stay together with the assistant message preceding
    them, because a tool result without its originating call is rejected by
    OpenAI-compatible chat APIs.

    Args:
        messages: Conversation in OpenAI chat format.
        alias: Unused; kept for interface compatibility.
        router: Unused; kept for interface compatibility.

    Returns:
        The compacted list, or the original list unchanged when no trimming is
        needed or on any error (never crash the request).
    """
    try:
        estimate = sum(len(str(m.get("content", ""))) for m in messages) // 4
        if estimate < MAX_TOKENS_ESTIMATE:
            return messages

        # Separate system messages (always keep)
        system_msgs = [m for m in messages if m.get("role") == "system"]
        non_system = [m for m in messages if m.get("role") != "system"]

        if len(non_system) <= SUMMARY_KEEP_MESSAGES:
            return messages  # Nothing to trim

        # Index of the first kept message; walk back over tool results so the
        # tail starts at the message that precedes them.
        cut = len(non_system) - SUMMARY_KEEP_MESSAGES
        while cut > 0 and non_system[cut].get("role") == "tool":
            cut -= 1

        if cut == 0:
            return messages  # Keeping tool exchanges intact leaves nothing to drop

        dropped = non_system[:cut]
        keep_tail = non_system[cut:]

        # Drop the middle messages entirely to save context, but leave a note
        # so the model knows that earlier turns are missing.
        summary_msg: dict = {
            "role": "user",
            "content": f"[System Note: {len(dropped)} messages have been removed from the middle of this conversation to save context window. If you need to recall past events, rely on your tools or ask the user.]",
        }

        compacted = system_msgs + [summary_msg] + keep_tail
        logger.info(
            f"[context] compacted {len(messages)} → {len(compacted)} messages "
            f"(estimate was {estimate} tokens)"
        )
        return compacted

    except Exception as exc:
        logger.warning(f"[context] fit_to_context error (returning original): {exc}")
        return messages
@@ -0,0 +1,260 @@
1
+ import os
2
+ import json
3
+ import logging
4
+ import time
5
+ import httpx
6
+ from typing import Any, AsyncGenerator, Dict, List, Optional
7
+
8
+ logger = logging.getLogger("litecoo.router")
9
+
10
class RouterException(Exception):
    """Base error raised by the router; carries the HTTP status and raw response."""

    def __init__(self, message: str, status_code: int = 500, response: Optional[Any] = None):
        super().__init__(message)
        self.message = message
        self.status_code = status_code
        self.response = response


# Aliases kept for compatibility inside main.py
LiteLLMException = RouterException


class RateLimitError(RouterException):
    """Router error subtype for rate limiting."""


class AuthenticationError(RouterException):
    """Router error subtype for authentication failures."""


class ContextWindowExceededError(RouterException):
    """Router error subtype for prompts that exceed the context window."""
28
+
29
+ # Mock response classes
30
class ChoiceMessage:
    """Assistant message of a completion choice (role, text and tool calls)."""

    def __init__(self, role: str, content: Optional[str], tool_calls: Optional[list] = None):
        self.role = role
        self.content = content
        self.tool_calls = tool_calls or []

    def model_dump(self) -> dict:
        """Return the message as a dict; ``tool_calls`` is omitted when empty."""
        res = {"role": self.role, "content": self.content}
        if self.tool_calls:
            res["tool_calls"] = self.tool_calls
        return res


class Choice:
    """One completion choice: a message plus the reason generation stopped."""

    def __init__(self, message: ChoiceMessage, finish_reason: str = "stop"):
        self.message = message
        self.finish_reason = finish_reason


class Usage:
    """Token accounting reported for a completion."""

    def __init__(self, prompt_tokens: int, completion_tokens: int, total_tokens: int):
        self.prompt_tokens = prompt_tokens
        self.completion_tokens = completion_tokens
        self.total_tokens = total_tokens


class ChatCompletionResponse:
    """
    Attribute-style view over a raw chat-completion response dict.

    Exposes ``choices``, ``usage``, ``id`` and ``model``; ``model_dump()``
    returns the original dict untouched. Keys that are missing *or* explicitly
    ``null`` (``choices``, ``message``, ``usage`` and the token counts) fall
    back to empty/zero defaults instead of raising.
    """

    def __init__(self, raw_dict: dict):
        self._dict = raw_dict
        self.choices = []
        # ``or`` (rather than a .get default) also covers keys present as null.
        for choice_data in raw_dict.get("choices") or []:
            msg_data = choice_data.get("message") or {}
            msg = ChoiceMessage(
                role=msg_data.get("role", "assistant"),
                content=msg_data.get("content"),
                tool_calls=msg_data.get("tool_calls"),
            )
            self.choices.append(
                Choice(message=msg, finish_reason=choice_data.get("finish_reason", "stop"))
            )

        usage_data = raw_dict.get("usage") or {}
        self.usage = Usage(
            prompt_tokens=usage_data.get("prompt_tokens") or 0,
            completion_tokens=usage_data.get("completion_tokens") or 0,
            total_tokens=usage_data.get("total_tokens") or 0,
        )
        self.id = raw_dict.get("id", "")
        self.model = raw_dict.get("model", "")

    def model_dump(self) -> dict:
        """Return the raw response dict this object was built from."""
        return self._dict
76
+
77
+ # Streaming support classes
78
class ToolCallFunction:
    """Function part of a streamed tool call (name and argument fragment)."""

    def __init__(self, name: Optional[str], arguments: Optional[str]):
        self.name = name
        self.arguments = arguments


class ToolCallDelta:
    """One streamed tool-call fragment, addressed by its ``index``."""

    def __init__(self, index: int, id: Optional[str], function: Optional[ToolCallFunction]):
        self.index = index
        self.id = id
        self.function = function


class Delta:
    """Incremental content and/or tool-call fragments carried by a chunk."""

    def __init__(self, content: Optional[str] = None, tool_calls: Optional[list] = None):
        self.content = content
        self.tool_calls = tool_calls


class StreamingChoice:
    """A streamed choice; wraps the chunk's ``delta``."""

    def __init__(self, delta: Delta):
        self.delta = delta


class StreamingChunk:
    """
    Attribute-style view over one raw streaming chunk dict.

    ``choices[i].delta.tool_calls`` is ``None`` when the delta has no
    ``tool_calls`` key or the key is ``null``, and a (possibly empty) list of
    ``ToolCallDelta`` otherwise. ``null`` values for ``choices``, ``delta``
    and ``function`` are treated as empty instead of raising.
    """

    def __init__(self, raw_dict: dict):
        self._dict = raw_dict
        self.choices = []
        for choice_data in raw_dict.get("choices") or []:
            delta_data = choice_data.get("delta") or {}
            raw_tool_calls = delta_data.get("tool_calls")

            tool_calls = None
            # A present-but-null ``tool_calls`` means "no tool-call fragments
            # in this chunk", same as the key being absent.
            if raw_tool_calls is not None:
                tool_calls = []
                for tc in raw_tool_calls:
                    fn_data = tc.get("function") or {}
                    tool_calls.append(
                        ToolCallDelta(
                            index=tc.get("index", 0),
                            id=tc.get("id"),
                            function=ToolCallFunction(
                                name=fn_data.get("name"),
                                arguments=fn_data.get("arguments"),
                            ),
                        )
                    )

            delta = Delta(content=delta_data.get("content"), tool_calls=tool_calls)
            self.choices.append(StreamingChoice(delta=delta))

    def json(self) -> str:
        """Serialise the raw chunk dict to a JSON string."""
        return json.dumps(self._dict)

    def model_dump_json(self, **kwargs) -> str:
        """Same as :meth:`json`; keyword arguments are accepted and ignored."""
        return json.dumps(self._dict)
128
+
129
+ def _resolve_env_vars(config_val: Any) -> Any:
130
+ """Recursively resolve values starting with 'os.environ/' to actual env variables."""
131
+ if isinstance(config_val, str):
132
+ if config_val.startswith("os.environ/"):
133
+ env_var = config_val.removeprefix("os.environ/")
134
+ return os.getenv(env_var, "")
135
+ return config_val
136
+ elif isinstance(config_val, dict):
137
+ return {k: _resolve_env_vars(v) for k, v in config_val.items()}
138
+ elif isinstance(config_val, list):
139
+ return [_resolve_env_vars(i) for i in config_val]
140
+ return config_val
141
+
142
class Router:
    """
    Minimal async router that forwards chat-completion requests to an
    OpenAI-compatible ``/chat/completions`` endpoint.

    ``model_list`` entries are dicts with a ``model_name`` alias and a
    ``provider_params`` (or ``litellm_params``) dict that may contain
    ``model``, ``api_base``, ``api_key`` and ``extra_headers``. Values of the
    form ``os.environ/NAME`` are resolved from the environment per request.
    """

    def __init__(self, model_list: List[Dict], **kwargs):
        # Extra keyword arguments are accepted and ignored.
        self.model_list = model_list
        self.client = httpx.AsyncClient(timeout=60.0)

    async def aclose(self) -> None:
        """Close the underlying HTTP client and its connection pool."""
        await self.client.aclose()

    async def acompletion(self, model: str, **kwargs) -> Any:
        """
        Send a chat-completion request for the alias ``model``.

        Args:
            model: Alias looked up in ``model_list``; used as the provider
                model name as-is when no entry matches.
            **kwargs: Request body fields forwarded to the provider
                (``model``, ``api_base`` and ``api_key`` are stripped).

        Returns:
            A ``ChatCompletionResponse`` for regular requests, or an async
            generator of ``StreamingChunk`` when ``stream`` is truthy.

        Raises:
            RouterException: (or a subclass) on transport failures, non-200
                responses, or a non-JSON success body. For streaming requests
                these are raised while iterating the returned generator.
        """
        # 1. Find the target model configuration in model_list
        config_item = next(
            (item for item in self.model_list if item.get("model_name") == model),
            None,
        )
        if not config_item:
            logger.warning(f"Model {model} not found in model_list configuration. Using as-is.")
            config_item = {"litellm_params": {"model": model}}

        params = config_item.get("provider_params") or config_item.get("litellm_params", {})
        # Resolve any env variables (e.g. API keys) in parameters
        resolved_params = _resolve_env_vars(params)

        target_model = resolved_params.get("model", model)
        api_base = resolved_params.get("api_base")
        api_key = resolved_params.get("api_key")
        extra_headers = resolved_params.get("extra_headers") or {}

        # 2. Determine target API Base URL
        if target_model.startswith("openrouter/"):
            target_model = target_model.removeprefix("openrouter/")
            if not api_base:
                api_base = "https://openrouter.ai/api/v1"
        elif target_model.startswith("openai/"):
            target_model = target_model.removeprefix("openai/")

        if not api_base:
            api_base = "https://api.openai.com/v1"

        # 3. Construct headers
        headers = {"Content-Type": "application/json", **extra_headers}
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"

        # 4. Construct payload
        payload = {
            "model": target_model,
            **{k: v for k, v in kwargs.items() if k not in ("model", "api_base", "api_key")},
        }

        # 5. Make request
        url = f"{api_base.rstrip('/')}/chat/completions"
        logger.info(f"Forwarding completion request to: {url} (model: {target_model})")

        if payload.get("stream"):
            # Nothing is sent until the caller starts iterating the generator.
            return self._stream_completion(url, payload, headers)

        try:
            resp = await self.client.post(url, json=payload, headers=headers)
        except httpx.RequestError as e:
            logger.error(f"HTTP request error: {e}")
            raise RouterException(f"Provider HTTP request failed: {e}", status_code=500) from e

        if resp.status_code != 200:
            self._handle_error_status(resp.status_code, resp.text, resp)

        try:
            body = resp.json()
        except ValueError as e:
            # A 200 with a non-JSON body is reported as a router error rather
            # than leaking a decode error to the caller.
            logger.error(f"Provider returned a non-JSON body: {e}")
            raise RouterException(
                "Provider returned a non-JSON response", status_code=502, response=resp
            ) from e
        return ChatCompletionResponse(body)

    async def _stream_completion(
        self, url: str, payload: dict, headers: dict
    ) -> AsyncGenerator[Any, None]:
        """Yield a ``StreamingChunk`` for every SSE ``data:`` event until ``[DONE]``."""
        try:
            req = self.client.build_request("POST", url, json=payload, headers=headers)
            resp = await self.client.send(req, stream=True)
        except httpx.RequestError as e:
            logger.error(f"HTTP request error: {e}")
            raise RouterException(f"Provider HTTP request failed: {e}", status_code=500) from e

        try:
            if resp.status_code != 200:
                await resp.aread()
                self._handle_error_status(resp.status_code, resp.text, resp)

            # aiter_lines() decodes incrementally, so a multi-byte character
            # split across two network chunks cannot break decoding.
            async for raw_line in resp.aiter_lines():
                line = raw_line.strip()
                if not line.startswith("data:"):
                    continue  # blank separators, comments and other SSE fields
                data_part = line.removeprefix("data:").strip()
                if data_part == "[DONE]":
                    break
                try:
                    chunk = StreamingChunk(json.loads(data_part))
                except Exception as err:
                    # Best-effort: skip a malformed event and keep streaming.
                    logger.warning(f"Failed to parse streaming chunk: {err}. Line: {line}")
                    continue
                yield chunk
        except httpx.RequestError as e:
            # Transport errors in the middle of the stream surface here, not
            # in acompletion(), because the generator body runs lazily.
            logger.error(f"HTTP request error: {e}")
            raise RouterException(f"Provider HTTP request failed: {e}", status_code=500) from e
        finally:
            await resp.aclose()

    def _handle_error_status(self, status_code: int, response_text: str, response_obj: Optional[httpx.Response] = None):
        """
        Translate a failed upstream response into a router exception.

        Always raises: ``RateLimitError`` for 429, ``AuthenticationError`` for
        401/403, ``ContextWindowExceededError`` for a 400 whose error code or
        message mentions the context length/window, ``RouterException``
        otherwise. The message is taken from the body's ``error`` entry when
        one can be extracted.
        """
        logger.error(f"Upstream provider error status: {status_code}. Response: {response_text}")

        msg = f"Provider failed with status {status_code}"
        code = ""
        try:
            err_json = json.loads(response_text)
        except (TypeError, ValueError):
            err_json = None  # non-JSON error body: keep the generic message

        error = err_json.get("error") if isinstance(err_json, dict) else None
        if isinstance(error, dict):
            msg = str(error.get("message") or msg)
            code = str(error.get("code") or "")
        elif isinstance(error, str) and error:
            msg = error

        # Check both the machine-readable code and the human-readable message:
        # the message may spell it "context length" while the code uses
        # "context_length_exceeded".
        haystack = f"{code} {msg}".lower()
        context_markers = ("context_length", "context length", "context window")

        if status_code == 429:
            raise RateLimitError(msg, status_code=status_code, response=response_obj)
        elif status_code in (401, 403):
            raise AuthenticationError(msg, status_code=status_code, response=response_obj)
        elif status_code == 400 and any(marker in haystack for marker in context_markers):
            raise ContextWindowExceededError(msg, status_code=status_code, response=response_obj)
        else:
            raise RouterException(msg, status_code=status_code, response=response_obj)