lm-deluge 0.0.67__py3-none-any.whl → 0.0.90__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lm-deluge might be problematic.
- lm_deluge/__init__.py +1 -2
- lm_deluge/api_requests/anthropic.py +117 -22
- lm_deluge/api_requests/base.py +84 -11
- lm_deluge/api_requests/bedrock.py +30 -6
- lm_deluge/api_requests/chat_reasoning.py +4 -0
- lm_deluge/api_requests/gemini.py +166 -20
- lm_deluge/api_requests/openai.py +145 -25
- lm_deluge/batches.py +15 -45
- lm_deluge/client.py +309 -50
- lm_deluge/config.py +15 -3
- lm_deluge/models/__init__.py +14 -1
- lm_deluge/models/anthropic.py +29 -14
- lm_deluge/models/arcee.py +16 -0
- lm_deluge/models/deepseek.py +36 -4
- lm_deluge/models/google.py +42 -0
- lm_deluge/models/grok.py +24 -0
- lm_deluge/models/kimi.py +36 -0
- lm_deluge/models/minimax.py +18 -0
- lm_deluge/models/openai.py +100 -0
- lm_deluge/models/openrouter.py +133 -7
- lm_deluge/models/together.py +11 -0
- lm_deluge/models/zai.py +50 -0
- lm_deluge/pipelines/gepa/__init__.py +95 -0
- lm_deluge/pipelines/gepa/core.py +354 -0
- lm_deluge/pipelines/gepa/docs/samples.py +705 -0
- lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
- lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
- lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
- lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
- lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
- lm_deluge/pipelines/gepa/optimizer.py +435 -0
- lm_deluge/pipelines/gepa/proposer.py +235 -0
- lm_deluge/pipelines/gepa/util.py +165 -0
- lm_deluge/{llm_tools → pipelines}/score.py +2 -2
- lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
- lm_deluge/prompt.py +537 -88
- lm_deluge/request_context.py +7 -2
- lm_deluge/server/__init__.py +24 -0
- lm_deluge/server/__main__.py +144 -0
- lm_deluge/server/adapters.py +369 -0
- lm_deluge/server/app.py +388 -0
- lm_deluge/server/auth.py +71 -0
- lm_deluge/server/model_policy.py +215 -0
- lm_deluge/server/models_anthropic.py +172 -0
- lm_deluge/server/models_openai.py +175 -0
- lm_deluge/tool/__init__.py +1130 -0
- lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
- lm_deluge/tool/builtin/anthropic/bash.py +0 -0
- lm_deluge/tool/builtin/anthropic/computer_use.py +0 -0
- lm_deluge/tool/builtin/gemini.py +59 -0
- lm_deluge/tool/builtin/openai.py +74 -0
- lm_deluge/tool/cua/__init__.py +173 -0
- lm_deluge/tool/cua/actions.py +148 -0
- lm_deluge/tool/cua/base.py +27 -0
- lm_deluge/tool/cua/batch.py +215 -0
- lm_deluge/tool/cua/converters.py +466 -0
- lm_deluge/tool/cua/kernel.py +702 -0
- lm_deluge/tool/cua/trycua.py +989 -0
- lm_deluge/tool/prefab/__init__.py +45 -0
- lm_deluge/tool/prefab/batch_tool.py +156 -0
- lm_deluge/tool/prefab/docs.py +1119 -0
- lm_deluge/tool/prefab/email.py +294 -0
- lm_deluge/tool/prefab/filesystem.py +1711 -0
- lm_deluge/tool/prefab/full_text_search/__init__.py +285 -0
- lm_deluge/tool/prefab/full_text_search/tantivy_index.py +396 -0
- lm_deluge/tool/prefab/memory.py +458 -0
- lm_deluge/tool/prefab/otc/__init__.py +165 -0
- lm_deluge/tool/prefab/otc/executor.py +281 -0
- lm_deluge/tool/prefab/otc/parse.py +188 -0
- lm_deluge/tool/prefab/random.py +212 -0
- lm_deluge/tool/prefab/rlm/__init__.py +296 -0
- lm_deluge/tool/prefab/rlm/executor.py +349 -0
- lm_deluge/tool/prefab/rlm/parse.py +144 -0
- lm_deluge/tool/prefab/sandbox/__init__.py +19 -0
- lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +483 -0
- lm_deluge/tool/prefab/sandbox/docker_sandbox.py +609 -0
- lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +546 -0
- lm_deluge/tool/prefab/sandbox/modal_sandbox.py +469 -0
- lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +827 -0
- lm_deluge/tool/prefab/sheets.py +385 -0
- lm_deluge/tool/prefab/skills.py +0 -0
- lm_deluge/tool/prefab/subagents.py +233 -0
- lm_deluge/tool/prefab/todos.py +342 -0
- lm_deluge/tool/prefab/tool_search.py +169 -0
- lm_deluge/tool/prefab/web_search.py +199 -0
- lm_deluge/tracker.py +16 -13
- lm_deluge/util/schema.py +412 -0
- lm_deluge/warnings.py +8 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/METADATA +23 -9
- lm_deluge-0.0.90.dist-info/RECORD +132 -0
- lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
- lm_deluge/built_in_tools/openai.py +0 -28
- lm_deluge/presets/cerebras.py +0 -17
- lm_deluge/presets/meta.py +0 -13
- lm_deluge/tool.py +0 -849
- lm_deluge-0.0.67.dist-info/RECORD +0 -72
- lm_deluge/{llm_tools → pipelines}/__init__.py +1 -1
- /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
- /lm_deluge/{built_in_tools/anthropic/bash.py → skills/anthropic.py} +0 -0
- /lm_deluge/{built_in_tools/anthropic/computer_use.py → skills/compat.py} +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/editor.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/base.py +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/top_level.txt +0 -0
lm_deluge/request_context.py
CHANGED
@@ -1,11 +1,14 @@
from dataclasses import dataclass, field
from functools import cached_property
-from typing import Any, Callable
+from typing import Any, Callable, Sequence, TYPE_CHECKING

from .config import SamplingParams
from .prompt import CachePattern, Conversation
from .tracker import StatusTracker

+if TYPE_CHECKING:
+    from pydantic import BaseModel
+

@dataclass
class RequestContext:
@@ -31,7 +34,8 @@ class RequestContext:
    callback: Callable | None = None

    # Optional features
-    tools:
+    tools: Sequence[Any] | None = None
+    output_schema: "type[BaseModel] | dict | None" = None
    cache: CachePattern | None = None
    use_responses_api: bool = False
    background: bool = False
@@ -66,6 +70,7 @@ class RequestContext:
            "results_arr": self.results_arr,
            "callback": self.callback,
            "tools": self.tools,
+            "output_schema": self.output_schema,
            "cache": self.cache,
            "use_responses_api": self.use_responses_api,
            "background": self.background,
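For reference, the new output_schema field is annotated as type[BaseModel] | dict | None, so it can hold either a Pydantic model class or a plain dict (presumably a JSON schema). A minimal sketch of the two forms; the Answer model here is purely illustrative:

from pydantic import BaseModel

class Answer(BaseModel):  # hypothetical example model
    text: str
    confidence: float

schema_as_model = Answer                     # a type[BaseModel]
schema_as_dict = Answer.model_json_schema()  # an equivalent plain-dict schema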
lm_deluge/server/__init__.py
ADDED
@@ -0,0 +1,24 @@
+"""
+LM-Deluge Proxy Server
+
+A FastAPI-based proxy server that exposes OpenAI-compatible and
+Anthropic-compatible API endpoints, routing requests through lm-deluge
+to any supported provider.
+
+Usage:
+    python -m lm_deluge.server
+
+Environment Variables:
+    DELUGE_PROXY_API_KEY: Optional API key that clients must provide
+    DELUGE_PROXY_PORT: Port to run on (default: 8000)
+    DELUGE_PROXY_HOST: Host to bind (default: 0.0.0.0)
+    DELUGE_PROXY_LOG_REQUESTS: Log full incoming proxy requests when set
+    DELUGE_PROXY_LOG_PROVIDER_REQUESTS: Log outbound provider requests when set
+
+Provider keys (same as LLMClient):
+    OPENAI_API_KEY, ANTHROPIC_API_KEY, GOOGLE_API_KEY, etc.
+"""
+
+from .app import create_app
+
+__all__ = ["create_app"]
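Once the proxy is running, any OpenAI-compatible client can be pointed at it. A minimal sketch, assuming the server is on localhost:8000 with authentication disabled and that the chosen model id is permitted by the model policy:

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="not-needed")

resp = client.chat.completions.create(
    model="gpt-4.1-mini",  # example model id; must be allowed by the proxy's policy
    messages=[{"role": "user", "content": "Say hello."}],
)
print(resp.choices[0].message.content)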
lm_deluge/server/__main__.py
ADDED
@@ -0,0 +1,144 @@
+"""
+Entry point for running the server with: python -m lm_deluge.server
+
+Usage:
+    python -m lm_deluge.server [--host HOST] [--port PORT] [--config PATH]
+
+Environment Variables:
+    DELUGE_PROXY_HOST: Host to bind (default: 0.0.0.0)
+    DELUGE_PROXY_PORT: Port to run on (default: 8000)
+    DELUGE_PROXY_API_KEY: Optional API key that clients must provide
+    DELUGE_PROXY_LOG_REQUESTS: Log full incoming proxy requests when set
+    DELUGE_PROXY_LOG_PROVIDER_REQUESTS: Log outbound provider requests when set
+"""
+
+from __future__ import annotations
+
+import argparse
+import os
+
+import json5
+
+from .app import create_app
+from .model_policy import build_policy
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="LM-Deluge Proxy Server - OpenAI and Anthropic compatible API proxy"
+    )
+    parser.add_argument(
+        "--host",
+        type=str,
+        default=os.getenv("DELUGE_PROXY_HOST", "0.0.0.0"),
+        help="Host to bind (default: 0.0.0.0)",
+    )
+    parser.add_argument(
+        "--port",
+        type=int,
+        default=int(os.getenv("DELUGE_PROXY_PORT", "8000")),
+        help="Port to run on (default: 8000)",
+    )
+    parser.add_argument(
+        "--reload",
+        action="store_true",
+        help="Enable auto-reload for development",
+    )
+    parser.add_argument(
+        "--config",
+        type=str,
+        help="Path to proxy YAML config file",
+    )
+    parser.add_argument(
+        "--mode",
+        type=str,
+        choices=["allow_user_pick", "force_default", "alias_only"],
+        help="Override model policy mode",
+    )
+    parser.add_argument(
+        "--allow-model",
+        action="append",
+        dest="allowed_models",
+        help="Allow a model id (repeat to allow multiple models)",
+    )
+    parser.add_argument(
+        "--default-model",
+        type=str,
+        help="Default model or alias for force_default mode",
+    )
+    parser.add_argument(
+        "--routes",
+        type=str,
+        help="JSON5 string defining route aliases and strategies",
+    )
+    alias_group = parser.add_mutually_exclusive_group()
+    alias_group.add_argument(
+        "--expose-aliases",
+        action="store_true",
+        help="Expose route aliases in /v1/models",
+    )
+    alias_group.add_argument(
+        "--hide-aliases",
+        action="store_true",
+        help="Hide route aliases in /v1/models",
+    )
+
+    args = parser.parse_args()
+
+    # Import here to avoid loading uvicorn unless needed
+    try:
+        import uvicorn
+    except ImportError:
+        print("Error: uvicorn is required to run the server.")
+        print("Install it with: pip install lm-deluge[server]")
+        raise SystemExit(1)
+
+    policy_overrides = {}
+    if args.mode:
+        policy_overrides["mode"] = args.mode
+    if args.allowed_models:
+        policy_overrides["allowed_models"] = args.allowed_models
+    if args.default_model:
+        policy_overrides["default_model"] = args.default_model
+    if args.routes:
+        try:
+            policy_overrides["routes"] = json5.loads(args.routes)
+        except Exception as exc:
+            print(f"Error parsing --routes JSON5: {exc}")
+            raise SystemExit(2)
+    if args.expose_aliases:
+        policy_overrides["expose_aliases"] = True
+    elif args.hide_aliases:
+        policy_overrides["expose_aliases"] = False
+
+    try:
+        policy = build_policy(path=args.config, overrides=policy_overrides)
+    except Exception as exc:
+        print(f"Invalid proxy model policy: {exc}")
+        raise SystemExit(2)
+    app = create_app(policy)
+
+    print(f"Starting LM-Deluge Proxy Server on {args.host}:{args.port}")
+    print("Endpoints:")
+    print(f"  OpenAI: http://{args.host}:{args.port}/v1/chat/completions")
+    print(f"  Anthropic: http://{args.host}:{args.port}/v1/messages")
+    print(f"  Models: http://{args.host}:{args.port}/v1/models")
+    print(f"  Health: http://{args.host}:{args.port}/health")
+
+    if os.getenv("DELUGE_PROXY_API_KEY"):
+        print("\nAuthentication: ENABLED (DELUGE_PROXY_API_KEY is set)")
+    else:
+        print("\nAuthentication: DISABLED (set DELUGE_PROXY_API_KEY to enable)")
+
+    print()
+
+    uvicorn.run(
+        app,
+        host=args.host,
+        port=args.port,
+        reload=args.reload,
+    )
+
+
+if __name__ == "__main__":
+    main()
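The same wiring the CLI performs can be done programmatically with the functions shown above. A short sketch under that assumption; the override values are illustrative:

import uvicorn

from lm_deluge.server import create_app
from lm_deluge.server.model_policy import build_policy

# Build a policy from an optional config file plus inline overrides, then serve it.
policy = build_policy(
    path=None,  # or a path to a proxy config file
    overrides={"mode": "allow_user_pick", "allowed_models": ["gpt-4.1-mini"]},
)
app = create_app(policy)
uvicorn.run(app, host="0.0.0.0", port=8000)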
lm_deluge/server/adapters.py
ADDED
@@ -0,0 +1,369 @@
+"""
+Adapters for converting between API formats and lm-deluge types.
+"""
+
+from __future__ import annotations
+
+import json
+from typing import Any
+
+from lm_deluge.api_requests.response import APIResponse
+from lm_deluge.config import SamplingParams
+from lm_deluge.prompt import Conversation, Text, ThoughtSignature, Thinking, ToolCall
+from lm_deluge.tool import Tool
+
+from .models_anthropic import (
+    AnthropicMessagesRequest,
+    AnthropicMessagesResponse,
+    AnthropicResponseContentBlock,
+    AnthropicUsage,
+)
+from .models_openai import (
+    OpenAIChatCompletionsRequest,
+    OpenAIChatCompletionsResponse,
+    OpenAIChoice,
+    OpenAIFunctionCall,
+    OpenAIResponseMessage,
+    OpenAIToolCall,
+    OpenAIUsage,
+)
+
+
+# ============================================================================
+# OpenAI Request Conversion
+# ============================================================================
+
+
+def openai_request_to_conversation(req: OpenAIChatCompletionsRequest) -> Conversation:
+    """Convert OpenAI request messages to lm-deluge Conversation."""
+    # Use existing conversion - it handles all the complexity
+    messages_dicts = [msg.model_dump(exclude_none=True) for msg in req.messages]
+    return Conversation.from_openai_chat(messages_dicts)
+
+
+def openai_request_to_sampling_params(
+    req: OpenAIChatCompletionsRequest,
+) -> SamplingParams:
+    """Extract SamplingParams from OpenAI request."""
+    params: dict[str, Any] = {}
+
+    if req.temperature is not None:
+        params["temperature"] = req.temperature
+    if req.top_p is not None:
+        params["top_p"] = req.top_p
+    if req.max_completion_tokens is not None:
+        params["max_new_tokens"] = req.max_completion_tokens
+    elif req.max_tokens is not None:
+        params["max_new_tokens"] = req.max_tokens
+    if req.reasoning_effort is not None:
+        params["reasoning_effort"] = req.reasoning_effort
+    if req.response_format and req.response_format.get("type") == "json_object":
+        params["json_mode"] = True
+    if req.logprobs:
+        params["logprobs"] = True
+    if req.top_logprobs is not None:
+        params["top_logprobs"] = req.top_logprobs
+
+    return SamplingParams(**params)
+
+
+def openai_tools_to_lm_deluge(tools: list[Any]) -> list[Tool]:
+    """Convert OpenAI tool definitions to lm-deluge Tools."""
+    lm_tools = []
+    for tool in tools:
+        if hasattr(tool, "model_dump"):
+            tool = tool.model_dump()
+        if tool.get("type") == "function":
+            func = tool["function"]
+            params_schema = func.get("parameters") or {}
+            properties = params_schema.get("properties", {})
+            required = params_schema.get("required", [])
+
+            lm_tool = Tool(
+                name=func["name"],
+                description=func.get("description"),
+                parameters=properties if properties else None,
+                required=required,
+            )
+            lm_tools.append(lm_tool)
+    return lm_tools
+
+
+def _signature_for_provider(
+    signature: ThoughtSignature | str | None, provider: str
+) -> str | None:
+    if signature is None:
+        return None
+    if isinstance(signature, ThoughtSignature):
+        if signature.provider is None or signature.provider == provider:
+            return signature.value
+        return None
+    return signature
+
+
+# ============================================================================
+# OpenAI Response Conversion
+# ============================================================================
+
+
+def api_response_to_openai(
+    response: APIResponse, model: str
+) -> OpenAIChatCompletionsResponse:
+    """Convert lm-deluge APIResponse to OpenAI ChatCompletion format."""
+    # Handle error responses
+    if response.is_error:
+        message = OpenAIResponseMessage(
+            role="assistant",
+            content=response.error_message or "An error occurred",
+        )
+        choice = OpenAIChoice(
+            index=0,
+            message=message,
+            finish_reason="stop",
+        )
+        return OpenAIChatCompletionsResponse(
+            model=model,
+            choices=[choice],
+            usage=None,
+        )
+
+    # Extract content from response
+    content_text: str | None = None
+    tool_calls: list[OpenAIToolCall] | None = None
+
+    if response.content:
+        # Extract text parts
+        text_parts = [p.text for p in response.content.parts if isinstance(p, Text)]
+        if text_parts:
+            content_text = "".join(text_parts)
+
+        # Extract tool calls
+        tool_call_parts = [p for p in response.content.parts if isinstance(p, ToolCall)]
+        if tool_call_parts:
+            tool_calls = [
+                OpenAIToolCall(
+                    id=tc.id,
+                    type="function",
+                    function=OpenAIFunctionCall(
+                        name=tc.name,
+                        arguments=json.dumps(tc.arguments)
+                        if isinstance(tc.arguments, dict)
+                        else tc.arguments,
+                    ),
+                )
+                for tc in tool_call_parts
+            ]
+
+    # Create message
+    message = OpenAIResponseMessage(
+        role="assistant",
+        content=content_text,
+        tool_calls=tool_calls,
+    )
+
+    # Create choice
+    choice = OpenAIChoice(
+        index=0,
+        message=message,
+        finish_reason=response.finish_reason or "stop",
+    )
+
+    # Create usage
+    usage = None
+    if response.usage:
+        usage = OpenAIUsage(
+            prompt_tokens=response.usage.input_tokens,
+            completion_tokens=response.usage.output_tokens,
+            total_tokens=response.usage.input_tokens + response.usage.output_tokens,
+        )
+
+    return OpenAIChatCompletionsResponse(
+        model=model,
+        choices=[choice],
+        usage=usage,
+    )
+
+
+# ============================================================================
+# Anthropic Request Conversion
+# ============================================================================
+
+
+def anthropic_request_to_conversation(req: AnthropicMessagesRequest) -> Conversation:
+    """Convert Anthropic request messages to lm-deluge Conversation."""
+
+    def _dump(value: Any) -> Any:
+        if hasattr(value, "model_dump"):
+            return value.model_dump(exclude_none=True)
+        return value
+
+    messages = [_dump(msg) for msg in req.messages]
+    system = req.system
+    if isinstance(system, list):
+        system = [_dump(block) for block in system]
+
+    return Conversation.from_anthropic(messages, system=system)
+
+
+def anthropic_request_to_sampling_params(
+    req: AnthropicMessagesRequest,
+) -> SamplingParams:
+    """Extract SamplingParams from Anthropic request."""
+    params: dict[str, Any] = {
+        "max_new_tokens": req.max_tokens,
+    }
+
+    if req.temperature is not None:
+        params["temperature"] = req.temperature
+    if req.top_p is not None:
+        params["top_p"] = req.top_p
+    if isinstance(req.thinking, dict):
+        thinking_type = req.thinking.get("type")
+        if thinking_type == "enabled":
+            budget_tokens = req.thinking.get("budget_tokens")
+            if isinstance(budget_tokens, int):
+                params["thinking_budget"] = budget_tokens
+        elif thinking_type == "disabled":
+            params["thinking_budget"] = 0
+
+    return SamplingParams(**params)
+
+
+def anthropic_tools_to_lm_deluge(tools: list[Any]) -> list[Tool]:
+    """Convert Anthropic tool definitions to lm-deluge Tools."""
+    lm_tools = []
+    for tool in tools:
+        if hasattr(tool, "model_dump"):
+            tool = tool.model_dump()
+
+        input_schema = tool.get("input_schema") or {}
+        properties = input_schema.get("properties", {})
+        required = input_schema.get("required", [])
+
+        lm_tool = Tool(
+            name=tool["name"],
+            description=tool.get("description"),
+            parameters=properties if properties else None,
+            required=required,
+        )
+        lm_tools.append(lm_tool)
+    return lm_tools
+
+
+# ============================================================================
+# Anthropic Response Conversion
+# ============================================================================
+
+
+def api_response_to_anthropic(
+    response: APIResponse, model: str
+) -> AnthropicMessagesResponse:
+    """Convert lm-deluge APIResponse to Anthropic Messages format."""
+
+    def _map_stop_reason(value: str | None) -> str:
+        if not value:
+            return "end_turn"
+        if value in {"end_turn", "max_tokens", "stop_sequence", "tool_use"}:
+            return value
+        return {
+            "stop": "end_turn",
+            "length": "max_tokens",
+            "tool_calls": "tool_use",
+        }.get(value, "end_turn")
+
+    # Handle error responses
+    if response.is_error:
+        content = [
+            AnthropicResponseContentBlock(
+                type="text",
+                text=response.error_message or "An error occurred",
+            )
+        ]
+        return AnthropicMessagesResponse(
+            model=model,
+            content=content,
+            stop_reason="end_turn",
+            usage=AnthropicUsage(input_tokens=0, output_tokens=0),
+        )
+
+    # Build content blocks
+    content_blocks: list[AnthropicResponseContentBlock] = []
+
+    last_signature = None
+    if response.content:
+        for part in response.content.parts:
+            if isinstance(part, Text):
+                content_blocks.append(
+                    AnthropicResponseContentBlock(type="text", text=part.text)
+                )
+            elif isinstance(part, ToolCall):
+                signature = _signature_for_provider(
+                    part.thought_signature,
+                    "anthropic",
+                )
+                if signature and signature != last_signature:
+                    content_blocks.append(
+                        AnthropicResponseContentBlock(
+                            type="thinking",
+                            thinking="",
+                            signature=signature,
+                        )
+                    )
+                    last_signature = signature
+                content_blocks.append(
+                    AnthropicResponseContentBlock(
+                        type="tool_use",
+                        id=part.id,
+                        name=part.name,
+                        input=part.arguments,
+                    )
+                )
+            elif isinstance(part, Thinking):
+                signature = _signature_for_provider(
+                    part.thought_signature,
+                    "anthropic",
+                )
+                if signature is None and part.raw_payload is None:
+                    continue
+                content_blocks.append(
+                    AnthropicResponseContentBlock(
+                        type="thinking",
+                        thinking=part.content,
+                        signature=signature,
+                    )
+                )
+                if signature:
+                    last_signature = signature
+
+    # Ensure at least one content block
+    if not content_blocks:
+        content_blocks.append(AnthropicResponseContentBlock(type="text", text=""))
+
+    # Map finish reason
+    raw_stop_reason = None
+    raw_stop_sequence = None
+    if isinstance(response.raw_response, dict):
+        raw_stop_reason = response.raw_response.get("stop_reason")
+        raw_stop_sequence = response.raw_response.get("stop_sequence")
+
+    stop_reason = _map_stop_reason(raw_stop_reason or response.finish_reason)
+
+    # Build usage (including cache tokens if present)
+    usage = AnthropicUsage(
+        input_tokens=response.usage.input_tokens if response.usage else 0,
+        output_tokens=response.usage.output_tokens if response.usage else 0,
+        cache_creation_input_tokens=response.usage.cache_write_tokens
+        if response.usage and response.usage.cache_write_tokens
+        else None,
+        cache_read_input_tokens=response.usage.cache_read_tokens
+        if response.usage and response.usage.cache_read_tokens
+        else None,
+    )
+
+    return AnthropicMessagesResponse(
+        model=model,
+        content=content_blocks,
+        stop_reason=stop_reason,
+        stop_sequence=raw_stop_sequence if isinstance(raw_stop_sequence, str) else None,
+        usage=usage,
+    )
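As a quick illustration of the tool adapters above: the converters accept either Pydantic objects or plain dicts. A minimal sketch using a hand-written OpenAI-style function tool (the tool name and schema are made up for the example):

from lm_deluge.server.adapters import openai_tools_to_lm_deluge

weather_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Look up the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}

tools = openai_tools_to_lm_deluge([weather_tool])
print(tools[0].name)  # -> "get_weather"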