prompture 0.0.29.dev8__py3-none-any.whl → 0.0.38.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prompture/__init__.py +264 -23
- prompture/_version.py +34 -0
- prompture/agent.py +924 -0
- prompture/agent_types.py +156 -0
- prompture/aio/__init__.py +74 -0
- prompture/async_agent.py +880 -0
- prompture/async_conversation.py +789 -0
- prompture/async_core.py +803 -0
- prompture/async_driver.py +193 -0
- prompture/async_groups.py +551 -0
- prompture/cache.py +469 -0
- prompture/callbacks.py +55 -0
- prompture/cli.py +63 -4
- prompture/conversation.py +826 -0
- prompture/core.py +894 -263
- prompture/cost_mixin.py +51 -0
- prompture/discovery.py +187 -0
- prompture/driver.py +206 -5
- prompture/drivers/__init__.py +175 -67
- prompture/drivers/airllm_driver.py +109 -0
- prompture/drivers/async_airllm_driver.py +26 -0
- prompture/drivers/async_azure_driver.py +123 -0
- prompture/drivers/async_claude_driver.py +113 -0
- prompture/drivers/async_google_driver.py +316 -0
- prompture/drivers/async_grok_driver.py +97 -0
- prompture/drivers/async_groq_driver.py +90 -0
- prompture/drivers/async_hugging_driver.py +61 -0
- prompture/drivers/async_lmstudio_driver.py +148 -0
- prompture/drivers/async_local_http_driver.py +44 -0
- prompture/drivers/async_ollama_driver.py +135 -0
- prompture/drivers/async_openai_driver.py +102 -0
- prompture/drivers/async_openrouter_driver.py +102 -0
- prompture/drivers/async_registry.py +133 -0
- prompture/drivers/azure_driver.py +42 -9
- prompture/drivers/claude_driver.py +257 -34
- prompture/drivers/google_driver.py +295 -42
- prompture/drivers/grok_driver.py +35 -32
- prompture/drivers/groq_driver.py +33 -26
- prompture/drivers/hugging_driver.py +6 -6
- prompture/drivers/lmstudio_driver.py +97 -19
- prompture/drivers/local_http_driver.py +6 -6
- prompture/drivers/ollama_driver.py +168 -23
- prompture/drivers/openai_driver.py +184 -9
- prompture/drivers/openrouter_driver.py +37 -25
- prompture/drivers/registry.py +306 -0
- prompture/drivers/vision_helpers.py +153 -0
- prompture/field_definitions.py +106 -96
- prompture/group_types.py +147 -0
- prompture/groups.py +530 -0
- prompture/image.py +180 -0
- prompture/logging.py +80 -0
- prompture/model_rates.py +217 -0
- prompture/persistence.py +254 -0
- prompture/persona.py +482 -0
- prompture/runner.py +49 -47
- prompture/scaffold/__init__.py +1 -0
- prompture/scaffold/generator.py +84 -0
- prompture/scaffold/templates/Dockerfile.j2 +12 -0
- prompture/scaffold/templates/README.md.j2 +41 -0
- prompture/scaffold/templates/config.py.j2 +21 -0
- prompture/scaffold/templates/env.example.j2 +8 -0
- prompture/scaffold/templates/main.py.j2 +86 -0
- prompture/scaffold/templates/models.py.j2 +40 -0
- prompture/scaffold/templates/requirements.txt.j2 +5 -0
- prompture/serialization.py +218 -0
- prompture/server.py +183 -0
- prompture/session.py +117 -0
- prompture/settings.py +19 -1
- prompture/tools.py +219 -267
- prompture/tools_schema.py +254 -0
- prompture/validator.py +3 -3
- prompture-0.0.38.dev2.dist-info/METADATA +369 -0
- prompture-0.0.38.dev2.dist-info/RECORD +77 -0
- {prompture-0.0.29.dev8.dist-info → prompture-0.0.38.dev2.dist-info}/WHEEL +1 -1
- prompture-0.0.29.dev8.dist-info/METADATA +0 -368
- prompture-0.0.29.dev8.dist-info/RECORD +0 -27
- {prompture-0.0.29.dev8.dist-info → prompture-0.0.38.dev2.dist-info}/entry_points.txt +0 -0
- {prompture-0.0.29.dev8.dist-info → prompture-0.0.38.dev2.dist-info}/licenses/LICENSE +0 -0
- {prompture-0.0.29.dev8.dist-info → prompture-0.0.38.dev2.dist-info}/top_level.txt +0 -0
|
@@ -1,17 +1,24 @@
|
|
|
1
1
|
"""Driver for Azure OpenAI Service (migrated to openai>=1.0.0).
|
|
2
2
|
Requires the `openai` package.
|
|
3
3
|
"""
|
|
4
|
+
|
|
4
5
|
import os
|
|
5
|
-
from typing import Any
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
6
8
|
try:
|
|
7
9
|
from openai import AzureOpenAI
|
|
8
10
|
except Exception:
|
|
9
11
|
AzureOpenAI = None
|
|
10
12
|
|
|
13
|
+
from ..cost_mixin import CostMixin
|
|
11
14
|
from ..driver import Driver
|
|
12
15
|
|
|
13
16
|
|
|
14
|
-
class AzureDriver(Driver):
|
|
17
|
+
class AzureDriver(CostMixin, Driver):
|
|
18
|
+
supports_json_mode = True
|
|
19
|
+
supports_json_schema = True
|
|
20
|
+
supports_vision = True
|
|
21
|
+
|
|
15
22
|
# Pricing per 1K tokens (adjust if your Azure pricing differs from OpenAI defaults)
|
|
16
23
|
MODEL_PRICING = {
|
|
17
24
|
"gpt-5-mini": {
|
|
@@ -82,7 +89,21 @@ class AzureDriver(Driver):
|
|
|
82
89
|
else:
|
|
83
90
|
self.client = None
|
|
84
91
|
|
|
85
|
-
|
|
92
|
+
supports_messages = True
|
|
93
|
+
|
|
94
|
+
def _prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
95
|
+
from .vision_helpers import _prepare_openai_vision_messages
|
|
96
|
+
|
|
97
|
+
return _prepare_openai_vision_messages(messages)
|
|
98
|
+
|
|
99
|
+
def generate(self, prompt: str, options: dict[str, Any]) -> dict[str, Any]:
|
|
100
|
+
messages = [{"role": "user", "content": prompt}]
|
|
101
|
+
return self._do_generate(messages, options)
|
|
102
|
+
|
|
103
|
+
def generate_messages(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
|
|
104
|
+
return self._do_generate(self._prepare_messages(messages), options)
|
|
105
|
+
|
|
106
|
+
def _do_generate(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
|
|
86
107
|
if self.client is None:
|
|
87
108
|
raise RuntimeError("openai package (>=1.0.0) with AzureOpenAI not installed")
|
|
88
109
|
|
|
@@ -96,13 +117,28 @@ class AzureDriver(Driver):
|
|
|
96
117
|
# Build request kwargs
|
|
97
118
|
kwargs = {
|
|
98
119
|
"model": self.deployment_id, # for Azure, use deployment name
|
|
99
|
-
"messages":
|
|
120
|
+
"messages": messages,
|
|
100
121
|
}
|
|
101
122
|
kwargs[tokens_param] = opts.get("max_tokens", 512)
|
|
102
123
|
|
|
103
124
|
if supports_temperature and "temperature" in opts:
|
|
104
125
|
kwargs["temperature"] = opts["temperature"]
|
|
105
126
|
|
|
127
|
+
# Native JSON mode support
|
|
128
|
+
if options.get("json_mode"):
|
|
129
|
+
json_schema = options.get("json_schema")
|
|
130
|
+
if json_schema:
|
|
131
|
+
kwargs["response_format"] = {
|
|
132
|
+
"type": "json_schema",
|
|
133
|
+
"json_schema": {
|
|
134
|
+
"name": "extraction",
|
|
135
|
+
"strict": True,
|
|
136
|
+
"schema": json_schema,
|
|
137
|
+
},
|
|
138
|
+
}
|
|
139
|
+
else:
|
|
140
|
+
kwargs["response_format"] = {"type": "json_object"}
|
|
141
|
+
|
|
106
142
|
resp = self.client.chat.completions.create(**kwargs)
|
|
107
143
|
|
|
108
144
|
# Extract usage
|
|
@@ -111,11 +147,8 @@ class AzureDriver(Driver):
|
|
|
111
147
|
completion_tokens = getattr(usage, "completion_tokens", 0)
|
|
112
148
|
total_tokens = getattr(usage, "total_tokens", 0)
|
|
113
149
|
|
|
114
|
-
# Calculate cost
|
|
115
|
-
|
|
116
|
-
prompt_cost = (prompt_tokens / 1000) * model_pricing["prompt"]
|
|
117
|
-
completion_cost = (completion_tokens / 1000) * model_pricing["completion"]
|
|
118
|
-
total_cost = prompt_cost + completion_cost
|
|
150
|
+
# Calculate cost via shared mixin
|
|
151
|
+
total_cost = self._calculate_cost("azure", model, prompt_tokens, completion_tokens)
|
|
119
152
|
|
|
120
153
|
# Standardized meta object
|
|
121
154
|
meta = {
|
|
@@ -1,75 +1,137 @@
|
|
|
1
1
|
"""Driver for Anthropic's Claude models. Requires the `anthropic` library.
|
|
2
2
|
Use with API key in CLAUDE_API_KEY env var or provide directly.
|
|
3
3
|
"""
|
|
4
|
+
|
|
5
|
+
import json
|
|
4
6
|
import os
|
|
5
|
-
from
|
|
7
|
+
from collections.abc import Iterator
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
6
10
|
try:
|
|
7
11
|
import anthropic
|
|
8
12
|
except Exception:
|
|
9
13
|
anthropic = None
|
|
10
14
|
|
|
15
|
+
from ..cost_mixin import CostMixin
|
|
11
16
|
from ..driver import Driver
|
|
12
17
|
|
|
13
|
-
|
|
18
|
+
|
|
19
|
+
class ClaudeDriver(CostMixin, Driver):
|
|
20
|
+
supports_json_mode = True
|
|
21
|
+
supports_json_schema = True
|
|
22
|
+
supports_tool_use = True
|
|
23
|
+
supports_streaming = True
|
|
24
|
+
supports_vision = True
|
|
25
|
+
|
|
14
26
|
# Claude pricing per 1000 tokens (prices should be kept current with Anthropic's pricing)
|
|
15
27
|
MODEL_PRICING = {
|
|
16
28
|
# Claude Opus 4.1
|
|
17
29
|
"claude-opus-4-1-20250805": {
|
|
18
|
-
"prompt": 0.015,
|
|
19
|
-
"completion": 0.075,
|
|
30
|
+
"prompt": 0.015, # $15 per 1M prompt tokens
|
|
31
|
+
"completion": 0.075, # $75 per 1M completion tokens
|
|
20
32
|
},
|
|
21
33
|
# Claude Opus 4.0
|
|
22
34
|
"claude-opus-4-20250514": {
|
|
23
|
-
"prompt": 0.015,
|
|
24
|
-
"completion": 0.075,
|
|
35
|
+
"prompt": 0.015, # $15 per 1M prompt tokens
|
|
36
|
+
"completion": 0.075, # $75 per 1M completion tokens
|
|
25
37
|
},
|
|
26
38
|
# Claude Sonnet 4.0
|
|
27
39
|
"claude-sonnet-4-20250514": {
|
|
28
|
-
"prompt": 0.003,
|
|
29
|
-
"completion": 0.015,
|
|
40
|
+
"prompt": 0.003, # $3 per 1M prompt tokens
|
|
41
|
+
"completion": 0.015, # $15 per 1M completion tokens
|
|
30
42
|
},
|
|
31
43
|
# Claude Sonnet 3.7
|
|
32
44
|
"claude-3-7-sonnet-20250219": {
|
|
33
|
-
"prompt": 0.003,
|
|
34
|
-
"completion": 0.015,
|
|
45
|
+
"prompt": 0.003, # $3 per 1M prompt tokens
|
|
46
|
+
"completion": 0.015, # $15 per 1M completion tokens
|
|
35
47
|
},
|
|
36
48
|
# Claude Haiku 3.5
|
|
37
49
|
"claude-3-5-haiku-20241022": {
|
|
38
|
-
"prompt": 0.0008,
|
|
39
|
-
"completion": 0.004,
|
|
40
|
-
}
|
|
50
|
+
"prompt": 0.0008, # $0.80 per 1M prompt tokens
|
|
51
|
+
"completion": 0.004, # $4 per 1M completion tokens
|
|
52
|
+
},
|
|
41
53
|
}
|
|
42
54
|
|
|
43
55
|
def __init__(self, api_key: str | None = None, model: str = "claude-3-5-haiku-20241022"):
|
|
44
56
|
self.api_key = api_key or os.getenv("CLAUDE_API_KEY")
|
|
45
57
|
self.model = model or os.getenv("CLAUDE_MODEL_NAME", "claude-3-5-haiku-20241022")
|
|
46
58
|
|
|
47
|
-
|
|
59
|
+
supports_messages = True
|
|
60
|
+
|
|
61
|
+
def _prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
62
|
+
from .vision_helpers import _prepare_claude_vision_messages
|
|
63
|
+
|
|
64
|
+
return _prepare_claude_vision_messages(messages)
|
|
65
|
+
|
|
66
|
+
def generate(self, prompt: str, options: dict[str, Any]) -> dict[str, Any]:
|
|
67
|
+
messages = [{"role": "user", "content": prompt}]
|
|
68
|
+
return self._do_generate(messages, options)
|
|
69
|
+
|
|
70
|
+
def generate_messages(self, messages: list[dict[str, Any]], options: dict[str, Any]) -> dict[str, Any]:
|
|
71
|
+
return self._do_generate(self._prepare_messages(messages), options)
|
|
72
|
+
|
|
73
|
+
def _do_generate(self, messages: list[dict[str, Any]], options: dict[str, Any]) -> dict[str, Any]:
|
|
48
74
|
if anthropic is None:
|
|
49
75
|
raise RuntimeError("anthropic package not installed")
|
|
50
|
-
|
|
76
|
+
|
|
51
77
|
opts = {**{"temperature": 0.0, "max_tokens": 512}, **options}
|
|
52
78
|
model = options.get("model", self.model)
|
|
53
|
-
|
|
79
|
+
|
|
54
80
|
client = anthropic.Anthropic(api_key=self.api_key)
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
81
|
+
|
|
82
|
+
# Anthropic requires system messages as a top-level parameter
|
|
83
|
+
system_content = None
|
|
84
|
+
api_messages = []
|
|
85
|
+
for msg in messages:
|
|
86
|
+
if msg.get("role") == "system":
|
|
87
|
+
system_content = msg.get("content", "")
|
|
88
|
+
else:
|
|
89
|
+
api_messages.append(msg)
|
|
90
|
+
|
|
91
|
+
# Build common kwargs
|
|
92
|
+
common_kwargs: dict[str, Any] = {
|
|
93
|
+
"model": model,
|
|
94
|
+
"messages": api_messages,
|
|
95
|
+
"temperature": opts["temperature"],
|
|
96
|
+
"max_tokens": opts["max_tokens"],
|
|
97
|
+
}
|
|
98
|
+
if system_content:
|
|
99
|
+
common_kwargs["system"] = system_content
|
|
100
|
+
|
|
101
|
+
# Native JSON mode: use tool-use for schema enforcement
|
|
102
|
+
if options.get("json_mode"):
|
|
103
|
+
json_schema = options.get("json_schema")
|
|
104
|
+
if json_schema:
|
|
105
|
+
tool_def = {
|
|
106
|
+
"name": "extract_json",
|
|
107
|
+
"description": "Extract structured data matching the schema",
|
|
108
|
+
"input_schema": json_schema,
|
|
109
|
+
}
|
|
110
|
+
resp = client.messages.create(
|
|
111
|
+
**common_kwargs,
|
|
112
|
+
tools=[tool_def],
|
|
113
|
+
tool_choice={"type": "tool", "name": "extract_json"},
|
|
114
|
+
)
|
|
115
|
+
text = ""
|
|
116
|
+
for block in resp.content:
|
|
117
|
+
if block.type == "tool_use":
|
|
118
|
+
text = json.dumps(block.input)
|
|
119
|
+
break
|
|
120
|
+
else:
|
|
121
|
+
resp = client.messages.create(**common_kwargs)
|
|
122
|
+
text = resp.content[0].text
|
|
123
|
+
else:
|
|
124
|
+
resp = client.messages.create(**common_kwargs)
|
|
125
|
+
text = resp.content[0].text
|
|
126
|
+
|
|
62
127
|
# Extract token usage from Claude response
|
|
63
128
|
prompt_tokens = resp.usage.input_tokens
|
|
64
129
|
completion_tokens = resp.usage.output_tokens
|
|
65
130
|
total_tokens = prompt_tokens + completion_tokens
|
|
66
|
-
|
|
67
|
-
# Calculate cost
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
completion_cost = (completion_tokens / 1000) * model_pricing["completion"]
|
|
71
|
-
total_cost = prompt_cost + completion_cost
|
|
72
|
-
|
|
131
|
+
|
|
132
|
+
# Calculate cost via shared mixin
|
|
133
|
+
total_cost = self._calculate_cost("claude", model, prompt_tokens, completion_tokens)
|
|
134
|
+
|
|
73
135
|
# Create standardized meta object
|
|
74
136
|
meta = {
|
|
75
137
|
"prompt_tokens": prompt_tokens,
|
|
@@ -77,8 +139,169 @@ class ClaudeDriver(Driver):
|
|
|
77
139
|
"total_tokens": total_tokens,
|
|
78
140
|
"cost": round(total_cost, 6), # Round to 6 decimal places
|
|
79
141
|
"raw_response": dict(resp),
|
|
80
|
-
"model_name": model
|
|
142
|
+
"model_name": model,
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
return {"text": text, "meta": meta}
|
|
146
|
+
|
|
147
|
+
# ------------------------------------------------------------------
|
|
148
|
+
# Helpers
|
|
149
|
+
# ------------------------------------------------------------------
|
|
150
|
+
|
|
151
|
+
def _extract_system_and_messages(
|
|
152
|
+
self, messages: list[dict[str, Any]]
|
|
153
|
+
) -> tuple[str | None, list[dict[str, Any]]]:
|
|
154
|
+
"""Separate system message from conversation messages for Anthropic API."""
|
|
155
|
+
system_content = None
|
|
156
|
+
api_messages: list[dict[str, Any]] = []
|
|
157
|
+
for msg in messages:
|
|
158
|
+
if msg.get("role") == "system":
|
|
159
|
+
system_content = msg.get("content", "")
|
|
160
|
+
else:
|
|
161
|
+
api_messages.append(msg)
|
|
162
|
+
return system_content, api_messages
|
|
163
|
+
|
|
164
|
+
# ------------------------------------------------------------------
|
|
165
|
+
# Tool use
|
|
166
|
+
# ------------------------------------------------------------------
|
|
167
|
+
|
|
168
|
+
def generate_messages_with_tools(
|
|
169
|
+
self,
|
|
170
|
+
messages: list[dict[str, Any]],
|
|
171
|
+
tools: list[dict[str, Any]],
|
|
172
|
+
options: dict[str, Any],
|
|
173
|
+
) -> dict[str, Any]:
|
|
174
|
+
"""Generate a response that may include tool calls (Anthropic)."""
|
|
175
|
+
if anthropic is None:
|
|
176
|
+
raise RuntimeError("anthropic package not installed")
|
|
177
|
+
|
|
178
|
+
opts = {**{"temperature": 0.0, "max_tokens": 512}, **options}
|
|
179
|
+
model = options.get("model", self.model)
|
|
180
|
+
client = anthropic.Anthropic(api_key=self.api_key)
|
|
181
|
+
|
|
182
|
+
system_content, api_messages = self._extract_system_and_messages(messages)
|
|
183
|
+
|
|
184
|
+
# Convert tools from OpenAI format to Anthropic format if needed
|
|
185
|
+
anthropic_tools = []
|
|
186
|
+
for t in tools:
|
|
187
|
+
if "type" in t and t["type"] == "function":
|
|
188
|
+
# OpenAI format -> Anthropic format
|
|
189
|
+
fn = t["function"]
|
|
190
|
+
anthropic_tools.append({
|
|
191
|
+
"name": fn["name"],
|
|
192
|
+
"description": fn.get("description", ""),
|
|
193
|
+
"input_schema": fn.get("parameters", {"type": "object", "properties": {}}),
|
|
194
|
+
})
|
|
195
|
+
elif "input_schema" in t:
|
|
196
|
+
# Already Anthropic format
|
|
197
|
+
anthropic_tools.append(t)
|
|
198
|
+
else:
|
|
199
|
+
anthropic_tools.append(t)
|
|
200
|
+
|
|
201
|
+
kwargs: dict[str, Any] = {
|
|
202
|
+
"model": model,
|
|
203
|
+
"messages": api_messages,
|
|
204
|
+
"temperature": opts["temperature"],
|
|
205
|
+
"max_tokens": opts["max_tokens"],
|
|
206
|
+
"tools": anthropic_tools,
|
|
207
|
+
}
|
|
208
|
+
if system_content:
|
|
209
|
+
kwargs["system"] = system_content
|
|
210
|
+
|
|
211
|
+
resp = client.messages.create(**kwargs)
|
|
212
|
+
|
|
213
|
+
prompt_tokens = resp.usage.input_tokens
|
|
214
|
+
completion_tokens = resp.usage.output_tokens
|
|
215
|
+
total_tokens = prompt_tokens + completion_tokens
|
|
216
|
+
total_cost = self._calculate_cost("claude", model, prompt_tokens, completion_tokens)
|
|
217
|
+
|
|
218
|
+
meta = {
|
|
219
|
+
"prompt_tokens": prompt_tokens,
|
|
220
|
+
"completion_tokens": completion_tokens,
|
|
221
|
+
"total_tokens": total_tokens,
|
|
222
|
+
"cost": round(total_cost, 6),
|
|
223
|
+
"raw_response": dict(resp),
|
|
224
|
+
"model_name": model,
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
text = ""
|
|
228
|
+
tool_calls_out: list[dict[str, Any]] = []
|
|
229
|
+
for block in resp.content:
|
|
230
|
+
if block.type == "text":
|
|
231
|
+
text += block.text
|
|
232
|
+
elif block.type == "tool_use":
|
|
233
|
+
tool_calls_out.append({
|
|
234
|
+
"id": block.id,
|
|
235
|
+
"name": block.name,
|
|
236
|
+
"arguments": block.input,
|
|
237
|
+
})
|
|
238
|
+
|
|
239
|
+
return {
|
|
240
|
+
"text": text,
|
|
241
|
+
"meta": meta,
|
|
242
|
+
"tool_calls": tool_calls_out,
|
|
243
|
+
"stop_reason": resp.stop_reason,
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
# ------------------------------------------------------------------
|
|
247
|
+
# Streaming
|
|
248
|
+
# ------------------------------------------------------------------
|
|
249
|
+
|
|
250
|
+
def generate_messages_stream(
|
|
251
|
+
self,
|
|
252
|
+
messages: list[dict[str, Any]],
|
|
253
|
+
options: dict[str, Any],
|
|
254
|
+
) -> Iterator[dict[str, Any]]:
|
|
255
|
+
"""Yield response chunks via Anthropic streaming API."""
|
|
256
|
+
if anthropic is None:
|
|
257
|
+
raise RuntimeError("anthropic package not installed")
|
|
258
|
+
|
|
259
|
+
opts = {**{"temperature": 0.0, "max_tokens": 512}, **options}
|
|
260
|
+
model = options.get("model", self.model)
|
|
261
|
+
client = anthropic.Anthropic(api_key=self.api_key)
|
|
262
|
+
|
|
263
|
+
system_content, api_messages = self._extract_system_and_messages(messages)
|
|
264
|
+
|
|
265
|
+
kwargs: dict[str, Any] = {
|
|
266
|
+
"model": model,
|
|
267
|
+
"messages": api_messages,
|
|
268
|
+
"temperature": opts["temperature"],
|
|
269
|
+
"max_tokens": opts["max_tokens"],
|
|
270
|
+
}
|
|
271
|
+
if system_content:
|
|
272
|
+
kwargs["system"] = system_content
|
|
273
|
+
|
|
274
|
+
full_text = ""
|
|
275
|
+
prompt_tokens = 0
|
|
276
|
+
completion_tokens = 0
|
|
277
|
+
|
|
278
|
+
with client.messages.stream(**kwargs) as stream:
|
|
279
|
+
for event in stream:
|
|
280
|
+
if hasattr(event, "type"):
|
|
281
|
+
if event.type == "content_block_delta" and hasattr(event, "delta"):
|
|
282
|
+
delta_text = getattr(event.delta, "text", "")
|
|
283
|
+
if delta_text:
|
|
284
|
+
full_text += delta_text
|
|
285
|
+
yield {"type": "delta", "text": delta_text}
|
|
286
|
+
elif event.type == "message_delta" and hasattr(event, "usage"):
|
|
287
|
+
completion_tokens = getattr(event.usage, "output_tokens", 0)
|
|
288
|
+
elif event.type == "message_start" and hasattr(event, "message"):
|
|
289
|
+
usage = getattr(event.message, "usage", None)
|
|
290
|
+
if usage:
|
|
291
|
+
prompt_tokens = getattr(usage, "input_tokens", 0)
|
|
292
|
+
|
|
293
|
+
total_tokens = prompt_tokens + completion_tokens
|
|
294
|
+
total_cost = self._calculate_cost("claude", model, prompt_tokens, completion_tokens)
|
|
295
|
+
|
|
296
|
+
yield {
|
|
297
|
+
"type": "done",
|
|
298
|
+
"text": full_text,
|
|
299
|
+
"meta": {
|
|
300
|
+
"prompt_tokens": prompt_tokens,
|
|
301
|
+
"completion_tokens": completion_tokens,
|
|
302
|
+
"total_tokens": total_tokens,
|
|
303
|
+
"cost": round(total_cost, 6),
|
|
304
|
+
"raw_response": {},
|
|
305
|
+
"model_name": model,
|
|
306
|
+
},
|
|
81
307
|
}
|
|
82
|
-
|
|
83
|
-
text = resp.content[0].text
|
|
84
|
-
return {"text": text, "meta": meta}
|