prompture 0.0.34.dev2__py3-none-any.whl → 0.0.35.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prompture/__init__.py +4 -0
- prompture/_version.py +2 -2
- prompture/async_conversation.py +129 -6
- prompture/async_driver.py +40 -2
- prompture/callbacks.py +5 -0
- prompture/cli.py +56 -1
- prompture/conversation.py +132 -5
- prompture/driver.py +46 -3
- prompture/drivers/claude_driver.py +167 -2
- prompture/drivers/ollama_driver.py +68 -1
- prompture/drivers/openai_driver.py +144 -2
- prompture/scaffold/__init__.py +1 -0
- prompture/scaffold/generator.py +84 -0
- prompture/scaffold/templates/Dockerfile.j2 +12 -0
- prompture/scaffold/templates/README.md.j2 +41 -0
- prompture/scaffold/templates/config.py.j2 +21 -0
- prompture/scaffold/templates/env.example.j2 +8 -0
- prompture/scaffold/templates/main.py.j2 +86 -0
- prompture/scaffold/templates/models.py.j2 +40 -0
- prompture/scaffold/templates/requirements.txt.j2 +5 -0
- prompture/server.py +183 -0
- prompture/tools_schema.py +254 -0
- {prompture-0.0.34.dev2.dist-info → prompture-0.0.35.dev1.dist-info}/METADATA +7 -1
- {prompture-0.0.34.dev2.dist-info → prompture-0.0.35.dev1.dist-info}/RECORD +28 -17
- {prompture-0.0.34.dev2.dist-info → prompture-0.0.35.dev1.dist-info}/WHEEL +0 -0
- {prompture-0.0.34.dev2.dist-info → prompture-0.0.35.dev1.dist-info}/entry_points.txt +0 -0
- {prompture-0.0.34.dev2.dist-info → prompture-0.0.35.dev1.dist-info}/licenses/LICENSE +0 -0
- {prompture-0.0.34.dev2.dist-info → prompture-0.0.35.dev1.dist-info}/top_level.txt +0 -0
prompture/driver.py
CHANGED
@@ -4,6 +4,7 @@ from __future__ import annotations
 
 import logging
 import time
+from collections.abc import Iterator
 from typing import Any
 
 from .callbacks import DriverCallbacks
@@ -32,13 +33,15 @@ class Driver:
     supports_json_mode: bool = False
     supports_json_schema: bool = False
     supports_messages: bool = False
+    supports_tool_use: bool = False
+    supports_streaming: bool = False
 
     callbacks: DriverCallbacks | None = None
 
     def generate(self, prompt: str, options: dict[str, Any]) -> dict[str, Any]:
         raise NotImplementedError
 
-    def generate_messages(self, messages: list[dict[str,
+    def generate_messages(self, messages: list[dict[str, Any]], options: dict[str, Any]) -> dict[str, Any]:
         """Generate a response from a list of conversation messages.
 
         Each message is a dict with ``"role"`` (``"system"``, ``"user"``, or
@@ -52,6 +55,46 @@
         prompt = self._flatten_messages(messages)
         return self.generate(prompt, options)
 
+    # ------------------------------------------------------------------
+    # Tool use
+    # ------------------------------------------------------------------
+
+    def generate_messages_with_tools(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]],
+        options: dict[str, Any],
+    ) -> dict[str, Any]:
+        """Generate a response that may include tool calls.
+
+        Returns a dict with keys: ``text``, ``meta``, ``tool_calls``, ``stop_reason``.
+        ``tool_calls`` is a list of ``{"id": str, "name": str, "arguments": dict}``.
+
+        Drivers that support tool use should override this method and set
+        ``supports_tool_use = True``.
+        """
+        raise NotImplementedError(f"{self.__class__.__name__} does not support tool use")
+
+    # ------------------------------------------------------------------
+    # Streaming
+    # ------------------------------------------------------------------
+
+    def generate_messages_stream(
+        self,
+        messages: list[dict[str, Any]],
+        options: dict[str, Any],
+    ) -> Iterator[dict[str, Any]]:
+        """Yield response chunks incrementally.
+
+        Each chunk is a dict:
+        - ``{"type": "delta", "text": str}`` for content fragments
+        - ``{"type": "done", "text": str, "meta": dict}`` for the final summary
+
+        Drivers that support streaming should override this method and set
+        ``supports_streaming = True``.
+        """
+        raise NotImplementedError(f"{self.__class__.__name__} does not support streaming")
+
     # ------------------------------------------------------------------
     # Hook-aware wrappers
     # ------------------------------------------------------------------
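The two new base-class hooks establish a driver-agnostic contract: tool-capable drivers return a normalized `tool_calls` list, and streaming drivers yield `delta`/`done` chunks. A minimal sketch of a caller that relies only on that contract (the `driver` and `messages` values are placeholders, not part of the package):

```python
from typing import Any


def stream_reply(driver: Any, messages: list[dict[str, Any]],
                 options: dict[str, Any] | None = None) -> dict[str, Any]:
    """Print streamed text as it arrives; return {"text": ..., "meta": ...}."""
    options = options or {}
    if not getattr(driver, "supports_streaming", False):
        # Drivers without streaming still expose the regular message API.
        return driver.generate_messages(messages, options)
    final: dict[str, Any] = {"text": "", "meta": {}}
    for chunk in driver.generate_messages_stream(messages, options):
        if chunk["type"] == "delta":
            print(chunk["text"], end="", flush=True)
        elif chunk["type"] == "done":
            final = {"text": chunk["text"], "meta": chunk["meta"]}
    return final
```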
@@ -84,7 +127,7 @@
         )
         return resp
 
-    def generate_messages_with_hooks(self, messages: list[dict[str,
+    def generate_messages_with_hooks(self, messages: list[dict[str, Any]], options: dict[str, Any]) -> dict[str, Any]:
         """Wrap :meth:`generate_messages` with callbacks."""
         driver_name = getattr(self, "model", self.__class__.__name__)
         self._fire_callback(
@@ -129,7 +172,7 @@
             logger.exception("Callback %s raised an exception", event)
 
     @staticmethod
-    def _flatten_messages(messages: list[dict[str,
+    def _flatten_messages(messages: list[dict[str, Any]]) -> str:
         """Join messages into a single prompt string with role prefixes."""
         parts: list[str] = []
         for msg in messages:
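The `_flatten_messages` body is not part of this hunk (only its signature changes), but its docstring describes role-prefixed joining into a single prompt. A hypothetical sketch of that behavior for orientation only; the prefixes and separator are assumptions, not the actual implementation:

```python
def flatten_messages(messages: list[dict[str, str]]) -> str:
    """Hypothetical role-prefixed flattening, mirroring the docstring above."""
    parts: list[str] = []
    for msg in messages:
        role = msg.get("role", "user")
        parts.append(f"{role}: {msg.get('content', '')}")
    return "\n\n".join(parts)
```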
prompture/drivers/claude_driver.py
CHANGED

@@ -4,6 +4,7 @@ Use with API key in CLAUDE_API_KEY env var or provide directly.
 
 import json
 import os
+from collections.abc import Iterator
 from typing import Any
 
 try:
@@ -18,6 +19,8 @@ from ..driver import Driver
 class ClaudeDriver(CostMixin, Driver):
     supports_json_mode = True
     supports_json_schema = True
+    supports_tool_use = True
+    supports_streaming = True
 
     # Claude pricing per 1000 tokens (prices should be kept current with Anthropic's pricing)
     MODEL_PRICING = {
@@ -58,10 +61,10 @@ class ClaudeDriver(CostMixin, Driver):
         messages = [{"role": "user", "content": prompt}]
         return self._do_generate(messages, options)
 
-    def generate_messages(self, messages: list[dict[str,
+    def generate_messages(self, messages: list[dict[str, Any]], options: dict[str, Any]) -> dict[str, Any]:
         return self._do_generate(messages, options)
 
-    def _do_generate(self, messages: list[dict[str,
+    def _do_generate(self, messages: list[dict[str, Any]], options: dict[str, Any]) -> dict[str, Any]:
         if anthropic is None:
             raise RuntimeError("anthropic package not installed")
 
@@ -134,3 +137,165 @@ class ClaudeDriver(CostMixin, Driver):
         }
 
         return {"text": text, "meta": meta}
+
+    # ------------------------------------------------------------------
+    # Helpers
+    # ------------------------------------------------------------------
+
+    def _extract_system_and_messages(
+        self, messages: list[dict[str, Any]]
+    ) -> tuple[str | None, list[dict[str, Any]]]:
+        """Separate system message from conversation messages for Anthropic API."""
+        system_content = None
+        api_messages: list[dict[str, Any]] = []
+        for msg in messages:
+            if msg.get("role") == "system":
+                system_content = msg.get("content", "")
+            else:
+                api_messages.append(msg)
+        return system_content, api_messages
+
+    # ------------------------------------------------------------------
+    # Tool use
+    # ------------------------------------------------------------------
+
+    def generate_messages_with_tools(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]],
+        options: dict[str, Any],
+    ) -> dict[str, Any]:
+        """Generate a response that may include tool calls (Anthropic)."""
+        if anthropic is None:
+            raise RuntimeError("anthropic package not installed")
+
+        opts = {**{"temperature": 0.0, "max_tokens": 512}, **options}
+        model = options.get("model", self.model)
+        client = anthropic.Anthropic(api_key=self.api_key)
+
+        system_content, api_messages = self._extract_system_and_messages(messages)
+
+        # Convert tools from OpenAI format to Anthropic format if needed
+        anthropic_tools = []
+        for t in tools:
+            if "type" in t and t["type"] == "function":
+                # OpenAI format -> Anthropic format
+                fn = t["function"]
+                anthropic_tools.append({
+                    "name": fn["name"],
+                    "description": fn.get("description", ""),
+                    "input_schema": fn.get("parameters", {"type": "object", "properties": {}}),
+                })
+            elif "input_schema" in t:
+                # Already Anthropic format
+                anthropic_tools.append(t)
+            else:
+                anthropic_tools.append(t)
+
+        kwargs: dict[str, Any] = {
+            "model": model,
+            "messages": api_messages,
+            "temperature": opts["temperature"],
+            "max_tokens": opts["max_tokens"],
+            "tools": anthropic_tools,
+        }
+        if system_content:
+            kwargs["system"] = system_content
+
+        resp = client.messages.create(**kwargs)
+
+        prompt_tokens = resp.usage.input_tokens
+        completion_tokens = resp.usage.output_tokens
+        total_tokens = prompt_tokens + completion_tokens
+        total_cost = self._calculate_cost("claude", model, prompt_tokens, completion_tokens)
+
+        meta = {
+            "prompt_tokens": prompt_tokens,
+            "completion_tokens": completion_tokens,
+            "total_tokens": total_tokens,
+            "cost": round(total_cost, 6),
+            "raw_response": dict(resp),
+            "model_name": model,
+        }
+
+        text = ""
+        tool_calls_out: list[dict[str, Any]] = []
+        for block in resp.content:
+            if block.type == "text":
+                text += block.text
+            elif block.type == "tool_use":
+                tool_calls_out.append({
+                    "id": block.id,
+                    "name": block.name,
+                    "arguments": block.input,
+                })
+
+        return {
+            "text": text,
+            "meta": meta,
+            "tool_calls": tool_calls_out,
+            "stop_reason": resp.stop_reason,
+        }
+
+    # ------------------------------------------------------------------
+    # Streaming
+    # ------------------------------------------------------------------
+
+    def generate_messages_stream(
+        self,
+        messages: list[dict[str, Any]],
+        options: dict[str, Any],
+    ) -> Iterator[dict[str, Any]]:
+        """Yield response chunks via Anthropic streaming API."""
+        if anthropic is None:
+            raise RuntimeError("anthropic package not installed")
+
+        opts = {**{"temperature": 0.0, "max_tokens": 512}, **options}
+        model = options.get("model", self.model)
+        client = anthropic.Anthropic(api_key=self.api_key)
+
+        system_content, api_messages = self._extract_system_and_messages(messages)
+
+        kwargs: dict[str, Any] = {
+            "model": model,
+            "messages": api_messages,
+            "temperature": opts["temperature"],
+            "max_tokens": opts["max_tokens"],
+        }
+        if system_content:
+            kwargs["system"] = system_content
+
+        full_text = ""
+        prompt_tokens = 0
+        completion_tokens = 0
+
+        with client.messages.stream(**kwargs) as stream:
+            for event in stream:
+                if hasattr(event, "type"):
+                    if event.type == "content_block_delta" and hasattr(event, "delta"):
+                        delta_text = getattr(event.delta, "text", "")
+                        if delta_text:
+                            full_text += delta_text
+                            yield {"type": "delta", "text": delta_text}
+                    elif event.type == "message_delta" and hasattr(event, "usage"):
+                        completion_tokens = getattr(event.usage, "output_tokens", 0)
+                    elif event.type == "message_start" and hasattr(event, "message"):
+                        usage = getattr(event.message, "usage", None)
+                        if usage:
+                            prompt_tokens = getattr(usage, "input_tokens", 0)
+
+        total_tokens = prompt_tokens + completion_tokens
+        total_cost = self._calculate_cost("claude", model, prompt_tokens, completion_tokens)
+
+        yield {
+            "type": "done",
+            "text": full_text,
+            "meta": {
+                "prompt_tokens": prompt_tokens,
+                "completion_tokens": completion_tokens,
+                "total_tokens": total_tokens,
+                "cost": round(total_cost, 6),
+                "raw_response": {},
+                "model_name": model,
+            },
+        }
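For reference, the tool-conversion branch above accepts tools in either OpenAI or Anthropic shape. A standalone sketch of the same mapping, using an invented `get_weather` tool definition:

```python
# Invented example tool in OpenAI "function" format.
openai_style_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Look up the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}

# The same mapping the driver applies: name and description carry over,
# and OpenAI "parameters" becomes Anthropic "input_schema".
fn = openai_style_tool["function"]
anthropic_style_tool = {
    "name": fn["name"],
    "description": fn.get("description", ""),
    "input_schema": fn.get("parameters", {"type": "object", "properties": {}}),
}
print(anthropic_style_tool["input_schema"]["properties"])
```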
prompture/drivers/ollama_driver.py
CHANGED

@@ -1,6 +1,7 @@
 import json
 import logging
 import os
+from collections.abc import Iterator
 from typing import Any, Optional
 
 import requests
@@ -12,6 +13,7 @@ logger = logging.getLogger(__name__)
 
 class OllamaDriver(Driver):
     supports_json_mode = True
+    supports_streaming = True
 
     # Ollama is free – costs are always zero.
     MODEL_PRICING = {"default": {"prompt": 0.0, "completion": 0.0}}
@@ -121,7 +123,72 @@
         # Ollama returns text in "response"
         return {"text": response_data.get("response", ""), "meta": meta}
 
-    def generate_messages(self, messages: list[dict[str,
+    # ------------------------------------------------------------------
+    # Streaming
+    # ------------------------------------------------------------------
+
+    def generate_messages_stream(
+        self,
+        messages: list[dict[str, Any]],
+        options: dict[str, Any],
+    ) -> Iterator[dict[str, Any]]:
+        """Yield response chunks via Ollama streaming API."""
+        merged_options = self.options.copy()
+        if options:
+            merged_options.update(options)
+
+        chat_endpoint = self.endpoint.replace("/api/generate", "/api/chat")
+
+        payload: dict[str, Any] = {
+            "model": merged_options.get("model", self.model),
+            "messages": messages,
+            "stream": True,
+        }
+
+        if merged_options.get("json_mode"):
+            payload["format"] = "json"
+        if "temperature" in merged_options:
+            payload["temperature"] = merged_options["temperature"]
+        if "top_p" in merged_options:
+            payload["top_p"] = merged_options["top_p"]
+        if "top_k" in merged_options:
+            payload["top_k"] = merged_options["top_k"]
+
+        full_text = ""
+        prompt_tokens = 0
+        completion_tokens = 0
+
+        r = requests.post(chat_endpoint, json=payload, timeout=120, stream=True)
+        r.raise_for_status()
+
+        for line in r.iter_lines():
+            if not line:
+                continue
+            chunk = json.loads(line)
+            if chunk.get("done"):
+                prompt_tokens = chunk.get("prompt_eval_count", 0)
+                completion_tokens = chunk.get("eval_count", 0)
+            else:
+                content = chunk.get("message", {}).get("content", "")
+                if content:
+                    full_text += content
+                    yield {"type": "delta", "text": content}
+
+        total_tokens = prompt_tokens + completion_tokens
+        yield {
+            "type": "done",
+            "text": full_text,
+            "meta": {
+                "prompt_tokens": prompt_tokens,
+                "completion_tokens": completion_tokens,
+                "total_tokens": total_tokens,
+                "cost": 0.0,
+                "raw_response": {},
+                "model_name": merged_options.get("model", self.model),
+            },
+        }
+
+    def generate_messages(self, messages: list[dict[str, Any]], options: dict[str, Any]) -> dict[str, Any]:
         """Use Ollama's /api/chat endpoint for multi-turn conversations."""
         merged_options = self.options.copy()
         if options:
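The streaming method parses Ollama's newline-delimited JSON: content deltas carry `message.content`, and the final `done` chunk carries the token counts. A small self-contained sketch of that parsing, with made-up chunk values:

```python
import json

# Two illustrative NDJSON lines of the kind Ollama's /api/chat emits when
# streaming (values are made up): a content delta and the final summary.
raw_lines = [
    b'{"message": {"role": "assistant", "content": "Hello there"}, "done": false}',
    b'{"done": true, "prompt_eval_count": 12, "eval_count": 5}',
]

text = ""
prompt_tokens = completion_tokens = 0
for line in raw_lines:
    chunk = json.loads(line)
    if chunk.get("done"):
        prompt_tokens = chunk.get("prompt_eval_count", 0)
        completion_tokens = chunk.get("eval_count", 0)
    else:
        text += chunk.get("message", {}).get("content", "")

print(text, "|", prompt_tokens + completion_tokens, "tokens")
```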
prompture/drivers/openai_driver.py
CHANGED

@@ -2,7 +2,9 @@
 Requires the `openai` package. Uses OPENAI_API_KEY env var.
 """
 
+import json
 import os
+from collections.abc import Iterator
 from typing import Any
 
 try:
@@ -17,6 +19,8 @@ from ..driver import Driver
 class OpenAIDriver(CostMixin, Driver):
     supports_json_mode = True
     supports_json_schema = True
+    supports_tool_use = True
+    supports_streaming = True
 
     # Approximate pricing per 1K tokens (keep updated with OpenAI's official pricing)
     # Each model entry also defines which token parameter it supports and
@@ -74,10 +78,10 @@
         messages = [{"role": "user", "content": prompt}]
         return self._do_generate(messages, options)
 
-    def generate_messages(self, messages: list[dict[str,
+    def generate_messages(self, messages: list[dict[str, Any]], options: dict[str, Any]) -> dict[str, Any]:
         return self._do_generate(messages, options)
 
-    def _do_generate(self, messages: list[dict[str,
+    def _do_generate(self, messages: list[dict[str, Any]], options: dict[str, Any]) -> dict[str, Any]:
         if self.client is None:
             raise RuntimeError("openai package (>=1.0.0) is not installed")
 
@@ -142,3 +146,141 @@
 
         text = resp.choices[0].message.content
         return {"text": text, "meta": meta}
+
+    # ------------------------------------------------------------------
+    # Tool use
+    # ------------------------------------------------------------------
+
+    def generate_messages_with_tools(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]],
+        options: dict[str, Any],
+    ) -> dict[str, Any]:
+        """Generate a response that may include tool calls."""
+        if self.client is None:
+            raise RuntimeError("openai package (>=1.0.0) is not installed")
+
+        model = options.get("model", self.model)
+        model_info = self.MODEL_PRICING.get(model, {})
+        tokens_param = model_info.get("tokens_param", "max_tokens")
+        supports_temperature = model_info.get("supports_temperature", True)
+
+        opts = {"temperature": 1.0, "max_tokens": 512, **options}
+
+        kwargs: dict[str, Any] = {
+            "model": model,
+            "messages": messages,
+            "tools": tools,
+        }
+        kwargs[tokens_param] = opts.get("max_tokens", 512)
+
+        if supports_temperature and "temperature" in opts:
+            kwargs["temperature"] = opts["temperature"]
+
+        resp = self.client.chat.completions.create(**kwargs)
+
+        usage = getattr(resp, "usage", None)
+        prompt_tokens = getattr(usage, "prompt_tokens", 0)
+        completion_tokens = getattr(usage, "completion_tokens", 0)
+        total_tokens = getattr(usage, "total_tokens", 0)
+        total_cost = self._calculate_cost("openai", model, prompt_tokens, completion_tokens)
+
+        meta = {
+            "prompt_tokens": prompt_tokens,
+            "completion_tokens": completion_tokens,
+            "total_tokens": total_tokens,
+            "cost": round(total_cost, 6),
+            "raw_response": resp.model_dump(),
+            "model_name": model,
+        }
+
+        choice = resp.choices[0]
+        text = choice.message.content or ""
+        stop_reason = choice.finish_reason
+
+        tool_calls_out: list[dict[str, Any]] = []
+        if choice.message.tool_calls:
+            for tc in choice.message.tool_calls:
+                try:
+                    args = json.loads(tc.function.arguments)
+                except (json.JSONDecodeError, TypeError):
+                    args = {}
+                tool_calls_out.append({
+                    "id": tc.id,
+                    "name": tc.function.name,
+                    "arguments": args,
+                })
+
+        return {
+            "text": text,
+            "meta": meta,
+            "tool_calls": tool_calls_out,
+            "stop_reason": stop_reason,
+        }
+
+    # ------------------------------------------------------------------
+    # Streaming
+    # ------------------------------------------------------------------
+
+    def generate_messages_stream(
+        self,
+        messages: list[dict[str, Any]],
+        options: dict[str, Any],
+    ) -> Iterator[dict[str, Any]]:
+        """Yield response chunks via OpenAI streaming API."""
+        if self.client is None:
+            raise RuntimeError("openai package (>=1.0.0) is not installed")
+
+        model = options.get("model", self.model)
+        model_info = self.MODEL_PRICING.get(model, {})
+        tokens_param = model_info.get("tokens_param", "max_tokens")
+        supports_temperature = model_info.get("supports_temperature", True)
+
+        opts = {"temperature": 1.0, "max_tokens": 512, **options}
+
+        kwargs: dict[str, Any] = {
+            "model": model,
+            "messages": messages,
+            "stream": True,
+            "stream_options": {"include_usage": True},
+        }
+        kwargs[tokens_param] = opts.get("max_tokens", 512)
+
+        if supports_temperature and "temperature" in opts:
+            kwargs["temperature"] = opts["temperature"]
+
+        stream = self.client.chat.completions.create(**kwargs)
+
+        full_text = ""
+        prompt_tokens = 0
+        completion_tokens = 0
+
+        for chunk in stream:
+            # Usage comes in the final chunk
+            if getattr(chunk, "usage", None):
+                prompt_tokens = chunk.usage.prompt_tokens or 0
+                completion_tokens = chunk.usage.completion_tokens or 0
+
+            if chunk.choices:
+                delta = chunk.choices[0].delta
+                content = getattr(delta, "content", None) or ""
+                if content:
+                    full_text += content
+                    yield {"type": "delta", "text": content}
+
+        total_tokens = prompt_tokens + completion_tokens
+        total_cost = self._calculate_cost("openai", model, prompt_tokens, completion_tokens)
+
+        yield {
+            "type": "done",
+            "text": full_text,
+            "meta": {
+                "prompt_tokens": prompt_tokens,
+                "completion_tokens": completion_tokens,
+                "total_tokens": total_tokens,
+                "cost": round(total_cost, 6),
+                "raw_response": {},
+                "model_name": model,
+            },
+        }
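Both drivers now return tool calls in the same normalized `{"id", "name", "arguments"}` shape, so dispatch code can stay provider-agnostic. A rough sketch with an invented tool registry (the `get_weather` function is a stand-in, not part of the package):

```python
from typing import Any, Callable


def get_weather(city: str) -> str:
    """Invented stand-in for a real tool implementation."""
    return f"Sunny in {city}"


TOOL_REGISTRY: dict[str, Callable[..., Any]] = {"get_weather": get_weather}


def run_tool_calls(result: dict[str, Any]) -> list[dict[str, Any]]:
    """Execute each normalized tool call and pair the output with its call id."""
    outputs: list[dict[str, Any]] = []
    for call in result.get("tool_calls", []):
        fn = TOOL_REGISTRY.get(call["name"])
        if fn is None:
            continue  # unknown tool name; skip rather than crash
        outputs.append({"id": call["id"], "output": fn(**call["arguments"])})
    return outputs
```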
prompture/scaffold/__init__.py
ADDED

@@ -0,0 +1 @@
+"""Project scaffolding for Prompture-based FastAPI apps."""
prompture/scaffold/generator.py
ADDED

@@ -0,0 +1,84 @@
+"""Project scaffolding generator.
+
+Renders Jinja2 templates into a standalone FastAPI project directory
+that users can customize and deploy.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+try:
+    from jinja2 import Environment, FileSystemLoader
+except ImportError:
+    Environment = None  # type: ignore[assignment,misc]
+    FileSystemLoader = None  # type: ignore[assignment,misc]
+
+_TEMPLATES_DIR = Path(__file__).parent / "templates"
+
+# Map from template file -> output path (relative to project root).
+_FILE_MAP = {
+    "main.py.j2": "app/main.py",
+    "models.py.j2": "app/models.py",
+    "config.py.j2": "app/config.py",
+    "requirements.txt.j2": "requirements.txt",
+    "env.example.j2": ".env.example",
+    "README.md.j2": "README.md",
+}
+
+_DOCKER_FILES = {
+    "Dockerfile.j2": "Dockerfile",
+}
+
+
+def scaffold_project(
+    output_dir: str,
+    project_name: str = "my_app",
+    model_name: str = "openai/gpt-4o-mini",
+    include_docker: bool = True,
+) -> Path:
+    """Render all templates and write the project to *output_dir*.
+
+    Parameters:
+        output_dir: Destination directory (created if needed).
+        project_name: Human-friendly project name used in templates.
+        model_name: Default model string baked into config.
+        include_docker: Whether to include Dockerfile.
+
+    Returns:
+        The :class:`Path` to the generated project root.
+    """
+    if Environment is None:
+        raise ImportError("jinja2 is required for scaffolding: pip install prompture[scaffold]")
+
+    env = Environment(
+        loader=FileSystemLoader(str(_TEMPLATES_DIR)),
+        keep_trailing_newline=True,
+    )
+
+    context = {
+        "project_name": project_name,
+        "model_name": model_name,
+        "include_docker": include_docker,
+    }
+
+    out = Path(output_dir)
+
+    file_map = dict(_FILE_MAP)
+    if include_docker:
+        file_map.update(_DOCKER_FILES)
+
+    for template_name, rel_path in file_map.items():
+        template = env.get_template(template_name)
+        rendered = template.render(**context)
+
+        dest = out / rel_path
+        dest.parent.mkdir(parents=True, exist_ok=True)
+        dest.write_text(rendered, encoding="utf-8")
+
+    # Create empty __init__.py for the app package
+    init_path = out / "app" / "__init__.py"
+    if not init_path.exists():
+        init_path.write_text("", encoding="utf-8")
+
+    return out
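A usage sketch for the generator; the destination directory and model string below are example values:

```python
from prompture.scaffold.generator import scaffold_project

# Example values; requires jinja2 to be installed.
root = scaffold_project(
    "./my_service",
    project_name="my_service",
    model_name="openai/gpt-4o-mini",
    include_docker=True,
)
print(f"Project written to {root}")
# Per the template map above, this produces app/main.py, app/models.py,
# app/config.py, app/__init__.py, requirements.txt, .env.example,
# README.md and (optionally) a Dockerfile.
```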
prompture/scaffold/templates/README.md.j2
ADDED

@@ -0,0 +1,41 @@
+# {{ project_name }}
+
+A FastAPI server powered by [Prompture](https://github.com/jhd3197/prompture) for structured LLM output.
+
+## Quick start
+
+```bash
+# Install dependencies
+pip install -r requirements.txt
+
+# Copy and edit environment config
+cp .env.example .env
+
+# Run the server
+uvicorn app.main:app --reload
+```
+
+## API endpoints
+
+| Method | Path | Description |
+|--------|------|-------------|
+| POST | `/v1/chat` | Send a message, get a response |
+| POST | `/v1/extract` | Extract structured JSON with schema |
+| GET | `/v1/conversations/{id}` | Get conversation history |
+| DELETE | `/v1/conversations/{id}` | Delete a conversation |
+
+## Example
+
+```bash
+curl -X POST http://localhost:8000/v1/chat \
+  -H "Content-Type: application/json" \
+  -d '{"message": "Hello!"}'
+```
+{% if include_docker %}
+## Docker
+
+```bash
+docker build -t {{ project_name }} .
+docker run -p 8000:8000 --env-file .env {{ project_name }}
+```
+{% endif %}