prompture 0.0.29.dev8__py3-none-any.whl → 0.0.38.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prompture/__init__.py +264 -23
- prompture/_version.py +34 -0
- prompture/agent.py +924 -0
- prompture/agent_types.py +156 -0
- prompture/aio/__init__.py +74 -0
- prompture/async_agent.py +880 -0
- prompture/async_conversation.py +789 -0
- prompture/async_core.py +803 -0
- prompture/async_driver.py +193 -0
- prompture/async_groups.py +551 -0
- prompture/cache.py +469 -0
- prompture/callbacks.py +55 -0
- prompture/cli.py +63 -4
- prompture/conversation.py +826 -0
- prompture/core.py +894 -263
- prompture/cost_mixin.py +51 -0
- prompture/discovery.py +187 -0
- prompture/driver.py +206 -5
- prompture/drivers/__init__.py +175 -67
- prompture/drivers/airllm_driver.py +109 -0
- prompture/drivers/async_airllm_driver.py +26 -0
- prompture/drivers/async_azure_driver.py +123 -0
- prompture/drivers/async_claude_driver.py +113 -0
- prompture/drivers/async_google_driver.py +316 -0
- prompture/drivers/async_grok_driver.py +97 -0
- prompture/drivers/async_groq_driver.py +90 -0
- prompture/drivers/async_hugging_driver.py +61 -0
- prompture/drivers/async_lmstudio_driver.py +148 -0
- prompture/drivers/async_local_http_driver.py +44 -0
- prompture/drivers/async_ollama_driver.py +135 -0
- prompture/drivers/async_openai_driver.py +102 -0
- prompture/drivers/async_openrouter_driver.py +102 -0
- prompture/drivers/async_registry.py +133 -0
- prompture/drivers/azure_driver.py +42 -9
- prompture/drivers/claude_driver.py +257 -34
- prompture/drivers/google_driver.py +295 -42
- prompture/drivers/grok_driver.py +35 -32
- prompture/drivers/groq_driver.py +33 -26
- prompture/drivers/hugging_driver.py +6 -6
- prompture/drivers/lmstudio_driver.py +97 -19
- prompture/drivers/local_http_driver.py +6 -6
- prompture/drivers/ollama_driver.py +168 -23
- prompture/drivers/openai_driver.py +184 -9
- prompture/drivers/openrouter_driver.py +37 -25
- prompture/drivers/registry.py +306 -0
- prompture/drivers/vision_helpers.py +153 -0
- prompture/field_definitions.py +106 -96
- prompture/group_types.py +147 -0
- prompture/groups.py +530 -0
- prompture/image.py +180 -0
- prompture/logging.py +80 -0
- prompture/model_rates.py +217 -0
- prompture/persistence.py +254 -0
- prompture/persona.py +482 -0
- prompture/runner.py +49 -47
- prompture/scaffold/__init__.py +1 -0
- prompture/scaffold/generator.py +84 -0
- prompture/scaffold/templates/Dockerfile.j2 +12 -0
- prompture/scaffold/templates/README.md.j2 +41 -0
- prompture/scaffold/templates/config.py.j2 +21 -0
- prompture/scaffold/templates/env.example.j2 +8 -0
- prompture/scaffold/templates/main.py.j2 +86 -0
- prompture/scaffold/templates/models.py.j2 +40 -0
- prompture/scaffold/templates/requirements.txt.j2 +5 -0
- prompture/serialization.py +218 -0
- prompture/server.py +183 -0
- prompture/session.py +117 -0
- prompture/settings.py +19 -1
- prompture/tools.py +219 -267
- prompture/tools_schema.py +254 -0
- prompture/validator.py +3 -3
- prompture-0.0.38.dev2.dist-info/METADATA +369 -0
- prompture-0.0.38.dev2.dist-info/RECORD +77 -0
- {prompture-0.0.29.dev8.dist-info → prompture-0.0.38.dev2.dist-info}/WHEEL +1 -1
- prompture-0.0.29.dev8.dist-info/METADATA +0 -368
- prompture-0.0.29.dev8.dist-info/RECORD +0 -27
- {prompture-0.0.29.dev8.dist-info → prompture-0.0.38.dev2.dist-info}/entry_points.txt +0 -0
- {prompture-0.0.29.dev8.dist-info → prompture-0.0.38.dev2.dist-info}/licenses/LICENSE +0 -0
- {prompture-0.0.29.dev8.dist-info → prompture-0.0.38.dev2.dist-info}/top_level.txt +0 -0
prompture/server.py
ADDED
@@ -0,0 +1,183 @@
+"""Built-in API server wrapping AsyncConversation.
+
+Provides a FastAPI application with chat, extraction, and model
+listing endpoints. ``fastapi``, ``uvicorn``, and ``sse-starlette``
+are lazy-imported so the module is importable without them installed.
+
+Usage::
+
+    from prompture.server import create_app
+    app = create_app(model_name="openai/gpt-4o-mini")
+"""
+
+import json
+import logging
+import uuid
+from typing import Any, Optional
+
+logger = logging.getLogger("prompture.server")
+
+
+def create_app(
+    model_name: str = "openai/gpt-4o-mini",
+    system_prompt: Optional[str] = None,
+    tools: Any = None,
+    cors_origins: Optional[list[str]] = None,
+) -> Any:
+    """Create and return a FastAPI application.
+
+    Parameters:
+        model_name: Default model string (``provider/model``).
+        system_prompt: Optional system prompt for new conversations.
+        tools: Optional :class:`~prompture.tools_schema.ToolRegistry`.
+        cors_origins: CORS allowed origins. ``["*"]`` to allow all.
+
+    Returns:
+        A ``fastapi.FastAPI`` instance.
+    """
+    try:
+        from fastapi import FastAPI, HTTPException
+        from fastapi.middleware.cors import CORSMiddleware
+        from pydantic import BaseModel, Field
+    except ImportError as exc:
+        raise ImportError(
+            "The 'serve' extra is required: pip install prompture[serve]"
+        ) from exc
+
+    from .async_conversation import AsyncConversation
+    from .tools_schema import ToolRegistry
+
+    # ---- Pydantic request/response models ----
+
+    class ChatRequest(BaseModel):
+        message: str
+        conversation_id: Optional[str] = None
+        stream: bool = False
+        options: Optional[dict[str, Any]] = None
+
+    class ChatResponse(BaseModel):
+        message: str
+        conversation_id: str
+        usage: dict[str, Any]
+
+    class ExtractRequest(BaseModel):
+        text: str
+        schema_def: dict[str, Any] = Field(..., alias="schema")
+        conversation_id: Optional[str] = None
+
+        model_config = {"populate_by_name": True}
+
+    class ExtractResponse(BaseModel):
+        json_object: dict[str, Any]
+        conversation_id: str
+        usage: dict[str, Any]
+
+    class ModelInfo(BaseModel):
+        models: list[str]
+
+    class ConversationHistory(BaseModel):
+        conversation_id: str
+        messages: list[dict[str, Any]]
+        usage: dict[str, Any]
+
+    # ---- App ----
+
+    app = FastAPI(title="Prompture API", version="0.1.0")
+
+    if cors_origins:
+        app.add_middleware(
+            CORSMiddleware,
+            allow_origins=cors_origins,
+            allow_credentials=True,
+            allow_methods=["*"],
+            allow_headers=["*"],
+        )
+
+    # In-memory conversation store
+    _conversations: dict[str, AsyncConversation] = {}
+
+    tool_registry: Optional[ToolRegistry] = tools
+
+    def _get_or_create_conversation(conv_id: Optional[str]) -> tuple[str, AsyncConversation]:
+        if conv_id and conv_id in _conversations:
+            return conv_id, _conversations[conv_id]
+        new_id = conv_id or uuid.uuid4().hex[:12]
+        conv = AsyncConversation(
+            model_name=model_name,
+            system_prompt=system_prompt,
+            tools=tool_registry,
+        )
+        _conversations[new_id] = conv
+        return new_id, conv
+
+    # ---- Endpoints ----
+
+    @app.post("/v1/chat", response_model=ChatResponse)
+    async def chat(chat_req: ChatRequest):
+        conv_id, conv = _get_or_create_conversation(chat_req.conversation_id)
+
+        if chat_req.stream:
+            # SSE streaming
+            try:
+                from sse_starlette.sse import EventSourceResponse
+            except ImportError:
+                raise HTTPException(
+                    status_code=501,
+                    detail="Streaming requires sse-starlette: pip install prompture[serve]",
+                ) from None
+
+            async def event_generator():
+                full_text = ""
+                async for chunk in conv.ask_stream(chat_req.message, chat_req.options):
+                    full_text += chunk
+                    yield {"data": json.dumps({"text": chunk})}
+                yield {"data": json.dumps({"text": "", "done": True, "conversation_id": conv_id, "usage": conv.usage})}
+
+            return EventSourceResponse(event_generator())
+
+        text = await conv.ask(chat_req.message, chat_req.options)
+        return ChatResponse(message=text, conversation_id=conv_id, usage=conv.usage)
+
+    @app.post("/v1/extract", response_model=ExtractResponse)
+    async def extract(extract_req: ExtractRequest):
+        conv_id, conv = _get_or_create_conversation(extract_req.conversation_id)
+        result = await conv.ask_for_json(
+            content=extract_req.text,
+            json_schema=extract_req.schema_def,
+        )
+        return ExtractResponse(
+            json_object=result["json_object"],
+            conversation_id=conv_id,
+            usage=conv.usage,
+        )
+
+    @app.get("/v1/conversations/{conversation_id}", response_model=ConversationHistory)
+    async def get_conversation(conversation_id: str):
+        if conversation_id not in _conversations:
+            raise HTTPException(status_code=404, detail="Conversation not found")
+        conv = _conversations[conversation_id]
+        return ConversationHistory(
+            conversation_id=conversation_id,
+            messages=conv.messages,
+            usage=conv.usage,
+        )
+
+    @app.delete("/v1/conversations/{conversation_id}")
+    async def delete_conversation(conversation_id: str):
+        if conversation_id not in _conversations:
+            raise HTTPException(status_code=404, detail="Conversation not found")
+        del _conversations[conversation_id]
+        return {"status": "deleted", "conversation_id": conversation_id}
+
+    @app.get("/v1/models", response_model=ModelInfo)
+    async def list_models():
+        from .discovery import get_available_models
+
+        try:
+            models = get_available_models()
+            model_names = [m["id"] if isinstance(m, dict) else str(m) for m in models]
+        except Exception:
+            model_names = [model_name]
+        return ModelInfo(models=model_names)
+
+    return app
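To try the new server end to end, a minimal sketch (assuming the ``serve`` extra is installed and provider credentials are in the environment; the host and port here are illustrative, not defaults from the package)::

    import uvicorn

    from prompture.server import create_app

    # create_app() lazy-imports FastAPI internally, so this import is safe
    # even in environments where the extras are checked at call time.
    app = create_app(model_name="openai/gpt-4o-mini", cors_origins=["*"])

    if __name__ == "__main__":
        uvicorn.run(app, host="127.0.0.1", port=8000)

POSTing ``{"message": "..."}`` to ``/v1/chat`` then returns the ``ChatResponse`` shape above (``message``, ``conversation_id``, ``usage``); adding ``"stream": true`` switches the same endpoint to SSE, provided ``sse-starlette`` is installed.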
prompture/session.py
ADDED
@@ -0,0 +1,117 @@
+"""Usage session tracking for Prompture.
+
+Provides :class:`UsageSession` which accumulates token counts, costs, and
+errors across multiple driver calls. A session instance is compatible as
+both an ``on_response`` and ``on_error`` callback, so you can wire it
+directly into :class:`~prompture.callbacks.DriverCallbacks`.
+
+Usage::
+
+    from prompture import UsageSession, DriverCallbacks
+
+    session = UsageSession()
+    callbacks = DriverCallbacks(
+        on_response=session.record,
+        on_error=session.record_error,
+    )
+
+    # ... pass *callbacks* to your driver / conversation ...
+
+    print(session.summary()["formatted"])
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any
+
+
+@dataclass
+class UsageSession:
+    """Accumulates usage statistics across multiple driver calls."""
+
+    prompt_tokens: int = 0
+    completion_tokens: int = 0
+    total_tokens: int = 0
+    total_cost: float = 0.0
+    call_count: int = 0
+    errors: int = 0
+    _per_model: dict[str, dict[str, Any]] = field(default_factory=dict, repr=False)
+
+    # ------------------------------------------------------------------ #
+    # Recording
+    # ------------------------------------------------------------------ #
+
+    def record(self, response_info: dict[str, Any]) -> None:
+        """Record a successful driver response.
+
+        Compatible as an ``on_response`` callback for
+        :class:`~prompture.callbacks.DriverCallbacks`.
+
+        Args:
+            response_info: Payload dict with at least ``meta`` and
+                optionally ``driver`` keys.
+        """
+        meta = response_info.get("meta", {})
+        pt = meta.get("prompt_tokens", 0)
+        ct = meta.get("completion_tokens", 0)
+        tt = meta.get("total_tokens", 0)
+        cost = meta.get("cost", 0.0)
+
+        self.prompt_tokens += pt
+        self.completion_tokens += ct
+        self.total_tokens += tt
+        self.total_cost += cost
+        self.call_count += 1
+
+        model = response_info.get("driver", "unknown")
+        bucket = self._per_model.setdefault(
+            model,
+            {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0, "cost": 0.0, "calls": 0},
+        )
+        bucket["prompt_tokens"] += pt
+        bucket["completion_tokens"] += ct
+        bucket["total_tokens"] += tt
+        bucket["cost"] += cost
+        bucket["calls"] += 1
+
+    def record_error(self, error_info: dict[str, Any]) -> None:
+        """Record a driver error.
+
+        Compatible as an ``on_error`` callback for
+        :class:`~prompture.callbacks.DriverCallbacks`.
+        """
+        self.errors += 1
+
+    # ------------------------------------------------------------------ #
+    # Reporting
+    # ------------------------------------------------------------------ #
+
+    def summary(self) -> dict[str, Any]:
+        """Return a machine-readable summary with a ``formatted`` string."""
+        formatted = (
+            f"Session: {self.total_tokens:,} tokens across {self.call_count} call(s) costing ${self.total_cost:.4f}"
+        )
+        if self.errors:
+            formatted += f" ({self.errors} error(s))"
+
+        return {
+            "prompt_tokens": self.prompt_tokens,
+            "completion_tokens": self.completion_tokens,
+            "total_tokens": self.total_tokens,
+            "total_cost": self.total_cost,
+            "call_count": self.call_count,
+            "errors": self.errors,
+            "per_model": dict(self._per_model),
+            "formatted": formatted,
+        }
+
+    def reset(self) -> None:
+        """Clear all accumulated counters."""
+        self.prompt_tokens = 0
+        self.completion_tokens = 0
+        self.total_tokens = 0
+        self.total_cost = 0.0
+        self.call_count = 0
+        self.errors = 0
+        self._per_model.clear()
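To see what the accumulator does without a live driver, a small sketch that feeds ``record()`` a hand-built ``response_info`` payload (the ``{"meta": ..., "driver": ...}`` shape the method reads above; the token counts and cost are made up for illustration)::

    from prompture import UsageSession

    session = UsageSession()
    session.record({
        "driver": "openai/gpt-4o-mini",  # keyed into the per-model bucket
        "meta": {"prompt_tokens": 12, "completion_tokens": 30,
                 "total_tokens": 42, "cost": 0.0003},
    })
    session.record_error({"error": "rate limited"})  # only bumps the error counter
    print(session.summary()["formatted"])
    # Session: 42 tokens across 1 call(s) costing $0.0003 (1 error(s))

Missing ``meta`` keys fall back to zero, so partial driver metadata degrades gracefully instead of raising.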
prompture/settings.py
CHANGED
@@ -1,6 +1,8 @@
-from pydantic_settings import BaseSettings, SettingsConfigDict
 from typing import Optional
 
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
 class Settings(BaseSettings):
     """Application settings loaded from environment variables or .env file."""
 
@@ -31,6 +33,7 @@ class Settings(BaseSettings):
     # LM Studio
     lmstudio_endpoint: str = "http://127.0.0.1:1234/v1/chat/completions"
     lmstudio_model: str = "deepseek/deepseek-r1-0528-qwen3-8b"
+    lmstudio_api_key: Optional[str] = None
 
     # Google
     google_api_key: Optional[str] = None
@@ -48,6 +51,21 @@ class Settings(BaseSettings):
     grok_api_key: Optional[str] = None
     grok_model: str = "grok-4-fast-reasoning"
 
+    # AirLLM
+    airllm_model: str = "meta-llama/Llama-2-7b-hf"
+    airllm_compression: Optional[str] = None  # "4bit" or "8bit"
+
+    # Model rates cache
+    model_rates_ttl_days: int = 7  # How often to refresh models.dev cache
+
+    # Response cache
+    cache_enabled: bool = False
+    cache_backend: str = "memory"
+    cache_ttl_seconds: int = 3600
+    cache_memory_maxsize: int = 256
+    cache_sqlite_path: Optional[str] = None
+    cache_redis_url: Optional[str] = None
+
     model_config = SettingsConfigDict(
         env_file=".env",
         extra="ignore",
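The new fields follow the existing pydantic-settings pattern, so they can be set in code, via upper-cased environment variables, or through the ``.env`` file named in ``model_config``. A quick sketch with hypothetical values for the new response-cache knobs::

    from prompture.settings import Settings

    # Explicit kwargs override anything loaded from the environment/.env.
    settings = Settings(
        cache_enabled=True,
        cache_backend="sqlite",
        cache_ttl_seconds=86400,
        cache_sqlite_path=".prompture_cache.db",
    )
    print(settings.cache_backend)  # sqlite

Equivalently, ``CACHE_ENABLED=true`` and ``CACHE_BACKEND=sqlite`` in the environment (or ``.env``) select the same configuration.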