prpt 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prpt/__init__.py +33 -0
- prpt/__main__.py +5 -0
- prpt/_subprocess.py +168 -0
- prpt/adapters/__init__.py +4 -0
- prpt/adapters/anthropic_adapter.py +246 -0
- prpt/adapters/echo.py +16 -0
- prpt/adapters/factory.py +40 -0
- prpt/adapters/openai_adapter.py +72 -0
- prpt/adapters/shell.py +209 -0
- prpt/cli.py +664 -0
- prpt/compress/__init__.py +4 -0
- prpt/compress/tool_output.py +725 -0
- prpt/core/__init__.py +11 -0
- prpt/core/constants.py +89 -0
- prpt/core/dotenv.py +63 -0
- prpt/core/spec.py +93 -0
- prpt/core/types.py +88 -0
- prpt/core/utils.py +133 -0
- prpt/handoff.py +260 -0
- prpt/hooks/__init__.py +0 -0
- prpt/hooks/optimize_prompt.py +129 -0
- prpt/judges/__init__.py +28 -0
- prpt/judges/judge.py +447 -0
- prpt/judges/slm.py +91 -0
- prpt/normalizers/__init__.py +13 -0
- prpt/normalizers/base.py +333 -0
- prpt/normalizers/heuristic.py +210 -0
- prpt/normalizers/slm_anthropic.py +476 -0
- prpt/normalizers/slm_openai.py +278 -0
- prpt/normalizers/slm_openai_v2.py +185 -0
- prpt/normalizers/slm_subscription.py +310 -0
- prpt/repo/__init__.py +4 -0
- prpt/repo/collector.py +81 -0
- prpt/repo/loader.py +619 -0
- prpt/session.py +138 -0
- prpt/stats.py +250 -0
- prpt/ui.py +233 -0
- prpt-0.1.0.dist-info/METADATA +350 -0
- prpt-0.1.0.dist-info/RECORD +43 -0
- prpt-0.1.0.dist-info/WHEEL +5 -0
- prpt-0.1.0.dist-info/entry_points.txt +2 -0
- prpt-0.1.0.dist-info/licenses/LICENSE +201 -0
- prpt-0.1.0.dist-info/top_level.txt +1 -0
prpt/__init__.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""PromptPilot — prompt-optimizing wrapper for AI coding CLIs."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from prpt.core.types import (
|
|
5
|
+
Confidence,
|
|
6
|
+
NormalizedRequest,
|
|
7
|
+
RepoMetadata,
|
|
8
|
+
RewriteMode,
|
|
9
|
+
TaskType,
|
|
10
|
+
TokenStats,
|
|
11
|
+
ValidationResult,
|
|
12
|
+
)
|
|
13
|
+
from prpt.normalizers.base import Normalizer, SemanticValidator, create_normalizer
|
|
14
|
+
from prpt.normalizers.heuristic import HeuristicNormalizer
|
|
15
|
+
from prpt.repo.collector import RepoContextCollector
|
|
16
|
+
from prpt.cli import main, main_cli
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"Confidence",
|
|
20
|
+
"HeuristicNormalizer",
|
|
21
|
+
"NormalizedRequest",
|
|
22
|
+
"Normalizer",
|
|
23
|
+
"RepoContextCollector",
|
|
24
|
+
"RepoMetadata",
|
|
25
|
+
"RewriteMode",
|
|
26
|
+
"SemanticValidator",
|
|
27
|
+
"TaskType",
|
|
28
|
+
"TokenStats",
|
|
29
|
+
"ValidationResult",
|
|
30
|
+
"create_normalizer",
|
|
31
|
+
"main",
|
|
32
|
+
"main_cli",
|
|
33
|
+
]
|
prpt/__main__.py
ADDED
prpt/_subprocess.py
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"""Subprocess helpers — utf-8 enforcement + claude.exe orphan management.
|
|
2
|
+
|
|
3
|
+
Two recurring bug patterns this module exists to prevent:
|
|
4
|
+
|
|
5
|
+
1. **Windows GBK encoding crash** when subprocess.run is called with `text=True`
|
|
6
|
+
but no explicit `encoding=`. Python 3.11 on Windows defaults to the locale
|
|
7
|
+
encoding (often GBK), which fails on non-ASCII bytes from many tools.
|
|
8
|
+
`safe_run()` enforces utf-8/replace whenever text mode is implied.
|
|
9
|
+
|
|
10
|
+
2. **claude.exe zombie accumulation.** When a chain run crashes or is Ctrl-C'd
|
|
11
|
+
with a claude.exe subprocess in flight, the orphan keeps retrying API calls,
|
|
12
|
+
competes for the API key's concurrent-request quota, and (~10+ orphans)
|
|
13
|
+
eventually exhausts Windows process handles and crashes the OS.
|
|
14
|
+
`claude_subprocess_session()` wraps a chain-run / launcher entry point
|
|
15
|
+
in a context manager that reaps orphans on entry AND exit.
|
|
16
|
+
|
|
17
|
+
Use:
|
|
18
|
+
from prpt._subprocess import safe_run, claude_subprocess_session
|
|
19
|
+
|
|
20
|
+
# Encoding-safe subprocess
|
|
21
|
+
r = safe_run(["powershell", "-Command", "Get-Process"], capture_output=True, text=True)
|
|
22
|
+
|
|
23
|
+
# Wrap a launcher loop so any zombies it leaves are reaped automatically
|
|
24
|
+
with claude_subprocess_session("extra_gated_runs"):
|
|
25
|
+
for r in range(args.start, args.start + args.count):
|
|
26
|
+
run_chain_once(...)
|
|
27
|
+
"""
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
import os
|
|
31
|
+
import subprocess
|
|
32
|
+
import sys
|
|
33
|
+
import time
|
|
34
|
+
from contextlib import contextmanager
|
|
35
|
+
from typing import Iterator
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def safe_run(*args, **kwargs) -> subprocess.CompletedProcess:
    """Run ``subprocess.run`` with utf-8 decoding enforced in text mode.

    Text mode is treated as requested when any of these is passed:
      - a truthy ``text`` or ``universal_newlines``
      - ``encoding=`` (any value, including ``None``)
      - a non-``None`` ``errors=``

    These mirror the conditions under which ``subprocess`` itself decodes
    output, so no text-mode call can fall back to the locale encoding.
    In text mode a missing or ``None`` ``encoding`` becomes ``"utf-8"`` and
    a missing or ``None`` ``errors`` becomes ``"replace"``; explicit values
    are kept. Binary mode is left alone and the caller receives bytes.

    All other positional and keyword arguments pass through unchanged.

    Returns:
        The ``subprocess.CompletedProcess`` produced by ``subprocess.run``.
    """
    text_mode = bool(
        kwargs.get("text")
        or kwargs.get("universal_newlines")
        or "encoding" in kwargs
        or kwargs.get("errors") is not None
    )
    if text_mode:
        # setdefault() would keep an explicit encoding=None, which silently
        # selects the locale encoding; treat None the same as "not given".
        if kwargs.get("encoding") is None:
            kwargs["encoding"] = "utf-8"
        if kwargs.get("errors") is None:
            kwargs["errors"] = "replace"
    return subprocess.run(*args, **kwargs)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _wmic_int_rows(csv_text: str, width: int) -> list:
    """Parse ``wmic ... /format:csv`` text into tuples of ``width`` ints.

    The first CSV column is skipped; the next ``width`` columns must all be
    integers. Lines that are too short or non-numeric (blank lines, the
    header row) are ignored.
    """
    rows = []
    for line in csv_text.splitlines():
        cells = [cell.strip() for cell in line.split(",")]
        if len(cells) < width + 1:
            continue
        try:
            rows.append(tuple(int(cell) for cell in cells[1:width + 1]))
        except ValueError:
            continue
    return rows


def reap_claude_orphans() -> int:
    """Kill claude.exe processes whose parent process no longer exists.

    Orphans accumulate when a harness run crashes or is Ctrl-C'd while a
    claude.exe subprocess is in flight: the child keeps running and keeps
    retrying API calls.

    Windows-only: returns 0 immediately when ``os.name`` is not ``"nt"``.
    The whole operation is best effort: any failure to list processes
    results in 0 rather than an exception.

    Returns:
        The number of orphaned claude.exe process trees for which
        ``taskkill`` reported success (exit status 0).
    """
    if os.name != "nt":
        return 0
    try:
        # List all claude.exe PIDs with their parent PIDs. The numeric
        # columns are read as (ParentProcessId, ProcessId), in that order.
        listing = safe_run(
            ["wmic", "process", "where", "Name='claude.exe'",
             "get", "ProcessId,ParentProcessId", "/format:csv"],
            capture_output=True, text=True, timeout=30,
        )
        if listing.returncode != 0:
            return 0
        claude_pairs = [
            (pid, ppid) for ppid, pid in _wmic_int_rows(listing.stdout, 2)
        ]
        if not claude_pairs:
            return 0

        # List all live PIDs so parents that have exited can be detected.
        live = safe_run(
            ["wmic", "process", "get", "ProcessId", "/format:csv"],
            capture_output=True, text=True, timeout=30,
        )
        if live.returncode != 0:
            return 0
        live_pids = {row[0] for row in _wmic_int_rows(live.stdout, 1)}

        killed = 0
        for pid, ppid in claude_pairs:
            if ppid in live_pids:
                continue
            # Orphan: kill the process tree rooted at this claude.exe.
            try:
                result = safe_run(
                    ["taskkill", "/F", "/T", "/PID", str(pid)],
                    capture_output=True, timeout=10,
                )
            except (OSError, subprocess.SubprocessError):
                continue
            # Only count kills taskkill actually confirmed; a non-zero exit
            # means the process was not terminated by this call.
            if result.returncode == 0:
                killed += 1
        return killed
    except Exception:
        # Deliberate best-effort boundary: this runs from a context
        # manager's ``finally`` and must never mask the caller's own error.
        return 0
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
@contextmanager
def claude_subprocess_session(label: str = "") -> Iterator[None]:
    """Wrap a sequence of claude.exe subprocess invocations with auto-reap.

    Calls ``reap_claude_orphans()`` once on entry (cleaning up after any
    prior crashed run) and once on exit, even if the body raised. Whenever
    a reap kills at least one process, a one-line notice is written to
    stderr.

    Args:
        label: optional tag appended to the stderr notice prefix, to help
            identify which run was leaking.

    Example:
        with claude_subprocess_session("extra_gated_runs"):
            for r in range(args.start, args.start + args.count):
                run_chain_once(chain, "claude-code", "gated_session", r, out_dir)
    """
    tag = "[claude_subprocess_session{0}]".format(":" + label if label else "")
    # Monotonic clock: the elapsed figure must not be skewed by wall-clock
    # adjustments that happen while the session is running.
    started = time.monotonic()
    pre_reaped = reap_claude_orphans()
    if pre_reaped > 0:
        sys.stderr.write(
            "{0} reaped {1} pre-existing orphans\n".format(tag, pre_reaped)
        )
    try:
        yield
    finally:
        post_reaped = reap_claude_orphans()
        if post_reaped > 0:
            elapsed = time.monotonic() - started
            sys.stderr.write(
                "{0} reaped {1} on exit ({2:.0f}s)\n".format(
                    tag, post_reaped, elapsed
                )
            )
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
"""Direct Anthropic API adapter — calls a model and captures actual token usage.
|
|
2
|
+
|
|
3
|
+
Supports claude-opus-4-7, claude-sonnet-4-7, and any future Anthropic model
|
|
4
|
+
via the `model` constructor parameter.
|
|
5
|
+
|
|
6
|
+
Prompt-caching design
|
|
7
|
+
---------------------
|
|
8
|
+
The adapter accepts an optional `system` string (a stable instruction block)
|
|
9
|
+
and an optional `context_block` string (stable repo context). When either is
|
|
10
|
+
large enough to cross Anthropic's minimum cacheable threshold, a
|
|
11
|
+
`cache_control: ephemeral` marker is inserted so repeated calls within the
|
|
12
|
+
5-minute TTL window are served from cache at 0.10× the normal input price.
|
|
13
|
+
|
|
14
|
+
Cache breakpoint placement:
|
|
15
|
+
[ system (cacheable) ] [ context_block (cacheable) ] | [ task prompt ]
|
|
16
|
+
└────────── stable prefix, written once ────────────┘ └── dynamic ──┘
|
|
17
|
+
|
|
18
|
+
When no system / context_block is provided the adapter falls back to the
|
|
19
|
+
original single-string call — fully backward-compatible.
|
|
20
|
+
"""
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import argparse
|
|
24
|
+
import os
|
|
25
|
+
from typing import Optional
|
|
26
|
+
|
|
27
|
+
from prpt.core.constants import (
|
|
28
|
+
DEFAULT_TARGET_MODEL, CACHE_WRITE_MULTIPLIER, CACHE_READ_MULTIPLIER,
|
|
29
|
+
MODEL_PRICING,
|
|
30
|
+
)
|
|
31
|
+
from prpt.core.utils import write_stderr
|
|
32
|
+
from prpt.adapters.echo import ToolAdapter
|
|
33
|
+
|
|
34
|
+
# Anthropic's minimum cacheable block: ~1 024 tokens for Sonnet/Opus.
|
|
35
|
+
# Using chars as a cheap proxy: 1 024 tokens ≈ 4 096 chars.
|
|
36
|
+
_CACHE_MIN_CHARS = 4_096
|
|
37
|
+
_EPHEMERAL = {"type": "ephemeral"}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _extract_usage(response) -> dict:
|
|
41
|
+
"""Pull all Anthropic usage fields into a flat dict (cache-aware)."""
|
|
42
|
+
u = response.usage
|
|
43
|
+
return {
|
|
44
|
+
"input_tokens": getattr(u, "input_tokens", 0) or 0,
|
|
45
|
+
"output_tokens": getattr(u, "output_tokens", 0) or 0,
|
|
46
|
+
"cache_creation_input_tokens": getattr(u, "cache_creation_input_tokens", 0) or 0,
|
|
47
|
+
"cache_read_input_tokens": getattr(u, "cache_read_input_tokens", 0) or 0,
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _cost_usd(usage: dict, model: str) -> float:
    """Return the USD cost of one call under cache-aware pricing.

    ``usage`` must carry the four token counters ``input_tokens``,
    ``output_tokens``, ``cache_creation_input_tokens`` and
    ``cache_read_input_tokens``. Prices are looked up in ``MODEL_PRICING``
    by ``model`` and divided by one million to get a per-token rate; a
    model with no entry is charged at 15.00 (input) / 75.00 (output).
    Cache writes and reads are billed at the input rate scaled by
    ``CACHE_WRITE_MULTIPLIER`` and ``CACHE_READ_MULTIPLIER``.
    """
    fallback = {"input": 15.00, "output": 75.00}
    rates = MODEL_PRICING.get(model, fallback)
    rate_in = rates["input"] / 1_000_000
    rate_out = rates["output"] / 1_000_000

    total = usage["input_tokens"] * rate_in
    total += usage["output_tokens"] * rate_out
    total += usage["cache_creation_input_tokens"] * rate_in * CACHE_WRITE_MULTIPLIER
    total += usage["cache_read_input_tokens"] * rate_in * CACHE_READ_MULTIPLIER
    return total
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class AnthropicDirectAdapter(ToolAdapter):
    """
    Calls an Anthropic model (default: ``DEFAULT_TARGET_MODEL``) via the SDK.

    After each successful ``run()`` the token counters and computed cost of
    that call are available on ``last_usage``; it is ``None`` before the
    first run and after a failed one.

    Parameters
    ----------
    model : str
        Any Anthropic model ID.
    api_key : str, optional
        Falls back to the ``ANTHROPIC_API_KEY`` env var.
    max_tokens : int
        Maximum output tokens (default 4 096).
    system : str, optional
        Stable instruction block sent as the ``system`` parameter. When at
        least ``_CACHE_MIN_CHARS`` characters long it is marked with
        ``cache_control: ephemeral``.
    context_block : str, optional
        Stable repo-context blob. Inserted as the first user content block,
        wrapped in ``<repository_context>`` tags and marked cacheable under
        the same length rule. It is placed *before* the task prompt so the
        cache breakpoint sits between the stable prefix and the per-request
        tail.

    Raises
    ------
    ImportError
        If the ``anthropic`` package cannot be imported.
    """

    def __init__(
        self,
        model: str = DEFAULT_TARGET_MODEL,
        api_key: Optional[str] = None,
        max_tokens: int = 4096,
        system: Optional[str] = None,
        context_block: Optional[str] = None,
    ):
        # Keep the try body to the import alone so only a genuinely missing
        # SDK is translated into the install hint.
        try:
            import anthropic as _anthropic
        except ImportError as exc:
            raise ImportError(
                "Claude tool support requires the anthropic SDK.\n"
                " Run: pip install prpt[claude]"
            ) from exc
        self._client = _anthropic.Anthropic(
            api_key=api_key or os.environ.get("ANTHROPIC_API_KEY")
        )
        self._model = model
        self._max_tokens = max_tokens
        self._system = system
        self._context_block = context_block
        # Usage counters + cost of the most recent successful run().
        self.last_usage: Optional[dict] = None

    # ------------------------------------------------------------------
    # Cache-aware request builders
    # ------------------------------------------------------------------

    @staticmethod
    def _is_cacheable(text: Optional[str]) -> bool:
        """Return True when ``text`` is long enough to get a cache marker."""
        return bool(text) and len(text) >= _CACHE_MIN_CHARS

    def _cache_enabled(self) -> bool:
        """Return True when this adapter attaches at least one cache marker."""
        return self._is_cacheable(self._system) or self._is_cacheable(
            self._context_block
        )

    def _build_system(self) -> list[dict] | str | None:
        """Return the ``system`` param, or None when no system text is set.

        The text is wrapped in a single text block, which carries a
        ``cache_control`` marker when the text is long enough.
        """
        if not self._system:
            return None
        block: dict = {"type": "text", "text": self._system}
        if self._is_cacheable(self._system):
            # Copy so the request payload never aliases the module constant.
            block["cache_control"] = dict(_EPHEMERAL)
        return [block]

    def _build_user_content(self, task_prompt: str) -> list[dict] | str:
        """
        Build the user message content.

        If a context_block was provided: two blocks —
        [0] the (possibly cacheable) context, [1] the fresh task prompt.
        Otherwise: the plain prompt string (no cache marker).
        """
        if not self._context_block:
            return task_prompt

        ctx_block: dict = {
            "type": "text",
            "text": (
                "<repository_context>\n"
                + self._context_block
                + "\n</repository_context>"
            ),
        }
        if self._is_cacheable(self._context_block):
            ctx_block["cache_control"] = dict(_EPHEMERAL)

        return [
            ctx_block,
            {"type": "text", "text": task_prompt},
        ]

    # ------------------------------------------------------------------
    # ToolAdapter interface
    # ------------------------------------------------------------------

    def run(self, final_prompt: str, args: argparse.Namespace) -> int:
        """Send ``final_prompt`` to the model and print its text output.

        Text blocks are printed to stdout. With ``args.verbose`` set,
        thinking blocks and the names of tool_use blocks are reported on
        stderr. A response with no text produces a warning on stderr.

        Returns:
            0 when the API call completed (even if it carried no text),
            1 when any exception was raised while calling or processing.
        """
        self.last_usage = None
        verbose = getattr(args, "verbose", False)
        if verbose:
            # Report "on" only when a cache marker is really attached; a
            # block below the size threshold is sent uncached.
            cache_state = "on" if self._cache_enabled() else "off"
            write_stderr(
                "[adapter] calling {model} via Anthropic SDK "
                "(cache={cache})".format(model=self._model, cache=cache_state)
            )
        try:
            create_kwargs: dict = {
                "model": self._model,
                "max_tokens": self._max_tokens,
                "messages": [{
                    "role": "user",
                    "content": self._build_user_content(final_prompt),
                }],
            }
            system_arg = self._build_system()
            if system_arg is not None:
                create_kwargs["system"] = system_arg

            response = self._client.messages.create(**create_kwargs)

            usage = _extract_usage(response)
            usage["total_cost_usd"] = _cost_usd(usage, self._model)
            self.last_usage = usage

            printed_text = False
            tool_uses: list[str] = []

            for block in response.content:
                btype = getattr(block, "type", None)
                if btype == "text" or (btype is None and hasattr(block, "text")):
                    text = getattr(block, "text", "")
                    if text:
                        print(text)
                        printed_text = True
                elif btype == "thinking":
                    if verbose:
                        thinking_text = getattr(block, "thinking", "")
                        write_stderr("[adapter] thinking: {0}".format(thinking_text))
                elif btype == "redacted_thinking":
                    if verbose:
                        write_stderr("[adapter] thinking: <redacted>")
                elif btype == "tool_use":
                    tool_uses.append(getattr(block, "name", "<unknown>"))

            if verbose and tool_uses:
                write_stderr(
                    "[adapter] tool_use blocks: {0}".format(", ".join(tool_uses))
                )

            if not printed_text:
                stop_reason = getattr(response, "stop_reason", "unknown")
                detail = (
                    " (tool_use: {0})".format(", ".join(tool_uses))
                    if tool_uses else ""
                )
                write_stderr(
                    "[adapter] warning: no text content in response "
                    "(stop_reason={0}){1}".format(stop_reason, detail)
                )
            return 0

        except Exception as exc:
            # Adapter boundary: report the failure and signal it through
            # the return status instead of propagating.
            write_stderr("[adapter] Anthropic API call failed: {0}".format(exc))
            return 1

    # ------------------------------------------------------------------
    # Convenience: human-readable cache summary for --verbose / stats
    # ------------------------------------------------------------------

    def cache_summary(self) -> str:
        """Return a one-line cache hit summary from the last run.

        The hit percentage is cache-read tokens as a share of cache-read
        plus cache-written tokens.
        """
        if not self.last_usage:
            return "no run yet"
        r = self.last_usage.get("cache_read_input_tokens", 0)
        w = self.last_usage.get("cache_creation_input_tokens", 0)
        total_cached = r + w
        if total_cached == 0:
            return "cache: off (no cacheable blocks)"
        hit_pct = r / total_cached * 100
        return (
            "cache: {r:,} read / {w:,} written ({pct:.0f}% hit) "
            "cost ${cost:.6f}".format(
                r=r, w=w, pct=hit_pct,
                cost=self.last_usage.get("total_cost_usd", 0.0),
            )
        )
|
prpt/adapters/echo.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Echo adapter — prints prompt to stdout (dry-run / default)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import argparse
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class ToolAdapter:
    """Common base for every downstream tool adapter.

    Subclasses override :meth:`run`; the base implementation only raises.
    """

    def run(self, final_prompt: str, args: argparse.Namespace) -> int:
        """Hand ``final_prompt`` to the downstream tool and return an int status.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class EchoAdapter(ToolAdapter):
    """Adapter that writes the prompt to stdout instead of calling a tool."""

    def run(self, final_prompt: str, args: argparse.Namespace) -> int:
        """Print ``final_prompt`` and return 0; ``args`` is not consulted."""
        print(final_prompt)
        status = 0
        return status
|
prpt/adapters/factory.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Adapter factory — selects the right downstream adapter from --tool flag."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import argparse
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
from prpt.core.constants import DEFAULT_TARGET_MODEL
|
|
8
|
+
from prpt.adapters.echo import EchoAdapter, ToolAdapter
|
|
9
|
+
from prpt.adapters.shell import CodexAdapter, ShellToolAdapter
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class AdapterFactory:
    """Builds the downstream ``ToolAdapter`` selected by ``args.tool``."""

    @staticmethod
    def create(args: argparse.Namespace) -> ToolAdapter:
        """Return the adapter matching ``args.tool`` (case-insensitive).

        Attributes read from ``args``, all optional:
            tool: adapter name; missing or empty means ``"echo"``.
            tool_arg: extra arguments for the codex / shell adapters.
            model: model ID for the API adapters.
            max_tokens: output-token cap; missing or falsy means 4096.
            api_key: API key for the API adapters.
            system_prompt, context_block: forwarded to the Anthropic adapter.

        ``"echo"``, ``"codex"``, ``"anthropic"`` and ``"openai"`` map to
        their dedicated adapters; any other name is handed to
        ``ShellToolAdapter`` as the tool name. The SDK-backed adapters are
        imported lazily, only on the branch that needs them.
        """
        tool = (getattr(args, "tool", None) or "echo").lower()
        extra_args = getattr(args, "tool_arg", None) or []
        requested_model = getattr(args, "model", None)
        max_tokens = getattr(args, "max_tokens", 4096) or 4096
        api_key = getattr(args, "api_key", None)

        if tool == "echo":
            return EchoAdapter()

        if tool == "codex":
            return CodexAdapter(extra_args=extra_args)

        if tool == "anthropic":
            from prpt.adapters.anthropic_adapter import AnthropicDirectAdapter
            system = getattr(args, "system_prompt", None)
            context_block = getattr(args, "context_block", None)
            return AnthropicDirectAdapter(
                model=requested_model or DEFAULT_TARGET_MODEL,
                api_key=api_key, max_tokens=max_tokens,
                system=system, context_block=context_block,
            )

        if tool == "openai":
            from prpt.adapters.openai_adapter import OpenAIDirectAdapter
            openai_kwargs: dict = {"api_key": api_key, "max_tokens": max_tokens}
            # Pass a model only when the user chose one. DEFAULT_TARGET_MODEL
            # is also the Anthropic adapter's default, so it is presumably
            # not a valid OpenAI model ID; otherwise the OpenAI adapter's
            # own default applies.
            # NOTE(review): confirm against prpt.core.constants.
            if requested_model:
                openai_kwargs["model"] = requested_model
            return OpenAIDirectAdapter(**openai_kwargs)

        return ShellToolAdapter(tool_name=tool, extra_args=extra_args)
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""Direct OpenAI API adapter — calls a model and captures actual token usage."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import argparse
|
|
5
|
+
import os
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
from prpt.core.utils import write_stderr
|
|
9
|
+
from prpt.adapters.echo import ToolAdapter
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class OpenAIDirectAdapter(ToolAdapter):
    """
    Calls an OpenAI model via the SDK's chat-completions endpoint.

    After each successful ``run()`` the input/output token counts of that
    call are available on ``last_usage``; it is ``None`` before the first
    run and after a failed one.

    Parameters
    ----------
    model : str
        OpenAI model ID (default ``"gpt-4o"``).
    api_key : str, optional
        Falls back to the ``OPENAI_API_KEY`` env var.
    max_tokens : int
        Maximum output tokens (default 4 096).

    Raises
    ------
    ImportError
        If the ``openai`` package cannot be imported.
    """

    def __init__(
        self, model: str = "gpt-4o", api_key: Optional[str] = None, max_tokens: int = 4096,
    ):
        # Keep the try body to the import alone so only a genuinely missing
        # SDK is translated into the install hint.
        try:
            import openai as _openai
        except ImportError as exc:
            raise ImportError(
                "Codex tool support requires the openai SDK.\n"
                " Run: pip install prpt[codex]"
            ) from exc
        self._client = _openai.OpenAI(
            api_key=api_key or os.environ.get("OPENAI_API_KEY")
        )
        self._model = model
        self._max_tokens = max_tokens
        # Token counts of the most recent successful run().
        self.last_usage: Optional[dict] = None

    @staticmethod
    def _is_reasoning_model(model: str) -> bool:
        """Return True if the model is an o-series reasoning model (o1/o3/o4/o5+).

        Matching is case-insensitive and looks only at the part after the
        last ``/``, so provider-prefixed IDs such as ``"openai/o3-mini"``
        are recognised.
        """
        name = model.lower()
        # Strip common provider prefixes (e.g. "openai/o3-mini", "azure/o1")
        if "/" in name:
            name = name.rsplit("/", 1)[1]
        return name.startswith(("o1", "o3", "o4", "o5", "o6", "o7", "o8", "o9"))

    def run(self, final_prompt: str, args: argparse.Namespace) -> int:
        """Send ``final_prompt`` as a single user message and print the reply.

        The first choice's content is printed to stdout. If it is empty, a
        warning naming the finish reason is written to stderr instead.

        Returns:
            0 when the API call completed (even if it carried no text),
            1 when any exception was raised while calling or processing.
        """
        self.last_usage = None
        if getattr(args, "verbose", False):
            write_stderr("[adapter] calling {0} via OpenAI SDK".format(self._model))
        try:
            # o-series reasoning models (o1/o3/o4/...) reject `max_tokens` and
            # require `max_completion_tokens` instead.
            token_kwarg = (
                "max_completion_tokens"
                if self._is_reasoning_model(self._model)
                else "max_tokens"
            )
            create_kwargs = {
                "model": self._model,
                "messages": [{"role": "user", "content": final_prompt}],
                token_kwarg: self._max_tokens,
            }
            response = self._client.chat.completions.create(**create_kwargs)

            # Usage may be absent; missing counters become 0 so a successful
            # call is never reported as failed for lack of accounting data.
            usage = getattr(response, "usage", None)
            self.last_usage = {
                "input_tokens": getattr(usage, "prompt_tokens", 0) or 0,
                "output_tokens": getattr(usage, "completion_tokens", 0) or 0,
            }

            choice = response.choices[0]
            content = choice.message.content
            if content:
                print(content)
            else:
                # Mirror the Anthropic adapter: an empty reply is surfaced
                # on stderr instead of succeeding silently.
                write_stderr(
                    "[adapter] warning: no text content in response "
                    "(finish_reason={0})".format(
                        getattr(choice, "finish_reason", "unknown")
                    )
                )
            return 0
        except Exception as exc:
            # Adapter boundary: report the failure and signal it through
            # the return status instead of propagating.
            write_stderr("[adapter] OpenAI API call failed: {0}".format(exc))
            return 1
|