promptkeep 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- promptkeep-0.1.0/.gitignore +9 -0
- promptkeep-0.1.0/LICENSE +21 -0
- promptkeep-0.1.0/PKG-INFO +119 -0
- promptkeep-0.1.0/README.md +94 -0
- promptkeep-0.1.0/plan.md +390 -0
- promptkeep-0.1.0/pyproject.toml +46 -0
- promptkeep-0.1.0/src/promptkeep/__init__.py +29 -0
- promptkeep-0.1.0/src/promptkeep/config.py +82 -0
- promptkeep-0.1.0/src/promptkeep/decorator.py +97 -0
- promptkeep-0.1.0/src/promptkeep/history.py +117 -0
- promptkeep-0.1.0/src/promptkeep/integrations/__init__.py +33 -0
- promptkeep-0.1.0/src/promptkeep/integrations/openai_wrapper.py +384 -0
- promptkeep-0.1.0/src/promptkeep/prompts.py +233 -0
- promptkeep-0.1.0/src/promptkeep/py.typed +0 -0
- promptkeep-0.1.0/src/promptkeep/rendering.py +120 -0
- promptkeep-0.1.0/src/promptkeep/storage.py +381 -0
- promptkeep-0.1.0/src/promptkeep/tracking.py +64 -0
- promptkeep-0.1.0/tests/__init__.py +0 -0
- promptkeep-0.1.0/tests/conftest.py +15 -0
- promptkeep-0.1.0/tests/fakes.py +127 -0
- promptkeep-0.1.0/tests/test_decorator.py +151 -0
- promptkeep-0.1.0/tests/test_history.py +77 -0
- promptkeep-0.1.0/tests/test_openai_wrapper.py +316 -0
- promptkeep-0.1.0/tests/test_prompt.py +183 -0
- promptkeep-0.1.0/tests/test_rendering.py +109 -0
- promptkeep-0.1.0/tests/test_storage.py +131 -0
- promptkeep-0.1.0/uv.lock +669 -0
promptkeep-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Pranav
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: promptkeep
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Prompts as first-class objects: versioned templates, SQLite lineage tracking, and OpenAI run tracking.
|
|
5
|
+
Author-email: Pranav <pranav2278@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Keywords: llm,openai,prompt,prompt-engineering,prompts,tracking,versioning
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
+
Classifier: Typing :: Typed
|
|
20
|
+
Requires-Python: >=3.9
|
|
21
|
+
Requires-Dist: peewee>=3.17
|
|
22
|
+
Provides-Extra: openai
|
|
23
|
+
Requires-Dist: openai>=1.0; extra == 'openai'
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
|
|
26
|
+
# promptkeep
|
|
27
|
+
|
|
28
|
+
Prompts as first-class objects: named, versioned templates with lineage tracked in SQLite,
|
|
29
|
+
variable rendering, a decorator for computed prompts, and a transparent OpenAI SDK wrapper
|
|
30
|
+
that records every run (prompt version + variables + output + usage).
|
|
31
|
+
|
|
32
|
+
## The basics
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
from promptkeep import Prompt
|
|
36
|
+
|
|
37
|
+
prompt = Prompt(
|
|
38
|
+
text="You are a code reviewer. Focus on {var1}.",
|
|
39
|
+
variables={"var1": "correctness"},
|
|
40
|
+
name="REVIEW_SYSTEM", # the prompt's stable identity
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
prompt.text # rendered string — safe to pass to any SDK
|
|
44
|
+
prompt.raw # raw template, placeholders intact
|
|
45
|
+
prompt.version # 1 — bumps automatically whenever the template text changes
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Same `name` + edited text ⇒ a new version row in SQLite (deduplicated by content hash).
|
|
49
|
+
Variables are *run data*, never versions — change them freely.
|
|
50
|
+
|
|
51
|
+
Rendering is lenient by default: unknown `{placeholders}` and JSON braces in the template
|
|
52
|
+
pass through untouched. Use `strict=True` (per prompt or via `configure`) to raise instead.
|
|
53
|
+
|
|
54
|
+
## Computed prompts
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
from promptkeep import prompt
|
|
58
|
+
|
|
59
|
+
@prompt(name="REVIEW_SYSTEM")
|
|
60
|
+
def review_sys_prompt(var1="some value", n_examples=3):
|
|
61
|
+
examples = "\n".join(load_examples(n_examples))
|
|
62
|
+
return f"You are a reviewer.\n{examples}\nFocus on {{var1}}."
|
|
63
|
+
|
|
64
|
+
p = review_sys_prompt(var1="security") # -> Prompt (raw + rendered + version)
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
The function returns the raw template; the call's arguments become the variables.
|
|
68
|
+
|
|
69
|
+
## OpenAI integration
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
from openai import OpenAI
|
|
73
|
+
from promptkeep import wrap
|
|
74
|
+
|
|
75
|
+
OpenAI = wrap(OpenAI) # or: client = wrap(OpenAI(...))
|
|
76
|
+
client = OpenAI()
|
|
77
|
+
|
|
78
|
+
completion = client.chat.completions.create(
|
|
79
|
+
model="gpt-5.5",
|
|
80
|
+
messages=[
|
|
81
|
+
{"role": "developer", "content": prompt}, # Prompt object, directly
|
|
82
|
+
{"role": "user", "content": "How do I check isinstance?"},
|
|
83
|
+
],
|
|
84
|
+
)
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
The API receives a plain string; a *run* is recorded linking this prompt version to the
|
|
88
|
+
variables used, the rendered text, the model, the output, token usage, and latency.
|
|
89
|
+
Streaming, async clients, and multi-part content are supported. Tracking failures never
|
|
90
|
+
break the API call. Unwrapped clients work too — just pass `prompt.text`.
|
|
91
|
+
|
|
92
|
+
## History
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
from promptkeep import history
|
|
96
|
+
|
|
97
|
+
history.versions("REVIEW_SYSTEM") # lineage, oldest first
|
|
98
|
+
print(history.diff("REVIEW_SYSTEM", 1, 3)) # unified diff between versions
|
|
99
|
+
history.runs("REVIEW_SYSTEM", version=3) # recorded runs, newest first
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## Configuration
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
import promptkeep
|
|
106
|
+
|
|
107
|
+
promptkeep.configure(
|
|
108
|
+
db_path="path/to/prompts.db", # default: ./.promptkeep.db (or $PROMPTKEEP_DB)
|
|
109
|
+
enabled=True, # $PROMPTKEEP_DISABLED=1 turns tracking off
|
|
110
|
+
strict=False, # set True to raise on missing variables
|
|
111
|
+
)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## Development
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
uv sync # install with dev dependencies
|
|
118
|
+
uv run pytest # run the test suite
|
|
119
|
+
```
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# promptkeep
|
|
2
|
+
|
|
3
|
+
Prompts as first-class objects: named, versioned templates with lineage tracked in SQLite,
|
|
4
|
+
variable rendering, a decorator for computed prompts, and a transparent OpenAI SDK wrapper
|
|
5
|
+
that records every run (prompt version + variables + output + usage).
|
|
6
|
+
|
|
7
|
+
## The basics
|
|
8
|
+
|
|
9
|
+
```python
|
|
10
|
+
from promptkeep import Prompt
|
|
11
|
+
|
|
12
|
+
prompt = Prompt(
|
|
13
|
+
text="You are a code reviewer. Focus on {var1}.",
|
|
14
|
+
variables={"var1": "correctness"},
|
|
15
|
+
name="REVIEW_SYSTEM", # the prompt's stable identity
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
prompt.text # rendered string — safe to pass to any SDK
|
|
19
|
+
prompt.raw # raw template, placeholders intact
|
|
20
|
+
prompt.version # 1 — bumps automatically whenever the template text changes
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Same `name` + edited text ⇒ a new version row in SQLite (deduplicated by content hash).
|
|
24
|
+
Variables are *run data*, never versions — change them freely.
|
|
25
|
+
|
|
26
|
+
Rendering is lenient by default: unknown `{placeholders}` and JSON braces in the template
|
|
27
|
+
pass through untouched. Use `strict=True` (per prompt or via `configure`) to raise instead.
|
|
28
|
+
|
|
29
|
+
## Computed prompts
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
from promptkeep import prompt
|
|
33
|
+
|
|
34
|
+
@prompt(name="REVIEW_SYSTEM")
|
|
35
|
+
def review_sys_prompt(var1="some value", n_examples=3):
|
|
36
|
+
examples = "\n".join(load_examples(n_examples))
|
|
37
|
+
return f"You are a reviewer.\n{examples}\nFocus on {{var1}}."
|
|
38
|
+
|
|
39
|
+
p = review_sys_prompt(var1="security") # -> Prompt (raw + rendered + version)
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
The function returns the raw template; the call's arguments become the variables.
|
|
43
|
+
|
|
44
|
+
## OpenAI integration
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
from openai import OpenAI
|
|
48
|
+
from promptkeep import wrap
|
|
49
|
+
|
|
50
|
+
OpenAI = wrap(OpenAI) # or: client = wrap(OpenAI(...))
|
|
51
|
+
client = OpenAI()
|
|
52
|
+
|
|
53
|
+
completion = client.chat.completions.create(
|
|
54
|
+
model="gpt-5.5",
|
|
55
|
+
messages=[
|
|
56
|
+
{"role": "developer", "content": prompt}, # Prompt object, directly
|
|
57
|
+
{"role": "user", "content": "How do I check isinstance?"},
|
|
58
|
+
],
|
|
59
|
+
)
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
The API receives a plain string; a *run* is recorded linking this prompt version to the
|
|
63
|
+
variables used, the rendered text, the model, the output, token usage, and latency.
|
|
64
|
+
Streaming, async clients, and multi-part content are supported. Tracking failures never
|
|
65
|
+
break the API call. Unwrapped clients work too — just pass `prompt.text`.
|
|
66
|
+
|
|
67
|
+
## History
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
from promptkeep import history
|
|
71
|
+
|
|
72
|
+
history.versions("REVIEW_SYSTEM") # lineage, oldest first
|
|
73
|
+
print(history.diff("REVIEW_SYSTEM", 1, 3)) # unified diff between versions
|
|
74
|
+
history.runs("REVIEW_SYSTEM", version=3) # recorded runs, newest first
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Configuration
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
import promptkeep
|
|
81
|
+
|
|
82
|
+
promptkeep.configure(
|
|
83
|
+
db_path="path/to/prompts.db", # default: ./.promptkeep.db (or $PROMPTKEEP_DB)
|
|
84
|
+
enabled=True, # $PROMPTKEEP_DISABLED=1 turns tracking off
|
|
85
|
+
strict=False, # set True to raise on missing variables
|
|
86
|
+
)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Development
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
uv sync # install with dev dependencies
|
|
93
|
+
uv run pytest # run the test suite
|
|
94
|
+
```
|
promptkeep-0.1.0/plan.md
ADDED
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
# Prompt Manager — Implementation Plan
|
|
2
|
+
|
|
3
|
+
A Python library for managing LLM prompts as first-class objects: versioned templates with
|
|
4
|
+
lineage tracking (SQLite), variable rendering, a decorator for computed prompts, and a
|
|
5
|
+
transparent wrapper around the OpenAI SDK that tracks every run (prompt version + variables +
|
|
6
|
+
output) without changing how people write their OpenAI code.
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
## 1. Goals
|
|
11
|
+
|
|
12
|
+
1. **Prompt as an object, not a string.** `Prompt(text=..., variables=..., name=...)` — the
|
|
13
|
+
`name` is the stable identity; the template text is the versioned content.
|
|
14
|
+
2. **Lineage tracking.** Same `name`, different template text ⇒ new version recorded in SQLite.
|
|
15
|
+
Full history of how a prompt evolved is queryable.
|
|
16
|
+
3. **Variable rendering.** Templates contain `{var}` placeholders; values come in via a
|
|
17
|
+
`variables` dict (and/or kwargs). Rendered output is a plain string usable anywhere.
|
|
18
|
+
4. **Computed prompts via decorator.** `@prompt(name=...)` on a function that builds a prompt
|
|
19
|
+
with real logic, not just substitution. Both the raw template and the rendered result are
|
|
20
|
+
captured so lineage still works.
|
|
21
|
+
5. **Zero-friction OpenAI integration.** `OpenAI = wrap(OpenAI)` — after that, users pass a
|
|
22
|
+
`Prompt` object directly as message `content` and the library (a) substitutes the rendered
|
|
23
|
+
string before the request goes out, and (b) records a *run*: which prompt version ran, with
|
|
24
|
+
which variables, against which model, and what came back (output, token usage, latency).
|
|
25
|
+
6. **Degrades gracefully.** Without wrapping, `prompt.text` is a plain string — works with any
|
|
26
|
+
SDK, any framework, no lock-in.
|
|
27
|
+
|
|
28
|
+
## 2. Non-goals (for now)
|
|
29
|
+
|
|
30
|
+
- No server, no UI, no cloud sync — local SQLite only. (A CLI viewer is a stretch goal.)
|
|
31
|
+
- No prompt *optimization* / eval framework — we record runs; we don't judge them.
|
|
32
|
+
- No providers beyond OpenAI in v1 (but the wrapper layer is built so Anthropic etc. can be
|
|
33
|
+
added later without touching core).
|
|
34
|
+
- Not tracking changes in *variables* as lineage — variable values are run-scoped data, not
|
|
35
|
+
prompt identity. They're recorded per-run, never as versions.
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## 3. Core concepts & data model
|
|
40
|
+
|
|
41
|
+
Three entities, strictly layered:
|
|
42
|
+
|
|
43
|
+
| Entity | Identity | What changes it | Where stored |
|
|
44
|
+
|---|---|---|---|
|
|
45
|
+
| **Prompt** | `name` (unique) | never — it's the ID | `prompts` table |
|
|
46
|
+
| **Version** | hash of raw template text | any edit to the template | `prompt_versions` table |
|
|
47
|
+
| **Run** | auto id | every wrapped LLM call | `runs` table |
|
|
48
|
+
|
|
49
|
+
- A **Prompt** is the named lineage ("REVIEW_SYSTEM").
|
|
50
|
+
- A **Version** is one concrete template text under that name. Versions are deduplicated by
|
|
51
|
+
content hash: constructing the same text twice does *not* create a new version; editing the
|
|
52
|
+
text does. Version numbers are monotonically increasing per prompt.
|
|
53
|
+
- A **Run** links a version to one execution: the variables dict used, the final rendered
|
|
54
|
+
text, the model + request params, the response (output text, usage, response id), status,
|
|
55
|
+
and latency.
|
|
56
|
+
|
|
57
|
+
### SQLite schema (v1)
|
|
58
|
+
|
|
59
|
+
```sql
|
|
60
|
+
CREATE TABLE prompts (
|
|
61
|
+
id INTEGER PRIMARY KEY,
|
|
62
|
+
name TEXT NOT NULL UNIQUE,
|
|
63
|
+
created_at TEXT NOT NULL -- ISO-8601 UTC
|
|
64
|
+
);
|
|
65
|
+
|
|
66
|
+
CREATE TABLE prompt_versions (
|
|
67
|
+
id INTEGER PRIMARY KEY,
|
|
68
|
+
prompt_id INTEGER NOT NULL REFERENCES prompts(id),
|
|
69
|
+
version INTEGER NOT NULL, -- 1, 2, 3... per prompt
|
|
70
|
+
template TEXT NOT NULL, -- raw text, placeholders intact
|
|
71
|
+
template_hash TEXT NOT NULL, -- sha256 of normalized template
|
|
72
|
+
source TEXT NOT NULL, -- 'literal' | 'decorator'
|
|
73
|
+
fn_source_hash TEXT, -- decorator only: hash of function source
|
|
74
|
+
created_at TEXT NOT NULL,
|
|
75
|
+
UNIQUE (prompt_id, template_hash),
|
|
76
|
+
UNIQUE (prompt_id, version)
|
|
77
|
+
);
|
|
78
|
+
|
|
79
|
+
CREATE TABLE runs (
|
|
80
|
+
id INTEGER PRIMARY KEY,
|
|
81
|
+
version_id INTEGER NOT NULL REFERENCES prompt_versions(id),
|
|
82
|
+
variables TEXT, -- JSON dict as passed by the user
|
|
83
|
+
rendered_text TEXT NOT NULL,
|
|
84
|
+
provider TEXT NOT NULL, -- 'openai'
|
|
85
|
+
model TEXT,
|
|
86
|
+
request_params TEXT, -- JSON (temperature, etc., minus messages)
|
|
87
|
+
response_id TEXT,
|
|
88
|
+
output_text TEXT,
|
|
89
|
+
prompt_tokens INTEGER,
|
|
90
|
+
completion_tokens INTEGER,
|
|
91
|
+
total_tokens INTEGER,
|
|
92
|
+
latency_ms INTEGER,
|
|
93
|
+
status TEXT NOT NULL, -- 'ok' | 'error'
|
|
94
|
+
error TEXT,
|
|
95
|
+
created_at TEXT NOT NULL
|
|
96
|
+
);
|
|
97
|
+
CREATE INDEX idx_runs_version ON runs(version_id, created_at);
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
- DB opened in WAL mode; one connection per thread (`threading.local`) so wrapped calls from
|
|
101
|
+
multi-threaded apps don't fight over a cursor.
|
|
102
|
+
- Schema version kept in `PRAGMA user_version` (not SQLite's internal `schema_version` pragma) + tiny forward-only migration runner, so the schema
|
|
103
|
+
can evolve without breaking existing DBs.
|
|
104
|
+
|
|
105
|
+
### DB location & configuration
|
|
106
|
+
|
|
107
|
+
Default: `./.prompts.db` in the current working directory (a project-level artifact, like
|
|
108
|
+
`.env`). Overridable by:
|
|
109
|
+
|
|
110
|
+
1. `prompt_manager.configure(db_path=..., enabled=...)` — explicit wins.
|
|
111
|
+
2. `PROMPT_MANAGER_DB` env var.
|
|
112
|
+
3. `PROMPT_MANAGER_DISABLED=1` kills all persistence (Prompt still renders fine — critical for
|
|
113
|
+
CI and for users who only want the rendering ergonomics).
|
|
114
|
+
|
|
115
|
+
**Registration timing:** constructing a `Prompt` does **not** hit the DB (prompts are usually
|
|
116
|
+
defined at module import time; import-time writes are a footgun — read-only filesystems, test
|
|
117
|
+
collection, etc.). The version row is written lazily on first *use* (first render access or
|
|
118
|
+
first tracked run), memoized per process so it's one write, not one per call.
|
|
119
|
+
|
|
120
|
+
---
|
|
121
|
+
|
|
122
|
+
## 4. Public API
|
|
123
|
+
|
|
124
|
+
Everything importable from the top-level package:
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
from prompt_manager import Prompt, prompt, wrap, configure
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### 4.1 `Prompt` class
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
prompt = Prompt(
|
|
134
|
+
text="You are a reviewer. Focus on {var1}.",
|
|
135
|
+
variables={"var1": "correctness"}, # optional at construction
|
|
136
|
+
name="REVIEW_SYSTEM", # required — it's the identity
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
prompt.text # -> rendered string: "You are a reviewer. Focus on correctness."
|
|
140
|
+
prompt.raw # -> raw template: "You are a reviewer. Focus on {var1}."
|
|
141
|
+
str(prompt) # == prompt.text
|
|
142
|
+
prompt.name # "REVIEW_SYSTEM"
|
|
143
|
+
prompt.version # int, resolved lazily from the DB (None if tracking disabled)
|
|
144
|
+
prompt.format(var1="security") # -> NEW Prompt with updated variables (immutable style)
|
|
145
|
+
prompt.render(var1="security") # -> rendered str directly (one-shot, no new object)
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
Naming decision: **`.text` = rendered, `.raw` = template.** Rationale: `.text` is what you
|
|
149
|
+
reach for 95% of the time (the thing you paste into `content=`), so it gets the short name.
|
|
150
|
+
`rendered_text`/`raw_text` aliases can exist but the docs push `.text`/`.raw`.
|
|
151
|
+
|
|
152
|
+
**Key trick — provenance-carrying strings.** `prompt.text` doesn't return a bare `str`; it
|
|
153
|
+
returns `RenderedText(str)` — a `str` subclass that behaves identically everywhere (json
|
|
154
|
+
serialization, `+`, f-strings, the OpenAI SDK...) but carries two hidden attributes:
|
|
155
|
+
`_pm_prompt` (the Prompt) and `_pm_variables` (the dict used to render). Consequences:
|
|
156
|
+
|
|
157
|
+
- Unwrapped SDKs receive a real string. Nothing breaks. (User requirement: "even if I'm not
|
|
158
|
+
wrapping it, it should still receive a string.")
|
|
159
|
+
- The wrapped OpenAI client can track runs **both** when the user passes the `Prompt` object
|
|
160
|
+
directly as `content` **and** when they pass `prompt.text` — because provenance rides along
|
|
161
|
+
on the string itself. No extra method calls on the prompt, ever.
|
|
162
|
+
|
|
163
|
+
`Prompt` itself is *not* a `str` subclass (str immutability forces rendering into `__new__`,
|
|
164
|
+
raw/rendered duality gets ugly, and `.replace()` etc. silently return plain str). Instead the
|
|
165
|
+
wrapper accepts `Prompt | RenderedText | str` as message content, and `Prompt.__str__`
|
|
166
|
+
returns `self.text` as a safety net for accidental `f"{prompt}"` usage.
|
|
167
|
+
|
|
168
|
+
**Immutability:** a `Prompt` is frozen after construction. "Changing" variables produces a new
|
|
169
|
+
`Prompt` sharing the same name/template (⇒ same version). This is what makes version identity
|
|
170
|
+
clean — the object can never drift away from the hash it registered under.
|
|
171
|
+
|
|
172
|
+
### 4.2 Rendering rules
|
|
173
|
+
|
|
174
|
+
- Syntax: Python `{var}` placeholders, rendered via `format_map`.
|
|
175
|
+
- **Lenient by default**: missing variables stay as literal `{var}` in the output (via a
|
|
176
|
+
`SafeDict` that returns `"{key}"` for missing keys). Prompts full of JSON examples and code
|
|
177
|
+
braces are the norm, and hard-crashing on `{"key": ...}` inside a prompt is the #1 pain of
|
|
178
|
+
naive `.format` use. `{{` / `}}` escaping still works for users who want to be explicit.
|
|
179
|
+
- `strict=True` (per-Prompt or global via `configure`) raises `MissingVariableError` listing
|
|
180
|
+
every unresolved placeholder — for people who want the guardrail.
|
|
181
|
+
- `prompt.placeholders` -> `set[str]` of declared variables (parsed with `string.Formatter`),
|
|
182
|
+
so tooling/tests can validate coverage.
|
|
183
|
+
|
|
184
|
+
### 4.3 `@prompt` decorator (computed prompts)
|
|
185
|
+
|
|
186
|
+
```python
|
|
187
|
+
@prompt(name="REVIEW_SYSTEM")
|
|
188
|
+
def review_sys_prompt(var1="some value", n_examples=3):
|
|
189
|
+
examples = pick_examples(n_examples) # real computation
|
|
190
|
+
return f"You are a reviewer...\n{examples}\nFocus on {{var1}}."
|
|
191
|
+
|
|
192
|
+
p = review_sys_prompt(var1="security") # -> returns a Prompt object
|
|
193
|
+
p.text # rendered
|
|
194
|
+
p.raw # the template string the function returned
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
Contract: **the function returns the raw template** (placeholders intact, `{{ }}`-escaped
|
|
198
|
+
where needed); the decorator turns it into a `Prompt`, using the call's kwargs as the
|
|
199
|
+
`variables` dict. This beats the `(raw, rendered)` tuple idea from the sketch: one return
|
|
200
|
+
value, no way for raw and rendered to disagree, and the caller gets a full `Prompt` object
|
|
201
|
+
(so `.text`, `.raw`, wrap-tracking all work identically to the class path). Returning a
|
|
202
|
+
`Prompt` also satisfies "returns (raw_prompt, rendered_prompt)" — both are on the object.
|
|
203
|
+
|
|
204
|
+
Versioning semantics for computed prompts (the subtle part):
|
|
205
|
+
|
|
206
|
+
- The **version identity is the returned template text** (same content-hash dedup as literal
|
|
207
|
+
prompts). If computation makes the template genuinely different (different examples baked
|
|
208
|
+
in), that *is* a different prompt text and gets a new version — correct, if chatty.
|
|
209
|
+
- To keep the lineage readable we additionally store `fn_source_hash` =
|
|
210
|
+
sha256(`inspect.getsource(fn)`) on each version row. History queries can then distinguish
|
|
211
|
+
"the code changed" from "the same code produced different text this call".
|
|
212
|
+
- Guidance in docs: keep run-varying data in `{placeholders}`, keep the computed part as
|
|
213
|
+
stable as possible. The library works either way; the history is just noisier otherwise.
|
|
214
|
+
- Kwargs used for rendering vs. kwargs used only for computation: all call kwargs are recorded
|
|
215
|
+
as the run's `variables`; rendering is lenient, so kwargs without matching placeholders are
|
|
216
|
+
simply ignored by the formatter. No separate declaration needed.
|
|
217
|
+
|
|
218
|
+
### 4.4 `wrap()` — OpenAI integration
|
|
219
|
+
|
|
220
|
+
```python
|
|
221
|
+
from openai import OpenAI
|
|
222
|
+
from prompt_manager import wrap
|
|
223
|
+
|
|
224
|
+
OpenAI = wrap(OpenAI) # wrap the class…
|
|
225
|
+
client = OpenAI(api_key=...) # …used exactly as before
|
|
226
|
+
# or: client = wrap(OpenAI(api_key=...)) # wrapping an instance also works
|
|
227
|
+
|
|
228
|
+
completion = client.chat.completions.create(
|
|
229
|
+
model="gpt-5.5",
|
|
230
|
+
messages=[
|
|
231
|
+
{"role": "developer", "content": prompt}, # Prompt object, directly
|
|
232
|
+
{"role": "user", "content": user_question},
|
|
233
|
+
],
|
|
234
|
+
)
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
Mechanics (no monkey-patching of the `openai` module — we only touch objects the user
|
|
238
|
+
explicitly passed to `wrap`):
|
|
239
|
+
|
|
240
|
+
- `wrap(cls)` returns a subclass whose `__init__` calls super then replaces
|
|
241
|
+
`self.chat.completions.create` with a tracking closure around the original bound method.
|
|
242
|
+
`wrap(instance)` does the same replacement on the live instance. Everything else on the
|
|
243
|
+
client is untouched — same attributes, same types, `isinstance` still holds.
|
|
244
|
+
- The interceptor:
|
|
245
|
+
1. Walks `messages`; for any `content` that is a `Prompt`, renders it; for `RenderedText`,
|
|
246
|
+
uses it as-is; either way collects `(prompt, variables, rendered)` provenance. Replaces
|
|
247
|
+
content with the plain rendered `str`. Multi-part content (list-of-blocks) handled by
|
|
248
|
+
walking `text` blocks too.
|
|
249
|
+
2. Calls the real `create()`, timing it.
|
|
250
|
+
3. Records one `runs` row **per tracked prompt** in the message list (a system + a user
|
|
251
|
+
prompt in one call ⇒ two runs sharing response metadata), including model, filtered
|
|
252
|
+
request params, output text (`choices[0].message.content`), usage, response id.
|
|
253
|
+
4. On exception: records the run with `status='error'` + the exception text, then re-raises
|
|
254
|
+
unchanged.
|
|
255
|
+
- **Tracking must never break the user's call**: every DB write is wrapped in its own
|
|
256
|
+
try/except that logs a warning and continues. A failed insert loses telemetry, never a
|
|
257
|
+
completion.
|
|
258
|
+
- **Streaming** (`stream=True`): return a thin iterator proxy that yields chunks through
|
|
259
|
+
untouched while accumulating delta content; the run row is written when the stream closes
|
|
260
|
+
(with whatever usage info the final chunk carries). Same idea for the context-manager form.
|
|
261
|
+
- **Async** (`AsyncOpenAI`): same interceptor with `async def` + `await`; detection by
|
|
262
|
+
whether the wrapped `create` is a coroutine function.
|
|
263
|
+
- **Responses API** (`client.responses.create`): same pattern, phase 2 of the wrapper —
|
|
264
|
+
chat.completions first since that's the stated usage.
|
|
265
|
+
- Wrapper code lives in `integrations/openai_wrapper.py` behind a narrow interface
|
|
266
|
+
(`extract_prompts(messages)`, `record_run(...)`), so an `integrations/anthropic_wrapper.py`
|
|
267
|
+
later is additive.
|
|
268
|
+
- `openai` is an **optional dependency** (`pip install prompt-manager[openai]`); core never
|
|
269
|
+
imports it.
|
|
270
|
+
|
|
271
|
+
### 4.5 History / inspection API
|
|
272
|
+
|
|
273
|
+
Minimal programmatic access so the DB isn't a black box:
|
|
274
|
+
|
|
275
|
+
```python
|
|
276
|
+
from prompt_manager import history
|
|
277
|
+
|
|
278
|
+
history.versions("REVIEW_SYSTEM") # -> [VersionInfo(version=1, template=..., created_at=...), ...]
|
|
279
|
+
history.diff("REVIEW_SYSTEM", 1, 3) # -> unified diff string between two versions
|
|
280
|
+
history.runs("REVIEW_SYSTEM", version=3, limit=20) # -> [RunInfo(...), ...]
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
Stretch goal: `python -m prompt_manager history REVIEW_SYSTEM` CLI over the same functions.
|
|
284
|
+
|
|
285
|
+
---
|
|
286
|
+
|
|
287
|
+
## 5. Package layout
|
|
288
|
+
|
|
289
|
+
```
|
|
290
|
+
prompt-manager/
|
|
291
|
+
├── pyproject.toml # hatchling; deps: none (core). extras: openai
|
|
292
|
+
├── README.md
|
|
293
|
+
├── plan.md # this file
|
|
294
|
+
├── src/
|
|
295
|
+
│ └── prompt_manager/
|
|
296
|
+
│ ├── __init__.py # Prompt, prompt, wrap, configure, history
|
|
297
|
+
│ ├── config.py # configure(), env vars, global settings singleton
|
|
298
|
+
│ ├── prompt.py # Prompt, RenderedText
|
|
299
|
+
│ ├── rendering.py # SafeDict, placeholder parsing, strict mode
|
|
300
|
+
│ ├── decorator.py # @prompt
|
|
301
|
+
│ ├── storage.py # connection mgmt, schema/migrations, upserts, queries
|
|
302
|
+
│ ├── tracking.py # run recording (provider-agnostic)
|
|
303
|
+
│ ├── history.py # versions() / diff() / runs()
|
|
304
|
+
│ └── integrations/
|
|
305
|
+
│ ├── __init__.py # wrap() dispatcher (detects openai class/instance)
|
|
306
|
+
│ └── openai_wrapper.py
|
|
307
|
+
└── tests/
|
|
308
|
+
├── test_prompt.py
|
|
309
|
+
├── test_rendering.py
|
|
310
|
+
├── test_decorator.py
|
|
311
|
+
├── test_storage.py
|
|
312
|
+
├── test_history.py
|
|
313
|
+
└── test_openai_wrapper.py # fake client, no network
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
Tooling: `uv` for env/deps, `pytest`, `ruff` (lint + format). Python ≥ 3.9.
|
|
317
|
+
|
|
318
|
+
---
|
|
319
|
+
|
|
320
|
+
## 6. Implementation phases
|
|
321
|
+
|
|
322
|
+
### Phase 0 — Scaffolding
|
|
323
|
+
- [ ] `git init`, `pyproject.toml` (name TBD — see open questions), `src/` layout, `uv sync`
|
|
324
|
+
- [ ] pytest + ruff configured; empty package imports cleanly
|
|
325
|
+
|
|
326
|
+
### Phase 1 — Core `Prompt` + rendering (no DB yet)
|
|
327
|
+
- [ ] `RenderedText(str)` with `_pm_prompt` / `_pm_variables`
|
|
328
|
+
- [ ] `Prompt`: constructor validation (non-empty name/text), frozen attrs, `.raw`, `.text`,
|
|
329
|
+
`.render()`, `.format()`, `__str__`, `__repr__`, equality by (name, template, variables)
|
|
330
|
+
- [ ] `rendering.py`: SafeDict lenient rendering, `{{}}` escaping, strict mode,
|
|
331
|
+
`placeholders` extraction via `string.Formatter().parse`
|
|
332
|
+
- [ ] Tests: rendering matrix (missing vars, extra vars, JSON braces, nested braces, non-str
|
|
333
|
+
values), immutability, provenance attributes survive typical string usage
|
|
334
|
+
|
|
335
|
+
### Phase 2 — Storage + lineage
|
|
336
|
+
- [ ] `config.py`: `configure()`, env vars, `enabled` flag, default db path
|
|
337
|
+
- [ ] `storage.py`: lazy connection (thread-local), WAL, schema creation, `user_version`
|
|
338
|
+
migrations, `get_or_create_prompt`, `get_or_create_version` (hash dedup, version
|
|
339
|
+
counter), all writes exception-shielded
|
|
340
|
+
- [ ] Lazy registration hook in `Prompt` (first `.text`/`.render` registers version once)
|
|
341
|
+
- [ ] `history.py`: `versions()`, `diff()` (difflib unified), `runs()`
|
|
342
|
+
- [ ] Tests: same-text ⇒ same version; edited text ⇒ v+1; dedup across processes (reopen db);
|
|
343
|
+
disabled mode does zero I/O; concurrent registration from threads
|
|
344
|
+
|
|
345
|
+
### Phase 3 — `@prompt` decorator
|
|
346
|
+
- [ ] `decorator.py`: capture kwargs (incl. defaults via `inspect.signature(fn).bind(...)` + `apply_defaults()`), call fn,
|
|
347
|
+
wrap returned str into `Prompt`, attach `fn_source_hash`
|
|
348
|
+
- [ ] Error if fn returns non-str; `functools.wraps` preserved
|
|
349
|
+
- [ ] Tests: defaults vs explicit kwargs, computed templates creating new versions,
|
|
350
|
+
fn-source-hash recorded, decorated fn still introspectable
|
|
351
|
+
|
|
352
|
+
### Phase 4 — OpenAI wrapper + run tracking (sync, non-streaming)
|
|
353
|
+
- [ ] `wrap()` dispatcher: class vs instance detection
|
|
354
|
+
- [ ] Message walking: str / Prompt / RenderedText / content-block lists
|
|
355
|
+
- [ ] Run recording via `tracking.py` (renders, timing, usage, error path)
|
|
356
|
+
- [ ] Tests against a fake OpenAI-shaped client (no network): substitution happens, run rows
|
|
357
|
+
correct, multiple prompts per call ⇒ multiple runs, tracking failure doesn't break the
|
|
358
|
+
call, unwrapped-with-`.text` path also tracks via provenance
|
|
359
|
+
- [ ] One optional live smoke test behind `OPENAI_API_KEY` guard
|
|
360
|
+
|
|
361
|
+
### Phase 5 — Streaming + async
|
|
362
|
+
- [ ] Stream proxy (sync iterator + context manager), run written at stream end
|
|
363
|
+
- [ ] `AsyncOpenAI` support
|
|
364
|
+
- [ ] Tests with fake streaming/async clients
|
|
365
|
+
|
|
366
|
+
### Phase 6 — Polish
|
|
367
|
+
- [ ] `responses.create` support in the wrapper
|
|
368
|
+
- [ ] README with the three usage tiers (plain / decorator / wrapped)
|
|
369
|
+
- [ ] CLI viewer (`python -m prompt_manager history NAME`) — stretch
|
|
370
|
+
- [ ] Version + publish prep (classifiers, py.typed, LICENSE)
|
|
371
|
+
|
|
372
|
+
Phases 1–4 are the MVP the pseudocode describes; 5–6 can trail.
|
|
373
|
+
|
|
374
|
+
---
|
|
375
|
+
|
|
376
|
+
## 7. Open questions
|
|
377
|
+
|
|
378
|
+
1. **Package name** — `prompt-manager` is likely taken on PyPI. Alternatives: `promptline`,
|
|
379
|
+
`promptvault`, `promptkeep`? (Doesn't block implementation; module name can be decided at
|
|
380
|
+
Phase 0.)
|
|
381
|
+
2. **Default DB location** — plan says project-local `./.prompts.db`. If you'd rather have one
|
|
382
|
+
global DB per machine (`~/.prompt_manager/prompts.db`), say so; it's a one-line default.
|
|
383
|
+
3. **Lenient rendering default** — plan says missing variables stay as `{var}` silently
|
|
384
|
+
(JSON-in-prompt safety). Comfortable with that, or should the default warn/raise?
|
|
385
|
+
4. **Decorator contract** — plan says the function returns the raw template and the decorator
|
|
386
|
+
returns a `Prompt` object (instead of a `(raw, rendered)` tuple). Flag if you specifically
|
|
387
|
+
want the tuple form.
|
|
388
|
+
5. The original brief cut off at "run management — when it is running…". Runs-as-records
|
|
389
|
+
(version + variables + output per call) is covered; if you meant something more (live
|
|
390
|
+
monitoring, callbacks, cost aggregation), that's additive on top of the `runs` table.
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "promptkeep"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Prompts as first-class objects: versioned templates, SQLite lineage tracking, and OpenAI run tracking."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
license-files = ["LICENSE"]
|
|
13
|
+
authors = [{ name = "Pranav", email = "pranav2278@gmail.com" }]
|
|
14
|
+
keywords = ["prompt", "prompts", "llm", "openai", "prompt-engineering", "versioning", "tracking"]
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Development Status :: 4 - Beta",
|
|
17
|
+
"Intended Audience :: Developers",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Python :: 3.9",
|
|
20
|
+
"Programming Language :: Python :: 3.10",
|
|
21
|
+
"Programming Language :: Python :: 3.11",
|
|
22
|
+
"Programming Language :: Python :: 3.12",
|
|
23
|
+
"Programming Language :: Python :: 3.13",
|
|
24
|
+
"Programming Language :: Python :: 3.14",
|
|
25
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
26
|
+
"Typing :: Typed",
|
|
27
|
+
]
|
|
28
|
+
dependencies = [
|
|
29
|
+
"peewee>=3.17",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[project.optional-dependencies]
|
|
33
|
+
openai = ["openai>=1.0"]
|
|
34
|
+
|
|
35
|
+
[dependency-groups]
|
|
36
|
+
dev = ["pytest>=8", "ruff>=0.4"]
|
|
37
|
+
|
|
38
|
+
[tool.hatch.build.targets.wheel]
|
|
39
|
+
packages = ["src/promptkeep"]
|
|
40
|
+
|
|
41
|
+
[tool.pytest.ini_options]
|
|
42
|
+
testpaths = ["tests"]
|
|
43
|
+
|
|
44
|
+
[tool.ruff]
|
|
45
|
+
line-length = 100
|
|
46
|
+
src = ["src", "tests"]
|