promptkeep 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ .venv/
4
+ .pytest_cache/
5
+ .ruff_cache/
6
+ dist/
7
+ build/
8
+ *.egg-info/
9
+ .promptkeep.db*
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Pranav
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,119 @@
1
+ Metadata-Version: 2.4
2
+ Name: promptkeep
3
+ Version: 0.1.0
4
+ Summary: Prompts as first-class objects: versioned templates, SQLite lineage tracking, and OpenAI run tracking.
5
+ Author-email: Pranav <pranav2278@gmail.com>
6
+ License-Expression: MIT
7
+ License-File: LICENSE
8
+ Keywords: llm,openai,prompt,prompt-engineering,prompts,tracking,versioning
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.9
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Programming Language :: Python :: 3.14
18
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
19
+ Classifier: Typing :: Typed
20
+ Requires-Python: >=3.9
21
+ Requires-Dist: peewee>=3.17
22
+ Provides-Extra: openai
23
+ Requires-Dist: openai>=1.0; extra == 'openai'
24
+ Description-Content-Type: text/markdown
25
+
26
+ # promptkeep
27
+
28
+ Prompts as first-class objects: named, versioned templates with lineage tracked in SQLite,
29
+ variable rendering, a decorator for computed prompts, and a transparent OpenAI SDK wrapper
30
+ that records every run (prompt version + variables + output + usage).
31
+
32
+ ## The basics
33
+
34
+ ```python
35
+ from promptkeep import Prompt
36
+
37
+ prompt = Prompt(
38
+ text="You are a code reviewer. Focus on {var1}.",
39
+ variables={"var1": "correctness"},
40
+ name="REVIEW_SYSTEM", # the prompt's stable identity
41
+ )
42
+
43
+ prompt.text # rendered string — safe to pass to any SDK
44
+ prompt.raw # raw template, placeholders intact
45
+ prompt.version # 1 — bumps automatically whenever the template text changes
46
+ ```
47
+
48
+ Same `name` + edited text ⇒ a new version row in SQLite (deduplicated by content hash).
49
+ Variables are *run data*, never versions — change them freely.
50
+
51
+ Rendering is lenient by default: unknown `{placeholders}` and JSON braces in the template
52
+ pass through untouched. Use `strict=True` (per prompt or via `configure`) to raise instead.
53
+
54
+ ## Computed prompts
55
+
56
+ ```python
57
+ from promptkeep import prompt
58
+
59
+ @prompt(name="REVIEW_SYSTEM")
60
+ def review_sys_prompt(var1="some value", n_examples=3):
61
+ examples = "\n".join(load_examples(n_examples))
62
+ return f"You are a reviewer.\n{examples}\nFocus on {{var1}}."
63
+
64
+ p = review_sys_prompt(var1="security") # -> Prompt (raw + rendered + version)
65
+ ```
66
+
67
+ The function returns the raw template; the call's arguments become the variables.
68
+
69
+ ## OpenAI integration
70
+
71
+ ```python
72
+ from openai import OpenAI
73
+ from promptkeep import wrap
74
+
75
+ OpenAI = wrap(OpenAI) # or: client = wrap(OpenAI(...))
76
+ client = OpenAI()
77
+
78
+ completion = client.chat.completions.create(
79
+ model="gpt-5.5",
80
+ messages=[
81
+ {"role": "developer", "content": prompt}, # a Prompt instance (not the @prompt decorator), passed directly
82
+ {"role": "user", "content": "How do I check isinstance?"},
83
+ ],
84
+ )
85
+ ```
86
+
87
+ The API receives a plain string; a *run* is recorded linking this prompt version to the
88
+ variables used, the rendered text, the model, the output, token usage, and latency.
89
+ Streaming, async clients, and multi-part content are supported. Tracking failures never
90
+ break the API call. Unwrapped clients work too — just pass `prompt.text`.
91
+
92
+ ## History
93
+
94
+ ```python
95
+ from promptkeep import history
96
+
97
+ history.versions("REVIEW_SYSTEM") # lineage, oldest first
98
+ print(history.diff("REVIEW_SYSTEM", 1, 3)) # unified diff between versions
99
+ history.runs("REVIEW_SYSTEM", version=3) # recorded runs, newest first
100
+ ```
101
+
102
+ ## Configuration
103
+
104
+ ```python
105
+ import promptkeep
106
+
107
+ promptkeep.configure(
108
+ db_path="path/to/prompts.db", # default: ./.promptkeep.db (or $PROMPTKEEP_DB)
109
+ enabled=True, # $PROMPTKEEP_DISABLED=1 turns tracking off
110
+ strict=False, # raise on missing variables
111
+ )
112
+ ```
113
+
114
+ ## Development
115
+
116
+ ```bash
117
+ uv sync # install with dev dependencies
118
+ uv run pytest # run the test suite
119
+ ```
@@ -0,0 +1,94 @@
1
+ # promptkeep
2
+
3
+ Prompts as first-class objects: named, versioned templates with lineage tracked in SQLite,
4
+ variable rendering, a decorator for computed prompts, and a transparent OpenAI SDK wrapper
5
+ that records every run (prompt version + variables + output + usage).
6
+
7
+ ## The basics
8
+
9
+ ```python
10
+ from promptkeep import Prompt
11
+
12
+ prompt = Prompt(
13
+ text="You are a code reviewer. Focus on {var1}.",
14
+ variables={"var1": "correctness"},
15
+ name="REVIEW_SYSTEM", # the prompt's stable identity
16
+ )
17
+
18
+ prompt.text # rendered string — safe to pass to any SDK
19
+ prompt.raw # raw template, placeholders intact
20
+ prompt.version # 1 — bumps automatically whenever the template text changes
21
+ ```
22
+
23
+ Same `name` + edited text ⇒ a new version row in SQLite (deduplicated by content hash).
24
+ Variables are *run data*, never versions — change them freely.
25
+
26
+ Rendering is lenient by default: unknown `{placeholders}` and JSON braces in the template
27
+ pass through untouched. Use `strict=True` (per prompt or via `configure`) to raise instead.
28
+
29
+ ## Computed prompts
30
+
31
+ ```python
32
+ from promptkeep import prompt
33
+
34
+ @prompt(name="REVIEW_SYSTEM")
35
+ def review_sys_prompt(var1="some value", n_examples=3):
36
+ examples = "\n".join(load_examples(n_examples))
37
+ return f"You are a reviewer.\n{examples}\nFocus on {{var1}}."
38
+
39
+ p = review_sys_prompt(var1="security") # -> Prompt (raw + rendered + version)
40
+ ```
41
+
42
+ The function returns the raw template; the call's arguments become the variables.
43
+
44
+ ## OpenAI integration
45
+
46
+ ```python
47
+ from openai import OpenAI
48
+ from promptkeep import wrap
49
+
50
+ OpenAI = wrap(OpenAI) # or: client = wrap(OpenAI(...))
51
+ client = OpenAI()
52
+
53
+ completion = client.chat.completions.create(
54
+ model="gpt-5.5",
55
+ messages=[
56
+ {"role": "developer", "content": prompt}, # a Prompt instance (not the @prompt decorator), passed directly
57
+ {"role": "user", "content": "How do I check isinstance?"},
58
+ ],
59
+ )
60
+ ```
61
+
62
+ The API receives a plain string; a *run* is recorded linking this prompt version to the
63
+ variables used, the rendered text, the model, the output, token usage, and latency.
64
+ Streaming, async clients, and multi-part content are supported. Tracking failures never
65
+ break the API call. Unwrapped clients work too — just pass `prompt.text`.
66
+
67
+ ## History
68
+
69
+ ```python
70
+ from promptkeep import history
71
+
72
+ history.versions("REVIEW_SYSTEM") # lineage, oldest first
73
+ print(history.diff("REVIEW_SYSTEM", 1, 3)) # unified diff between versions
74
+ history.runs("REVIEW_SYSTEM", version=3) # recorded runs, newest first
75
+ ```
76
+
77
+ ## Configuration
78
+
79
+ ```python
80
+ import promptkeep
81
+
82
+ promptkeep.configure(
83
+ db_path="path/to/prompts.db", # default: ./.promptkeep.db (or $PROMPTKEEP_DB)
84
+ enabled=True, # $PROMPTKEEP_DISABLED=1 turns tracking off
85
+ strict=False, # set True to raise on missing variables
86
+ )
87
+ ```
88
+
89
+ ## Development
90
+
91
+ ```bash
92
+ uv sync # install with dev dependencies
93
+ uv run pytest # run the test suite
94
+ ```
@@ -0,0 +1,390 @@
1
+ # Prompt Manager — Implementation Plan
2
+
3
+ A Python library for managing LLM prompts as first-class objects: versioned templates with
4
+ lineage tracking (SQLite), variable rendering, a decorator for computed prompts, and a
5
+ transparent wrapper around the OpenAI SDK that tracks every run (prompt version + variables +
6
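+ output) without changing how people write their OpenAI code. (Note: this plan predates the final name — the package shipped as `promptkeep`; see the README for the current import path, environment variables, and default DB path.)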
7
+
8
+ ---
9
+
10
+ ## 1. Goals
11
+
12
+ 1. **Prompt as an object, not a string.** `Prompt(text=..., variables=..., name=...)` — the
13
+ `name` is the stable identity; the template text is the versioned content.
14
+ 2. **Lineage tracking.** Same `name`, different template text ⇒ new version recorded in SQLite.
15
+ Full history of how a prompt evolved is queryable.
16
+ 3. **Variable rendering.** Templates contain `{var}` placeholders; values come in via a
17
+ `variables` dict (and/or kwargs). Rendered output is a plain string usable anywhere.
18
+ 4. **Computed prompts via decorator.** `@prompt(name=...)` on a function that builds a prompt
19
+ with real logic, not just substitution. Both the raw template and the rendered result are
20
+ captured so lineage still works.
21
+ 5. **Zero-friction OpenAI integration.** `OpenAI = wrap(OpenAI)` — after that, users pass a
22
+ `Prompt` object directly as message `content` and the library (a) substitutes the rendered
23
+ string before the request goes out, and (b) records a *run*: which prompt version ran, with
24
+ which variables, against which model, and what came back (output, token usage, latency).
25
+ 6. **Degrades gracefully.** Without wrapping, `prompt.text` is a plain string — works with any
26
+ SDK, any framework, no lock-in.
27
+
28
+ ## 2. Non-goals (for now)
29
+
30
+ - No server, no UI, no cloud sync — local SQLite only. (A CLI viewer is a stretch goal.)
31
+ - No prompt *optimization* / eval framework — we record runs; we don't judge them.
32
+ - No providers beyond OpenAI in v1 (but the wrapper layer is built so Anthropic etc. can be
33
+ added later without touching core).
34
+ - Not tracking changes in *variables* as lineage — variable values are run-scoped data, not
35
+ prompt identity. They're recorded per-run, never as versions.
36
+
37
+ ---
38
+
39
+ ## 3. Core concepts & data model
40
+
41
+ Three entities, strictly layered:
42
+
43
+ | Entity | Identity | What changes it | Where stored |
44
+ |---|---|---|---|
45
+ | **Prompt** | `name` (unique) | never — it's the ID | `prompts` table |
46
+ | **Version** | hash of raw template text | any edit to the template | `prompt_versions` table |
47
+ | **Run** | auto id | every wrapped LLM call | `runs` table |
48
+
49
+ - A **Prompt** is the named lineage ("REVIEW_SYSTEM").
50
+ - A **Version** is one concrete template text under that name. Versions are deduplicated by
51
+ content hash: constructing the same text twice does *not* create a new version; editing the
52
+ text does. Version numbers are monotonically increasing per prompt.
53
+ - A **Run** links a version to one execution: the variables dict used, the final rendered
54
+ text, the model + request params, the response (output text, usage, response id), status,
55
+ and latency.
56
+
57
+ ### SQLite schema (v1)
58
+
59
+ ```sql
60
+ CREATE TABLE prompts (
61
+ id INTEGER PRIMARY KEY,
62
+ name TEXT NOT NULL UNIQUE,
63
+ created_at TEXT NOT NULL -- ISO-8601 UTC
64
+ );
65
+
66
+ CREATE TABLE prompt_versions (
67
+ id INTEGER PRIMARY KEY,
68
+ prompt_id INTEGER NOT NULL REFERENCES prompts(id),
69
+ version INTEGER NOT NULL, -- 1, 2, 3... per prompt
70
+ template TEXT NOT NULL, -- raw text, placeholders intact
71
+ template_hash TEXT NOT NULL, -- sha256 of normalized template
72
+ source TEXT NOT NULL, -- 'literal' | 'decorator'
73
+ fn_source_hash TEXT, -- decorator only: hash of function source
74
+ created_at TEXT NOT NULL,
75
+ UNIQUE (prompt_id, template_hash),
76
+ UNIQUE (prompt_id, version)
77
+ );
78
+
79
+ CREATE TABLE runs (
80
+ id INTEGER PRIMARY KEY,
81
+ version_id INTEGER NOT NULL REFERENCES prompt_versions(id),
82
+ variables TEXT, -- JSON dict as passed by the user
83
+ rendered_text TEXT NOT NULL,
84
+ provider TEXT NOT NULL, -- 'openai'
85
+ model TEXT,
86
+ request_params TEXT, -- JSON (temperature, etc., minus messages)
87
+ response_id TEXT,
88
+ output_text TEXT,
89
+ prompt_tokens INTEGER,
90
+ completion_tokens INTEGER,
91
+ total_tokens INTEGER,
92
+ latency_ms INTEGER,
93
+ status TEXT NOT NULL, -- 'ok' | 'error'
94
+ error TEXT,
95
+ created_at TEXT NOT NULL
96
+ );
97
+ CREATE INDEX idx_runs_version ON runs(version_id, created_at);
98
+ ```
99
+
100
+ - DB opened in WAL mode; one connection per thread (`threading.local`) so wrapped calls from
101
+ multi-threaded apps don't fight over a cursor.
102
+ - Schema version stored in SQLite's `user_version` pragma, plus a tiny forward-only migration runner, so the schema
103
+ can evolve without breaking existing DBs.
104
+
105
+ ### DB location & configuration
106
+
107
+ Default: `./.prompts.db` in the current working directory (a project-level artifact, like
108
+ `.env`). Overridable by:
109
+
110
+ 1. `prompt_manager.configure(db_path=..., enabled=...)` — explicit wins.
111
+ 2. `PROMPT_MANAGER_DB` env var.
112
+ 3. `PROMPT_MANAGER_DISABLED=1` kills all persistence (Prompt still renders fine — critical for
113
+ CI and for users who only want the rendering ergonomics).
114
+
115
+ **Registration timing:** constructing a `Prompt` does **not** hit the DB (prompts are usually
116
+ defined at module import time; import-time writes are a footgun — read-only filesystems, test
117
+ collection, etc.). The version row is written lazily on first *use* (first render access or
118
+ first tracked run), memoized per process so it's one write, not one per call.
119
+
120
+ ---
121
+
122
+ ## 4. Public API
123
+
124
+ Everything importable from the top-level package:
125
+
126
+ ```python
127
+ from prompt_manager import Prompt, prompt, wrap, configure
128
+ ```
129
+
130
+ ### 4.1 `Prompt` class
131
+
132
+ ```python
133
+ prompt = Prompt(
134
+ text="You are a reviewer. Focus on {var1}.",
135
+ variables={"var1": "correctness"}, # optional at construction
136
+ name="REVIEW_SYSTEM", # required — it's the identity
137
+ )
138
+
139
+ prompt.text # -> rendered string: "You are a reviewer. Focus on correctness."
140
+ prompt.raw # -> raw template: "You are a reviewer. Focus on {var1}."
141
+ str(prompt) # == prompt.text
142
+ prompt.name # "REVIEW_SYSTEM"
143
+ prompt.version # int, resolved lazily from the DB (None if tracking disabled)
144
+ prompt.format(var1="security") # -> NEW Prompt with updated variables (immutable style)
145
+ prompt.render(var1="security") # -> rendered str directly (one-shot, no new object)
146
+ ```
147
+
148
+ Naming decision: **`.text` = rendered, `.raw` = template.** Rationale: `.text` is what you
149
+ reach for 95% of the time (the thing you paste into `content=`), so it gets the short name.
150
+ `rendered_text`/`raw_text` aliases can exist but the docs push `.text`/`.raw`.
151
+
152
+ **Key trick — provenance-carrying strings.** `prompt.text` doesn't return a bare `str`; it
153
+ returns `RenderedText(str)` — a `str` subclass that behaves identically everywhere (json
154
+ serialization, `+`, f-strings, the OpenAI SDK...) but carries two hidden attributes:
155
+ `_pm_prompt` (the Prompt) and `_pm_variables` (the dict used to render). Consequences:
156
+
157
+ - Unwrapped SDKs receive a real string. Nothing breaks. (User requirement: "even if I'm not
158
+ wrapping it, it should still receive a string.")
159
+ - The wrapped OpenAI client can track runs **both** when the user passes the `Prompt` object
160
+ directly as `content` **and** when they pass `prompt.text` — because provenance rides along
161
+ on the string itself. No extra method calls on the prompt, ever.
162
+
163
+ `Prompt` itself is *not* a `str` subclass (str immutability forces rendering into `__new__`,
164
+ raw/rendered duality gets ugly, and `.replace()` etc. silently return plain str). Instead the
165
+ wrapper accepts `Prompt | RenderedText | str` as message content, and `Prompt.__str__`
166
+ returns `self.text` as a safety net for accidental `f"{prompt}"` usage.
167
+
168
+ **Immutability:** a `Prompt` is frozen after construction. "Changing" variables produces a new
169
+ `Prompt` sharing the same name/template (⇒ same version). This is what makes version identity
170
+ clean — the object can never drift away from the hash it registered under.
171
+
172
+ ### 4.2 Rendering rules
173
+
174
+ - Syntax: Python `{var}` placeholders, rendered via `format_map`.
175
+ - **Lenient by default**: missing variables stay as literal `{var}` in the output (via a
176
+ `SafeDict` that returns `"{key}"` for missing keys). Prompts full of JSON examples and code
177
+ braces are the norm, and hard-crashing on `{"key": ...}` inside a prompt is the #1 pain of
178
+ naive `.format` use. `{{` / `}}` escaping still works for users who want to be explicit.
179
+ - `strict=True` (per-Prompt or global via `configure`) raises `MissingVariableError` listing
180
+ every unresolved placeholder — for people who want the guardrail.
181
+ - `prompt.placeholders` -> `set[str]` of declared variables (parsed with `string.Formatter`),
182
+ so tooling/tests can validate coverage.
183
+
184
+ ### 4.3 `@prompt` decorator (computed prompts)
185
+
186
+ ```python
187
+ @prompt(name="REVIEW_SYSTEM")
188
+ def review_sys_prompt(var1="some value", n_examples=3):
189
+ examples = pick_examples(n_examples) # real computation
190
+ return f"You are a reviewer...\n{examples}\nFocus on {{var1}}."
191
+
192
+ p = review_sys_prompt(var1="security") # -> returns a Prompt object
193
+ p.text # rendered
194
+ p.raw # the template string the function returned
195
+ ```
196
+
197
+ Contract: **the function returns the raw template** (placeholders intact, `{{ }}`-escaped
198
+ where needed); the decorator turns it into a `Prompt`, using the call's kwargs as the
199
+ `variables` dict. This beats the `(raw, rendered)` tuple idea from the sketch: one return
200
+ value, no way for raw and rendered to disagree, and the caller gets a full `Prompt` object
201
+ (so `.text`, `.raw`, wrap-tracking all work identically to the class path). Returning a
202
+ `Prompt` also satisfies "returns (raw_prompt, rendered_prompt)" — both are on the object.
203
+
204
+ Versioning semantics for computed prompts (the subtle part):
205
+
206
+ - The **version identity is the returned template text** (same content-hash dedup as literal
207
+ prompts). If computation makes the template genuinely different (different examples baked
208
+ in), that *is* a different prompt text and gets a new version — correct, if chatty.
209
+ - To keep the lineage readable we additionally store `fn_source_hash` =
210
+ sha256(`inspect.getsource(fn)`) on each version row. History queries can then distinguish
211
+ "the code changed" from "the same code produced different text this call".
212
+ - Guidance in docs: keep run-varying data in `{placeholders}`, keep the computed part as
213
+ stable as possible. The library works either way; the history is just noisier otherwise.
214
+ - Kwargs used for rendering vs. kwargs used only for computation: all call kwargs are recorded
215
+ as the run's `variables`; rendering is lenient, so kwargs without matching placeholders are
216
+ simply ignored by the formatter. No separate declaration needed.
217
+
218
+ ### 4.4 `wrap()` — OpenAI integration
219
+
220
+ ```python
221
+ from openai import OpenAI
222
+ from prompt_manager import wrap
223
+
224
+ OpenAI = wrap(OpenAI) # wrap the class…
225
+ client = OpenAI(api_key=...) # …used exactly as before
226
+ # or: client = wrap(OpenAI(api_key=...)) # wrapping an instance also works
227
+
228
+ completion = client.chat.completions.create(
229
+ model="gpt-5.5",
230
+ messages=[
231
+ {"role": "developer", "content": prompt}, # Prompt object, directly
232
+ {"role": "user", "content": user_question},
233
+ ],
234
+ )
235
+ ```
236
+
237
+ Mechanics (no monkey-patching of the `openai` module — we only touch objects the user
238
+ explicitly passed to `wrap`):
239
+
240
+ - `wrap(cls)` returns a subclass whose `__init__` calls super then replaces
241
+ `self.chat.completions.create` with a tracking closure around the original bound method.
242
+ `wrap(instance)` does the same replacement on the live instance. Everything else on the
243
+ client is untouched — same attributes, same types, `isinstance` still holds.
244
+ - The interceptor:
245
+ 1. Walks `messages`; for any `content` that is a `Prompt`, renders it; for `RenderedText`,
246
+ uses it as-is; either way collects `(prompt, variables, rendered)` provenance. Replaces
247
+ content with the plain rendered `str`. Multi-part content (list-of-blocks) handled by
248
+ walking `text` blocks too.
249
+ 2. Calls the real `create()`, timing it.
250
+ 3. Records one `runs` row **per tracked prompt** in the message list (a system + a user
251
+ prompt in one call ⇒ two runs sharing response metadata), including model, filtered
252
+ request params, output text (`choices[0].message.content`), usage, response id.
253
+ 4. On exception: records the run with `status='error'` + the exception text, then re-raises
254
+ unchanged.
255
+ - **Tracking must never break the user's call**: every DB write is wrapped in its own
256
+ try/except that logs a warning and continues. A failed insert loses telemetry, never a
257
+ completion.
258
+ - **Streaming** (`stream=True`): return a thin iterator proxy that yields chunks through
259
+ untouched while accumulating delta content; the run row is written when the stream closes
260
+ (with whatever usage info the final chunk carries). Same idea for the context-manager form.
261
+ - **Async** (`AsyncOpenAI`): same interceptor with `async def` + `await`; detection by
262
+ whether the wrapped `create` is a coroutine function.
263
+ - **Responses API** (`client.responses.create`): same pattern, phase 2 of the wrapper —
264
+ chat.completions first since that's the stated usage.
265
+ - Wrapper code lives in `integrations/openai_wrapper.py` behind a narrow interface
266
+ (`extract_prompts(messages)`, `record_run(...)`), so an `integrations/anthropic_wrapper.py`
267
+ later is additive.
268
+ - `openai` is an **optional dependency** (`pip install prompt-manager[openai]`); core never
269
+ imports it.
270
+
271
+ ### 4.5 History / inspection API
272
+
273
+ Minimal programmatic access so the DB isn't a black box:
274
+
275
+ ```python
276
+ from prompt_manager import history
277
+
278
+ history.versions("REVIEW_SYSTEM") # -> [VersionInfo(version=1, template=..., created_at=...), ...]
279
+ history.diff("REVIEW_SYSTEM", 1, 3) # -> unified diff string between two versions
280
+ history.runs("REVIEW_SYSTEM", version=3, limit=20) # -> [RunInfo(...), ...]
281
+ ```
282
+
283
+ Stretch goal: `python -m prompt_manager history REVIEW_SYSTEM` CLI over the same functions.
284
+
285
+ ---
286
+
287
+ ## 5. Package layout
288
+
289
+ ```
290
+ prompt-manager/
291
+ ├── pyproject.toml # hatchling; deps: peewee (core). extras: openai
292
+ ├── README.md
293
+ ├── plan.md # this file
294
+ ├── src/
295
+ │ └── prompt_manager/
296
+ │ ├── __init__.py # Prompt, prompt, wrap, configure, history
297
+ │ ├── config.py # configure(), env vars, global settings singleton
298
+ │ ├── prompt.py # Prompt, RenderedText
299
+ │ ├── rendering.py # SafeDict, placeholder parsing, strict mode
300
+ │ ├── decorator.py # @prompt
301
+ │ ├── storage.py # connection mgmt, schema/migrations, upserts, queries
302
+ │ ├── tracking.py # run recording (provider-agnostic)
303
+ │ ├── history.py # versions() / diff() / runs()
304
+ │ └── integrations/
305
+ │ ├── __init__.py # wrap() dispatcher (detects openai class/instance)
306
+ │ └── openai_wrapper.py
307
+ └── tests/
308
+ ├── test_prompt.py
309
+ ├── test_rendering.py
310
+ ├── test_decorator.py
311
+ ├── test_storage.py
312
+ ├── test_history.py
313
+ └── test_openai_wrapper.py # fake client, no network
314
+ ```
315
+
316
+ Tooling: `uv` for env/deps, `pytest`, `ruff` (lint + format). Python ≥ 3.9.
317
+
318
+ ---
319
+
320
+ ## 6. Implementation phases
321
+
322
+ ### Phase 0 — Scaffolding
323
+ - [ ] `git init`, `pyproject.toml` (name TBD — see open questions), `src/` layout, `uv sync`
324
+ - [ ] pytest + ruff configured; empty package imports cleanly
325
+
326
+ ### Phase 1 — Core `Prompt` + rendering (no DB yet)
327
+ - [ ] `RenderedText(str)` with `_pm_prompt` / `_pm_variables`
328
+ - [ ] `Prompt`: constructor validation (non-empty name/text), frozen attrs, `.raw`, `.text`,
329
+ `.render()`, `.format()`, `__str__`, `__repr__`, equality by (name, template, variables)
330
+ - [ ] `rendering.py`: SafeDict lenient rendering, `{{}}` escaping, strict mode,
331
+ `placeholders` extraction via `string.Formatter().parse`
332
+ - [ ] Tests: rendering matrix (missing vars, extra vars, JSON braces, nested braces, non-str
333
+ values), immutability, provenance attributes survive typical string usage
334
+
335
+ ### Phase 2 — Storage + lineage
336
+ - [ ] `config.py`: `configure()`, env vars, `enabled` flag, default db path
337
+ - [ ] `storage.py`: lazy connection (thread-local), WAL, schema creation, `user_version`
338
+ migrations, `get_or_create_prompt`, `get_or_create_version` (hash dedup, version
339
+ counter), all writes exception-shielded
340
+ - [ ] Lazy registration hook in `Prompt` (first `.text`/`.render` registers version once)
341
+ - [ ] `history.py`: `versions()`, `diff()` (difflib unified), `runs()`
342
+ - [ ] Tests: same-text ⇒ same version; edited text ⇒ v+1; dedup across processes (reopen db);
343
+ disabled mode does zero I/O; concurrent registration from threads
344
+
345
+ ### Phase 3 — `@prompt` decorator
346
+ - [ ] `decorator.py`: capture kwargs (incl. defaults via `inspect.signature(fn).bind(...)` + `apply_defaults()`), call fn,
347
+ wrap returned str into `Prompt`, attach `fn_source_hash`
348
+ - [ ] Error if fn returns non-str; `functools.wraps` preserved
349
+ - [ ] Tests: defaults vs explicit kwargs, computed templates creating new versions,
350
+ fn-source-hash recorded, decorated fn still introspectable
351
+
352
+ ### Phase 4 — OpenAI wrapper + run tracking (sync, non-streaming)
353
+ - [ ] `wrap()` dispatcher: class vs instance detection
354
+ - [ ] Message walking: str / Prompt / RenderedText / content-block lists
355
+ - [ ] Run recording via `tracking.py` (renders, timing, usage, error path)
356
+ - [ ] Tests against a fake OpenAI-shaped client (no network): substitution happens, run rows
357
+ correct, multiple prompts per call ⇒ multiple runs, tracking failure doesn't break the
358
+ call, unwrapped-with-`.text` path also tracks via provenance
359
+ - [ ] One optional live smoke test behind `OPENAI_API_KEY` guard
360
+
361
+ ### Phase 5 — Streaming + async
362
+ - [ ] Stream proxy (sync iterator + context manager), run written at stream end
363
+ - [ ] `AsyncOpenAI` support
364
+ - [ ] Tests with fake streaming/async clients
365
+
366
+ ### Phase 6 — Polish
367
+ - [ ] `responses.create` support in the wrapper
368
+ - [ ] README with the three usage tiers (plain / decorator / wrapped)
369
+ - [ ] CLI viewer (`python -m prompt_manager history NAME`) — stretch
370
+ - [ ] Version + publish prep (classifiers, py.typed, LICENSE)
371
+
372
+ Phases 1–4 are the MVP the pseudocode describes; 5–6 can trail.
373
+
374
+ ---
375
+
376
+ ## 7. Open questions
377
+
378
+ 1. **Package name** — `prompt-manager` is likely taken on PyPI. Alternatives: `promptline`,
379
+ `promptvault`, `promptkeep`? (Doesn't block implementation; module name can be decided at
380
+ Phase 0.)
381
+ 2. **Default DB location** — plan says project-local `./.prompts.db`. If you'd rather have one
382
+ global DB per machine (`~/.prompt_manager/prompts.db`), say so; it's a one-line default.
383
+ 3. **Lenient rendering default** — plan says missing variables stay as `{var}` silently
384
+ (JSON-in-prompt safety). Comfortable with that, or should the default warn/raise?
385
+ 4. **Decorator contract** — plan says the function returns the raw template and the decorator
386
+ returns a `Prompt` object (instead of a `(raw, rendered)` tuple). Flag if you specifically
387
+ want the tuple form.
388
+ 5. **Run management scope** — the original brief cut off at "run management — when it is running…". Runs-as-records
389
+ (version + variables + output per call) is covered; if you meant something more (live
390
+ monitoring, callbacks, cost aggregation), that's additive on top of the `runs` table.
@@ -0,0 +1,46 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "promptkeep"
7
+ version = "0.1.0"
8
+ description = "Prompts as first-class objects: versioned templates, SQLite lineage tracking, and OpenAI run tracking."
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = "MIT"
12
+ license-files = ["LICENSE"]
13
+ authors = [{ name = "Pranav", email = "pranav2278@gmail.com" }]
14
+ keywords = ["prompt", "prompts", "llm", "openai", "prompt-engineering", "versioning", "tracking"]
15
+ classifiers = [
16
+ "Development Status :: 4 - Beta",
17
+ "Intended Audience :: Developers",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3.9",
20
+ "Programming Language :: Python :: 3.10",
21
+ "Programming Language :: Python :: 3.11",
22
+ "Programming Language :: Python :: 3.12",
23
+ "Programming Language :: Python :: 3.13",
24
+ "Programming Language :: Python :: 3.14",
25
+ "Topic :: Software Development :: Libraries :: Python Modules",
26
+ "Typing :: Typed",
27
+ ]
28
+ dependencies = [
29
+ "peewee>=3.17",
30
+ ]
31
+
32
+ [project.optional-dependencies]
33
+ openai = ["openai>=1.0"]
34
+
35
+ [dependency-groups]
36
+ dev = ["pytest>=8", "ruff>=0.4"]
37
+
38
+ [tool.hatch.build.targets.wheel]
39
+ packages = ["src/promptkeep"]
40
+
41
+ [tool.pytest.ini_options]
42
+ testpaths = ["tests"]
43
+
44
+ [tool.ruff]
45
+ line-length = 100
46
+ src = ["src", "tests"]