promptsmithv2 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- promptsmithv2-1.0.0/LICENSE +21 -0
- promptsmithv2-1.0.0/PKG-INFO +266 -0
- promptsmithv2-1.0.0/README.md +239 -0
- promptsmithv2-1.0.0/promptsmith/__init__.py +15 -0
- promptsmithv2-1.0.0/promptsmith/diff.py +97 -0
- promptsmithv2-1.0.0/promptsmith/prompt.py +292 -0
- promptsmithv2-1.0.0/promptsmith/registry.py +391 -0
- promptsmithv2-1.0.0/promptsmithv2.egg-info/PKG-INFO +266 -0
- promptsmithv2-1.0.0/promptsmithv2.egg-info/SOURCES.txt +13 -0
- promptsmithv2-1.0.0/promptsmithv2.egg-info/dependency_links.txt +1 -0
- promptsmithv2-1.0.0/promptsmithv2.egg-info/requires.txt +3 -0
- promptsmithv2-1.0.0/promptsmithv2.egg-info/top_level.txt +1 -0
- promptsmithv2-1.0.0/pyproject.toml +41 -0
- promptsmithv2-1.0.0/setup.cfg +4 -0
- promptsmithv2-1.0.0/tests/test_core.py +238 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 prabhay759
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: promptsmithv2
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Structured prompt builder and version manager for LLM engineers — typed variables, versioning, diffing, A/B testing, and audit trails
|
|
5
|
+
Author: prabhay759
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/prabhay759/promptsmith
|
|
8
|
+
Project-URL: Repository, https://github.com/prabhay759/promptsmith
|
|
9
|
+
Project-URL: Issues, https://github.com/prabhay759/promptsmith/issues
|
|
10
|
+
Keywords: llm,prompt,prompt-engineering,versioning,openai,langchain
|
|
11
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
Requires-Python: >=3.8
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
26
|
+
Dynamic: license-file
|
|
27
|
+
|
|
28
|
+
# promptsmith
|
|
29
|
+
|
|
30
|
+
> Structured prompt builder and version manager for LLM engineers. Typed variables, Git-friendly versioning, human-readable diffs, A/B testing, and full audit trails. Works with any LLM. Zero dependencies.
|
|
31
|
+
|
|
32
|
+
[](https://pypi.org/project/promptsmith/)
|
|
33
|
+
[](https://pypi.org/project/promptsmith/)
|
|
34
|
+
[](LICENSE)
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## The Problem
|
|
39
|
+
|
|
40
|
+
Every LLM engineer ends up with prompts scattered across f-strings, Notion docs, and constants files. No versioning. No way to diff what changed. No audit trail of which prompt produced which output.
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
# the reality
|
|
44
|
+
prompt = f"Summarize this in {n} words: {text}" # in utils.py
|
|
45
|
+
SYSTEM = "You are helpful..." # in constants.py
|
|
46
|
+
prompt2 = "Summarize this concisely: " + text # in api.py
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
**promptsmith gives your prompts the same discipline as your code.**
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## Installation
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
pip install promptsmithv2
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
No dependencies. Requires Python 3.8+.
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
## Quick Start
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
from promptsmith import Prompt, PromptRegistry
|
|
67
|
+
|
|
68
|
+
# Define a typed prompt
|
|
69
|
+
prompt = Prompt(
|
|
70
|
+
name="summarizer",
|
|
71
|
+
template="Summarize this {content_type} in {max_words} words:\n\n{content}",
|
|
72
|
+
variables={"content_type": str, "max_words": int, "content": str},
|
|
73
|
+
description="General purpose summarizer",
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
# Render it — validates types before rendering
|
|
77
|
+
text = prompt.render(content_type="article", max_words=100, content="...")
|
|
78
|
+
|
|
79
|
+
# Save to registry
|
|
80
|
+
registry = PromptRegistry("./prompts")
|
|
81
|
+
registry.save(prompt)
|
|
82
|
+
|
|
83
|
+
# Load anywhere in your codebase
|
|
84
|
+
p = registry.load("summarizer")
|
|
85
|
+
text = p.render(content_type="email", max_words=50, content="...")
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
## Core Concepts
|
|
91
|
+
|
|
92
|
+
### Typed Variables
|
|
93
|
+
|
|
94
|
+
Variables are typed and validated before rendering — catch bugs before the LLM call:
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
prompt = Prompt(
|
|
98
|
+
name="classifier",
|
|
99
|
+
template="Classify this text as {label_a} or {label_b}:\n{text}",
|
|
100
|
+
variables={
|
|
101
|
+
"label_a": str,
|
|
102
|
+
"label_b": str,
|
|
103
|
+
"text": str,
|
|
104
|
+
}
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
# Type errors caught early
|
|
108
|
+
prompt.render(label_a="positive", label_b="negative", text=42)
|
|
109
|
+
# PromptRenderError: Variable 'text' expected str, got int
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Versioning
|
|
113
|
+
|
|
114
|
+
Every change creates a new version automatically:
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
p1 = registry.load("summarizer") # 1.0.0
|
|
118
|
+
|
|
119
|
+
p2 = p1.update(
|
|
120
|
+
template="Summarize this {content_type} concisely in under {max_words} words:\n\n{content}",
|
|
121
|
+
changelog="Added 'concisely' — tighter outputs"
|
|
122
|
+
)
|
|
123
|
+
registry.save(p2) # saves as 1.0.1
|
|
124
|
+
|
|
125
|
+
# Load specific version
|
|
126
|
+
old = registry.load("summarizer", version="1.0.0")
|
|
127
|
+
new = registry.load("summarizer", version="1.0.1")
|
|
128
|
+
new = registry.load("summarizer") # latest
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### Human-Readable Diffs
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
print(registry.diff("summarizer", "1.0.0", "1.0.1"))
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
```
|
|
138
|
+
── Template ─────────────────────────────────────────
|
|
139
|
+
--- template (1.0.0)
|
|
140
|
+
+++ template (1.0.1)
|
|
141
|
+
@@ -1 +1 @@
|
|
142
|
+
-Summarize this {content_type} in {max_words} words:
|
|
143
|
+
+Summarize this {content_type} concisely in under {max_words} words:
|
|
144
|
+
|
|
145
|
+
── Metadata ─────────────────────────────────────────
|
|
146
|
+
1.0.0 → 1.0.1
|
|
147
|
+
changelog: Added 'concisely' — tighter outputs
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
### A/B Testing
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
result = registry.ab_test(
|
|
154
|
+
name="summarizer",
|
|
155
|
+
version_a="1.0.0",
|
|
156
|
+
version_b="1.0.1",
|
|
157
|
+
inputs={"content_type": "article", "max_words": 100, "content": article_text},
|
|
158
|
+
runner=lambda prompt: openai.chat.completions.create(
|
|
159
|
+
model="gpt-4", messages=[{"role": "user", "content": prompt}]
|
|
160
|
+
).choices[0].message.content,
|
|
161
|
+
scorer=lambda a, b: len(b) - len(a), # positive = B wins
|
|
162
|
+
)
|
|
163
|
+
|
|
164
|
+
result.print_comparison()
|
|
165
|
+
print(f"Winner: {result.winner}")
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
### Chat Models (System + User)
|
|
169
|
+
|
|
170
|
+
```python
|
|
171
|
+
prompt = Prompt(
|
|
172
|
+
name="assistant",
|
|
173
|
+
template="Answer this question: {question}",
|
|
174
|
+
system="You are a helpful assistant. Be concise.",
|
|
175
|
+
variables={"question": str},
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
messages = prompt.render_messages(question="What is BPE tokenization?")
|
|
179
|
+
# [{"role": "system", "content": "You are..."}, {"role": "user", "content": "Answer..."}]
|
|
180
|
+
|
|
181
|
+
response = openai.chat.completions.create(model="gpt-4", messages=messages)
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
### Version History & Audit Trail
|
|
185
|
+
|
|
186
|
+
```python
|
|
187
|
+
# Full history
|
|
188
|
+
for entry in registry.history("summarizer"):
|
|
189
|
+
print(f"v{entry['version']} — {entry['changelog']} ({entry['created_at'][:10]})")
|
|
190
|
+
|
|
191
|
+
# Past A/B results
|
|
192
|
+
for run in registry.ab_history("summarizer"):
|
|
193
|
+
print(f"{run['version_a']} vs {run['version_b']} → winner: {run['winner']}")
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
### Storage (Git-Friendly)
|
|
197
|
+
|
|
198
|
+
```
|
|
199
|
+
prompts/
|
|
200
|
+
├── promptsmith.db ← SQLite index for fast queries
|
|
201
|
+
├── summarizer/
|
|
202
|
+
│ ├── 1.0.0.json ← full prompt definition
|
|
203
|
+
│ └── 1.0.1.json
|
|
204
|
+
└── classifier/
|
|
205
|
+
└── 1.0.0.json
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
Commit the `prompts/` directory to Git — every prompt change is tracked just like code.
|
|
209
|
+
|
|
210
|
+
---
|
|
211
|
+
|
|
212
|
+
## API Reference
|
|
213
|
+
|
|
214
|
+
### `Prompt`
|
|
215
|
+
|
|
216
|
+
```python
|
|
217
|
+
Prompt(
|
|
218
|
+
name, # Unique identifier
|
|
219
|
+
template, # Text with {variable} placeholders
|
|
220
|
+
variables=None, # dict of name → type or PromptVariable
|
|
221
|
+
version="1.0.0",
|
|
222
|
+
description="",
|
|
223
|
+
changelog="",
|
|
224
|
+
tags=[],
|
|
225
|
+
system=None, # System prompt for chat models
|
|
226
|
+
metadata={},
|
|
227
|
+
)
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
| Method | Description |
|
|
231
|
+
|---|---|
|
|
232
|
+
| `render(**kwargs)` | Render prompt, raises on type errors |
|
|
233
|
+
| `render_messages(**kwargs)` | Returns OpenAI-style messages list |
|
|
234
|
+
| `update(template, ...)` | Create new version with changes |
|
|
235
|
+
| `validate(**kwargs)` | Check inputs without rendering |
|
|
236
|
+
| `to_dict()` / `from_dict()` | Serialization |
|
|
237
|
+
| `to_json()` / `from_json()` | JSON serialization |
|
|
238
|
+
|
|
239
|
+
### `PromptRegistry`
|
|
240
|
+
|
|
241
|
+
| Method | Description |
|
|
242
|
+
|---|---|
|
|
243
|
+
| `save(prompt)` | Save to disk + index |
|
|
244
|
+
| `load(name, version=None)` | Load latest or specific version |
|
|
245
|
+
| `history(name)` | All versions with changelogs |
|
|
246
|
+
| `diff(name, v_a, v_b)` | Human-readable diff |
|
|
247
|
+
| `ab_test(name, v_a, v_b, inputs, runner, scorer)` | A/B test two versions |
|
|
248
|
+
| `list(tag=None)` | List all prompts |
|
|
249
|
+
| `names()` | All prompt names |
|
|
250
|
+
| `delete(name, version=None)` | Delete version(s) |
|
|
251
|
+
| `export_all(path)` | Export all prompts to JSON |
|
|
252
|
+
|
|
253
|
+
---
|
|
254
|
+
|
|
255
|
+
## Running Tests
|
|
256
|
+
|
|
257
|
+
```bash
|
|
258
|
+
pip install pytest
|
|
259
|
+
pytest tests/ -v
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
---
|
|
263
|
+
|
|
264
|
+
## License
|
|
265
|
+
|
|
266
|
+
MIT © prabhay759
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
# promptsmith
|
|
2
|
+
|
|
3
|
+
> Structured prompt builder and version manager for LLM engineers. Typed variables, Git-friendly versioning, human-readable diffs, A/B testing, and full audit trails. Works with any LLM. Zero dependencies.
|
|
4
|
+
|
|
5
|
+
[](https://pypi.org/project/promptsmith/)
|
|
6
|
+
[](https://pypi.org/project/promptsmith/)
|
|
7
|
+
[](LICENSE)
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## The Problem
|
|
12
|
+
|
|
13
|
+
Every LLM engineer ends up with prompts scattered across f-strings, Notion docs, and constants files. No versioning. No way to diff what changed. No audit trail of which prompt produced which output.
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
# the reality
|
|
17
|
+
prompt = f"Summarize this in {n} words: {text}" # in utils.py
|
|
18
|
+
SYSTEM = "You are helpful..." # in constants.py
|
|
19
|
+
prompt2 = "Summarize this concisely: " + text # in api.py
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
**promptsmith gives your prompts the same discipline as your code.**
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Installation
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
pip install promptsmithv2
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
No dependencies. Requires Python 3.8+.
|
|
33
|
+
|
|
34
|
+
---
|
|
35
|
+
|
|
36
|
+
## Quick Start
|
|
37
|
+
|
|
38
|
+
```python
|
|
39
|
+
from promptsmith import Prompt, PromptRegistry
|
|
40
|
+
|
|
41
|
+
# Define a typed prompt
|
|
42
|
+
prompt = Prompt(
|
|
43
|
+
name="summarizer",
|
|
44
|
+
template="Summarize this {content_type} in {max_words} words:\n\n{content}",
|
|
45
|
+
variables={"content_type": str, "max_words": int, "content": str},
|
|
46
|
+
description="General purpose summarizer",
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
# Render it — validates types before rendering
|
|
50
|
+
text = prompt.render(content_type="article", max_words=100, content="...")
|
|
51
|
+
|
|
52
|
+
# Save to registry
|
|
53
|
+
registry = PromptRegistry("./prompts")
|
|
54
|
+
registry.save(prompt)
|
|
55
|
+
|
|
56
|
+
# Load anywhere in your codebase
|
|
57
|
+
p = registry.load("summarizer")
|
|
58
|
+
text = p.render(content_type="email", max_words=50, content="...")
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
## Core Concepts
|
|
64
|
+
|
|
65
|
+
### Typed Variables
|
|
66
|
+
|
|
67
|
+
Variables are typed and validated before rendering — catch bugs before the LLM call:
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
prompt = Prompt(
|
|
71
|
+
name="classifier",
|
|
72
|
+
template="Classify this text as {label_a} or {label_b}:\n{text}",
|
|
73
|
+
variables={
|
|
74
|
+
"label_a": str,
|
|
75
|
+
"label_b": str,
|
|
76
|
+
"text": str,
|
|
77
|
+
}
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
# Type errors caught early
|
|
81
|
+
prompt.render(label_a="positive", label_b="negative", text=42)
|
|
82
|
+
# PromptRenderError: Variable 'text' expected str, got int
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Versioning
|
|
86
|
+
|
|
87
|
+
Every change creates a new version automatically:
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
p1 = registry.load("summarizer") # 1.0.0
|
|
91
|
+
|
|
92
|
+
p2 = p1.update(
|
|
93
|
+
template="Summarize this {content_type} concisely in under {max_words} words:\n\n{content}",
|
|
94
|
+
changelog="Added 'concisely' — tighter outputs"
|
|
95
|
+
)
|
|
96
|
+
registry.save(p2) # saves as 1.0.1
|
|
97
|
+
|
|
98
|
+
# Load specific version
|
|
99
|
+
old = registry.load("summarizer", version="1.0.0")
|
|
100
|
+
new = registry.load("summarizer", version="1.0.1")
|
|
101
|
+
new = registry.load("summarizer") # latest
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### Human-Readable Diffs
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
print(registry.diff("summarizer", "1.0.0", "1.0.1"))
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
```
|
|
111
|
+
── Template ─────────────────────────────────────────
|
|
112
|
+
--- template (1.0.0)
|
|
113
|
+
+++ template (1.0.1)
|
|
114
|
+
@@ -1 +1 @@
|
|
115
|
+
-Summarize this {content_type} in {max_words} words:
|
|
116
|
+
+Summarize this {content_type} concisely in under {max_words} words:
|
|
117
|
+
|
|
118
|
+
── Metadata ─────────────────────────────────────────
|
|
119
|
+
1.0.0 → 1.0.1
|
|
120
|
+
changelog: Added 'concisely' — tighter outputs
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### A/B Testing
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
result = registry.ab_test(
|
|
127
|
+
name="summarizer",
|
|
128
|
+
version_a="1.0.0",
|
|
129
|
+
version_b="1.0.1",
|
|
130
|
+
inputs={"content_type": "article", "max_words": 100, "content": article_text},
|
|
131
|
+
runner=lambda prompt: openai.chat.completions.create(
|
|
132
|
+
model="gpt-4", messages=[{"role": "user", "content": prompt}]
|
|
133
|
+
).choices[0].message.content,
|
|
134
|
+
scorer=lambda a, b: len(b) - len(a), # positive = B wins
|
|
135
|
+
)
|
|
136
|
+
|
|
137
|
+
result.print_comparison()
|
|
138
|
+
print(f"Winner: {result.winner}")
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### Chat Models (System + User)
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
prompt = Prompt(
|
|
145
|
+
name="assistant",
|
|
146
|
+
template="Answer this question: {question}",
|
|
147
|
+
system="You are a helpful assistant. Be concise.",
|
|
148
|
+
variables={"question": str},
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
messages = prompt.render_messages(question="What is BPE tokenization?")
|
|
152
|
+
# [{"role": "system", "content": "You are..."}, {"role": "user", "content": "Answer..."}]
|
|
153
|
+
|
|
154
|
+
response = openai.chat.completions.create(model="gpt-4", messages=messages)
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### Version History & Audit Trail
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
# Full history
|
|
161
|
+
for entry in registry.history("summarizer"):
|
|
162
|
+
print(f"v{entry['version']} — {entry['changelog']} ({entry['created_at'][:10]})")
|
|
163
|
+
|
|
164
|
+
# Past A/B results
|
|
165
|
+
for run in registry.ab_history("summarizer"):
|
|
166
|
+
print(f"{run['version_a']} vs {run['version_b']} → winner: {run['winner']}")
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
### Storage (Git-Friendly)
|
|
170
|
+
|
|
171
|
+
```
|
|
172
|
+
prompts/
|
|
173
|
+
├── promptsmith.db ← SQLite index for fast queries
|
|
174
|
+
├── summarizer/
|
|
175
|
+
│ ├── 1.0.0.json ← full prompt definition
|
|
176
|
+
│ └── 1.0.1.json
|
|
177
|
+
└── classifier/
|
|
178
|
+
└── 1.0.0.json
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
Commit the `prompts/` directory to Git — every prompt change is tracked just like code.
|
|
182
|
+
|
|
183
|
+
---
|
|
184
|
+
|
|
185
|
+
## API Reference
|
|
186
|
+
|
|
187
|
+
### `Prompt`
|
|
188
|
+
|
|
189
|
+
```python
|
|
190
|
+
Prompt(
|
|
191
|
+
name, # Unique identifier
|
|
192
|
+
template, # Text with {variable} placeholders
|
|
193
|
+
variables=None, # dict of name → type or PromptVariable
|
|
194
|
+
version="1.0.0",
|
|
195
|
+
description="",
|
|
196
|
+
changelog="",
|
|
197
|
+
tags=[],
|
|
198
|
+
system=None, # System prompt for chat models
|
|
199
|
+
metadata={},
|
|
200
|
+
)
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
| Method | Description |
|
|
204
|
+
|---|---|
|
|
205
|
+
| `render(**kwargs)` | Render prompt, raises on type errors |
|
|
206
|
+
| `render_messages(**kwargs)` | Returns OpenAI-style messages list |
|
|
207
|
+
| `update(template, ...)` | Create new version with changes |
|
|
208
|
+
| `validate(**kwargs)` | Check inputs without rendering |
|
|
209
|
+
| `to_dict()` / `from_dict()` | Serialization |
|
|
210
|
+
| `to_json()` / `from_json()` | JSON serialization |
|
|
211
|
+
|
|
212
|
+
### `PromptRegistry`
|
|
213
|
+
|
|
214
|
+
| Method | Description |
|
|
215
|
+
|---|---|
|
|
216
|
+
| `save(prompt)` | Save to disk + index |
|
|
217
|
+
| `load(name, version=None)` | Load latest or specific version |
|
|
218
|
+
| `history(name)` | All versions with changelogs |
|
|
219
|
+
| `diff(name, v_a, v_b)` | Human-readable diff |
|
|
220
|
+
| `ab_test(name, v_a, v_b, inputs, runner, scorer)` | A/B test two versions |
|
|
221
|
+
| `list(tag=None)` | List all prompts |
|
|
222
|
+
| `names()` | All prompt names |
|
|
223
|
+
| `delete(name, version=None)` | Delete version(s) |
|
|
224
|
+
| `export_all(path)` | Export all prompts to JSON |
|
|
225
|
+
|
|
226
|
+
---
|
|
227
|
+
|
|
228
|
+
## Running Tests
|
|
229
|
+
|
|
230
|
+
```bash
|
|
231
|
+
pip install pytest
|
|
232
|
+
pytest tests/ -v
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
---
|
|
236
|
+
|
|
237
|
+
## License
|
|
238
|
+
|
|
239
|
+
MIT © prabhay759
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""
|
|
2
|
+
promptsmith — Structured prompt builder and version manager for LLM engineers.
|
|
3
|
+
Version control, typed variables, diffing, A/B testing, and audit trails.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from .prompt import Prompt, PromptVariable, PromptRenderError
|
|
7
|
+
from .registry import PromptRegistry
|
|
8
|
+
from .diff import diff_prompts
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"Prompt", "PromptVariable", "PromptRenderError",
|
|
12
|
+
"PromptRegistry", "diff_prompts",
|
|
13
|
+
]
|
|
14
|
+
__version__ = "1.0.0"
|
|
15
|
+
__author__ = "prabhay759"
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"""
|
|
2
|
+
promptsmith.diff
|
|
3
|
+
----------------
|
|
4
|
+
Human-readable diffing between prompt versions.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import difflib
|
|
8
|
+
from typing import List, TYPE_CHECKING
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from .prompt import Prompt
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def diff_prompts(a: "Prompt", b: "Prompt", context: int = 3) -> str:
    """
    Build a human-readable, unified-diff style report between two prompt
    versions.

    Parameters
    ----------
    a : Prompt
        The older / baseline prompt.
    b : Prompt
        The newer prompt.
    context : int
        Lines of context to show around changes.

    Returns
    -------
    str
        Sections for template, system prompt, and variable changes (each
        section appears only when that part actually differs), followed by
        a metadata footer showing the version bump and the new changelog.
        Note the metadata footer is always emitted, even for identical
        prompts.
    """
    sections = []

    # Compare the user-facing templates.
    template_delta = _text_diff(
        a.template,
        b.template,
        fromfile=f"template ({a.version})",
        tofile=f"template ({b.version})",
        context=context,
    )
    if template_delta:
        sections.append("── Template ─────────────────────────────────────────")
        sections.append(template_delta)

    # Compare system prompts only when at least one side defines one;
    # a missing system prompt is diffed as the empty string.
    if a.system or b.system:
        system_delta = _text_diff(
            a.system or "",
            b.system or "",
            fromfile=f"system ({a.version})",
            tofile=f"system ({b.version})",
            context=context,
        )
        if system_delta:
            sections.append("── System ───────────────────────────────────────────")
            sections.append(system_delta)

    # Summarize added / removed / retyped variables.
    variable_delta = _diff_variables(a, b)
    if variable_delta:
        sections.append("── Variables ────────────────────────────────────────")
        sections.extend(variable_delta)

    # Metadata footer: version transition plus the new version's changelog.
    sections.append("── Metadata ─────────────────────────────────────────")
    sections.append(f" {a.version} → {b.version}")
    sections.append(f" changelog: {b.changelog}")

    return "\n".join(sections) if sections else "(no changes)"
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _text_diff(old: str, new: str, fromfile: str, tofile: str, context: int) -> str:
|
|
71
|
+
if old == new:
|
|
72
|
+
return ""
|
|
73
|
+
old_lines = old.splitlines(keepends=True)
|
|
74
|
+
new_lines = new.splitlines(keepends=True)
|
|
75
|
+
return "".join(difflib.unified_diff(
|
|
76
|
+
old_lines, new_lines,
|
|
77
|
+
fromfile=fromfile, tofile=tofile,
|
|
78
|
+
n=context,
|
|
79
|
+
))
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _diff_variables(a: "Prompt", b: "Prompt") -> List[str]:
|
|
83
|
+
lines = []
|
|
84
|
+
all_keys = set(a.variables) | set(b.variables)
|
|
85
|
+
for key in sorted(all_keys):
|
|
86
|
+
if key not in a.variables:
|
|
87
|
+
lines.append(f" + added: {key} ({b.variables[key].type_.__name__})")
|
|
88
|
+
elif key not in b.variables:
|
|
89
|
+
lines.append(f" - removed: {key}")
|
|
90
|
+
else:
|
|
91
|
+
va, vb = a.variables[key], b.variables[key]
|
|
92
|
+
if va.type_ != vb.type_:
|
|
93
|
+
lines.append(
|
|
94
|
+
f" ~ changed: {key} "
|
|
95
|
+
f"{va.type_.__name__} → {vb.type_.__name__}"
|
|
96
|
+
)
|
|
97
|
+
return lines
|