promptsmithv2 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 prabhay759
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,266 @@
1
+ Metadata-Version: 2.4
2
+ Name: promptsmithv2
3
+ Version: 1.0.0
4
+ Summary: Structured prompt builder and version manager for LLM engineers — typed variables, versioning, diffing, A/B testing, and audit trails
5
+ Author: prabhay759
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/prabhay759/promptsmith
8
+ Project-URL: Repository, https://github.com/prabhay759/promptsmith
9
+ Project-URL: Issues, https://github.com/prabhay759/promptsmith/issues
10
+ Keywords: llm,prompt,prompt-engineering,versioning,openai,langchain
11
+ Classifier: Development Status :: 5 - Production/Stable
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.8
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Requires-Python: >=3.8
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Provides-Extra: dev
25
+ Requires-Dist: pytest>=7; extra == "dev"
26
+ Dynamic: license-file
27
+
28
+ # promptsmith
29
+
30
+ > Structured prompt builder and version manager for LLM engineers. Typed variables, Git-friendly versioning, human-readable diffs, A/B testing, and full audit trails. Works with any LLM. Zero dependencies.
31
+
32
+ [![PyPI version](https://img.shields.io/pypi/v/promptsmith.svg)](https://pypi.org/project/promptsmith/)
33
+ [![Python](https://img.shields.io/pypi/pyversions/promptsmith.svg)](https://pypi.org/project/promptsmith/)
34
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
35
+
36
+ ---
37
+
38
+ ## The Problem
39
+
40
+ Every LLM engineer ends up with prompts scattered across f-strings, Notion docs, and constants files. No versioning. No way to diff what changed. No audit trail of which prompt produced which output.
41
+
42
+ ```python
43
+ # the reality
44
+ prompt = f"Summarize this in {n} words: {text}" # in utils.py
45
+ SYSTEM = "You are helpful..." # in constants.py
46
+ prompt2 = "Summarize this concisely: " + text # in api.py
47
+ ```
48
+
49
+ **promptsmith gives your prompts the same discipline as your code.**
50
+
51
+ ---
52
+
53
+ ## Installation
54
+
55
+ ```bash
56
+ pip install promptsmithv2
57
+ ```
58
+
59
+ No dependencies. Requires Python 3.8+.
60
+
61
+ ---
62
+
63
+ ## Quick Start
64
+
65
+ ```python
66
+ from promptsmith import Prompt, PromptRegistry
67
+
68
+ # Define a typed prompt
69
+ prompt = Prompt(
70
+ name="summarizer",
71
+ template="Summarize this {content_type} in {max_words} words:\n\n{content}",
72
+ variables={"content_type": str, "max_words": int, "content": str},
73
+ description="General purpose summarizer",
74
+ )
75
+
76
+ # Render it — validates types before rendering
77
+ text = prompt.render(content_type="article", max_words=100, content="...")
78
+
79
+ # Save to registry
80
+ registry = PromptRegistry("./prompts")
81
+ registry.save(prompt)
82
+
83
+ # Load anywhere in your codebase
84
+ p = registry.load("summarizer")
85
+ text = p.render(content_type="email", max_words=50, content="...")
86
+ ```
87
+
88
+ ---
89
+
90
+ ## Core Concepts
91
+
92
+ ### Typed Variables
93
+
94
+ Variables are typed and validated before rendering — catch bugs before the LLM call:
95
+
96
+ ```python
97
+ prompt = Prompt(
98
+ name="classifier",
99
+ template="Classify this text as {label_a} or {label_b}:\n{text}",
100
+ variables={
101
+ "label_a": str,
102
+ "label_b": str,
103
+ "text": str,
104
+ }
105
+ )
106
+
107
+ # Type errors caught early
108
+ prompt.render(label_a="positive", label_b="negative", text=42)
109
+ # PromptRenderError: Variable 'text' expected str, got int
110
+ ```
111
+
112
+ ### Versioning
113
+
114
+ Every change creates a new version automatically:
115
+
116
+ ```python
117
+ p1 = registry.load("summarizer") # 1.0.0
118
+
119
+ p2 = p1.update(
120
+ template="Summarize this {content_type} concisely in under {max_words} words:\n\n{content}",
121
+ changelog="Added 'concisely' — tighter outputs"
122
+ )
123
+ registry.save(p2) # saves as 1.0.1
124
+
125
+ # Load specific version
126
+ old = registry.load("summarizer", version="1.0.0")
127
+ new = registry.load("summarizer", version="1.0.1")
128
+ new = registry.load("summarizer") # latest
129
+ ```
130
+
131
+ ### Human-Readable Diffs
132
+
133
+ ```python
134
+ print(registry.diff("summarizer", "1.0.0", "1.0.1"))
135
+ ```
136
+
137
+ ```
138
+ ── Template ─────────────────────────────────────────
139
+ --- template (1.0.0)
140
+ +++ template (1.0.1)
141
+ @@ -1 +1 @@
142
+ -Summarize this {content_type} in {max_words} words:
143
+ +Summarize this {content_type} concisely in under {max_words} words:
144
+
145
+ ── Metadata ─────────────────────────────────────────
146
+ 1.0.0 → 1.0.1
147
+ changelog: Added 'concisely' — tighter outputs
148
+ ```
149
+
150
+ ### A/B Testing
151
+
152
+ ```python
153
+ result = registry.ab_test(
154
+ name="summarizer",
155
+ version_a="1.0.0",
156
+ version_b="1.0.1",
157
+ inputs={"content_type": "article", "max_words": 100, "content": article_text},
158
+ runner=lambda prompt: openai.chat.completions.create(
159
+ model="gpt-4", messages=[{"role": "user", "content": prompt}]
160
+ ).choices[0].message.content,
161
+ scorer=lambda a, b: len(b) - len(a), # positive = B wins
162
+ )
163
+
164
+ result.print_comparison()
165
+ print(f"Winner: {result.winner}")
166
+ ```
167
+
168
+ ### Chat Models (System + User)
169
+
170
+ ```python
171
+ prompt = Prompt(
172
+ name="assistant",
173
+ template="Answer this question: {question}",
174
+ system="You are a helpful assistant. Be concise.",
175
+ variables={"question": str},
176
+ )
177
+
178
+ messages = prompt.render_messages(question="What is BPE tokenization?")
179
+ # [{"role": "system", "content": "You are..."}, {"role": "user", "content": "Answer..."}]
180
+
181
+ response = openai.chat.completions.create(model="gpt-4", messages=messages)
182
+ ```
183
+
184
+ ### Version History & Audit Trail
185
+
186
+ ```python
187
+ # Full history
188
+ for entry in registry.history("summarizer"):
189
+ print(f"v{entry['version']} — {entry['changelog']} ({entry['created_at'][:10]})")
190
+
191
+ # Past A/B results
192
+ for run in registry.ab_history("summarizer"):
193
+ print(f"{run['version_a']} vs {run['version_b']} → winner: {run['winner']}")
194
+ ```
195
+
196
+ ### Storage (Git-Friendly)
197
+
198
+ ```
199
+ prompts/
200
+ ├── promptsmith.db ← SQLite index for fast queries
201
+ ├── summarizer/
202
+ │ ├── 1.0.0.json ← full prompt definition
203
+ │ └── 1.0.1.json
204
+ └── classifier/
205
+ └── 1.0.0.json
206
+ ```
207
+
208
+ Commit the `prompts/` directory to Git — every prompt change is tracked just like code.
209
+
210
+ ---
211
+
212
+ ## API Reference
213
+
214
+ ### `Prompt`
215
+
216
+ ```python
217
+ Prompt(
218
+ name, # Unique identifier
219
+ template, # Text with {variable} placeholders
220
+ variables=None, # dict of name → type or PromptVariable
221
+ version="1.0.0",
222
+ description="",
223
+ changelog="",
224
+ tags=[],
225
+ system=None, # System prompt for chat models
226
+ metadata={},
227
+ )
228
+ ```
229
+
230
+ | Method | Description |
231
+ |---|---|
232
+ | `render(**kwargs)` | Render prompt, raises on type errors |
233
+ | `render_messages(**kwargs)` | Returns OpenAI-style messages list |
234
+ | `update(template, ...)` | Create new version with changes |
235
+ | `validate(**kwargs)` | Check inputs without rendering |
236
+ | `to_dict()` / `from_dict()` | Serialization |
237
+ | `to_json()` / `from_json()` | JSON serialization |
238
+
239
+ ### `PromptRegistry`
240
+
241
+ | Method | Description |
242
+ |---|---|
243
+ | `save(prompt)` | Save to disk + index |
244
+ | `load(name, version=None)` | Load latest or specific version |
245
+ | `history(name)` | All versions with changelogs |
246
+ | `diff(name, v_a, v_b)` | Human-readable diff |
247
+ | `ab_test(name, v_a, v_b, inputs, runner, scorer)` | A/B test two versions |
248
+ | `list(tag=None)` | List all prompts |
249
+ | `names()` | All prompt names |
250
+ | `delete(name, version=None)` | Delete version(s) |
251
+ | `export_all(path)` | Export all prompts to JSON |
252
+
253
+ ---
254
+
255
+ ## Running Tests
256
+
257
+ ```bash
258
+ pip install pytest
259
+ pytest tests/ -v
260
+ ```
261
+
262
+ ---
263
+
264
+ ## License
265
+
266
+ MIT © prabhay759
@@ -0,0 +1,239 @@
1
+ # promptsmith
2
+
3
+ > Structured prompt builder and version manager for LLM engineers. Typed variables, Git-friendly versioning, human-readable diffs, A/B testing, and full audit trails. Works with any LLM. Zero dependencies.
4
+
5
+ [![PyPI version](https://img.shields.io/pypi/v/promptsmith.svg)](https://pypi.org/project/promptsmith/)
6
+ [![Python](https://img.shields.io/pypi/pyversions/promptsmith.svg)](https://pypi.org/project/promptsmith/)
7
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
8
+
9
+ ---
10
+
11
+ ## The Problem
12
+
13
+ Every LLM engineer ends up with prompts scattered across f-strings, Notion docs, and constants files. No versioning. No way to diff what changed. No audit trail of which prompt produced which output.
14
+
15
+ ```python
16
+ # the reality
17
+ prompt = f"Summarize this in {n} words: {text}" # in utils.py
18
+ SYSTEM = "You are helpful..." # in constants.py
19
+ prompt2 = "Summarize this concisely: " + text # in api.py
20
+ ```
21
+
22
+ **promptsmith gives your prompts the same discipline as your code.**
23
+
24
+ ---
25
+
26
+ ## Installation
27
+
28
+ ```bash
29
+ pip install promptsmith
30
+ ```
31
+
32
+ No dependencies. Requires Python 3.8+.
33
+
34
+ ---
35
+
36
+ ## Quick Start
37
+
38
+ ```python
39
+ from promptsmith import Prompt, PromptRegistry
40
+
41
+ # Define a typed prompt
42
+ prompt = Prompt(
43
+ name="summarizer",
44
+ template="Summarize this {content_type} in {max_words} words:\n\n{content}",
45
+ variables={"content_type": str, "max_words": int, "content": str},
46
+ description="General purpose summarizer",
47
+ )
48
+
49
+ # Render it — validates types before rendering
50
+ text = prompt.render(content_type="article", max_words=100, content="...")
51
+
52
+ # Save to registry
53
+ registry = PromptRegistry("./prompts")
54
+ registry.save(prompt)
55
+
56
+ # Load anywhere in your codebase
57
+ p = registry.load("summarizer")
58
+ text = p.render(content_type="email", max_words=50, content="...")
59
+ ```
60
+
61
+ ---
62
+
63
+ ## Core Concepts
64
+
65
+ ### Typed Variables
66
+
67
+ Variables are typed and validated before rendering — catch bugs before the LLM call:
68
+
69
+ ```python
70
+ prompt = Prompt(
71
+ name="classifier",
72
+ template="Classify this text as {label_a} or {label_b}:\n{text}",
73
+ variables={
74
+ "label_a": str,
75
+ "label_b": str,
76
+ "text": str,
77
+ }
78
+ )
79
+
80
+ # Type errors caught early
81
+ prompt.render(label_a="positive", label_b="negative", text=42)
82
+ # PromptRenderError: Variable 'text' expected str, got int
83
+ ```
84
+
85
+ ### Versioning
86
+
87
+ Every change creates a new version automatically:
88
+
89
+ ```python
90
+ p1 = registry.load("summarizer") # 1.0.0
91
+
92
+ p2 = p1.update(
93
+ template="Summarize this {content_type} concisely in under {max_words} words:\n\n{content}",
94
+ changelog="Added 'concisely' — tighter outputs"
95
+ )
96
+ registry.save(p2) # saves as 1.0.1
97
+
98
+ # Load specific version
99
+ old = registry.load("summarizer", version="1.0.0")
100
+ new = registry.load("summarizer", version="1.0.1")
101
+ new = registry.load("summarizer") # latest
102
+ ```
103
+
104
+ ### Human-Readable Diffs
105
+
106
+ ```python
107
+ print(registry.diff("summarizer", "1.0.0", "1.0.1"))
108
+ ```
109
+
110
+ ```
111
+ ── Template ─────────────────────────────────────────
112
+ --- template (1.0.0)
113
+ +++ template (1.0.1)
114
+ @@ -1 +1 @@
115
+ -Summarize this {content_type} in {max_words} words:
116
+ +Summarize this {content_type} concisely in under {max_words} words:
117
+
118
+ ── Metadata ─────────────────────────────────────────
119
+ 1.0.0 → 1.0.1
120
+ changelog: Added 'concisely' — tighter outputs
121
+ ```
122
+
123
+ ### A/B Testing
124
+
125
+ ```python
126
+ result = registry.ab_test(
127
+ name="summarizer",
128
+ version_a="1.0.0",
129
+ version_b="1.0.1",
130
+ inputs={"content_type": "article", "max_words": 100, "content": article_text},
131
+ runner=lambda prompt: openai.chat.completions.create(
132
+ model="gpt-4", messages=[{"role": "user", "content": prompt}]
133
+ ).choices[0].message.content,
134
+ scorer=lambda a, b: len(b) - len(a), # positive = B wins
135
+ )
136
+
137
+ result.print_comparison()
138
+ print(f"Winner: {result.winner}")
139
+ ```
140
+
141
+ ### Chat Models (System + User)
142
+
143
+ ```python
144
+ prompt = Prompt(
145
+ name="assistant",
146
+ template="Answer this question: {question}",
147
+ system="You are a helpful assistant. Be concise.",
148
+ variables={"question": str},
149
+ )
150
+
151
+ messages = prompt.render_messages(question="What is BPE tokenization?")
152
+ # [{"role": "system", "content": "You are..."}, {"role": "user", "content": "Answer..."}]
153
+
154
+ response = openai.chat.completions.create(model="gpt-4", messages=messages)
155
+ ```
156
+
157
+ ### Version History & Audit Trail
158
+
159
+ ```python
160
+ # Full history
161
+ for entry in registry.history("summarizer"):
162
+ print(f"v{entry['version']} — {entry['changelog']} ({entry['created_at'][:10]})")
163
+
164
+ # Past A/B results
165
+ for run in registry.ab_history("summarizer"):
166
+ print(f"{run['version_a']} vs {run['version_b']} → winner: {run['winner']}")
167
+ ```
168
+
169
+ ### Storage (Git-Friendly)
170
+
171
+ ```
172
+ prompts/
173
+ ├── promptsmith.db ← SQLite index for fast queries
174
+ ├── summarizer/
175
+ │ ├── 1.0.0.json ← full prompt definition
176
+ │ └── 1.0.1.json
177
+ └── classifier/
178
+ └── 1.0.0.json
179
+ ```
180
+
181
+ Commit the `prompts/` directory to Git — every prompt change is tracked just like code.
182
+
183
+ ---
184
+
185
+ ## API Reference
186
+
187
+ ### `Prompt`
188
+
189
+ ```python
190
+ Prompt(
191
+ name, # Unique identifier
192
+ template, # Text with {variable} placeholders
193
+ variables=None, # dict of name → type or PromptVariable
194
+ version="1.0.0",
195
+ description="",
196
+ changelog="",
197
+ tags=[],
198
+ system=None, # System prompt for chat models
199
+ metadata={},
200
+ )
201
+ ```
202
+
203
+ | Method | Description |
204
+ |---|---|
205
+ | `render(**kwargs)` | Render prompt, raises on type errors |
206
+ | `render_messages(**kwargs)` | Returns OpenAI-style messages list |
207
+ | `update(template, ...)` | Create new version with changes |
208
+ | `validate(**kwargs)` | Check inputs without rendering |
209
+ | `to_dict()` / `from_dict()` | Serialization |
210
+ | `to_json()` / `from_json()` | JSON serialization |
211
+
212
+ ### `PromptRegistry`
213
+
214
+ | Method | Description |
215
+ |---|---|
216
+ | `save(prompt)` | Save to disk + index |
217
+ | `load(name, version=None)` | Load latest or specific version |
218
+ | `history(name)` | All versions with changelogs |
219
+ | `diff(name, v_a, v_b)` | Human-readable diff |
220
+ | `ab_test(name, v_a, v_b, inputs, runner, scorer)` | A/B test two versions |
221
+ | `list(tag=None)` | List all prompts |
222
+ | `names()` | All prompt names |
223
+ | `delete(name, version=None)` | Delete version(s) |
224
+ | `export_all(path)` | Export all prompts to JSON |
225
+
226
+ ---
227
+
228
+ ## Running Tests
229
+
230
+ ```bash
231
+ pip install pytest
232
+ pytest tests/ -v
233
+ ```
234
+
235
+ ---
236
+
237
+ ## License
238
+
239
+ MIT © prabhay759
@@ -0,0 +1,15 @@
1
+ """
2
+ promptsmith — Structured prompt builder and version manager for LLM engineers.
3
+ Version control, typed variables, diffing, A/B testing, and audit trails.
4
+ """
5
+
6
+ from .prompt import Prompt, PromptVariable, PromptRenderError
7
+ from .registry import PromptRegistry
8
+ from .diff import diff_prompts
9
+
10
+ __all__ = [
11
+ "Prompt", "PromptVariable", "PromptRenderError",
12
+ "PromptRegistry", "diff_prompts",
13
+ ]
14
+ __version__ = "1.0.0"
15
+ __author__ = "prabhay759"
@@ -0,0 +1,97 @@
1
+ """
2
+ promptsmith.diff
3
+ ----------------
4
+ Human-readable diffing between prompt versions.
5
+ """
6
+
7
+ import difflib
8
+ from typing import List, TYPE_CHECKING
9
+
10
+ if TYPE_CHECKING:
11
+ from .prompt import Prompt
12
+
13
+
14
def diff_prompts(a: "Prompt", b: "Prompt", context: int = 3) -> str:
    """
    Return a human-readable diff between two prompt versions.

    Compares the template, the optional system prompt, and the typed
    variable declarations, then appends a metadata footer (version bump
    and changelog).

    Parameters
    ----------
    a : Prompt
        The older / baseline prompt.
    b : Prompt
        The newer prompt.
    context : int
        Lines of context to show around changes.

    Returns
    -------
    str
        The formatted diff, or the literal string "(no changes)" when
        template, system prompt, and variables are all identical.
    """
    lines = []

    # Template diff
    template_diff = _text_diff(
        a.template, b.template,
        fromfile=f"template ({a.version})",
        tofile=f"template ({b.version})",
        context=context,
    )
    if template_diff:
        lines.append("── Template ─────────────────────────────────────────")
        lines.append(template_diff)

    # System prompt diff — only when at least one version defines one.
    if a.system or b.system:
        system_diff = _text_diff(
            a.system or "", b.system or "",
            fromfile=f"system ({a.version})",
            tofile=f"system ({b.version})",
            context=context,
        )
        if system_diff:
            lines.append("── System ───────────────────────────────────────────")
            lines.append(system_diff)

    # Variable diff
    var_changes = _diff_variables(a, b)
    if var_changes:
        lines.append("── Variables ────────────────────────────────────────")
        lines.extend(var_changes)

    # BUG FIX: the metadata footer used to be appended unconditionally,
    # so `lines` was never empty and the "(no changes)" fallback was dead
    # code — identical prompts produced a metadata-only diff.  Check for
    # no-change BEFORE adding the footer.
    if not lines:
        return "(no changes)"

    # Metadata footer
    lines.append("── Metadata ─────────────────────────────────────────")
    lines.append(f" {a.version} → {b.version}")
    lines.append(f" changelog: {b.changelog}")

    return "\n".join(lines)


def _text_diff(old: str, new: str, fromfile: str, tofile: str, context: int) -> str:
    """Unified diff between two text blobs; empty string when identical."""
    if old == new:
        return ""
    old_lines = old.splitlines(keepends=True)
    new_lines = new.splitlines(keepends=True)
    return "".join(difflib.unified_diff(
        old_lines, new_lines,
        fromfile=fromfile, tofile=tofile,
        n=context,
    ))


def _diff_variables(a: "Prompt", b: "Prompt") -> List[str]:
    """List added/removed/type-changed variables between two prompts."""
    lines = []
    all_keys = set(a.variables) | set(b.variables)
    for key in sorted(all_keys):
        if key not in a.variables:
            lines.append(f" + added: {key} ({b.variables[key].type_.__name__})")
        elif key not in b.variables:
            lines.append(f" - removed: {key}")
        else:
            va, vb = a.variables[key], b.variables[key]
            if va.type_ != vb.type_:
                lines.append(
                    f" ~ changed: {key} "
                    f"{va.type_.__name__} → {vb.type_.__name__}"
                )
    return lines