promptdiff-ai 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
promptdiff/__init__.py ADDED
@@ -0,0 +1,52 @@
1
+ """promptdiff: Semantic diff for LLM prompt changes.
2
+
3
+ Public API exports for convenience imports::
4
+
5
+ from promptdiff import diff_files, PromptDiff, format_text
6
+ """
7
+
8
+ from promptdiff.models import (
9
+ BreakingChange,
10
+ ChangeStatus,
11
+ MessageDiff,
12
+ MetadataDiff,
13
+ PromptDiff,
14
+ TokenDelta,
15
+ VariableDiff,
16
+ )
17
+ from promptdiff.differ import (
18
+ compute_token_delta,
19
+ diff_files,
20
+ diff_messages,
21
+ diff_metadata,
22
+ diff_variables,
23
+ )
24
+ from promptdiff.analyzer import analyze_breaking_changes
25
+ from promptdiff.reporter import format_json, format_markdown, format_text
26
+
27
+ __version__ = "1.0.0"
28
+
29
+ __all__ = [
30
+ # Models
31
+ "BreakingChange",
32
+ "ChangeStatus",
33
+ "MessageDiff",
34
+ "MetadataDiff",
35
+ "PromptDiff",
36
+ "TokenDelta",
37
+ "VariableDiff",
38
+ # Differ
39
+ "compute_token_delta",
40
+ "diff_files",
41
+ "diff_messages",
42
+ "diff_metadata",
43
+ "diff_variables",
44
+ # Analyzer
45
+ "analyze_breaking_changes",
46
+ # Reporter
47
+ "format_json",
48
+ "format_markdown",
49
+ "format_text",
50
+ # Version
51
+ "__version__",
52
+ ]
promptdiff/analyzer.py ADDED
@@ -0,0 +1,163 @@
1
+ """Breaking change detection for promptdiff.
2
+
3
+ Analyzes a PromptDiff and classifies changes by severity.
4
+
5
+ Breaking changes:
6
+ - New required variable (no default) -- high severity
7
+ - Removed variable -- high severity
8
+ - Removed message -- high severity
9
+ - Changed role ordering -- medium severity
10
+ - Model change (in metadata) -- medium severity
11
+
12
+ Non-breaking changes:
13
+ - Added variable with default
14
+ - Added messages
15
+ - Content modifications
16
+ - Metadata changes (except model)
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from promptdiff.models import (
22
+ BreakingChange,
23
+ ChangeStatus,
24
+ PromptDiff,
25
+ )
26
+
27
+
28
def analyze_breaking_changes(diff: PromptDiff) -> list[BreakingChange]:
    """Analyze a diff for breaking changes.

    Runs the variable, message, metadata, and role-ordering checks and
    merges their findings into a single list.

    Parameters
    ----------
    diff:
        The prompt diff to analyze.

    Returns
    -------
    list[BreakingChange]
        All detected breaking changes sorted by severity: high, then
        medium, then low, then any unrecognized severity. The sort is
        stable, so findings of equal severity keep the order in which the
        checks produced them.
    """
    changes: list[BreakingChange] = [
        *_check_variable_changes(diff),
        *_check_message_changes(diff),
        *_check_metadata_changes(diff),
        *_check_role_ordering(diff),
    ]

    # Unknown severities rank after "low" instead of raising KeyError.
    severity_order = {"high": 0, "medium": 1, "low": 2}
    changes.sort(key=lambda c: severity_order.get(c.severity, 3))

    return changes
52
+
53
+
54
def _check_variable_changes(diff: PromptDiff) -> list[BreakingChange]:
    """Detect breaking variable changes.

    A removed variable is always reported. An added variable is reported
    only when its diff entry is flagged ``is_breaking``. Both kinds are
    reported with high severity; modified and unchanged variables are
    ignored.

    Parameters
    ----------
    diff:
        The prompt diff whose ``variable_diffs`` are inspected.

    Returns
    -------
    list[BreakingChange]
        One entry per breaking variable change, in ``variable_diffs`` order.
    """
    changes: list[BreakingChange] = []

    for vd in diff.variable_diffs:
        if vd.status == ChangeStatus.REMOVED:
            changes.append(
                BreakingChange(
                    category="variable",
                    description=f"Variable '{vd.name}' was removed",
                    severity="high",
                )
            )
        elif vd.status == ChangeStatus.ADDED and vd.is_breaking:
            changes.append(
                BreakingChange(
                    category="variable",
                    description=(
                        f"New required variable '{vd.name}' added without a default value"
                    ),
                    severity="high",
                )
            )

    return changes
79
+
80
+
81
def _check_message_changes(diff: PromptDiff) -> list[BreakingChange]:
    """Detect breaking message changes.

    Only removed messages are reported (high severity); added, modified
    and unchanged messages are ignored.

    Parameters
    ----------
    diff:
        The prompt diff whose ``message_diffs`` are inspected.

    Returns
    -------
    list[BreakingChange]
        One entry per removed message, in ``message_diffs`` order.
    """
    return [
        BreakingChange(
            category="message",
            description=f"{md.role.capitalize()} message was removed",
            severity="high",
        )
        for md in diff.message_diffs
        if md.status == ChangeStatus.REMOVED
    ]
96
+
97
+
98
def _check_metadata_changes(diff: PromptDiff) -> list[BreakingChange]:
    """Detect breaking metadata changes (model changes).

    Only the ``model`` metadata key is considered. A modified or removed
    model is reported with medium severity; an added or unchanged model
    is not reported.

    Parameters
    ----------
    diff:
        The prompt diff whose ``metadata_diffs`` are inspected.

    Returns
    -------
    list[BreakingChange]
        One entry per breaking ``model`` change.
    """
    changes: list[BreakingChange] = []

    for md in diff.metadata_diffs:
        # Every other metadata key is treated as non-breaking.
        if md.key != "model":
            continue

        if md.status == ChangeStatus.MODIFIED:
            changes.append(
                BreakingChange(
                    category="model",
                    description=(
                        f"Model changed from '{md.old_value}' to '{md.new_value}'"
                    ),
                    severity="medium",
                )
            )
        elif md.status == ChangeStatus.REMOVED:
            changes.append(
                BreakingChange(
                    category="model",
                    description="Model specification was removed",
                    severity="medium",
                )
            )

    return changes
123
+
124
+
125
def _check_role_ordering(diff: PromptDiff) -> list[BreakingChange]:
    """Detect role ordering changes.

    The old and new role sequences are rebuilt from ``diff.message_diffs``:
    removed messages contribute to the old sequence, added messages to the
    new one, and modified/unchanged messages to both. Roles that occur on
    only one side are then dropped from both sequences.

    A change is reported (medium severity) only when neither filtered
    sequence can be obtained from the other by deleting entries. Comparing
    the filtered sequences for plain equality would also flag a pure
    addition or removal of a repeated role (for example
    ``system, user, user`` becoming ``system, user``), which is not a
    reordering and is already covered by the message checks.

    Parameters
    ----------
    diff:
        The prompt diff whose ``message_diffs`` are inspected.

    Returns
    -------
    list[BreakingChange]
        Empty, or a single ``role`` entry describing both sequences.
    """
    # Reconstruct old and new role sequences from the message diffs.
    old_roles: list[str] = []
    new_roles: list[str] = []

    for md in diff.message_diffs:
        if md.status == ChangeStatus.REMOVED:
            old_roles.append(md.role)
        elif md.status == ChangeStatus.ADDED:
            new_roles.append(md.role)
        elif md.status in (ChangeStatus.MODIFIED, ChangeStatus.UNCHANGED):
            old_roles.append(md.role)
            new_roles.append(md.role)

    # An empty side means everything was added or removed: nothing to reorder.
    if not old_roles or not new_roles or old_roles == new_roles:
        return []

    shared = set(old_roles) & set(new_roles)
    common_old = [role for role in old_roles if role in shared]
    common_new = [role for role in new_roles if role in shared]

    # If one sequence embeds the other, the difference is explained by
    # additions/removals alone and the relative order is intact.
    if _is_subsequence(common_old, common_new) or _is_subsequence(
        common_new, common_old
    ):
        return []

    return [
        BreakingChange(
            category="role",
            description=(
                f"Message role ordering changed: "
                f"{' -> '.join(old_roles)} to {' -> '.join(new_roles)}"
            ),
            severity="medium",
        )
    ]


def _is_subsequence(candidate: list[str], sequence: list[str]) -> bool:
    """Return True if *candidate* can be obtained from *sequence* by deleting entries."""
    # ``in`` on an iterator consumes it up to the match, so each lookup
    # resumes where the previous one stopped and order is enforced.
    remaining = iter(sequence)
    return all(role in remaining for role in candidate)
promptdiff/cli.py ADDED
@@ -0,0 +1,123 @@
1
+ """Typer CLI application for promptdiff.
2
+
3
+ Provides the ``promptdiff`` entry point for comparing two prompt files
4
+ and reporting differences in text, JSON, or Markdown format.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import sys
10
+ from enum import Enum
11
+ from pathlib import Path
12
+ from typing import Optional
13
+
14
+ import typer
15
+ from rich.console import Console
16
+
17
+ from promptdiff.differ import diff_files
18
+ from promptdiff.reporter import format_json, format_markdown, format_text
19
+
20
+ # ---------------------------------------------------------------------------
21
+ # Console
22
+ # ---------------------------------------------------------------------------
23
+
24
+ _console = Console()
25
+ _err_console = Console(stderr=True)
26
+
27
+
28
class OutputFormat(str, Enum):
    """Supported output formats.

    Members are ``str`` subclasses, so each compares equal to its plain
    string value and can be looked up from it (``OutputFormat("json")``).
    """

    TEXT = "text"
    JSON = "json"
    MARKDOWN = "markdown"
34
+
35
+
36
+ # ---------------------------------------------------------------------------
37
+ # Version callback
38
+ # ---------------------------------------------------------------------------
39
+
40
+
41
def _version_callback(value: bool) -> None:
    """Print the package version and exit when ``--version`` is given.

    Parameters
    ----------
    value:
        The parsed flag value; nothing happens when it is false.

    Raises
    ------
    typer.Exit
        After the version line has been printed.
    """
    if value:
        # Imported inside the function rather than at module level.
        from promptdiff import __version__

        _console.print(f"promptdiff {__version__}")
        raise typer.Exit()
47
+
48
+
49
+ # ---------------------------------------------------------------------------
50
+ # Main command — single-command app (no subcommands needed)
51
+ # ---------------------------------------------------------------------------
52
+
53
+
54
def main(
    file_a: Path = typer.Argument(..., help="Path to the old prompt file"),
    file_b: Path = typer.Argument(..., help="Path to the new prompt file"),
    output_format: OutputFormat = typer.Option(
        OutputFormat.TEXT,
        "--format",
        "-f",
        help="Output format: text, json, markdown",
    ),
    exit_on_breaking: bool = typer.Option(
        False,
        "--exit-on-breaking",
        help="Exit with code 1 if breaking changes are found",
    ),
    token_detail: bool = typer.Option(
        False,
        "--token-detail",
        help="Show per-message token breakdowns",
    ),
    encoding: str = typer.Option(
        "cl100k_base",
        "--encoding",
        "-e",
        help="tiktoken encoding for token counting",
    ),
    version: bool = typer.Option(
        False,
        "--version",
        "-V",
        callback=_version_callback,
        is_eager=True,
        help="Show version and exit",
    ),
) -> None:
    """Semantic diff for LLM prompt changes.

    Compare two prompt files and show structured diff with message-level
    changes, variable changes, token deltas, and breaking change classification.
    """
    # Exit codes: 2 when the diff cannot be computed, 1 when
    # --exit-on-breaking is set and breaking changes were found.
    from rich.markup import escape

    try:
        result = diff_files(file_a, file_b, encoding=encoding)
    except Exception as exc:
        # Top-level CLI boundary: any failure (including a missing file) is
        # reported on stderr. The message is escaped so that bracketed text
        # in it, e.g. a path like "prompts[v2].yaml", is printed literally
        # instead of being parsed as Rich markup.
        _err_console.print(f"[red]Error:[/red] {escape(str(exc))}")
        raise typer.Exit(2) from exc

    if output_format == OutputFormat.TEXT:
        output = format_text(result, show_token_detail=token_detail)
        _console.print(output)
    elif output_format == OutputFormat.JSON:
        output = format_json(result)
        # Use print() to avoid Rich adding ANSI codes to raw JSON.
        print(output)
    elif output_format == OutputFormat.MARKDOWN:
        output = format_markdown(result)
        print(output)

    if exit_on_breaking and result.is_breaking:
        raise typer.Exit(1)
115
+
116
+
117
+ app = typer.Typer(
118
+ name="promptdiff",
119
+ help="Semantic diff for LLM prompt files.",
120
+ add_completion=False,
121
+ no_args_is_help=True,
122
+ )
123
+ app.command()(main)
promptdiff/differ.py ADDED
@@ -0,0 +1,393 @@
1
+ """Core diff engine for promptdiff.
2
+
3
+ Compares two prompt files parsed via prompttools_core and produces a
4
+ structured PromptDiff with message-level, variable-level, and metadata-level
5
+ change information.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import difflib
11
+ from pathlib import Path
12
+ from typing import Any, Optional
13
+
14
+ from prompttools_core import PromptFile, Tokenizer, count_tokens, parse_file
15
+ from prompttools_core.models import Message
16
+
17
+ from promptdiff.analyzer import analyze_breaking_changes
18
+ from promptdiff.models import (
19
+ ChangeStatus,
20
+ MessageDiff,
21
+ MetadataDiff,
22
+ PromptDiff,
23
+ TokenDelta,
24
+ VariableDiff,
25
+ )
26
+
27
+ # Default encoding used for token counting when no model is specified.
28
+ _DEFAULT_ENCODING = "cl100k_base"
29
+
30
+
31
def diff_files(
    old_path: Path,
    new_path: Path,
    encoding: str = _DEFAULT_ENCODING,
) -> PromptDiff:
    """Parse and diff two prompt files.

    Both files are parsed, their messages, variables and metadata are
    diffed, the token delta is computed, and the breaking-change analysis
    is run on the combined result.

    Parameters
    ----------
    old_path:
        Path to the old prompt file.
    new_path:
        Path to the new prompt file.
    encoding:
        tiktoken encoding name for token counting.

    Returns
    -------
    PromptDiff
        A structured diff between the two files. Its ``file_path`` is
        ``new_path``.
    """
    old_file = parse_file(Path(old_path))
    new_file = parse_file(Path(new_path))

    result = PromptDiff(
        file_path=new_path,
        old_hash=old_file.content_hash,
        new_hash=new_file.content_hash,
        message_diffs=diff_messages(old_file.messages, new_file.messages, encoding),
        variable_diffs=diff_variables(
            old_file.variables,
            new_file.variables,
            old_file.variable_defaults,
            new_file.variable_defaults,
        ),
        metadata_diffs=diff_metadata(old_file.metadata, new_file.metadata),
        token_delta=compute_token_delta(old_file, new_file, encoding),
        breaking_changes=[],
    )

    # The analyzer takes the assembled diff as input, so its findings can
    # only be attached after the PromptDiff has been built.
    result.breaking_changes = analyze_breaking_changes(result)

    return result
80
+
81
+
82
def diff_messages(
    old_msgs: list[Message],
    new_msgs: list[Message],
    encoding: str = _DEFAULT_ENCODING,
) -> list[MessageDiff]:
    """Align and diff two ordered lists of messages.

    Alignment is done by (role, position-within-role). Messages are matched
    by role in the order they appear -- the first system message in old is
    compared with the first system message in new, etc. When one side has
    more messages of a role than the other, the surplus is reported as
    added or removed.

    Parameters
    ----------
    old_msgs:
        Messages from the old prompt file.
    new_msgs:
        Messages from the new prompt file.
    encoding:
        tiktoken encoding name for token counting.

    Returns
    -------
    list[MessageDiff]
        Message diffs grouped by role. Roles appear in first-seen order
        (roles of the old file first, then roles found only in the new
        file). The result therefore does not preserve how messages of
        different roles were interleaved in either file.
    """
    # Group messages by role, preserving order within each role.
    old_by_role: dict[str, list[Message]] = {}
    new_by_role: dict[str, list[Message]] = {}

    for msg in old_msgs:
        old_by_role.setdefault(msg.role, []).append(msg)
    for msg in new_msgs:
        new_by_role.setdefault(msg.role, []).append(msg)

    # dict.fromkeys de-duplicates while keeping insertion order.
    all_roles_ordered = list(
        dict.fromkeys(msg.role for msg in [*old_msgs, *new_msgs])
    )

    diffs: list[MessageDiff] = []

    for role in all_roles_ordered:
        old_list = old_by_role.get(role, [])
        new_list = new_by_role.get(role, [])

        # Pair by index; a missing partner is passed as None.
        for i in range(max(len(old_list), len(new_list))):
            old_msg = old_list[i] if i < len(old_list) else None
            new_msg = new_list[i] if i < len(new_list) else None
            diffs.append(_diff_single_message(old_msg, new_msg, role, encoding))

    return diffs
140
+
141
+
142
def _diff_single_message(
    old_msg: Optional[Message],
    new_msg: Optional[Message],
    role: str,
    encoding: str,
) -> MessageDiff:
    """Diff a single aligned pair of messages.

    Parameters
    ----------
    old_msg:
        Message from the old version, or None if it has no counterpart.
    new_msg:
        Message from the new version, or None if it has no counterpart.
    role:
        Role recorded on the resulting diff and used in its descriptions.
    encoding:
        tiktoken encoding name for token counting.

    Returns
    -------
    MessageDiff
        ADDED when only ``new_msg`` is given, REMOVED when only ``old_msg``
        is given, UNCHANGED when both contents are equal, otherwise
        MODIFIED with a unified diff of the content lines.

    Raises
    ------
    ValueError
        If both ``old_msg`` and ``new_msg`` are None.
    """
    if old_msg is None:
        if new_msg is None:
            # Raised explicitly: an ``assert`` would be stripped under -O.
            raise ValueError("at least one of old_msg and new_msg is required")

        # Added
        new_tokens = count_tokens(new_msg.content, encoding)
        return MessageDiff(
            status=ChangeStatus.ADDED,
            role=role,
            new_content=new_msg.content,
            token_delta=new_tokens,
            changes=[f"Added {role} message ({new_tokens} tokens)"],
        )

    if new_msg is None:
        # Removed
        old_tokens = count_tokens(old_msg.content, encoding)
        return MessageDiff(
            status=ChangeStatus.REMOVED,
            role=role,
            old_content=old_msg.content,
            token_delta=-old_tokens,
            changes=[f"Removed {role} message ({old_tokens} tokens)"],
        )

    # Both exist -- compare content.
    if old_msg.content == new_msg.content:
        # Identical content: no token counting is done.
        return MessageDiff(
            status=ChangeStatus.UNCHANGED,
            role=role,
            old_content=old_msg.content,
            new_content=new_msg.content,
            token_delta=0,
        )

    # Modified
    old_tokens = count_tokens(old_msg.content, encoding)
    new_tokens = count_tokens(new_msg.content, encoding)
    delta = new_tokens - old_tokens

    content_diff = "\n".join(
        difflib.unified_diff(
            old_msg.content.splitlines(),
            new_msg.content.splitlines(),
            lineterm="",
            fromfile="old",
            tofile="new",
        )
    )

    changes: list[str] = []
    if delta != 0:
        direction = "increased" if delta > 0 else "decreased"
        changes.append(
            f"{role.capitalize()} message {direction} by {abs(delta)} tokens"
        )
    changes.append(f"{role.capitalize()} message content modified")

    return MessageDiff(
        status=ChangeStatus.MODIFIED,
        role=role,
        old_content=old_msg.content,
        new_content=new_msg.content,
        content_diff=content_diff,
        token_delta=delta,
        changes=changes,
    )
215
+
216
+
217
def diff_variables(
    old_vars: dict[str, str],
    new_vars: dict[str, str],
    old_defaults: dict[str, str],
    new_defaults: dict[str, str],
) -> list[VariableDiff]:
    """Compare variable sets between old and new prompt versions.

    Only the variable names and their defaults are compared; the values of
    ``old_vars`` / ``new_vars`` are not inspected.

    Parameters
    ----------
    old_vars:
        Variables found in the old version (name -> syntax style).
    new_vars:
        Variables found in the new version (name -> syntax style).
    old_defaults:
        Default values from the old version metadata.
    new_defaults:
        Default values from the new version metadata.

    Returns
    -------
    list[VariableDiff]
        One diff per variable name, sorted by name. A removed variable is
        always flagged ``is_breaking``; an added variable is flagged only
        when it has no default. A variable present in both versions is
        MODIFIED when its default differs and UNCHANGED otherwise, and is
        never flagged.
    """
    diffs: list[VariableDiff] = []

    for name in sorted(set(old_vars) | set(new_vars)):
        in_old = name in old_vars
        in_new = name in new_vars

        if in_old and not in_new:
            diffs.append(
                VariableDiff(
                    name=name,
                    status=ChangeStatus.REMOVED,
                    old_default=old_defaults.get(name),
                    is_breaking=True,
                )
            )
        elif not in_old and in_new:
            new_default = new_defaults.get(name)
            # Breaking if no default is provided for the new variable.
            diffs.append(
                VariableDiff(
                    name=name,
                    status=ChangeStatus.ADDED,
                    new_default=new_default,
                    is_breaking=new_default is None,
                )
            )
        else:
            # Present in both -- check if default changed.
            old_def = old_defaults.get(name)
            new_def = new_defaults.get(name)
            status = (
                ChangeStatus.UNCHANGED
                if old_def == new_def
                else ChangeStatus.MODIFIED
            )
            diffs.append(
                VariableDiff(
                    name=name,
                    status=status,
                    old_default=old_def,
                    new_default=new_def,
                )
            )

    return diffs
292
+
293
+
294
def diff_metadata(
    old_meta: dict[str, Any],
    new_meta: dict[str, Any],
) -> list[MetadataDiff]:
    """Compare metadata dictionaries.

    Parameters
    ----------
    old_meta:
        Metadata from the old prompt file.
    new_meta:
        Metadata from the new prompt file.

    Returns
    -------
    list[MetadataDiff]
        One diff per key found in either dictionary, sorted by key. Values
        are compared with ``==``.
    """
    diffs: list[MetadataDiff] = []

    for key in sorted(set(old_meta) | set(new_meta)):
        in_old = key in old_meta
        in_new = key in new_meta

        if in_old and not in_new:
            diffs.append(
                MetadataDiff(
                    key=key,
                    status=ChangeStatus.REMOVED,
                    old_value=old_meta[key],
                )
            )
        elif not in_old and in_new:
            diffs.append(
                MetadataDiff(
                    key=key,
                    status=ChangeStatus.ADDED,
                    new_value=new_meta[key],
                )
            )
        else:
            # Present on both sides: only the status depends on equality.
            old_value = old_meta[key]
            new_value = new_meta[key]
            status = (
                ChangeStatus.UNCHANGED
                if old_value == new_value
                else ChangeStatus.MODIFIED
            )
            diffs.append(
                MetadataDiff(
                    key=key,
                    status=status,
                    old_value=old_value,
                    new_value=new_value,
                )
            )

    return diffs
355
+
356
+
357
def compute_token_delta(
    old_file: PromptFile,
    new_file: PromptFile,
    encoding: str = _DEFAULT_ENCODING,
) -> TokenDelta:
    """Compute token count comparison between two prompt files.

    Parameters
    ----------
    old_file:
        The old parsed prompt file.
    new_file:
        The new parsed prompt file.
    encoding:
        tiktoken encoding name for token counting.

    Returns
    -------
    TokenDelta
        Old and new totals, their difference (new - old), and the
        percentage change relative to the old total rounded to two
        decimals. When the old total is zero the percentage is reported as
        100.0 if the new file has any tokens and 0.0 otherwise.
    """
    tokenizer = Tokenizer(encoding=encoding)
    old_total = tokenizer.count_file(old_file)
    new_total = tokenizer.count_file(new_file)

    delta = new_total - old_total
    if old_total == 0:
        # No baseline to divide by; use a fixed convention instead.
        percent_change = 100.0 if new_total > 0 else 0.0
    else:
        percent_change = round((delta / old_total) * 100, 2)

    return TokenDelta(
        old_total=old_total,
        new_total=new_total,
        delta=delta,
        percent_change=percent_change,
    )
promptdiff/models.py ADDED
@@ -0,0 +1,135 @@
1
+ """Data models for promptdiff.
2
+
3
+ All models use Pydantic v2 syntax. These define the structured representation
4
+ of prompt diffs, including message-level changes, variable changes, token
5
+ deltas, and breaking change classification.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from enum import Enum
11
+ from pathlib import Path
12
+ from typing import Any, Optional
13
+
14
+ from pydantic import BaseModel, Field, computed_field
15
+
16
+
17
class ChangeStatus(str, Enum):
    """Status of a change between two versions.

    Members are ``str`` subclasses, so each compares equal to its plain
    string value.
    """

    ADDED = "added"
    REMOVED = "removed"
    MODIFIED = "modified"
    UNCHANGED = "unchanged"
24
+
25
+
26
class MessageDiff(BaseModel):
    """Diff of a single message between old and new prompt versions.

    Only ``status`` and ``role`` are required; every other field has a
    default.
    """

    status: ChangeStatus = Field(..., description="Change status for this message")
    role: str = Field(..., description="Message role (system, user, assistant, tool)")
    old_content: Optional[str] = Field(
        default=None, description="Content in the old version"
    )
    new_content: Optional[str] = Field(
        default=None, description="Content in the new version"
    )
    content_diff: Optional[str] = Field(
        default=None, description="Unified diff of the content"
    )
    token_delta: int = Field(
        default=0, description="Change in token count for this message"
    )
    changes: list[str] = Field(
        default_factory=list,
        description="Human-readable descriptions of changes",
    )
47
+
48
+
49
class VariableDiff(BaseModel):
    """Diff of a template variable between old and new prompt versions.

    ``is_breaking`` defaults to False and must be set explicitly by
    whoever builds the diff.
    """

    name: str = Field(..., description="Variable name")
    status: ChangeStatus = Field(..., description="Change status for this variable")
    old_default: Optional[str] = Field(
        default=None, description="Default value in the old version"
    )
    new_default: Optional[str] = Field(
        default=None, description="Default value in the new version"
    )
    is_breaking: bool = Field(
        default=False,
        description="True if this is a breaking change",
    )
64
+
65
+
66
class MetadataDiff(BaseModel):
    """Diff of a metadata key between old and new prompt versions.

    Values are typed ``Any``, so no type constraint is placed on them.
    """

    key: str = Field(..., description="Metadata key")
    status: ChangeStatus = Field(..., description="Change status for this key")
    old_value: Optional[Any] = Field(
        default=None, description="Value in the old version"
    )
    new_value: Optional[Any] = Field(
        default=None, description="Value in the new version"
    )
77
+
78
+
79
class TokenDelta(BaseModel):
    """Token count comparison between old and new prompt versions.

    All four fields are required; the model does not derive ``delta`` or
    ``percent_change`` from the totals.
    """

    old_total: int = Field(..., description="Total tokens in old version")
    new_total: int = Field(..., description="Total tokens in new version")
    delta: int = Field(..., description="Change in token count (new - old)")
    percent_change: float = Field(
        ..., description="Percentage change in token count"
    )
88
+
89
+
90
class BreakingChange(BaseModel):
    """A detected breaking change between prompt versions.

    ``category`` and ``severity`` are plain strings; the values listed in
    their descriptions are not enforced by the model.
    """

    category: str = Field(
        ...,
        description="Category: variable, message, model, role",
    )
    description: str = Field(
        ..., description="Human-readable description of the breaking change"
    )
    severity: str = Field(
        ..., description="Severity: high, medium, low"
    )
103
+
104
+
105
class PromptDiff(BaseModel):
    """Complete structured diff between two prompt file versions.

    ``is_breaking`` is a computed field derived from ``breaking_changes``,
    so it is included when the model is serialized.
    """

    file_path: Path = Field(..., description="Path to the diffed file")
    old_hash: str = Field(..., description="SHA256 hash of the old content")
    new_hash: str = Field(..., description="SHA256 hash of the new content")
    message_diffs: list[MessageDiff] = Field(
        default_factory=list,
        description="Per-message diffs",
    )
    variable_diffs: list[VariableDiff] = Field(
        default_factory=list,
        description="Per-variable diffs",
    )
    metadata_diffs: list[MetadataDiff] = Field(
        default_factory=list,
        description="Per-metadata-key diffs",
    )
    token_delta: TokenDelta = Field(
        ..., description="Token count comparison"
    )
    breaking_changes: list[BreakingChange] = Field(
        default_factory=list,
        description="Detected breaking changes",
    )

    @computed_field  # type: ignore[misc]
    @property
    def is_breaking(self) -> bool:
        """True if any breaking changes were detected."""
        return bool(self.breaking_changes)
promptdiff/reporter.py ADDED
@@ -0,0 +1,266 @@
1
+ """Output formatters for promptdiff.
2
+
3
+ Supports text (Rich terminal), JSON, and Markdown output formats.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import json
9
+ from typing import Any
10
+
11
+ from promptdiff.models import (
12
+ ChangeStatus,
13
+ PromptDiff,
14
+ )
15
+
16
+
17
+ # ---------------------------------------------------------------------------
18
+ # JSON output
19
+ # ---------------------------------------------------------------------------
20
+
21
+
22
def format_json(diff: PromptDiff) -> str:
    """Format a PromptDiff as JSON.

    Parameters
    ----------
    diff:
        The prompt diff to format.

    Returns
    -------
    str
        JSON string indented by two spaces, as produced by the model's
        own ``model_dump_json``.
    """
    return diff.model_dump_json(indent=2)
36
+
37
+
38
+ # ---------------------------------------------------------------------------
39
+ # Text output (Rich-ready, but returns plain strings with markup)
40
+ # ---------------------------------------------------------------------------
41
+
42
+ _STATUS_STYLE: dict[ChangeStatus, tuple[str, str]] = {
43
+ ChangeStatus.ADDED: ("[green]+[/green]", "green"),
44
+ ChangeStatus.REMOVED: ("[red]-[/red]", "red"),
45
+ ChangeStatus.MODIFIED: ("[yellow]~[/yellow]", "yellow"),
46
+ ChangeStatus.UNCHANGED: (" ", "dim"),
47
+ }
48
+
49
+
50
def format_text(diff: PromptDiff, show_token_detail: bool = False) -> str:
    """Format a PromptDiff as rich-markup terminal text.

    The report has a header, a breaking-change summary, the token delta,
    and one section each for changed messages, variables and metadata. A
    section is omitted when nothing in it changed. At most the first 10
    lines of each message's content diff are shown.

    All text taken from the diff (paths, roles, names, values, descriptions
    and diff lines) is escaped before it is placed between markup tags.
    Without escaping, Rich parses bracketed text such as the
    ``[variable]`` category label as a style tag and drops it from the
    rendered output.

    Parameters
    ----------
    diff:
        The prompt diff to format.
    show_token_detail:
        If True, include per-message token breakdowns. Unchanged messages
        are then listed as well, provided at least one message changed.

    Returns
    -------
    str
        Text with Rich markup suitable for Console.print().
    """
    # Local import: the rest of this module does not depend on rich.
    from rich.markup import escape

    lines: list[str] = []

    # Header
    lines.append("")
    lines.append(f"[bold]Prompt Diff:[/bold] {escape(str(diff.file_path))}")
    lines.append(f" old: {diff.old_hash[:12]} new: {diff.new_hash[:12]}")
    lines.append("")

    # Breaking changes summary
    if diff.is_breaking:
        lines.append(
            f"[bold red]BREAKING CHANGES ({len(diff.breaking_changes)}):[/bold red]"
        )
        severity_colors = {"high": "red", "medium": "yellow", "low": "blue"}
        for bc in diff.breaking_changes:
            severity_color = severity_colors.get(bc.severity, "white")
            # The category label is literal bracketed text, not a style tag.
            category_label = escape(f"[{bc.category}]")
            lines.append(
                f" [{severity_color}]{bc.severity.upper()}[/{severity_color}] "
                f"{category_label} {escape(bc.description)}"
            )
        lines.append("")
    else:
        lines.append("[green]No breaking changes detected.[/green]")
        lines.append("")

    # Token summary
    td = diff.token_delta
    delta_str = f"+{td.delta}" if td.delta > 0 else str(td.delta)
    pct_str = f"+{td.percent_change}%" if td.percent_change > 0 else f"{td.percent_change}%"
    # Growth is shown in red, shrinkage in green.
    delta_color = "red" if td.delta > 0 else "green" if td.delta < 0 else "dim"
    lines.append("[bold]Token Delta:[/bold]")
    lines.append(
        f" {td.old_total} -> {td.new_total} "
        f"([{delta_color}]{delta_str}[/{delta_color}], {pct_str})"
    )
    lines.append("")

    # Message diffs
    msg_changes = [m for m in diff.message_diffs if m.status != ChangeStatus.UNCHANGED]
    if msg_changes:
        lines.append(f"[bold]Messages ({len(msg_changes)} changed):[/bold]")
        for md in diff.message_diffs:
            marker, color = _STATUS_STYLE[md.status]
            if md.status == ChangeStatus.UNCHANGED and not show_token_detail:
                continue
            lines.append(f" {marker} [{color}]{escape(md.role)}[/{color}]")
            for change in md.changes:
                lines.append(f" {escape(change)}")
            if show_token_detail and md.token_delta != 0:
                sign = "+" if md.token_delta > 0 else ""
                lines.append(f" tokens: {sign}{md.token_delta}")
            if md.content_diff and md.status == ChangeStatus.MODIFIED:
                for dl in md.content_diff.splitlines()[:10]:
                    # "+++" / "---" are the unified-diff file headers and
                    # are left uncoloured.
                    if dl.startswith("+") and not dl.startswith("+++"):
                        lines.append(f" [green]{escape(dl)}[/green]")
                    elif dl.startswith("-") and not dl.startswith("---"):
                        lines.append(f" [red]{escape(dl)}[/red]")
                    else:
                        lines.append(f" {escape(dl)}")
        lines.append("")

    # Variable diffs
    var_changes = [v for v in diff.variable_diffs if v.status != ChangeStatus.UNCHANGED]
    if var_changes:
        lines.append(f"[bold]Variables ({len(var_changes)} changed):[/bold]")
        for vd in var_changes:
            marker, color = _STATUS_STYLE[vd.status]
            breaking_tag = " [red](BREAKING)[/red]" if vd.is_breaking else ""
            lines.append(f" {marker} [{color}]{escape(vd.name)}[/{color}]{breaking_tag}")
            if vd.status == ChangeStatus.ADDED and vd.new_default is not None:
                lines.append(f" default: {escape(repr(vd.new_default))}")
            elif vd.status == ChangeStatus.MODIFIED:
                lines.append(
                    f" default: {escape(repr(vd.old_default))} -> "
                    f"{escape(repr(vd.new_default))}"
                )
        lines.append("")

    # Metadata diffs
    meta_changes = [m for m in diff.metadata_diffs if m.status != ChangeStatus.UNCHANGED]
    if meta_changes:
        lines.append(f"[bold]Metadata ({len(meta_changes)} changed):[/bold]")
        for md in meta_changes:
            marker, color = _STATUS_STYLE[md.status]
            lines.append(f" {marker} [{color}]{escape(md.key)}[/{color}]")
            if md.status == ChangeStatus.ADDED:
                lines.append(f" value: {escape(repr(md.new_value))}")
            elif md.status == ChangeStatus.REMOVED:
                lines.append(f" was: {escape(repr(md.old_value))}")
            elif md.status == ChangeStatus.MODIFIED:
                lines.append(
                    f" {escape(repr(md.old_value))} -> {escape(repr(md.new_value))}"
                )
        lines.append("")

    return "\n".join(lines)
161
+
162
+
163
+ # ---------------------------------------------------------------------------
164
+ # Markdown output (for GitHub PR comments)
165
+ # ---------------------------------------------------------------------------
166
+
167
+ _STATUS_EMOJI: dict[ChangeStatus, str] = {
168
+ ChangeStatus.ADDED: "+",
169
+ ChangeStatus.REMOVED: "-",
170
+ ChangeStatus.MODIFIED: "~",
171
+ ChangeStatus.UNCHANGED: " ",
172
+ }
173
+
174
+
175
def format_markdown(diff: PromptDiff) -> str:
    """Format a PromptDiff as Markdown.

    The report has a header, a breaking-change section, a token-delta
    table, and one section each for changed messages, variables and
    metadata. A section is omitted when nothing in it changed. At most the
    first 15 lines of each message's content diff are included.

    Parameters
    ----------
    diff:
        The prompt diff to format.

    Returns
    -------
    str
        Markdown string suitable for GitHub PR comments.
    """
    lines: list[str] = []

    # Header
    lines.append(f"## Prompt Diff: `{diff.file_path}`")
    lines.append("")
    lines.append(f"**Old:** `{diff.old_hash[:12]}` | **New:** `{diff.new_hash[:12]}`")
    lines.append("")

    # Breaking changes
    if diff.is_breaking:
        lines.append(
            f"### Breaking Changes ({len(diff.breaking_changes)})"
        )
        lines.append("")
        severity_badges = {
            "high": "**HIGH**",
            "medium": "MEDIUM",
            "low": "low",
        }
        for bc in diff.breaking_changes:
            # An unrecognized severity is shown as-is.
            severity_badge = severity_badges.get(bc.severity, bc.severity)
            lines.append(
                f"- {severity_badge} [{bc.category}]: {bc.description}"
            )
        lines.append("")
    else:
        lines.append("No breaking changes detected.")
        lines.append("")

    # Token delta
    td = diff.token_delta
    delta_str = f"+{td.delta}" if td.delta > 0 else str(td.delta)
    pct_str = f"+{td.percent_change}%" if td.percent_change > 0 else f"{td.percent_change}%"
    lines.append("### Token Delta")
    lines.append("")
    lines.append("| Old | New | Delta | Change |")
    lines.append("|-----|-----|-------|--------|")
    lines.append(f"| {td.old_total} | {td.new_total} | {delta_str} | {pct_str} |")
    lines.append("")

    # Messages
    msg_changes = [m for m in diff.message_diffs if m.status != ChangeStatus.UNCHANGED]
    if msg_changes:
        lines.append(f"### Messages ({len(msg_changes)} changed)")
        lines.append("")
        for md in msg_changes:
            status_marker = _STATUS_EMOJI[md.status]
            lines.append(f"- `{status_marker}` **{md.role}**: {md.status.value}")
            for change in md.changes:
                lines.append(f" - {change}")
            if md.content_diff:
                lines.append("")
                lines.append(" ```diff")
                for dl in md.content_diff.splitlines()[:15]:
                    lines.append(f" {dl}")
                lines.append(" ```")
        lines.append("")

    # Variables
    var_changes = [v for v in diff.variable_diffs if v.status != ChangeStatus.UNCHANGED]
    if var_changes:
        lines.append(f"### Variables ({len(var_changes)} changed)")
        lines.append("")
        for vd in var_changes:
            status_marker = _STATUS_EMOJI[vd.status]
            breaking = " **(BREAKING)**" if vd.is_breaking else ""
            lines.append(f"- `{status_marker}` `{vd.name}`: {vd.status.value}{breaking}")
        lines.append("")

    # Metadata
    meta_changes = [m for m in diff.metadata_diffs if m.status != ChangeStatus.UNCHANGED]
    if meta_changes:
        lines.append(f"### Metadata ({len(meta_changes)} changed)")
        lines.append("")
        for md in meta_changes:
            status_marker = _STATUS_EMOJI[md.status]
            lines.append(f"- `{status_marker}` `{md.key}`: {md.status.value}")
        lines.append("")

    return "\n".join(lines)
@@ -0,0 +1,183 @@
1
+ Metadata-Version: 2.4
2
+ Name: promptdiff-ai
3
+ Version: 1.0.0
4
+ Summary: Semantic diff for LLM prompt changes
5
+ Project-URL: Homepage, https://github.com/scottconverse/promptdiff
6
+ Project-URL: Repository, https://github.com/scottconverse/promptdiff
7
+ Project-URL: Issues, https://github.com/scottconverse/promptdiff/issues
8
+ Author: Scott Converse
9
+ License: MIT
10
+ Keywords: ai,breaking-changes,diff,llm,prompt,semantic
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Software Development :: Quality Assurance
20
+ Requires-Python: >=3.9
21
+ Requires-Dist: prompttools-core-ai<2.0,>=1.0
22
+ Requires-Dist: rich>=13.0
23
+ Requires-Dist: typer[all]>=0.12
24
+ Provides-Extra: dev
25
+ Requires-Dist: mypy; extra == 'dev'
26
+ Requires-Dist: pytest-cov; extra == 'dev'
27
+ Requires-Dist: pytest>=8.0; extra == 'dev'
28
+ Requires-Dist: ruff; extra == 'dev'
29
+ Description-Content-Type: text/markdown
30
+
31
+ # promptdiff
32
+
33
+ Semantic diff for LLM prompt changes. Part of the [prompttools](https://github.com/scottconverse/prompttools) suite.
34
+
35
+ Unlike generic text diffing, promptdiff understands prompt structure: messages, variables, metadata, and token counts. It classifies changes as breaking or non-breaking and outputs structured reports suitable for CI/CD pipelines and GitHub PR comments.
36
+
37
+ ## Installation
38
+
39
+ ```bash
40
+ pip install promptdiff-ai
41
+ ```
42
+
43
+ ## Quick Start
44
+
45
+ ```bash
46
+ # Compare two prompt files
47
+ promptdiff old_prompt.yaml new_prompt.yaml
48
+
49
+ # JSON output for CI pipelines
50
+ promptdiff old.yaml new.yaml --format json
51
+
52
+ # Markdown output for GitHub PR comments
53
+ promptdiff old.yaml new.yaml --format markdown
54
+
55
+ # Exit with code 1 if breaking changes are found (CI gate)
56
+ promptdiff old.yaml new.yaml --exit-on-breaking
57
+
58
+ # Show per-message token breakdowns
59
+ promptdiff old.yaml new.yaml --token-detail
60
+ ```
61
+
62
+ ## What It Detects
63
+
64
+ ### Message-Level Changes
65
+ - Added, removed, or modified messages
66
+ - Per-message token deltas
67
+ - Unified content diffs within modified messages
68
+
69
+ ### Variable Changes
70
+ - New variables (with or without defaults)
71
+ - Removed variables
72
+ - Modified default values
73
+
74
+ ### Metadata Changes
75
+ - Model changes
76
+ - Added/removed/modified metadata keys
77
+
78
+ ### Token Deltas
79
+ - Total token count comparison
80
+ - Percentage change
81
+ - Per-message breakdowns (with `--token-detail`)
82
+
83
+ ## Breaking Change Classification
84
+
85
+ ### Breaking (High Severity)
86
+ - **New required variable** -- a variable added without a default value; existing callers will fail
87
+ - **Removed variable** -- callers referencing this variable will break
88
+ - **Removed message** -- changes the prompt structure
89
+
90
+ ### Breaking (Medium Severity)
91
+ - **Model change** -- may affect behavior, pricing, and capabilities
92
+ - **Role ordering change** -- may affect model behavior
93
+
94
+ ### Non-Breaking
95
+ - Added variable with a default value
96
+ - Added message (extends the prompt)
97
+ - Content modifications within existing messages
98
+ - Metadata changes (except model)
99
+
100
+ ## Output Formats
101
+
102
+ ### Text (default)
103
+ Rich terminal output with color-coded diffs:
104
+
105
+ ```
106
+ Prompt Diff: new_prompt.yaml
107
+ old: a1b2c3d4e5f6 new: f6e5d4c3b2a1
108
+
109
+ BREAKING CHANGES (2):
110
+ HIGH [variable] Variable 'tone' was removed
111
+ MEDIUM [model] Model changed from 'gpt-4' to 'gpt-4o'
112
+
113
+ Token Delta:
114
+ 150 -> 165 (+15, +10.0%)
115
+
116
+ Messages (1 changed):
117
+ ~ system
118
+ System message content modified
119
+ ```
120
+
121
+ ### JSON
122
+ Structured JSON for programmatic consumption:
123
+
124
+ ```bash
125
+ promptdiff old.yaml new.yaml --format json
126
+ ```
127
+
128
+ ### Markdown
129
+ GitHub-flavored Markdown for PR comments:
130
+
131
+ ```bash
132
+ promptdiff old.yaml new.yaml --format markdown
133
+ ```
134
+
135
+ ## Python API
136
+
137
+ ```python
138
+ from promptdiff import diff_files, format_text, format_json
139
+
140
+ # Compare two files
141
+ result = diff_files("prompts/v1.yaml", "prompts/v2.yaml")
142
+
143
+ # Check for breaking changes
144
+ if result.is_breaking:
145
+ for bc in result.breaking_changes:
146
+ print(f"[{bc.severity}] {bc.description}")
147
+
148
+ # Get token delta
149
+ print(f"Tokens: {result.token_delta.old_total} -> {result.token_delta.new_total}")
150
+
151
+ # Format output (format_json and format_markdown are the other exported formatters)
152
+ print(format_text(result))
153
+ ```
154
+
155
+ ## CLI Reference
156
+
157
+ ```
158
+ Usage: promptdiff [OPTIONS] FILE_A FILE_B
159
+
160
+ Arguments:
161
+ FILE_A Path to the old prompt file
162
+ FILE_B Path to the new prompt file
163
+
164
+ Options:
165
+ -f, --format [text|json|markdown] Output format (default: text)
166
+ --exit-on-breaking Exit with code 1 if breaking changes found
167
+ --token-detail Show per-message token breakdowns
168
+ -e, --encoding TEXT tiktoken encoding (default: cl100k_base)
169
+ -V, --version Show version and exit
170
+ --help Show this message and exit
171
+ ```
172
+
173
+ ## Supported File Formats
174
+
175
+ promptdiff reads every prompt file format supported by prompttools-core:
176
+ - YAML (`.yaml`, `.yml`)
177
+ - JSON (`.json`)
178
+ - Markdown (`.md`)
179
+ - Text (`.txt`)
180
+
181
+ ## License
182
+
183
+ MIT
@@ -0,0 +1,10 @@
1
+ promptdiff/__init__.py,sha256=PPiJkrvfsrjdZonlq3KqxSyfF8KfSq9Zkgvn_QjS0ZE,1055
2
+ promptdiff/analyzer.py,sha256=kP4gsNsKmi0kpleJghs9tJQeh2EvIjN7z-JrBy9TxHs,5203
3
+ promptdiff/cli.py,sha256=hvgO0cMD8F1rnUraPtJ9I_kfQXEhZ5jcyYZI160OvWo,3562
4
+ promptdiff/differ.py,sha256=WqNa9_mmqps3lVU4FePXTODSdFDBWVktDUdwmqzGqhc,11125
5
+ promptdiff/models.py,sha256=Amx-Ux2WPQzSjksiF4X-hMe5ljwsbIlRQghG3nlaeBw,4446
6
+ promptdiff/reporter.py,sha256=EfdKoVUEreik-LDXSU_Y_6vt-VxIt7iWY18yNfrq1zk,9401
7
+ promptdiff_ai-1.0.0.dist-info/METADATA,sha256=4uF2wQ7mKwGbVeQKhAt5VXsxPvGwhLw9y-ZnpNqmz3I,5020
8
+ promptdiff_ai-1.0.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
9
+ promptdiff_ai-1.0.0.dist-info/entry_points.txt,sha256=y1t43_RBs-tH4P2yHMEJJGskSOl5fQ25Kb5Fyz7a8RU,50
10
+ promptdiff_ai-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ promptdiff = promptdiff.cli:app