promptdiff-ai 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- promptdiff/__init__.py +52 -0
- promptdiff/analyzer.py +163 -0
- promptdiff/cli.py +123 -0
- promptdiff/differ.py +393 -0
- promptdiff/models.py +135 -0
- promptdiff/reporter.py +266 -0
- promptdiff_ai-1.0.0.dist-info/METADATA +183 -0
- promptdiff_ai-1.0.0.dist-info/RECORD +10 -0
- promptdiff_ai-1.0.0.dist-info/WHEEL +4 -0
- promptdiff_ai-1.0.0.dist-info/entry_points.txt +2 -0
promptdiff/__init__.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""promptdiff: Semantic diff for LLM prompt changes.
|
|
2
|
+
|
|
3
|
+
Public API exports for convenience imports::
|
|
4
|
+
|
|
5
|
+
from promptdiff import diff_files, PromptDiff, format_text
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from promptdiff.models import (
|
|
9
|
+
BreakingChange,
|
|
10
|
+
ChangeStatus,
|
|
11
|
+
MessageDiff,
|
|
12
|
+
MetadataDiff,
|
|
13
|
+
PromptDiff,
|
|
14
|
+
TokenDelta,
|
|
15
|
+
VariableDiff,
|
|
16
|
+
)
|
|
17
|
+
from promptdiff.differ import (
|
|
18
|
+
compute_token_delta,
|
|
19
|
+
diff_files,
|
|
20
|
+
diff_messages,
|
|
21
|
+
diff_metadata,
|
|
22
|
+
diff_variables,
|
|
23
|
+
)
|
|
24
|
+
from promptdiff.analyzer import analyze_breaking_changes
|
|
25
|
+
from promptdiff.reporter import format_json, format_markdown, format_text
|
|
26
|
+
|
|
27
|
+
__version__ = "1.0.0"
|
|
28
|
+
|
|
29
|
+
__all__ = [
|
|
30
|
+
# Models
|
|
31
|
+
"BreakingChange",
|
|
32
|
+
"ChangeStatus",
|
|
33
|
+
"MessageDiff",
|
|
34
|
+
"MetadataDiff",
|
|
35
|
+
"PromptDiff",
|
|
36
|
+
"TokenDelta",
|
|
37
|
+
"VariableDiff",
|
|
38
|
+
# Differ
|
|
39
|
+
"compute_token_delta",
|
|
40
|
+
"diff_files",
|
|
41
|
+
"diff_messages",
|
|
42
|
+
"diff_metadata",
|
|
43
|
+
"diff_variables",
|
|
44
|
+
# Analyzer
|
|
45
|
+
"analyze_breaking_changes",
|
|
46
|
+
# Reporter
|
|
47
|
+
"format_json",
|
|
48
|
+
"format_markdown",
|
|
49
|
+
"format_text",
|
|
50
|
+
# Version
|
|
51
|
+
"__version__",
|
|
52
|
+
]
|
promptdiff/analyzer.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"""Breaking change detection for promptdiff.
|
|
2
|
+
|
|
3
|
+
Analyzes a PromptDiff and classifies changes by severity.
|
|
4
|
+
|
|
5
|
+
Breaking changes:
|
|
6
|
+
- New required variable (no default) -- high severity
|
|
7
|
+
- Removed variable -- high severity
|
|
8
|
+
- Removed message -- high severity
|
|
9
|
+
- Changed role ordering -- medium severity
|
|
10
|
+
- Model change (in metadata) -- medium severity
|
|
11
|
+
|
|
12
|
+
Non-breaking changes:
|
|
13
|
+
- Added variable with default
|
|
14
|
+
- Added messages
|
|
15
|
+
- Content modifications
|
|
16
|
+
- Metadata changes (except model)
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
from promptdiff.models import (
|
|
22
|
+
BreakingChange,
|
|
23
|
+
ChangeStatus,
|
|
24
|
+
PromptDiff,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def analyze_breaking_changes(diff: PromptDiff) -> list[BreakingChange]:
|
|
29
|
+
"""Analyze a diff for breaking changes.
|
|
30
|
+
|
|
31
|
+
Parameters
|
|
32
|
+
----------
|
|
33
|
+
diff:
|
|
34
|
+
The prompt diff to analyze.
|
|
35
|
+
|
|
36
|
+
Returns
|
|
37
|
+
-------
|
|
38
|
+
list[BreakingChange]
|
|
39
|
+
All detected breaking changes sorted by severity.
|
|
40
|
+
"""
|
|
41
|
+
changes: list[BreakingChange] = []
|
|
42
|
+
changes.extend(_check_variable_changes(diff))
|
|
43
|
+
changes.extend(_check_message_changes(diff))
|
|
44
|
+
changes.extend(_check_metadata_changes(diff))
|
|
45
|
+
changes.extend(_check_role_ordering(diff))
|
|
46
|
+
|
|
47
|
+
# Sort by severity: high first, then medium, then low.
|
|
48
|
+
severity_order = {"high": 0, "medium": 1, "low": 2}
|
|
49
|
+
changes.sort(key=lambda c: severity_order.get(c.severity, 3))
|
|
50
|
+
|
|
51
|
+
return changes
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _check_variable_changes(diff: PromptDiff) -> list[BreakingChange]:
|
|
55
|
+
"""Detect breaking variable changes."""
|
|
56
|
+
changes: list[BreakingChange] = []
|
|
57
|
+
|
|
58
|
+
for vd in diff.variable_diffs:
|
|
59
|
+
if vd.status == ChangeStatus.REMOVED:
|
|
60
|
+
changes.append(
|
|
61
|
+
BreakingChange(
|
|
62
|
+
category="variable",
|
|
63
|
+
description=f"Variable '{vd.name}' was removed",
|
|
64
|
+
severity="high",
|
|
65
|
+
)
|
|
66
|
+
)
|
|
67
|
+
elif vd.status == ChangeStatus.ADDED and vd.is_breaking:
|
|
68
|
+
changes.append(
|
|
69
|
+
BreakingChange(
|
|
70
|
+
category="variable",
|
|
71
|
+
description=(
|
|
72
|
+
f"New required variable '{vd.name}' added without a default value"
|
|
73
|
+
),
|
|
74
|
+
severity="high",
|
|
75
|
+
)
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
return changes
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _check_message_changes(diff: PromptDiff) -> list[BreakingChange]:
|
|
82
|
+
"""Detect breaking message changes."""
|
|
83
|
+
changes: list[BreakingChange] = []
|
|
84
|
+
|
|
85
|
+
for md in diff.message_diffs:
|
|
86
|
+
if md.status == ChangeStatus.REMOVED:
|
|
87
|
+
changes.append(
|
|
88
|
+
BreakingChange(
|
|
89
|
+
category="message",
|
|
90
|
+
description=f"{md.role.capitalize()} message was removed",
|
|
91
|
+
severity="high",
|
|
92
|
+
)
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
return changes
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _check_metadata_changes(diff: PromptDiff) -> list[BreakingChange]:
|
|
99
|
+
"""Detect breaking metadata changes (model changes)."""
|
|
100
|
+
changes: list[BreakingChange] = []
|
|
101
|
+
|
|
102
|
+
for md in diff.metadata_diffs:
|
|
103
|
+
if md.key == "model" and md.status == ChangeStatus.MODIFIED:
|
|
104
|
+
changes.append(
|
|
105
|
+
BreakingChange(
|
|
106
|
+
category="model",
|
|
107
|
+
description=(
|
|
108
|
+
f"Model changed from '{md.old_value}' to '{md.new_value}'"
|
|
109
|
+
),
|
|
110
|
+
severity="medium",
|
|
111
|
+
)
|
|
112
|
+
)
|
|
113
|
+
elif md.key == "model" and md.status == ChangeStatus.REMOVED:
|
|
114
|
+
changes.append(
|
|
115
|
+
BreakingChange(
|
|
116
|
+
category="model",
|
|
117
|
+
description="Model specification was removed",
|
|
118
|
+
severity="medium",
|
|
119
|
+
)
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
return changes
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _check_role_ordering(diff: PromptDiff) -> list[BreakingChange]:
|
|
126
|
+
"""Detect role ordering changes.
|
|
127
|
+
|
|
128
|
+
If the sequence of roles (ignoring unchanged content) changes between
|
|
129
|
+
old and new, this is potentially breaking.
|
|
130
|
+
"""
|
|
131
|
+
changes: list[BreakingChange] = []
|
|
132
|
+
|
|
133
|
+
# Reconstruct old and new role sequences from the message diffs.
|
|
134
|
+
old_roles: list[str] = []
|
|
135
|
+
new_roles: list[str] = []
|
|
136
|
+
|
|
137
|
+
for md in diff.message_diffs:
|
|
138
|
+
if md.status == ChangeStatus.REMOVED:
|
|
139
|
+
old_roles.append(md.role)
|
|
140
|
+
elif md.status == ChangeStatus.ADDED:
|
|
141
|
+
new_roles.append(md.role)
|
|
142
|
+
elif md.status in (ChangeStatus.MODIFIED, ChangeStatus.UNCHANGED):
|
|
143
|
+
old_roles.append(md.role)
|
|
144
|
+
new_roles.append(md.role)
|
|
145
|
+
|
|
146
|
+
if old_roles != new_roles and len(old_roles) > 0 and len(new_roles) > 0:
|
|
147
|
+
# Only flag if neither list is a subset scenario (pure additions/removals
|
|
148
|
+
# are already caught above). Check if the common elements changed order.
|
|
149
|
+
common_old = [r for r in old_roles if r in new_roles]
|
|
150
|
+
common_new = [r for r in new_roles if r in old_roles]
|
|
151
|
+
if common_old != common_new:
|
|
152
|
+
changes.append(
|
|
153
|
+
BreakingChange(
|
|
154
|
+
category="role",
|
|
155
|
+
description=(
|
|
156
|
+
f"Message role ordering changed: "
|
|
157
|
+
f"{' -> '.join(old_roles)} to {' -> '.join(new_roles)}"
|
|
158
|
+
),
|
|
159
|
+
severity="medium",
|
|
160
|
+
)
|
|
161
|
+
)
|
|
162
|
+
|
|
163
|
+
return changes
|
promptdiff/cli.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""Typer CLI application for promptdiff.
|
|
2
|
+
|
|
3
|
+
Provides the ``promptdiff`` entry point for comparing two prompt files
|
|
4
|
+
and reporting differences in text, JSON, or Markdown format.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import sys
|
|
10
|
+
from enum import Enum
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Optional
|
|
13
|
+
|
|
14
|
+
import typer
|
|
15
|
+
from rich.console import Console
|
|
16
|
+
|
|
17
|
+
from promptdiff.differ import diff_files
|
|
18
|
+
from promptdiff.reporter import format_json, format_markdown, format_text
|
|
19
|
+
|
|
20
|
+
# ---------------------------------------------------------------------------
|
|
21
|
+
# Console
|
|
22
|
+
# ---------------------------------------------------------------------------
|
|
23
|
+
|
|
24
|
+
_console = Console()
|
|
25
|
+
_err_console = Console(stderr=True)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class OutputFormat(str, Enum):
|
|
29
|
+
"""Supported output formats."""
|
|
30
|
+
|
|
31
|
+
TEXT = "text"
|
|
32
|
+
JSON = "json"
|
|
33
|
+
MARKDOWN = "markdown"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# ---------------------------------------------------------------------------
|
|
37
|
+
# Version callback
|
|
38
|
+
# ---------------------------------------------------------------------------
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _version_callback(value: bool) -> None:
|
|
42
|
+
if value:
|
|
43
|
+
from promptdiff import __version__
|
|
44
|
+
|
|
45
|
+
_console.print(f"promptdiff {__version__}")
|
|
46
|
+
raise typer.Exit()
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# ---------------------------------------------------------------------------
|
|
50
|
+
# Main command — single-command app (no subcommands needed)
|
|
51
|
+
# ---------------------------------------------------------------------------
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def main(
|
|
55
|
+
file_a: Path = typer.Argument(..., help="Path to the old prompt file"),
|
|
56
|
+
file_b: Path = typer.Argument(..., help="Path to the new prompt file"),
|
|
57
|
+
output_format: OutputFormat = typer.Option(
|
|
58
|
+
OutputFormat.TEXT,
|
|
59
|
+
"--format",
|
|
60
|
+
"-f",
|
|
61
|
+
help="Output format: text, json, markdown",
|
|
62
|
+
),
|
|
63
|
+
exit_on_breaking: bool = typer.Option(
|
|
64
|
+
False,
|
|
65
|
+
"--exit-on-breaking",
|
|
66
|
+
help="Exit with code 1 if breaking changes are found",
|
|
67
|
+
),
|
|
68
|
+
token_detail: bool = typer.Option(
|
|
69
|
+
False,
|
|
70
|
+
"--token-detail",
|
|
71
|
+
help="Show per-message token breakdowns",
|
|
72
|
+
),
|
|
73
|
+
encoding: str = typer.Option(
|
|
74
|
+
"cl100k_base",
|
|
75
|
+
"--encoding",
|
|
76
|
+
"-e",
|
|
77
|
+
help="tiktoken encoding for token counting",
|
|
78
|
+
),
|
|
79
|
+
version: bool = typer.Option(
|
|
80
|
+
False,
|
|
81
|
+
"--version",
|
|
82
|
+
"-V",
|
|
83
|
+
callback=_version_callback,
|
|
84
|
+
is_eager=True,
|
|
85
|
+
help="Show version and exit",
|
|
86
|
+
),
|
|
87
|
+
) -> None:
|
|
88
|
+
"""Semantic diff for LLM prompt changes.
|
|
89
|
+
|
|
90
|
+
Compare two prompt files and show structured diff with message-level
|
|
91
|
+
changes, variable changes, token deltas, and breaking change classification.
|
|
92
|
+
"""
|
|
93
|
+
try:
|
|
94
|
+
result = diff_files(file_a, file_b, encoding=encoding)
|
|
95
|
+
except FileNotFoundError as exc:
|
|
96
|
+
_err_console.print(f"[red]Error:[/red] {exc}")
|
|
97
|
+
raise typer.Exit(2)
|
|
98
|
+
except Exception as exc:
|
|
99
|
+
_err_console.print(f"[red]Error:[/red] {exc}")
|
|
100
|
+
raise typer.Exit(2)
|
|
101
|
+
|
|
102
|
+
if output_format == OutputFormat.TEXT:
|
|
103
|
+
output = format_text(result, show_token_detail=token_detail)
|
|
104
|
+
_console.print(output)
|
|
105
|
+
elif output_format == OutputFormat.JSON:
|
|
106
|
+
output = format_json(result)
|
|
107
|
+
# Use print() to avoid Rich adding ANSI codes to raw JSON.
|
|
108
|
+
print(output)
|
|
109
|
+
elif output_format == OutputFormat.MARKDOWN:
|
|
110
|
+
output = format_markdown(result)
|
|
111
|
+
print(output)
|
|
112
|
+
|
|
113
|
+
if exit_on_breaking and result.is_breaking:
|
|
114
|
+
raise typer.Exit(1)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
app = typer.Typer(
|
|
118
|
+
name="promptdiff",
|
|
119
|
+
help="Semantic diff for LLM prompt files.",
|
|
120
|
+
add_completion=False,
|
|
121
|
+
no_args_is_help=True,
|
|
122
|
+
)
|
|
123
|
+
app.command()(main)
|
promptdiff/differ.py
ADDED
|
@@ -0,0 +1,393 @@
|
|
|
1
|
+
"""Core diff engine for promptdiff.
|
|
2
|
+
|
|
3
|
+
Compares two prompt files parsed via prompttools_core and produces a
|
|
4
|
+
structured PromptDiff with message-level, variable-level, and metadata-level
|
|
5
|
+
change information.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import difflib
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any, Optional
|
|
13
|
+
|
|
14
|
+
from prompttools_core import PromptFile, Tokenizer, count_tokens, parse_file
|
|
15
|
+
from prompttools_core.models import Message
|
|
16
|
+
|
|
17
|
+
from promptdiff.analyzer import analyze_breaking_changes
|
|
18
|
+
from promptdiff.models import (
|
|
19
|
+
ChangeStatus,
|
|
20
|
+
MessageDiff,
|
|
21
|
+
MetadataDiff,
|
|
22
|
+
PromptDiff,
|
|
23
|
+
TokenDelta,
|
|
24
|
+
VariableDiff,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
# Default encoding used for token counting when no model is specified.
|
|
28
|
+
_DEFAULT_ENCODING = "cl100k_base"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def diff_files(
|
|
32
|
+
old_path: Path,
|
|
33
|
+
new_path: Path,
|
|
34
|
+
encoding: str = _DEFAULT_ENCODING,
|
|
35
|
+
) -> PromptDiff:
|
|
36
|
+
"""Parse and diff two prompt files.
|
|
37
|
+
|
|
38
|
+
Parameters
|
|
39
|
+
----------
|
|
40
|
+
old_path:
|
|
41
|
+
Path to the old prompt file.
|
|
42
|
+
new_path:
|
|
43
|
+
Path to the new prompt file.
|
|
44
|
+
encoding:
|
|
45
|
+
tiktoken encoding name for token counting.
|
|
46
|
+
|
|
47
|
+
Returns
|
|
48
|
+
-------
|
|
49
|
+
PromptDiff
|
|
50
|
+
A structured diff between the two files.
|
|
51
|
+
"""
|
|
52
|
+
old_file = parse_file(Path(old_path))
|
|
53
|
+
new_file = parse_file(Path(new_path))
|
|
54
|
+
|
|
55
|
+
msg_diffs = diff_messages(old_file.messages, new_file.messages, encoding)
|
|
56
|
+
var_diffs = diff_variables(
|
|
57
|
+
old_file.variables,
|
|
58
|
+
new_file.variables,
|
|
59
|
+
old_file.variable_defaults,
|
|
60
|
+
new_file.variable_defaults,
|
|
61
|
+
)
|
|
62
|
+
meta_diffs = diff_metadata(old_file.metadata, new_file.metadata)
|
|
63
|
+
token_d = compute_token_delta(old_file, new_file, encoding)
|
|
64
|
+
|
|
65
|
+
result = PromptDiff(
|
|
66
|
+
file_path=new_path,
|
|
67
|
+
old_hash=old_file.content_hash,
|
|
68
|
+
new_hash=new_file.content_hash,
|
|
69
|
+
message_diffs=msg_diffs,
|
|
70
|
+
variable_diffs=var_diffs,
|
|
71
|
+
metadata_diffs=meta_diffs,
|
|
72
|
+
token_delta=token_d,
|
|
73
|
+
breaking_changes=[],
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
# Run breaking change analysis and attach results.
|
|
77
|
+
result.breaking_changes = analyze_breaking_changes(result)
|
|
78
|
+
|
|
79
|
+
return result
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def diff_messages(
|
|
83
|
+
old_msgs: list[Message],
|
|
84
|
+
new_msgs: list[Message],
|
|
85
|
+
encoding: str = _DEFAULT_ENCODING,
|
|
86
|
+
) -> list[MessageDiff]:
|
|
87
|
+
"""Align and diff two ordered lists of messages.
|
|
88
|
+
|
|
89
|
+
Alignment is done by (role, position-within-role). Messages are matched
|
|
90
|
+
by role in the order they appear -- the first system message in old is
|
|
91
|
+
compared with the first system message in new, etc.
|
|
92
|
+
|
|
93
|
+
Parameters
|
|
94
|
+
----------
|
|
95
|
+
old_msgs:
|
|
96
|
+
Messages from the old prompt file.
|
|
97
|
+
new_msgs:
|
|
98
|
+
Messages from the new prompt file.
|
|
99
|
+
encoding:
|
|
100
|
+
tiktoken encoding name for token counting.
|
|
101
|
+
|
|
102
|
+
Returns
|
|
103
|
+
-------
|
|
104
|
+
list[MessageDiff]
|
|
105
|
+
Ordered list of message diffs.
|
|
106
|
+
"""
|
|
107
|
+
# Group messages by role, preserving order within each role.
|
|
108
|
+
old_by_role: dict[str, list[Message]] = {}
|
|
109
|
+
new_by_role: dict[str, list[Message]] = {}
|
|
110
|
+
|
|
111
|
+
for msg in old_msgs:
|
|
112
|
+
old_by_role.setdefault(msg.role, []).append(msg)
|
|
113
|
+
for msg in new_msgs:
|
|
114
|
+
new_by_role.setdefault(msg.role, []).append(msg)
|
|
115
|
+
|
|
116
|
+
all_roles_ordered: list[str] = []
|
|
117
|
+
seen: set[str] = set()
|
|
118
|
+
for msg in old_msgs:
|
|
119
|
+
if msg.role not in seen:
|
|
120
|
+
all_roles_ordered.append(msg.role)
|
|
121
|
+
seen.add(msg.role)
|
|
122
|
+
for msg in new_msgs:
|
|
123
|
+
if msg.role not in seen:
|
|
124
|
+
all_roles_ordered.append(msg.role)
|
|
125
|
+
seen.add(msg.role)
|
|
126
|
+
|
|
127
|
+
diffs: list[MessageDiff] = []
|
|
128
|
+
|
|
129
|
+
for role in all_roles_ordered:
|
|
130
|
+
old_list = old_by_role.get(role, [])
|
|
131
|
+
new_list = new_by_role.get(role, [])
|
|
132
|
+
|
|
133
|
+
max_len = max(len(old_list), len(new_list))
|
|
134
|
+
for i in range(max_len):
|
|
135
|
+
old_msg = old_list[i] if i < len(old_list) else None
|
|
136
|
+
new_msg = new_list[i] if i < len(new_list) else None
|
|
137
|
+
diffs.append(_diff_single_message(old_msg, new_msg, role, encoding))
|
|
138
|
+
|
|
139
|
+
return diffs
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _diff_single_message(
|
|
143
|
+
old_msg: Optional[Message],
|
|
144
|
+
new_msg: Optional[Message],
|
|
145
|
+
role: str,
|
|
146
|
+
encoding: str,
|
|
147
|
+
) -> MessageDiff:
|
|
148
|
+
"""Diff a single aligned pair of messages."""
|
|
149
|
+
if old_msg is None and new_msg is not None:
|
|
150
|
+
# Added
|
|
151
|
+
new_tokens = count_tokens(new_msg.content, encoding)
|
|
152
|
+
return MessageDiff(
|
|
153
|
+
status=ChangeStatus.ADDED,
|
|
154
|
+
role=role,
|
|
155
|
+
new_content=new_msg.content,
|
|
156
|
+
token_delta=new_tokens,
|
|
157
|
+
changes=[f"Added {role} message ({new_tokens} tokens)"],
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
if old_msg is not None and new_msg is None:
|
|
161
|
+
# Removed
|
|
162
|
+
old_tokens = count_tokens(old_msg.content, encoding)
|
|
163
|
+
return MessageDiff(
|
|
164
|
+
status=ChangeStatus.REMOVED,
|
|
165
|
+
role=role,
|
|
166
|
+
old_content=old_msg.content,
|
|
167
|
+
token_delta=-old_tokens,
|
|
168
|
+
changes=[f"Removed {role} message ({old_tokens} tokens)"],
|
|
169
|
+
)
|
|
170
|
+
|
|
171
|
+
# Both exist -- compare content.
|
|
172
|
+
assert old_msg is not None and new_msg is not None
|
|
173
|
+
|
|
174
|
+
if old_msg.content == new_msg.content:
|
|
175
|
+
return MessageDiff(
|
|
176
|
+
status=ChangeStatus.UNCHANGED,
|
|
177
|
+
role=role,
|
|
178
|
+
old_content=old_msg.content,
|
|
179
|
+
new_content=new_msg.content,
|
|
180
|
+
token_delta=0,
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
# Modified
|
|
184
|
+
old_tokens = count_tokens(old_msg.content, encoding)
|
|
185
|
+
new_tokens = count_tokens(new_msg.content, encoding)
|
|
186
|
+
delta = new_tokens - old_tokens
|
|
187
|
+
|
|
188
|
+
content_diff = "\n".join(
|
|
189
|
+
difflib.unified_diff(
|
|
190
|
+
old_msg.content.splitlines(),
|
|
191
|
+
new_msg.content.splitlines(),
|
|
192
|
+
lineterm="",
|
|
193
|
+
fromfile="old",
|
|
194
|
+
tofile="new",
|
|
195
|
+
)
|
|
196
|
+
)
|
|
197
|
+
|
|
198
|
+
changes: list[str] = []
|
|
199
|
+
if delta != 0:
|
|
200
|
+
direction = "increased" if delta > 0 else "decreased"
|
|
201
|
+
changes.append(
|
|
202
|
+
f"{role.capitalize()} message {direction} by {abs(delta)} tokens"
|
|
203
|
+
)
|
|
204
|
+
changes.append(f"{role.capitalize()} message content modified")
|
|
205
|
+
|
|
206
|
+
return MessageDiff(
|
|
207
|
+
status=ChangeStatus.MODIFIED,
|
|
208
|
+
role=role,
|
|
209
|
+
old_content=old_msg.content,
|
|
210
|
+
new_content=new_msg.content,
|
|
211
|
+
content_diff=content_diff,
|
|
212
|
+
token_delta=delta,
|
|
213
|
+
changes=changes,
|
|
214
|
+
)
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def diff_variables(
|
|
218
|
+
old_vars: dict[str, str],
|
|
219
|
+
new_vars: dict[str, str],
|
|
220
|
+
old_defaults: dict[str, str],
|
|
221
|
+
new_defaults: dict[str, str],
|
|
222
|
+
) -> list[VariableDiff]:
|
|
223
|
+
"""Compare variable sets between old and new prompt versions.
|
|
224
|
+
|
|
225
|
+
Parameters
|
|
226
|
+
----------
|
|
227
|
+
old_vars:
|
|
228
|
+
Variables found in the old version (name -> syntax style).
|
|
229
|
+
new_vars:
|
|
230
|
+
Variables found in the new version (name -> syntax style).
|
|
231
|
+
old_defaults:
|
|
232
|
+
Default values from the old version metadata.
|
|
233
|
+
new_defaults:
|
|
234
|
+
Default values from the new version metadata.
|
|
235
|
+
|
|
236
|
+
Returns
|
|
237
|
+
-------
|
|
238
|
+
list[VariableDiff]
|
|
239
|
+
List of variable diffs.
|
|
240
|
+
"""
|
|
241
|
+
all_names = sorted(set(old_vars) | set(new_vars))
|
|
242
|
+
diffs: list[VariableDiff] = []
|
|
243
|
+
|
|
244
|
+
for name in all_names:
|
|
245
|
+
in_old = name in old_vars
|
|
246
|
+
in_new = name in new_vars
|
|
247
|
+
|
|
248
|
+
if in_old and not in_new:
|
|
249
|
+
diffs.append(
|
|
250
|
+
VariableDiff(
|
|
251
|
+
name=name,
|
|
252
|
+
status=ChangeStatus.REMOVED,
|
|
253
|
+
old_default=old_defaults.get(name),
|
|
254
|
+
is_breaking=True,
|
|
255
|
+
)
|
|
256
|
+
)
|
|
257
|
+
elif not in_old and in_new:
|
|
258
|
+
new_default = new_defaults.get(name)
|
|
259
|
+
# Breaking if no default is provided for the new variable.
|
|
260
|
+
diffs.append(
|
|
261
|
+
VariableDiff(
|
|
262
|
+
name=name,
|
|
263
|
+
status=ChangeStatus.ADDED,
|
|
264
|
+
new_default=new_default,
|
|
265
|
+
is_breaking=new_default is None,
|
|
266
|
+
)
|
|
267
|
+
)
|
|
268
|
+
else:
|
|
269
|
+
# Present in both -- check if default changed.
|
|
270
|
+
old_def = old_defaults.get(name)
|
|
271
|
+
new_def = new_defaults.get(name)
|
|
272
|
+
if old_def == new_def:
|
|
273
|
+
diffs.append(
|
|
274
|
+
VariableDiff(
|
|
275
|
+
name=name,
|
|
276
|
+
status=ChangeStatus.UNCHANGED,
|
|
277
|
+
old_default=old_def,
|
|
278
|
+
new_default=new_def,
|
|
279
|
+
)
|
|
280
|
+
)
|
|
281
|
+
else:
|
|
282
|
+
diffs.append(
|
|
283
|
+
VariableDiff(
|
|
284
|
+
name=name,
|
|
285
|
+
status=ChangeStatus.MODIFIED,
|
|
286
|
+
old_default=old_def,
|
|
287
|
+
new_default=new_def,
|
|
288
|
+
)
|
|
289
|
+
)
|
|
290
|
+
|
|
291
|
+
return diffs
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def diff_metadata(
|
|
295
|
+
old_meta: dict[str, Any],
|
|
296
|
+
new_meta: dict[str, Any],
|
|
297
|
+
) -> list[MetadataDiff]:
|
|
298
|
+
"""Compare metadata dictionaries.
|
|
299
|
+
|
|
300
|
+
Parameters
|
|
301
|
+
----------
|
|
302
|
+
old_meta:
|
|
303
|
+
Metadata from the old prompt file.
|
|
304
|
+
new_meta:
|
|
305
|
+
Metadata from the new prompt file.
|
|
306
|
+
|
|
307
|
+
Returns
|
|
308
|
+
-------
|
|
309
|
+
list[MetadataDiff]
|
|
310
|
+
List of metadata diffs.
|
|
311
|
+
"""
|
|
312
|
+
all_keys = sorted(set(old_meta) | set(new_meta))
|
|
313
|
+
diffs: list[MetadataDiff] = []
|
|
314
|
+
|
|
315
|
+
for key in all_keys:
|
|
316
|
+
in_old = key in old_meta
|
|
317
|
+
in_new = key in new_meta
|
|
318
|
+
|
|
319
|
+
if in_old and not in_new:
|
|
320
|
+
diffs.append(
|
|
321
|
+
MetadataDiff(
|
|
322
|
+
key=key,
|
|
323
|
+
status=ChangeStatus.REMOVED,
|
|
324
|
+
old_value=old_meta[key],
|
|
325
|
+
)
|
|
326
|
+
)
|
|
327
|
+
elif not in_old and in_new:
|
|
328
|
+
diffs.append(
|
|
329
|
+
MetadataDiff(
|
|
330
|
+
key=key,
|
|
331
|
+
status=ChangeStatus.ADDED,
|
|
332
|
+
new_value=new_meta[key],
|
|
333
|
+
)
|
|
334
|
+
)
|
|
335
|
+
elif old_meta[key] == new_meta[key]:
|
|
336
|
+
diffs.append(
|
|
337
|
+
MetadataDiff(
|
|
338
|
+
key=key,
|
|
339
|
+
status=ChangeStatus.UNCHANGED,
|
|
340
|
+
old_value=old_meta[key],
|
|
341
|
+
new_value=new_meta[key],
|
|
342
|
+
)
|
|
343
|
+
)
|
|
344
|
+
else:
|
|
345
|
+
diffs.append(
|
|
346
|
+
MetadataDiff(
|
|
347
|
+
key=key,
|
|
348
|
+
status=ChangeStatus.MODIFIED,
|
|
349
|
+
old_value=old_meta[key],
|
|
350
|
+
new_value=new_meta[key],
|
|
351
|
+
)
|
|
352
|
+
)
|
|
353
|
+
|
|
354
|
+
return diffs
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def compute_token_delta(
|
|
358
|
+
old_file: PromptFile,
|
|
359
|
+
new_file: PromptFile,
|
|
360
|
+
encoding: str = _DEFAULT_ENCODING,
|
|
361
|
+
) -> TokenDelta:
|
|
362
|
+
"""Compute token count comparison between two prompt files.
|
|
363
|
+
|
|
364
|
+
Parameters
|
|
365
|
+
----------
|
|
366
|
+
old_file:
|
|
367
|
+
The old parsed prompt file.
|
|
368
|
+
new_file:
|
|
369
|
+
The new parsed prompt file.
|
|
370
|
+
encoding:
|
|
371
|
+
tiktoken encoding name for token counting.
|
|
372
|
+
|
|
373
|
+
Returns
|
|
374
|
+
-------
|
|
375
|
+
TokenDelta
|
|
376
|
+
Token count delta.
|
|
377
|
+
"""
|
|
378
|
+
tokenizer = Tokenizer(encoding=encoding)
|
|
379
|
+
old_total = tokenizer.count_file(old_file)
|
|
380
|
+
new_total = tokenizer.count_file(new_file)
|
|
381
|
+
|
|
382
|
+
delta = new_total - old_total
|
|
383
|
+
if old_total == 0:
|
|
384
|
+
percent_change = 100.0 if new_total > 0 else 0.0
|
|
385
|
+
else:
|
|
386
|
+
percent_change = round((delta / old_total) * 100, 2)
|
|
387
|
+
|
|
388
|
+
return TokenDelta(
|
|
389
|
+
old_total=old_total,
|
|
390
|
+
new_total=new_total,
|
|
391
|
+
delta=delta,
|
|
392
|
+
percent_change=percent_change,
|
|
393
|
+
)
|
promptdiff/models.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""Data models for promptdiff.
|
|
2
|
+
|
|
3
|
+
All models use Pydantic v2 syntax. These define the structured representation
|
|
4
|
+
of prompt diffs, including message-level changes, variable changes, token
|
|
5
|
+
deltas, and breaking change classification.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from enum import Enum
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any, Optional
|
|
13
|
+
|
|
14
|
+
from pydantic import BaseModel, Field, computed_field
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ChangeStatus(str, Enum):
|
|
18
|
+
"""Status of a change between two versions."""
|
|
19
|
+
|
|
20
|
+
ADDED = "added"
|
|
21
|
+
REMOVED = "removed"
|
|
22
|
+
MODIFIED = "modified"
|
|
23
|
+
UNCHANGED = "unchanged"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class MessageDiff(BaseModel):
|
|
27
|
+
"""Diff of a single message between old and new prompt versions."""
|
|
28
|
+
|
|
29
|
+
status: ChangeStatus = Field(..., description="Change status for this message")
|
|
30
|
+
role: str = Field(..., description="Message role (system, user, assistant, tool)")
|
|
31
|
+
old_content: Optional[str] = Field(
|
|
32
|
+
default=None, description="Content in the old version"
|
|
33
|
+
)
|
|
34
|
+
new_content: Optional[str] = Field(
|
|
35
|
+
default=None, description="Content in the new version"
|
|
36
|
+
)
|
|
37
|
+
content_diff: Optional[str] = Field(
|
|
38
|
+
default=None, description="Unified diff of the content"
|
|
39
|
+
)
|
|
40
|
+
token_delta: int = Field(
|
|
41
|
+
default=0, description="Change in token count for this message"
|
|
42
|
+
)
|
|
43
|
+
changes: list[str] = Field(
|
|
44
|
+
default_factory=list,
|
|
45
|
+
description="Human-readable descriptions of changes",
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class VariableDiff(BaseModel):
|
|
50
|
+
"""Diff of a template variable between old and new prompt versions."""
|
|
51
|
+
|
|
52
|
+
name: str = Field(..., description="Variable name")
|
|
53
|
+
status: ChangeStatus = Field(..., description="Change status for this variable")
|
|
54
|
+
old_default: Optional[str] = Field(
|
|
55
|
+
default=None, description="Default value in the old version"
|
|
56
|
+
)
|
|
57
|
+
new_default: Optional[str] = Field(
|
|
58
|
+
default=None, description="Default value in the new version"
|
|
59
|
+
)
|
|
60
|
+
is_breaking: bool = Field(
|
|
61
|
+
default=False,
|
|
62
|
+
description="True if this is a breaking change",
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class MetadataDiff(BaseModel):
|
|
67
|
+
"""Diff of a metadata key between old and new prompt versions."""
|
|
68
|
+
|
|
69
|
+
key: str = Field(..., description="Metadata key")
|
|
70
|
+
status: ChangeStatus = Field(..., description="Change status for this key")
|
|
71
|
+
old_value: Optional[Any] = Field(
|
|
72
|
+
default=None, description="Value in the old version"
|
|
73
|
+
)
|
|
74
|
+
new_value: Optional[Any] = Field(
|
|
75
|
+
default=None, description="Value in the new version"
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class TokenDelta(BaseModel):
|
|
80
|
+
"""Token count comparison between old and new prompt versions."""
|
|
81
|
+
|
|
82
|
+
old_total: int = Field(..., description="Total tokens in old version")
|
|
83
|
+
new_total: int = Field(..., description="Total tokens in new version")
|
|
84
|
+
delta: int = Field(..., description="Change in token count (new - old)")
|
|
85
|
+
percent_change: float = Field(
|
|
86
|
+
..., description="Percentage change in token count"
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class BreakingChange(BaseModel):
|
|
91
|
+
"""A detected breaking change between prompt versions."""
|
|
92
|
+
|
|
93
|
+
category: str = Field(
|
|
94
|
+
...,
|
|
95
|
+
description="Category: variable, message, model, role",
|
|
96
|
+
)
|
|
97
|
+
description: str = Field(
|
|
98
|
+
..., description="Human-readable description of the breaking change"
|
|
99
|
+
)
|
|
100
|
+
severity: str = Field(
|
|
101
|
+
..., description="Severity: high, medium, low"
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class PromptDiff(BaseModel):
|
|
106
|
+
"""Complete structured diff between two prompt file versions."""
|
|
107
|
+
|
|
108
|
+
file_path: Path = Field(..., description="Path to the diffed file")
|
|
109
|
+
old_hash: str = Field(..., description="SHA256 hash of the old content")
|
|
110
|
+
new_hash: str = Field(..., description="SHA256 hash of the new content")
|
|
111
|
+
message_diffs: list[MessageDiff] = Field(
|
|
112
|
+
default_factory=list,
|
|
113
|
+
description="Per-message diffs",
|
|
114
|
+
)
|
|
115
|
+
variable_diffs: list[VariableDiff] = Field(
|
|
116
|
+
default_factory=list,
|
|
117
|
+
description="Per-variable diffs",
|
|
118
|
+
)
|
|
119
|
+
metadata_diffs: list[MetadataDiff] = Field(
|
|
120
|
+
default_factory=list,
|
|
121
|
+
description="Per-metadata-key diffs",
|
|
122
|
+
)
|
|
123
|
+
token_delta: TokenDelta = Field(
|
|
124
|
+
..., description="Token count comparison"
|
|
125
|
+
)
|
|
126
|
+
breaking_changes: list[BreakingChange] = Field(
|
|
127
|
+
default_factory=list,
|
|
128
|
+
description="Detected breaking changes",
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
@computed_field # type: ignore[misc]
|
|
132
|
+
@property
|
|
133
|
+
def is_breaking(self) -> bool:
|
|
134
|
+
"""True if any breaking changes were detected."""
|
|
135
|
+
return len(self.breaking_changes) > 0
|
promptdiff/reporter.py
ADDED
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
"""Output formatters for promptdiff.
|
|
2
|
+
|
|
3
|
+
Supports text (Rich terminal), JSON, and Markdown output formats.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from promptdiff.models import (
|
|
12
|
+
ChangeStatus,
|
|
13
|
+
PromptDiff,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# ---------------------------------------------------------------------------
|
|
18
|
+
# JSON output
|
|
19
|
+
# ---------------------------------------------------------------------------
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def format_json(diff: PromptDiff) -> str:
|
|
23
|
+
"""Format a PromptDiff as JSON.
|
|
24
|
+
|
|
25
|
+
Parameters
|
|
26
|
+
----------
|
|
27
|
+
diff:
|
|
28
|
+
The prompt diff to format.
|
|
29
|
+
|
|
30
|
+
Returns
|
|
31
|
+
-------
|
|
32
|
+
str
|
|
33
|
+
JSON string.
|
|
34
|
+
"""
|
|
35
|
+
return diff.model_dump_json(indent=2)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# ---------------------------------------------------------------------------
|
|
39
|
+
# Text output (Rich-ready, but returns plain strings with markup)
|
|
40
|
+
# ---------------------------------------------------------------------------
|
|
41
|
+
|
|
42
|
+
_STATUS_STYLE: dict[ChangeStatus, tuple[str, str]] = {
|
|
43
|
+
ChangeStatus.ADDED: ("[green]+[/green]", "green"),
|
|
44
|
+
ChangeStatus.REMOVED: ("[red]-[/red]", "red"),
|
|
45
|
+
ChangeStatus.MODIFIED: ("[yellow]~[/yellow]", "yellow"),
|
|
46
|
+
ChangeStatus.UNCHANGED: (" ", "dim"),
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def format_text(diff: PromptDiff, show_token_detail: bool = False) -> str:
|
|
51
|
+
"""Format a PromptDiff as rich-markup terminal text.
|
|
52
|
+
|
|
53
|
+
Parameters
|
|
54
|
+
----------
|
|
55
|
+
diff:
|
|
56
|
+
The prompt diff to format.
|
|
57
|
+
show_token_detail:
|
|
58
|
+
If True, include per-message token breakdowns.
|
|
59
|
+
|
|
60
|
+
Returns
|
|
61
|
+
-------
|
|
62
|
+
str
|
|
63
|
+
Text with Rich markup suitable for Console.print().
|
|
64
|
+
"""
|
|
65
|
+
lines: list[str] = []
|
|
66
|
+
|
|
67
|
+
# Header
|
|
68
|
+
lines.append("")
|
|
69
|
+
lines.append(f"[bold]Prompt Diff:[/bold] {diff.file_path}")
|
|
70
|
+
lines.append(f" old: {diff.old_hash[:12]} new: {diff.new_hash[:12]}")
|
|
71
|
+
lines.append("")
|
|
72
|
+
|
|
73
|
+
# Breaking changes summary
|
|
74
|
+
if diff.is_breaking:
|
|
75
|
+
lines.append(
|
|
76
|
+
f"[bold red]BREAKING CHANGES ({len(diff.breaking_changes)}):[/bold red]"
|
|
77
|
+
)
|
|
78
|
+
for bc in diff.breaking_changes:
|
|
79
|
+
severity_color = {"high": "red", "medium": "yellow", "low": "blue"}.get(
|
|
80
|
+
bc.severity, "white"
|
|
81
|
+
)
|
|
82
|
+
lines.append(
|
|
83
|
+
f" [{severity_color}]{bc.severity.upper()}[/{severity_color}] "
|
|
84
|
+
f"[{bc.category}] {bc.description}"
|
|
85
|
+
)
|
|
86
|
+
lines.append("")
|
|
87
|
+
else:
|
|
88
|
+
lines.append("[green]No breaking changes detected.[/green]")
|
|
89
|
+
lines.append("")
|
|
90
|
+
|
|
91
|
+
# Token summary
|
|
92
|
+
td = diff.token_delta
|
|
93
|
+
delta_str = f"+{td.delta}" if td.delta > 0 else str(td.delta)
|
|
94
|
+
pct_str = f"+{td.percent_change}%" if td.percent_change > 0 else f"{td.percent_change}%"
|
|
95
|
+
delta_color = "red" if td.delta > 0 else "green" if td.delta < 0 else "dim"
|
|
96
|
+
lines.append("[bold]Token Delta:[/bold]")
|
|
97
|
+
lines.append(
|
|
98
|
+
f" {td.old_total} -> {td.new_total} "
|
|
99
|
+
f"([{delta_color}]{delta_str}[/{delta_color}], {pct_str})"
|
|
100
|
+
)
|
|
101
|
+
lines.append("")
|
|
102
|
+
|
|
103
|
+
# Message diffs
|
|
104
|
+
msg_changes = [m for m in diff.message_diffs if m.status != ChangeStatus.UNCHANGED]
|
|
105
|
+
if msg_changes:
|
|
106
|
+
lines.append(f"[bold]Messages ({len(msg_changes)} changed):[/bold]")
|
|
107
|
+
for md in diff.message_diffs:
|
|
108
|
+
marker, color = _STATUS_STYLE[md.status]
|
|
109
|
+
if md.status == ChangeStatus.UNCHANGED and not show_token_detail:
|
|
110
|
+
continue
|
|
111
|
+
lines.append(f" {marker} [{color}]{md.role}[/{color}]")
|
|
112
|
+
for change in md.changes:
|
|
113
|
+
lines.append(f" {change}")
|
|
114
|
+
if show_token_detail and md.token_delta != 0:
|
|
115
|
+
sign = "+" if md.token_delta > 0 else ""
|
|
116
|
+
lines.append(f" tokens: {sign}{md.token_delta}")
|
|
117
|
+
if md.content_diff and md.status == ChangeStatus.MODIFIED:
|
|
118
|
+
for dl in md.content_diff.splitlines()[:10]:
|
|
119
|
+
if dl.startswith("+") and not dl.startswith("+++"):
|
|
120
|
+
lines.append(f" [green]{dl}[/green]")
|
|
121
|
+
elif dl.startswith("-") and not dl.startswith("---"):
|
|
122
|
+
lines.append(f" [red]{dl}[/red]")
|
|
123
|
+
else:
|
|
124
|
+
lines.append(f" {dl}")
|
|
125
|
+
lines.append("")
|
|
126
|
+
|
|
127
|
+
# Variable diffs
|
|
128
|
+
var_changes = [v for v in diff.variable_diffs if v.status != ChangeStatus.UNCHANGED]
|
|
129
|
+
if var_changes:
|
|
130
|
+
lines.append(f"[bold]Variables ({len(var_changes)} changed):[/bold]")
|
|
131
|
+
for vd in var_changes:
|
|
132
|
+
marker, color = _STATUS_STYLE[vd.status]
|
|
133
|
+
breaking_tag = " [red](BREAKING)[/red]" if vd.is_breaking else ""
|
|
134
|
+
lines.append(f" {marker} [{color}]{vd.name}[/{color}]{breaking_tag}")
|
|
135
|
+
if vd.status == ChangeStatus.ADDED and vd.new_default is not None:
|
|
136
|
+
lines.append(f" default: {vd.new_default!r}")
|
|
137
|
+
elif vd.status == ChangeStatus.MODIFIED:
|
|
138
|
+
lines.append(
|
|
139
|
+
f" default: {vd.old_default!r} -> {vd.new_default!r}"
|
|
140
|
+
)
|
|
141
|
+
lines.append("")
|
|
142
|
+
|
|
143
|
+
# Metadata diffs
|
|
144
|
+
meta_changes = [m for m in diff.metadata_diffs if m.status != ChangeStatus.UNCHANGED]
|
|
145
|
+
if meta_changes:
|
|
146
|
+
lines.append(f"[bold]Metadata ({len(meta_changes)} changed):[/bold]")
|
|
147
|
+
for md in meta_changes:
|
|
148
|
+
marker, color = _STATUS_STYLE[md.status]
|
|
149
|
+
lines.append(f" {marker} [{color}]{md.key}[/{color}]")
|
|
150
|
+
if md.status == ChangeStatus.ADDED:
|
|
151
|
+
lines.append(f" value: {md.new_value!r}")
|
|
152
|
+
elif md.status == ChangeStatus.REMOVED:
|
|
153
|
+
lines.append(f" was: {md.old_value!r}")
|
|
154
|
+
elif md.status == ChangeStatus.MODIFIED:
|
|
155
|
+
lines.append(
|
|
156
|
+
f" {md.old_value!r} -> {md.new_value!r}"
|
|
157
|
+
)
|
|
158
|
+
lines.append("")
|
|
159
|
+
|
|
160
|
+
return "\n".join(lines)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
# ---------------------------------------------------------------------------
|
|
164
|
+
# Markdown output (for GitHub PR comments)
|
|
165
|
+
# ---------------------------------------------------------------------------
|
|
166
|
+
|
|
167
|
+
_STATUS_EMOJI: dict[ChangeStatus, str] = {
|
|
168
|
+
ChangeStatus.ADDED: "+",
|
|
169
|
+
ChangeStatus.REMOVED: "-",
|
|
170
|
+
ChangeStatus.MODIFIED: "~",
|
|
171
|
+
ChangeStatus.UNCHANGED: " ",
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def format_markdown(diff: PromptDiff) -> str:
|
|
176
|
+
"""Format a PromptDiff as Markdown.
|
|
177
|
+
|
|
178
|
+
Parameters
|
|
179
|
+
----------
|
|
180
|
+
diff:
|
|
181
|
+
The prompt diff to format.
|
|
182
|
+
|
|
183
|
+
Returns
|
|
184
|
+
-------
|
|
185
|
+
str
|
|
186
|
+
Markdown string suitable for GitHub PR comments.
|
|
187
|
+
"""
|
|
188
|
+
lines: list[str] = []
|
|
189
|
+
|
|
190
|
+
# Header
|
|
191
|
+
lines.append(f"## Prompt Diff: `{diff.file_path}`")
|
|
192
|
+
lines.append("")
|
|
193
|
+
lines.append(f"**Old:** `{diff.old_hash[:12]}` | **New:** `{diff.new_hash[:12]}`")
|
|
194
|
+
lines.append("")
|
|
195
|
+
|
|
196
|
+
# Breaking changes
|
|
197
|
+
if diff.is_breaking:
|
|
198
|
+
lines.append(
|
|
199
|
+
f"### Breaking Changes ({len(diff.breaking_changes)})"
|
|
200
|
+
)
|
|
201
|
+
lines.append("")
|
|
202
|
+
for bc in diff.breaking_changes:
|
|
203
|
+
severity_badge = {
|
|
204
|
+
"high": "**HIGH**",
|
|
205
|
+
"medium": "MEDIUM",
|
|
206
|
+
"low": "low",
|
|
207
|
+
}.get(bc.severity, bc.severity)
|
|
208
|
+
lines.append(
|
|
209
|
+
f"- {severity_badge} [{bc.category}]: {bc.description}"
|
|
210
|
+
)
|
|
211
|
+
lines.append("")
|
|
212
|
+
else:
|
|
213
|
+
lines.append("No breaking changes detected.")
|
|
214
|
+
lines.append("")
|
|
215
|
+
|
|
216
|
+
# Token delta
|
|
217
|
+
td = diff.token_delta
|
|
218
|
+
delta_str = f"+{td.delta}" if td.delta > 0 else str(td.delta)
|
|
219
|
+
pct_str = f"+{td.percent_change}%" if td.percent_change > 0 else f"{td.percent_change}%"
|
|
220
|
+
lines.append("### Token Delta")
|
|
221
|
+
lines.append("")
|
|
222
|
+
lines.append(f"| Old | New | Delta | Change |")
|
|
223
|
+
lines.append(f"|-----|-----|-------|--------|")
|
|
224
|
+
lines.append(f"| {td.old_total} | {td.new_total} | {delta_str} | {pct_str} |")
|
|
225
|
+
lines.append("")
|
|
226
|
+
|
|
227
|
+
# Messages
|
|
228
|
+
msg_changes = [m for m in diff.message_diffs if m.status != ChangeStatus.UNCHANGED]
|
|
229
|
+
if msg_changes:
|
|
230
|
+
lines.append(f"### Messages ({len(msg_changes)} changed)")
|
|
231
|
+
lines.append("")
|
|
232
|
+
for md in msg_changes:
|
|
233
|
+
status_marker = _STATUS_EMOJI[md.status]
|
|
234
|
+
lines.append(f"- `{status_marker}` **{md.role}**: {md.status.value}")
|
|
235
|
+
for change in md.changes:
|
|
236
|
+
lines.append(f" - {change}")
|
|
237
|
+
if md.content_diff:
|
|
238
|
+
lines.append("")
|
|
239
|
+
lines.append(" ```diff")
|
|
240
|
+
for dl in md.content_diff.splitlines()[:15]:
|
|
241
|
+
lines.append(f" {dl}")
|
|
242
|
+
lines.append(" ```")
|
|
243
|
+
lines.append("")
|
|
244
|
+
|
|
245
|
+
# Variables
|
|
246
|
+
var_changes = [v for v in diff.variable_diffs if v.status != ChangeStatus.UNCHANGED]
|
|
247
|
+
if var_changes:
|
|
248
|
+
lines.append(f"### Variables ({len(var_changes)} changed)")
|
|
249
|
+
lines.append("")
|
|
250
|
+
for vd in var_changes:
|
|
251
|
+
status_marker = _STATUS_EMOJI[vd.status]
|
|
252
|
+
breaking = " **(BREAKING)**" if vd.is_breaking else ""
|
|
253
|
+
lines.append(f"- `{status_marker}` `{vd.name}`: {vd.status.value}{breaking}")
|
|
254
|
+
lines.append("")
|
|
255
|
+
|
|
256
|
+
# Metadata
|
|
257
|
+
meta_changes = [m for m in diff.metadata_diffs if m.status != ChangeStatus.UNCHANGED]
|
|
258
|
+
if meta_changes:
|
|
259
|
+
lines.append(f"### Metadata ({len(meta_changes)} changed)")
|
|
260
|
+
lines.append("")
|
|
261
|
+
for md in meta_changes:
|
|
262
|
+
status_marker = _STATUS_EMOJI[md.status]
|
|
263
|
+
lines.append(f"- `{status_marker}` `{md.key}`: {md.status.value}")
|
|
264
|
+
lines.append("")
|
|
265
|
+
|
|
266
|
+
return "\n".join(lines)
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: promptdiff-ai
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Semantic diff for LLM prompt changes
|
|
5
|
+
Project-URL: Homepage, https://github.com/scottconverse/promptdiff
|
|
6
|
+
Project-URL: Repository, https://github.com/scottconverse/promptdiff
|
|
7
|
+
Project-URL: Issues, https://github.com/scottconverse/promptdiff/issues
|
|
8
|
+
Author: Scott Converse
|
|
9
|
+
License: MIT
|
|
10
|
+
Keywords: ai,breaking-changes,diff,llm,prompt,semantic
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
20
|
+
Requires-Python: >=3.9
|
|
21
|
+
Requires-Dist: prompttools-core-ai<2.0,>=1.0
|
|
22
|
+
Requires-Dist: rich>=13.0
|
|
23
|
+
Requires-Dist: typer[all]>=0.12
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: mypy; extra == 'dev'
|
|
26
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
|
27
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
28
|
+
Requires-Dist: ruff; extra == 'dev'
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
|
|
31
|
+
# promptdiff
|
|
32
|
+
|
|
33
|
+
Semantic diff for LLM prompt changes. Part of the [prompttools](https://github.com/scottconverse/prompttools) suite.
|
|
34
|
+
|
|
35
|
+
Unlike generic text diffing, promptdiff understands prompt structure: messages, variables, metadata, and token counts. It classifies changes as breaking or non-breaking and outputs structured reports suitable for CI/CD pipelines and GitHub PR comments.
|
|
36
|
+
|
|
37
|
+
## Installation
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
pip install promptdiff-ai
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Quick Start
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
# Compare two prompt files
|
|
47
|
+
promptdiff old_prompt.yaml new_prompt.yaml
|
|
48
|
+
|
|
49
|
+
# JSON output for CI pipelines
|
|
50
|
+
promptdiff old.yaml new.yaml --format json
|
|
51
|
+
|
|
52
|
+
# Markdown output for GitHub PR comments
|
|
53
|
+
promptdiff old.yaml new.yaml --format markdown
|
|
54
|
+
|
|
55
|
+
# Exit with code 1 if breaking changes are found (CI gate)
|
|
56
|
+
promptdiff old.yaml new.yaml --exit-on-breaking
|
|
57
|
+
|
|
58
|
+
# Show per-message token breakdowns
|
|
59
|
+
promptdiff old.yaml new.yaml --token-detail
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## What It Detects
|
|
63
|
+
|
|
64
|
+
### Message-Level Changes
|
|
65
|
+
- Added, removed, or modified messages
|
|
66
|
+
- Per-message token deltas
|
|
67
|
+
- Unified content diffs within modified messages
|
|
68
|
+
|
|
69
|
+
### Variable Changes
|
|
70
|
+
- New variables (with or without defaults)
|
|
71
|
+
- Removed variables
|
|
72
|
+
- Modified default values
|
|
73
|
+
|
|
74
|
+
### Metadata Changes
|
|
75
|
+
- Model changes
|
|
76
|
+
- Added/removed/modified metadata keys
|
|
77
|
+
|
|
78
|
+
### Token Deltas
|
|
79
|
+
- Total token count comparison
|
|
80
|
+
- Percentage change
|
|
81
|
+
- Per-message breakdowns (with `--token-detail`)
|
|
82
|
+
|
|
83
|
+
## Breaking Change Classification
|
|
84
|
+
|
|
85
|
+
### Breaking (High Severity)
|
|
86
|
+
- **New required variable** -- a variable added without a default value; existing callers will fail
|
|
87
|
+
- **Removed variable** -- callers referencing this variable will break
|
|
88
|
+
- **Removed message** -- changes the prompt structure
|
|
89
|
+
|
|
90
|
+
### Breaking (Medium Severity)
|
|
91
|
+
- **Model change** -- may affect behavior, pricing, and capabilities
|
|
92
|
+
- **Role ordering change** -- may affect model behavior
|
|
93
|
+
|
|
94
|
+
### Non-Breaking
|
|
95
|
+
- Added variable with a default value
|
|
96
|
+
- Added messages (extends the prompt)
|
|
97
|
+
- Content modifications within existing messages
|
|
98
|
+
- Metadata changes (except model)
|
|
99
|
+
|
|
100
|
+
## Output Formats
|
|
101
|
+
|
|
102
|
+
### Text (default)
|
|
103
|
+
Rich terminal output with color-coded diffs:
|
|
104
|
+
|
|
105
|
+
```
|
|
106
|
+
Prompt Diff: new_prompt.yaml
|
|
107
|
+
old: a1b2c3d4e5f6 new: f6e5d4c3b2a1
|
|
108
|
+
|
|
109
|
+
BREAKING CHANGES (2):
|
|
110
|
+
HIGH [variable] Variable 'tone' was removed
|
|
111
|
+
MEDIUM [model] Model changed from 'gpt-4' to 'gpt-4o'
|
|
112
|
+
|
|
113
|
+
Token Delta:
|
|
114
|
+
150 -> 165 (+15, +10.0%)
|
|
115
|
+
|
|
116
|
+
Messages (1 changed):
|
|
117
|
+
~ system
|
|
118
|
+
System message content modified
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### JSON
|
|
122
|
+
Structured JSON for programmatic consumption:
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
promptdiff old.yaml new.yaml --format json
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### Markdown
|
|
129
|
+
GitHub-flavored Markdown for PR comments:
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
promptdiff old.yaml new.yaml --format markdown
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
## Python API
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
from promptdiff import diff_files, format_text, format_json
|
|
139
|
+
|
|
140
|
+
# Compare two files
|
|
141
|
+
result = diff_files("prompts/v1.yaml", "prompts/v2.yaml")
|
|
142
|
+
|
|
143
|
+
# Check for breaking changes
|
|
144
|
+
if result.is_breaking:
|
|
145
|
+
for bc in result.breaking_changes:
|
|
146
|
+
print(f"[{bc.severity}] {bc.description}")
|
|
147
|
+
|
|
148
|
+
# Get token delta
|
|
149
|
+
print(f"Tokens: {result.token_delta.old_total} -> {result.token_delta.new_total}")
|
|
150
|
+
|
|
151
|
+
# Format output
|
|
152
|
+
print(format_text(result))
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
## CLI Reference
|
|
156
|
+
|
|
157
|
+
```
|
|
158
|
+
Usage: promptdiff [OPTIONS] FILE_A FILE_B
|
|
159
|
+
|
|
160
|
+
Arguments:
|
|
161
|
+
FILE_A Path to the old prompt file
|
|
162
|
+
FILE_B Path to the new prompt file
|
|
163
|
+
|
|
164
|
+
Options:
|
|
165
|
+
-f, --format [text|json|markdown] Output format (default: text)
|
|
166
|
+
--exit-on-breaking Exit with code 1 if breaking changes found
|
|
167
|
+
--token-detail Show per-message token breakdowns
|
|
168
|
+
-e, --encoding TEXT tiktoken encoding (default: cl100k_base)
|
|
169
|
+
-V, --version Show version and exit
|
|
170
|
+
--help Show this message and exit
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
## Supported File Formats
|
|
174
|
+
|
|
175
|
+
All formats supported by prompttools-core:
|
|
176
|
+
- YAML (`.yaml`, `.yml`)
|
|
177
|
+
- JSON (`.json`)
|
|
178
|
+
- Markdown (`.md`)
|
|
179
|
+
- Text (`.txt`)
|
|
180
|
+
|
|
181
|
+
## License
|
|
182
|
+
|
|
183
|
+
MIT
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
promptdiff/__init__.py,sha256=PPiJkrvfsrjdZonlq3KqxSyfF8KfSq9Zkgvn_QjS0ZE,1055
|
|
2
|
+
promptdiff/analyzer.py,sha256=kP4gsNsKmi0kpleJghs9tJQeh2EvIjN7z-JrBy9TxHs,5203
|
|
3
|
+
promptdiff/cli.py,sha256=hvgO0cMD8F1rnUraPtJ9I_kfQXEhZ5jcyYZI160OvWo,3562
|
|
4
|
+
promptdiff/differ.py,sha256=WqNa9_mmqps3lVU4FePXTODSdFDBWVktDUdwmqzGqhc,11125
|
|
5
|
+
promptdiff/models.py,sha256=Amx-Ux2WPQzSjksiF4X-hMe5ljwsbIlRQghG3nlaeBw,4446
|
|
6
|
+
promptdiff/reporter.py,sha256=EfdKoVUEreik-LDXSU_Y_6vt-VxIt7iWY18yNfrq1zk,9401
|
|
7
|
+
promptdiff_ai-1.0.0.dist-info/METADATA,sha256=4uF2wQ7mKwGbVeQKhAt5VXsxPvGwhLw9y-ZnpNqmz3I,5020
|
|
8
|
+
promptdiff_ai-1.0.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
9
|
+
promptdiff_ai-1.0.0.dist-info/entry_points.txt,sha256=y1t43_RBs-tH4P2yHMEJJGskSOl5fQ25Kb5Fyz7a8RU,50
|
|
10
|
+
promptdiff_ai-1.0.0.dist-info/RECORD,,
|