llm-model-diff 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,113 @@
1
+ Metadata-Version: 2.4
2
+ Name: llm-model-diff
3
+ Version: 0.1.0
4
+ Summary: Compare LLM model outputs side-by-side with rich diff visualization
5
+ Author: model-diff contributors
6
+ License: MIT
7
+ Keywords: llm,ai,diff,comparison,openai,anthropic,cli
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Environment :: Console
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.8
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
19
+ Classifier: Topic :: Utilities
20
+ Requires-Python: >=3.8
21
+ Description-Content-Type: text/markdown
22
+ Requires-Dist: click>=8.0
23
+ Requires-Dist: rich>=13.0
24
+ Requires-Dist: anthropic>=0.20.0
25
+ Requires-Dist: openai>=1.0.0
26
+ Provides-Extra: dev
27
+ Requires-Dist: pytest>=7.0; extra == "dev"
28
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
29
+ Requires-Dist: black>=23.0; extra == "dev"
30
+ Requires-Dist: isort>=5.0; extra == "dev"
31
+ Requires-Dist: mypy>=1.0; extra == "dev"
32
+ Requires-Dist: flake8>=6.0; extra == "dev"
33
+
34
+ # model-diff
35
+
36
+ Compare LLM model outputs side-by-side with rich diff visualization.
37
+
38
+ Run the same prompt on multiple models simultaneously and see exactly what each model says differently.
39
+
40
+ ## Installation
41
+
42
+ ```bash
43
+ pip install llm-model-diff
44
+ ```
45
+
46
+ Or install from source:
47
+
48
+ ```bash
49
+ git clone https://github.com/yourname/model-diff
50
+ cd model-diff
51
+ pip install -e .
52
+ ```
53
+
54
+ ## Requirements
55
+
56
+ Set the API keys for the providers you want to use:
57
+
58
+ ```bash
59
+ export OPENAI_API_KEY=sk-...
60
+ export ANTHROPIC_API_KEY=sk-ant-...
61
+ ```
62
+
63
+ Missing keys are handled gracefully — models without a key are skipped with a warning.
64
+
65
+ ## Usage
66
+
67
+ ```bash
68
+ # Default: compare GPT-4o vs Claude Sonnet
69
+ llm-model-diff "What is the best way to handle errors in Python?"
70
+
71
+ # Specify models explicitly
72
+ llm-model-diff "Explain recursion" --models gpt-4o,claude-sonnet-4-6
73
+
74
+ # Use a prompt file
75
+ llm-model-diff --prompt prompt.txt --models gpt-4o,claude-haiku-4-5-20251001,claude-sonnet-4-6
76
+
77
+ # Word-level diff
78
+ llm-model-diff "Explain recursion" --diff words
79
+
80
+ # Show only differences (hide matching sections)
81
+ llm-model-diff "Explain recursion" --only-diff
82
+
83
+ # Deterministic outputs
84
+ llm-model-diff "Explain recursion" --temperature 0.0
85
+
86
+ # Save results to JSON
87
+ llm-model-diff "Explain recursion" --output results.json
88
+ ```
89
+
90
+ ## Supported Models
91
+
92
+ | Model ID | Provider | API Key |
93
+ |---|---|---|
94
+ | `gpt-4o` | OpenAI | `OPENAI_API_KEY` |
95
+ | `gpt-4o-mini` | OpenAI | `OPENAI_API_KEY` |
96
+ | `claude-opus-4-6` | Anthropic | `ANTHROPIC_API_KEY` |
97
+ | `claude-sonnet-4-6` | Anthropic | `ANTHROPIC_API_KEY` |
98
+ | `claude-haiku-4-5-20251001` | Anthropic | `ANTHROPIC_API_KEY` |
99
+
100
+ ## Architecture
101
+
102
+ ```
103
+ src/model_diff/
104
+ ├── cli.py # Click-based CLI entry point
105
+ ├── models.py # Provider-specific API callers, run concurrently via threading
106
+ └── differ.py # difflib-based diff engine + Rich output formatter
107
+ ```
108
+
109
+ Model calls are issued concurrently using `threading`, so wall time equals the slowest model rather than the sum of all models.
110
+
111
+ ## License
112
+
113
+ MIT
@@ -0,0 +1,80 @@
1
+ # model-diff
2
+
3
+ Compare LLM model outputs side-by-side with rich diff visualization.
4
+
5
+ Run the same prompt on multiple models simultaneously and see exactly what each model says differently.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ pip install llm-model-diff
11
+ ```
12
+
13
+ Or install from source:
14
+
15
+ ```bash
16
+ git clone https://github.com/yourname/model-diff
17
+ cd model-diff
18
+ pip install -e .
19
+ ```
20
+
21
+ ## Requirements
22
+
23
+ Set the API keys for the providers you want to use:
24
+
25
+ ```bash
26
+ export OPENAI_API_KEY=sk-...
27
+ export ANTHROPIC_API_KEY=sk-ant-...
28
+ ```
29
+
30
+ Missing keys are handled gracefully — models without a key are skipped with a warning.
31
+
32
+ ## Usage
33
+
34
+ ```bash
35
+ # Default: compare GPT-4o vs Claude Sonnet
36
+ llm-model-diff "What is the best way to handle errors in Python?"
37
+
38
+ # Specify models explicitly
39
+ llm-model-diff "Explain recursion" --models gpt-4o,claude-sonnet-4-6
40
+
41
+ # Use a prompt file
42
+ llm-model-diff --prompt prompt.txt --models gpt-4o,claude-haiku-4-5-20251001,claude-sonnet-4-6
43
+
44
+ # Word-level diff
45
+ llm-model-diff "Explain recursion" --diff words
46
+
47
+ # Show only differences (hide matching sections)
48
+ llm-model-diff "Explain recursion" --only-diff
49
+
50
+ # Deterministic outputs
51
+ llm-model-diff "Explain recursion" --temperature 0.0
52
+
53
+ # Save results to JSON
54
+ llm-model-diff "Explain recursion" --output results.json
55
+ ```
56
+
57
+ ## Supported Models
58
+
59
+ | Model ID | Provider | API Key |
60
+ |---|---|---|
61
+ | `gpt-4o` | OpenAI | `OPENAI_API_KEY` |
62
+ | `gpt-4o-mini` | OpenAI | `OPENAI_API_KEY` |
63
+ | `claude-opus-4-6` | Anthropic | `ANTHROPIC_API_KEY` |
64
+ | `claude-sonnet-4-6` | Anthropic | `ANTHROPIC_API_KEY` |
65
+ | `claude-haiku-4-5-20251001` | Anthropic | `ANTHROPIC_API_KEY` |
66
+
67
+ ## Architecture
68
+
69
+ ```
70
+ src/model_diff/
71
+ ├── cli.py # Click-based CLI entry point
72
+ ├── models.py # Provider-specific API callers, run concurrently via threading
73
+ └── differ.py # difflib-based diff engine + Rich output formatter
74
+ ```
75
+
76
+ Model calls are issued concurrently using `threading`, so wall time equals the slowest model rather than the sum of all models.
77
+
78
+ ## License
79
+
80
+ MIT
@@ -0,0 +1,73 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "llm-model-diff"
7
+ version = "0.1.0"
8
+ description = "Compare LLM model outputs side-by-side with rich diff visualization"
9
+ readme = "README.md"
10
+ license = { text = "MIT" }
11
+ authors = [{ name = "model-diff contributors" }]
12
+ requires-python = ">=3.8"
13
+ keywords = ["llm", "ai", "diff", "comparison", "openai", "anthropic", "cli"]
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Environment :: Console",
17
+ "Intended Audience :: Developers",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.8",
21
+ "Programming Language :: Python :: 3.9",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Programming Language :: Python :: 3.12",
25
+ "Topic :: Software Development :: Libraries :: Python Modules",
26
+ "Topic :: Utilities",
27
+ ]
28
+ dependencies = [
29
+ "click>=8.0",
30
+ "rich>=13.0",
31
+ "anthropic>=0.20.0",
32
+ "openai>=1.0.0",
33
+ ]
34
+
35
+ [project.optional-dependencies]
36
+ dev = [
37
+ "pytest>=7.0",
38
+ "pytest-cov>=4.0",
39
+ "black>=23.0",
40
+ "isort>=5.0",
41
+ "mypy>=1.0",
42
+ "flake8>=6.0",
43
+ ]
44
+
45
+ [project.scripts]
46
+ llm-model-diff = "model_diff.cli:main"
47
+
48
+ [tool.setuptools.packages.find]
49
+ where = ["src"]
50
+
51
+ [tool.setuptools.package-dir]
52
+ "" = "src"
53
+
54
+ [tool.black]
55
+ line-length = 88
56
+ target-version = ["py38"]
57
+
58
+ [tool.isort]
59
+ profile = "black"
60
+ line_length = 88
61
+
62
+ [tool.mypy]
63
+ python_version = "3.8"
64
+ warn_return_any = true
65
+ warn_unused_configs = true
66
+ disallow_untyped_defs = true
67
+ ignore_missing_imports = true
68
+
69
+ [tool.pytest.ini_options]
70
+ testpaths = ["tests"]
71
+ python_files = ["test_*.py", "*_test.py"]
72
+ python_classes = ["Test*"]
73
+ python_functions = ["test_*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,113 @@
1
+ Metadata-Version: 2.4
2
+ Name: llm-model-diff
3
+ Version: 0.1.0
4
+ Summary: Compare LLM model outputs side-by-side with rich diff visualization
5
+ Author: model-diff contributors
6
+ License: MIT
7
+ Keywords: llm,ai,diff,comparison,openai,anthropic,cli
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Environment :: Console
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.8
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
19
+ Classifier: Topic :: Utilities
20
+ Requires-Python: >=3.8
21
+ Description-Content-Type: text/markdown
22
+ Requires-Dist: click>=8.0
23
+ Requires-Dist: rich>=13.0
24
+ Requires-Dist: anthropic>=0.20.0
25
+ Requires-Dist: openai>=1.0.0
26
+ Provides-Extra: dev
27
+ Requires-Dist: pytest>=7.0; extra == "dev"
28
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
29
+ Requires-Dist: black>=23.0; extra == "dev"
30
+ Requires-Dist: isort>=5.0; extra == "dev"
31
+ Requires-Dist: mypy>=1.0; extra == "dev"
32
+ Requires-Dist: flake8>=6.0; extra == "dev"
33
+
34
+ # model-diff
35
+
36
+ Compare LLM model outputs side-by-side with rich diff visualization.
37
+
38
+ Run the same prompt on multiple models simultaneously and see exactly what each model says differently.
39
+
40
+ ## Installation
41
+
42
+ ```bash
43
+ pip install llm-model-diff
44
+ ```
45
+
46
+ Or install from source:
47
+
48
+ ```bash
49
+ git clone https://github.com/yourname/model-diff
50
+ cd model-diff
51
+ pip install -e .
52
+ ```
53
+
54
+ ## Requirements
55
+
56
+ Set the API keys for the providers you want to use:
57
+
58
+ ```bash
59
+ export OPENAI_API_KEY=sk-...
60
+ export ANTHROPIC_API_KEY=sk-ant-...
61
+ ```
62
+
63
+ Missing keys are handled gracefully — models without a key are skipped with a warning.
64
+
65
+ ## Usage
66
+
67
+ ```bash
68
+ # Default: compare GPT-4o vs Claude Sonnet
69
+ llm-model-diff "What is the best way to handle errors in Python?"
70
+
71
+ # Specify models explicitly
72
+ llm-model-diff "Explain recursion" --models gpt-4o,claude-sonnet-4-6
73
+
74
+ # Use a prompt file
75
+ llm-model-diff --prompt prompt.txt --models gpt-4o,claude-haiku-4-5-20251001,claude-sonnet-4-6
76
+
77
+ # Word-level diff
78
+ llm-model-diff "Explain recursion" --diff words
79
+
80
+ # Show only differences (hide matching sections)
81
+ llm-model-diff "Explain recursion" --only-diff
82
+
83
+ # Deterministic outputs
84
+ llm-model-diff "Explain recursion" --temperature 0.0
85
+
86
+ # Save results to JSON
87
+ llm-model-diff "Explain recursion" --output results.json
88
+ ```
89
+
90
+ ## Supported Models
91
+
92
+ | Model ID | Provider | API Key |
93
+ |---|---|---|
94
+ | `gpt-4o` | OpenAI | `OPENAI_API_KEY` |
95
+ | `gpt-4o-mini` | OpenAI | `OPENAI_API_KEY` |
96
+ | `claude-opus-4-6` | Anthropic | `ANTHROPIC_API_KEY` |
97
+ | `claude-sonnet-4-6` | Anthropic | `ANTHROPIC_API_KEY` |
98
+ | `claude-haiku-4-5-20251001` | Anthropic | `ANTHROPIC_API_KEY` |
99
+
100
+ ## Architecture
101
+
102
+ ```
103
+ src/model_diff/
104
+ ├── cli.py # Click-based CLI entry point
105
+ ├── models.py # Provider-specific API callers, run concurrently via threading
106
+ └── differ.py # difflib-based diff engine + Rich output formatter
107
+ ```
108
+
109
+ Model calls are issued concurrently using `threading`, so wall time equals the slowest model rather than the sum of all models.
110
+
111
+ ## License
112
+
113
+ MIT
@@ -0,0 +1,15 @@
1
+ README.md
2
+ pyproject.toml
3
+ src/llm_model_diff.egg-info/PKG-INFO
4
+ src/llm_model_diff.egg-info/SOURCES.txt
5
+ src/llm_model_diff.egg-info/dependency_links.txt
6
+ src/llm_model_diff.egg-info/entry_points.txt
7
+ src/llm_model_diff.egg-info/requires.txt
8
+ src/llm_model_diff.egg-info/top_level.txt
9
+ src/model_diff/__init__.py
10
+ src/model_diff/cli.py
11
+ src/model_diff/differ.py
12
+ src/model_diff/models.py
13
+ tests/test_cli.py
14
+ tests/test_differ.py
15
+ tests/test_models.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ llm-model-diff = model_diff.cli:main
@@ -0,0 +1,12 @@
1
+ click>=8.0
2
+ rich>=13.0
3
+ anthropic>=0.20.0
4
+ openai>=1.0.0
5
+
6
+ [dev]
7
+ pytest>=7.0
8
+ pytest-cov>=4.0
9
+ black>=23.0
10
+ isort>=5.0
11
+ mypy>=1.0
12
+ flake8>=6.0
@@ -0,0 +1,7 @@
1
+ """model-diff: Compare LLM model outputs side-by-side with rich diff visualization."""
2
+
3
+ __version__ = "0.1.0"
4
+ __all__ = ["ModelRunner", "DiffEngine", "ModelResult"]
5
+
6
+ from model_diff.models import ModelResult, ModelRunner
7
+ from model_diff.differ import DiffEngine
@@ -0,0 +1,154 @@
1
+ """CLI entry point for model-diff."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from pathlib import Path
7
+ from typing import List, Optional
8
+
9
+ import click
10
+ from rich.console import Console
11
+
12
+ from model_diff.models import DEFAULT_MODELS, ALL_SUPPORTED_MODELS, ModelRunner
13
+ from model_diff.differ import DiffEngine
14
+
15
+ console = Console()
16
+
17
+
18
+ # ---------------------------------------------------------------------------
19
+ # Helpers
20
+ # ---------------------------------------------------------------------------
21
+
22
+ def _parse_models(models_str: str) -> List[str]:
23
+ """Split a comma-separated model list and strip whitespace."""
24
+ return [m.strip() for m in models_str.split(",") if m.strip()]
25
+
26
+
27
+ def _load_prompt(prompt_text: Optional[str], prompt_file: Optional[str]) -> str:
28
+ """Return the prompt string from either the positional arg or a file."""
29
+ if prompt_file:
30
+ p = Path(prompt_file)
31
+ if not p.exists():
32
+ console.print(f"[red]Error:[/red] prompt file '{prompt_file}' not found.")
33
+ sys.exit(1)
34
+ return p.read_text(encoding="utf-8").strip()
35
+ if prompt_text:
36
+ return prompt_text.strip()
37
+ console.print(
38
+ "[red]Error:[/red] You must supply a prompt as an argument or via --prompt."
39
+ )
40
+ console.print("Usage: model-diff \"Your prompt here\"")
41
+ console.print(" model-diff --prompt prompt.txt")
42
+ sys.exit(1)
43
+
44
+
45
+ # ---------------------------------------------------------------------------
46
+ # Click command
47
+ # ---------------------------------------------------------------------------
48
+
49
@click.command(context_settings={"help_option_names": ["-h", "--help"]})
@click.argument("prompt_text", required=False, metavar="PROMPT")
@click.option(
    "--prompt",
    "prompt_file",
    default=None,
    metavar="FILE",
    help="Path to a text file containing the prompt (alternative to inline PROMPT).",
)
@click.option(
    "--models",
    "models_str",
    default=",".join(DEFAULT_MODELS),
    show_default=True,
    metavar="MODEL1,MODEL2,...",
    help=(
        "Comma-separated list of model IDs to compare. "
        f"Supported: {', '.join(sorted(ALL_SUPPORTED_MODELS))}"
    ),
)
@click.option(
    "--diff",
    "diff_mode",
    default="lines",
    type=click.Choice(["lines", "words", "chars"], case_sensitive=False),
    show_default=True,
    help="Granularity of the diff: lines, words, or chars.",
)
@click.option(
    "--only-diff",
    is_flag=True,
    default=False,
    help="Hide matching sections and show only the differing parts.",
)
@click.option(
    "--output",
    "output_file",
    default=None,
    metavar="FILE",
    help="Save the full results as JSON to this file.",
)
@click.option(
    "--temperature",
    default=0.7,
    show_default=True,
    type=click.FloatRange(0.0, 2.0),
    help="Sampling temperature passed to each model (0.0 = deterministic).",
)
# The version is looked up from installed distribution metadata, so this must
# be the distribution name ("llm-model-diff"), not the import package name;
# otherwise --version fails because no such distribution is installed.
@click.version_option(package_name="llm-model-diff")
def main(
    prompt_text: Optional[str],
    prompt_file: Optional[str],
    models_str: str,
    diff_mode: str,
    only_diff: bool,
    output_file: Optional[str],
    temperature: float,
) -> None:
    """
    Run the same prompt on multiple LLM models and show a side-by-side diff.

    \b
    Examples:
      llm-model-diff "What is the best way to handle errors in Python?"
      llm-model-diff "Explain recursion" --models gpt-4o,claude-sonnet-4-6
      llm-model-diff --prompt prompt.txt --models gpt-4o-mini,claude-haiku-4-5-20251001
      llm-model-diff "Explain recursion" --diff words --only-diff
      llm-model-diff "Explain recursion" --output results.json --temperature 0.0
    """
    # NOTE: the docstring above is rendered by Click as the --help text, so
    # maintainer-facing notes live in comments instead.
    #
    # Exit codes: 1 when no prompt or no valid model is available,
    # 2 when every model run reported an error.
    prompt = _load_prompt(prompt_text, prompt_file)

    requested_models = _parse_models(models_str)

    # Validate model names; rejected names come back as warning strings.
    valid_models, warnings = ModelRunner.validate_models(requested_models)
    for w in warnings:
        console.print(f"[yellow]Warning:[/yellow] {w}")

    if not valid_models:
        console.print(
            "[red]Error:[/red] No valid models to run. "
            f"Supported: {', '.join(sorted(ALL_SUPPORTED_MODELS))}"
        )
        sys.exit(1)

    # ── Run the models ────────────────────────────────────────────────
    console.print(
        f"\n[dim]Running prompt on {len(valid_models)} model(s): "
        f"{', '.join(valid_models)}  (temperature={temperature})[/dim]\n"
    )

    runner = ModelRunner(models=valid_models, temperature=temperature)
    results = runner.run(prompt)

    # ── Render ────────────────────────────────────────────────────────
    engine = DiffEngine(diff_mode=diff_mode, only_diff=only_diff, console=console)
    engine.render(
        prompt=prompt,
        results=results,
        temperature=temperature,
        output_file=output_file,
    )

    # Exit with non-zero if every model failed
    if all(r.error for r in results):
        sys.exit(2)