outputguard 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,53 @@
1
+ from outputguard.exceptions import (
2
+ OutputGuardError,
3
+ ParseError,
4
+ RepairError,
5
+ SchemaValidationError,
6
+ StrategyError,
7
+ )
8
+ from outputguard.guard import OutputGuard
9
+ from outputguard.models import RepairResult, ValidationError, ValidationResult
10
+ from outputguard.report import RepairReport, StrategyApplication
11
+
12
# Shared guard built with default constructor arguments; the module-level
# convenience functions in this file delegate to it.
_default_guard = OutputGuard()
13
+
14
+
15
def validate(text: str, schema: dict) -> ValidationResult:
    """Validate ``text`` against ``schema`` using the module-level default guard."""
    outcome = _default_guard.validate(text, schema)
    return outcome
17
+
18
+
19
def repair(text: str) -> RepairResult:
    """Run the module-level default guard's ``repair`` on ``text`` (no report requested)."""
    guard = _default_guard
    return guard.repair(text)  # type: ignore[return-value]
21
+
22
+
23
def validate_and_repair(text: str, schema: dict) -> ValidationResult:
    """Delegate to the default guard's ``validate_and_repair`` for ``text`` and ``schema``."""
    outcome = _default_guard.validate_and_repair(text, schema)
    return outcome
25
+
26
+
27
def parse(text: str, schema: dict) -> dict | list:
    """Validate, repair, and return parsed data via the default guard. Raises on failure."""
    parsed = _default_guard.parse(text, schema)
    return parsed
30
+
31
+
32
def retry_prompt(text: str, schema: dict, errors: list[ValidationError]) -> str:
    """Build a retry prompt from ``text``, ``schema`` and ``errors`` via the default guard."""
    prompt = _default_guard.retry_prompt(text, schema, errors)
    return prompt
34
+
35
+
36
# Public API of the package: the guard class, exception and result types,
# followed by the module-level convenience functions.
__all__ = [
    # classes and exceptions
    "OutputGuard",
    "OutputGuardError",
    "ParseError",
    "RepairError",
    "RepairReport",
    "RepairResult",
    "SchemaValidationError",
    "StrategyApplication",
    "StrategyError",
    "ValidationError",
    "ValidationResult",
    # convenience functions
    "parse",
    "repair",
    "retry_prompt",
    "validate",
    "validate_and_repair",
]
outputguard/cli.py ADDED
@@ -0,0 +1,227 @@
1
+ """OutputGuard CLI — validate, repair, and inspect LLM JSON output."""
2
+
3
+ import dataclasses
4
+ import json
5
+ import sys
6
+
7
+ import click
8
+ from rich.console import Console
9
+ from rich.table import Table
10
+
11
+ import outputguard
12
+ from outputguard.guard import OutputGuard
13
+ from outputguard.models import RepairResult, ValidationResult
14
+ from outputguard.repairer import repair as _repair
15
+ from outputguard.strategies import ALL_STRATEGIES, STRATEGY_DESCRIPTIONS
16
+
17
# Rich console shared by every command; constructed with ``stderr=True`` so
# status messages stay separate from the payload emitted through ``click.echo``.
console = Console(stderr=True)
18
+
19
+
20
def _read_input(input_path: str) -> str:
    """Return the full contents of ``input_path``, opened in text mode via ``click.open_file``."""
    with click.open_file(input_path, "r") as stream:
        contents = stream.read()
    return contents
23
+
24
+
25
+ def _load_schema(schema_path: str) -> dict:
26
+ with open(schema_path) as f:
27
+ return json.load(f)
28
+
29
+
30
def _print_validation_text(result: ValidationResult) -> None:
    """Print a human-readable verdict for ``result`` on the shared console.

    Invalid results list every error as ``path: message``; valid results
    report whether a repair was needed and which strategies were applied.
    """
    if not result.valid:
        console.print("[red]✗ Invalid[/red]")
        for issue in result.errors:
            console.print(f" [red]{issue.path}[/red]: {issue.message}")
        return

    if not result.repaired:
        console.print("[green]✓ Valid[/green]")
        return

    console.print(
        "[yellow]⚠ Repaired and valid[/yellow] "
        f"strategies: {', '.join(result.strategies_applied)}"
    )
43
+
44
+
45
+ def _result_to_dict(obj: ValidationResult | RepairResult) -> dict:
46
+ return dataclasses.asdict(obj)
47
+
48
+
49
+ def _write_output(text: str, output_path: str | None) -> None:
50
+ if output_path:
51
+ with open(output_path, "w") as f:
52
+ f.write(text)
53
+ else:
54
+ click.echo(text)
55
+
56
+
57
@click.group()
def cli() -> None:
    """OutputGuard — validate and repair LLM JSON output."""
    # Intentionally empty: this is only the command group that the
    # ``@cli.command`` functions in this module register themselves on.
60
+
61
+
62
@cli.command()
@click.argument("input_path", metavar="INPUT")
@click.option("-s", "--schema", "schema_path", required=True, help="Path to JSON Schema file.")
@click.option(
    "-r", "--repair", "do_repair", is_flag=True, help="Attempt repair if validation fails."
)
@click.option(
    "-f",
    "--format",
    "fmt",
    type=click.Choice(["text", "json"]),
    default="text",
    help="Output format.",
)
@click.option("-q", "--quiet", is_flag=True, help="Exit code only, no output.")
@click.option("-o", "--output", "output_path", default=None, help="Write result to file.")
@click.option("-d", "--diff", "show_diff", is_flag=True, help="Show diff of repairs.")
@click.option("-v", "--verbose", is_flag=True, help="Show each strategy's effect.")
def validate(
    input_path: str,
    schema_path: str,
    do_repair: bool,
    fmt: str,
    quiet: bool,
    output_path: str | None,
    show_diff: bool,
    verbose: bool,
) -> None:
    """Validate INPUT (file or - for stdin) against a JSON schema."""
    text = _read_input(input_path)
    schema = _load_schema(schema_path)

    # Pick the library entry point once; both take (text, schema).
    check = outputguard.validate_and_repair if do_repair else outputguard.validate
    result = check(text, schema)

    # The process exit status mirrors validity in every output mode.
    exit_code = 0 if result.valid else 1

    if quiet:
        sys.exit(exit_code)

    if fmt == "json":
        _write_output(json.dumps(_result_to_dict(result), indent=2), output_path)
        sys.exit(exit_code)

    _print_validation_text(result)
    if result.valid and result.repaired:
        if show_diff or verbose:
            _show_repair_details(text, result, verbose)
        if result.repaired_text:
            _write_output(result.repaired_text, output_path)

    sys.exit(exit_code)
111
+
112
+
113
def _show_repair_details(original: str, result: ValidationResult, verbose: bool) -> None:
    """Print per-strategy details (verbose) or an overall diff for a repaired result.

    Does nothing unless ``result.repaired`` is set. The repair is re-run on
    ``original`` with ``report=True`` to obtain the report that is printed.
    """
    if not result.repaired:
        return

    _ignored, report = _repair(original, report=True)

    if not verbose:
        overall = report.diff
        if overall:
            console.print("\n[bold]Diff:[/bold]")
            console.print(overall)
        return

    per_step = report.step_diffs()
    if per_step:
        console.print("\n[bold]Strategy details:[/bold]")
        console.print(per_step)
    console.print(f"[dim]Confidence: {report.confidence:.0%}[/dim]")
129
+
130
+
131
@cli.command()
@click.argument("input_path", metavar="INPUT")
@click.option(
    "-f",
    "--format",
    "fmt",
    type=click.Choice(["text", "json"]),
    default="text",
    help="Output format.",
)
@click.option("-o", "--output", "output_path", default=None, help="Write result to file.")
@click.option("--strategies", default=None, help="Comma-separated strategy names.")
@click.option("-d", "--diff", "show_diff", is_flag=True, help="Show diff of repairs.")
@click.option("-v", "--verbose", is_flag=True, help="Show each strategy's effect.")
def repair(
    input_path: str,
    fmt: str,
    output_path: str | None,
    strategies: str | None,
    show_diff: bool,
    verbose: bool,
) -> None:
    """Repair malformed JSON from INPUT (file or - for stdin)."""
    text = _read_input(input_path)

    strategy_list = None
    if strategies:
        strategy_list = [piece.strip() for piece in strategies.split(",")]

    guard = OutputGuard(strategies=strategy_list)

    # A report is only requested when one of the detail flags needs it.
    report = None
    if show_diff or verbose:
        result, report = guard.repair(text, report=True)
    else:
        result = guard.repair(text)

    # Success means either a repair happened or no parse error was recorded.
    exit_code = 0 if result.repaired or result.parse_error is None else 1

    if fmt == "json":
        _write_output(json.dumps(_result_to_dict(result), indent=2), output_path)
        sys.exit(exit_code)

    if result.repaired:
        console.print(
            f"[yellow]⚠ Repaired[/yellow] strategies: {', '.join(result.strategies_applied)}"
        )
        if report and verbose:
            per_step = report.step_diffs()
            if per_step:
                console.print("\n[bold]Strategy details:[/bold]")
                console.print(per_step)
            console.print(f"[dim]Confidence: {report.confidence:.0%}[/dim]")
        elif report and show_diff:
            overall = report.diff
            if overall:
                console.print("\n[bold]Diff:[/bold]")
                console.print(overall)
        _write_output(result.text, output_path)
    elif result.parse_error:
        console.print(f"[red]✗ Could not repair[/red]: {result.parse_error}")
    else:
        console.print("[green]✓ Already valid JSON[/green]")
        _write_output(result.text, output_path)

    sys.exit(exit_code)
191
+
192
+
193
@cli.command("retry-prompt")
@click.argument("input_path", metavar="INPUT")
@click.option("-s", "--schema", "schema_path", required=True, help="Path to JSON Schema file.")
def retry_prompt(input_path: str, schema_path: str) -> None:
    """Generate a retry prompt for invalid JSON from INPUT."""
    text = _read_input(input_path)
    schema = _load_schema(schema_path)

    # Validate first so the prompt can be built from the concrete errors.
    validation = outputguard.validate(text, schema)
    click.echo(outputguard.retry_prompt(text, schema, validation.errors))
    sys.exit(0)
205
+
206
+
207
@cli.command()
def strategies() -> None:
    """List all available repair strategies."""
    table = Table(title="Repair Strategies")
    table.add_column("#", style="dim", width=4)
    table.add_column("Name", style="cyan")
    table.add_column("Description")

    # Rows are numbered from 1 in the order ALL_STRATEGIES provides them.
    for position, entry in enumerate(ALL_STRATEGIES, start=1):
        name, _callable = entry
        description = STRATEGY_DESCRIPTIONS.get(name, "")
        table.add_row(str(position), name, description)

    console.print(table)
    sys.exit(0)
220
+
221
+
222
@cli.command()
def version() -> None:
    """Show outputguard version."""
    from importlib.metadata import version as pkg_version

    # Version is read from the installed distribution's metadata.
    installed = pkg_version("outputguard")
    click.echo(f"outputguard {installed}")
@@ -0,0 +1,38 @@
1
class OutputGuardError(Exception):
    """Root of the outputguard exception hierarchy; the other errors here subclass it."""
3
+
4
+
5
class ParseError(OutputGuardError):
    """The text could not be parsed as JSON, even after repair attempts.

    Attributes:
        original_text: The text that was given to the parser.
        parse_error: Description of the parse failure, or ``None`` if unavailable.
    """

    def __init__(self, message: str, original_text: str, parse_error: str | None = None):
        super().__init__(message)
        self.original_text, self.parse_error = original_text, parse_error
12
+
13
+
14
class SchemaValidationError(OutputGuardError):
    """The JSON parsed, but does not satisfy the schema, even after repair.

    Attributes:
        data: The parsed JSON value that failed validation.
        validation_errors: The errors passed in as ``errors``.
        schema: The schema the data was checked against.
    """

    def __init__(self, message: str, data: dict | list, errors: list, schema: dict):
        super().__init__(message)
        self.data = data
        # Stored under a different name than the constructor parameter.
        self.validation_errors = errors
        self.schema = schema
22
+
23
+
24
class RepairError(OutputGuardError):
    """A repair was attempted and did not succeed.

    Attributes:
        strategies_tried: Names of the strategies that were tried.
        original_text: The text the repair started from.
    """

    def __init__(self, message: str, strategies_tried: list[str], original_text: str):
        super().__init__(message)
        self.strategies_tried, self.original_text = strategies_tried, original_text
31
+
32
+
33
class StrategyError(OutputGuardError):
    """An individual repair strategy ran into an error.

    Attributes:
        strategy_name: Name of the strategy that failed.
    """

    def __init__(self, message: str, strategy_name: str):
        super().__init__(message)
        self.strategy_name = strategy_name
outputguard/guard.py ADDED
@@ -0,0 +1,91 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Literal, overload
4
+
5
+ from outputguard import repairer as _repairer
6
+ from outputguard import retry as _retry
7
+ from outputguard import validator as _validator
8
+ from outputguard.exceptions import ParseError, SchemaValidationError
9
+ from outputguard.models import RepairResult, ValidationError, ValidationResult
10
+ from outputguard.report import RepairReport
11
+
12
+
13
class OutputGuard:
    """Facade over the validator, repairer and retry-prompt modules.

    Attributes:
        strategies: Strategy names forwarded to the repairer on every repair
            call, or ``None`` to forward no explicit selection.
        max_repair_attempts: Upper bound on repair/re-validate rounds performed
            by ``validate_and_repair``.
    """

    def __init__(
        self,
        strategies: list[str] | None = None,
        max_repair_attempts: int = 3,
    ):
        self.strategies = strategies
        self.max_repair_attempts = max_repair_attempts

    def validate(self, text: str, schema: dict) -> ValidationResult:
        """Validate ``text`` against ``schema`` without attempting any repair."""
        return _validator.validate(text, schema)

    @overload
    def repair(self, text: str) -> RepairResult: ...

    @overload
    def repair(self, text: str, *, report: Literal[True]) -> tuple[RepairResult, RepairReport]: ...

    def repair(
        self, text: str, *, report: bool = False
    ) -> RepairResult | tuple[RepairResult, RepairReport]:
        """Repair ``text`` with this guard's strategies.

        Returns the repairer's ``RepairResult``, or a ``(RepairResult,
        RepairReport)`` tuple when ``report`` is true.
        """
        if report:
            return _repairer.repair(text, self.strategies, report=True)
        return _repairer.repair(text, self.strategies)

    def validate_and_repair(self, text: str, schema: dict) -> ValidationResult:
        """Validate, and if invalid, attempt repair then re-validate.

        Returns the first valid result found. When a repair leads to a valid
        result, that result is annotated with ``repaired``,
        ``strategies_applied``, ``original_text`` and ``repaired_text``.
        Otherwise the validation result of the ORIGINAL text is returned with
        ``original_text`` filled in.
        """
        result = self.validate(text, schema)
        if result.valid:
            return result

        current_text = text
        for _attempt in range(self.max_repair_attempts):
            repair_result = _repairer.repair(current_text, self.strategies)
            if not repair_result.repaired:
                # The repairer left the text untouched, so another round on
                # the same input would only repeat this outcome: stop now
                # instead of burning the remaining attempts.
                break

            revalidation = self.validate(repair_result.text, schema)
            if revalidation.valid:
                revalidation.repaired = True
                revalidation.strategies_applied = repair_result.strategies_applied
                revalidation.original_text = text
                revalidation.repaired_text = repair_result.text
                return revalidation
            current_text = repair_result.text

        # NOTE(review): errors reported here describe the original text, not
        # any repaired-but-still-schema-invalid text — confirm that is intended.
        result.original_text = text
        return result

    def parse(self, text: str, schema: dict) -> dict | list:
        """Validate, repair, and return parsed data. Raises on failure.

        This is the simplest API: give it text and a schema, get back
        parsed data or an exception.

        Raises:
            ParseError: If the text cannot be parsed as JSON even after repair.
            SchemaValidationError: If the parsed JSON doesn't match the schema.
        """
        result = self.validate_and_repair(text, schema)
        if result.valid:
            # Narrowing for type checkers: a valid result carries parsed data.
            assert result.data is not None
            return result.data

        # No parsed data on the failed result is treated as a parse failure.
        if result.data is None:
            raise ParseError(
                "Could not parse JSON from LLM output",
                original_text=text,
                parse_error=result.errors[0].message if result.errors else None,
            )
        raise SchemaValidationError(
            f"JSON does not match schema: {len(result.errors)} error(s)",
            data=result.data,
            errors=result.errors,
            schema=schema,
        )

    def retry_prompt(self, text: str, schema: dict, errors: list[ValidationError]) -> str:
        """Build a retry prompt for ``text`` from ``schema`` and the given ``errors``."""
        return _retry.retry_prompt(text, schema, errors)
outputguard/models.py ADDED
@@ -0,0 +1,29 @@
1
+ from dataclasses import dataclass, field
2
+ from typing import Any
3
+
4
+
5
@dataclass
class ValidationError:
    """One validation problem found in a document."""

    # Human-readable description of the problem.
    message: str
    # JSON path, e.g. "$.items[0].name"
    path: str
    # Schema path that was violated
    schema_path: str
    # Optional offending value; defaults to None.
    value: Any = None
11
+
12
+
13
@dataclass
class ValidationResult:
    """Outcome of a validation run, optionally annotated with repair details."""

    # Whether validation succeeded.
    valid: bool
    # Parsed JSON value, when available.
    data: dict | list | None = None
    # Problems found; a fresh empty list per instance by default.
    errors: list[ValidationError] = field(default_factory=list)
    # Repair bookkeeping; left at these defaults unless a repair step fills them in.
    repaired: bool = False
    strategies_applied: list[str] = field(default_factory=list)
    original_text: str = ""
    repaired_text: str = ""
23
+
24
@dataclass
class RepairResult:
    """Outcome of a repair run."""

    # Whether a repair was performed.
    repaired: bool
    # Text associated with the result.
    text: str
    # Names of the strategies applied; a fresh empty list per instance by default.
    strategies_applied: list[str] = field(default_factory=list)
    # Parse failure description, or None when none was recorded.
    parse_error: str | None = None
outputguard/py.typed ADDED
File without changes
@@ -0,0 +1,102 @@
1
+ """JSON repair engine — applies strategies in sequence to fix malformed JSON."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from typing import Literal, overload
7
+
8
+ from outputguard.models import RepairResult
9
+ from outputguard.report import RepairReport, StrategyApplication
10
+ from outputguard.strategies import get_strategies
11
+
12
+
13
@overload
def repair(text: str, strategies: list[str] | None = ...) -> RepairResult: ...


@overload
def repair(
    text: str, strategies: list[str] | None = ..., *, report: Literal[True]
) -> tuple[RepairResult, RepairReport]: ...


def repair(
    text: str, strategies: list[str] | None = None, *, report: bool = False
) -> RepairResult | tuple[RepairResult, RepairReport]:
    """Try to turn ``text`` into parseable JSON by running repair strategies.

    Text that already parses is returned untouched with ``repaired=False``.
    Otherwise every strategy is applied in order and the outcome parsed once;
    if that fails, the strategies are replayed from the original text with a
    parse attempt after each one. A strategy that raises is treated as having
    made no change.

    If report=True, returns a (RepairResult, RepairReport) tuple.
    """

    def _decode_error(candidate: str) -> str | None:
        # None means ``candidate`` parsed; otherwise the decoder's message.
        try:
            json.loads(candidate)
        except json.JSONDecodeError as exc:
            return str(exc)
        return None

    def _run_step(name: str, fn, before: str) -> StrategyApplication:
        # Apply one strategy, falling back to the unchanged input if it raises.
        try:
            after = fn(before)
        except Exception:
            after = before
        return StrategyApplication(
            name=name, changed=after != before, input_text=before, output_text=after
        )

    def _deliver(outcome: RepairResult, final_text: str, success: bool, **extra):
        # Attach a RepairReport only when the caller asked for one.
        if report:
            return outcome, RepairReport(
                original_text=text, final_text=final_text, success=success, **extra
            )
        return outcome

    if _decode_error(text) is None:
        return _deliver(RepairResult(repaired=False, text=text), text, True)

    pipeline = get_strategies(strategies)

    # First pass: apply ALL strategies in sequence, then try parsing
    history: list[StrategyApplication] = []
    used: list[str] = []
    candidate = text
    for name, fn in pipeline:
        step = _run_step(name, fn, candidate)
        history.append(step)
        if step.changed:
            used.append(name)
        candidate = step.output_text

    failure = _decode_error(candidate)
    if failure is None:
        fixed = RepairResult(repaired=True, text=candidate, strategies_applied=used)
        return _deliver(fixed, candidate, True, steps=history)

    # Second pass: apply one at a time with parse attempts between each
    history = []
    used = []
    candidate = text
    for name, fn in pipeline:
        step = _run_step(name, fn, candidate)
        history.append(step)
        if step.changed:
            used.append(name)
        candidate = step.output_text

        problem = _decode_error(candidate)
        if problem is None:
            fixed = RepairResult(repaired=True, text=candidate, strategies_applied=used)
            return _deliver(fixed, candidate, True, steps=history)
        failure = problem

    # Nothing produced parseable JSON: hand back the original text and the last error.
    unrepaired = RepairResult(repaired=False, text=text, parse_error=failure)
    return _deliver(unrepaired, text, False, steps=history, parse_error=failure)
outputguard/report.py ADDED
@@ -0,0 +1,110 @@
1
+ from dataclasses import dataclass, field
2
+ from difflib import unified_diff
3
+
4
+
5
@dataclass
class StrategyApplication:
    """Record of a single strategy being applied."""

    # Strategy name; also used in the diff's file labels.
    name: str
    # Whether the strategy altered the text.
    changed: bool
    # Text before and after the strategy ran.
    input_text: str
    output_text: str

    @property
    def diff(self) -> str:
        """Unified diff of this strategy's changes, or "" when nothing changed.

        Lines are compared with their line endings so ending-only changes
        are still detected, but the trailing newline is stripped from each
        emitted line before joining. Joining lines that still carry their
        own ``"\\n"`` with ``"\\n"`` would double-space the output.
        """
        if not self.changed:
            return ""
        return "\n".join(
            line.rstrip("\n")
            for line in unified_diff(
                self.input_text.splitlines(keepends=True),
                self.output_text.splitlines(keepends=True),
                fromfile=f"before_{self.name}",
                tofile=f"after_{self.name}",
                lineterm="",
            )
        )
28
+
29
+
30
@dataclass
class RepairReport:
    """Detailed report of a repair operation."""

    # Text before and after the whole repair.
    original_text: str
    final_text: str
    # Whether the repair ended with parseable output.
    success: bool
    # One record per strategy that was run, in order.
    steps: list[StrategyApplication] = field(default_factory=list)
    # Parse failure description, or None when none was recorded.
    parse_error: str | None = None

    @property
    def strategies_applied(self) -> list[str]:
        """Names of strategies that actually changed the text."""
        return [s.name for s in self.steps if s.changed]

    @property
    def strategies_tried(self) -> list[str]:
        """Names of all strategies that were tried."""
        return [s.name for s in self.steps]

    @property
    def diff(self) -> str:
        """Unified diff from original to final text, or "" when they are equal.

        Lines are compared with their line endings, but the trailing newline
        is stripped from each emitted line before joining; otherwise every
        content line would be followed by a spurious blank line.
        """
        if self.original_text == self.final_text:
            return ""
        return "\n".join(
            line.rstrip("\n")
            for line in unified_diff(
                self.original_text.splitlines(keepends=True),
                self.final_text.splitlines(keepends=True),
                fromfile="original",
                tofile="repaired",
                lineterm="",
            )
        )

    @property
    def confidence(self) -> float:
        """Heuristic confidence score (0.0 to 1.0) for the repair.

        Higher when fewer strategies were needed and the change was minimal.
        Failed repairs score 0.0; successful repairs never score below 0.1.
        """
        if not self.success:
            return 0.0

        applied_count = len(self.strategies_applied)

        if applied_count == 0:
            return 1.0  # No repair needed, already valid

        # Start at 1.0, reduce by:
        # - Number of strategies needed (more = less confident), capped at 0.5
        # - Ratio of text changed (more change = less confident), capped at 0.3
        strategy_penalty = min(applied_count * 0.1, 0.5)

        # The change ratio is measured by length difference only; lengths are
        # clamped to 1 so empty text cannot cause a division by zero.
        orig_len = max(len(self.original_text), 1)
        final_len = max(len(self.final_text), 1)
        change_ratio = abs(orig_len - final_len) / max(orig_len, final_len)
        change_penalty = min(change_ratio * 0.5, 0.3)

        return max(round(1.0 - strategy_penalty - change_penalty, 2), 0.1)

    @property
    def summary(self) -> str:
        """One-line summary of the repair."""
        if not self.success:
            return f"Repair failed after trying {len(self.steps)} strategies"
        applied = self.strategies_applied
        if not applied:
            return "No repair needed — JSON was already valid"
        return f"Repaired using {len(applied)} strategy(ies): {', '.join(applied)}"

    def step_diffs(self) -> str:
        """Show diff for each strategy that made changes, useful for --verbose."""
        parts = []
        for step in self.steps:
            if step.changed:
                parts.append(f"=== {step.name} ===")
                parts.append(step.diff)
                parts.append("")
        return "\n".join(parts)